diff options
Diffstat (limited to 'arch/s390/kernel')
143 files changed, 24418 insertions, 19594 deletions
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/s390/kernel/.gitignore +++ b/arch/s390/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4bb2a4656163..42c83d60d6fa 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -1,17 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the linux kernel. # ifdef CONFIG_FUNCTION_TRACER -# Don't trace early setup code and tracing code -CFLAGS_REMOVE_early.o = -pg -CFLAGS_REMOVE_ftrace.o = -pg + +# Do not trace tracer code +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + +# Do not trace early setup code +CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE) + endif +GCOV_PROFILE_early.o := n +KCOV_INSTRUMENT_early.o := n +UBSAN_SANITIZE_early.o := n +KASAN_SANITIZE_ipl.o := n +KASAN_SANITIZE_machine_kexec.o := n + # # Passing null pointers is ok for smp code, since we access the lowcore here. # -CFLAGS_smp.o := -Wno-nonnull +CFLAGS_smp.o := -Wno-nonnull # # Disable tailcall optimizations for stack / callchain walking functions @@ -20,51 +34,51 @@ CFLAGS_smp.o := -Wno-nonnull # CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls +CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -# -# Pass UTS_MACHINE for user_regset definition -# -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' +obj-y := head.o traps.o time.o process.o early.o setup.o idle.o vtime.o +obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o +obj-y += sysinfo.o lgr.o os_info.o ctlreg.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o +obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ -CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w +always-$(KBUILD_BUILTIN) += vmlinux.lds -obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o -obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o -obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += dumpstack.o +obj-$(CONFIG_SYSFS) += nospec-sysfs.o +CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) -obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) -obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) -obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) - -extra-y += head.o vmlinux.lds -extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) - -obj-$(CONFIG_MODULES) += s390_ksyms.o module.o -obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SCHED_BOOK) += topology.o -obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o +obj-$(CONFIG_SYSFS) += cpacf.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o -compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_wrapper.o compat_exec_domain.o \ - $(compat-obj-y) - -obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_RETHOOK) += rethook.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o +obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o +obj-$(CONFIG_CERT_STORE) += cert_store.o +obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o -ifdef CONFIG_64BIT -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o -obj-y += runtime_instr.o cache.o -endif +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += perf_pai.o + +obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso -obj-$(CONFIG_64BIT) += vdso64/ -obj-$(CONFIG_32BIT) += vdso32/ -obj-$(CONFIG_COMPAT) += vdso32/ +obj-y += vdso/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c new file mode 100644 index 000000000000..6252b7d115dd --- /dev/null +++ b/arch/s390/kernel/abs_lowcore.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pgtable.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> + +unsigned long __bootdata_preserved(__abs_lowcore); + +int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + unsigned long phys = __pa(lc); + int rc, i; + + for (i = 0; i < LC_PAGES; i++) { + rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc); + if (rc) { + /* + * Do not unmap allocated page tables in case the + * allocation was not requested. In such a case the + * request is expected coming from an atomic context, + * while the unmap attempt might sleep. + */ + if (alloc) { + for (--i; i >= 0; i--) { + addr -= PAGE_SIZE; + vmem_unmap_4k_page(addr); + } + } + return rc; + } + addr += PAGE_SIZE; + phys += PAGE_SIZE; + } + return 0; +} + +void abs_lowcore_unmap(int cpu) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + int i; + + for (i = 0; i < LC_PAGES; i++) { + vmem_unmap_4k_page(addr); + addr += PAGE_SIZE; + } +} diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c new file mode 100644 index 000000000000..90c0e6408992 --- /dev/null +++ b/arch/s390/kernel/alternative.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + +#include <linux/uaccess.h> +#include <linux/printk.h> +#include <asm/nospec-branch.h> +#include <asm/abs_lowcore.h> +#include <asm/alternative.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} + +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) +{ + struct alt_debug *d; + struct alt_instr *a; + bool debug, replace; + u8 *old, *new; + + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + */ + d = &alt_debug; + for (a = start; a < end; a++) { + if (!(a->ctx & ctx)) + continue; + switch (a->type) { + case ALT_TYPE_FACILITY: + replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); + break; + case ALT_TYPE_SPEC: + replace = nobp_enabled(); + debug = d->spec; + break; + default: + replace = false; + debug = false; + } + if (!replace) + continue; + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); + } +} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2416138ebd3e..e1a5b5b54e4f 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -1,170 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Generate definitions needed by assembly language modules. * This code generates raw asm output which is post-processed to extract * and format the required data. */ - -#define ASM_OFFSETS_C +#define COMPILE_OFFSETS #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> -#include <asm/cputime.h> -#include <asm/vdso.h> -#include <asm/pgtable.h> - -/* - * Make sure that the compiler is new enough. We want a compiler that - * is known to work with the "Q" assembler constraint. - */ -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) -#error Your compiler is too old; please use version 3.3.3 or newer -#endif +#include <linux/purgatory.h> +#include <linux/pgtable.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> +#include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); - DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); - BLANK(); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); + /* task struct offsets */ + OFFSET(__TASK_stack, task_struct, stack); + OFFSET(__TASK_thread, task_struct, thread); + OFFSET(__TASK_pid, task_struct, pid); +#ifdef CONFIG_STACKPROTECTOR + OFFSET(__TASK_stack_canary, task_struct, stack_canary); +#endif BLANK(); - DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); - DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); - DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); + /* thread struct offsets */ + OFFSET(__THREAD_ksp, thread_struct, ksp); BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task)); - DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); - DEFINE(__TI_flags, offsetof(struct thread_info, flags)); - DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); - DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); - DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); - DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); + /* thread info offsets */ + OFFSET(__TI_flags, task_struct, thread_info.flags); + OFFSET(__TI_sie, task_struct, thread_info.sie); BLANK(); - DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); - DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); - DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); + /* pt_regs offsets */ + OFFSET(__PT_PSW, pt_regs, psw); + OFFSET(__PT_GPRS, pt_regs, gprs); + OFFSET(__PT_R0, pt_regs, gprs[0]); + OFFSET(__PT_R1, pt_regs, gprs[1]); + OFFSET(__PT_R2, pt_regs, gprs[2]); + OFFSET(__PT_R3, pt_regs, gprs[3]); + OFFSET(__PT_R4, pt_regs, gprs[4]); + OFFSET(__PT_R5, pt_regs, gprs[5]); + OFFSET(__PT_R6, pt_regs, gprs[6]); + OFFSET(__PT_R7, pt_regs, gprs[7]); + OFFSET(__PT_R8, pt_regs, gprs[8]); + OFFSET(__PT_R9, pt_regs, gprs[9]); + OFFSET(__PT_R10, pt_regs, gprs[10]); + OFFSET(__PT_R11, pt_regs, gprs[11]); + OFFSET(__PT_R12, pt_regs, gprs[12]); + OFFSET(__PT_R13, pt_regs, gprs[13]); + OFFSET(__PT_R14, pt_regs, gprs[14]); + OFFSET(__PT_R15, pt_regs, gprs[15]); + OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); + OFFSET(__PT_FLAGS, pt_regs, flags); + OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + /* stack_frame offsets */ + OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); + OFFSET(__SF_GPRS, stack_frame, gprs); + OFFSET(__SF_EMPTY, stack_frame, empty[0]); + OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); + OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); + OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); + OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); + OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); + OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq); + DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); - /* timeval/timezone offsets for use by vdso */ - DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); - DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); - DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); - DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); - DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); - DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); - DEFINE(__VDSO_NTP_MULT, offsetof(struct vdso_data, ntp_mult)); - DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); - DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); - /* constants used by the vdso */ - DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain); + DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user)); + OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address); + DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper)); BLANK(); - /* idle data offsets */ - DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); - DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit)); - DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter)); - DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit)); - /* lowcore offsets */ - DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); - DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); - DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); - DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); - DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); - DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); - DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); - DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); - DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid)); - DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); - DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id)); - DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id)); - DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); - DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); - DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); - DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); - DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); - DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); - DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); - DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); - DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); - DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); - DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); - DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); - DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); - DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); - DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); - DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); - DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); - DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + /* hardware defined lowcore locations 0x000 - 0x1ff */ + OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); + OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); + OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); + OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); + OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); + OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); + OFFSET(__LC_PER_CODE, lowcore, per_code); + OFFSET(__LC_PER_ATMID, lowcore, per_atmid); + OFFSET(__LC_PER_ADDRESS, lowcore, per_address); + OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id); + OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id); + OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id); + OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id); + OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code); + OFFSET(__LC_MON_CODE, lowcore, monitor_code); + OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id); + OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr); + OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm); + OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word); + OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); + OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); + OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); + OFFSET(__LC_PGM_LAST_BREAK, lowcore, pgm_last_break); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); + OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); + OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); + OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); + OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw); + OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw); + OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw); + OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw); + OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw); + OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw); + OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw); + OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw); + OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw); + /* software defined lowcore locations 0x200 - 0xdff*/ + OFFSET(__LC_SAVE_AREA, lowcore, save_area); + OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); + OFFSET(__LC_PCPU, lowcore, pcpu); + OFFSET(__LC_RETURN_PSW, lowcore, return_psw); + OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); + OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); + OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); + OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); + OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); + OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); + OFFSET(__LC_INT_CLOCK, lowcore, int_clock); + OFFSET(__LC_CURRENT, lowcore, current_task); + OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); + OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); + OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); + OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack); + OFFSET(__LC_RESTART_FN, lowcore, restart_fn); + OFFSET(__LC_RESTART_DATA, lowcore, restart_data); + OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags); + OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); + OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_LPP, lowcore, lpp); + OFFSET(__LC_CURRENT_PID, lowcore, current_pid); + OFFSET(__LC_LAST_BREAK, lowcore, last_break); + /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ + OFFSET(__LC_STACK_CANARY, lowcore, stack_canary); + OFFSET(__LC_DUMP_REIPL, lowcore, ipib); + OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); + OFFSET(__LC_OS_INFO, lowcore, os_info); + /* hardware defined lowcore locations 0x1000 - 0x18ff */ + OFFSET(__LC_MCESAD, lowcore, mcesad); + OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); + OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); + OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); + OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area); + OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area); + OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area); + OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); + OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); + OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); + OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area); + OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); + OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); + OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); - DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); - DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); - DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); - DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); - DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); - DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); - DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); - DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); - DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); - DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); - DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); - DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer)); - DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); - DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); - DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); - DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); - DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); - DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); - DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); - DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); - DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); - DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); - DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data)); - DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source)); - DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); - DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); - DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); - DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); - DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); - DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); - BLANK(); - DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); - DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); - DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); - DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area)); - DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area)); - DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); - DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); - DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); -#ifdef CONFIG_32BIT - DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); -#else /* CONFIG_32BIT */ - DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); - DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); - DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); - DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); - DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); - DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); - DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); - DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); - DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); - DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); - DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); - DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); -#endif /* CONFIG_32BIT */ + /* gmap/sie offsets */ + OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); + OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); + /* kexec_sha_region */ + OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start); + OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len); + DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); + /* sizeof kernel parameter area */ + DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea)); + /* kernel parameter area offsets */ + DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device)); + DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start)); + DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size)); + DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base)); + DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); + DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); + DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); + OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); + + OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; } diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c index f4932c22ebe4..7897d9411e13 100644 --- a/arch/s390/kernel/audit.c +++ b/arch/s390/kernel/audit.c @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/types.h> #include <linux/audit.h> #include <asm/unistd.h> -#include "audit.h" static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> @@ -31,42 +31,29 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { -#ifdef CONFIG_COMPAT - if (arch == AUDIT_ARCH_S390) - return 1; -#endif return 0; } int audit_classify_syscall(int abi, unsigned syscall) { -#ifdef CONFIG_COMPAT - if (abi == AUDIT_ARCH_S390) - return s390_classify_syscall(syscall); -#endif switch(syscall) { case __NR_open: - return 2; + return AUDITSC_OPEN; case __NR_openat: - return 3; + return AUDITSC_OPENAT; case __NR_socketcall: - return 4; + return AUDITSC_SOCKETCALL; case __NR_execve: - return 5; + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; default: - return 0; + return AUDITSC_NATIVE; } } static int __init audit_classes_init(void) { -#ifdef CONFIG_COMPAT - audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); - audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); - audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); -#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h deleted file mode 100644 index 12b56f4b5a73..000000000000 --- a/arch/s390/kernel/audit.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __ARCH_S390_KERNEL_AUDIT_H -#define __ARCH_S390_KERNEL_AUDIT_H - -#include <linux/types.h> - -#ifdef CONFIG_COMPAT -extern int s390_classify_syscall(unsigned); -extern __u32 s390_dir_class[]; -extern __u32 s390_write_class[]; -extern __u32 s390_read_class[]; -extern __u32 s390_chattr_class[]; -extern __u32 s390_signal_class[]; -#endif /* CONFIG_COMPAT */ - -#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S deleted file mode 100644 index 797a823a2275..000000000000 --- a/arch/s390/kernel/base.S +++ /dev/null @@ -1,206 +0,0 @@ -/* - * arch/s390/kernel/base.S - * - * Copyright IBM Corp. 2006, 2007 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * Michael Holzheu <holzheu@de.ibm.com> - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> -#include <asm/sigp.h> - -#ifdef CONFIG_64BIT - -ENTRY(s390_base_mcck_handler) - basr %r13,0 -0: lg %r15,__LC_PANIC_STACK # load panic stack - aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_mcck_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 1f - basr %r14,%r1 -1: la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) - lpswe __LC_MCK_OLD_PSW - - .section .bss - .align 8 - .globl s390_base_mcck_handler_fn -s390_base_mcck_handler_fn: - .quad 0 - .previous - -ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA_ASYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_ext_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 1f - basr %r14,%r1 -1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC - ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit - lpswe __LC_EXT_OLD_PSW - - .section .bss - .align 8 - .globl s390_base_ext_handler_fn -s390_base_ext_handler_fn: - .quad 0 - .previous - -ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_pgm_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 1f - basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA_SYNC - lpswe __LC_PGM_OLD_PSW -1: lpswe disabled_wait_psw-0b(%r13) - - .align 8 -disabled_wait_psw: - .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler - - .section .bss - .align 8 - .globl s390_base_pgm_handler_fn -s390_base_pgm_handler_fn: - .quad 0 - .previous - -# -# Calls diag 308 subcode 1 and continues execution -# -# The following conditions must be ensured before calling this function: -# * Prefix register = 0 -# * Lowcore protection is disabled -# -ENTRY(diag308_reset) - larl %r4,.Lctlregs # Save control registers - stctg %c0,%c15,0(%r4) - larl %r4,.Lfpctl # Floating point control register - stfpc 0(%r4) - larl %r4,.Lcontinue_psw # Save PSW flags - epsw %r2,%r3 - stm %r2,%r3,0(%r4) - larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 - lghi %r3,0 - lg %r4,0(%r4) # Save PSW - sturg %r4,%r3 # Use sturg, because of large pages - lghi %r1,1 - diag %r1,%r1,0x308 -.Lrestart_part2: - lhi %r0,0 # Load r0 with zero - lhi %r1,2 # Use mode 2 = ESAME (dump) - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode - sam64 # Switch to 64 bit addressing mode - larl %r4,.Lctlregs # Restore control registers - lctlg %c0,%c15,0(%r4) - larl %r4,.Lfpctl # Restore floating point ctl register - lfpc 0(%r4) - larl %r4,.Lcontinue_psw # Restore PSW flags - lpswe 0(%r4) -.Lcontinue: - br %r14 -.align 16 -.Lrestart_psw: - .long 0x00080000,0x80000000 + .Lrestart_part2 - - .section .data..nosave,"aw",@progbits -.align 8 -.Lcontinue_psw: - .quad 0,.Lcontinue - .previous - - .section .bss -.align 8 -.Lctlregs: - .rept 16 - .quad 0 - .endr -.Lfpctl: - .long 0 - .previous - -#else /* CONFIG_64BIT */ - -ENTRY(s390_base_mcck_handler) - basr %r13,0 -0: l %r15,__LC_PANIC_STACK # load panic stack - ahi %r15,-STACK_FRAME_OVERHEAD - l %r1,2f-0b(%r13) - l %r1,0(%r1) - ltr %r1,%r1 - jz 1f - basr %r14,%r1 -1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA - lpsw __LC_MCK_OLD_PSW - -2: .long s390_base_mcck_handler_fn - - .section .bss - .align 4 - .globl s390_base_mcck_handler_fn -s390_base_mcck_handler_fn: - .long 0 - .previous - -ENTRY(s390_base_ext_handler) - stm %r0,%r15,__LC_SAVE_AREA_ASYNC - basr %r13,0 -0: ahi %r15,-STACK_FRAME_OVERHEAD - l %r1,2f-0b(%r13) - l %r1,0(%r1) - ltr %r1,%r1 - jz 1f - basr %r14,%r1 -1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC - ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit - lpsw __LC_EXT_OLD_PSW - -2: .long s390_base_ext_handler_fn - - .section .bss - .align 4 - .globl s390_base_ext_handler_fn -s390_base_ext_handler_fn: - .long 0 - .previous - -ENTRY(s390_base_pgm_handler) - stm %r0,%r15,__LC_SAVE_AREA_SYNC - basr %r13,0 -0: ahi %r15,-STACK_FRAME_OVERHEAD - l %r1,2f-0b(%r13) - l %r1,0(%r1) - ltr %r1,%r1 - jz 1f - basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA_SYNC - lpsw __LC_PGM_OLD_PSW - -1: lpsw disabled_wait_psw-0b(%r13) - -2: .long s390_base_pgm_handler_fn - -disabled_wait_psw: - .align 8 - .long 0x000a0000,0x00000000 + s390_base_pgm_handler - - .section .bss - .align 4 - .globl s390_base_pgm_handler_fn -s390_base_pgm_handler_fn: - .long 0 - .previous - -#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c deleted file mode 100644 index 102da5e23037..000000000000 --- a/arch/s390/kernel/bitmap.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... - * See include/asm/{bitops.h|posix_types.h} for details - * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#include <linux/bitops.h> -#include <linux/module.h> - -const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }; -EXPORT_SYMBOL(_oi_bitmap); - -const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f }; -EXPORT_SYMBOL(_ni_bitmap); - -const char _zb_findmap[] = { - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 }; -EXPORT_SYMBOL(_zb_findmap); - -const char _sb_findmap[] = { - 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; -EXPORT_SYMBOL(_sb_findmap); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 64b24650e4f8..4f2669030220 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -1,41 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Extract CPU cache information and expose them via sysfs. * * Copyright IBM Corp. 2012 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/notifier.h> #include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/slab.h> #include <linux/cpu.h> +#include <linux/cacheinfo.h> #include <asm/facility.h> -struct cache { - unsigned long size; - unsigned int line_size; - unsigned int associativity; - unsigned int nr_sets; - unsigned int level : 3; - unsigned int type : 2; - unsigned int private : 1; - struct list_head list; -}; - -struct cache_dir { - struct kobject *kobj; - struct cache_index_dir *index; -}; - -struct cache_index_dir { - struct kobject kobj; - int cpu; - struct cache *cache; - struct cache_index_dir *next; -}; - enum { CACHE_SCOPE_NOTEXISTS, CACHE_SCOPE_PRIVATE, @@ -44,10 +18,10 @@ enum { }; enum { - CACHE_TYPE_SEPARATE, - CACHE_TYPE_DATA, - CACHE_TYPE_INSTRUCTION, - CACHE_TYPE_UNIFIED, + CTYPE_SEPARATE, + CTYPE_DATA, + CTYPE_INSTRUCTION, + CTYPE_UNIFIED, }; enum { @@ -70,319 +44,128 @@ struct cache_info { }; #define CACHE_MAX_LEVEL 8 - union cache_topology { struct cache_info ci[CACHE_MAX_LEVEL]; - unsigned long long raw; + unsigned long raw; }; static const char * const cache_type_string[] = { - "Data", + "", "Instruction", + "Data", + "", "Unified", }; -static struct cache_dir *cache_dir_cpu[NR_CPUS]; -static LIST_HEAD(cache_list); +static const enum cache_type cache_type_map[] = { + [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INSTRUCTION] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; void show_cacheinfo(struct seq_file *m) { - struct cache *cache; - int index = 0; + struct cpu_cacheinfo *this_cpu_ci; + struct cacheinfo *cache; + int idx; - list_for_each_entry(cache, &cache_list, list) { - seq_printf(m, "cache%-11d: ", index); + this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); + for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { + cache = this_cpu_ci->info_list + idx; + seq_printf(m, "cache%-11d: ", idx); seq_printf(m, "level=%d ", cache->level); seq_printf(m, "type=%s ", cache_type_string[cache->type]); - seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); - seq_printf(m, "size=%luK ", cache->size >> 10); - seq_printf(m, "line_size=%u ", cache->line_size); - seq_printf(m, "associativity=%d", cache->associativity); + seq_printf(m, "scope=%s ", + cache->disable_sysfs ? "Shared" : "Private"); + seq_printf(m, "size=%dK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->coherency_line_size); + seq_printf(m, "associativity=%d", cache->ways_of_associativity); seq_puts(m, "\n"); - index++; } } -static inline unsigned long ecag(int ai, int li, int ti) +static inline enum cache_type get_cache_type(struct cache_info *ci, int level) { - unsigned long cmd, val; + if (level >= CACHE_MAX_LEVEL) + return CACHE_TYPE_NOCACHE; + ci += level; + if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE) + return CACHE_TYPE_NOCACHE; + return cache_type_map[ci->type]; +} - cmd = ai << 4 | li << 1 | ti; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (cmd)); - return val; +static inline unsigned long ecag(int ai, int li, int ti) +{ + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); } -static int __init cache_add(int level, int private, int type) +static void ci_leaf_init(struct cacheinfo *this_leaf, int private, + enum cache_type type, unsigned int level, int cpu) { - struct cache *cache; - int ti; + int ti, num_sets; - cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) - return -ENOMEM; - if (type == CACHE_TYPE_INSTRUCTION) + if (type == CACHE_TYPE_INST) ti = CACHE_TI_INSTRUCTION; else ti = CACHE_TI_UNIFIED; - cache->size = ecag(EXTRACT_SIZE, level, ti); - cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); - cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); - cache->nr_sets = cache->size / cache->associativity; - cache->nr_sets /= cache->line_size; - cache->private = private; - cache->level = level + 1; - cache->type = type - 1; - list_add_tail(&cache->list, &cache_list); - return 0; -} - -static void __init cache_build_info(void) -{ - struct cache *cache, *next; + this_leaf->level = level + 1; + this_leaf->type = type; + this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + this_leaf->size = ecag(EXTRACT_SIZE, level, ti); + num_sets = this_leaf->size / this_leaf->coherency_line_size; + num_sets /= this_leaf->ways_of_associativity; + this_leaf->number_of_sets = num_sets; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + if (!private) + this_leaf->disable_sysfs = true; +} + +int init_cache_level(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int level = 0, leaves = 0; union cache_topology ct; - int level, private, rc; + enum cache_type ctype; + if (!this_cpu_ci) + return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); - for (level = 0; level < CACHE_MAX_LEVEL; level++) { - switch (ct.ci[level].scope) { - case CACHE_SCOPE_NOTEXISTS: - case CACHE_SCOPE_RESERVED: - return; - case CACHE_SCOPE_SHARED: - private = 0; - break; - case CACHE_SCOPE_PRIVATE: - private = 1; + do { + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_NOCACHE) break; - } - if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { - rc = cache_add(level, private, CACHE_TYPE_DATA); - rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); - } else { - rc = cache_add(level, private, ct.ci[level].type); - } - if (rc) - goto error; - } - return; -error: - list_for_each_entry_safe(cache, next, &cache_list, list) { - list_del(&cache->list); - kfree(cache); - } -} - -static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu) -{ - struct cache_dir *cache_dir; - struct kobject *kobj = NULL; - struct device *dev; - - dev = get_cpu_device(cpu); - if (!dev) - goto out; - kobj = kobject_create_and_add("cache", &dev->kobj); - if (!kobj) - goto out; - cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); - if (!cache_dir) - goto out; - cache_dir->kobj = kobj; - cache_dir_cpu[cpu] = cache_dir; - return cache_dir; -out: - kobject_put(kobj); - return NULL; -} - -static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) -{ - return container_of(kobj, struct cache_index_dir, kobj); -} - -static void cache_index_release(struct kobject *kobj) -{ - struct cache_index_dir *index; - - index = kobj_to_cache_index_dir(kobj); - kfree(index); -} - -static ssize_t cache_index_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct kobj_attribute *kobj_attr; - - kobj_attr = container_of(attr, struct kobj_attribute, attr); - return kobj_attr->show(kobj, kobj_attr, buf); -} - -#define DEFINE_CACHE_ATTR(_name, _format, _value) \ -static ssize_t cache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *buf) \ -{ \ - struct cache_index_dir *index; \ - \ - index = kobj_to_cache_index_dir(kobj); \ - return sprintf(buf, _format, _value); \ -} \ -static struct kobj_attribute cache_##_name##_attr = \ - __ATTR(_name, 0444, cache_##_name##_show, NULL); - -DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); -DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); -DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); -DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); -DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); -DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); - -static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) -{ - struct cache_index_dir *index; - int len; - - index = kobj_to_cache_index_dir(kobj); - len = type ? - cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : - cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); - len += sprintf(&buf[len], "\n"); - return len; -} - -static ssize_t shared_cpu_map_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return shared_cpu_map_func(kobj, 0, buf); -} -static struct kobj_attribute cache_shared_cpu_map_attr = - __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); - -static ssize_t shared_cpu_list_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return shared_cpu_map_func(kobj, 1, buf); -} -static struct kobj_attribute cache_shared_cpu_list_attr = - __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); - -static struct attribute *cache_index_default_attrs[] = { - &cache_type_attr.attr, - &cache_size_attr.attr, - &cache_number_of_sets_attr.attr, - &cache_ways_of_associativity_attr.attr, - &cache_level_attr.attr, - &cache_coherency_line_size_attr.attr, - &cache_shared_cpu_map_attr.attr, - &cache_shared_cpu_list_attr.attr, - NULL, -}; - -static const struct sysfs_ops cache_index_ops = { - .show = cache_index_show, -}; - -static struct kobj_type cache_index_type = { - .sysfs_ops = &cache_index_ops, - .release = cache_index_release, - .default_attrs = cache_index_default_attrs, -}; - -static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir, - struct cache *cache, int index, - int cpu) -{ - struct cache_index_dir *index_dir; - int rc; - - index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); - if (!index_dir) - return -ENOMEM; - index_dir->cache = cache; - index_dir->cpu = cpu; - rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, - cache_dir->kobj, "index%d", index); - if (rc) - goto out; - index_dir->next = cache_dir->index; - cache_dir->index = index_dir; + /* Separate instruction and data caches */ + leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; + } while (++level < CACHE_MAX_LEVEL); + this_cpu_ci->num_levels = level; + this_cpu_ci->num_leaves = leaves; return 0; -out: - kfree(index_dir); - return rc; } -static int __cpuinit cache_add_cpu(int cpu) +int populate_cache_leaves(unsigned int cpu) { - struct cache_dir *cache_dir; - struct cache *cache; - int rc, index = 0; - - if (list_empty(&cache_list)) - return 0; - cache_dir = cache_create_cache_dir(cpu); - if (!cache_dir) - return -ENOMEM; - list_for_each_entry(cache, &cache_list, list) { - if (!cache->private) - break; - rc = cache_create_index_dir(cache_dir, cache, index, cpu); - if (rc) - return rc; - index++; - } - return 0; -} - -static void __cpuinit cache_remove_cpu(int cpu) -{ - struct cache_index_dir *index, *next; - struct cache_dir *cache_dir; - - cache_dir = cache_dir_cpu[cpu]; - if (!cache_dir) - return; - index = cache_dir->index; - while (index) { - next = index->next; - kobject_put(&index->kobj); - index = next; - } - kobject_put(cache_dir->kobj); - kfree(cache_dir); - cache_dir_cpu[cpu] = NULL; -} - -static int __cpuinit cache_hotplug(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - int rc = 0; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + unsigned int level, idx, pvt; + union cache_topology ct; + enum cache_type ctype; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - rc = cache_add_cpu(cpu); - if (rc) - cache_remove_cpu(cpu); - break; - case CPU_DEAD: - cache_remove_cpu(cpu); - break; + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (idx = 0, level = 0; level < this_cpu_ci->num_levels && + idx < this_cpu_ci->num_leaves; idx++, level++) { + if (!this_leaf) + return -EINVAL; + pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0; + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_SEPARATE) { + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu); + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu); + } else { + ci_leaf_init(this_leaf++, pvt, ctype, level, cpu); + } } - return rc ? NOTIFY_BAD : NOTIFY_OK; -} - -static int __init cache_init(void) -{ - int cpu; - - if (!test_facility(34)) - return 0; - cache_build_info(); - for_each_online_cpu(cpu) - cache_add_cpu(cpu); - hotcpu_notifier(cache_hotplug, 0); + this_cpu_ci->cpu_map_populated = true; return 0; } -device_initcall(cache_init); diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c new file mode 100644 index 000000000000..c217a5e64094 --- /dev/null +++ b/arch/s390/kernel/cert_store.c @@ -0,0 +1,813 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DIAG 0x320 support and certificate store handling + * + * Copyright IBM Corp. 2023 + * Author(s): Anastasia Eskova <anastasia.eskova@ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/key-type.h> +#include <linux/key.h> +#include <linux/keyctl.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <linux/vmalloc.h> +#include <crypto/sha2.h> +#include <keys/user-type.h> +#include <asm/debug.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> + +#define DIAG_MAX_RETRIES 10 + +#define VCE_FLAGS_VALID_MASK 0x80 + +#define ISM_LEN_DWORDS 4 +#define VCSSB_LEN_BYTES 128 +#define VCSSB_LEN_NO_CERTS 4 +#define VCB_LEN_NO_CERTS 64 +#define VC_NAME_LEN_BYTES 64 + +#define CERT_STORE_KEY_TYPE_NAME "cert_store_key" +#define CERT_STORE_KEYRING_NAME "cert_store" + +static debug_info_t *cert_store_dbf; +static debug_info_t *cert_store_hexdump; + +#define pr_dbf_msg(fmt, ...) \ + debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__) + +enum diag320_subcode { + DIAG320_SUBCODES = 0, + DIAG320_STORAGE = 1, + DIAG320_CERT_BLOCK = 2, +}; + +enum diag320_rc { + DIAG320_RC_OK = 0x0001, + DIAG320_RC_CS_NOMATCH = 0x0306, +}; + +/* Verification Certificates Store Support Block (VCSSB). */ +struct vcssb { + u32 vcssb_length; + u8 pad_0x04[3]; + u8 version; + u8 pad_0x08[8]; + u32 cs_token; + u8 pad_0x14[12]; + u16 total_vc_index_count; + u16 max_vc_index_count; + u8 pad_0x24[28]; + u32 max_vce_length; + u32 max_vcxe_length; + u8 pad_0x48[8]; + u32 max_single_vcb_length; + u32 total_vcb_length; + u32 max_single_vcxb_length; + u32 total_vcxb_length; + u8 pad_0x60[32]; +} __packed __aligned(8); + +/* Verification Certificate Entry (VCE) Header. */ +struct vce_header { + u32 vce_length; + u8 flags; + u8 key_type; + u16 vc_index; + u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */ + u8 vc_format; + u8 pad_0x49; + u16 key_id_length; + u8 pad_0x4c; + u8 vc_hash_type; + u16 vc_hash_length; + u8 pad_0x50[4]; + u32 vc_length; + u8 pad_0x58[8]; + u16 vc_hash_offset; + u16 vc_offset; + u8 pad_0x64[28]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB) Header. */ +struct vcb_header { + u32 vcb_input_length; + u8 pad_0x04[4]; + u16 first_vc_index; + u16 last_vc_index; + u32 pad_0x0c; + u32 cs_token; + u8 pad_0x14[12]; + u32 vcb_output_length; + u8 pad_0x24[3]; + u8 version; + u16 stored_vc_count; + u16 remaining_vc_count; + u8 pad_0x2c[20]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB). */ +struct vcb { + struct vcb_header vcb_hdr; + u8 vcb_buf[]; +} __packed __aligned(4); + +/* Verification Certificate Entry (VCE). */ +struct vce { + struct vce_header vce_hdr; + u8 cert_data_buf[]; +} __packed __aligned(4); + +static void cert_store_key_describe(const struct key *key, struct seq_file *m) +{ + char ascii[VC_NAME_LEN_BYTES + 1]; + + /* + * First 64 bytes of the key description is key name in EBCDIC CP 500. + * Convert it to ASCII for displaying in /proc/keys. + */ + strscpy(ascii, key->description); + EBCASC_500(ascii, VC_NAME_LEN_BYTES); + seq_puts(m, ascii); + + seq_puts(m, &key->description[VC_NAME_LEN_BYTES]); + if (key_is_positive(key)) + seq_printf(m, ": %u", key->datalen); +} + +/* + * Certificate store key type takes over properties of + * user key but cannot be updated. + */ +static struct key_type key_type_cert_store_key = { + .name = CERT_STORE_KEY_TYPE_NAME, + .preparse = user_preparse, + .free_preparse = user_free_preparse, + .instantiate = generic_key_instantiate, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = cert_store_key_describe, + .read = user_read, +}; + +/* Logging functions. */ +static void pr_dbf_vcb(const struct vcb *b) +{ + pr_dbf_msg("VCB Header:"); + pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length); + pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index); + pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index); + pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token); + pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length); + pr_dbf_msg("version: %d", b->vcb_hdr.version); + pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count); + pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count); +} + +static void pr_dbf_vce(const struct vce *e) +{ + unsigned char vc_name[VC_NAME_LEN_BYTES + 1]; + char log_string[VC_NAME_LEN_BYTES + 40]; + + pr_dbf_msg("VCE Header:"); + pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length); + pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags); + pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type); + pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index); + pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format); + pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length); + pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type); + pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length); + pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset); + pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length); + pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset); + + /* Certificate name in ASCII. */ + memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES); + EBCASC_500(vc_name, VC_NAME_LEN_BYTES); + vc_name[VC_NAME_LEN_BYTES] = '\0'; + + snprintf(log_string, sizeof(log_string), + "index: %d vce_hdr.vc_name (ASCII): %s", + e->vce_hdr.vc_index, vc_name); + debug_text_event(cert_store_hexdump, 3, log_string); + + /* Certificate data. */ + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start"); + debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128); + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end"); + debug_event(cert_store_hexdump, 3, + (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128); +} + +static void pr_dbf_vcssb(const struct vcssb *s) +{ + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1"); + debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES); + + pr_dbf_msg("VCSSB:"); + pr_dbf_msg("vcssb_length: %u", s->vcssb_length); + pr_dbf_msg("version: %u", s->version); + pr_dbf_msg("cs_token: %u", s->cs_token); + pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count); + pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count); + pr_dbf_msg("max_vce_length: %u", s->max_vce_length); + pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length); + pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length); + pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length); + pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length); + pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length); +} + +static int __diag320(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, }; + + asm_inline volatile( + " diag %[rp],%[subcode],0x320\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + + return rp.odd; +} + +static int diag320(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X320); + + return __diag320(subcode, addr); +} + +/* + * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in + * VCE. Return -EINVAL if hashes don't match. + */ +static int check_certificate_hash(const struct vce *vce) +{ + u8 hash[SHA256_DIGEST_SIZE]; + u16 vc_hash_length; + u8 *vce_hash; + + vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset; + vc_hash_length = vce->vce_hdr.vc_hash_length; + sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash); + if (memcmp(vce_hash, hash, vc_hash_length) == 0) + return 0; + + pr_dbf_msg("SHA256 hash of received certificate does not match"); + debug_text_event(cert_store_hexdump, 3, "VCE hash:"); + debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE); + debug_text_event(cert_store_hexdump, 3, "Calculated hash:"); + debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE); + + return -EINVAL; +} + +static int check_certificate_valid(const struct vce *vce) +{ + if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) { + pr_dbf_msg("Certificate entry is invalid"); + return -EINVAL; + } + if (vce->vce_hdr.vc_format != 1) { + pr_dbf_msg("Certificate format is not supported"); + return -EINVAL; + } + if (vce->vce_hdr.vc_hash_type != 1) { + pr_dbf_msg("Hash type is not supported"); + return -EINVAL; + } + + return check_certificate_hash(vce); +} + +static struct key *get_user_session_keyring(void) +{ + key_ref_t us_keyring_ref; + + us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING, + KEY_LOOKUP_CREATE, KEY_NEED_LINK); + if (IS_ERR(us_keyring_ref)) { + pr_dbf_msg("Couldn't get user session keyring: %ld", + PTR_ERR(us_keyring_ref)); + return ERR_PTR(-ENOKEY); + } + key_ref_put(us_keyring_ref); + return key_ref_to_ptr(us_keyring_ref); +} + +/* Invalidate all keys from cert_store keyring. */ +static int invalidate_keyring_keys(struct key *keyring) +{ + unsigned long num_keys, key_index; + size_t keyring_payload_len; + key_serial_t *key_array; + struct key *current_key; + int rc; + + keyring_payload_len = key_type_keyring.read(keyring, NULL, 0); + num_keys = keyring_payload_len / sizeof(key_serial_t); + key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL); + if (!key_array) + return -ENOMEM; + + rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len); + if (rc != keyring_payload_len) { + pr_dbf_msg("Couldn't read keyring payload"); + goto out; + } + + for (key_index = 0; key_index < num_keys; key_index++) { + current_key = key_lookup(key_array[key_index]); + pr_dbf_msg("Invalidating key %08x", current_key->serial); + + key_invalidate(current_key); + key_put(current_key); + rc = key_unlink(keyring, current_key); + if (rc) { + pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc); + break; + } + } +out: + kfree(key_array); + return rc; +} + +static struct key *find_cs_keyring(void) +{ + key_ref_t cs_keyring_ref; + struct key *cs_keyring; + + cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true), + &key_type_keyring, CERT_STORE_KEYRING_NAME, + false); + if (!IS_ERR(cs_keyring_ref)) { + cs_keyring = key_ref_to_ptr(cs_keyring_ref); + key_ref_put(cs_keyring_ref); + goto found; + } + /* Search default locations: thread, process, session keyrings */ + cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL); + if (IS_ERR(cs_keyring)) + return NULL; + key_put(cs_keyring); +found: + return cs_keyring; +} + +static void cleanup_cs_keys(void) +{ + struct key *cs_keyring; + + cs_keyring = find_cs_keyring(); + if (!cs_keyring) + return; + + pr_dbf_msg("Found cert_store keyring. Purging..."); + /* + * Remove cert_store_key_type in case invalidation + * of old cert_store keys failed (= severe error). + */ + if (invalidate_keyring_keys(cs_keyring)) + unregister_key_type(&key_type_cert_store_key); + + keyring_clear(cs_keyring); + key_invalidate(cs_keyring); + key_put(cs_keyring); + key_unlink(get_user_session_keyring(), cs_keyring); +} + +static struct key *create_cs_keyring(void) +{ + static struct key *cs_keyring; + + /* Cleanup previous cs_keyring and all associated keys if any. */ + cleanup_cs_keys(); + cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID, + GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP, + NULL, get_user_session_keyring()); + if (IS_ERR(cs_keyring)) { + pr_dbf_msg("Can't allocate cert_store keyring"); + return NULL; + } + + pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial); + + /* + * In case a previous clean-up ran into an + * error and unregistered key type. + */ + register_key_type(&key_type_cert_store_key); + + return cs_keyring; +} + +/* + * Allocate memory and create key description in format + * [key name in EBCDIC]:[VCE index]:[CS token]. + * Return a pointer to key description or NULL if memory + * allocation failed. Memory should be freed by caller. + */ +static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) +{ + size_t len, name_len; + u32 cs_token; + char *desc; + + cs_token = vcssb->cs_token; + /* Description string contains "%64s:%05u:%010u\0". */ + name_len = sizeof(vce->vce_hdr.vc_name); + len = name_len + 1 + 5 + 1 + 10 + 1; + desc = kmalloc(len, GFP_KERNEL); + if (!desc) + return NULL; + + memcpy(desc, vce->vce_hdr.vc_name, name_len); + snprintf(desc + name_len, len - name_len, ":%05u:%010u", + vce->vce_hdr.vc_index, cs_token); + + return desc; +} + +/* + * Create a key of type "cert_store_key" using the data from VCE for key + * payload and key description. Link the key to "cert_store" keyring. + */ +static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce, + struct key *keyring) +{ + key_ref_t newkey; + char *desc; + int rc; + + desc = get_key_description(vcssb, vce); + if (!desc) + return -ENOMEM; + + newkey = key_create_or_update( + make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME, + desc, (u8 *)vce + vce->vce_hdr.vc_offset, + vce->vce_hdr.vc_length, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + + rc = PTR_ERR_OR_ZERO(newkey); + if (rc) { + pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc); + rc = -ENOKEY; + goto out; + } + + key_ref_put(newkey); +out: + kfree(desc); + return rc; +} + +/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */ +static int get_vcssb(struct vcssb *vcssb) +{ + int diag320_rc; + + memset(vcssb, 0, sizeof(*vcssb)); + vcssb->vcssb_length = VCSSB_LEN_BYTES; + diag320_rc = diag320(DIAG320_STORAGE, vcssb); + pr_dbf_vcssb(vcssb); + + if (diag320_rc != DIAG320_RC_OK) { + pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc); + return -EIO; + } + if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) { + pr_dbf_msg("No certificates available for current configuration"); + return -ENOKEY; + } + + return 0; +} + +static u32 get_4k_mult_vcb_size(struct vcssb *vcssb) +{ + return round_up(vcssb->max_single_vcb_length, PAGE_SIZE); +} + +/* Fill input fields of single-entry VCB that will be read by LPAR. */ +static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index) +{ + memset(vcb, 0, sizeof(*vcb)); + vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb); + vcb->vcb_hdr.cs_token = vcssb->cs_token; + + /* Request single entry. */ + vcb->vcb_hdr.first_vc_index = index; + vcb->vcb_hdr.last_vc_index = index; +} + +static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce) +{ + struct vce *extracted_vce; + + extracted_vce = (struct vce *)vcb->vcb_buf; + memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length); + pr_dbf_vce(vce); +} + +static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb) +{ + int rc, diag320_rc; + + fill_vcb_input(vcssb, vcb, index); + + diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb); + pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc); + pr_dbf_vcb(vcb); + + switch (diag320_rc) { + case DIAG320_RC_OK: + rc = 0; + if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) { + pr_dbf_msg("No certificate entry for index %u", index); + rc = -ENOKEY; + } else if (vcb->vcb_hdr.remaining_vc_count != 0) { + /* Retry on insufficient space. */ + pr_dbf_msg("Couldn't get all requested certificates"); + rc = -EAGAIN; + } + break; + case DIAG320_RC_CS_NOMATCH: + pr_dbf_msg("Certificate Store token mismatch"); + rc = -EAGAIN; + break; + default: + pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc); + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call, + * extract VCE and create a key from its' certificate. + */ +static int create_key_from_sevcb(struct vcssb *vcssb, u16 index, + struct key *keyring) +{ + struct vcb *vcb; + struct vce *vce; + int rc; + + rc = -ENOMEM; + vcb = vmalloc(get_4k_mult_vcb_size(vcssb)); + vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr)); + if (!vcb || !vce) + goto out; + + rc = get_sevcb(vcssb, index, vcb); + if (rc) + goto out; + + extract_vce_from_sevcb(vcb, vce); + rc = check_certificate_valid(vce); + if (rc) + goto out; + + rc = create_key_from_vce(vcssb, vce, keyring); + if (rc) + goto out; + + pr_dbf_msg("Successfully created key from Certificate Entry %d", index); +out: + vfree(vce); + vfree(vcb); + return rc; +} + +/* + * Request a single-entry VCB for each VCE available for the partition. + * Create a key from it and link it to cert_store keyring. If no keys + * could be created (i.e. VCEs were invalid) return -ENOKEY. + */ +static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring) +{ + int rc, index, count, added; + + count = 0; + added = 0; + /* Certificate Store entries indices start with 1 and have no gaps. */ + for (index = 1; index < vcssb->total_vc_index_count + 1; index++) { + pr_dbf_msg("Creating key from VCE %u", index); + rc = create_key_from_sevcb(vcssb, index, keyring); + count++; + + if (rc == -EAGAIN) + return rc; + + if (rc) + pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc); + else + added++; + } + + if (added == 0) { + pr_dbf_msg("Processed %d entries. No keys created", count); + return -ENOKEY; + } + + pr_info("Added %d of %d keys to cert_store keyring", added, count); + + /* + * Do not allow to link more keys to certificate store keyring after all + * the VCEs were processed. + */ + rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL); + if (rc) + pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc); + + return 0; +} + +/* + * Check which DIAG320 subcodes are installed. + * Return -ENOENT if subcodes 1 or 2 are not available. + */ +static int query_diag320_subcodes(void) +{ + unsigned long ism[ISM_LEN_DWORDS]; + int rc; + + rc = diag320(0, ism); + if (rc != DIAG320_RC_OK) { + pr_dbf_msg("DIAG320 subcode query returned %04x", rc); + return -ENOENT; + } + + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0"); + debug_event(cert_store_hexdump, 3, ism, sizeof(ism)); + + if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) { + pr_dbf_msg("Not all required DIAG320 subcodes are installed"); + return -ENOENT; + } + + return 0; +} + +/* + * Check if Certificate Store is supported by the firmware and DIAG320 subcodes + * 1 and 2 are installed. Create cert_store keyring and link all certificates + * available for the current partition to it as "cert_store_key" type + * keys. On refresh or error invalidate cert_store keyring and destroy + * all keys of "cert_store_key" type. + */ +static int fill_cs_keyring(void) +{ + struct key *cs_keyring; + struct vcssb *vcssb; + int rc; + + rc = -ENOMEM; + vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL); + if (!vcssb) + goto cleanup_keys; + + rc = -ENOENT; + if (!sclp.has_diag320) { + pr_dbf_msg("Certificate Store is not supported"); + goto cleanup_keys; + } + + rc = query_diag320_subcodes(); + if (rc) + goto cleanup_keys; + + rc = get_vcssb(vcssb); + if (rc) + goto cleanup_keys; + + rc = -ENOMEM; + cs_keyring = create_cs_keyring(); + if (!cs_keyring) + goto cleanup_keys; + + rc = add_certificates_to_keyring(vcssb, cs_keyring); + if (rc) + goto cleanup_cs_keyring; + + goto out; + +cleanup_cs_keyring: + key_put(cs_keyring); +cleanup_keys: + cleanup_cs_keys(); +out: + kfree(vcssb); + return rc; +} + +static DEFINE_MUTEX(cs_refresh_lock); +static int cs_status_val = -1; + +static ssize_t cs_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + if (cs_status_val == -1) + return sysfs_emit(buf, "uninitialized\n"); + else if (cs_status_val == 0) + return sysfs_emit(buf, "ok\n"); + + return sysfs_emit(buf, "failed (%d)\n", cs_status_val); +} + +static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status); + +static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int rc, retries; + + pr_dbf_msg("Refresh certificate store information requested"); + rc = mutex_lock_interruptible(&cs_refresh_lock); + if (rc) + return rc; + + for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) { + /* Request certificates from certificate store. */ + rc = fill_cs_keyring(); + if (rc) + pr_dbf_msg("Failed to refresh certificate store information (%d)", rc); + if (rc != -EAGAIN) + break; + } + cs_status_val = rc; + mutex_unlock(&cs_refresh_lock); + + return rc ?: count; +} + +static struct kobj_attribute refresh_attr = __ATTR_WO(refresh); + +static const struct attribute *cert_store_attrs[] __initconst = { + &cs_status_attr.attr, + &refresh_attr.attr, + NULL, +}; + +static struct kobject *cert_store_kobj; + +static int __init cert_store_init(void) +{ + int rc = -ENOMEM; + + cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64); + if (!cert_store_dbf) + goto cleanup_dbf; + + cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128); + if (!cert_store_hexdump) + goto cleanup_dbf; + + debug_register_view(cert_store_hexdump, &debug_hex_ascii_view); + debug_register_view(cert_store_dbf, &debug_sprintf_view); + + /* Create directory /sys/firmware/cert_store. */ + cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj); + if (!cert_store_kobj) + goto cleanup_dbf; + + rc = sysfs_create_files(cert_store_kobj, cert_store_attrs); + if (rc) + goto cleanup_kobj; + + register_key_type(&key_type_cert_store_key); + + return rc; + +cleanup_kobj: + kobject_put(cert_store_kobj); +cleanup_dbf: + debug_unregister(cert_store_dbf); + debug_unregister(cert_store_hexdump); + + return rc; +} +device_initcall(cert_store_init); diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c deleted file mode 100644 index d6487bf879e5..000000000000 --- a/arch/s390/kernel/compat_audit.c +++ /dev/null @@ -1,44 +0,0 @@ -#undef __s390x__ -#include <asm/unistd.h> -#include "audit.h" - -unsigned s390_dir_class[] = { -#include <asm-generic/audit_dir_write.h> -~0U -}; - -unsigned s390_chattr_class[] = { -#include <asm-generic/audit_change_attr.h> -~0U -}; - -unsigned s390_write_class[] = { -#include <asm-generic/audit_write.h> -~0U -}; - -unsigned s390_read_class[] = { -#include <asm-generic/audit_read.h> -~0U -}; - -unsigned s390_signal_class[] = { -#include <asm-generic/audit_signal.h> -~0U -}; - -int s390_classify_syscall(unsigned syscall) -{ - switch(syscall) { - case __NR_open: - return 2; - case __NR_openat: - return 3; - case __NR_socketcall: - return 4; - case __NR_execve: - return 5; - default: - return 1; - } -} diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c deleted file mode 100644 index 765fabdada9f..000000000000 --- a/arch/s390/kernel/compat_exec_domain.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Support for 32-bit Linux for S390 personality. - * - * Copyright IBM Corp. 2000 - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/personality.h> -#include <linux/sched.h> - -static struct exec_domain s390_exec_domain; - -static int __init s390_init (void) -{ - s390_exec_domain.name = "Linux/s390"; - s390_exec_domain.handler = NULL; - s390_exec_domain.pers_low = PER_LINUX32; - s390_exec_domain.pers_high = PER_LINUX32; - s390_exec_domain.signal_map = default_exec_domain.signal_map; - s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; - register_exec_domain(&s390_exec_domain); - return 0; -} - -__initcall(s390_init); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c deleted file mode 100644 index 8b6e4f5288a2..000000000000 --- a/arch/s390/kernel/compat_linux.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * S390 version - * Copyright IBM Corp. 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Gerhard Tonn (ton@de.ibm.com) - * Thomas Spatzier (tspat@de.ibm.com) - * - * Conversion between 31bit and 64bit native syscalls. - * - * Heavily inspired by the 32-bit Sparc compat code which is - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * - */ - - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/file.h> -#include <linux/signal.h> -#include <linux/resource.h> -#include <linux/times.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/uio.h> -#include <linux/quota.h> -#include <linux/module.h> -#include <linux/poll.h> -#include <linux/personality.h> -#include <linux/stat.h> -#include <linux/filter.h> -#include <linux/highmem.h> -#include <linux/highuid.h> -#include <linux/mman.h> -#include <linux/ipv6.h> -#include <linux/in.h> -#include <linux/icmpv6.h> -#include <linux/syscalls.h> -#include <linux/sysctl.h> -#include <linux/binfmts.h> -#include <linux/capability.h> -#include <linux/compat.h> -#include <linux/vfs.h> -#include <linux/ptrace.h> -#include <linux/fadvise.h> -#include <linux/ipc.h> -#include <linux/slab.h> - -#include <asm/types.h> -#include <asm/uaccess.h> - -#include <net/scm.h> -#include <net/sock.h> - -#include "compat_linux.h" - -u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | - PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE | PSW32_ASC_HOME; - -/* For this source file, we want overflow handling. */ - -#undef high2lowuid -#undef high2lowgid -#undef low2highuid -#undef low2highgid -#undef SET_UID16 -#undef SET_GID16 -#undef NEW_TO_OLD_UID -#undef NEW_TO_OLD_GID -#undef SET_OLDSTAT_UID -#undef SET_OLDSTAT_GID -#undef SET_STAT_UID -#undef SET_STAT_GID - -#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid) -#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid) -#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid) -#define low2highgid(gid) ((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid) -#define SET_UID16(var, uid) var = high2lowuid(uid) -#define SET_GID16(var, gid) var = high2lowgid(gid) -#define NEW_TO_OLD_UID(uid) high2lowuid(uid) -#define NEW_TO_OLD_GID(gid) high2lowgid(gid) -#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) -#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) -#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) - -asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) -{ - return sys_chown(filename, low2highuid(user), low2highgid(group)); -} - -asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group) -{ - return sys_lchown(filename, low2highuid(user), low2highgid(group)); -} - -asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) -{ - return sys_fchown(fd, low2highuid(user), low2highgid(group)); -} - -asmlinkage long sys32_setregid16(u16 rgid, u16 egid) -{ - return sys_setregid(low2highgid(rgid), low2highgid(egid)); -} - -asmlinkage long sys32_setgid16(u16 gid) -{ - return sys_setgid((gid_t)gid); -} - -asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) -{ - return sys_setreuid(low2highuid(ruid), low2highuid(euid)); -} - -asmlinkage long sys32_setuid16(u16 uid) -{ - return sys_setuid((uid_t)uid); -} - -asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) -{ - return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); -} - -asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp) -{ - const struct cred *cred = current_cred(); - int retval; - u16 ruid, euid, suid; - - ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); - euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); - suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); - - if (!(retval = put_user(ruid, ruidp)) && - !(retval = put_user(euid, euidp))) - retval = put_user(suid, suidp); - - return retval; -} - -asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) -{ - return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); -} - -asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp) -{ - const struct cred *cred = current_cred(); - int retval; - u16 rgid, egid, sgid; - - rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); - egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); - sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - - if (!(retval = put_user(rgid, rgidp)) && - !(retval = put_user(egid, egidp))) - retval = put_user(sgid, sgidp); - - return retval; -} - -asmlinkage long sys32_setfsuid16(u16 uid) -{ - return sys_setfsuid((uid_t)uid); -} - -asmlinkage long sys32_setfsgid16(u16 gid) -{ - return sys_setfsgid((gid_t)gid); -} - -static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) -{ - struct user_namespace *user_ns = current_user_ns(); - int i; - u16 group; - kgid_t kgid; - - for (i = 0; i < group_info->ngroups; i++) { - kgid = GROUP_AT(group_info, i); - group = (u16)from_kgid_munged(user_ns, kgid); - if (put_user(group, grouplist+i)) - return -EFAULT; - } - - return 0; -} - -static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) -{ - struct user_namespace *user_ns = current_user_ns(); - int i; - u16 group; - kgid_t kgid; - - for (i = 0; i < group_info->ngroups; i++) { - if (get_user(group, grouplist+i)) - return -EFAULT; - - kgid = make_kgid(user_ns, (gid_t)group); - if (!gid_valid(kgid)) - return -EINVAL; - - GROUP_AT(group_info, i) = kgid; - } - - return 0; -} - -asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) -{ - int i; - - if (gidsetsize < 0) - return -EINVAL; - - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; - if (gidsetsize) { - if (i > gidsetsize) { - i = -EINVAL; - goto out; - } - if (groups16_to_user(grouplist, current->cred->group_info)) { - i = -EFAULT; - goto out; - } - } -out: - put_group_info(current->cred->group_info); - return i; -} - -asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) -{ - struct group_info *group_info; - int retval; - - if (!capable(CAP_SETGID)) - return -EPERM; - if ((unsigned)gidsetsize > NGROUPS_MAX) - return -EINVAL; - - group_info = groups_alloc(gidsetsize); - if (!group_info) - return -ENOMEM; - retval = groups16_from_user(group_info, grouplist); - if (retval) { - put_group_info(group_info); - return retval; - } - - retval = set_current_groups(group_info); - put_group_info(group_info); - - return retval; -} - -asmlinkage long sys32_getuid16(void) -{ - return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); -} - -asmlinkage long sys32_geteuid16(void) -{ - return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); -} - -asmlinkage long sys32_getgid16(void) -{ - return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); -} - -asmlinkage long sys32_getegid16(void) -{ - return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); -} - -#ifdef CONFIG_SYSVIPC -COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, - unsigned long, third, compat_uptr_t, ptr) -{ - if (call >> 16) /* hack for backward compatibility */ - return -EINVAL; - return compat_sys_ipc(call, first, second, third, ptr, third); -} -#endif - -asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_truncate(path, (high << 32) | low); -} - -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_ftruncate(fd, (high << 32) | low); -} - -asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, - size_t count, u32 poshi, u32 poslo) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); -} - -asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); -} - -asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) -{ - return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); -} - -struct stat64_emu31 { - unsigned long long st_dev; - unsigned int __pad1; -#define STAT64_HAS_BROKEN_ST_INO 1 - u32 __st_ino; - unsigned int st_mode; - unsigned int st_nlink; - u32 st_uid; - u32 st_gid; - unsigned long long st_rdev; - unsigned int __pad3; - long st_size; - u32 st_blksize; - unsigned char __pad4[4]; - u32 __pad5; /* future possible st_blocks high bits */ - u32 st_blocks; /* Number 512-byte blocks allocated. */ - u32 st_atime; - u32 __pad6; - u32 st_mtime; - u32 __pad7; - u32 st_ctime; - u32 __pad8; /* will be high 32 bits of ctime someday */ - unsigned long st_ino; -}; - -static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) -{ - struct stat64_emu31 tmp; - - memset(&tmp, 0, sizeof(tmp)); - - tmp.st_dev = huge_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - tmp.__st_ino = (u32)stat->ino; - tmp.st_mode = stat->mode; - tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); - tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); - tmp.st_rdev = huge_encode_dev(stat->rdev); - tmp.st_size = stat->size; - tmp.st_blksize = (u32)stat->blksize; - tmp.st_blocks = (u32)stat->blocks; - tmp.st_atime = (u32)stat->atime.tv_sec; - tmp.st_mtime = (u32)stat->mtime.tv_sec; - tmp.st_ctime = (u32)stat->ctime.tv_sec; - - return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; -} - -asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf) -{ - struct kstat stat; - int ret = vfs_stat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf) -{ - struct kstat stat; - int ret = vfs_lstat(filename, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf) -{ - struct kstat stat; - int ret = vfs_fstat(fd, &stat); - if (!ret) - ret = cp_stat64(statbuf, &stat); - return ret; -} - -asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag) -{ - struct kstat stat; - int error; - - error = vfs_fstatat(dfd, filename, &stat, flag); - if (error) - return error; - return cp_stat64(statbuf, &stat); -} - -/* - * Linux/i386 didn't use to be able to handle more than - * 4 system call parameters, so these system calls used a memory - * block for parameter passing.. - */ - -struct mmap_arg_struct_emu31 { - compat_ulong_t addr; - compat_ulong_t len; - compat_ulong_t prot; - compat_ulong_t flags; - compat_ulong_t fd; - compat_ulong_t offset; -}; - -asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - if (a.offset & ~PAGE_MASK) - return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -} - -asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) -{ - struct mmap_arg_struct_emu31 a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -} - -asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return sys_read(fd, buf, count); -} - -asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count) -{ - if ((compat_ssize_t) count < 0) - return -EINVAL; - - return sys_write(fd, buf, count); -} - -/* - * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. - * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} - * because the 31 bit values differ from the 64 bit values. - */ - -asmlinkage long -sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) -{ - if (advise == 4) - advise = POSIX_FADV_DONTNEED; - else if (advise == 5) - advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, offset, len, advise); -} - -struct fadvise64_64_args { - int fd; - long long offset; - long long len; - int advice; -}; - -asmlinkage long -sys32_fadvise64_64(struct fadvise64_64_args __user *args) -{ - struct fadvise64_64_args a; - - if ( copy_from_user(&a, args, sizeof(a)) ) - return -EFAULT; - if (a.advice == 4) - a.advice = POSIX_FADV_DONTNEED; - else if (a.advice == 5) - a.advice = POSIX_FADV_NOREUSE; - return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); -} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h deleted file mode 100644 index 976518c0592a..000000000000 --- a/arch/s390/kernel/compat_linux.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef _ASM_S390X_S390_H -#define _ASM_S390X_S390_H - -#include <linux/compat.h> -#include <linux/socket.h> -#include <linux/syscalls.h> - -/* Macro that masks the high order bit of an 32 bit pointer and converts it*/ -/* to a 64 bit pointer */ -#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL)) -#define AA(__x) \ - ((unsigned long)(__x)) - -/* Now 32bit compatibility types */ -struct ipc_kludge_32 { - __u32 msgp; /* pointer */ - __s32 msgtyp; -}; - -/* asm/sigcontext.h */ -typedef union -{ - __u64 d; - __u32 f; -} freg_t32; - -typedef struct -{ - unsigned int fpc; - freg_t32 fprs[__NUM_FPRS]; -} _s390_fp_regs32; - -typedef struct -{ - __u32 mask; - __u32 addr; -} _psw_t32 __attribute__ ((aligned(8))); - -typedef struct -{ - _psw_t32 psw; - __u32 gprs[__NUM_GPRS]; - __u32 acrs[__NUM_ACRS]; -} _s390_regs_common32; - -typedef struct -{ - _s390_regs_common32 regs; - _s390_fp_regs32 fpregs; -} _sigregs32; - -#define _SIGCONTEXT_NSIG32 64 -#define _SIGCONTEXT_NSIG_BPW32 32 -#define __SIGNAL_FRAMESIZE32 96 -#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2) - -struct sigcontext32 -{ - __u32 oldmask[_COMPAT_NSIG_WORDS]; - __u32 sregs; /* pointer */ -}; - -/* asm/signal.h */ - -/* asm/ucontext.h */ -struct ucontext32 { - __u32 uc_flags; - __u32 uc_link; /* pointer */ - compat_stack_t uc_stack; - _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -struct stat64_emu31; -struct mmap_arg_struct_emu31; -struct fadvise64_64_args; - -long sys32_chown16(const char __user * filename, u16 user, u16 group); -long sys32_lchown16(const char __user * filename, u16 user, u16 group); -long sys32_fchown16(unsigned int fd, u16 user, u16 group); -long sys32_setregid16(u16 rgid, u16 egid); -long sys32_setgid16(u16 gid); -long sys32_setreuid16(u16 ruid, u16 euid); -long sys32_setuid16(u16 uid); -long sys32_setresuid16(u16 ruid, u16 euid, u16 suid); -long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); -long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid); -long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); -long sys32_setfsuid16(u16 uid); -long sys32_setfsgid16(u16 gid); -long sys32_getgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_setgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_getuid16(void); -long sys32_geteuid16(void); -long sys32_getgid16(void); -long sys32_getegid16(void); -long sys32_truncate64(const char __user * path, unsigned long high, - unsigned long low); -long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); -long sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs); -long sys32_delete_module(const char __user *name_user, unsigned int flags); -long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 poshi, u32 poslo); -long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo); -compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); -long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); -long sys32_lstat64(const char __user * filename, - struct stat64_emu31 __user * statbuf); -long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf); -long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag); -unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg); -long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg); -long sys32_read(unsigned int fd, char __user * buf, size_t count); -long sys32_write(unsigned int fd, const char __user * buf, size_t count); -long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise); -long sys32_fadvise64_64(struct fadvise64_64_args __user *args); -#endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h deleted file mode 100644 index 12b823833510..000000000000 --- a/arch/s390/kernel/compat_ptrace.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef _PTRACE32_H -#define _PTRACE32_H - -#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ -#include "compat_linux.h" /* needed for psw_compat_t */ - -struct compat_per_struct_kernel { - __u32 cr9; /* PER control bits */ - __u32 cr10; /* PER starting address */ - __u32 cr11; /* PER ending address */ - __u32 bits; /* Obsolete software bits */ - __u32 starting_addr; /* User specified start address */ - __u32 ending_addr; /* User specified end address */ - __u16 perc_atmid; /* PER trap ATMID */ - __u32 address; /* PER trap instruction address */ - __u8 access_id; /* PER trap access identification */ -}; - -struct compat_user_regs_struct -{ - psw_compat_t psw; - u32 gprs[NUM_GPRS]; - u32 acrs[NUM_ACRS]; - u32 orig_gpr2; - /* nb: there's a 4-byte hole here */ - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. - */ - struct compat_per_struct_kernel per_info; - u32 ieee_instruction_pointer; /* obsolete, always 0 */ -}; - -struct compat_user { - /* We start with the registers, to mimic the way that "memory" - is returned from the ptrace(3,...) function. */ - struct compat_user_regs_struct regs; - /* The rest of this junk is to help gdb figure out what goes where */ - u32 u_tsize; /* Text segment size (pages). */ - u32 u_dsize; /* Data segment size (pages). */ - u32 u_ssize; /* Stack segment size (pages). */ - u32 start_code; /* Starting virtual address of text. */ - u32 start_stack; /* Starting virtual address of stack area. - This is actually the bottom of the stack, - the top of the stack is always found in the - esp register. */ - s32 signal; /* Signal that caused the core dump. */ - u32 u_ar0; /* Used by gdb to help find the values for */ - /* the registers. */ - u32 magic; /* To uniquely identify a core file */ - char u_comm[32]; /* User command that was responsible */ -}; - -typedef struct -{ - __u32 len; - __u32 kernel_addr; - __u32 process_addr; -} compat_ptrace_area; - -#endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c deleted file mode 100644 index c439ac9ced09..000000000000 --- a/arch/s390/kernel/compat_signal.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright IBM Corp. 2000, 2006 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - * Gerhard Tonn (ton@de.ibm.com) - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - */ - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/tty.h> -#include <linux/personality.h> -#include <linux/binfmts.h> -#include <asm/ucontext.h> -#include <asm/uaccess.h> -#include <asm/lowcore.h> -#include <asm/switch_to.h> -#include "compat_linux.h" -#include "compat_ptrace.h" -#include "entry.h" - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sc; - _sigregs32 sregs; - int signo; - __u32 gprs_high[NUM_GPRS]; - __u8 retcode[S390_SYSCALL_SIZE]; -} sigframe32; - -typedef struct -{ - __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u8 retcode[S390_SYSCALL_SIZE]; - compat_siginfo_t info; - struct ucontext32 uc; - __u32 gprs_high[NUM_GPRS]; -} rt_sigframe32; - -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) -{ - int err; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. - This routine must convert siginfo from 64bit to 32bit as well - at the same time. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (from->si_code >> 16) { - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: - err |= __put_user(from->si_int, &to->si_int); - /* fallthrough */ - case __SI_KILL >> 16: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - break; - case __SI_FAULT >> 16: - err |= __put_user((unsigned long) from->si_addr, - &to->si_addr); - break; - case __SI_POLL >> 16: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case __SI_TIMER >> 16: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - default: - break; - } - } - return err; -} - -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - int err; - u32 tmp; - - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); - - if (to->si_code < 0) - err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (to->si_code >> 16) { - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: - err |= __get_user(to->si_int, &from->si_int); - /* fallthrough */ - case __SI_KILL >> 16: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - break; - case __SI_CHLD >> 16: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(to->si_utime, &from->si_utime); - err |= __get_user(to->si_stime, &from->si_stime); - err |= __get_user(to->si_status, &from->si_status); - break; - case __SI_FAULT >> 16: - err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __force __user *) - (u64) (tmp & PSW32_ADDR_INSN); - break; - case __SI_POLL >> 16: - err |= __get_user(to->si_band, &from->si_band); - err |= __get_user(to->si_fd, &from->si_fd); - break; - case __SI_TIMER >> 16: - err |= __get_user(to->si_tid, &from->si_tid); - err |= __get_user(to->si_overrun, &from->si_overrun); - err |= __get_user(to->si_int, &from->si_int); - break; - default: - break; - } - } - return err; -} - -static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) -{ - _s390_regs_common32 regs32; - int err, i; - - regs32.psw.mask = psw32_user_bits | - ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); - regs32.psw.addr = (__u32) regs->psw.addr | - (__u32)(regs->psw.mask & PSW_MASK_BA); - for (i = 0; i < NUM_GPRS; i++) - regs32.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); - memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs)); - err = __copy_to_user(&sregs->regs, ®s32, sizeof(regs32)); - if (err) - return err; - save_fp_regs(¤t->thread.fp_regs); - /* s390_fp_regs and _s390_fp_regs32 are the same ! */ - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(_s390_fp_regs32)); -} - -static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) -{ - _s390_regs_common32 regs32; - int err, i; - - /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - - err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); - if (err) - return err; - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | - (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); - /* Check for invalid user address space control. */ - if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC)) - regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) regs32.gprs[i]; - memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); - - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(_s390_fp_regs32)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; - - restore_fp_regs(¤t->thread.fp_regs); - clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ - return 0; -} - -static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) -{ - __u32 gprs_high[NUM_GPRS]; - int i; - - for (i = 0; i < NUM_GPRS; i++) - gprs_high[i] = regs->gprs[i] >> 32; - - return __copy_to_user(uregs, &gprs_high, sizeof(gprs_high)); -} - -static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) -{ - __u32 gprs_high[NUM_GPRS]; - int err, i; - - err = __copy_from_user(&gprs_high, uregs, sizeof(gprs_high)); - if (err) - return err; - for (i = 0; i < NUM_GPRS; i++) - *(__u32 *)®s->gprs[i] = gprs_high[i]; - return 0; -} - -asmlinkage long sys32_sigreturn(void) -{ - struct pt_regs *regs = task_pt_regs(current); - sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) - goto badframe; - set_current_blocked(&set); - if (restore_sigregs32(regs, &frame->sregs)) - goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) - goto badframe; - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -asmlinkage long sys32_rt_sigreturn(void) -{ - struct pt_regs *regs = task_pt_regs(current); - rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; - sigset_t set; - - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - set_current_blocked(&set); - if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) - goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) - goto badframe; - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; - return regs->gprs[2]; -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -/* - * Set up a signal frame. - */ - - -/* - * Determine which stack to use.. - */ -static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long sp; - - /* Default to using normal stack */ - sp = (unsigned long) A(regs->gprs[15]); - - /* Overflow on alternate signal stack gives SIGSEGV. */ - if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) - return (void __user *) -1UL; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (! sas_ss_flags(sp)) - sp = current->sas_ss_sp + current->sas_ss_size; - } - - return (void __user *)((sp - frame_size) & -8ul); -} - -static inline int map_signal(int sig) -{ - if (current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32) - return current_thread_info()->exec_domain->signal_invmap[sig]; - else - return sig; -} - -static int setup_frame32(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) -{ - sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); - - if (frame == (void __user *) -1UL) - goto give_sigsegv; - - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) - goto give_sigsegv; - - if (save_sigregs32(regs, &frame->sregs)) - goto give_sigsegv; - if (save_sigregs_gprs_high(regs, frame->gprs_high)) - goto give_sigsegv; - if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __force __user *)(frame->retcode))) - goto give_sigsegv; - } - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (psw_user_bits & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__force __u64) ka->sa.sa_handler; - - regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__force __u64) &frame->sc; - - /* We forgot to include these in the sigcontext. - To avoid breaking binary compatibility, they are passed as args. */ - if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || - sig == SIGTRAP || sig == SIGFPE) { - /* set extra registers only for synchronous signals */ - regs->gprs[4] = regs->int_code & 127; - regs->gprs[5] = regs->int_parm_long; - regs->gprs[6] = task_thread_info(current)->last_break; - } - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - goto give_sigsegv; - return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; -} - -static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) -{ - int err = 0; - rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); - - if (frame == (void __user *) -1UL) - goto give_sigsegv; - - if (copy_siginfo_to_user32(&frame->info, info)) - goto give_sigsegv; - - /* Create the ucontext. */ - err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs32(regs, &frame->uc.uc_mcontext); - err |= save_sigregs_gprs_high(regs, frame->gprs_high); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; - } else { - regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode)); - } - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->gprs[15] = (__force __u64) frame; - /* Force 31 bit amode and default user address space control. */ - regs->psw.mask = PSW_MASK_BA | - (psw_user_bits & PSW_MASK_ASC) | - (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (__u64) ka->sa.sa_handler; - - regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__force __u64) &frame->info; - regs->gprs[4] = (__force __u64) &frame->uc; - regs->gprs[5] = task_thread_info(current)->last_break; - return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; -} - -/* - * OK, we're invoking a handler - */ - -void handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) -{ - int ret; - - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame32(sig, ka, info, oldset, regs); - else - ret = setup_frame32(sig, ka, oldset, regs); - if (ret) - return; - signal_delivered(sig, info, ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); -} - diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S deleted file mode 100644 index 9cb1b975b353..000000000000 --- a/arch/s390/kernel/compat_wrapper.S +++ /dev/null @@ -1,1414 +0,0 @@ -/* -* wrapper for 31 bit compatible system calls. -* -* Copyright IBM Corp. 2000, 2006 -* Author(s): Gerhard Tonn (ton@de.ibm.com), -* Thomas Spatzier (tspat@de.ibm.com) -*/ - -#include <linux/linkage.h> - -ENTRY(sys32_exit_wrapper) - lgfr %r2,%r2 # int - jg sys_exit # branch to sys_exit - -ENTRY(sys32_read_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys32_read # branch to sys_read - -ENTRY(sys32_write_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - jg sys32_write # branch to system call - -ENTRY(sys32_close_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_close # branch to system call - -ENTRY(sys32_creat_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_creat # branch to system call - -ENTRY(sys32_link_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_link # branch to system call - -ENTRY(sys32_unlink_wrapper) - llgtr %r2,%r2 # const char * - jg sys_unlink # branch to system call - -ENTRY(sys32_chdir_wrapper) - llgtr %r2,%r2 # const char * - jg sys_chdir # branch to system call - -ENTRY(sys32_time_wrapper) - llgtr %r2,%r2 # int * - jg compat_sys_time # branch to system call - -ENTRY(sys32_mknod_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # dev - jg sys_mknod # branch to system call - -ENTRY(sys32_chmod_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # mode_t - jg sys_chmod # branch to system call - -ENTRY(sys32_lchown16_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_lchown16 # branch to system call - -#sys32_getpid_wrapper # void - -ENTRY(sys32_mount_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # char * - llgfr %r5,%r5 # unsigned long - llgtr %r6,%r6 # void * - jg compat_sys_mount # branch to system call - -ENTRY(sys32_oldumount_wrapper) - llgtr %r2,%r2 # char * - jg sys_oldumount # branch to system call - -ENTRY(sys32_setuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setuid16 # branch to system call - -#sys32_getuid16_wrapper # void - -ENTRY(sys32_ptrace_wrapper) - lgfr %r2,%r2 # long - lgfr %r3,%r3 # long - llgtr %r4,%r4 # long - llgfr %r5,%r5 # long - jg compat_sys_ptrace # branch to system call - -ENTRY(sys32_alarm_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_alarm # branch to system call - -ENTRY(compat_sys_utime_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_utimbuf * - jg compat_sys_utime # branch to system call - -ENTRY(sys32_access_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_access # branch to system call - -ENTRY(sys32_nice_wrapper) - lgfr %r2,%r2 # int - jg sys_nice # branch to system call - -#sys32_sync_wrapper # void - -ENTRY(sys32_kill_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_kill # branch to system call - -ENTRY(sys32_rename_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_rename # branch to system call - -ENTRY(sys32_mkdir_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_mkdir # branch to system call - -ENTRY(sys32_rmdir_wrapper) - llgtr %r2,%r2 # const char * - jg sys_rmdir # branch to system call - -ENTRY(sys32_dup_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_dup # branch to system call - -ENTRY(sys32_pipe_wrapper) - llgtr %r2,%r2 # u32 * - jg sys_pipe # branch to system call - -ENTRY(compat_sys_times_wrapper) - llgtr %r2,%r2 # struct compat_tms * - jg compat_sys_times # branch to system call - -ENTRY(sys32_brk_wrapper) - llgtr %r2,%r2 # unsigned long - jg sys_brk # branch to system call - -ENTRY(sys32_setgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setgid16 # branch to system call - -#sys32_getgid16_wrapper # void - -ENTRY(sys32_signal_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __sighandler_t - jg sys_signal - -#sys32_geteuid16_wrapper # void - -#sys32_getegid16_wrapper # void - -ENTRY(sys32_acct_wrapper) - llgtr %r2,%r2 # char * - jg sys_acct # branch to system call - -ENTRY(sys32_umount_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_umount # branch to system call - -ENTRY(compat_sys_ioctl_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned int - jg compat_sys_ioctl # branch to system call - -ENTRY(compat_sys_fcntl_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl # branch to system call - -ENTRY(sys32_setpgid_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - jg sys_setpgid # branch to system call - -ENTRY(sys32_umask_wrapper) - lgfr %r2,%r2 # int - jg sys_umask # branch to system call - -ENTRY(sys32_chroot_wrapper) - llgtr %r2,%r2 # char * - jg sys_chroot # branch to system call - -ENTRY(sys32_ustat_wrapper) - llgfr %r2,%r2 # dev_t - llgtr %r3,%r3 # struct ustat * - jg compat_sys_ustat - -ENTRY(sys32_dup2_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_dup2 # branch to system call - -#sys32_getppid_wrapper # void - -#sys32_getpgrp_wrapper # void - -#sys32_setsid_wrapper # void - -ENTRY(sys32_setreuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - jg sys32_setreuid16 # branch to system call - -ENTRY(sys32_setregid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - jg sys32_setregid16 # branch to system call - -ENTRY(sys_sigsuspend_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # old_sigset_t - jg sys_sigsuspend - -ENTRY(compat_sys_sigpending_wrapper) - llgtr %r2,%r2 # compat_old_sigset_t * - jg compat_sys_sigpending # branch to system call - -ENTRY(sys32_sethostname_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_sethostname # branch to system call - -ENTRY(compat_sys_setrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_setrlimit # branch to system call - -ENTRY(compat_sys_old_getrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_old_getrlimit # branch to system call - -ENTRY(compat_sys_getrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_getrlimit # branch to system call - -ENTRY(sys32_mmap2_wrapper) - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg sys32_mmap2 # branch to system call - -ENTRY(compat_sys_gettimeofday_wrapper) - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_gettimeofday # branch to system call - -ENTRY(compat_sys_settimeofday_wrapper) - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_settimeofday # branch to system call - -ENTRY(sys32_getgroups16_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_getgroups16 # branch to system call - -ENTRY(sys32_setgroups16_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_setgroups16 # branch to system call - -ENTRY(sys32_symlink_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_symlink # branch to system call - -ENTRY(sys32_readlink_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_readlink # branch to system call - -ENTRY(sys32_uselib_wrapper) - llgtr %r2,%r2 # const char * - jg sys_uselib # branch to system call - -ENTRY(sys32_swapon_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_swapon # branch to system call - -ENTRY(sys32_reboot_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # unsigned int - llgtr %r5,%r5 # void * - jg sys_reboot # branch to system call - -ENTRY(old32_readdir_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_old_readdir # branch to system call - -ENTRY(old32_mmap_wrapper) - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg old32_mmap # branch to system call - -ENTRY(sys32_munmap_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munmap # branch to system call - -ENTRY(sys32_fchmod_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # mode_t - jg sys_fchmod # branch to system call - -ENTRY(sys32_fchown16_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # compat_uid_t - llgfr %r4,%r4 # compat_uid_t - jg sys32_fchown16 # branch to system call - -ENTRY(sys32_getpriority_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_getpriority # branch to system call - -ENTRY(sys32_setpriority_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_setpriority # branch to system call - -ENTRY(compat_sys_statfs_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_statfs # branch to system call - -ENTRY(compat_sys_fstatfs_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_fstatfs # branch to system call - -ENTRY(compat_sys_socketcall_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # u32 * - jg compat_sys_socketcall # branch to system call - -ENTRY(sys32_syslog_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_syslog # branch to system call - -ENTRY(compat_sys_newstat_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newstat # branch to system call - -ENTRY(compat_sys_newlstat_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newlstat # branch to system call - -ENTRY(compat_sys_newfstat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newfstat # branch to system call - -#sys32_vhangup_wrapper # void - -ENTRY(sys32_swapoff_wrapper) - llgtr %r2,%r2 # const char * - jg sys_swapoff # branch to system call - -ENTRY(compat_sys_sysinfo_wrapper) - llgtr %r2,%r2 # struct sysinfo_emu31 * - jg compat_sys_sysinfo # branch to system call - -ENTRY(sys32_fsync_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fsync # branch to system call - -#sys32_sigreturn_wrapper # done in sigreturn_glue - -#sys32_clone_wrapper # done in clone_glue - -ENTRY(sys32_setdomainname_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_setdomainname # branch to system call - -ENTRY(sys32_newuname_wrapper) - llgtr %r2,%r2 # struct new_utsname * - jg sys_newuname # branch to system call - -ENTRY(compat_sys_adjtimex_wrapper) - llgtr %r2,%r2 # struct compat_timex * - jg compat_sys_adjtimex # branch to system call - -ENTRY(sys32_mprotect_wrapper) - llgtr %r2,%r2 # unsigned long (actually pointer - llgfr %r3,%r3 # size_t - llgfr %r4,%r4 # unsigned long - jg sys_mprotect # branch to system call - -ENTRY(sys_init_module_wrapper) - llgtr %r2,%r2 # void * - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # char * - jg sys_init_module # branch to system call - -ENTRY(sys_delete_module_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned int - jg sys_delete_module # branch to system call - -ENTRY(sys32_quotactl_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # qid_t - llgtr %r5,%r5 # caddr_t - jg sys_quotactl # branch to system call - -ENTRY(sys32_getpgid_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_getpgid # branch to system call - -ENTRY(sys32_fchdir_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fchdir # branch to system call - -ENTRY(sys32_bdflush_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # long - jg sys_bdflush # branch to system call - -ENTRY(sys32_sysfs_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys_sysfs # branch to system call - -ENTRY(sys32_personality_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_s390_personality # branch to system call - -ENTRY(sys32_setfsuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setfsuid16 # branch to system call - -ENTRY(sys32_setfsgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setfsgid16 # branch to system call - -ENTRY(sys32_llseek_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # unsigned int - jg sys_llseek # branch to system call - -ENTRY(sys32_getdents_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_getdents # branch to system call - -ENTRY(compat_sys_select_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_fd_set * - llgtr %r4,%r4 # compat_fd_set * - llgtr %r5,%r5 # compat_fd_set * - llgtr %r6,%r6 # struct compat_timeval * - jg compat_sys_select # branch to system call - -ENTRY(sys32_flock_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_flock # branch to system call - -ENTRY(sys32_msync_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_msync # branch to system call - -ENTRY(compat_sys_readv_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_readv # branch to system call - -ENTRY(compat_sys_writev_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_writev # branch to system call - -ENTRY(sys32_getsid_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_getsid # branch to system call - -ENTRY(sys32_fdatasync_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fdatasync # branch to system call - -ENTRY(sys32_mlock_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_mlock # branch to system call - -ENTRY(sys32_munlock_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munlock # branch to system call - -ENTRY(sys32_mlockall_wrapper) - lgfr %r2,%r2 # int - jg sys_mlockall # branch to system call - -#sys32_munlockall_wrapper # void - -ENTRY(sys32_sched_setparam_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_setparam # branch to system call - -ENTRY(sys32_sched_getparam_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_getparam # branch to system call - -ENTRY(sys32_sched_setscheduler_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct sched_param * - jg sys_sched_setscheduler # branch to system call - -ENTRY(sys32_sched_getscheduler_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_sched_getscheduler # branch to system call - -#sys32_sched_yield_wrapper # void - -ENTRY(sys32_sched_get_priority_max_wrapper) - lgfr %r2,%r2 # int - jg sys_sched_get_priority_max # branch to system call - -ENTRY(sys32_sched_get_priority_min_wrapper) - lgfr %r2,%r2 # int - jg sys_sched_get_priority_min # branch to system call - -ENTRY(compat_sys_nanosleep_wrapper) - llgtr %r2,%r2 # struct compat_timespec * - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_nanosleep # branch to system call - -ENTRY(sys32_mremap_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_mremap # branch to system call - -ENTRY(sys32_setresuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_setresuid16 # branch to system call - -ENTRY(sys32_getresuid16_wrapper) - llgtr %r2,%r2 # __kernel_old_uid_emu31_t * - llgtr %r3,%r3 # __kernel_old_uid_emu31_t * - llgtr %r4,%r4 # __kernel_old_uid_emu31_t * - jg sys32_getresuid16 # branch to system call - -ENTRY(sys32_poll_wrapper) - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # int - jg sys_poll # branch to system call - -ENTRY(sys32_setresgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_setresgid16 # branch to system call - -ENTRY(sys32_getresgid16_wrapper) - llgtr %r2,%r2 # __kernel_old_gid_emu31_t * - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - llgtr %r4,%r4 # __kernel_old_gid_emu31_t * - jg sys32_getresgid16 # branch to system call - -ENTRY(sys32_prctl_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_prctl # branch to system call - -#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue - -ENTRY(sys32_pread64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pread64 # branch to system call - -ENTRY(sys32_pwrite64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pwrite64 # branch to system call - -ENTRY(sys32_chown16_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_chown16 # branch to system call - -ENTRY(sys32_getcwd_wrapper) - llgtr %r2,%r2 # char * - llgfr %r3,%r3 # unsigned long - jg sys_getcwd # branch to system call - -ENTRY(sys32_capget_wrapper) - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # cap_user_data_t - jg sys_capget # branch to system call - -ENTRY(sys32_capset_wrapper) - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # const cap_user_data_t - jg sys_capset # branch to system call - -#sys32_vfork_wrapper # done in vfork_glue - -ENTRY(sys32_truncate64_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_truncate64 # branch to system call - -ENTRY(sys32_ftruncate64_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_ftruncate64 # branch to system call - -ENTRY(sys32_lchown_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_lchown # branch to system call - -#sys32_getuid_wrapper # void -#sys32_getgid_wrapper # void -#sys32_geteuid_wrapper # void -#sys32_getegid_wrapper # void - -ENTRY(sys32_setreuid_wrapper) - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - jg sys_setreuid # branch to system call - -ENTRY(sys32_setregid_wrapper) - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - jg sys_setregid # branch to system call - -ENTRY(sys32_getgroups_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_getgroups # branch to system call - -ENTRY(sys32_setgroups_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_setgroups # branch to system call - -ENTRY(sys32_fchown_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_fchown # branch to system call - -ENTRY(sys32_setresuid_wrapper) - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # uid_t - jg sys_setresuid # branch to system call - -ENTRY(sys32_getresuid_wrapper) - llgtr %r2,%r2 # uid_t * - llgtr %r3,%r3 # uid_t * - llgtr %r4,%r4 # uid_t * - jg sys_getresuid # branch to system call - -ENTRY(sys32_setresgid_wrapper) - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - llgfr %r4,%r4 # gid_t - jg sys_setresgid # branch to system call - -ENTRY(sys32_getresgid_wrapper) - llgtr %r2,%r2 # gid_t * - llgtr %r3,%r3 # gid_t * - llgtr %r4,%r4 # gid_t * - jg sys_getresgid # branch to system call - -ENTRY(sys32_chown_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_chown # branch to system call - -ENTRY(sys32_setuid_wrapper) - llgfr %r2,%r2 # uid_t - jg sys_setuid # branch to system call - -ENTRY(sys32_setgid_wrapper) - llgfr %r2,%r2 # gid_t - jg sys_setgid # branch to system call - -ENTRY(sys32_setfsuid_wrapper) - llgfr %r2,%r2 # uid_t - jg sys_setfsuid # branch to system call - -ENTRY(sys32_setfsgid_wrapper) - llgfr %r2,%r2 # gid_t - jg sys_setfsgid # branch to system call - -ENTRY(sys32_pivot_root_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_pivot_root # branch to system call - -ENTRY(sys32_mincore_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - llgtr %r4,%r4 # unsigned char * - jg sys_mincore # branch to system call - -ENTRY(sys32_madvise_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_madvise # branch to system call - -ENTRY(sys32_getdents64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg sys_getdents64 # branch to system call - -ENTRY(compat_sys_fcntl64_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl64 # branch to system call - -ENTRY(sys32_stat64_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_stat64 # branch to system call - -ENTRY(sys32_lstat64_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_lstat64 # branch to system call - -ENTRY(sys32_stime_wrapper) - llgtr %r2,%r2 # long * - jg compat_sys_stime # branch to system call - -ENTRY(sys32_fstat64_wrapper) - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # struct stat64 * - jg sys32_fstat64 # branch to system call - -ENTRY(sys32_setxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_setxattr - -ENTRY(sys32_lsetxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_lsetxattr - -ENTRY(sys32_fsetxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_fsetxattr - -ENTRY(sys32_getxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_getxattr - -ENTRY(sys32_lgetxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_lgetxattr - -ENTRY(sys32_fgetxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_fgetxattr - -ENTRY(sys32_listxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_listxattr - -ENTRY(sys32_llistxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_llistxattr - -ENTRY(sys32_flistxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_flistxattr - -ENTRY(sys32_removexattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_removexattr - -ENTRY(sys32_lremovexattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_lremovexattr - -ENTRY(sys32_fremovexattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - jg sys_fremovexattr - -ENTRY(sys32_sched_setaffinity_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_setaffinity - -ENTRY(sys32_sched_getaffinity_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_getaffinity - -ENTRY(sys32_exit_group_wrapper) - lgfr %r2,%r2 # int - jg sys_exit_group # branch to system call - -ENTRY(sys32_set_tid_address_wrapper) - llgtr %r2,%r2 # int * - jg sys_set_tid_address # branch to system call - -ENTRY(sys_epoll_create_wrapper) - lgfr %r2,%r2 # int - jg sys_epoll_create # branch to system call - -ENTRY(sys_epoll_ctl_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct epoll_event * - jg sys_epoll_ctl # branch to system call - -ENTRY(sys_epoll_wait_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg sys_epoll_wait # branch to system call - -ENTRY(sys32_fadvise64_wrapper) - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - llgfr %r4,%r5 # size_t (unsigned long) - lgfr %r5,%r6 # int - jg sys32_fadvise64 - -ENTRY(sys32_fadvise64_64_wrapper) - llgtr %r2,%r2 # struct fadvise64_64_args * - jg sys32_fadvise64_64 - -ENTRY(sys32_clock_settime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_settime - -ENTRY(sys32_clock_gettime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_gettime - -ENTRY(sys32_clock_getres_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_getres - -ENTRY(sys32_clock_nanosleep_wrapper) - lgfr %r2,%r2 # clockid_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_timespec * - llgtr %r5,%r5 # struct compat_timespec * - jg compat_sys_clock_nanosleep - -ENTRY(sys32_timer_create_wrapper) - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_sigevent * - llgtr %r4,%r4 # timer_t * - jg compat_sys_timer_create - -ENTRY(sys32_timer_settime_wrapper) - lgfr %r2,%r2 # timer_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timer_settime - -ENTRY(sys32_timer_gettime_wrapper) - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timer_gettime - -ENTRY(sys32_timer_getoverrun_wrapper) - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_getoverrun - -ENTRY(sys32_timer_delete_wrapper) - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_delete - -ENTRY(sys32_io_setup_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # u32 * - jg compat_sys_io_setup - -ENTRY(sys32_io_destroy_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - jg sys_io_destroy - -ENTRY(sys32_io_getevents_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - lgfr %r4,%r4 # long - llgtr %r5,%r5 # struct io_event * - llgtr %r6,%r6 # struct compat_timespec * - jg compat_sys_io_getevents - -ENTRY(sys32_io_submit_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - llgtr %r4,%r4 # struct iocb ** - jg compat_sys_io_submit - -ENTRY(sys32_io_cancel_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - llgtr %r3,%r3 # struct iocb * - llgtr %r4,%r4 # struct io_event * - jg sys_io_cancel - -ENTRY(compat_sys_statfs64_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_statfs64 - -ENTRY(compat_sys_fstatfs64_wrapper) - llgfr %r2,%r2 # unsigned int fd - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_fstatfs64 - -ENTRY(compat_sys_mq_open_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # mode_t - llgtr %r5,%r5 # struct compat_mq_attr * - jg compat_sys_mq_open - -ENTRY(sys32_mq_unlink_wrapper) - llgtr %r2,%r2 # const char * - jg sys_mq_unlink - -ENTRY(compat_sys_mq_timedsend_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedsend - -ENTRY(compat_sys_mq_timedreceive_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgtr %r5,%r5 # unsigned int * - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedreceive - -ENTRY(compat_sys_mq_notify_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_sigevent * - jg compat_sys_mq_notify - -ENTRY(compat_sys_mq_getsetattr_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_mq_attr * - llgtr %r4,%r4 # struct compat_mq_attr * - jg compat_sys_mq_getsetattr - -ENTRY(compat_sys_add_key_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # size_t - llgfr %r6,%r6 # (key_serial_t) u32 - jg sys_add_key - -ENTRY(compat_sys_request_key_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # (key_serial_t) u32 - jg sys_request_key - -ENTRY(sys32_remap_file_pages_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_remap_file_pages - -ENTRY(compat_sys_kexec_load_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # struct kexec_segment * - llgfr %r5,%r5 # unsigned long - jg compat_sys_kexec_load - -ENTRY(sys_ioprio_set_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_ioprio_set - -ENTRY(sys_ioprio_get_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_ioprio_get - -ENTRY(sys_inotify_add_watch_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # u32 - jg sys_inotify_add_watch - -ENTRY(sys_inotify_rm_watch_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - jg sys_inotify_rm_watch - -ENTRY(sys_mkdirat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_mkdirat - -ENTRY(sys_mknodat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned int - jg sys_mknodat - -ENTRY(sys_fchownat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # uid_t - llgfr %r5,%r5 # gid_t - lgfr %r6,%r6 # int - jg sys_fchownat - -ENTRY(compat_sys_futimesat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct timeval * - jg compat_sys_futimesat - -ENTRY(sys32_fstatat64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct stat64 * - lgfr %r5,%r5 # int - jg sys32_fstatat64 - -ENTRY(sys_unlinkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_unlinkat - -ENTRY(sys_renameat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - jg sys_renameat - -ENTRY(sys_linkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - lgfr %r6,%r6 # int - jg sys_linkat - -ENTRY(sys_symlinkat_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgtr %r4,%r4 # const char * - jg sys_symlinkat - -ENTRY(sys_readlinkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # char * - lgfr %r5,%r5 # int - jg sys_readlinkat - -ENTRY(sys_fchmodat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # mode_t - jg sys_fchmodat - -ENTRY(sys_faccessat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_faccessat - -ENTRY(compat_sys_pselect6_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # fd_set * - llgtr %r4,%r4 # fd_set * - llgtr %r5,%r5 # fd_set * - llgtr %r6,%r6 # struct timespec * - llgt %r0,164(%r15) # void * - stg %r0,160(%r15) - jg compat_sys_pselect6 - -ENTRY(compat_sys_ppoll_wrapper) - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # struct timespec * - llgtr %r5,%r5 # const sigset_t * - llgfr %r6,%r6 # size_t - jg compat_sys_ppoll - -ENTRY(sys_unshare_wrapper) - llgfr %r2,%r2 # unsigned long - jg sys_unshare - -ENTRY(sys_splice_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # loff_t * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # size_t - llgf %r0,164(%r15) # unsigned int - stg %r0,160(%r15) - jg sys_splice - -ENTRY(sys_sync_file_range_wrapper) - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - sllg %r4,%r5,32 # get high word of 64bit loff_t - or %r4,%r6 # get low word of 64bit loff_t - llgf %r5,164(%r15) # unsigned int - jg sys_sync_file_range - -ENTRY(sys_tee_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - jg sys_tee - -ENTRY(sys_getcpu_wrapper) - llgtr %r2,%r2 # unsigned * - llgtr %r3,%r3 # unsigned * - llgtr %r4,%r4 # struct getcpu_cache * - jg sys_getcpu - -ENTRY(compat_sys_utimes_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_timeval * - jg compat_sys_utimes - -ENTRY(compat_sys_utimensat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct compat_timespec * - lgfr %r5,%r5 # int - jg compat_sys_utimensat - -ENTRY(sys_eventfd_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_eventfd - -ENTRY(sys_fallocate_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - sllg %r4,%r4,32 # get high word of 64bit loff_t - lr %r4,%r5 # get low word of 64bit loff_t - sllg %r5,%r6,32 # get high word of 64bit loff_t - l %r5,164(%r15) # get low word of 64bit loff_t - jg sys_fallocate - -ENTRY(sys_timerfd_create_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_timerfd_create - -ENTRY(sys_eventfd2_wrapper) - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # int - jg sys_eventfd2 - -ENTRY(sys_inotify_init1_wrapper) - lgfr %r2,%r2 # int - jg sys_inotify_init1 - -ENTRY(sys_pipe2_wrapper) - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - jg sys_pipe2 # branch to system call - -ENTRY(sys_dup3_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # int - jg sys_dup3 # branch to system call - -ENTRY(sys_epoll_create1_wrapper) - lgfr %r2,%r2 # int - jg sys_epoll_create1 # branch to system call - -ENTRY(sys32_readahead_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - lgfr %r5,%r5 # s32 - jg sys32_readahead # branch to system call - -ENTRY(sys_tkill_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - jg sys_tkill # branch to system call - -ENTRY(sys_tgkill_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - jg sys_tgkill # branch to system call - -ENTRY(compat_sys_keyctl_wrapper) - llgfr %r2,%r2 # u32 - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_keyctl # branch to system call - -ENTRY(sys_perf_event_open_wrapper) - llgtr %r2,%r2 # const struct perf_event_attr * - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # unsigned long - jg sys_perf_event_open # branch to system call - -ENTRY(sys_clone_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # int * - llgtr %r5,%r5 # int * - jg sys_clone # branch to system call - -ENTRY(sys32_execve_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # compat_uptr_t * - llgtr %r4,%r4 # compat_uptr_t * - jg compat_sys_execve # branch to system call - -ENTRY(sys_fanotify_init_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_fanotify_init # branch to system call - -ENTRY(sys_prlimit64_wrapper) - lgfr %r2,%r2 # pid_t - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # const struct rlimit64 __user * - llgtr %r5,%r5 # struct rlimit64 __user * - jg sys_prlimit64 # branch to system call - -ENTRY(sys_name_to_handle_at_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char __user * - llgtr %r4,%r4 # struct file_handle __user * - llgtr %r5,%r5 # int __user * - lgfr %r6,%r6 # int - jg sys_name_to_handle_at - -ENTRY(compat_sys_clock_adjtime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timex __user * - jg compat_sys_clock_adjtime - -ENTRY(sys_syncfs_wrapper) - lgfr %r2,%r2 # int - jg sys_syncfs - -ENTRY(sys_setns_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_setns - -ENTRY(compat_sys_process_vm_readv_wrapper) - lgfr %r2,%r2 # compat_pid_t - llgtr %r3,%r3 # struct compat_iovec __user * - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # struct compat_iovec __user * - llgfr %r6,%r6 # unsigned long - llgf %r0,164(%r15) # unsigned long - stg %r0,160(%r15) - jg compat_sys_process_vm_readv - -ENTRY(compat_sys_process_vm_writev_wrapper) - lgfr %r2,%r2 # compat_pid_t - llgtr %r3,%r3 # struct compat_iovec __user * - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # struct compat_iovec __user * - llgfr %r6,%r6 # unsigned long - llgf %r0,164(%r15) # unsigned long - stg %r0,160(%r15) - jg compat_sys_process_vm_writev - -ENTRY(sys_s390_runtime_instr_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_s390_runtime_instr - -ENTRY(sys_kcmp_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_kcmp - -ENTRY(sys_finit_module_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char __user * - lgfr %r4,%r4 # int - jg sys_finit_module diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c new file mode 100644 index 000000000000..9d85b4bc7036 --- /dev/null +++ b/arch/s390/kernel/cpacf.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "cpacf: " fmt + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <asm/cpacf.h> + +#define CPACF_QUERY(name, instruction) \ +static ssize_t name##_query_raw_read(struct file *fp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_mask_t mask; \ + \ + if (!cpacf_query(CPACF_##instruction, &mask)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ +} \ +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) + +CPACF_QUERY(km, KM); +CPACF_QUERY(kmc, KMC); +CPACF_QUERY(kimd, KIMD); +CPACF_QUERY(klmd, KLMD); +CPACF_QUERY(kmac, KMAC); +CPACF_QUERY(pckmo, PCKMO); +CPACF_QUERY(kmf, KMF); +CPACF_QUERY(kmctr, KMCTR); +CPACF_QUERY(kmo, KMO); +CPACF_QUERY(pcc, PCC); +CPACF_QUERY(prno, PRNO); +CPACF_QUERY(kma, KMA); +CPACF_QUERY(kdsa, KDSA); + +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) + +CPACF_QAI(km, KM); +CPACF_QAI(kmc, KMC); +CPACF_QAI(kimd, KIMD); +CPACF_QAI(klmd, KLMD); +CPACF_QAI(kmac, KMAC); +CPACF_QAI(pckmo, PCKMO); +CPACF_QAI(kmf, KMF); +CPACF_QAI(kmctr, KMCTR); +CPACF_QAI(kmo, KMO); +CPACF_QAI(pcc, PCC); +CPACF_QAI(prno, PRNO); +CPACF_QAI(kma, KMA); +CPACF_QAI(kdsa, KDSA); + +static const struct bin_attribute *const cpacf_attrs[] = { + &bin_attr_km_query_raw, + &bin_attr_kmc_query_raw, + &bin_attr_kimd_query_raw, + &bin_attr_klmd_query_raw, + &bin_attr_kmac_query_raw, + &bin_attr_pckmo_query_raw, + &bin_attr_kmf_query_raw, + &bin_attr_kmctr_query_raw, + &bin_attr_kmo_query_raw, + &bin_attr_pcc_query_raw, + &bin_attr_prno_query_raw, + &bin_attr_kma_query_raw, + &bin_attr_kdsa_query_raw, + &bin_attr_km_query_auth_info_raw, + &bin_attr_kmc_query_auth_info_raw, + &bin_attr_kimd_query_auth_info_raw, + &bin_attr_klmd_query_auth_info_raw, + &bin_attr_kmac_query_auth_info_raw, + &bin_attr_pckmo_query_auth_info_raw, + &bin_attr_kmf_query_auth_info_raw, + &bin_attr_kmctr_query_auth_info_raw, + &bin_attr_kmo_query_auth_info_raw, + &bin_attr_pcc_query_auth_info_raw, + &bin_attr_prno_query_auth_info_raw, + &bin_attr_kma_query_auth_info_raw, + &bin_attr_kdsa_query_auth_info_raw, + NULL, +}; + +static const struct attribute_group cpacf_attr_grp = { + .name = "cpacf", + .bin_attrs = cpacf_attrs, +}; + +static int __init cpacf_init(void) +{ + struct device *cpu_root; + int rc = 0; + + cpu_root = bus_get_dev_root(&cpu_subsys); + if (cpu_root) { + rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp); + put_device(cpu_root); + } + return rc; +} +device_initcall(cpacf_init); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index d7b0c4d27880..ab611764642a 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2007 @@ -5,68 +6,58 @@ * Christian Borntraeger (cborntra@de.ibm.com), */ -#define KMSG_COMPONENT "cpcmd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpcmd: " fmt #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/io.h> +#include <asm/asm.h> static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; static int diag8_noresponse(int cmdlen) { - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = cmdlen; - asm volatile( -#ifndef CONFIG_64BIT - " diag %1,%0,0x8\n" -#else /* CONFIG_64BIT */ - " sam31\n" - " diag %1,%0,0x8\n" - " sam64\n" -#endif /* CONFIG_64BIT */ - : "+d" (reg3) : "d" (reg2) : "cc"); - return reg3; + " diag %[rx],%[ry],0x8\n" + : [ry] "+&d" (cmdlen) + : [rx] "d" (__pa(cpcmd_buf)) + : "cc"); + return cmdlen; } static int diag8_response(int cmdlen, char *response, int *rlen) { - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = (addr_t) response; - register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; - register unsigned long reg5 asm ("5") = *rlen; + union register_pair rx, ry; + int cc; + rx.even = __pa(cpcmd_buf); + rx.odd = __pa(response); + ry.even = cmdlen | 0x40000000L; + ry.odd = *rlen; asm volatile( -#ifndef CONFIG_64BIT - " diag %2,%0,0x8\n" - " brc 8,1f\n" - " ar %1,%4\n" -#else /* CONFIG_64BIT */ - " sam31\n" - " diag %2,%0,0x8\n" - " sam64\n" - " brc 8,1f\n" - " agr %1,%4\n" -#endif /* CONFIG_64BIT */ - "1:\n" - : "+d" (reg4), "+d" (reg5) - : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc"); - *rlen = reg5; - return reg4; + " diag %[rx],%[ry],0x8\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [ry] "+d" (ry.pair) + : [rx] "d" (rx.pair) + : CC_CLOBBER); + if (CC_TRANSFORM(cc)) + *rlen += ry.odd; + else + *rlen = ry.odd; + return ry.even; } /* * __cpcmd has some restrictions over cpcmd - * - the response buffer must reside below 2GB (if any) * - __cpcmd is unlocked and therefore not SMP-safe */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) @@ -80,6 +71,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); + diag_stat_inc(DIAG_STAT_X008); if (response) { memset(response, 0, rlen); response_len = rlen; @@ -96,16 +88,14 @@ EXPORT_SYMBOL(__cpcmd); int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { + unsigned long flags; char *lowbuf; int len; - unsigned long flags; - if ((virt_to_phys(response) != (unsigned long) response) || - (((unsigned long)response + rlen) >> 31)) { - lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); + if (is_vmalloc_or_module_addr(response)) { + lowbuf = kmalloc(rlen, GFP_KERNEL); if (!lowbuf) { - pr_warning("The cpcmd kernel function failed to " - "allocate a response buffer\n"); + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c new file mode 100644 index 000000000000..c9eef9ed876b --- /dev/null +++ b/arch/s390/kernel/cpufeature.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2022 + */ + +#include <linux/cpufeature.h> +#include <linux/export.h> +#include <linux/bug.h> +#include <asm/machine.h> +#include <asm/elf.h> + +enum { + TYPE_HWCAP, + TYPE_FACILITY, + TYPE_MACHINE, +}; + +struct s390_cpu_feature { + unsigned int type : 4; + unsigned int num : 28; +}; + +static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { + [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, + [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, + [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, +}; + +/* + * cpu_have_feature - Test CPU features on module initialization + */ +int cpu_have_feature(unsigned int num) +{ + struct s390_cpu_feature *feature; + + if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES)) + return 0; + feature = &s390_cpu_features[num]; + switch (feature->type) { + case TYPE_HWCAP: + return !!(elf_hwcap & BIT(feature->num)); + case TYPE_FACILITY: + return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); + default: + WARN_ON_ONCE(1); + return 0; + } +} +EXPORT_SYMBOL(cpu_have_feature); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f703d91bf720..d4839de8ce9d 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 kdump implementation * @@ -6,212 +7,346 @@ */ #include <linux/crash_dump.h> +#include <linux/export.h> #include <asm/lowcore.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/mm.h> #include <linux/gfp.h> #include <linux/slab.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/elf.h> +#include <linux/uio.h> +#include <asm/asm-offsets.h> #include <asm/os_info.h> #include <asm/elf.h> #include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/maccess.h> +#include <asm/fpu.h> #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, + .name = "oldmem", +}; + +struct save_area { + struct list_head list; + u64 psw[2]; + u64 ctrs[16]; + u64 gprs[16]; + u32 acrs[16]; + u64 fprs[16]; + u32 fpc; + u32 prefix; + u32 todpreg; + u64 timer; + u64 todcmp; + u64 vxrs_low[16]; + __vector128 vxrs_high[16]; +}; + +static LIST_HEAD(dump_save_areas); + /* - * Copy one page from "oldmem" - * - * For the kdump reserved memory this functions performs a swap operation: - * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. - * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + * Allocate a save area */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +struct save_area * __init save_area_alloc(bool is_boot_cpu) { - unsigned long src; + struct save_area *sa; - if (!csize) - return 0; + sa = memblock_alloc_or_panic(sizeof(*sa), 8); - src = (pfn << PAGE_SHIFT) + offset; - if (src < OLDMEM_SIZE) - src += OLDMEM_BASE; - else if (src > OLDMEM_BASE && - src < OLDMEM_BASE + OLDMEM_SIZE) - src -= OLDMEM_BASE; - if (userbuf) - copy_to_user_real((void __force __user *) buf, (void *) src, - csize); + if (is_boot_cpu) + list_add(&sa->list, &dump_save_areas); else - memcpy_real(buf, (void *) src, csize); - return csize; + list_add_tail(&sa->list, &dump_save_areas); + return sa; } /* - * Copy memory from old kernel + * Return the address of the save area for the boot CPU */ -int copy_from_oldmem(void *dest, void *src, size_t count) +struct save_area * __init save_area_boot_cpu(void) { - unsigned long copied = 0; - int rc; - - if ((unsigned long) src < OLDMEM_SIZE) { - copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = memcpy_real(dest, src + OLDMEM_BASE, copied); - if (rc) - return rc; - } - return memcpy_real(dest + copied, src + copied, count - copied); + return list_first_entry_or_null(&dump_save_areas, struct save_area, list); } /* - * Alloc memory and panic in case of ENOMEM + * Copy CPU registers into the save area */ -static void *kzalloc_panic(int len) +void __init save_area_add_regs(struct save_area *sa, void *regs) { - void *rc; - - rc = kzalloc(len, GFP_KERNEL); - if (!rc) - panic("s390 kdump kzalloc (%d) failed", len); - return rc; + struct lowcore *lc; + + lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA); + memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw)); + memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs)); + memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs)); + memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs)); + memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs)); + memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc)); + memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix)); + memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg)); + memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer)); + memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp)); } /* - * Get memory layout and create hole for oldmem + * Copy vector registers into the save area */ -static struct mem_chunk *get_memory_layout(void) +void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) { - struct mem_chunk *chunk_array; + int i; - chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array, 0); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); - return chunk_array; + /* Copy lower halves of vector registers 0-15 */ + for (i = 0; i < 16; i++) + sa->vxrs_low[i] = vxrs[i].low; + /* Copy vector registers 16-31 */ + memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); } -/* - * Initialize ELF note - */ -static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, - const char *name) +static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) { - Elf64_Nhdr *note; - u64 len; - - note = (Elf64_Nhdr *)buf; - note->n_namesz = strlen(name) + 1; - note->n_descsz = d_len; - note->n_type = type; - len = sizeof(Elf64_Nhdr); - - memcpy(buf + len, name, note->n_namesz); - len = roundup(len + note->n_namesz, 4); - - memcpy(buf + len, desc, note->n_descsz); - len = roundup(len + note->n_descsz, 4); + size_t len, copied, res = 0; + + while (count) { + if (!oldmem_data.start && src < sclp.hsa_size) { + /* Copy from zfcp/nvme dump HSA area */ + len = min(count, sclp.hsa_size - src); + copied = memcpy_hsa_iter(iter, src, len); + } else { + /* Check for swapped kdump oldmem areas */ + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; + } else { + len = count; + } + copied = memcpy_real_iter(iter, src, len); + } + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; + } + return res; +} - return PTR_ADD(buf, len); +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) +{ + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); + if (copy_oldmem_iter(&iter, src, count) < count) + return -EFAULT; + return 0; } /* - * Initialize prstatus note + * Copy one page from "oldmem" */ -static void *nt_prstatus(void *ptr, struct save_area *sa) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, + unsigned long offset) { - struct elf_prstatus nt_prstatus; - static int cpu_nr = 1; - - memset(&nt_prstatus, 0, sizeof(nt_prstatus)); - memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); - memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); - memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); - nt_prstatus.pr_pid = cpu_nr; - cpu_nr++; + unsigned long src; - return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), - "CORE"); + src = pfn_to_phys(pfn) + offset; + return copy_oldmem_iter(iter, src, csize); } /* - * Initialize fpregset (floating point) note + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] */ -static void *nt_fpregset(void *ptr, struct save_area *sa) +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) { - elf_fpregset_t nt_fpregset; - - memset(&nt_fpregset, 0, sizeof(nt_fpregset)); - memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); - memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + unsigned long size_old; + int rc; - return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), - "CORE"); + if (pfn < oldmem_data.size >> PAGE_SHIFT) { + size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (oldmem_data.start >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); } /* - * Initialize timer note + * Remap "oldmem" for zfcp/nvme dump + * + * We only map available memory above HSA size. Memory below HSA size + * is read on demand using the copy_oldmem_page() function. */ -static void *nt_s390_timer(void *ptr, struct save_area *sa) +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) { - return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), - KEXEC_CORE_NOTE_NAME); + unsigned long hsa_end = sclp.hsa_size; + unsigned long size_hsa; + + if (pfn < hsa_end >> PAGE_SHIFT) { + size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); } /* - * Initialize TOD clock comparator note + * Remap "oldmem" for kdump or zfcp/nvme dump */ -static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) { - return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, - sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); + if (oldmem_data.start) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); + else + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); } /* - * Initialize TOD programmable register note + * Return true only when in a kdump or stand-alone kdump environment. + * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump" + * environments, where this function returns false; see dump_available(). */ -static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +bool is_kdump_kernel(void) { - return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, - sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); + return oldmem_data.start; } +EXPORT_SYMBOL_GPL(is_kdump_kernel); /* - * Initialize control register note + * Initialize ELF note */ -static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) { - return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, - sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); } +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) + /* - * Initialize prefix register note + * Calculate the size of ELF note */ -static void *nt_s390_prefix(void *ptr, struct save_area *sa) +static size_t nt_size_name(int d_len, const char *name) { - return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, - sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); + size_t size; + + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; } +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) + /* * Fill ELF notes for one CPU with save area registers */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) -{ - ptr = nt_prstatus(ptr, sa); - ptr = nt_fpregset(ptr, sa); - ptr = nt_s390_timer(ptr, sa); - ptr = nt_s390_tod_cmp(ptr, sa); - ptr = nt_s390_tod_preg(ptr, sa); - ptr = nt_s390_ctrs(ptr, sa); - ptr = nt_s390_prefix(ptr, sa); +static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + elf_fpregset_t nt_fpregset; + + /* Prepare prstatus note */ + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); + nt_prstatus.common.pr_pid = cpu; + /* Prepare fpregset (floating point) note */ + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); + memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); + /* Create ELF notes for the CPU */ + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); + if (cpu_has_vx()) { + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); + } return ptr; } /* + * Calculate size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); + if (cpu_has_vx()) { + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); + } + + return size; +} + +/* * Initialize prpsinfo note (new kernel) */ static void *nt_prpsinfo(void *ptr) @@ -220,9 +355,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), - KEXEC_CORE_NOTE_NAME); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -231,21 +365,26 @@ static void *nt_prpsinfo(void *ptr) static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; + unsigned long addr; Elf64_Nhdr note; - void *addr; - if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); - if (copy_from_oldmem(¬e, addr, sizeof(note))) + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return NULL; + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) return NULL; - if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0) return NULL; - if (strcmp(nt_name, "VMCOREINFO") != 0) + vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL); + if (!vmcoreinfo) return NULL; - vmcoreinfo = kzalloc_panic(note.n_descsz); - if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) { + kfree(vmcoreinfo); return NULL; + } *size = note.n_descsz; return vmcoreinfo; } @@ -255,21 +394,58 @@ static void *get_vmcoreinfo_old(unsigned long *size) */ static void *nt_vmcoreinfo(void *ptr) { + const char *name = VMCOREINFO_NOTE_NAME; unsigned long size; void *vmcoreinfo; vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); - if (!vmcoreinfo) - vmcoreinfo = get_vmcoreinfo_old(&size); + if (vmcoreinfo) + return nt_init_name(ptr, 0, vmcoreinfo, size, name); + + vmcoreinfo = get_vmcoreinfo_old(&size); if (!vmcoreinfo) return ptr; - return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); + ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name); + kfree(vmcoreinfo); + return ptr; +} + +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = VMCOREINFO_NOTE_NAME; + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (vmcoreinfo) + return nt_size_name(size, name); + + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return 0; + + kfree(vmcoreinfo); + return nt_size_name(size, name); +} + +/* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); } /* * Initialize ELF header (new kernel) */ -static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count) { memset(ehdr, 0, sizeof(*ehdr)); memcpy(ehdr->e_ident, ELFMAG, SELFMAG); @@ -283,7 +459,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) ehdr->e_phoff = sizeof(Elf64_Ehdr); ehdr->e_ehsize = sizeof(Elf64_Ehdr); ehdr->e_phentsize = sizeof(Elf64_Phdr); - ehdr->e_phnum = mem_chunk_cnt + 1; + /* Number of PT_LOAD program headers plus PT_NOTE program header */ + ehdr->e_phnum = phdr_count + 1; return ehdr + 1; } @@ -292,13 +469,12 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) */ static int get_cpu_cnt(void) { - int i, cpus = 0; + struct save_area *sa; + int cpus = 0; - for (i = 0; zfcpdump_save_areas[i]; i++) { - if (zfcpdump_save_areas[i]->pref_reg == 0) - continue; - cpus++; - } + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + cpus++; return cpus; } @@ -307,60 +483,83 @@ static int get_cpu_cnt(void) */ static int get_mem_chunk_cnt(void) { - struct mem_chunk *chunk_array, *mem_chunk; - int i, cnt = 0; - - chunk_array = get_memory_layout(); - for (i = 0; i < MEMORY_CHUNKS; i++) { - mem_chunk = &chunk_array[i]; - if (chunk_array[i].type != CHUNK_READ_WRITE && - chunk_array[i].type != CHUNK_READ_ONLY) - continue; - if (mem_chunk->size == 0) - continue; + int cnt = 0; + u64 idx; + + for_each_physmem_range(idx, &oldmem_type, NULL, NULL) cnt++; - } - kfree(chunk_array); return cnt; } -/* - * Relocate pointer in order to allow vmcore code access the data - */ -static inline unsigned long relocate(unsigned long addr) +static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr, + unsigned long vaddr, unsigned long size) { - return OLDMEM_BASE + addr; + phdr->p_type = PT_LOAD; + phdr->p_vaddr = vaddr; + phdr->p_offset = paddr; + phdr->p_paddr = paddr; + phdr->p_filesz = size; + phdr->p_memsz = size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; } /* * Initialize ELF loads (new kernel) */ -static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm) { - struct mem_chunk *chunk_array, *mem_chunk; - int i; - - chunk_array = get_memory_layout(); - for (i = 0; i < MEMORY_CHUNKS; i++) { - mem_chunk = &chunk_array[i]; - if (mem_chunk->size == 0) - continue; - if (chunk_array[i].type != CHUNK_READ_WRITE && - chunk_array[i].type != CHUNK_READ_ONLY) - continue; - else - phdr->p_filesz = mem_chunk->size; - phdr->p_type = PT_LOAD; - phdr->p_offset = mem_chunk->addr; - phdr->p_vaddr = mem_chunk->addr; - phdr->p_paddr = mem_chunk->addr; - phdr->p_memsz = mem_chunk->size; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_align = PAGE_SIZE; + unsigned long old_identity_base = 0; + phys_addr_t start, end; + u64 idx; + + if (os_info_has_vm) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + for_each_physmem_range(idx, &oldmem_type, &start, &end) { + fill_ptload(phdr, start, old_identity_base + start, + end - start); phdr++; } - kfree(chunk_array); - return i; +} + +static bool os_info_has_vm(void) +{ + return os_info_old_value(OS_INFO_KASLR_OFFSET); +} + +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +/* + * Fill PT_LOAD for a physical memory range owned by a device and detected by + * its device driver. + */ +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size) +{ + unsigned long old_identity_base = 0; + + if (os_info_has_vm()) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + fill_ptload(phdr, paddr, old_identity_base + paddr, size); +} +#endif + +/* + * Prepare PT_LOAD type program header for kernel image region + */ +static void text_init(Elf64_Phdr *phdr) +{ + unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS); + unsigned long start = os_info_old_value(OS_INFO_IMAGE_START); + unsigned long end = os_info_old_value(OS_INFO_IMAGE_END); + + phdr->p_type = PT_LOAD; + phdr->p_vaddr = start; + phdr->p_filesz = end - start; + phdr->p_memsz = end - start; + phdr->p_offset = start_phys; + phdr->p_paddr = start_phys; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; } /* @@ -370,74 +569,139 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { struct save_area *sa; void *ptr_start = ptr; - int i; + int cpu; ptr = nt_prpsinfo(ptr); - for (i = 0; zfcpdump_save_areas[i]; i++) { - sa = zfcpdump_save_areas[i]; - if (sa->pref_reg == 0) - continue; - ptr = fill_cpu_elf_notes(ptr, sa); - } + cpu = 1; + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; - phdr->p_offset = relocate(notes_offset); + phdr->p_offset = notes_offset; phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); phdr->p_memsz = phdr->p_filesz; return ptr; } +static size_t get_elfcorehdr_size(int phdr_count) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(PRPSINFO, struct elf_prpsinfo); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ + size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += phdr_count * sizeof(Elf64_Phdr); + + return size; +} + /* * Create ELF core header (new kernel) */ -static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) { - Elf64_Phdr *phdr_notes, *phdr_loads; - int mem_chunk_cnt; + Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text; + int mem_chunk_cnt, phdr_text_cnt; + size_t alloc_size; void *ptr, *hdr; - u32 alloc_size; u64 hdr_off; + /* If we are not in kdump or zfcp/nvme dump mode return */ + if (!oldmem_data.start && !is_ipl_type_dump()) + return 0; + /* If we cannot get HSA size for zfcp/nvme dump return error */ + if (is_ipl_type_dump() && !sclp.hsa_size) + return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (oldmem_data.start) { + oldmem_region.base = oldmem_data.start; + oldmem_region.size = oldmem_data.size; + oldmem_type.total_size = oldmem_data.size; + } + mem_chunk_cnt = get_mem_chunk_cnt(); + phdr_text_cnt = os_info_has_vm() ? 1 : 0; + + alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt); + + hdr = kzalloc(alloc_size, GFP_KERNEL); + + /* + * Without elfcorehdr /proc/vmcore cannot be created. Thus creating + * a dump with this crash kernel will fail. Panic now to allow other + * dump mechanisms to take over. + */ + if (!hdr) + panic("s390 kdump allocating elfcorehdr failed"); - alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + - mem_chunk_cnt * sizeof(Elf64_Phdr); - hdr = kzalloc_panic(alloc_size); /* Init elf header */ - ptr = ehdr_init(hdr, mem_chunk_cnt); + phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt); /* Init program headers */ - phdr_notes = ptr; - ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); - phdr_loads = ptr; - ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + if (phdr_text_cnt) { + phdr_text = phdr_notes + 1; + phdr_loads = phdr_text + 1; + } else { + phdr_loads = phdr_notes + 1; + } + ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt); /* Init notes */ hdr_off = PTR_DIFF(ptr, hdr); ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init kernel text program header */ + if (phdr_text_cnt) + text_init(phdr_text); /* Init loads */ + loads_init(phdr_loads, phdr_text_cnt); + /* Finalize program headers */ hdr_off = PTR_DIFF(ptr, hdr); - loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); - *elfcorebuf_sz = hdr_off; - *elfcorebuf = (void *) relocate((unsigned long) hdr); - BUG_ON(*elfcorebuf_sz > alloc_size); + *addr = (unsigned long long) hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; } /* - * Create kdump ELF core header in new kernel, if it has not been passed via - * the "elfcorehdr" kernel parameter + * Free ELF core header (new kernel) */ -static int setup_kdump_elfcorehdr(void) +void elfcorehdr_free(unsigned long long addr) { - size_t elfcorebuf_sz; - char *elfcorebuf; + kfree((void *)(unsigned long)addr); +} - if (!OLDMEM_BASE || is_kdump_kernel()) - return -EINVAL; - s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); - elfcorehdr_addr = (unsigned long long) elfcorebuf; - elfcorehdr_size = elfcorebuf_sz; - return 0; +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + memcpy(buf, src, count); + *ppos += count; + return count; } -subsys_initcall(setup_kdump_elfcorehdr); +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + memcpy(buf, src, count); + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c new file mode 100644 index 000000000000..a0501f4c7e7a --- /dev/null +++ b/arch/s390/kernel/ctlreg.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 1999, 2023 + */ + +#include <linux/irqflags.h> +#include <linux/spinlock.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/cache.h> +#include <asm/abs_lowcore.h> +#include <asm/ctlreg.h> + +/* + * ctl_lock guards access to global control register contents which + * are kept in the control register save area within absolute lowcore + * at physical address zero. + */ +static DEFINE_SPINLOCK(system_ctl_lock); + +void system_ctlreg_lock(void) + __acquires(&system_ctl_lock) +{ + spin_lock(&system_ctl_lock); +} + +void system_ctlreg_unlock(void) + __releases(&system_ctl_lock) +{ + spin_unlock(&system_ctl_lock); +} + +static bool system_ctlreg_area_init __ro_after_init; + +void __init system_ctlreg_init_save_area(struct lowcore *lc) +{ + struct lowcore *abs_lc; + + abs_lc = get_abs_lowcore(); + __local_ctl_store(0, 15, lc->cregs_save_area); + __local_ctl_store(0, 15, abs_lc->cregs_save_area); + put_abs_lowcore(abs_lc); + system_ctlreg_area_init = true; +} + +struct ctlreg_parms { + unsigned long andval; + unsigned long orval; + unsigned long val; + int request; + int cr; +}; + +static void ctlreg_callback(void *info) +{ + struct ctlreg_parms *pp = info; + struct ctlreg regs[16]; + + __local_ctl_store(0, 15, regs); + if (pp->request == CTLREG_LOAD) { + regs[pp->cr].val = pp->val; + } else { + regs[pp->cr].val &= pp->andval; + regs[pp->cr].val |= pp->orval; + } + __local_ctl_load(0, 15, regs); +} + +static void system_ctlreg_update(void *info) +{ + unsigned long flags; + + if (system_state == SYSTEM_BOOTING) { + /* + * For very early calls do not call on_each_cpu() + * since not everything might be setup. + */ + local_irq_save(flags); + ctlreg_callback(info); + local_irq_restore(flags); + } else { + on_each_cpu(ctlreg_callback, info, 1); + } +} + +void system_ctlreg_modify(unsigned int cr, unsigned long data, int request) +{ + struct ctlreg_parms pp = { .cr = cr, .request = request, }; + struct lowcore *abs_lc; + + switch (request) { + case CTLREG_SET_BIT: + pp.orval = 1UL << data; + pp.andval = -1UL; + break; + case CTLREG_CLEAR_BIT: + pp.orval = 0; + pp.andval = ~(1UL << data); + break; + case CTLREG_LOAD: + pp.val = data; + break; + } + if (system_ctlreg_area_init) { + system_ctlreg_lock(); + abs_lc = get_abs_lowcore(); + if (request == CTLREG_LOAD) { + abs_lc->cregs_save_area[cr].val = pp.val; + } else { + abs_lc->cregs_save_area[cr].val &= pp.andval; + abs_lc->cregs_save_area[cr].val |= pp.orval; + } + put_abs_lowcore(abs_lc); + system_ctlreg_update(&pp); + system_ctlreg_unlock(); + } else { + system_ctlreg_update(&pp); + } +} +EXPORT_SYMBOL(system_ctlreg_modify); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index f1279dc2e1bc..71cdb6845dd7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1,16 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 1999, 2020 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), - * Holger Smolinski (Holger.Smolinski@de.ibm.com) + * Holger Smolinski (Holger.Smolinski@de.ibm.com) * * Bugreports to: <Linux390@de.ibm.com> */ -#define KMSG_COMPONENT "s390dbf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "s390dbf: " fmt #include <linux/stddef.h> #include <linux/kernel.h> @@ -19,10 +19,12 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/sysctl.h> -#include <asm/uaccess.h> -#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> +#include <linux/minmax.h> #include <linux/debugfs.h> #include <asm/debug.h> @@ -36,82 +38,67 @@ typedef struct file_private_info { loff_t offset; /* offset of last read in file */ - int act_area; /* number of last formated area */ - int act_page; /* act page in given area */ - int act_entry; /* last formated entry (offset */ - /* relative to beginning of last */ - /* formated page) */ - size_t act_entry_offset; /* up to this offset we copied */ - /* in last read the last formated */ + int act_area; /* number of last formatted area */ + int act_page; /* act page in given area */ + int act_entry; /* last formatted entry (offset */ + /* relative to beginning of last */ + /* formatted page) */ + size_t act_entry_offset; /* up to this offset we copied */ + /* in last read the last formatted */ /* entry to userland */ char temp_buf[2048]; /* buffer for output */ - debug_info_t *debug_info_org; /* original debug information */ + debug_info_t *debug_info_org; /* original debug information */ debug_info_t *debug_info_snap; /* snapshot of debug information */ struct debug_view *view; /* used view of debug info */ } file_private_info_t; -typedef struct -{ +typedef struct { char *string; - /* - * This assumes that all args are converted into longs - * on L/390 this is the case for all types of parameter - * except of floats, and long long (32 bit) + /* + * This assumes that all args are converted into longs + * on L/390 this is the case for all types of parameter + * except of floats, and long long (32 bit) * */ - long args[0]; + long args[]; } debug_sprintf_entry_t; - -/* internal function prototyes */ +/* internal function prototypes */ static int debug_init(void); static ssize_t debug_output(struct file *file, char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t *offset); static ssize_t debug_input(struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t *offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); static debug_info_t *debug_info_create(const char *name, int pages_per_area, - int nr_areas, int buf_size, umode_t mode); + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); -static int debug_prolog_level_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); -static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_prolog_pages_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); -static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf); -static int debug_raw_format_fn(debug_info_t * id, - struct debug_view *view, char *out_buf, - const char *in_buf); -static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf); - -static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event); +static int debug_prolog_level_fn(debug_info_t *id, + struct debug_view *view, char *out_buf, + size_t out_buf_size); +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_prolog_pages_fn(debug_info_t *id, + struct debug_view *view, char *out_buf, + size_t out_buf_size); +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *in_buf); +static void debug_areas_swap(debug_info_t *a, debug_info_t *b); +static void debug_events_append(debug_info_t *dest, debug_info_t *src); /* globals */ -struct debug_view debug_raw_view = { - "raw", - NULL, - &debug_raw_header_fn, - &debug_raw_format_fn, - NULL, - NULL -}; -EXPORT_SYMBOL(debug_raw_view); - struct debug_view debug_hex_ascii_view = { "hex_ascii", NULL, @@ -141,19 +128,19 @@ static struct debug_view debug_pages_view = { }; static struct debug_view debug_flush_view = { - "flush", - NULL, - NULL, - NULL, - &debug_input_flush_fn, - NULL + "flush", + NULL, + NULL, + NULL, + &debug_input_flush_fn, + NULL }; struct debug_view debug_sprintf_view = { "sprintf", NULL, &debug_dflt_header_fn, - (debug_format_proc_t*)&debug_sprintf_format_fn, + &debug_sprintf_format_fn, NULL, NULL }; @@ -164,20 +151,19 @@ static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; /* static globals */ -static debug_info_t *debug_area_first = NULL; -static debug_info_t *debug_area_last = NULL; +static debug_info_t *debug_area_first; +static debug_info_t *debug_area_last; static DEFINE_MUTEX(debug_mutex); static int initialized; static int debug_critical; static const struct file_operations debug_file_ops = { - .owner = THIS_MODULE, - .read = debug_output, - .write = debug_input, - .open = debug_open, + .owner = THIS_MODULE, + .read = debug_output, + .write = debug_input, + .open = debug_open, .release = debug_close, - .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; @@ -190,29 +176,26 @@ static struct dentry *debug_debugfs_root_entry; * areas[areanumber][pagenumber][pageoffset] */ -static debug_entry_t*** -debug_areas_alloc(int pages_per_area, int nr_areas) +static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) { - debug_entry_t*** areas; - int i,j; + debug_entry_t ***areas; + int i, j; - areas = kmalloc(nr_areas * - sizeof(debug_entry_t**), - GFP_KERNEL); + areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL); if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { - areas[i] = kmalloc(pages_per_area * - sizeof(debug_entry_t*),GFP_KERNEL); - if (!areas[i]) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ + areas[i] = kmalloc_array(pages_per_area, + sizeof(debug_entry_t *), + GFP_KERNEL | __GFP_NOWARN); + if (!areas[i]) goto fail_malloc_areas2; - } - for(j = 0; j < pages_per_area; j++) { + for (j = 0; j < pages_per_area; j++) { areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL); - if(!areas[i][j]) { - for(j--; j >=0 ; j--) { + if (!areas[i][j]) { + for (j--; j >= 0 ; j--) kfree(areas[i][j]); - } kfree(areas[i]); goto fail_malloc_areas2; } @@ -221,63 +204,56 @@ debug_areas_alloc(int pages_per_area, int nr_areas) return areas; fail_malloc_areas2: - for(i--; i >= 0; i--){ - for(j=0; j < pages_per_area;j++){ + for (i--; i >= 0; i--) { + for (j = 0; j < pages_per_area; j++) kfree(areas[i][j]); - } kfree(areas[i]); } kfree(areas); fail_malloc_areas: return NULL; - } - /* * debug_info_alloc * - alloc new debug-info */ - -static debug_info_t* -debug_info_alloc(const char *name, int pages_per_area, int nr_areas, - int buf_size, int level, int mode) +static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, + int nr_areas, int buf_size, int level, + int mode) { - debug_info_t* rc; + debug_info_t *rc; /* alloc everything */ - rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL); - if(!rc) + if (!rc) goto fail_malloc_rc; rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); - if(!rc->active_entries) + if (!rc->active_entries) goto fail_malloc_active_entries; rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); - if(!rc->active_pages) + if (!rc->active_pages) goto fail_malloc_active_pages; - if((mode == ALL_AREAS) && (pages_per_area != 0)){ + if ((mode == ALL_AREAS) && (pages_per_area != 0)) { rc->areas = debug_areas_alloc(pages_per_area, nr_areas); - if(!rc->areas) + if (!rc->areas) goto fail_malloc_areas; } else { rc->areas = NULL; } /* initialize members */ - spin_lock_init(&rc->lock); rc->pages_per_area = pages_per_area; - rc->nr_areas = nr_areas; + rc->nr_areas = nr_areas; rc->active_area = 0; - rc->level = level; - rc->buf_size = buf_size; - rc->entry_size = sizeof(debug_entry_t) + buf_size; - strlcpy(rc->name, name, sizeof(rc->name)); + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); - memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * - sizeof(struct dentry*)); - atomic_set(&(rc->ref_count), 0); + memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); + refcount_set(&(rc->ref_count), 0); return rc; @@ -295,18 +271,15 @@ fail_malloc_rc: * debug_areas_free * - free all debug areas */ - -static void -debug_areas_free(debug_info_t* db_info) +static void debug_areas_free(debug_info_t *db_info) { - int i,j; + int i, j; - if(!db_info->areas) + if (!db_info->areas) return; for (i = 0; i < db_info->nr_areas; i++) { - for(j = 0; j < db_info->pages_per_area; j++) { + for (j = 0; j < db_info->pages_per_area; j++) kfree(db_info->areas[i][j]); - } kfree(db_info->areas[i]); } kfree(db_info->areas); @@ -317,9 +290,8 @@ debug_areas_free(debug_info_t* db_info) * debug_info_free * - free memory debug-info */ - -static void -debug_info_free(debug_info_t* db_info){ +static void debug_info_free(debug_info_t *db_info) +{ debug_areas_free(db_info); kfree(db_info->active_entries); kfree(db_info->active_pages); @@ -331,37 +303,18 @@ debug_info_free(debug_info_t* db_info){ * - create new debug-info */ -static debug_info_t* -debug_info_create(const char *name, int pages_per_area, int nr_areas, - int buf_size, umode_t mode) +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode) { - debug_info_t* rc; + debug_info_t *rc; - rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, - DEBUG_DEFAULT_LEVEL, ALL_AREAS); - if(!rc) + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); + if (!rc) goto out; rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - - debug_info_get(rc); + refcount_set(&rc->ref_count, 1); out: return rc; } @@ -370,24 +323,22 @@ out: * debug_info_copy * - copy debug-info */ - -static debug_info_t* -debug_info_copy(debug_info_t* in, int mode) +static debug_info_t *debug_info_copy(debug_info_t *in, int mode) { - int i,j; - debug_info_t* rc; - unsigned long flags; + unsigned long flags; + debug_info_t *rc; + int i, j; /* get a consistent copy of the debug areas */ do { rc = debug_info_alloc(in->name, in->pages_per_area, in->nr_areas, in->buf_size, in->level, mode); spin_lock_irqsave(&in->lock, flags); - if(!rc) + if (!rc) goto out; /* has something changed in the meantime ? */ - if((rc->pages_per_area == in->pages_per_area) && - (rc->nr_areas == in->nr_areas)) { + if ((rc->pages_per_area == in->pages_per_area) && + (rc->nr_areas == in->nr_areas)) { break; } spin_unlock_irqrestore(&in->lock, flags); @@ -395,155 +346,222 @@ debug_info_copy(debug_info_t* in, int mode) } while (1); if (mode == NO_AREAS) - goto out; + goto out; - for(i = 0; i < in->nr_areas; i++){ - for(j = 0; j < in->pages_per_area; j++) { - memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE); - } - } + for (i = 0; i < in->nr_areas; i++) { + for (j = 0; j < in->pages_per_area; j++) + memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; + } + rc->active_area = in->active_area; out: - spin_unlock_irqrestore(&in->lock, flags); - return rc; + spin_unlock_irqrestore(&in->lock, flags); + return rc; } /* * debug_info_get * - increments reference count for debug-info */ - -static void -debug_info_get(debug_info_t * db_info) +static void debug_info_get(debug_info_t *db_info) { if (db_info) - atomic_inc(&db_info->ref_count); + refcount_inc(&db_info->ref_count); } /* * debug_info_put: * - decreases reference count for debug-info and frees it if necessary */ - -static void -debug_info_put(debug_info_t *db_info) +static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (atomic_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if(db_info == debug_area_first) - debug_area_first = db_info->next; - if(db_info == debug_area_last) - debug_area_last = db_info->prev; - if(db_info->prev) db_info->prev->next = db_info->next; - if(db_info->next) db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } } /* * debug_format_entry: - * - format one debug entry and return size of formated data + * - format one debug entry and return size of formatted data */ - -static int -debug_format_entry(file_private_info_t *p_info) +static int debug_format_entry(file_private_info_t *p_info) { - debug_info_t *id_snap = p_info->debug_info_snap; + debug_info_t *id_snap = p_info->debug_info_snap; struct debug_view *view = p_info->view; debug_entry_t *act_entry; size_t len = 0; - if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap,view,p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ goto out; /* or if 'pages_per_area' is 0 */ - act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] - [p_info->act_page] + p_info->act_entry); - - if (act_entry->id.stck == 0LL) - goto out; /* empty entry */ - if (view->header_proc) + act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] + [p_info->act_page] + p_info->act_entry); + + if (act_entry->clock == 0LL) + goto out; /* empty entry */ + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, - DEBUG_DATA(act_entry)); + sizeof(p_info->temp_buf) - len, + DEBUG_DATA(act_entry)); + } out: - return len; + return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ - -static inline int -debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; id = p_info->debug_info_snap; - if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } - if(!id->areas) - return 1; + if (!id->areas) + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ - if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { /* next page */ p_info->act_entry = 0; p_info->act_page += 1; - if((p_info->act_page % id->pages_per_area) == 0) { + if ((p_info->act_page % id->pages_per_area) == 0) { /* next area */ - p_info->act_area++; - p_info->act_page=0; + p_info->act_area++; + p_info->act_page = 0; } - if(p_info->act_area >= id->nr_areas) - return 1; + if (p_info->act_area >= id->nr_areas) + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* * debug_output: * - called for user read() - * - copies formated debug entries to the user buffer + * - copies formatted debug entries to the user buffer */ - -static ssize_t -debug_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ +static ssize_t debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ { size_t count = 0; size_t entry_offset; file_private_info_t *p_info; - p_info = ((file_private_info_t *) file->private_data); - if (*offset != p_info->offset) + p_info = (file_private_info_t *) file->private_data; + if (*offset != p_info->offset) return -EPIPE; - if(p_info->act_area >= p_info->debug_info_snap->nr_areas) + if (p_info->act_area >= p_info->debug_info_snap->nr_areas) return 0; entry_offset = p_info->act_entry_offset; - while(count < len){ - int formatted_line_size; + while (count < len) { int formatted_line_residue; + int formatted_line_size; int user_buf_residue; size_t copy_size; @@ -551,21 +569,21 @@ debug_output(struct file *file, /* file descriptor */ formatted_line_residue = formatted_line_size - entry_offset; user_buf_residue = len-count; copy_size = min(user_buf_residue, formatted_line_residue); - if(copy_size){ + if (copy_size) { if (copy_to_user(user_buf + count, p_info->temp_buf - + entry_offset, copy_size)) + + entry_offset, copy_size)) return -EFAULT; count += copy_size; entry_offset += copy_size; } - if(copy_size == formatted_line_residue){ + if (copy_size == formatted_line_residue) { entry_offset = 0; - if(debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } out: - p_info->offset = *offset + count; + p_info->offset = *offset + count; p_info->act_entry_offset = entry_offset; *offset = p_info->offset; return count; @@ -576,39 +594,72 @@ out: * - called for user write() * - calls input function of view */ - -static ssize_t -debug_input(struct file *file, const char __user *user_buf, size_t length, - loff_t *offset) +static ssize_t debug_input(struct file *file, const char __user *user_buf, + size_t length, loff_t *offset) { - int rc = 0; file_private_info_t *p_info; + int rc = 0; mutex_lock(&debug_mutex); p_info = ((file_private_info_t *) file->private_data); - if (p_info->view->input_proc) + if (p_info->view->input_proc) { rc = p_info->view->input_proc(p_info->debug_info_org, p_info->view, file, user_buf, length, offset); - else + } else { rc = -EPERM; + } mutex_unlock(&debug_mutex); - return rc; /* number of input characters */ + return rc; /* number of input characters */ +} + +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; } /* * debug_open: * - called for user open() - * - copies formated output to private_data area of the file + * - copies formatted output to private_data area of the file * handle */ - -static int -debug_open(struct inode *inode, struct file *file) +static int debug_open(struct inode *inode, struct file *file) { - int i, rc = 0; + debug_info_t *debug_info; file_private_info_t *p_info; - debug_info_t *debug_info, *debug_info_snapshot; + int i, rc = 0; mutex_lock(&debug_mutex); debug_info = file_inode(file)->i_private; @@ -616,80 +667,140 @@ debug_open(struct inode *inode, struct file *file) for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) continue; - else if (debug_info->debugfs_entries[i] == - file->f_path.dentry) { - goto found; /* found view ! */ - } + else if (debug_info->debugfs_entries[i] == file->f_path.dentry) + goto found; /* found view ! */ } /* no entry found */ rc = -EINVAL; goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. */ - - if(!debug_info->views[i]->format_proc && - !debug_info->views[i]->header_proc){ - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - } else { - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - } - - if(!debug_info_snapshot){ - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), - GFP_KERNEL); - if(!p_info){ - debug_info_free(debug_info_snapshot); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); + if (!p_info) { rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() * - deletes private_data area of the file handle */ - -static int -debug_close(struct inode *inode, struct file *file) +static int debug_close(struct inode *inode, struct file *file) { file_private_info_t *p_info; + p_info = (file_private_info_t *) file->private_data; - if(p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); - return 0; /* success */ + debug_file_private_free(p_info); + file->private_data = NULL; + return 0; /* success */ } -/* - * debug_register_mode: - * - Creates and initializes debug area for the caller - * The mode parameter allows to specify access rights for the s390dbf files - * - Returns handle for debug area +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_move_entry(p_info, reverse)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} +/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + +/** + * debug_register_mode() - creates and initializes debug area. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * @mode: File mode for debugfs files. E.g. S_IRWXUGO + * @uid: User ID for debugfs files. Currently only 0 is supported. + * @gid: Group ID for debugfs files. Currently only 0 is supported. + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * Must not be called within an interrupt handler. + */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid) @@ -699,34 +810,38 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - pr_warning("Root becomes the owner of all s390dbf files " - "in sysfs\n"); + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex); - - /* create new debug_info */ + /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if(!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc){ + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - } - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); -/* - * debug_register: - * - creates and initializes debug area for the caller - * - returns handle for debug area +/** + * debug_register() - creates and initializes debug area with default file mode. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * The debugfs file mode access permissions are read and write for user. + * Must not be called within an interrupt handler. */ - debug_info_t *debug_register(const char *name, int pages_per_area, int nr_areas, int buf_size) { @@ -735,22 +850,99 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register); -/* - * debug_unregister: - * - give back debug area +/** + * debug_register_static() - registers a static debug area + * + * @id: Handle for static debug area + * @pages_per_area: Number of pages per area + * @nr_areas: Number of debug areas + * + * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO. + * + * Note: This function is called automatically via an initcall generated by + * DEFINE_STATIC_DEBUG_INFO. */ +void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas) +{ + unsigned long flags; + debug_info_t *copy; + + if (!initialized) { + pr_err("Tried to register debug feature %s too early\n", + id->name); + return; + } + + copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!copy) { + pr_err("Registering debug feature %s failed\n", id->name); + + /* Clear pointers to prevent tracing into released initdata. */ + spin_lock_irqsave(&id->lock, flags); + id->areas = NULL; + id->active_pages = NULL; + id->active_entries = NULL; + spin_unlock_irqrestore(&id->lock, flags); + + return; + } + + /* Replace static trace area with dynamic copy. */ + spin_lock_irqsave(&id->lock, flags); + debug_events_append(copy, id); + debug_areas_swap(id, copy); + spin_unlock_irqrestore(&id->lock, flags); + + /* Clear pointers to initdata and discard copy. */ + copy->areas = NULL; + copy->active_pages = NULL; + copy->active_entries = NULL; + debug_info_free(copy); + + mutex_lock(&debug_mutex); + _debug_register(id); + mutex_unlock(&debug_mutex); +} -void -debug_unregister(debug_info_t * id) +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + +/** + * debug_unregister() - give back debug area. + * + * @id: handle for debug log + * + * Return: + * none + */ +void debug_unregister(debug_info_t *id) { if (!id) - goto out; + return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); -out: - return; + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister); @@ -758,62 +950,59 @@ EXPORT_SYMBOL(debug_unregister); * debug_set_size: * - set area size (number of pages) and number of areas */ -static int -debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) +static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) { + debug_info_t *new_id; unsigned long flags; - debug_entry_t *** new_areas; - int rc=0; - if(!id || (nr_areas <= 0) || (pages_per_area < 0)) + if (!id || (nr_areas <= 0) || (pages_per_area < 0)) return -EINVAL; - if(pages_per_area > 0){ - new_areas = debug_areas_alloc(pages_per_area, nr_areas); - if(!new_areas) { - pr_info("Allocating memory for %i pages failed\n", - pages_per_area); - rc = -ENOMEM; - goto out; - } - } else { - new_areas = NULL; + + new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!new_id) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + return -ENOMEM; } - spin_lock_irqsave(&id->lock,flags); - debug_areas_free(id); - id->areas = new_areas; - id->nr_areas = nr_areas; - id->pages_per_area = pages_per_area; - id->active_area = 0; - memset(id->active_entries,0,sizeof(int)*id->nr_areas); - memset(id->active_pages, 0, sizeof(int)*id->nr_areas); - spin_unlock_irqrestore(&id->lock,flags); - pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); -out: - return rc; + + spin_lock_irqsave(&id->lock, flags); + debug_events_append(new_id, id); + debug_areas_swap(new_id, id); + debug_info_free(new_id); + spin_unlock_irqrestore(&id->lock, flags); + pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); + + return 0; } -/* - * debug_set_level: - * - set actual debug level +/** + * debug_set_level() - Sets new actual debug level if new_level is valid. + * + * @id: handle for debug log + * @new_level: new debug level + * + * Return: + * none */ - -void -debug_set_level(debug_info_t* id, int new_level) +void debug_set_level(debug_info_t *id, int new_level) { unsigned long flags; - if(!id) - return; - spin_lock_irqsave(&id->lock,flags); - if(new_level == DEBUG_OFF_LEVEL){ - id->level = DEBUG_OFF_LEVEL; - pr_info("%s: switched off\n",id->name); - } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { + + if (!id) + return; + + if (new_level == DEBUG_OFF_LEVEL) { + pr_info("%s: switched off\n", id->name); + } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { pr_info("%s: level %i is out of range (%i - %i)\n", - id->name, new_level, 0, DEBUG_MAX_LEVEL); - } else { - id->level = new_level; - } - spin_unlock_irqrestore(&id->lock,flags); + id->name, new_level, 0, DEBUG_MAX_LEVEL); + return; + } + + spin_lock_irqsave(&id->lock, flags); + id->level = new_level; + spin_unlock_irqrestore(&id->lock, flags); } EXPORT_SYMBOL(debug_set_level); @@ -821,12 +1010,10 @@ EXPORT_SYMBOL(debug_set_level); * proceed_active_entry: * - set active entry to next in the ring buffer */ - -static inline void -proceed_active_entry(debug_info_t * id) +static inline void proceed_active_entry(debug_info_t *id) { if ((id->active_entries[id->active_area] += id->entry_size) - > (PAGE_SIZE - id->entry_size)){ + > (PAGE_SIZE - id->entry_size)) { id->active_entries[id->active_area] = 0; id->active_pages[id->active_area] = (id->active_pages[id->active_area] + 1) % @@ -838,9 +1025,7 @@ proceed_active_entry(debug_info_t * id) * proceed_active_area: * - set active area to next in the ring buffer */ - -static inline void -proceed_active_area(debug_info_t * id) +static inline void proceed_active_area(debug_info_t *id) { id->active_area++; id->active_area = id->active_area % id->nr_areas; @@ -849,13 +1034,47 @@ proceed_active_area(debug_info_t * id) /* * get_active_entry: */ - -static inline debug_entry_t* -get_active_entry(debug_info_t * id) +static inline debug_entry_t *get_active_entry(debug_info_t *id) { return (debug_entry_t *) (((char *) id->areas[id->active_area] - [id->active_pages[id->active_area]]) + - id->active_entries[id->active_area]); + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); +} + +/* Swap debug areas of a and b. */ +static void debug_areas_swap(debug_info_t *a, debug_info_t *b) +{ + swap(a->nr_areas, b->nr_areas); + swap(a->pages_per_area, b->pages_per_area); + swap(a->areas, b->areas); + swap(a->active_area, b->active_area); + swap(a->active_pages, b->active_pages); + swap(a->active_entries, b->active_entries); +} + +/* Append all debug events in active area from source to destination log. */ +static void debug_events_append(debug_info_t *dest, debug_info_t *src) +{ + debug_entry_t *from, *to, *last; + + if (!src->areas || !dest->areas) + return; + + /* Loop over all entries in src, starting with oldest. */ + from = get_active_entry(src); + last = from; + do { + if (from->clock != 0LL) { + to = get_active_entry(dest); + memset(to, 0, dest->entry_size); + memcpy(to, from, min(src->entry_size, + dest->entry_size)); + proceed_active_entry(dest); + } + + proceed_active_entry(src); + from = get_active_entry(src); + } while (from != last); } /* @@ -863,22 +1082,27 @@ get_active_entry(debug_info_t * id) * - set timestamp, caller address, cpu number etc. */ -static inline void -debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, - int exception) +static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, + int level, int exception) { - active->id.stck = get_tod_clock(); - active->id.fields.cpuid = smp_processor_id(); + unsigned long timestamp; + union tod_clock clk; + + store_tod_clock_ext(&clk); + timestamp = clk.us; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); - if(exception) + if (exception) proceed_active_area(id); } -static int debug_stoppable=1; -static int debug_active=1; +static int debug_stoppable = 1; +static int debug_active = 1; #define CTL_S390DBF_STOPPABLE 5678 #define CTL_S390DBF_ACTIVE 5679 @@ -888,9 +1112,8 @@ static int debug_active=1; * always allow read, allow write only if debug_stoppable is set or * if debug_active is already off */ -static int -s390dbf_procactive(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int s390dbf_procactive(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, buffer, lenp, ppos); @@ -898,45 +1121,51 @@ s390dbf_procactive(ctl_table *table, int write, return 0; } - -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { - .procname = "debug_stoppable", + .procname = "debug_stoppable", .data = &debug_stoppable, .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, }, - { - .procname = "debug_active", + { + .procname = "debug_active", .data = &debug_active, .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = s390dbf_procactive, - }, - { } -}; - -static struct ctl_table s390dbf_dir_table[] = { - { - .procname = "s390dbf", - .maxlen = 0, - .mode = S_IRUGO | S_IXUGO, - .child = s390dbf_table, + .mode = S_IRUGO | S_IWUSR, + .proc_handler = s390dbf_procactive, }, - { } }; static struct ctl_table_header *s390dbf_sysctl_header; -void -debug_stop_all(void) +/** + * debug_stop_all() - stops the debug feature if stopping is allowed. + * + * Return: + * - none + * + * Currently used in case of a kernel oops. + */ +void debug_stop_all(void) { if (debug_stoppable) debug_active = 0; } EXPORT_SYMBOL(debug_stop_all); +/** + * debug_set_critical() - event/exception functions try lock instead of spin. + * + * Return: + * - none + * + * Currently used in case of stopping all CPUs but the current one. + * Once in this state, functions to write a debug entry for an + * event or exception no longer spin on the debug area lock, + * but only try to get it and fail if they do not get the lock. + */ void debug_set_critical(void) { debug_critical = 1; @@ -946,26 +1175,31 @@ void debug_set_critical(void) * debug_event_common: * - write debug entry with given size */ - -debug_entry_t* -debug_event_common(debug_info_t * id, int level, const void *buf, int len) +debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf, + int len) { - unsigned long flags; debug_entry_t *active; + unsigned long flags; if (!debug_active || !id->areas) return NULL; if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); - active = get_active_entry(id); - memset(DEBUG_DATA(active), 0, id->buf_size); - memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); - debug_finish_entry(id, active, level, 0); - spin_unlock_irqrestore(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, 0); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + spin_unlock_irqrestore(&id->lock, flags); return active; } EXPORT_SYMBOL(debug_event_common); @@ -974,26 +1208,31 @@ EXPORT_SYMBOL(debug_event_common); * debug_exception_common: * - write debug entry with given size and switch to next debug area */ - -debug_entry_t -*debug_exception_common(debug_info_t * id, int level, const void *buf, int len) +debug_entry_t *debug_exception_common(debug_info_t *id, int level, + const void *buf, int len) { - unsigned long flags; debug_entry_t *active; + unsigned long flags; if (!debug_active || !id->areas) return NULL; if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); - active = get_active_entry(id); - memset(DEBUG_DATA(active), 0, id->buf_size); - memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); - debug_finish_entry(id, active, level, 1); - spin_unlock_irqrestore(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, len <= id->buf_size); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + spin_unlock_irqrestore(&id->lock, flags); return active; } EXPORT_SYMBOL(debug_exception_common); @@ -1001,108 +1240,106 @@ EXPORT_SYMBOL(debug_exception_common); /* * counts arguments in format string for sprintf view */ - -static inline int -debug_count_numargs(char *string) +static inline int debug_count_numargs(char *string) { - int numargs=0; + int numargs = 0; - while(*string) { - if(*string++=='%') + while (*string) { + if (*string++ == '%') numargs++; } - return(numargs); + return numargs; } /* * debug_sprintf_event: */ - -debug_entry_t* -debug_sprintf_event(debug_info_t* id, int level,char *string,...) +debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) { - va_list ap; - int numargs,idx; - unsigned long flags; debug_sprintf_entry_t *curr_event; debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; - if((!id) || (level > id->level)) - return NULL; if (!debug_active || !id->areas) return NULL; - numargs=debug_count_numargs(string); + numargs = debug_count_numargs(string); if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); + } active = get_active_entry(id); - curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); - va_start(ap,string); - curr_event->string=string; - for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) - curr_event->args[idx]=va_arg(ap,long); + curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); va_end(ap); debug_finish_entry(id, active, level, 0); spin_unlock_irqrestore(&id->lock, flags); return active; } -EXPORT_SYMBOL(debug_sprintf_event); +EXPORT_SYMBOL(__debug_sprintf_event); /* * debug_sprintf_exception: */ - -debug_entry_t* -debug_sprintf_exception(debug_info_t* id, int level,char *string,...) +debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) { - va_list ap; - int numargs,idx; - unsigned long flags; debug_sprintf_entry_t *curr_event; debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; - if((!id) || (level > id->level)) - return NULL; if (!debug_active || !id->areas) return NULL; - numargs=debug_count_numargs(string); + numargs = debug_count_numargs(string); if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); + } active = get_active_entry(id); - curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); - va_start(ap,string); - curr_event->string=string; - for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) - curr_event->args[idx]=va_arg(ap,long); + curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); va_end(ap); debug_finish_entry(id, active, level, 1); spin_unlock_irqrestore(&id->lock, flags); return active; } -EXPORT_SYMBOL(debug_sprintf_exception); +EXPORT_SYMBOL(__debug_sprintf_exception); -/* - * debug_register_view: +/** + * debug_register_view() - registers new debug view and creates debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ - -int -debug_register_view(debug_info_t * id, struct debug_view *view) +int debug_register_view(debug_info_t *id, struct debug_view *view) { - int rc = 0; - int i; unsigned long flags; - umode_t mode; struct dentry *pde; + umode_t mode; + int rc = 0; + int i; if (!id) goto out; @@ -1112,40 +1349,41 @@ debug_register_view(debug_info_t * id, struct debug_view *view) if (!view->input_proc) mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, - id , &debug_file_ops); - if (!pde){ - pr_err("Registering view %s/%s failed due to out of " - "memory\n", id->name,view->name); - rc = -1; - goto out; - } + id, &debug_file_ops); spin_lock_irqsave(&id->lock, flags); for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!id->views[i]) break; } if (i == DEBUG_MAX_VIEWS) { - pr_err("Registering view %s/%s would exceed the maximum " - "number of views %i\n", id->name, view->name, i); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); - if (rc) + if (rc) { + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); debugfs_remove(pde); + } out: return rc; } EXPORT_SYMBOL(debug_register_view); -/* - * debug_unregister_view: +/** + * debug_unregister_view() - unregisters debug view and removes debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ - -int -debug_unregister_view(debug_info_t * id, struct debug_view *view) +int debug_unregister_view(debug_info_t *id, struct debug_view *view) { struct dentry *dentry = NULL; unsigned long flags; @@ -1158,9 +1396,9 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) if (id->views[i] == view) break; } - if (i == DEBUG_MAX_VIEWS) + if (i == DEBUG_MAX_VIEWS) { rc = -1; - else { + } else { dentry = id->debugfs_entries[i]; id->views[i] = NULL; id->debugfs_entries[i] = NULL; @@ -1172,36 +1410,28 @@ out: } EXPORT_SYMBOL(debug_unregister_view); -static inline char * -debug_get_user_string(const char __user *user_buf, size_t user_len) +static inline char *debug_get_user_string(const char __user *user_buf, + size_t user_len) { - char* buffer; - - buffer = kmalloc(user_len + 1, GFP_KERNEL); - if (!buffer) - return ERR_PTR(-ENOMEM); - if (copy_from_user(buffer, user_buf, user_len) != 0) { - kfree(buffer); - return ERR_PTR(-EFAULT); - } + char *buffer; + + buffer = memdup_user_nul(user_buf, user_len); + if (IS_ERR(buffer)) + return buffer; /* got the string, now strip linefeed. */ if (buffer[user_len - 1] == '\n') buffer[user_len - 1] = 0; - else - buffer[user_len] = 0; - return buffer; + return buffer; } -static inline int -debug_get_uint(char *buf) +static inline int debug_get_uint(char *buf) { int rc; buf = skip_spaces(buf); rc = simple_strtoul(buf, &buf, 10); - if(*buf){ + if (*buf) rc = -EINVAL; - } return rc; } @@ -1214,43 +1444,41 @@ debug_get_uint(char *buf) * prints out actual debug level */ -static int -debug_prolog_pages_fn(debug_info_t * id, - struct debug_view *view, char *out_buf) +static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* * reads new size (number of pages per debug area) */ -static int -debug_input_pages_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { + int rc, new_pages; char *str; - int rc,new_pages; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; goto out; } - str = debug_get_user_string(user_buf,user_len); - if(IS_ERR(str)){ + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { rc = PTR_ERR(str); goto out; } new_pages = debug_get_uint(str); - if(new_pages < 0){ + if (new_pages < 0) { rc = -EINVAL; goto free_str; } - rc = debug_set_size(id,id->nr_areas, new_pages); - if(rc != 0){ + rc = debug_set_size(id, id->nr_areas, new_pages); + if (rc != 0) { rc = -EINVAL; goto free_str; } @@ -1265,54 +1493,48 @@ out: /* * prints out actual debug level */ - -static int -debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf) +static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) { int rc = 0; - if(id->level == DEBUG_OFF_LEVEL) { - rc = sprintf(out_buf,"-\n"); - } - else { - rc = sprintf(out_buf, "%i\n", id->level); - } + if (id->level == DEBUG_OFF_LEVEL) + rc = scnprintf(out_buf, out_buf_size, "-\n"); + else + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } /* * reads new debug level */ - -static int -debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { + int rc, new_level; char *str; - int rc,new_level; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; goto out; } - str = debug_get_user_string(user_buf,user_len); - if(IS_ERR(str)){ + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { rc = PTR_ERR(str); goto out; } - if(str[0] == '-'){ + if (str[0] == '-') { debug_set_level(id, DEBUG_OFF_LEVEL); rc = user_len; goto free_str; } else { new_level = debug_get_uint(str); } - if(new_level < 0) { - pr_warning("%s is not a valid level for a debug " - "feature\n", str); + if (new_level < 0) { + pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); @@ -1325,208 +1547,173 @@ out: return rc; /* number of input characters */ } - /* * flushes debug areas */ - -static void debug_flush(debug_info_t* id, int area) +static void debug_flush(debug_info_t *id, int area) { - unsigned long flags; - int i,j; - - if(!id || !id->areas) - return; - spin_lock_irqsave(&id->lock,flags); - if(area == DEBUG_FLUSH_ALL){ - id->active_area = 0; - memset(id->active_entries, 0, id->nr_areas * sizeof(int)); - for (i = 0; i < id->nr_areas; i++) { + unsigned long flags; + int i, j; + + if (!id || !id->areas) + return; + spin_lock_irqsave(&id->lock, flags); + if (area == DEBUG_FLUSH_ALL) { + id->active_area = 0; + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { id->active_pages[i] = 0; - for(j = 0; j < id->pages_per_area; j++) { - memset(id->areas[i][j], 0, PAGE_SIZE); - } + for (j = 0; j < id->pages_per_area; j++) + memset(id->areas[i][j], 0, PAGE_SIZE); } - } else if(area >= 0 && area < id->nr_areas) { - id->active_entries[area] = 0; + } else if (area >= 0 && area < id->nr_areas) { + id->active_entries[area] = 0; id->active_pages[area] = 0; - for(i = 0; i < id->pages_per_area; i++) { - memset(id->areas[area][i],0,PAGE_SIZE); - } - } - spin_unlock_irqrestore(&id->lock,flags); + for (i = 0; i < id->pages_per_area; i++) + memset(id->areas[area][i], 0, PAGE_SIZE); + } + spin_unlock_irqrestore(&id->lock, flags); } /* - * view function: flushes debug areas + * view function: flushes debug areas */ - -static int -debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { - char input_buf[1]; - int rc = user_len; + char input_buf[1]; + int rc = user_len; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; - goto out; + goto out; + } + if (copy_from_user(input_buf, user_buf, 1)) { + rc = -EFAULT; + goto out; + } + if (input_buf[0] == '-') { + debug_flush(id, DEBUG_FLUSH_ALL); + goto out; + } + if (isdigit(input_buf[0])) { + int area = ((int) input_buf[0] - (int) '0'); + + debug_flush(id, area); + goto out; } - if (copy_from_user(input_buf, user_buf, 1)){ - rc = -EFAULT; - goto out; - } - if(input_buf[0] == '-') { - debug_flush(id, DEBUG_FLUSH_ALL); - goto out; - } - if (isdigit(input_buf[0])) { - int area = ((int) input_buf[0] - (int) '0'); - debug_flush(id, area); - goto out; - } pr_info("Flushing debug data failed because %c is not a valid " "area\n", input_buf[0]); out: - *offset += user_len; - return rc; /* number of input characters */ -} - -/* - * prints debug header in raw format - */ - -static int -debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) -{ - int rc; - - rc = sizeof(debug_entry_t); - memcpy(out_buf,entry,sizeof(debug_entry_t)); - return rc; -} - -/* - * prints debug data in raw format - */ - -static int -debug_raw_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf) -{ - int rc; - - rc = id->buf_size; - memcpy(out_buf, in_buf, id->buf_size); - return rc; + *offset += user_len; + return rc; /* number of input characters */ } /* * prints debug data in hex/ascii format */ - -static int -debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf) +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *in_buf) { int i, rc = 0; for (i = 0; i < id->buf_size; i++) { - rc += sprintf(out_buf + rc, "%02x ", - ((unsigned char *) in_buf)[i]); - } - rc += sprintf(out_buf + rc, "| "); + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; + if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } /* * prints header for debug entry */ - -int -debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) +int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { - struct timespec time_spec; - char *except_str; + unsigned long sec, usec; unsigned long caller; - int rc = 0; unsigned int level; + char *except_str; + int rc = 0; - level = entry->id.fields.level; - stck_to_timespec(entry->id.stck, &time_spec); + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; - caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; - rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ", - area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level, - except_str, entry->id.fields.cpuid, (void *) caller); + caller = (unsigned long) entry->caller; + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); /* - * prints debug data sprintf-formated: - * debug_sprinf_event/exception calls must be used together with this view + * prints debug data sprintf-formatted: + * debug_sprintf_event/exception calls must be used together with this view */ #define DEBUG_SPRINTF_MAX_ARGS 10 -static int -debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { - int num_longs, num_used_args = 0,i, rc = 0; + debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; + int num_longs, num_used_args = 0, i, rc = 0; int index[DEBUG_SPRINTF_MAX_ARGS]; /* count of longs fit into one entry */ - num_longs = id->buf_size / sizeof(long); + num_longs = id->buf_size / sizeof(long); - if(num_longs < 1) + if (num_longs < 1) goto out; /* bufsize of entry too small */ - if(num_longs == 1) { + if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } /* number of arguments used for sprintf (without the format string) */ - num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); + num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); - memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); + memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); - for(i = 0; i < num_used_args; i++) + for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); - + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: - return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: @@ -1534,7 +1721,7 @@ out: */ static int __init debug_init(void) { - s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table); mutex_lock(&debug_mutex); debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); initialized = 1; diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c deleted file mode 100644 index 8237fc07ac79..000000000000 --- a/arch/s390/kernel/diag.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Implementation of s390 diagnose codes - * - * Copyright IBM Corp. 2007 - * Author(s): Michael Holzheu <holzheu@de.ibm.com> - */ - -#include <linux/module.h> -#include <asm/diag.h> - -/* - * Diagnose 14: Input spool file manipulation - */ -int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) -{ - register unsigned long _ry1 asm("2") = ry1; - register unsigned long _ry2 asm("3") = subcode; - int rc = 0; - - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %2,2,0x14\n" - " sam64\n" -#else - " diag %2,2,0x14\n" -#endif - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "+d" (_ry2) - : "d" (rx), "d" (_ry1) - : "cc"); - - return rc; -} -EXPORT_SYMBOL(diag14); - -/* - * Diagnose 210: Get information about a virtual device - */ -int diag210(struct diag210 *addr) -{ - /* - * diag 210 needs its data below the 2GB border, so we - * use a static data area to be sure - */ - static struct diag210 diag210_tmp; - static DEFINE_SPINLOCK(diag210_lock); - unsigned long flags; - int ccode; - - spin_lock_irqsave(&diag210_lock, flags); - diag210_tmp = *addr; - -#ifdef CONFIG_64BIT - asm volatile( - " lhi %0,-1\n" - " sam31\n" - " diag %1,0,0x210\n" - "0: ipm %0\n" - " srl %0,28\n" - "1: sam64\n" - EX_TABLE(0b, 1b) - : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); -#else - asm volatile( - " lhi %0,-1\n" - " diag %1,0,0x210\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b, 1b) - : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); -#endif - - *addr = diag210_tmp; - spin_unlock_irqrestore(&diag210_lock, flags); - - return ccode; -} -EXPORT_SYMBOL(diag210); diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c new file mode 100644 index 000000000000..56b862ba9be8 --- /dev/null +++ b/arch/s390/kernel/diag/diag.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implementation of s390 diagnose codes + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/vmalloc.h> +#include <asm/asm-extable.h> +#include <asm/diag.h> +#include <asm/trace/diag.h> +#include <asm/sections.h> +#include <asm/asm.h> +#include "../entry.h" + +struct diag_stat { + unsigned int counter[NR_DIAG_STAT]; +}; + +static DEFINE_PER_CPU(struct diag_stat, diag_stat); + +struct diag_desc { + int code; + char *name; +}; + +static const struct diag_desc diag_map[NR_DIAG_STAT] = { + [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" }, + [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" }, + [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" }, + [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" }, + [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" }, + [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" }, + [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" }, + [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" }, + [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" }, + [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" }, + [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" }, + [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" }, + [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" }, + [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" }, + [DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" }, + [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" }, + [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" }, + [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, + [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, + [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, + [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, + [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, + [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, + [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, +}; + +struct diag_ops __amode31_ref diag_amode31_ops = { + .diag210 = _diag210_amode31, + .diag26c = _diag26c_amode31, + .diag14 = _diag14_amode31, + .diag0c = _diag0c_amode31, + .diag8c = _diag8c_amode31, + .diag308_reset = _diag308_reset_amode31 +}; + +static struct diag210 _diag210_tmp_amode31 __section(".amode31.data"); +struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31; + +static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data"); +static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31; + +static int show_diag_stat(struct seq_file *m, void *v) +{ + struct diag_stat *stat; + unsigned long n = (unsigned long) v - 1; + int cpu, prec, tmp; + + cpus_read_lock(); + if (n == 0) { + seq_puts(m, " "); + + for_each_online_cpu(cpu) { + prec = 10; + for (tmp = 10; cpu >= tmp; tmp *= 10) + prec--; + seq_printf(m, "%*s%d", prec, "CPU", cpu); + } + seq_putc(m, '\n'); + } else if (n <= NR_DIAG_STAT) { + seq_printf(m, "diag %03x:", diag_map[n-1].code); + for_each_online_cpu(cpu) { + stat = &per_cpu(diag_stat, cpu); + seq_printf(m, " %10u", stat->counter[n-1]); + } + seq_printf(m, " %s\n", diag_map[n-1].name); + } + cpus_read_unlock(); + return 0; +} + +static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) +{ + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return show_diag_stat_start(m, pos); +} + +static void show_diag_stat_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations show_diag_stat_sops = { + .start = show_diag_stat_start, + .next = show_diag_stat_next, + .stop = show_diag_stat_stop, + .show = show_diag_stat, +}; + +DEFINE_SEQ_ATTRIBUTE(show_diag_stat); + +static int __init show_diag_stat_init(void) +{ + debugfs_create_file("diag_stat", 0400, NULL, NULL, + &show_diag_stat_fops); + return 0; +} + +device_initcall(show_diag_stat_init); + +void diag_stat_inc(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_s390_diagnose(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc); + +void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_s390_diagnose_norecursion(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc_norecursion); + +/* + * Diagnose 0c: Pseudo Timer + */ +void diag0c(struct hypfs_diag0c_entry *data) +{ + diag_stat_inc(DIAG_STAT_X00C); + diag_amode31_ops.diag0c(virt_to_phys(data)); +} + +/* + * Diagnose 14: Input spool file manipulation + * + * The subcode parameter determines the type of the first parameter rx. + * Currently used are the following 3 subcommands: + * 0x0: Read the Next Spool File Buffer (Data Record) + * 0x28: Position a Spool File to the Designated Record + * 0xfff: Retrieve Next File Descriptor + * + * For subcommands 0x0 and 0xfff, the value of the first parameter is + * a virtual address of a memory buffer and needs virtual to physical + * address translation. For other subcommands the rx parameter is not + * a virtual address. + */ +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + diag_stat_inc(DIAG_STAT_X014); + switch (subcode) { + case 0x0: + case 0xfff: + rx = virt_to_phys((void *)rx); + break; + default: + /* Do nothing */ + break; + } + return diag_amode31_ops.diag14(rx, ry1, subcode); +} +EXPORT_SYMBOL(diag14); + +#define DIAG204_BUSY_RC 8 + +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = *subcode, .odd = size }; + + asm_inline volatile( + " diag %[addr],%[rp],0x204\n" + "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : [rp] "+&d" (rp.pair) : [addr] "d" (addr) : "memory"); + *subcode = rp.even; + return rp.odd; +} + +/** + * diag204() - Issue diagnose 204 call. + * @subcode: Subcode of diagnose 204 to be executed. + * @size: Size of area in pages which @area points to, if given. + * @addr: Vmalloc'ed memory area where the result is written to. + * + * Execute diagnose 204 with the given subcode and write the result to the + * memory area specified with @addr. For subcodes which do not write a + * result to memory both @size and @addr must be zero. If @addr is + * specified it must be page aligned and must have been allocated with + * vmalloc(). Conversion to real / physical addresses will be handled by + * this function if required. + */ +int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + if (addr) { + if (WARN_ON_ONCE(!is_vmalloc_addr(addr))) + return -EINVAL; + if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE))) + return -EINVAL; + } + if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) + addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); + diag_stat_inc(DIAG_STAT_X204); + size = __diag204(&subcode, size, addr); + if (subcode == DIAG204_BUSY_RC) + return -EBUSY; + else if (subcode) + return -EOPNOTSUPP; + return size; +} +EXPORT_SYMBOL(diag204); + +/* + * Diagnose 210: Get information about a virtual device + */ +int diag210(struct diag210 *addr) +{ + static DEFINE_SPINLOCK(diag210_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag210_lock, flags); + *__diag210_tmp_amode31 = *addr; + + diag_stat_inc(DIAG_STAT_X210); + ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31); + + *addr = *__diag210_tmp_amode31; + spin_unlock_irqrestore(&diag210_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag210); + +/* + * Diagnose 8C: Access 3270 Display Device Information + */ +int diag8c(struct diag8c *addr, struct ccw_dev_id *devno) +{ + static DEFINE_SPINLOCK(diag8c_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag8c_lock, flags); + + diag_stat_inc(DIAG_STAT_X08C); + ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr)); + + *addr = *__diag8c_tmp_amode31; + spin_unlock_irqrestore(&diag8c_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag8c); + +int diag224(void *ptr) +{ + unsigned long addr = __pa(ptr); + int rc = -EOPNOTSUPP; + + diag_stat_inc(DIAG_STAT_X224); + asm_inline volatile("\n" + " diag %[type],%[addr],0x224\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b,1b) + : [rc] "+d" (rc) + , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr) + : [type] "d" (0), [addr] "d" (addr)); + return rc; +} +EXPORT_SYMBOL(diag224); + +/* + * Diagnose 26C: Access Certain System Information + */ +int diag26c(void *req, void *resp, enum diag26c_sc subcode) +{ + diag_stat_inc(DIAG_STAT_X26C); + return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode); +} +EXPORT_SYMBOL(diag26c); + +int diag49c(unsigned long subcode) +{ + int cc; + + diag_stat_inc(DIAG_STAT_X49C); + asm volatile( + " diag %[subcode],0,0x49c\n" + CC_IPM(cc) + : CC_OUT(cc, cc) + : [subcode] "d" (subcode) + : CC_CLOBBER); + return CC_TRANSFORM(cc); +} +EXPORT_SYMBOL(diag49c); diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..f411562aa7f6 --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..fe325c2a2d0d --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index be87d3e05a5b..1cec93895b3a 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Disassemble s390 instructions. * @@ -16,69 +17,95 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/module.h> #include <linux/kallsyms.h> #include <linux/reboot.h> #include <linux/kprobes.h> #include <linux/kdebug.h> - -#include <asm/uaccess.h> -#include <asm/io.h> +#include <linux/uaccess.h> #include <linux/atomic.h> -#include <asm/mathemu.h> +#include <linux/io.h> +#include <asm/dis.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> #include <asm/irq.h> -#ifndef CONFIG_64BIT -#define ONELONG "%08lx: " -#else /* CONFIG_64BIT */ -#define ONELONG "%016lx: " -#endif /* CONFIG_64BIT */ - +/* Type of operand */ #define OPERAND_GPR 0x1 /* Operand printed as %rx */ #define OPERAND_FPR 0x2 /* Operand printed as %fx */ #define OPERAND_AR 0x4 /* Operand printed as %ax */ #define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ + +struct s390_operand { + unsigned char bits; /* The number of bits in the operand. */ + unsigned char shift; /* The number of bits to shift. */ + unsigned short flags; /* One bit syntax flags. */ +}; + +struct s390_insn { + union { + const char name[5]; + struct { + unsigned char zero; + unsigned int offset; + } __packed; + }; + unsigned char opfrag; + unsigned char format; +}; + +struct s390_opcode_offset { + unsigned char opcode; + unsigned char mask; + unsigned char byte; + unsigned short offset; + unsigned short count; +} __packed; enum { - UNUSED, /* Indicates the end of the operand list */ - R_8, /* GPR starting at position 8 */ - R_12, /* GPR starting at position 12 */ - R_16, /* GPR starting at position 16 */ - R_20, /* GPR starting at position 20 */ - R_24, /* GPR starting at position 24 */ - R_28, /* GPR starting at position 28 */ - R_32, /* GPR starting at position 32 */ - F_8, /* FPR starting at position 8 */ - F_12, /* FPR starting at position 12 */ - F_16, /* FPR starting at position 16 */ - F_20, /* FPR starting at position 16 */ - F_24, /* FPR starting at position 24 */ - F_28, /* FPR starting at position 28 */ - F_32, /* FPR starting at position 32 */ + UNUSED, A_8, /* Access reg. starting at position 8 */ A_12, /* Access reg. starting at position 12 */ A_24, /* Access reg. starting at position 24 */ A_28, /* Access reg. starting at position 28 */ - C_8, /* Control reg. starting at position 8 */ - C_12, /* Control reg. starting at position 12 */ B_16, /* Base register starting at position 16 */ B_32, /* Base register starting at position 32 */ - X_12, /* Index register starting at position 12 */ + C_8, /* Control reg. starting at position 8 */ + C_12, /* Control reg. starting at position 12 */ + D20_20, /* 20 bit displacement starting at 20 */ D_20, /* Displacement starting at position 20 */ D_36, /* Displacement starting at position 36 */ - D20_20, /* 20 bit displacement starting at 20 */ + F_8, /* FPR starting at position 8 */ + F_12, /* FPR starting at position 12 */ + F_16, /* FPR starting at position 16 */ + F_24, /* FPR starting at position 24 */ + F_28, /* FPR starting at position 28 */ + F_32, /* FPR starting at position 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 16 bit signed value starting at 32 */ + I32_16, /* 32 bit signed value starting at 16 */ + J12_12, /* 12 bit PC relative offset at 12 */ + J16_16, /* 16 bit PC relative offset at 16 */ + J16_32, /* 16 bit PC relative offset at 32 */ + J24_24, /* 24 bit PC relative offset at 24 */ + J32_16, /* 32 bit PC relative offset at 16 */ L4_8, /* 4 bit length starting at position 8 */ L4_12, /* 4 bit length starting at position 12 */ L8_8, /* 8 bit length starting at position 8 */ + R_8, /* GPR starting at position 8 */ + R_12, /* GPR starting at position 12 */ + R_16, /* GPR starting at position 16 */ + R_24, /* GPR starting at position 24 */ + R_28, /* GPR starting at position 28 */ U4_8, /* 4 bit unsigned value starting at 8 */ U4_12, /* 4 bit unsigned value starting at 12 */ U4_16, /* 4 bit unsigned value starting at 16 */ @@ -90,1500 +117,248 @@ enum { U8_8, /* 8 bit unsigned value starting at 8 */ U8_16, /* 8 bit unsigned value starting at 16 */ U8_24, /* 8 bit unsigned value starting at 24 */ + U8_28, /* 8 bit unsigned value starting at 28 */ U8_32, /* 8 bit unsigned value starting at 32 */ - I8_8, /* 8 bit signed value starting at 8 */ - I8_32, /* 8 bit signed value starting at 32 */ - J12_12, /* PC relative offset at 12 */ - I16_16, /* 16 bit signed value starting at 16 */ - I16_32, /* 32 bit signed value starting at 16 */ - U16_16, /* 16 bit unsigned value starting at 16 */ - U16_32, /* 32 bit unsigned value starting at 16 */ - J16_16, /* PC relative jump offset at 16 */ - J16_32, /* PC relative offset at 16 */ - I24_24, /* 24 bit signed value starting at 24 */ - J32_16, /* PC relative long offset at 16 */ - I32_16, /* 32 bit signed value starting at 16 */ - U32_16, /* 32 bit unsigned value starting at 16 */ - M_16, /* 4 bit optional mask starting at 16 */ - M_20, /* 4 bit optional mask starting at 20 */ - RO_28, /* optional GPR starting at position 28 */ -}; - -/* - * Enumeration of the different instruction formats. - * For details consult the principles of operation. - */ -enum { - INSTR_INVALID, - INSTR_E, - INSTR_IE_UU, - INSTR_MII_UPI, - INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, - INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, - INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, - INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, - INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, - INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0, - INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, - INSTR_RRE_RR, INSTR_RRE_RR_OPT, - INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, - INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR, - INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR, - INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF, - INSTR_RRF_UUFR, INSTR_RRF_UURF, - INSTR_RRR_F0FF, INSTR_RRS_RRRDU, - INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, - INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, - INSTR_RSI_RRP, - INSTR_RSL_LRDFU, INSTR_RSL_R0RD, - INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, - INSTR_RSY_RDRM, - INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, - INSTR_RS_RURD, - INSTR_RXE_FRRD, INSTR_RXE_RRRD, - INSTR_RXF_FRRDF, - INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, - INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, - INSTR_SIL_RDI, INSTR_SIL_RDU, - INSTR_SIY_IRD, INSTR_SIY_URD, - INSTR_SI_URD, - INSTR_SMI_U0RDP, - INSTR_SSE_RDRD, - INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, - INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, - INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, - INSTR_S_00, INSTR_S_RD, -}; - -struct operand { - int bits; /* The number of bits in the operand. */ - int shift; /* The number of bits to shift. */ - int flags; /* One bit syntax flags. */ -}; - -struct insn { - const char name[5]; - unsigned char opfrag; - unsigned char format; + U12_16, /* 12 bit unsigned value starting at 16 */ + U16_16, /* 16 bit unsigned value starting at 16 */ + U16_20, /* 16 bit unsigned value starting at 20 */ + U16_32, /* 16 bit unsigned value starting at 32 */ + U32_16, /* 32 bit unsigned value starting at 16 */ + VX_12, /* Vector index register starting at position 12 */ + V_8, /* Vector reg. starting at position 8 */ + V_12, /* Vector reg. starting at position 12 */ + V_16, /* Vector reg. starting at position 16 */ + V_32, /* Vector reg. starting at position 32 */ + X_12, /* Index register starting at position 12 */ }; -static const struct operand operands[] = -{ - [UNUSED] = { 0, 0, 0 }, - [R_8] = { 4, 8, OPERAND_GPR }, - [R_12] = { 4, 12, OPERAND_GPR }, - [R_16] = { 4, 16, OPERAND_GPR }, - [R_20] = { 4, 20, OPERAND_GPR }, - [R_24] = { 4, 24, OPERAND_GPR }, - [R_28] = { 4, 28, OPERAND_GPR }, - [R_32] = { 4, 32, OPERAND_GPR }, - [F_8] = { 4, 8, OPERAND_FPR }, - [F_12] = { 4, 12, OPERAND_FPR }, - [F_16] = { 4, 16, OPERAND_FPR }, - [F_20] = { 4, 16, OPERAND_FPR }, - [F_24] = { 4, 24, OPERAND_FPR }, - [F_28] = { 4, 28, OPERAND_FPR }, - [F_32] = { 4, 32, OPERAND_FPR }, +static const struct s390_operand operands[] = { + [UNUSED] = { 0, 0, 0 }, [A_8] = { 4, 8, OPERAND_AR }, [A_12] = { 4, 12, OPERAND_AR }, [A_24] = { 4, 24, OPERAND_AR }, [A_28] = { 4, 28, OPERAND_AR }, - [C_8] = { 4, 8, OPERAND_CR }, - [C_12] = { 4, 12, OPERAND_CR }, [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, - [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, + [C_8] = { 4, 8, OPERAND_CR }, + [C_12] = { 4, 12, OPERAND_CR }, + [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, [D_20] = { 12, 20, OPERAND_DISP }, [D_36] = { 12, 36, OPERAND_DISP }, - [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, + [F_8] = { 4, 8, OPERAND_FPR }, + [F_12] = { 4, 12, OPERAND_FPR }, + [F_16] = { 4, 16, OPERAND_FPR }, + [F_24] = { 4, 24, OPERAND_FPR }, + [F_28] = { 4, 28, OPERAND_FPR }, + [F_32] = { 4, 32, OPERAND_FPR }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_16] = { 16, 16, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I32_16] = { 32, 16, OPERAND_SIGNED }, + [J12_12] = { 12, 12, OPERAND_PCREL }, + [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, + [J24_24] = { 24, 24, OPERAND_PCREL }, + [J32_16] = { 32, 16, OPERAND_PCREL }, [L4_8] = { 4, 8, OPERAND_LENGTH }, - [L4_12] = { 4, 12, OPERAND_LENGTH }, + [L4_12] = { 4, 12, OPERAND_LENGTH }, [L8_8] = { 8, 8, OPERAND_LENGTH }, + [R_8] = { 4, 8, OPERAND_GPR }, + [R_12] = { 4, 12, OPERAND_GPR }, + [R_16] = { 4, 16, OPERAND_GPR }, + [R_24] = { 4, 24, OPERAND_GPR }, + [R_28] = { 4, 28, OPERAND_GPR }, [U4_8] = { 4, 8, 0 }, - [U4_12] = { 4, 12, 0 }, - [U4_16] = { 4, 16, 0 }, - [U4_20] = { 4, 20, 0 }, - [U4_24] = { 4, 24, 0 }, - [U4_28] = { 4, 28, 0 }, - [U4_32] = { 4, 32, 0 }, - [U4_36] = { 4, 36, 0 }, + [U4_12] = { 4, 12, 0 }, + [U4_16] = { 4, 16, 0 }, + [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, + [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, [U8_8] = { 8, 8, 0 }, - [U8_16] = { 8, 16, 0 }, - [U8_24] = { 8, 24, 0 }, - [U8_32] = { 8, 32, 0 }, - [J12_12] = { 12, 12, OPERAND_PCREL }, - [I16_16] = { 16, 16, OPERAND_SIGNED }, + [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_28] = { 8, 28, 0 }, + [U8_32] = { 8, 32, 0 }, + [U12_16] = { 12, 16, 0 }, [U16_16] = { 16, 16, 0 }, + [U16_20] = { 16, 20, 0 }, [U16_32] = { 16, 32, 0 }, - [J16_16] = { 16, 16, OPERAND_PCREL }, - [J16_32] = { 16, 32, OPERAND_PCREL }, - [I16_32] = { 16, 32, OPERAND_SIGNED }, - [I24_24] = { 24, 24, OPERAND_SIGNED }, - [J32_16] = { 32, 16, OPERAND_PCREL }, - [I32_16] = { 32, 16, OPERAND_SIGNED }, [U32_16] = { 32, 16, 0 }, - [M_16] = { 4, 16, 0 }, - [M_20] = { 4, 20, 0 }, - [RO_28] = { 4, 28, OPERAND_GPR } -}; - -static const unsigned char formats[][7] = { - [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 }, - [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, - [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 }, - [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, - [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, - [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 }, - [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, - [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, - [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, - [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 }, - [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, - [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, - [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, - [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, - [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 }, - [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 }, - [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 }, - [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, - [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, - [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, - [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, - [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, - [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, - [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, - [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, - [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, - [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 }, - [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, - [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, - [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, - [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, - [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, - [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 }, - [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 }, - [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, - [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, - [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, - [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, - [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 }, - [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, - [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, - [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, - [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 }, - [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, - [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, - [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, - [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, - [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, - [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 }, - [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 }, - [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, - [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, - [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, - [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, - [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, - [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, - [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, - [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, - [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, - [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, - [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, - [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 }, - [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, - [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, - [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, - [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, - [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, - [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, - [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, - [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, - [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, - [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, - [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, - [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, - [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, - [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, - [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, - [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, - [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, - [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, - [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, - [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, - [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 }, - [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, - [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 }, - [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 }, - [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, - [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, - [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, - [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, - [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, - [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, - [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, -}; - -enum { - LONG_INSN_ALGHSIK, - LONG_INSN_ALHHHR, - LONG_INSN_ALHHLR, - LONG_INSN_ALHSIK, - LONG_INSN_ALSIHN, - LONG_INSN_CDFBRA, - LONG_INSN_CDGBRA, - LONG_INSN_CDGTRA, - LONG_INSN_CDLFBR, - LONG_INSN_CDLFTR, - LONG_INSN_CDLGBR, - LONG_INSN_CDLGTR, - LONG_INSN_CEFBRA, - LONG_INSN_CEGBRA, - LONG_INSN_CELFBR, - LONG_INSN_CELGBR, - LONG_INSN_CFDBRA, - LONG_INSN_CFEBRA, - LONG_INSN_CFXBRA, - LONG_INSN_CGDBRA, - LONG_INSN_CGDTRA, - LONG_INSN_CGEBRA, - LONG_INSN_CGXBRA, - LONG_INSN_CGXTRA, - LONG_INSN_CLFDBR, - LONG_INSN_CLFDTR, - LONG_INSN_CLFEBR, - LONG_INSN_CLFHSI, - LONG_INSN_CLFXBR, - LONG_INSN_CLFXTR, - LONG_INSN_CLGDBR, - LONG_INSN_CLGDTR, - LONG_INSN_CLGEBR, - LONG_INSN_CLGFRL, - LONG_INSN_CLGHRL, - LONG_INSN_CLGHSI, - LONG_INSN_CLGXBR, - LONG_INSN_CLGXTR, - LONG_INSN_CLHHSI, - LONG_INSN_CXFBRA, - LONG_INSN_CXGBRA, - LONG_INSN_CXGTRA, - LONG_INSN_CXLFBR, - LONG_INSN_CXLFTR, - LONG_INSN_CXLGBR, - LONG_INSN_CXLGTR, - LONG_INSN_FIDBRA, - LONG_INSN_FIEBRA, - LONG_INSN_FIXBRA, - LONG_INSN_LDXBRA, - LONG_INSN_LEDBRA, - LONG_INSN_LEXBRA, - LONG_INSN_LLGFAT, - LONG_INSN_LLGFRL, - LONG_INSN_LLGHRL, - LONG_INSN_LLGTAT, - LONG_INSN_POPCNT, - LONG_INSN_RIEMIT, - LONG_INSN_RINEXT, - LONG_INSN_RISBGN, - LONG_INSN_RISBHG, - LONG_INSN_RISBLG, - LONG_INSN_SLHHHR, - LONG_INSN_SLHHLR, - LONG_INSN_TABORT, - LONG_INSN_TBEGIN, - LONG_INSN_TBEGINC, - LONG_INSN_PCISTG, - LONG_INSN_MPCIFC, - LONG_INSN_STPCIFC, - LONG_INSN_PCISTB, -}; - -static char *long_insn_name[] = { - [LONG_INSN_ALGHSIK] = "alghsik", - [LONG_INSN_ALHHHR] = "alhhhr", - [LONG_INSN_ALHHLR] = "alhhlr", - [LONG_INSN_ALHSIK] = "alhsik", - [LONG_INSN_ALSIHN] = "alsihn", - [LONG_INSN_CDFBRA] = "cdfbra", - [LONG_INSN_CDGBRA] = "cdgbra", - [LONG_INSN_CDGTRA] = "cdgtra", - [LONG_INSN_CDLFBR] = "cdlfbr", - [LONG_INSN_CDLFTR] = "cdlftr", - [LONG_INSN_CDLGBR] = "cdlgbr", - [LONG_INSN_CDLGTR] = "cdlgtr", - [LONG_INSN_CEFBRA] = "cefbra", - [LONG_INSN_CEGBRA] = "cegbra", - [LONG_INSN_CELFBR] = "celfbr", - [LONG_INSN_CELGBR] = "celgbr", - [LONG_INSN_CFDBRA] = "cfdbra", - [LONG_INSN_CFEBRA] = "cfebra", - [LONG_INSN_CFXBRA] = "cfxbra", - [LONG_INSN_CGDBRA] = "cgdbra", - [LONG_INSN_CGDTRA] = "cgdtra", - [LONG_INSN_CGEBRA] = "cgebra", - [LONG_INSN_CGXBRA] = "cgxbra", - [LONG_INSN_CGXTRA] = "cgxtra", - [LONG_INSN_CLFDBR] = "clfdbr", - [LONG_INSN_CLFDTR] = "clfdtr", - [LONG_INSN_CLFEBR] = "clfebr", - [LONG_INSN_CLFHSI] = "clfhsi", - [LONG_INSN_CLFXBR] = "clfxbr", - [LONG_INSN_CLFXTR] = "clfxtr", - [LONG_INSN_CLGDBR] = "clgdbr", - [LONG_INSN_CLGDTR] = "clgdtr", - [LONG_INSN_CLGEBR] = "clgebr", - [LONG_INSN_CLGFRL] = "clgfrl", - [LONG_INSN_CLGHRL] = "clghrl", - [LONG_INSN_CLGHSI] = "clghsi", - [LONG_INSN_CLGXBR] = "clgxbr", - [LONG_INSN_CLGXTR] = "clgxtr", - [LONG_INSN_CLHHSI] = "clhhsi", - [LONG_INSN_CXFBRA] = "cxfbra", - [LONG_INSN_CXGBRA] = "cxgbra", - [LONG_INSN_CXGTRA] = "cxgtra", - [LONG_INSN_CXLFBR] = "cxlfbr", - [LONG_INSN_CXLFTR] = "cxlftr", - [LONG_INSN_CXLGBR] = "cxlgbr", - [LONG_INSN_CXLGTR] = "cxlgtr", - [LONG_INSN_FIDBRA] = "fidbra", - [LONG_INSN_FIEBRA] = "fiebra", - [LONG_INSN_FIXBRA] = "fixbra", - [LONG_INSN_LDXBRA] = "ldxbra", - [LONG_INSN_LEDBRA] = "ledbra", - [LONG_INSN_LEXBRA] = "lexbra", - [LONG_INSN_LLGFAT] = "llgfat", - [LONG_INSN_LLGFRL] = "llgfrl", - [LONG_INSN_LLGHRL] = "llghrl", - [LONG_INSN_LLGTAT] = "llgtat", - [LONG_INSN_POPCNT] = "popcnt", - [LONG_INSN_RIEMIT] = "riemit", - [LONG_INSN_RINEXT] = "rinext", - [LONG_INSN_RISBGN] = "risbgn", - [LONG_INSN_RISBHG] = "risbhg", - [LONG_INSN_RISBLG] = "risblg", - [LONG_INSN_SLHHHR] = "slhhhr", - [LONG_INSN_SLHHLR] = "slhhlr", - [LONG_INSN_TABORT] = "tabort", - [LONG_INSN_TBEGIN] = "tbegin", - [LONG_INSN_TBEGINC] = "tbeginc", - [LONG_INSN_PCISTG] = "pcistg", - [LONG_INSN_MPCIFC] = "mpcifc", - [LONG_INSN_STPCIFC] = "stpcifc", - [LONG_INSN_PCISTB] = "pcistb", -}; - -static struct insn opcode[] = { -#ifdef CONFIG_64BIT - { "bprp", 0xc5, INSTR_MII_UPI }, - { "bpp", 0xc7, INSTR_SMI_U0RDP }, - { "trtr", 0xd0, INSTR_SS_L0RDRD }, - { "lmd", 0xef, INSTR_SS_RRRDRD3 }, -#endif - { "spm", 0x04, INSTR_RR_R0 }, - { "balr", 0x05, INSTR_RR_RR }, - { "bctr", 0x06, INSTR_RR_RR }, - { "bcr", 0x07, INSTR_RR_UR }, - { "svc", 0x0a, INSTR_RR_U0 }, - { "bsm", 0x0b, INSTR_RR_RR }, - { "bassm", 0x0c, INSTR_RR_RR }, - { "basr", 0x0d, INSTR_RR_RR }, - { "mvcl", 0x0e, INSTR_RR_RR }, - { "clcl", 0x0f, INSTR_RR_RR }, - { "lpr", 0x10, INSTR_RR_RR }, - { "lnr", 0x11, INSTR_RR_RR }, - { "ltr", 0x12, INSTR_RR_RR }, - { "lcr", 0x13, INSTR_RR_RR }, - { "nr", 0x14, INSTR_RR_RR }, - { "clr", 0x15, INSTR_RR_RR }, - { "or", 0x16, INSTR_RR_RR }, - { "xr", 0x17, INSTR_RR_RR }, - { "lr", 0x18, INSTR_RR_RR }, - { "cr", 0x19, INSTR_RR_RR }, - { "ar", 0x1a, INSTR_RR_RR }, - { "sr", 0x1b, INSTR_RR_RR }, - { "mr", 0x1c, INSTR_RR_RR }, - { "dr", 0x1d, INSTR_RR_RR }, - { "alr", 0x1e, INSTR_RR_RR }, - { "slr", 0x1f, INSTR_RR_RR }, - { "lpdr", 0x20, INSTR_RR_FF }, - { "lndr", 0x21, INSTR_RR_FF }, - { "ltdr", 0x22, INSTR_RR_FF }, - { "lcdr", 0x23, INSTR_RR_FF }, - { "hdr", 0x24, INSTR_RR_FF }, - { "ldxr", 0x25, INSTR_RR_FF }, - { "mxr", 0x26, INSTR_RR_FF }, - { "mxdr", 0x27, INSTR_RR_FF }, - { "ldr", 0x28, INSTR_RR_FF }, - { "cdr", 0x29, INSTR_RR_FF }, - { "adr", 0x2a, INSTR_RR_FF }, - { "sdr", 0x2b, INSTR_RR_FF }, - { "mdr", 0x2c, INSTR_RR_FF }, - { "ddr", 0x2d, INSTR_RR_FF }, - { "awr", 0x2e, INSTR_RR_FF }, - { "swr", 0x2f, INSTR_RR_FF }, - { "lper", 0x30, INSTR_RR_FF }, - { "lner", 0x31, INSTR_RR_FF }, - { "lter", 0x32, INSTR_RR_FF }, - { "lcer", 0x33, INSTR_RR_FF }, - { "her", 0x34, INSTR_RR_FF }, - { "ledr", 0x35, INSTR_RR_FF }, - { "axr", 0x36, INSTR_RR_FF }, - { "sxr", 0x37, INSTR_RR_FF }, - { "ler", 0x38, INSTR_RR_FF }, - { "cer", 0x39, INSTR_RR_FF }, - { "aer", 0x3a, INSTR_RR_FF }, - { "ser", 0x3b, INSTR_RR_FF }, - { "mder", 0x3c, INSTR_RR_FF }, - { "der", 0x3d, INSTR_RR_FF }, - { "aur", 0x3e, INSTR_RR_FF }, - { "sur", 0x3f, INSTR_RR_FF }, - { "sth", 0x40, INSTR_RX_RRRD }, - { "la", 0x41, INSTR_RX_RRRD }, - { "stc", 0x42, INSTR_RX_RRRD }, - { "ic", 0x43, INSTR_RX_RRRD }, - { "ex", 0x44, INSTR_RX_RRRD }, - { "bal", 0x45, INSTR_RX_RRRD }, - { "bct", 0x46, INSTR_RX_RRRD }, - { "bc", 0x47, INSTR_RX_URRD }, - { "lh", 0x48, INSTR_RX_RRRD }, - { "ch", 0x49, INSTR_RX_RRRD }, - { "ah", 0x4a, INSTR_RX_RRRD }, - { "sh", 0x4b, INSTR_RX_RRRD }, - { "mh", 0x4c, INSTR_RX_RRRD }, - { "bas", 0x4d, INSTR_RX_RRRD }, - { "cvd", 0x4e, INSTR_RX_RRRD }, - { "cvb", 0x4f, INSTR_RX_RRRD }, - { "st", 0x50, INSTR_RX_RRRD }, - { "lae", 0x51, INSTR_RX_RRRD }, - { "n", 0x54, INSTR_RX_RRRD }, - { "cl", 0x55, INSTR_RX_RRRD }, - { "o", 0x56, INSTR_RX_RRRD }, - { "x", 0x57, INSTR_RX_RRRD }, - { "l", 0x58, INSTR_RX_RRRD }, - { "c", 0x59, INSTR_RX_RRRD }, - { "a", 0x5a, INSTR_RX_RRRD }, - { "s", 0x5b, INSTR_RX_RRRD }, - { "m", 0x5c, INSTR_RX_RRRD }, - { "d", 0x5d, INSTR_RX_RRRD }, - { "al", 0x5e, INSTR_RX_RRRD }, - { "sl", 0x5f, INSTR_RX_RRRD }, - { "std", 0x60, INSTR_RX_FRRD }, - { "mxd", 0x67, INSTR_RX_FRRD }, - { "ld", 0x68, INSTR_RX_FRRD }, - { "cd", 0x69, INSTR_RX_FRRD }, - { "ad", 0x6a, INSTR_RX_FRRD }, - { "sd", 0x6b, INSTR_RX_FRRD }, - { "md", 0x6c, INSTR_RX_FRRD }, - { "dd", 0x6d, INSTR_RX_FRRD }, - { "aw", 0x6e, INSTR_RX_FRRD }, - { "sw", 0x6f, INSTR_RX_FRRD }, - { "ste", 0x70, INSTR_RX_FRRD }, - { "ms", 0x71, INSTR_RX_RRRD }, - { "le", 0x78, INSTR_RX_FRRD }, - { "ce", 0x79, INSTR_RX_FRRD }, - { "ae", 0x7a, INSTR_RX_FRRD }, - { "se", 0x7b, INSTR_RX_FRRD }, - { "mde", 0x7c, INSTR_RX_FRRD }, - { "de", 0x7d, INSTR_RX_FRRD }, - { "au", 0x7e, INSTR_RX_FRRD }, - { "su", 0x7f, INSTR_RX_FRRD }, - { "ssm", 0x80, INSTR_S_RD }, - { "lpsw", 0x82, INSTR_S_RD }, - { "diag", 0x83, INSTR_RS_RRRD }, - { "brxh", 0x84, INSTR_RSI_RRP }, - { "brxle", 0x85, INSTR_RSI_RRP }, - { "bxh", 0x86, INSTR_RS_RRRD }, - { "bxle", 0x87, INSTR_RS_RRRD }, - { "srl", 0x88, INSTR_RS_R0RD }, - { "sll", 0x89, INSTR_RS_R0RD }, - { "sra", 0x8a, INSTR_RS_R0RD }, - { "sla", 0x8b, INSTR_RS_R0RD }, - { "srdl", 0x8c, INSTR_RS_R0RD }, - { "sldl", 0x8d, INSTR_RS_R0RD }, - { "srda", 0x8e, INSTR_RS_R0RD }, - { "slda", 0x8f, INSTR_RS_R0RD }, - { "stm", 0x90, INSTR_RS_RRRD }, - { "tm", 0x91, INSTR_SI_URD }, - { "mvi", 0x92, INSTR_SI_URD }, - { "ts", 0x93, INSTR_S_RD }, - { "ni", 0x94, INSTR_SI_URD }, - { "cli", 0x95, INSTR_SI_URD }, - { "oi", 0x96, INSTR_SI_URD }, - { "xi", 0x97, INSTR_SI_URD }, - { "lm", 0x98, INSTR_RS_RRRD }, - { "trace", 0x99, INSTR_RS_RRRD }, - { "lam", 0x9a, INSTR_RS_AARD }, - { "stam", 0x9b, INSTR_RS_AARD }, - { "mvcle", 0xa8, INSTR_RS_RRRD }, - { "clcle", 0xa9, INSTR_RS_RRRD }, - { "stnsm", 0xac, INSTR_SI_URD }, - { "stosm", 0xad, INSTR_SI_URD }, - { "sigp", 0xae, INSTR_RS_RRRD }, - { "mc", 0xaf, INSTR_SI_URD }, - { "lra", 0xb1, INSTR_RX_RRRD }, - { "stctl", 0xb6, INSTR_RS_CCRD }, - { "lctl", 0xb7, INSTR_RS_CCRD }, - { "cs", 0xba, INSTR_RS_RRRD }, - { "cds", 0xbb, INSTR_RS_RRRD }, - { "clm", 0xbd, INSTR_RS_RURD }, - { "stcm", 0xbe, INSTR_RS_RURD }, - { "icm", 0xbf, INSTR_RS_RURD }, - { "mvn", 0xd1, INSTR_SS_L0RDRD }, - { "mvc", 0xd2, INSTR_SS_L0RDRD }, - { "mvz", 0xd3, INSTR_SS_L0RDRD }, - { "nc", 0xd4, INSTR_SS_L0RDRD }, - { "clc", 0xd5, INSTR_SS_L0RDRD }, - { "oc", 0xd6, INSTR_SS_L0RDRD }, - { "xc", 0xd7, INSTR_SS_L0RDRD }, - { "mvck", 0xd9, INSTR_SS_RRRDRD }, - { "mvcp", 0xda, INSTR_SS_RRRDRD }, - { "mvcs", 0xdb, INSTR_SS_RRRDRD }, - { "tr", 0xdc, INSTR_SS_L0RDRD }, - { "trt", 0xdd, INSTR_SS_L0RDRD }, - { "ed", 0xde, INSTR_SS_L0RDRD }, - { "edmk", 0xdf, INSTR_SS_L0RDRD }, - { "pku", 0xe1, INSTR_SS_L0RDRD }, - { "unpku", 0xe2, INSTR_SS_L0RDRD }, - { "mvcin", 0xe8, INSTR_SS_L0RDRD }, - { "pka", 0xe9, INSTR_SS_L0RDRD }, - { "unpka", 0xea, INSTR_SS_L0RDRD }, - { "plo", 0xee, INSTR_SS_RRRDRD2 }, - { "srp", 0xf0, INSTR_SS_LIRDRD }, - { "mvo", 0xf1, INSTR_SS_LLRDRD }, - { "pack", 0xf2, INSTR_SS_LLRDRD }, - { "unpk", 0xf3, INSTR_SS_LLRDRD }, - { "zap", 0xf8, INSTR_SS_LLRDRD }, - { "cp", 0xf9, INSTR_SS_LLRDRD }, - { "ap", 0xfa, INSTR_SS_LLRDRD }, - { "sp", 0xfb, INSTR_SS_LLRDRD }, - { "mp", 0xfc, INSTR_SS_LLRDRD }, - { "dp", 0xfd, INSTR_SS_LLRDRD }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_01[] = { -#ifdef CONFIG_64BIT - { "ptff", 0x04, INSTR_E }, - { "pfpo", 0x0a, INSTR_E }, - { "sam64", 0x0e, INSTR_E }, -#endif - { "pr", 0x01, INSTR_E }, - { "upt", 0x02, INSTR_E }, - { "sckpf", 0x07, INSTR_E }, - { "tam", 0x0b, INSTR_E }, - { "sam24", 0x0c, INSTR_E }, - { "sam31", 0x0d, INSTR_E }, - { "trap2", 0xff, INSTR_E }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_a5[] = { -#ifdef CONFIG_64BIT - { "iihh", 0x00, INSTR_RI_RU }, - { "iihl", 0x01, INSTR_RI_RU }, - { "iilh", 0x02, INSTR_RI_RU }, - { "iill", 0x03, INSTR_RI_RU }, - { "nihh", 0x04, INSTR_RI_RU }, - { "nihl", 0x05, INSTR_RI_RU }, - { "nilh", 0x06, INSTR_RI_RU }, - { "nill", 0x07, INSTR_RI_RU }, - { "oihh", 0x08, INSTR_RI_RU }, - { "oihl", 0x09, INSTR_RI_RU }, - { "oilh", 0x0a, INSTR_RI_RU }, - { "oill", 0x0b, INSTR_RI_RU }, - { "llihh", 0x0c, INSTR_RI_RU }, - { "llihl", 0x0d, INSTR_RI_RU }, - { "llilh", 0x0e, INSTR_RI_RU }, - { "llill", 0x0f, INSTR_RI_RU }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_a7[] = { -#ifdef CONFIG_64BIT - { "tmhh", 0x02, INSTR_RI_RU }, - { "tmhl", 0x03, INSTR_RI_RU }, - { "brctg", 0x07, INSTR_RI_RP }, - { "lghi", 0x09, INSTR_RI_RI }, - { "aghi", 0x0b, INSTR_RI_RI }, - { "mghi", 0x0d, INSTR_RI_RI }, - { "cghi", 0x0f, INSTR_RI_RI }, -#endif - { "tmlh", 0x00, INSTR_RI_RU }, - { "tmll", 0x01, INSTR_RI_RU }, - { "brc", 0x04, INSTR_RI_UP }, - { "bras", 0x05, INSTR_RI_RP }, - { "brct", 0x06, INSTR_RI_RP }, - { "lhi", 0x08, INSTR_RI_RI }, - { "ahi", 0x0a, INSTR_RI_RI }, - { "mhi", 0x0c, INSTR_RI_RI }, - { "chi", 0x0e, INSTR_RI_RI }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_aa[] = { -#ifdef CONFIG_64BIT - { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, - { "rion", 0x01, INSTR_RI_RI }, - { "tric", 0x02, INSTR_RI_RI }, - { "rioff", 0x03, INSTR_RI_RI }, - { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_b2[] = { -#ifdef CONFIG_64BIT - { "stckf", 0x7c, INSTR_S_RD }, - { "lpp", 0x80, INSTR_S_RD }, - { "lcctl", 0x84, INSTR_S_RD }, - { "lpctl", 0x85, INSTR_S_RD }, - { "qsi", 0x86, INSTR_S_RD }, - { "lsctl", 0x87, INSTR_S_RD }, - { "qctri", 0x8e, INSTR_S_RD }, - { "stfle", 0xb0, INSTR_S_RD }, - { "lpswe", 0xb2, INSTR_S_RD }, - { "srnmb", 0xb8, INSTR_S_RD }, - { "srnmt", 0xb9, INSTR_S_RD }, - { "lfas", 0xbd, INSTR_S_RD }, - { "scctr", 0xe0, INSTR_RRE_RR }, - { "spctr", 0xe1, INSTR_RRE_RR }, - { "ecctr", 0xe4, INSTR_RRE_RR }, - { "epctr", 0xe5, INSTR_RRE_RR }, - { "ppa", 0xe8, INSTR_RRF_U0RR }, - { "etnd", 0xec, INSTR_RRE_R0 }, - { "ecpga", 0xed, INSTR_RRE_RR }, - { "tend", 0xf8, INSTR_S_00 }, - { "niai", 0xfa, INSTR_IE_UU }, - { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, -#endif - { "stidp", 0x02, INSTR_S_RD }, - { "sck", 0x04, INSTR_S_RD }, - { "stck", 0x05, INSTR_S_RD }, - { "sckc", 0x06, INSTR_S_RD }, - { "stckc", 0x07, INSTR_S_RD }, - { "spt", 0x08, INSTR_S_RD }, - { "stpt", 0x09, INSTR_S_RD }, - { "spka", 0x0a, INSTR_S_RD }, - { "ipk", 0x0b, INSTR_S_00 }, - { "ptlb", 0x0d, INSTR_S_00 }, - { "spx", 0x10, INSTR_S_RD }, - { "stpx", 0x11, INSTR_S_RD }, - { "stap", 0x12, INSTR_S_RD }, - { "sie", 0x14, INSTR_S_RD }, - { "pc", 0x18, INSTR_S_RD }, - { "sac", 0x19, INSTR_S_RD }, - { "cfc", 0x1a, INSTR_S_RD }, - { "servc", 0x20, INSTR_RRE_RR }, - { "ipte", 0x21, INSTR_RRE_RR }, - { "ipm", 0x22, INSTR_RRE_R0 }, - { "ivsk", 0x23, INSTR_RRE_RR }, - { "iac", 0x24, INSTR_RRE_R0 }, - { "ssar", 0x25, INSTR_RRE_R0 }, - { "epar", 0x26, INSTR_RRE_R0 }, - { "esar", 0x27, INSTR_RRE_R0 }, - { "pt", 0x28, INSTR_RRE_RR }, - { "iske", 0x29, INSTR_RRE_RR }, - { "rrbe", 0x2a, INSTR_RRE_RR }, - { "sske", 0x2b, INSTR_RRF_M0RR }, - { "tb", 0x2c, INSTR_RRE_0R }, - { "dxr", 0x2d, INSTR_RRE_FF }, - { "pgin", 0x2e, INSTR_RRE_RR }, - { "pgout", 0x2f, INSTR_RRE_RR }, - { "csch", 0x30, INSTR_S_00 }, - { "hsch", 0x31, INSTR_S_00 }, - { "msch", 0x32, INSTR_S_RD }, - { "ssch", 0x33, INSTR_S_RD }, - { "stsch", 0x34, INSTR_S_RD }, - { "tsch", 0x35, INSTR_S_RD }, - { "tpi", 0x36, INSTR_S_RD }, - { "sal", 0x37, INSTR_S_00 }, - { "rsch", 0x38, INSTR_S_00 }, - { "stcrw", 0x39, INSTR_S_RD }, - { "stcps", 0x3a, INSTR_S_RD }, - { "rchp", 0x3b, INSTR_S_00 }, - { "schm", 0x3c, INSTR_S_00 }, - { "bakr", 0x40, INSTR_RRE_RR }, - { "cksm", 0x41, INSTR_RRE_RR }, - { "sqdr", 0x44, INSTR_RRE_FF }, - { "sqer", 0x45, INSTR_RRE_FF }, - { "stura", 0x46, INSTR_RRE_RR }, - { "msta", 0x47, INSTR_RRE_R0 }, - { "palb", 0x48, INSTR_RRE_00 }, - { "ereg", 0x49, INSTR_RRE_RR }, - { "esta", 0x4a, INSTR_RRE_RR }, - { "lura", 0x4b, INSTR_RRE_RR }, - { "tar", 0x4c, INSTR_RRE_AR }, - { "cpya", 0x4d, INSTR_RRE_AA }, - { "sar", 0x4e, INSTR_RRE_AR }, - { "ear", 0x4f, INSTR_RRE_RA }, - { "csp", 0x50, INSTR_RRE_RR }, - { "msr", 0x52, INSTR_RRE_RR }, - { "mvpg", 0x54, INSTR_RRE_RR }, - { "mvst", 0x55, INSTR_RRE_RR }, - { "cuse", 0x57, INSTR_RRE_RR }, - { "bsg", 0x58, INSTR_RRE_RR }, - { "bsa", 0x5a, INSTR_RRE_RR }, - { "clst", 0x5d, INSTR_RRE_RR }, - { "srst", 0x5e, INSTR_RRE_RR }, - { "cmpsc", 0x63, INSTR_RRE_RR }, - { "siga", 0x74, INSTR_S_RD }, - { "xsch", 0x76, INSTR_S_00 }, - { "rp", 0x77, INSTR_S_RD }, - { "stcke", 0x78, INSTR_S_RD }, - { "sacf", 0x79, INSTR_S_RD }, - { "stsi", 0x7d, INSTR_S_RD }, - { "srnm", 0x99, INSTR_S_RD }, - { "stfpc", 0x9c, INSTR_S_RD }, - { "lfpc", 0x9d, INSTR_S_RD }, - { "tre", 0xa5, INSTR_RRE_RR }, - { "cuutf", 0xa6, INSTR_RRF_M0RR }, - { "cutfu", 0xa7, INSTR_RRF_M0RR }, - { "stfl", 0xb1, INSTR_S_RD }, - { "trap4", 0xff, INSTR_S_RD }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_b3[] = { -#ifdef CONFIG_64BIT - { "maylr", 0x38, INSTR_RRF_F0FF }, - { "mylr", 0x39, INSTR_RRF_F0FF }, - { "mayr", 0x3a, INSTR_RRF_F0FF }, - { "myr", 0x3b, INSTR_RRF_F0FF }, - { "mayhr", 0x3c, INSTR_RRF_F0FF }, - { "myhr", 0x3d, INSTR_RRF_F0FF }, - { "lpdfr", 0x70, INSTR_RRE_FF }, - { "lndfr", 0x71, INSTR_RRE_FF }, - { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, - { "lcdfr", 0x73, INSTR_RRE_FF }, - { "sfasr", 0x85, INSTR_RRE_R0 }, - { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR }, - { "ldgr", 0xc1, INSTR_RRE_FR }, - { "cegr", 0xc4, INSTR_RRE_FR }, - { "cdgr", 0xc5, INSTR_RRE_FR }, - { "cxgr", 0xc6, INSTR_RRE_FR }, - { "cger", 0xc8, INSTR_RRF_U0RF }, - { "cgdr", 0xc9, INSTR_RRF_U0RF }, - { "cgxr", 0xca, INSTR_RRF_U0RF }, - { "lgdr", 0xcd, INSTR_RRE_RF }, - { "mdtra", 0xd0, INSTR_RRF_FUFF2 }, - { "ddtra", 0xd1, INSTR_RRF_FUFF2 }, - { "adtra", 0xd2, INSTR_RRF_FUFF2 }, - { "sdtra", 0xd3, INSTR_RRF_FUFF2 }, - { "ldetr", 0xd4, INSTR_RRF_0UFF }, - { "ledtr", 0xd5, INSTR_RRF_UUFF }, - { "ltdtr", 0xd6, INSTR_RRE_FF }, - { "fidtr", 0xd7, INSTR_RRF_UUFF }, - { "mxtra", 0xd8, INSTR_RRF_FUFF2 }, - { "dxtra", 0xd9, INSTR_RRF_FUFF2 }, - { "axtra", 0xda, INSTR_RRF_FUFF2 }, - { "sxtra", 0xdb, INSTR_RRF_FUFF2 }, - { "lxdtr", 0xdc, INSTR_RRF_0UFF }, - { "ldxtr", 0xdd, INSTR_RRF_UUFF }, - { "ltxtr", 0xde, INSTR_RRE_FF }, - { "fixtr", 0xdf, INSTR_RRF_UUFF }, - { "kdtr", 0xe0, INSTR_RRE_FF }, - { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF }, - { "cudtr", 0xe2, INSTR_RRE_RF }, - { "csdtr", 0xe3, INSTR_RRE_RF }, - { "cdtr", 0xe4, INSTR_RRE_FF }, - { "eedtr", 0xe5, INSTR_RRE_RF }, - { "esdtr", 0xe7, INSTR_RRE_RF }, - { "kxtr", 0xe8, INSTR_RRE_FF }, - { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR }, - { "cuxtr", 0xea, INSTR_RRE_RF }, - { "csxtr", 0xeb, INSTR_RRE_RF }, - { "cxtr", 0xec, INSTR_RRE_FF }, - { "eextr", 0xed, INSTR_RRE_RF }, - { "esxtr", 0xef, INSTR_RRE_RF }, - { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR }, - { "cdutr", 0xf2, INSTR_RRE_FR }, - { "cdstr", 0xf3, INSTR_RRE_FR }, - { "cedtr", 0xf4, INSTR_RRE_FF }, - { "qadtr", 0xf5, INSTR_RRF_FUFF }, - { "iedtr", 0xf6, INSTR_RRF_F0FR }, - { "rrdtr", 0xf7, INSTR_RRF_FFRU }, - { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF }, - { "cxutr", 0xfa, INSTR_RRE_FR }, - { "cxstr", 0xfb, INSTR_RRE_FR }, - { "cextr", 0xfc, INSTR_RRE_FF }, - { "qaxtr", 0xfd, INSTR_RRF_FUFF }, - { "iextr", 0xfe, INSTR_RRF_F0FR }, - { "rrxtr", 0xff, INSTR_RRF_FFRU }, -#endif - { "lpebr", 0x00, INSTR_RRE_FF }, - { "lnebr", 0x01, INSTR_RRE_FF }, - { "ltebr", 0x02, INSTR_RRE_FF }, - { "lcebr", 0x03, INSTR_RRE_FF }, - { "ldebr", 0x04, INSTR_RRE_FF }, - { "lxdbr", 0x05, INSTR_RRE_FF }, - { "lxebr", 0x06, INSTR_RRE_FF }, - { "mxdbr", 0x07, INSTR_RRE_FF }, - { "kebr", 0x08, INSTR_RRE_FF }, - { "cebr", 0x09, INSTR_RRE_FF }, - { "aebr", 0x0a, INSTR_RRE_FF }, - { "sebr", 0x0b, INSTR_RRE_FF }, - { "mdebr", 0x0c, INSTR_RRE_FF }, - { "debr", 0x0d, INSTR_RRE_FF }, - { "maebr", 0x0e, INSTR_RRF_F0FF }, - { "msebr", 0x0f, INSTR_RRF_F0FF }, - { "lpdbr", 0x10, INSTR_RRE_FF }, - { "lndbr", 0x11, INSTR_RRE_FF }, - { "ltdbr", 0x12, INSTR_RRE_FF }, - { "lcdbr", 0x13, INSTR_RRE_FF }, - { "sqebr", 0x14, INSTR_RRE_FF }, - { "sqdbr", 0x15, INSTR_RRE_FF }, - { "sqxbr", 0x16, INSTR_RRE_FF }, - { "meebr", 0x17, INSTR_RRE_FF }, - { "kdbr", 0x18, INSTR_RRE_FF }, - { "cdbr", 0x19, INSTR_RRE_FF }, - { "adbr", 0x1a, INSTR_RRE_FF }, - { "sdbr", 0x1b, INSTR_RRE_FF }, - { "mdbr", 0x1c, INSTR_RRE_FF }, - { "ddbr", 0x1d, INSTR_RRE_FF }, - { "madbr", 0x1e, INSTR_RRF_F0FF }, - { "msdbr", 0x1f, INSTR_RRF_F0FF }, - { "lder", 0x24, INSTR_RRE_FF }, - { "lxdr", 0x25, INSTR_RRE_FF }, - { "lxer", 0x26, INSTR_RRE_FF }, - { "maer", 0x2e, INSTR_RRF_F0FF }, - { "mser", 0x2f, INSTR_RRF_F0FF }, - { "sqxr", 0x36, INSTR_RRE_FF }, - { "meer", 0x37, INSTR_RRE_FF }, - { "madr", 0x3e, INSTR_RRF_F0FF }, - { "msdr", 0x3f, INSTR_RRF_F0FF }, - { "lpxbr", 0x40, INSTR_RRE_FF }, - { "lnxbr", 0x41, INSTR_RRE_FF }, - { "ltxbr", 0x42, INSTR_RRE_FF }, - { "lcxbr", 0x43, INSTR_RRE_FF }, - { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF }, - { "kxbr", 0x48, INSTR_RRE_FF }, - { "cxbr", 0x49, INSTR_RRE_FF }, - { "axbr", 0x4a, INSTR_RRE_FF }, - { "sxbr", 0x4b, INSTR_RRE_FF }, - { "mxbr", 0x4c, INSTR_RRE_FF }, - { "dxbr", 0x4d, INSTR_RRE_FF }, - { "tbedr", 0x50, INSTR_RRF_U0FF }, - { "tbdr", 0x51, INSTR_RRF_U0FF }, - { "diebr", 0x53, INSTR_RRF_FUFF }, - { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF }, - { "thder", 0x58, INSTR_RRE_FF }, - { "thdr", 0x59, INSTR_RRE_FF }, - { "didbr", 0x5b, INSTR_RRF_FUFF }, - { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF }, - { "lpxr", 0x60, INSTR_RRE_FF }, - { "lnxr", 0x61, INSTR_RRE_FF }, - { "ltxr", 0x62, INSTR_RRE_FF }, - { "lcxr", 0x63, INSTR_RRE_FF }, - { "lxr", 0x65, INSTR_RRE_FF }, - { "lexr", 0x66, INSTR_RRE_FF }, - { "fixr", 0x67, INSTR_RRE_FF }, - { "cxr", 0x69, INSTR_RRE_FF }, - { "lzer", 0x74, INSTR_RRE_F0 }, - { "lzdr", 0x75, INSTR_RRE_F0 }, - { "lzxr", 0x76, INSTR_RRE_F0 }, - { "fier", 0x77, INSTR_RRE_FF }, - { "fidr", 0x7f, INSTR_RRE_FF }, - { "sfpc", 0x84, INSTR_RRE_RR_OPT }, - { "efpc", 0x8c, INSTR_RRE_RR_OPT }, - { "cefbr", 0x94, INSTR_RRE_RF }, - { "cdfbr", 0x95, INSTR_RRE_RF }, - { "cxfbr", 0x96, INSTR_RRE_RF }, - { "cfebr", 0x98, INSTR_RRF_U0RF }, - { "cfdbr", 0x99, INSTR_RRF_U0RF }, - { "cfxbr", 0x9a, INSTR_RRF_U0RF }, - { "cefr", 0xb4, INSTR_RRE_FR }, - { "cdfr", 0xb5, INSTR_RRE_FR }, - { "cxfr", 0xb6, INSTR_RRE_FR }, - { "cfer", 0xb8, INSTR_RRF_U0RF }, - { "cfdr", 0xb9, INSTR_RRF_U0RF }, - { "cfxr", 0xba, INSTR_RRF_U0RF }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_b9[] = { -#ifdef CONFIG_64BIT - { "lpgr", 0x00, INSTR_RRE_RR }, - { "lngr", 0x01, INSTR_RRE_RR }, - { "ltgr", 0x02, INSTR_RRE_RR }, - { "lcgr", 0x03, INSTR_RRE_RR }, - { "lgr", 0x04, INSTR_RRE_RR }, - { "lurag", 0x05, INSTR_RRE_RR }, - { "lgbr", 0x06, INSTR_RRE_RR }, - { "lghr", 0x07, INSTR_RRE_RR }, - { "agr", 0x08, INSTR_RRE_RR }, - { "sgr", 0x09, INSTR_RRE_RR }, - { "algr", 0x0a, INSTR_RRE_RR }, - { "slgr", 0x0b, INSTR_RRE_RR }, - { "msgr", 0x0c, INSTR_RRE_RR }, - { "dsgr", 0x0d, INSTR_RRE_RR }, - { "eregg", 0x0e, INSTR_RRE_RR }, - { "lrvgr", 0x0f, INSTR_RRE_RR }, - { "lpgfr", 0x10, INSTR_RRE_RR }, - { "lngfr", 0x11, INSTR_RRE_RR }, - { "ltgfr", 0x12, INSTR_RRE_RR }, - { "lcgfr", 0x13, INSTR_RRE_RR }, - { "lgfr", 0x14, INSTR_RRE_RR }, - { "llgfr", 0x16, INSTR_RRE_RR }, - { "llgtr", 0x17, INSTR_RRE_RR }, - { "agfr", 0x18, INSTR_RRE_RR }, - { "sgfr", 0x19, INSTR_RRE_RR }, - { "algfr", 0x1a, INSTR_RRE_RR }, - { "slgfr", 0x1b, INSTR_RRE_RR }, - { "msgfr", 0x1c, INSTR_RRE_RR }, - { "dsgfr", 0x1d, INSTR_RRE_RR }, - { "cgr", 0x20, INSTR_RRE_RR }, - { "clgr", 0x21, INSTR_RRE_RR }, - { "sturg", 0x25, INSTR_RRE_RR }, - { "lbr", 0x26, INSTR_RRE_RR }, - { "lhr", 0x27, INSTR_RRE_RR }, - { "cgfr", 0x30, INSTR_RRE_RR }, - { "clgfr", 0x31, INSTR_RRE_RR }, - { "cfdtr", 0x41, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF }, - { "bctgr", 0x46, INSTR_RRE_RR }, - { "cfxtr", 0x49, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR }, - { "cdftr", 0x51, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR }, - { "cxftr", 0x59, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR }, - { "cgrt", 0x60, INSTR_RRF_U0RR }, - { "clgrt", 0x61, INSTR_RRF_U0RR }, - { "crt", 0x72, INSTR_RRF_U0RR }, - { "clrt", 0x73, INSTR_RRF_U0RR }, - { "ngr", 0x80, INSTR_RRE_RR }, - { "ogr", 0x81, INSTR_RRE_RR }, - { "xgr", 0x82, INSTR_RRE_RR }, - { "flogr", 0x83, INSTR_RRE_RR }, - { "llgcr", 0x84, INSTR_RRE_RR }, - { "llghr", 0x85, INSTR_RRE_RR }, - { "mlgr", 0x86, INSTR_RRE_RR }, - { "dlgr", 0x87, INSTR_RRE_RR }, - { "alcgr", 0x88, INSTR_RRE_RR }, - { "slbgr", 0x89, INSTR_RRE_RR }, - { "cspg", 0x8a, INSTR_RRE_RR }, - { "idte", 0x8e, INSTR_RRF_R0RR }, - { "crdte", 0x8f, INSTR_RRF_RMRR }, - { "llcr", 0x94, INSTR_RRE_RR }, - { "llhr", 0x95, INSTR_RRE_RR }, - { "esea", 0x9d, INSTR_RRE_R0 }, - { "ptf", 0xa2, INSTR_RRE_R0 }, - { "lptea", 0xaa, INSTR_RRF_RURR }, - { "rrbm", 0xae, INSTR_RRE_RR }, - { "pfmf", 0xaf, INSTR_RRE_RR }, - { "cu14", 0xb0, INSTR_RRF_M0RR }, - { "cu24", 0xb1, INSTR_RRF_M0RR }, - { "cu41", 0xb2, INSTR_RRE_RR }, - { "cu42", 0xb3, INSTR_RRE_RR }, - { "trtre", 0xbd, INSTR_RRF_M0RR }, - { "srstu", 0xbe, INSTR_RRE_RR }, - { "trte", 0xbf, INSTR_RRF_M0RR }, - { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, - { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 }, - { "chhr", 0xcd, INSTR_RRE_RR }, - { "clhhr", 0xcf, INSTR_RRE_RR }, - { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR }, - { "pcilg", 0xd2, INSTR_RRE_RR }, - { "rpcit", 0xd3, INSTR_RRE_RR }, - { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, - { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 }, - { "chlr", 0xdd, INSTR_RRE_RR }, - { "clhlr", 0xdf, INSTR_RRE_RR }, - { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, - { "locgr", 0xe2, INSTR_RRF_M0RR }, - { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, - { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, - { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, - { "agrk", 0xe8, INSTR_RRF_R0RR2 }, - { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, - { "algrk", 0xea, INSTR_RRF_R0RR2 }, - { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, - { "locr", 0xf2, INSTR_RRF_M0RR }, - { "nrk", 0xf4, INSTR_RRF_R0RR2 }, - { "ork", 0xf6, INSTR_RRF_R0RR2 }, - { "xrk", 0xf7, INSTR_RRF_R0RR2 }, - { "ark", 0xf8, INSTR_RRF_R0RR2 }, - { "srk", 0xf9, INSTR_RRF_R0RR2 }, - { "alrk", 0xfa, INSTR_RRF_R0RR2 }, - { "slrk", 0xfb, INSTR_RRF_R0RR2 }, -#endif - { "kmac", 0x1e, INSTR_RRE_RR }, - { "lrvr", 0x1f, INSTR_RRE_RR }, - { "km", 0x2e, INSTR_RRE_RR }, - { "kmc", 0x2f, INSTR_RRE_RR }, - { "kimd", 0x3e, INSTR_RRE_RR }, - { "klmd", 0x3f, INSTR_RRE_RR }, - { "epsw", 0x8d, INSTR_RRE_RR }, - { "trtt", 0x90, INSTR_RRF_M0RR }, - { "trto", 0x91, INSTR_RRF_M0RR }, - { "trot", 0x92, INSTR_RRF_M0RR }, - { "troo", 0x93, INSTR_RRF_M0RR }, - { "mlr", 0x96, INSTR_RRE_RR }, - { "dlr", 0x97, INSTR_RRE_RR }, - { "alcr", 0x98, INSTR_RRE_RR }, - { "slbr", 0x99, INSTR_RRE_RR }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_c0[] = { -#ifdef CONFIG_64BIT - { "lgfi", 0x01, INSTR_RIL_RI }, - { "xihf", 0x06, INSTR_RIL_RU }, - { "xilf", 0x07, INSTR_RIL_RU }, - { "iihf", 0x08, INSTR_RIL_RU }, - { "iilf", 0x09, INSTR_RIL_RU }, - { "nihf", 0x0a, INSTR_RIL_RU }, - { "nilf", 0x0b, INSTR_RIL_RU }, - { "oihf", 0x0c, INSTR_RIL_RU }, - { "oilf", 0x0d, INSTR_RIL_RU }, - { "llihf", 0x0e, INSTR_RIL_RU }, - { "llilf", 0x0f, INSTR_RIL_RU }, -#endif - { "larl", 0x00, INSTR_RIL_RP }, - { "brcl", 0x04, INSTR_RIL_UP }, - { "brasl", 0x05, INSTR_RIL_RP }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_c2[] = { -#ifdef CONFIG_64BIT - { "msgfi", 0x00, INSTR_RIL_RI }, - { "msfi", 0x01, INSTR_RIL_RI }, - { "slgfi", 0x04, INSTR_RIL_RU }, - { "slfi", 0x05, INSTR_RIL_RU }, - { "agfi", 0x08, INSTR_RIL_RI }, - { "afi", 0x09, INSTR_RIL_RI }, - { "algfi", 0x0a, INSTR_RIL_RU }, - { "alfi", 0x0b, INSTR_RIL_RU }, - { "cgfi", 0x0c, INSTR_RIL_RI }, - { "cfi", 0x0d, INSTR_RIL_RI }, - { "clgfi", 0x0e, INSTR_RIL_RU }, - { "clfi", 0x0f, INSTR_RIL_RU }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_c4[] = { -#ifdef CONFIG_64BIT - { "llhrl", 0x02, INSTR_RIL_RP }, - { "lghrl", 0x04, INSTR_RIL_RP }, - { "lhrl", 0x05, INSTR_RIL_RP }, - { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, - { "sthrl", 0x07, INSTR_RIL_RP }, - { "lgrl", 0x08, INSTR_RIL_RP }, - { "stgrl", 0x0b, INSTR_RIL_RP }, - { "lgfrl", 0x0c, INSTR_RIL_RP }, - { "lrl", 0x0d, INSTR_RIL_RP }, - { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, - { "strl", 0x0f, INSTR_RIL_RP }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_c6[] = { -#ifdef CONFIG_64BIT - { "exrl", 0x00, INSTR_RIL_RP }, - { "pfdrl", 0x02, INSTR_RIL_UP }, - { "cghrl", 0x04, INSTR_RIL_RP }, - { "chrl", 0x05, INSTR_RIL_RP }, - { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, - { "clhrl", 0x07, INSTR_RIL_RP }, - { "cgrl", 0x08, INSTR_RIL_RP }, - { "clgrl", 0x0a, INSTR_RIL_RP }, - { "cgfrl", 0x0c, INSTR_RIL_RP }, - { "crl", 0x0d, INSTR_RIL_RP }, - { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, - { "clrl", 0x0f, INSTR_RIL_RP }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_c8[] = { -#ifdef CONFIG_64BIT - { "mvcos", 0x00, INSTR_SSF_RRDRD }, - { "ectg", 0x01, INSTR_SSF_RRDRD }, - { "csst", 0x02, INSTR_SSF_RRDRD }, - { "lpd", 0x04, INSTR_SSF_RRDRD2 }, - { "lpdg", 0x05, INSTR_SSF_RRDRD2 }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_cc[] = { -#ifdef CONFIG_64BIT - { "brcth", 0x06, INSTR_RIL_RP }, - { "aih", 0x08, INSTR_RIL_RI }, - { "alsih", 0x0a, INSTR_RIL_RI }, - { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, - { "cih", 0x0d, INSTR_RIL_RI }, - { "clih", 0x0f, INSTR_RIL_RI }, -#endif - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_e3[] = { -#ifdef CONFIG_64BIT - { "ltg", 0x02, INSTR_RXY_RRRD }, - { "lrag", 0x03, INSTR_RXY_RRRD }, - { "lg", 0x04, INSTR_RXY_RRRD }, - { "cvby", 0x06, INSTR_RXY_RRRD }, - { "ag", 0x08, INSTR_RXY_RRRD }, - { "sg", 0x09, INSTR_RXY_RRRD }, - { "alg", 0x0a, INSTR_RXY_RRRD }, - { "slg", 0x0b, INSTR_RXY_RRRD }, - { "msg", 0x0c, INSTR_RXY_RRRD }, - { "dsg", 0x0d, INSTR_RXY_RRRD }, - { "cvbg", 0x0e, INSTR_RXY_RRRD }, - { "lrvg", 0x0f, INSTR_RXY_RRRD }, - { "lt", 0x12, INSTR_RXY_RRRD }, - { "lray", 0x13, INSTR_RXY_RRRD }, - { "lgf", 0x14, INSTR_RXY_RRRD }, - { "lgh", 0x15, INSTR_RXY_RRRD }, - { "llgf", 0x16, INSTR_RXY_RRRD }, - { "llgt", 0x17, INSTR_RXY_RRRD }, - { "agf", 0x18, INSTR_RXY_RRRD }, - { "sgf", 0x19, INSTR_RXY_RRRD }, - { "algf", 0x1a, INSTR_RXY_RRRD }, - { "slgf", 0x1b, INSTR_RXY_RRRD }, - { "msgf", 0x1c, INSTR_RXY_RRRD }, - { "dsgf", 0x1d, INSTR_RXY_RRRD }, - { "cg", 0x20, INSTR_RXY_RRRD }, - { "clg", 0x21, INSTR_RXY_RRRD }, - { "stg", 0x24, INSTR_RXY_RRRD }, - { "ntstg", 0x25, INSTR_RXY_RRRD }, - { "cvdy", 0x26, INSTR_RXY_RRRD }, - { "cvdg", 0x2e, INSTR_RXY_RRRD }, - { "strvg", 0x2f, INSTR_RXY_RRRD }, - { "cgf", 0x30, INSTR_RXY_RRRD }, - { "clgf", 0x31, INSTR_RXY_RRRD }, - { "ltgf", 0x32, INSTR_RXY_RRRD }, - { "cgh", 0x34, INSTR_RXY_RRRD }, - { "pfd", 0x36, INSTR_RXY_URRD }, - { "strvh", 0x3f, INSTR_RXY_RRRD }, - { "bctg", 0x46, INSTR_RXY_RRRD }, - { "sty", 0x50, INSTR_RXY_RRRD }, - { "msy", 0x51, INSTR_RXY_RRRD }, - { "ny", 0x54, INSTR_RXY_RRRD }, - { "cly", 0x55, INSTR_RXY_RRRD }, - { "oy", 0x56, INSTR_RXY_RRRD }, - { "xy", 0x57, INSTR_RXY_RRRD }, - { "ly", 0x58, INSTR_RXY_RRRD }, - { "cy", 0x59, INSTR_RXY_RRRD }, - { "ay", 0x5a, INSTR_RXY_RRRD }, - { "sy", 0x5b, INSTR_RXY_RRRD }, - { "mfy", 0x5c, INSTR_RXY_RRRD }, - { "aly", 0x5e, INSTR_RXY_RRRD }, - { "sly", 0x5f, INSTR_RXY_RRRD }, - { "sthy", 0x70, INSTR_RXY_RRRD }, - { "lay", 0x71, INSTR_RXY_RRRD }, - { "stcy", 0x72, INSTR_RXY_RRRD }, - { "icy", 0x73, INSTR_RXY_RRRD }, - { "laey", 0x75, INSTR_RXY_RRRD }, - { "lb", 0x76, INSTR_RXY_RRRD }, - { "lgb", 0x77, INSTR_RXY_RRRD }, - { "lhy", 0x78, INSTR_RXY_RRRD }, - { "chy", 0x79, INSTR_RXY_RRRD }, - { "ahy", 0x7a, INSTR_RXY_RRRD }, - { "shy", 0x7b, INSTR_RXY_RRRD }, - { "mhy", 0x7c, INSTR_RXY_RRRD }, - { "ng", 0x80, INSTR_RXY_RRRD }, - { "og", 0x81, INSTR_RXY_RRRD }, - { "xg", 0x82, INSTR_RXY_RRRD }, - { "lgat", 0x85, INSTR_RXY_RRRD }, - { "mlg", 0x86, INSTR_RXY_RRRD }, - { "dlg", 0x87, INSTR_RXY_RRRD }, - { "alcg", 0x88, INSTR_RXY_RRRD }, - { "slbg", 0x89, INSTR_RXY_RRRD }, - { "stpq", 0x8e, INSTR_RXY_RRRD }, - { "lpq", 0x8f, INSTR_RXY_RRRD }, - { "llgc", 0x90, INSTR_RXY_RRRD }, - { "llgh", 0x91, INSTR_RXY_RRRD }, - { "llc", 0x94, INSTR_RXY_RRRD }, - { "llh", 0x95, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD }, - { "lat", 0x9f, INSTR_RXY_RRRD }, - { "lbh", 0xc0, INSTR_RXY_RRRD }, - { "llch", 0xc2, INSTR_RXY_RRRD }, - { "stch", 0xc3, INSTR_RXY_RRRD }, - { "lhh", 0xc4, INSTR_RXY_RRRD }, - { "llhh", 0xc6, INSTR_RXY_RRRD }, - { "sthh", 0xc7, INSTR_RXY_RRRD }, - { "lfhat", 0xc8, INSTR_RXY_RRRD }, - { "lfh", 0xca, INSTR_RXY_RRRD }, - { "stfh", 0xcb, INSTR_RXY_RRRD }, - { "chf", 0xcd, INSTR_RXY_RRRD }, - { "clhf", 0xcf, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD }, -#endif - { "lrv", 0x1e, INSTR_RXY_RRRD }, - { "lrvh", 0x1f, INSTR_RXY_RRRD }, - { "strv", 0x3e, INSTR_RXY_RRRD }, - { "ml", 0x96, INSTR_RXY_RRRD }, - { "dl", 0x97, INSTR_RXY_RRRD }, - { "alc", 0x98, INSTR_RXY_RRRD }, - { "slb", 0x99, INSTR_RXY_RRRD }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_e5[] = { -#ifdef CONFIG_64BIT - { "strag", 0x02, INSTR_SSE_RDRD }, - { "mvhhi", 0x44, INSTR_SIL_RDI }, - { "mvghi", 0x48, INSTR_SIL_RDI }, - { "mvhi", 0x4c, INSTR_SIL_RDI }, - { "chhsi", 0x54, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, - { "cghsi", 0x58, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, - { "chsi", 0x5c, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, - { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, - { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, -#endif - { "lasp", 0x00, INSTR_SSE_RDRD }, - { "tprot", 0x01, INSTR_SSE_RDRD }, - { "mvcsk", 0x0e, INSTR_SSE_RDRD }, - { "mvcdk", 0x0f, INSTR_SSE_RDRD }, - { "", 0, INSTR_INVALID } -}; - -static struct insn opcode_eb[] = { -#ifdef CONFIG_64BIT - { "lmg", 0x04, INSTR_RSY_RRRD }, - { "srag", 0x0a, INSTR_RSY_RRRD }, - { "slag", 0x0b, INSTR_RSY_RRRD }, - { "srlg", 0x0c, INSTR_RSY_RRRD }, - { "sllg", 0x0d, INSTR_RSY_RRRD }, - { "tracg", 0x0f, INSTR_RSY_RRRD }, - { "csy", 0x14, INSTR_RSY_RRRD }, - { "rllg", 0x1c, INSTR_RSY_RRRD }, - { "clmh", 0x20, INSTR_RSY_RURD }, - { "clmy", 0x21, INSTR_RSY_RURD }, - { "clt", 0x23, INSTR_RSY_RURD }, - { "stmg", 0x24, INSTR_RSY_RRRD }, - { "stctg", 0x25, INSTR_RSY_CCRD }, - { "stmh", 0x26, INSTR_RSY_RRRD }, - { "clgt", 0x2b, INSTR_RSY_RURD }, - { "stcmh", 0x2c, INSTR_RSY_RURD }, - { "stcmy", 0x2d, INSTR_RSY_RURD }, - { "lctlg", 0x2f, INSTR_RSY_CCRD }, - { "csg", 0x30, INSTR_RSY_RRRD }, - { "cdsy", 0x31, INSTR_RSY_RRRD }, - { "cdsg", 0x3e, INSTR_RSY_RRRD }, - { "bxhg", 0x44, INSTR_RSY_RRRD }, - { "bxleg", 0x45, INSTR_RSY_RRRD }, - { "ecag", 0x4c, INSTR_RSY_RRRD }, - { "tmy", 0x51, INSTR_SIY_URD }, - { "mviy", 0x52, INSTR_SIY_URD }, - { "niy", 0x54, INSTR_SIY_URD }, - { "cliy", 0x55, INSTR_SIY_URD }, - { "oiy", 0x56, INSTR_SIY_URD }, - { "xiy", 0x57, INSTR_SIY_URD }, - { "asi", 0x6a, INSTR_SIY_IRD }, - { "alsi", 0x6e, INSTR_SIY_IRD }, - { "agsi", 0x7a, INSTR_SIY_IRD }, - { "algsi", 0x7e, INSTR_SIY_IRD }, - { "icmh", 0x80, INSTR_RSY_RURD }, - { "icmy", 0x81, INSTR_RSY_RURD }, - { "clclu", 0x8f, INSTR_RSY_RRRD }, - { "stmy", 0x90, INSTR_RSY_RRRD }, - { "lmh", 0x96, INSTR_RSY_RRRD }, - { "lmy", 0x98, INSTR_RSY_RRRD }, - { "lamy", 0x9a, INSTR_RSY_AARD }, - { "stamy", 0x9b, INSTR_RSY_AARD }, - { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD }, - { "sic", 0xd1, INSTR_RSY_RRRD }, - { "srak", 0xdc, INSTR_RSY_RRRD }, - { "slak", 0xdd, INSTR_RSY_RRRD }, - { "srlk", 0xde, INSTR_RSY_RRRD }, - { "sllk", 0xdf, INSTR_RSY_RRRD }, - { "locg", 0xe2, INSTR_RSY_RDRM }, - { "stocg", 0xe3, INSTR_RSY_RDRM }, - { "lang", 0xe4, INSTR_RSY_RRRD }, - { "laog", 0xe6, INSTR_RSY_RRRD }, - { "laxg", 0xe7, INSTR_RSY_RRRD }, - { "laag", 0xe8, INSTR_RSY_RRRD }, - { "laalg", 0xea, INSTR_RSY_RRRD }, - { "loc", 0xf2, INSTR_RSY_RDRM }, - { "stoc", 0xf3, INSTR_RSY_RDRM }, - { "lan", 0xf4, INSTR_RSY_RRRD }, - { "lao", 0xf6, INSTR_RSY_RRRD }, - { "lax", 0xf7, INSTR_RSY_RRRD }, - { "laa", 0xf8, INSTR_RSY_RRRD }, - { "laal", 0xfa, INSTR_RSY_RRRD }, - { "lric", 0x60, INSTR_RSY_RDRM }, - { "stric", 0x61, INSTR_RSY_RDRM }, - { "mric", 0x62, INSTR_RSY_RDRM }, -#endif - { "rll", 0x1d, INSTR_RSY_RRRD }, - { "mvclu", 0x8e, INSTR_RSY_RRRD }, - { "tp", 0xc0, INSTR_RSL_R0RD }, - { "", 0, INSTR_INVALID } + [VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, }; -static struct insn opcode_ec[] = { -#ifdef CONFIG_64BIT - { "brxhg", 0x44, INSTR_RIE_RRP }, - { "brxlg", 0x45, INSTR_RIE_RRP }, - { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, - { "rnsbg", 0x54, INSTR_RIE_RRUUU }, - { "risbg", 0x55, INSTR_RIE_RRUUU }, - { "rosbg", 0x56, INSTR_RIE_RRUUU }, - { "rxsbg", 0x57, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, - { "cgrj", 0x64, INSTR_RIE_RRPU }, - { "clgrj", 0x65, INSTR_RIE_RRPU }, - { "cgit", 0x70, INSTR_RIE_R0IU }, - { "clgit", 0x71, INSTR_RIE_R0UU }, - { "cit", 0x72, INSTR_RIE_R0IU }, - { "clfit", 0x73, INSTR_RIE_R0UU }, - { "crj", 0x76, INSTR_RIE_RRPU }, - { "clrj", 0x77, INSTR_RIE_RRPU }, - { "cgij", 0x7c, INSTR_RIE_RUPI }, - { "clgij", 0x7d, INSTR_RIE_RUPU }, - { "cij", 0x7e, INSTR_RIE_RUPI }, - { "clij", 0x7f, INSTR_RIE_RUPU }, - { "ahik", 0xd8, INSTR_RIE_RRI0 }, - { "aghik", 0xd9, INSTR_RIE_RRI0 }, - { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, - { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, - { "cgrb", 0xe4, INSTR_RRS_RRRDU }, - { "clgrb", 0xe5, INSTR_RRS_RRRDU }, - { "crb", 0xf6, INSTR_RRS_RRRDU }, - { "clrb", 0xf7, INSTR_RRS_RRRDU }, - { "cgib", 0xfc, INSTR_RIS_RURDI }, - { "clgib", 0xfd, INSTR_RIS_RURDU }, - { "cib", 0xfe, INSTR_RIS_RURDI }, - { "clib", 0xff, INSTR_RIS_RURDU }, -#endif - { "", 0, INSTR_INVALID } +static const unsigned char formats[][6] = { + [INSTR_E] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_IE_UU] = { U4_24, U4_28, 0, 0, 0, 0 }, + [INSTR_MII_UPP] = { U4_8, J12_12, J24_24 }, + [INSTR_RIE_R0IU] = { R_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_R0UU] = { R_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_RRI0] = { R_8, R_12, I16_16, 0, 0, 0 }, + [INSTR_RIE_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RIE_RRPU] = { R_8, R_12, U4_32, J16_16, 0, 0 }, + [INSTR_RIE_RRUUU] = { R_8, R_12, U8_16, U8_24, U8_32, 0 }, + [INSTR_RIE_RUI0] = { R_8, I16_16, U4_12, 0, 0, 0 }, + [INSTR_RIE_RUPI] = { R_8, I8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIE_RUPU] = { R_8, U8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIL_RI] = { R_8, I32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RP] = { R_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RU] = { R_8, U32_16, 0, 0, 0, 0 }, + [INSTR_RIL_UP] = { U4_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIS_RURDI] = { R_8, I8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RIS_RURDU] = { R_8, U8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RI_RI] = { R_8, I16_16, 0, 0, 0, 0 }, + [INSTR_RI_RP] = { R_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RI_RU] = { R_8, U16_16, 0, 0, 0, 0 }, + [INSTR_RI_UP] = { U4_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RRE_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_RRE_AA] = { A_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_AR] = { A_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_F0] = { F_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_FF] = { F_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_FR] = { F_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_R0] = { R_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_RA] = { R_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_RF] = { R_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_RR] = { R_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRF_0UFF] = { F_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_0URF] = { R_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_F0FF] = { F_16, F_24, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FF2] = { F_24, F_16, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FR] = { F_24, F_16, R_28, 0, 0, 0 }, + [INSTR_RRF_FFRU] = { F_24, F_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF] = { F_24, F_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF2] = { F_24, F_28, F_16, U4_20, 0, 0 }, + [INSTR_RRF_R0RR] = { R_24, R_16, R_28, 0, 0, 0 }, + [INSTR_RRF_R0RR2] = { R_24, R_28, R_16, 0, 0, 0 }, + [INSTR_RRF_RURR] = { R_24, R_28, R_16, U4_20, 0, 0 }, + [INSTR_RRF_RURR2] = { R_24, R_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, + [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 }, + [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRS_RRRDU] = { R_8, R_12, U4_32, D_20, B_16 }, + [INSTR_RR_FF] = { F_8, F_12, 0, 0, 0, 0 }, + [INSTR_RR_R0] = { R_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_RR] = { R_8, R_12, 0, 0, 0, 0 }, + [INSTR_RR_U0] = { U8_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_UR] = { U4_8, R_12, 0, 0, 0, 0 }, + [INSTR_RSI_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RSL_LRDFU] = { F_32, D_20, L8_8, B_16, U4_36, 0 }, + [INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 }, + [INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 }, + [INSTR_RS_AARD] = { A_8, A_12, D_20, B_16, 0, 0 }, + [INSTR_RS_CCRD] = { C_8, C_12, D_20, B_16, 0, 0 }, + [INSTR_RS_R0RD] = { R_8, D_20, B_16, 0, 0, 0 }, + [INSTR_RS_RRRD] = { R_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_RS_RURD] = { R_8, U4_12, D_20, B_16, 0, 0 }, + [INSTR_RXE_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RXE_RRRDU] = { R_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_RXF_FRRDF] = { F_32, F_8, D_20, X_12, B_16, 0 }, + [INSTR_RXY_FRRD] = { F_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_RRRD] = { R_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_URRD] = { U4_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RX_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_RRRD] = { R_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_URRD] = { U4_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 }, + [INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 }, + [INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 }, + [INSTR_SIY_RD] = { D20_20, B_16, 0, 0, 0, 0 }, + [INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SMI_U0RDP] = { U4_8, J16_32, D_20, B_16, 0, 0 }, + [INSTR_SSE_RDRD] = { D_20, B_16, D_36, B_32, 0, 0 }, + [INSTR_SSF_RRDRD] = { D_20, B_16, D_36, B_32, R_8, 0 }, + [INSTR_SSF_RRDRD2] = { R_8, D_20, B_16, D_36, B_32, 0 }, + [INSTR_SS_L0RDRD] = { D_20, L8_8, B_16, D_36, B_32, 0 }, + [INSTR_SS_L2RDRD] = { D_20, B_16, D_36, L8_8, B_32, 0 }, + [INSTR_SS_LIRDRD] = { D_20, L4_8, B_16, D_36, B_32, U4_12 }, + [INSTR_SS_LLRDRD] = { D_20, L4_8, B_16, D_36, L4_12, B_32 }, + [INSTR_SS_RRRDRD] = { D_20, R_8, B_16, D_36, B_32, R_12 }, + [INSTR_SS_RRRDRD2] = { R_8, D_20, B_16, R_12, D_36, B_32 }, + [INSTR_SS_RRRDRD3] = { R_8, R_12, D_20, B_16, D_36, B_32 }, + [INSTR_S_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_S_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0IU] = { V_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0U] = { V_8, U16_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 }, + [INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 }, + [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 }, + [INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 }, + [INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 }, + [INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 }, + [INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 }, + [INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 }, + [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, + [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 }, + [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 }, + [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, + [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 }, + [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 }, + [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, + [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, + [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 }, + [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 }, + [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 }, + [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 }, + [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 }, + [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 }, + [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 }, + [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 }, + [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 }, + [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 }, + [INSTR_VRR_VVV0V] = { V_8, V_12, V_16, V_32, 0, 0 }, + [INSTR_VRR_VVVU0UV] = { V_8, V_12, V_16, V_32, U4_28, U4_20 }, + [INSTR_VRR_VVVU0V] = { V_8, V_12, V_16, V_32, U4_20, 0 }, + [INSTR_VRR_VVVUU0V] = { V_8, V_12, V_16, V_32, U4_20, U4_24 }, + [INSTR_VRS_RRDV] = { V_32, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, + [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, }; -static struct insn opcode_ed[] = { -#ifdef CONFIG_64BIT - { "mayl", 0x38, INSTR_RXF_FRRDF }, - { "myl", 0x39, INSTR_RXF_FRRDF }, - { "may", 0x3a, INSTR_RXF_FRRDF }, - { "my", 0x3b, INSTR_RXF_FRRDF }, - { "mayh", 0x3c, INSTR_RXF_FRRDF }, - { "myh", 0x3d, INSTR_RXF_FRRDF }, - { "sldt", 0x40, INSTR_RXF_FRRDF }, - { "srdt", 0x41, INSTR_RXF_FRRDF }, - { "slxt", 0x48, INSTR_RXF_FRRDF }, - { "srxt", 0x49, INSTR_RXF_FRRDF }, - { "tdcet", 0x50, INSTR_RXE_FRRD }, - { "tdget", 0x51, INSTR_RXE_FRRD }, - { "tdcdt", 0x54, INSTR_RXE_FRRD }, - { "tdgdt", 0x55, INSTR_RXE_FRRD }, - { "tdcxt", 0x58, INSTR_RXE_FRRD }, - { "tdgxt", 0x59, INSTR_RXE_FRRD }, - { "ley", 0x64, INSTR_RXY_FRRD }, - { "ldy", 0x65, INSTR_RXY_FRRD }, - { "stey", 0x66, INSTR_RXY_FRRD }, - { "stdy", 0x67, INSTR_RXY_FRRD }, - { "czdt", 0xa8, INSTR_RSL_LRDFU }, - { "czxt", 0xa9, INSTR_RSL_LRDFU }, - { "cdzt", 0xaa, INSTR_RSL_LRDFU }, - { "cxzt", 0xab, INSTR_RSL_LRDFU }, -#endif - { "ldeb", 0x04, INSTR_RXE_FRRD }, - { "lxdb", 0x05, INSTR_RXE_FRRD }, - { "lxeb", 0x06, INSTR_RXE_FRRD }, - { "mxdb", 0x07, INSTR_RXE_FRRD }, - { "keb", 0x08, INSTR_RXE_FRRD }, - { "ceb", 0x09, INSTR_RXE_FRRD }, - { "aeb", 0x0a, INSTR_RXE_FRRD }, - { "seb", 0x0b, INSTR_RXE_FRRD }, - { "mdeb", 0x0c, INSTR_RXE_FRRD }, - { "deb", 0x0d, INSTR_RXE_FRRD }, - { "maeb", 0x0e, INSTR_RXF_FRRDF }, - { "mseb", 0x0f, INSTR_RXF_FRRDF }, - { "tceb", 0x10, INSTR_RXE_FRRD }, - { "tcdb", 0x11, INSTR_RXE_FRRD }, - { "tcxb", 0x12, INSTR_RXE_FRRD }, - { "sqeb", 0x14, INSTR_RXE_FRRD }, - { "sqdb", 0x15, INSTR_RXE_FRRD }, - { "meeb", 0x17, INSTR_RXE_FRRD }, - { "kdb", 0x18, INSTR_RXE_FRRD }, - { "cdb", 0x19, INSTR_RXE_FRRD }, - { "adb", 0x1a, INSTR_RXE_FRRD }, - { "sdb", 0x1b, INSTR_RXE_FRRD }, - { "mdb", 0x1c, INSTR_RXE_FRRD }, - { "ddb", 0x1d, INSTR_RXE_FRRD }, - { "madb", 0x1e, INSTR_RXF_FRRDF }, - { "msdb", 0x1f, INSTR_RXF_FRRDF }, - { "lde", 0x24, INSTR_RXE_FRRD }, - { "lxd", 0x25, INSTR_RXE_FRRD }, - { "lxe", 0x26, INSTR_RXE_FRRD }, - { "mae", 0x2e, INSTR_RXF_FRRDF }, - { "mse", 0x2f, INSTR_RXF_FRRDF }, - { "sqe", 0x34, INSTR_RXE_FRRD }, - { "sqd", 0x35, INSTR_RXE_FRRD }, - { "mee", 0x37, INSTR_RXE_FRRD }, - { "mad", 0x3e, INSTR_RXF_FRRDF }, - { "msd", 0x3f, INSTR_RXF_FRRDF }, - { "", 0, INSTR_INVALID } -}; +static char long_insn_name[][7] = LONG_INSN_INITIALIZER; +static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER; +static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER; /* Extracts an operand value from an instruction. */ static unsigned int extract_operand(unsigned char *code, - const struct operand *operand) + const struct s390_operand *operand) { + unsigned char *cp; unsigned int val; int bits; /* Extract fragments of the operand byte for byte. */ - code += operand->shift / 8; + cp = code + operand->shift / 8; bits = (operand->shift & 7) + operand->bits; val = 0; do { val <<= 8; - val |= (unsigned int) *code++; + val |= (unsigned int) *cp++; bits -= 8; } while (bits > 0); val >>= -bits; @@ -1593,6 +368,18 @@ static unsigned int extract_operand(unsigned char *code, if (operand->bits == 20 && operand->shift == 20) val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + /* Check for register extensions bits for vector registers. */ + if (operand->flags & OPERAND_VR) { + if (operand->shift == 8) + val |= (code[4] & 8) << 1; + else if (operand->shift == 12) + val |= (code[4] & 4) << 2; + else if (operand->shift == 16) + val |= (code[4] & 2) << 3; + else if (operand->shift == 32) + val |= (code[4] & 1) << 4; + } + /* Sign extend value if the operand is signed or pc relative. */ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && (val & (1U << (operand->bits - 1)))) @@ -1608,123 +395,40 @@ static unsigned int extract_operand(unsigned char *code, return val; } -static inline int insn_length(unsigned char code) +struct s390_insn *find_insn(unsigned char *code) { - return ((((int) code + 64) >> 7) + 1) << 1; -} - -static struct insn *find_insn(unsigned char *code) -{ - unsigned char opfrag = code[1]; - unsigned char opmask; - struct insn *table; + struct s390_opcode_offset *entry; + struct s390_insn *insn; + unsigned char opfrag; + int i; - switch (code[0]) { - case 0x01: - table = opcode_01; - break; - case 0xa5: - table = opcode_a5; - break; - case 0xa7: - table = opcode_a7; - break; - case 0xaa: - table = opcode_aa; - break; - case 0xb2: - table = opcode_b2; - break; - case 0xb3: - table = opcode_b3; - break; - case 0xb9: - table = opcode_b9; - break; - case 0xc0: - table = opcode_c0; - break; - case 0xc2: - table = opcode_c2; - break; - case 0xc4: - table = opcode_c4; - break; - case 0xc6: - table = opcode_c6; - break; - case 0xc8: - table = opcode_c8; - break; - case 0xcc: - table = opcode_cc; - break; - case 0xe3: - table = opcode_e3; - opfrag = code[5]; - break; - case 0xe5: - table = opcode_e5; - break; - case 0xeb: - table = opcode_eb; - opfrag = code[5]; - break; - case 0xec: - table = opcode_ec; - opfrag = code[5]; - break; - case 0xed: - table = opcode_ed; - opfrag = code[5]; - break; - default: - table = opcode; - opfrag = code[0]; - break; - } - while (table->format != INSTR_INVALID) { - opmask = formats[table->format][0]; - if (table->opfrag == (opfrag & opmask)) - return table; - table++; + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ + for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { + entry = &opcode_offset[i]; + if (entry->opcode == code[0]) + break; } - return NULL; -} -/** - * insn_to_mnemonic - decode an s390 instruction - * @instruction: instruction to decode - * @buf: buffer to fill with mnemonic - * @len: length of buffer - * - * Decode the instruction at @instruction and store the corresponding - * mnemonic into @buf of length @len. - * @buf is left unchanged if the instruction could not be decoded. - * Returns: - * %0 on success, %-ENOENT if the instruction was not found. - */ -int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) -{ - struct insn *insn; + opfrag = *(code + entry->byte) & entry->mask; - insn = find_insn(instruction); - if (!insn) - return -ENOENT; - if (insn->name[0] == '\0') - snprintf(buf, len, "%s", - long_insn_name[(int) insn->name[1]]); - else - snprintf(buf, len, "%.5s", insn->name); - return 0; + insn = &opcode[entry->offset]; + for (i = 0; i < entry->count; i++) { + if (insn->opfrag == opfrag) + return insn; + insn++; + } + return NULL; } -EXPORT_SYMBOL_GPL(insn_to_mnemonic); static int print_insn(char *buffer, unsigned char *code, unsigned long addr) { - struct insn *insn; + struct s390_insn *insn; const unsigned char *ops; - const struct operand *operand; + const struct s390_operand *operand; unsigned int value; char separator; char *ptr; @@ -1733,14 +437,14 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - if (insn->name[0] == '\0') - ptr += sprintf(ptr, "%s\t", - long_insn_name[(int) insn->name[1]]); + if (insn->zero == 0) + ptr += sprintf(ptr, "%.7s\t", + long_insn_name[insn->offset]); else ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; - for (ops = formats[insn->format] + 1, i = 0; + for (ops = formats[insn->format], i = 0; *ops != 0 && i < 6; ops++, i++) { operand = operands + *ops; value = extract_operand(code, operand); @@ -1754,18 +458,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) if (separator) ptr += sprintf(ptr, "%c", separator); if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%r%i", value); + ptr += sprintf(ptr, "%%r%u", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%f%i", value); + ptr += sprintf(ptr, "%%f%u", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%a%i", value); + ptr += sprintf(ptr, "%%a%u", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%c%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) - ptr += sprintf(ptr, "%i", value); + ptr += sprintf(ptr, "%%c%u", value); + else if (operand->flags & OPERAND_VR) + ptr += sprintf(ptr, "%%v%u", value); + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) + ptr += sprintf(ptr, "%i", (int)value); else ptr += sprintf(ptr, "%u", value); if (operand->flags & OPERAND_DISP) @@ -1781,33 +488,42 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) return (int) (ptr - buffer); } +static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) +{ + if (user_mode(regs)) { + if (copy_from_user(dst, (char __user *)src, len)) + return -EFAULT; + } else { + if (copy_from_kernel_nofault(dst, src, len)) + return -EFAULT; + } + return 0; +} + void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; + unsigned long addr, pswaddr; unsigned char code[64]; - char buffer[64], *ptr; - mm_segment_t old_fs; - unsigned long addr; + char buffer[128], *ptr; int start, end, opsize, hops, i; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); /* Get a snapshot of the 64 bytes surrounding the fault address. */ - old_fs = get_fs(); - set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); - for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { - addr = regs->psw.addr - 34 + start; - if (__copy_from_user(code + start - 2, - (char __user *) addr, 2)) + for (start = 32; start && pswaddr >= 34 - start; start -= 2) { + addr = pswaddr - 34 + start; + if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) break; } for (end = 32; end < 64; end += 2) { - addr = regs->psw.addr + end - 32; - if (__copy_from_user(code + end, - (char __user *) addr, 2)) + addr = pswaddr + end - 32; + if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } - set_fs(old_fs); - /* Code snapshot useable ? */ - if ((regs->psw.addr & 1) || start >= end) { + /* Code snapshot usable ? */ + if ((pswaddr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); return; } @@ -1830,13 +546,13 @@ void show_code(struct pt_regs *regs) while (start < end && hops < 8) { opsize = insn_length(code[start]); if (start + opsize == 32) - *ptr++ = '#'; + *ptr++ = '*'; else if (start == 32) *ptr++ = '>'; else *ptr++ = ' '; - addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, ONELONG, addr); + addr = pswaddr + start - 32; + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -1846,17 +562,17 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk(buffer); + pr_cont("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; } - printk("\n"); + pr_cont("\n"); } void print_fn_code(unsigned char *code, unsigned long len) { - char buffer[64], *ptr; + char buffer[128], *ptr; int opsize, i; while (len) { @@ -1864,7 +580,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; @@ -1873,7 +589,7 @@ void print_fn_code(unsigned char *code, unsigned long len) ptr += print_insn(ptr, code, (unsigned long) code); *ptr++ = '\n'; *ptr++ = 0; - printk(buffer); + printk("%s", buffer); code += opsize; len -= opsize; } diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 87acc38f73c6..f9d52e05e01e 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack dumping functions * @@ -11,159 +12,171 @@ #include <linux/export.h> #include <linux/kdebug.h> #include <linux/ptrace.h> +#include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> +#include <asm/dis.h> #include <asm/ipl.h> +#include <asm/unwind.h> -#ifndef CONFIG_64BIT -#define LONG "%08lx " -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define LONG "%016lx " -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ - -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +const char *stack_type_name(enum stack_type type) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; + switch (type) { + case STACK_TYPE_TASK: + return "task"; + case STACK_TYPE_IRQ: + return "irq"; + case STACK_TYPE_NODAT: + return "nodat"; + case STACK_TYPE_RESTART: + return "restart"; + default: + return "unknown"; } } +EXPORT_SYMBOL_GPL(stack_type_name); + +static inline bool in_stack(unsigned long sp, struct stack_info *info, + enum stack_type type, unsigned long stack) +{ + if (sp < stack || sp >= stack + THREAD_SIZE) + return false; + info->type = type; + info->begin = stack; + info->end = stack + THREAD_SIZE; + return true; +} -static void show_trace(struct task_struct *task, unsigned long *stack) +static bool in_task_stack(unsigned long sp, struct task_struct *task, + struct stack_info *info) { - const unsigned long frame_size = - STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - register unsigned long __r15 asm ("15"); - unsigned long sp; + unsigned long stack = (unsigned long)task_stack_page(task); - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, - S390_lowcore.panic_stack + frame_size - 4096, - S390_lowcore.panic_stack + frame_size); -#endif - sp = __show_trace(sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); - else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - if (!task) - task = current; - debug_show_held_locks(task); + return in_stack(sp, info, STACK_TYPE_TASK, stack); +} + +static bool in_irq_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_IRQ, stack); +} + +static bool in_nodat_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_NODAT, stack); } -void show_stack(struct task_struct *task, unsigned long *sp) +static bool in_mcck_stack(unsigned long sp, struct stack_info *info) { - register unsigned long *__r15 asm ("15"); - unsigned long *stack; - int i; + unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET; + return in_stack(sp, info, STACK_TYPE_MCCK, stack); +} + +static bool in_restart_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_RESTART, stack); +} + +int get_stack_info(unsigned long sp, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask) +{ if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk(LONG, *stack++); - } - printk("\n"); - show_trace(task, sp); + goto unknown; + + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto unknown; + + /* Check per-task stack */ + if (in_task_stack(sp, task, info)) + goto recursion_check; + + if (task != current) + goto unknown; + + /* Check per-cpu stacks */ + if (!in_irq_stack(sp, info) && + !in_nodat_stack(sp, info) && + !in_restart_stack(sp, info) && + !in_mcck_stack(sp, info)) + goto unknown; + +recursion_check: + /* + * Make sure we don't iterate through any given stack more than once. + * If it comes up a second time then there's something wrong going on: + * just break out and report an unknown stack type. + */ + if (*visit_mask & (1UL << info->type)) + goto unknown; + *visit_mask |= 1UL << info->type; + return 0; +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; } -static void show_last_breaking_event(struct pt_regs *regs) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { -#ifdef CONFIG_64BIT - printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); - print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); -#endif + struct unwind_state state; + + printk("%sCall Trace:\n", loglvl); + unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) + printk(state.reliable ? "%s [<%016lx>] %pSR \n" : + "%s([<%016lx>] %pSR)\n", + loglvl, state.ip, (void *) state.ip); + debug_show_held_locks(task ? : current); } -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +static void show_last_breaking_event(struct pt_regs *regs) { - return (regs->psw.mask & bits) / ((~bits + 1) & bits); + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] ", regs->last_break); + if (user_mode(regs)) { + print_vma_addr(KERN_CONT, regs->last_break); + pr_cont("\n"); + } else { + pr_cont("%pSR\n", (void *)regs->last_break); + } } void show_registers(struct pt_regs *regs) { + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long pswaddr; char *mode; + pswaddr = regs->psw.addr; + if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED)) + pswaddr = __forward_psw(regs->psw, regs->int_code >> 16); mode = user_mode(regs) ? "User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); + printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)pswaddr); + if (!user_mode(regs)) + pr_cont(" (%pSR)", (void *)pswaddr); + pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); -#ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); -#endif - printk("\n%s GPRS: " FOURLONG, mode, + "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, + psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm); + pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); + printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, + printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, + printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, + printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); show_code(regs); } @@ -174,13 +187,13 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); show_last_breaking_event(regs); } static DEFINE_SPINLOCK(die_lock); -void die(struct pt_regs *regs, const char *str) +void __noreturn die(struct pt_regs *regs, const char *str) { static int die_counter; @@ -190,17 +203,12 @@ void die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, + regs->int_code >> 17, ++die_counter); + pr_cont("SMP "); + if (debug_pagealloc_enabled()) + pr_cont("DEBUG_PAGEALLOC"); + pr_cont("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); show_regs(regs); @@ -212,5 +220,5 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception: panic_on_oops"); oops_exit(); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index dc8770d7173c..b27239c03d79 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -1,24 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Hongjie Yang <hongjie@us.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt +#include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/ctype.h> -#include <linux/ftrace.h> #include <linux/lockdep.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/pfn.h> #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/asm-extable.h> +#include <linux/memblock.h> +#include <linux/kasan.h> +#include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> +#include <asm/diag.h> #include <asm/ebcdic.h> +#include <asm/fpu.h> #include <asm/ipl.h> #include <asm/lowcore.h> #include <asm/processor.h> @@ -28,177 +36,37 @@ #include <asm/cpcmd.h> #include <asm/sclp.h> #include <asm/facility.h> +#include <asm/boot_data.h> #include "entry.h" -/* - * Create a Kernel NSS if the SAVESYS= parameter is defined - */ -#define DEFSYS_CMD_SIZE 128 -#define SAVESYS_CMD_SIZE 32 - -char kernel_nss_name[NSS_NAME_SIZE + 1]; - -static void __init setup_boot_command_line(void); - -/* - * Get the TOD clock running. - */ -static void __init reset_tod_clock(void) -{ - u64 time; - - if (store_tod_clock(&time) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) - disabled_wait(0); - - sched_clock_base_cc = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = sched_clock_base_cc; -} - -#ifdef CONFIG_SHARED_KERNEL -int __init savesys_ipl_nss(char *cmd, const int cmdlen); - -asm( - " .section .init.text,\"ax\",@progbits\n" - " .align 4\n" - " .type savesys_ipl_nss, @function\n" - "savesys_ipl_nss:\n" -#ifdef CONFIG_64BIT - " stmg 6,15,48(15)\n" - " lgr 14,3\n" - " sam31\n" - " diag 2,14,0x8\n" - " sam64\n" - " lgr 2,14\n" - " lmg 6,15,48(15)\n" -#else - " stm 6,15,24(15)\n" - " lr 14,3\n" - " diag 2,14,0x8\n" - " lr 2,14\n" - " lm 6,15,24(15)\n" +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ +{ \ + return 0; \ +} \ +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) + +decompressor_handled_param(mem); +decompressor_handled_param(vmalloc); +decompressor_handled_param(dfltcc); +decompressor_handled_param(facilities); +decompressor_handled_param(nokaslr); +decompressor_handled_param(cmma); +decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); +#if IS_ENABLED(CONFIG_KVM) +decompressor_handled_param(prot_virt); #endif - " br 14\n" - " .size savesys_ipl_nss, .-savesys_ipl_nss\n" - " .previous\n"); -static __initdata char upper_command_line[COMMAND_LINE_SIZE]; - -static noinline __init void create_kernel_nss(void) +static void __init kasan_early_init(void) { - unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; -#ifdef CONFIG_BLK_DEV_INITRD - unsigned int sinitrd_pfn, einitrd_pfn; +#ifdef CONFIG_KASAN + init_task.kasan_depth = 0; + kasan_init_generic(); #endif - int response; - int hlen; - size_t len; - char *savesys_ptr; - char defsys_cmd[DEFSYS_CMD_SIZE]; - char savesys_cmd[SAVESYS_CMD_SIZE]; - - /* Do nothing if we are not running under VM */ - if (!MACHINE_IS_VM) - return; - - /* Convert COMMAND_LINE to upper case */ - for (i = 0; i < strlen(boot_command_line); i++) - upper_command_line[i] = toupper(boot_command_line[i]); - - savesys_ptr = strstr(upper_command_line, "SAVESYS="); - - if (!savesys_ptr) - return; - - savesys_ptr += 8; /* Point to the beginning of the NSS name */ - for (i = 0; i < NSS_NAME_SIZE; i++) { - if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') - break; - kernel_nss_name[i] = savesys_ptr[i]; - } - - stext_pfn = PFN_DOWN(__pa(&_stext)); - eshared_pfn = PFN_DOWN(__pa(&_eshared)); - end_pfn = PFN_UP(__pa(&_end)); - min_size = end_pfn << 2; - - hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, - "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", - kernel_nss_name, stext_pfn - 1, stext_pfn, - eshared_pfn - 1, eshared_pfn, end_pfn); - -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); - einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); - min_size = einitrd_pfn << 2; - hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, - " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); - } -#endif - - snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, - " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); - defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; - snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", - kernel_nss_name, kernel_nss_name); - savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; - - __cpcmd(defsys_cmd, NULL, 0, &response); - - if (response != 0) { - pr_err("Defining the Linux kernel NSS failed with rc=%d\n", - response); - kernel_nss_name[0] = '\0'; - return; - } - - len = strlen(savesys_cmd); - ASCEBC(savesys_cmd, len); - response = savesys_ipl_nss(savesys_cmd, len); - - /* On success: response is equal to the command size, - * max SAVESYS_CMD_SIZE - * On error: response contains the numeric portion of cp error message. - * for SAVESYS it will be >= 263 - * for missing privilege class, it will be 1 - */ - if (response > SAVESYS_CMD_SIZE || response == 1) { - pr_err("Saving the Linux kernel NSS failed with rc=%d\n", - response); - kernel_nss_name[0] = '\0'; - return; - } - - /* re-initialize cputime accounting. */ - sched_clock_base_cc = get_tod_clock(); - S390_lowcore.last_update_clock = sched_clock_base_cc; - S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; - S390_lowcore.user_timer = 0; - S390_lowcore.system_timer = 0; - asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); - - /* re-setup boot command line with new ipl vm parms */ - ipl_update_parameters(); - setup_boot_command_line(); - - ipl_flags = IPL_NSS_VALID; -} - -#else /* CONFIG_SHARED_KERNEL */ - -static inline void create_kernel_nss(void) { } - -#endif /* CONFIG_SHARED_KERNEL */ - -/* - * Clear bss memory - */ -static noinline __init void clear_bss_section(void) -{ - memset(__bss_start, 0, __bss_stop - __bss_start); } /* @@ -206,284 +74,170 @@ static noinline __init void clear_bss_section(void) */ static noinline __init void init_kernel_storage_key(void) { +#if PAGE_DEFAULT_KEY unsigned long end_pfn, init_pfn; - end_pfn = PFN_UP(__pa(&_end)); + end_pfn = PFN_UP(__pa(_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY, 0); +#endif } static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) +/* Remove leading, trailing and double whitespace. */ +static inline void strim_all(char *str) +{ + char *s; + + s = strim(str); + if (s != str) + memmove(str, s, strlen(s)); + while (*str) { + if (!isspace(*str++)) + continue; + if (isspace(*str)) { + s = skip_spaces(str); + memmove(str, s, strlen(s) + 1); + } + } +} + +char arch_hw_string[128]; + +static noinline __init void setup_arch_string(void) { - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page; + char mstr[80], hvstr[17]; - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + if (stsi(mach, 1, 1, 1)) return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + scnprintf(mstr, sizeof(mstr), "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); + strim_all(mstr); + if (stsi(vm, 3, 2, 2) == 0 && vm->count) { + EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); + scnprintf(hvstr, sizeof(hvstr), "%-16.16s", vm->vm[0].cpi); + strim_all(hvstr); + } else { + scnprintf(hvstr, sizeof(hvstr), "%s", + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? "KVM" : "unknown"); } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Running under KVM? If not we assume z/VM */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; - else - S390_lowcore.machine_flags |= MACHINE_FLAG_VM; + scnprintf(arch_hw_string, sizeof(arch_hw_string), "HW: %s (%s)", mstr, hvstr); + dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } static __init void setup_topology(void) { -#ifdef CONFIG_64BIT int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; } topology_max_mnest = max_mnest; -#endif } -static void early_pgm_check_handler(void) +void __init __do_early_pgm_check(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - unsigned long addr; + struct lowcore *lc = get_lowcore(); + unsigned long ip; - addr = S390_lowcore.program_old_psw.addr; - fixup = search_exception_tables(addr & PSW_ADDR_INSN); - if (!fixup) - disabled_wait(0); - S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + regs->int_code = lc->pgm_int_code; + regs->int_parm_long = lc->trans_exc_code; + regs->last_break = lc->pgm_last_break; + ip = __rewind_psw(regs->psw, regs->int_code >> 16); + + /* Monitor Event? Might be a warning */ + if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) { + if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN) + return; + } + if (fixup_exception(regs)) + return; + /* + * Unhandled exception - system cannot continue but try to get some + * helpful messages to the console. Use early_printk() to print + * some basic information in case it is too early for printk(). + */ + register_early_console(); + early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n", + regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr); + show_regs(regs); + disabled_wait(); } static noinline __init void setup_lowcore_early(void) { + struct lowcore *lc = get_lowcore(); psw_t psw; - psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; - S390_lowcore.external_new_psw = psw; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; - S390_lowcore.program_new_psw = psw; - s390_base_pgm_handler_fn = early_pgm_check_handler; -} - -static noinline __init void setup_facility_list(void) -{ - stfle(S390_lowcore.stfle_fac_list, - ARRAY_SIZE(S390_lowcore.stfle_fac_list)); -} - -static __init void detect_mvpg(void) -{ -#ifndef CONFIG_64BIT - int rc; - - asm volatile( - " la 0,0\n" - " mvpg %2,%2\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; -#endif -} - -static __init void detect_ieee(void) -{ -#ifndef CONFIG_64BIT - int rc, tmp; - - asm volatile( - " efpc %1,0\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; -#endif + psw.addr = (unsigned long)early_pgm_check_handler; + psw.mask = PSW_KERNEL_BITS; + lc->program_new_psw = psw; + lc->preempt_count = INIT_PREEMPT_COUNT; + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_csp(void) +static inline void save_vector_registers(void) { -#ifndef CONFIG_64BIT - int rc; - - asm volatile( - " la 0,0\n" - " la 1,0\n" - " la 2,4\n" - " csp 0,2\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; +#ifdef CONFIG_CRASH_DUMP + if (cpu_has_vx()) + save_vx_regs(boot_cpu_vector_save_area); #endif } -static __init void detect_diag9c(void) +static inline void setup_low_address_protection(void) { - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; + system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); } -static __init void detect_diag44(void) +static inline void setup_access_registers(void) { -#ifdef CONFIG_64BIT - int rc; + unsigned int acrs[NUM_ACRS] = { 0 }; - asm volatile( - " diag 0,0,0x44\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; -#endif -} - -static __init void detect_machine_facilities(void) -{ -#ifdef CONFIG_64BIT - if (test_facility(8)) { - S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; - __ctl_set_bit(0, 23); - } - if (test_facility(78)) - S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(27)) - S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; - if (test_facility(40)) - S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; - if (test_facility(50) && test_facility(73)) - S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - if (test_facility(66)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; -#endif -} - -static __init void rescue_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); - /* - * Just like in case of IPL from VM reader we make sure there is a - * gap of 4MB between end of kernel and start of initrd. - * That way we can also be sure that saving an NSS will succeed, - * which however only requires different segments. - */ - if (!INITRD_START || !INITRD_SIZE) - return; - if (INITRD_START >= min_initrd_addr) - return; - memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = min_initrd_addr; -#endif -} - -/* Set up boot command line */ -static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) -{ - char *parm, *delim; - size_t rc, len; - - len = strlen(boot_command_line); - - delim = boot_command_line + len; /* '\0' character position */ - parm = boot_command_line + len + 1; /* append right after '\0' */ - - rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); - if (rc) { - if (*parm == '=') - memmove(boot_command_line, parm + 1, rc); - else - *delim = ' '; /* replace '\0' with space */ - } -} - -static inline int has_ebcdic_char(const char *str) -{ - int i; - - for (i = 0; str[i]; i++) - if (str[i] & 0x80) - return 1; - return 0; + restore_access_regs(acrs); } +char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; - /* convert arch command line to ascii if necessary */ - if (has_ebcdic_char(COMMAND_LINE)) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ - strlcpy(boot_command_line, strstrip(COMMAND_LINE), - ARCH_COMMAND_LINE_SIZE); - - /* append IPL PARM data to the boot command line */ - if (MACHINE_IS_VM) - append_to_cmdline(append_ipl_vmparm); + strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); +} - append_to_cmdline(append_ipl_scpdata); +static void __init sort_amode31_extable(void) +{ + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); } -/* - * Save ipl parameters, clear bss memory, initialize storage keys - * and create a kernel NSS at startup if the SAVESYS= parm is defined - */ void __init startup_init(void) { - reset_tod_clock(); - ipl_save_parameters(); - rescue_initrd(); - clear_bss_section(); + kasan_early_init(); + time_early_init(); init_kernel_storage_key(); - lockdep_init(); lockdep_off(); + sort_amode31_extable(); setup_lowcore_early(); - setup_facility_list(); - detect_machine_type(); - ipl_update_parameters(); + setup_arch_string(); setup_boot_command_line(); - create_kernel_nss(); - detect_mvpg(); - detect_ieee(); - detect_csp(); - detect_diag9c(); - detect_diag44(); - detect_machine_facilities(); + save_vector_registers(); setup_topology(); - sclp_facilities_detect(); -#ifdef CONFIG_DYNAMIC_FTRACE - S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; -#endif + sclp_early_detect(); + setup_low_address_protection(); + setup_access_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c new file mode 100644 index 000000000000..cefe020a3be3 --- /dev/null +++ b/arch/s390/kernel/early_printk.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2017 + */ + +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <asm/setup.h> +#include <asm/sclp.h> + +static void sclp_early_write(struct console *con, const char *s, unsigned int len) +{ + __sclp_early_printk(s, len); +} + +static struct console sclp_early_console = { + .name = "earlysclp", + .write = sclp_early_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1, +}; + +void __init register_early_console(void) +{ + if (early_console) + return; + if (!sclp.has_linemode && !sclp.has_vt220) + return; + early_console = &sclp_early_console; + register_console(early_console); +} + +static int __init setup_early_printk(char *buf) +{ + if (early_console) + return 0; + /* Accept only "earlyprintk" and "earlyprintk=sclp" */ + if (buf && !str_has_prefix(buf, "sclp")) + return 0; + register_early_console(); + return 0; +} +early_param("earlyprintk", setup_early_printk); diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index b971c6be6298..0e51fa537262 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * ECBDIC -> ASCII, ASCII -> ECBDIC, + * EBCDIC -> ASCII, ASCII -> EBCDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version @@ -8,8 +9,8 @@ * Martin Peschke <peschke@fh-brandenburg.de> */ -#include <linux/module.h> -#include <asm/types.h> +#include <linux/types.h> +#include <linux/export.h> #include <asm/ebcdic.h> /* @@ -110,15 +111,15 @@ __u8 _ebcasc[256] = 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, - /* 0x40 SP RSP ä ---- */ + /* 0x40 SP RSP ä ---- */ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, /* 0x48 . < ( + | */ 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, /* 0x50 & ---- */ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, - /* 0x58 ß ! $ * ) ; */ + /* 0x58 ß ! $ * ) ; */ 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA, - /* 0x60 - / ---- Ä ---- ---- ---- */ + /* 0x60 - / ---- Ä ---- ---- ---- */ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, /* 0x68 ---- , % _ > ? */ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, @@ -130,7 +131,7 @@ __u8 _ebcasc[256] = 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x88 h i ---- ---- ---- */ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, - /* 0x90 ° j k l m n o p */ + /* 0x90 ° j k l m n o p */ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, /* 0x98 q r ---- ---- */ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, @@ -138,25 +139,25 @@ __u8 _ebcasc[256] = 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* 0xA8 y z ---- ---- ---- ---- */ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, - /* 0xB0 ^ ---- § ---- */ + /* 0xB0 ^ ---- § ---- */ 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, /* 0xB8 ---- [ ] ---- ---- ---- ---- */ 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07, /* 0xC0 { A B C D E F G */ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - /* 0xC8 H I ---- ö ---- */ + /* 0xC8 H I ---- ö ---- */ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, /* 0xD0 } J K L M N O P */ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, - /* 0xD8 Q R ---- ü */ + /* 0xD8 Q R ---- ü */ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, /* 0xE0 \ S T U V W X */ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, /* 0xF0 0 1 2 3 4 5 6 7 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ + /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 }; @@ -259,15 +260,15 @@ __u8 _ebcasc_500[256] = 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, - /* 0x40 SP RSP ä ---- */ + /* 0x40 SP RSP ä ---- */ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, /* 0x48 [ . < ( + ! */ 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21, /* 0x50 & ---- */ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, - /* 0x58 ß ] $ * ) ; ^ */ + /* 0x58 ß ] $ * ) ; ^ */ 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E, - /* 0x60 - / ---- Ä ---- ---- ---- */ + /* 0x60 - / ---- Ä ---- ---- ---- */ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, /* 0x68 ---- , % _ > ? */ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, @@ -279,7 +280,7 @@ __u8 _ebcasc_500[256] = 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x88 h i ---- ---- ---- */ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, - /* 0x90 ° j k l m n o p */ + /* 0x90 ° j k l m n o p */ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, /* 0x98 q r ---- ---- */ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, @@ -287,25 +288,25 @@ __u8 _ebcasc_500[256] = 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* 0xA8 y z ---- ---- ---- ---- */ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, - /* 0xB0 ---- § ---- */ + /* 0xB0 ---- § ---- */ 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, /* 0xB8 ---- | ---- ---- ---- ---- */ 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07, /* 0xC0 { A B C D E F G */ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - /* 0xC8 H I ---- ö ---- */ + /* 0xC8 H I ---- ö ---- */ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, /* 0xD0 } J K L M N O P */ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, - /* 0xD8 Q R ---- ü */ + /* 0xD8 Q R ---- ü */ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, /* 0xE0 \ S T U V W X */ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, /* 0xF0 0 1 2 3 4 5 6 7 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ + /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 }; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index be7a408be7a1..b7f1553d9ee5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * S390 low-level entry points. * @@ -5,12 +6,16 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Heiko Carstens <heiko.carstens@de.ibm.com> */ +#include <linux/export.h> #include <linux/init.h> #include <linux/linkage.h> +#include <asm/asm-extable.h> +#include <asm/alternative.h> +#include <asm/processor.h> #include <asm/cache.h> +#include <asm/dwarf.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -18,916 +23,592 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> +#include <asm/fpu-insn.h> +#include <asm/setup.h> +#include <asm/nmi.h> +#include <asm/nospec-insn.h> +#include <asm/lowcore.h> +#include <asm/machine.h> + +_LPP_OFFSET = __LC_LPP + + .macro STBEAR address + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193) + .endm -__PT_R0 = __PT_GPRS -__PT_R1 = __PT_GPRS + 4 -__PT_R2 = __PT_GPRS + 8 -__PT_R3 = __PT_GPRS + 12 -__PT_R4 = __PT_GPRS + 16 -__PT_R5 = __PT_GPRS + 20 -__PT_R6 = __PT_GPRS + 24 -__PT_R7 = __PT_GPRS + 28 -__PT_R8 = __PT_GPRS + 32 -__PT_R9 = __PT_GPRS + 36 -__PT_R10 = __PT_GPRS + 40 -__PT_R11 = __PT_GPRS + 44 -__PT_R12 = __PT_GPRS + 48 -__PT_R13 = __PT_GPRS + 524 -__PT_R14 = __PT_GPRS + 56 -__PT_R15 = __PT_GPRS + 60 - -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_PER_TRAP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) - -STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER -STACK_SIZE = 1 << STACK_SHIFT -STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE - -#define BASED(name) name-system_call(%r13) - - .macro TRACE_IRQS_ON -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - l %r1,BASED(.Lhardirqs_on) - basr %r14,%r1 # call trace_hardirqs_on_caller -#endif + .macro LBEAR address + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm - .macro TRACE_IRQS_OFF -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - l %r1,BASED(.Lhardirqs_off) - basr %r14,%r1 # call trace_hardirqs_off_caller -#endif + .macro LPSWEY address, lpswe + ALTERNATIVE_2 "b \lpswe;nopr", \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ + __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ + ALT_FEATURE(MFEATURE_LOWCORE) .endm - .macro LOCKDEP_SYS_EXIT -#ifdef CONFIG_LOCKDEP - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jz .+10 - l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 # call lockdep_sys_exit -#endif + .macro MBEAR reg, lowcore + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\ + ALT_FACILITY(193) .endm - .macro CHECK_STACK stacksize,savearea -#ifdef CONFIG_CHECK_STACK - tml %r15,\stacksize - CONFIG_STACK_GUARD - la %r14,\savearea - jz stack_overflow -#endif + .macro CHECK_VMAP_STACK savearea, lowcore, oklabel + lgr %r14,%r15 + nill %r14,0x10000 - THREAD_SIZE + oill %r14,STACK_INIT_OFFSET + clg %r14,__LC_KERNEL_STACK(\lowcore) + je \oklabel + clg %r14,__LC_ASYNC_STACK(\lowcore) + je \oklabel + clg %r14,__LC_MCCK_STACK(\lowcore) + je \oklabel + clg %r14,__LC_NODAT_STACK(\lowcore) + je \oklabel + clg %r14,__LC_RESTART_STACK(\lowcore) + je \oklabel + la %r14,\savearea(\lowcore) + j stack_invalid .endm - .macro SWITCH_ASYNC savearea,stack,shift - tmh %r8,0x0001 # interrupting from user ? - jnz 1f - lr %r14,%r9 - sl %r14,BASED(.Lcritical_start) - cl %r14,BASED(.Lcritical_length) - jhe 0f - la %r11,\savearea # inside critical section, do cleanup - bras %r14,cleanup_critical - tmh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: l %r14,\stack # are we already on the target stack? - slr %r14,%r15 - sra %r14,\shift - jnz 1f - CHECK_STACK 1<<\shift,\savearea - ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 2f -1: l %r15,\stack # load target stack -2: la %r11,STACK_FRAME_OVERHEAD(%r15) + /* + * The TSTMSK macro generates a test-under-mask instruction by + * calculating the memory offset for the specified mask value. + * Mask value can be any constant. The macro shifts the mask + * value to calculate the memory offset for the test-under-mask + * instruction. + */ + .macro TSTMSK addr, mask, size=8, bytepos=0 + .if (\bytepos < \size) && (\mask >> 8) + .if (\mask & 0xff) + .error "Mask exceeds byte boundary" + .endif + TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)" + .exitm + .endif + .ifeq \mask + .error "Mask must not be zero" + .endif + off = \size - \bytepos - 1 + tm off+\addr, \mask + .endm + + .macro BPOFF + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82) + .endm + + .macro BPON + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm - .macro ADD64 high,low,timer - al \high,\timer - al \low,4+\timer - brc 12,.+8 - ahi \high,1 + .macro BPENTER tif_ptr,tif_mask + ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ + "j .+12; nop; nop", ALT_SPEC(82) .endm - .macro SUB64 high,low,timer - sl \high,\timer - sl \low,4+\timer - brc 3,.+8 - ahi \high,-1 + .macro BPEXIT tif_ptr,tif_mask + TSTMSK \tif_ptr,\tif_mask + ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm - .macro UPDATE_VTIME high,low,enter_timer - lm \high,\low,__LC_EXIT_TIMER - SUB64 \high,\low,\enter_timer - ADD64 \high,\low,__LC_USER_TIMER - stm \high,\low,__LC_USER_TIMER - lm \high,\low,__LC_LAST_UPDATE_TIMER - SUB64 \high,\low,__LC_EXIT_TIMER - ADD64 \high,\low,__LC_SYSTEM_TIMER - stm \high,\low,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer +#if IS_ENABLED(CONFIG_KVM) + .macro SIEEXIT sie_control,lowcore + lg %r9,\sie_control # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce + lg %r9,__LC_CURRENT(\lowcore) + mvi __TI_sie(%r9),0 + larl %r9,sie_exit # skip forward to sie_exit .endm +#endif - .macro REENABLE_IRQS - st %r8,__LC_RETURN_PSW - ni __LC_RETURN_PSW,0xbf - ssm __LC_RETURN_PSW + .macro STACKLEAK_ERASE +#ifdef CONFIG_KSTACK_ERASE + brasl %r14,stackleak_erase_on_task_stack +#endif .endm + GEN_BR_THUNK %r14 + .section .kprobes.text, "ax" +.Ldummy: + /* + * The following nop exists only in order to avoid that the next + * symbol starts at the beginning of the kprobes text section. + * In that case there would be several symbols at the same address. + * E.g. objdump would take an arbitrary symbol when disassembling + * the code. + * With the added nop in between this cannot happen. + */ + nop 0 /* - * Scheduler resume function, called by switch_to - * gpr2 = (task_struct *) prev - * gpr3 = (task_struct *) next + * Scheduler resume function, called by __switch_to + * gpr2 = (task_struct *)prev + * gpr3 = (task_struct *)next * Returns: * gpr2 = prev */ -ENTRY(__switch_to) - stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - st %r15,__THREAD_ksp(%r2) # store kernel stack of prev - l %r4,__THREAD_info(%r2) # get thread_info of prev - l %r5,__THREAD_info(%r3) # get thread_info of next - lr %r15,%r5 - ahi %r15,STACK_INIT # end of kernel stack of next - st %r3,__LC_CURRENT # store task struct of next - st %r5,__LC_THREAD_INFO # store thread info of next - st %r15,__LC_KERNEL_STACK # store end of kernel stack - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next - l %r15,__THREAD_ksp(%r3) # load kernel stack of next - tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - jz 0f - ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next -0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - br %r14 +SYM_FUNC_START(__switch_to_asm) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + llill %r5,STACK_INIT_OFFSET + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev + lg %r15,0(%r4,%r3) # start of kernel stack of next + agr %r15,%r5 # end of kernel stack of next + GET_LC %r13 + stg %r3,__LC_CURRENT(%r13) # store task struct of next + stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + lay %r4,__TASK_pid(%r3) + mvc __LC_CURRENT_PID(4,%r13),0(%r4) # store pid of next + ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) +#ifdef CONFIG_STACKPROTECTOR + lg %r3,__TASK_stack_canary(%r3) + stg %r3,__LC_STACK_CANARY(%r13) +#endif + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + BR_EX %r14 +SYM_FUNC_END(__switch_to_asm) -__critical_start: +#if IS_ENABLED(CONFIG_KVM) /* - * SVC interrupt handler routine. System calls are synchronous events and - * are executed with interrupts enabled. + * __sie64a calling convention: + * %r2 pointer to sie control block phys + * %r3 pointer to sie control block virt + * %r4 guest register save area + * %r5 guest asce */ +SYM_FUNC_START(__sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + GET_LC %r13 + lg %r14,__LC_CURRENT(%r13) + stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. + stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses + stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area + stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce + xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags + lmg %r0,%r13,0(%r4) # load guest gprs 0-13 + mvi __TI_sie(%r14),1 + stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts + lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),3 # last exit... + jnz .Lsie_skip + lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr + BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST +.Lsie_entry: + sie 0(%r14) +# Let the next instruction be NOP to avoid triggering a machine check +# and handling it in a guest as result of the instruction execution. + nopr 7 +.Lsie_leave: + BPOFF + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST +.Lsie_skip: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + GET_LC %r14 + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce + lg %r14,__LC_CURRENT(%r14) + mvi __TI_sie(%r14),0 +SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) + stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts + lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xgr %r0,%r0 # clear guest registers to + xgr %r1,%r1 # prevent speculative use + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lg %r2,__SF_SIE_REASON(%r15) # return exit reason code + BR_EX %r14 +SYM_FUNC_END(__sie64a) +EXPORT_SYMBOL(__sie64a) +EXPORT_SYMBOL(sie_exit) +#endif -ENTRY(system_call) - stpt __LC_SYNC_ENTER_TIMER -sysc_stm: - stm %r8,%r15,__LC_SAVE_AREA_SYNC - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 -sysc_per: - l %r15,__LC_KERNEL_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs -sysc_vtime: - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - stm %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC -sysc_do_svc: - oi __TI_flags+3(%r12),_TIF_SYSCALL - l %r10,__TI_sysc_table(%r12) # 31 bit system call table - lh %r8,__PT_INT_CODE+2(%r11) - sla %r8,2 # shift and test for svc0 - jnz sysc_nr_ok - # svc 0: system call number in %r1 - cl %r1,BASED(.Lnr_syscalls) - jnl sysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) - lr %r8,%r1 - sla %r8,2 -sysc_nr_ok: - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - st %r2,__PT_ORIG_GPR2(%r11) - st %r7,STACK_FRAME_OVERHEAD(%r15) - l %r9,0(%r8,%r10) # get system call addr. - tm __TI_flags+2(%r12),_TIF_TRACE >> 8 - jnz sysc_tracesys - basr %r14,%r9 # call sys_xxxx - st %r2,__PT_R2(%r11) # store return value - -sysc_return: - LOCKDEP_SYS_EXIT -sysc_tif: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno sysc_restore - tm __TI_flags+3(%r12),_TIF_WORK_SVC - jnz sysc_work # check for work - ni __TI_flags+3(%r12),255-_TIF_SYSCALL -sysc_restore: - mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) - stpt __LC_EXIT_TIMER - lm %r0,%r15,__PT_R0(%r11) - lpsw __LC_RETURN_PSW -sysc_done: - -# -# One of the work bits is on. Find out which one. -# -sysc_work: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - jo sysc_mcck_pending - tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - jo sysc_reschedule - tm __TI_flags+3(%r12),_TIF_PER_TRAP - jo sysc_singlestep - tm __TI_flags+3(%r12),_TIF_SIGPENDING - jo sysc_sigpending - tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - jo sysc_notify_resume - j sysc_return # beware of critical section cleanup - -# -# _TIF_NEED_RESCHED is set, call schedule -# -sysc_reschedule: - l %r1,BASED(.Lschedule) - la %r14,BASED(sysc_return) - br %r1 # call schedule - -# -# _TIF_MCCK_PENDING is set, call handler -# -sysc_mcck_pending: - l %r1,BASED(.Lhandle_mcck) - la %r14,BASED(sysc_return) - br %r1 # TIF bit will be cleared by handler - -# -# _TIF_SIGPENDING is set, call do_signal -# -sysc_sigpending: - lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_signal) - basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_SYSCALL - jno sysc_return - lm %r2,%r7,__PT_R2(%r11) # load svc arguments - xr %r8,%r8 # svc 0 returns -ENOSYS - clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) - jnl sysc_nr_ok # invalid svc number -> do svc 0 - lh %r8,__PT_INT_CODE+2(%r11) # load new svc number - sla %r8,2 - j sysc_nr_ok # restart svc - -# -# _TIF_NOTIFY_RESUME is set, call do_notify_resume -# -sysc_notify_resume: - lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_notify_resume) - la %r14,BASED(sysc_return) - br %r1 # call do_notify_resume - -# -# _TIF_PER_TRAP is set, call do_per_trap -# -sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP - lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_per_trap) - la %r14,BASED(sysc_return) - br %r1 # call do_per_trap +/* + * SVC interrupt handler routine. System calls are synchronous events and + * are entered with interrupts disabled. + */ -# -# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before -# and after the system call -# -sysc_tracesys: - l %r1,BASED(.Ltrace_enter) - lr %r2,%r11 # pass pointer to pt_regs - la %r3,0 - xr %r0,%r0 - icm %r0,3,__PT_INT_CODE+2(%r11) - st %r0,__PT_R2(%r11) - basr %r14,%r1 # call do_syscall_trace_enter - cl %r2,BASED(.Lnr_syscalls) - jnl sysc_tracenogo - lr %r8,%r2 - sll %r8,2 - l %r9,0(%r8,%r10) -sysc_tracego: - lm %r3,%r7,__PT_R3(%r11) - st %r7,STACK_FRAME_OVERHEAD(%r15) - l %r2,__PT_ORIG_GPR2(%r11) - basr %r14,%r9 # call sys_xxx - st %r2,__PT_R2(%r11) # store return value -sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_TRACE >> 8 - jz sysc_return - l %r1,BASED(.Ltrace_exit) - lr %r2,%r11 # pass pointer to pt_regs - la %r14,BASED(sysc_return) - br %r1 # call do_syscall_trace_exit +SYM_CODE_START(system_call) + STMG_LC %r8,%r15,__LC_SAVE_AREA + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) + BPOFF + lghi %r14,0 +.Lsysc_per: + STBEAR __LC_LAST_BREAK(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + # clear user controlled register to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r8,%r8 + xgr %r9,%r9 + xgr %r10,%r10 + xgr %r11,%r11 + la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13) + MBEAR %r2,%r13 + lgr %r3,%r14 + brasl %r14,__do_syscall + STACKLEAK_ERASE + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPON + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(system_call) # # a new process exits the kernel with ret_from_fork # -ENTRY(ret_from_fork) - la %r11,STACK_FRAME_OVERHEAD(%r15) - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 - l %r1,BASED(.Lschedule_tail) - basr %r14,%r1 # call schedule_tail - TRACE_IRQS_ON - ssm __LC_SVC_NEW_PSW # reenable interrupts - tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? - jne sysc_tracenogo - # it's a kernel thread - lm %r9,%r10,__PT_R9(%r11) # load gprs -ENTRY(kernel_thread_starter) - la %r2,0(%r10) - basr %r14,%r9 - j sysc_tracenogo +SYM_CODE_START(ret_from_fork) + lgr %r3,%r11 + brasl %r14,__ret_from_fork + STACKLEAK_ERASE + GET_LC %r13 + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPON + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(ret_from_fork) /* * Program check handler routine */ -ENTRY(pgm_check_handler) - stpt __LC_SYNC_ENTER_TIMER - stm %r8,%r15,__LC_SAVE_AREA_SYNC - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 - lm %r8,%r9,__LC_PGM_OLD_PSW - tmh %r8,0x0001 # test problem state bit - jnz 1f # -> fault in user space - tmh %r8,0x4000 # PER bit set in old PSW ? - jnz 0f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception - jnz pgm_svcper # -> single stepped svc -0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC - ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 2f -1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER - l %r15,__LC_KERNEL_STACK -2: la %r11,STACK_FRAME_OVERHEAD(%r15) - stm %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC - stm %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC - mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE - tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 0f - l %r1,__TI_task(%r12) - tmh %r8,0x0001 # kernel per event ? - jz pgm_kprobe - oi __TI_flags+3(%r12),_TIF_PER_TRAP - mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE - mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID -0: REENABLE_IRQS - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - l %r1,BASED(.Ljump_table) - la %r10,0x7f - n %r10,__PT_INT_CODE(%r11) - je sysc_return - sll %r10,2 - l %r1,0(%r10,%r1) # load address of handler routine - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # branch to interrupt-handler - j sysc_return - -# -# PER event in supervisor state, must be kprobes -# -pgm_kprobe: - REENABLE_IRQS - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - l %r1,BASED(.Ldo_per_trap) - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # call do_per_trap - j sysc_return +SYM_CODE_START(pgm_check_handler) + STMG_LC %r8,%r15,__LC_SAVE_AREA + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) + BPOFF + lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) + xgr %r10,%r10 + tmhh %r8,0x0001 # coming from user space? + jo 3f # -> fault in user space +#if IS_ENABLED(CONFIG_KVM) + lg %r11,__LC_CURRENT(%r13) + tm __TI_sie(%r11),0xff + jz 1f + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 + lghi %r10,_PIF_GUEST_FAULT +#endif +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception + jnz .Lpgm_svcper # -> single stepped svc +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_invalid or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f +3: lg %r15,__LC_KERNEL_STACK(%r13) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) + stg %r10,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) + stmg %r8,%r9,__PT_PSW(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r12,%r12 + lgr %r2,%r11 + brasl %r14,__do_pgm_check + tmhh %r8,0x0001 # returning to user space? + jno .Lpgm_exit_kernel + STACKLEAK_ERASE + BPON + stpt __LC_EXIT_TIMER(%r13) +.Lpgm_exit_kernel: + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # # single stepped system call # -pgm_svcper: - oi __TI_flags+3(%r12),_TIF_PER_TRAP - mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW - mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) - lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs +.Lpgm_svcper: + mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13) + larl %r14,.Lsysc_per + stg %r14,__LC_RETURN_PSW+8(%r13) + lghi %r14,1 + LBEAR __LC_PGM_LAST_BREAK(%r13) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per +SYM_CODE_END(pgm_check_handler) /* - * IO interrupt handler routine + * Interrupt handler macro used for external and IO interrupts. */ - -ENTRY(io_int_handler) - stck __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - stm %r8,%r15,__LC_SAVE_AREA_ASYNC - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 - lm %r8,%r9,__LC_IO_OLD_PSW - tmh %r8,0x0001 # interrupting from user ? - jz io_skip - UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER -io_skip: - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT - stm %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC - stm %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - TRACE_IRQS_OFF - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) -io_loop: - l %r1,BASED(.Ldo_IRQ) - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # call do_IRQ - tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR - jz io_return - tpi 0 - jz io_return - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - j io_loop -io_return: - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON -io_tif: - tm __TI_flags+3(%r12),_TIF_WORK_INT - jnz io_work # there is work to do (signals etc.) -io_restore: - mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) - stpt __LC_EXIT_TIMER - lm %r0,%r15,__PT_R0(%r11) - lpsw __LC_RETURN_PSW -io_done: - -# -# There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work -# 2) if we return to kernel code and preemptive scheduling is enabled check -# the preemption counter and if it is zero call preempt_schedule_irq -# Before any work can be done, a switch to the kernel stack is required. -# -io_work: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jo io_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPT - # check for preemptive scheduling - icm %r0,15,__TI_precount(%r12) - jnz io_restore # preemption disabled - tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - jno io_restore - # switch to kernel stack - l %r1,__PT_R15(%r11) - ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lr %r15,%r1 - # TRACE_IRQS_ON already done at io_return, call - # TRACE_IRQS_OFF to keep things symmetrical - TRACE_IRQS_OFF - l %r1,BASED(.Lpreempt_irq) - basr %r14,%r1 # call preempt_schedule_irq - j io_return -#else - j io_restore +.macro INT_HANDLER name,lc_old_psw,handler +SYM_CODE_START(\name) + STMG_LC %r8,%r15,__LC_SAVE_AREA + GET_LC %r13 + stckf __LC_INT_CLOCK(%r13) + stpt __LC_SYS_ENTER_TIMER(%r13) + STBEAR __LC_LAST_BREAK(%r13) + BPOFF + lmg %r8,%r9,\lc_old_psw(%r13) + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f +#if IS_ENABLED(CONFIG_KVM) + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff + jz 0f + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: lg %r15,__LC_KERNEL_STACK(%r13) +2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) + MBEAR %r11,%r13 + stmg %r8,%r9,__PT_PSW(%r11) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,\handler + mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11) + tmhh %r8,0x0001 # returning to user ? + jno 2f + STACKLEAK_ERASE + BPON + stpt __LC_EXIT_TIMER(%r13) +2: LBEAR __PT_LAST_BREAK(%r11) + lmg %r0,%r15,__PT_R0(%r11) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(\name) +.endm + + .section .irqentry.text, "ax" + +INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq +INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq -# -# Need to do work before returning to userspace, switch to kernel stack -# -io_work_user: - l %r1,__LC_KERNEL_STACK - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lr %r15,%r1 - -# -# One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING -# -io_work_tif: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - jo io_mcck_pending - tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - jo io_reschedule - tm __TI_flags+3(%r12),_TIF_SIGPENDING - jo io_sigpending - tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - jo io_notify_resume - j io_return # beware of critical section cleanup - -# -# _TIF_MCCK_PENDING is set, call handler -# -io_mcck_pending: - # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Lhandle_mcck) - basr %r14,%r1 # TIF bit will be cleared by handler - TRACE_IRQS_OFF - j io_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -io_reschedule: - # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Lschedule) - ssm __LC_SVC_NEW_PSW # reenable interrupts - basr %r14,%r1 # call scheduler - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -# -# _TIF_SIGPENDING is set, call do_signal -# -io_sigpending: - # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Ldo_signal) - ssm __LC_SVC_NEW_PSW # reenable interrupts - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # call do_signal - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -# -# _TIF_SIGPENDING is set, call do_signal -# -io_notify_resume: - # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Ldo_notify_resume) - ssm __LC_SVC_NEW_PSW # reenable interrupts - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # call do_notify_resume - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -/* - * External interrupt handler routine - */ - -ENTRY(ext_int_handler) - stck __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - stm %r8,%r15,__LC_SAVE_AREA_ASYNC - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 - lm %r8,%r9,__LC_EXT_OLD_PSW - tmh %r8,0x0001 # interrupting from user ? - jz ext_skip - UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER -ext_skip: - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT - stm %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC - stm %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR - mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS - TRACE_IRQS_OFF - lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 # call do_extint - j io_return - -/* - * Load idle PSW. The second "half" of this function is in cleanup_idle. - */ -ENTRY(psw_idle) - st %r3,__SF_EMPTY(%r15) - basr %r1,0 - la %r1,psw_idle_lpsw+4-.(%r1) - st %r1,__SF_EMPTY+4(%r15) - oi __SF_EMPTY+4(%r15),0x80 - stck __CLOCK_IDLE_ENTER(%r2) - stpt __TIMER_IDLE_ENTER(%r2) -psw_idle_lpsw: - lpsw __SF_EMPTY(%r15) - br %r14 -psw_idle_end: - -__critical_end: + .section .kprobes.text, "ax" /* * Machine check handler routines */ - -ENTRY(mcck_int_handler) - stck __LC_MCCK_CLOCK - spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer - lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - l %r12,__LC_THREAD_INFO - l %r13,__LC_SVC_NEW_PSW+4 - lm %r8,%r9,__LC_MCK_OLD_PSW - tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_panic # yes -> rest of mcck code invalid - la %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? +SYM_CODE_START(mcck_int_handler) + BPOFF + GET_LC %r13 + lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE + jo .Lmcck_panic # yes -> rest of mcck code invalid + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID + jno .Lmcck_panic # control registers invalid -> panic + ptlb + lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYNC_ENTER_TIMER - clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - jl 0f - la %r14,__LC_ASYNC_ENTER_TIMER -0: clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER(%r13) + clc 0(8,%r14),__LC_EXIT_TIMER(%r13) jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + la %r14,__LC_EXIT_TIMER(%r13) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13) jl 2f - la %r14,__LC_LAST_UPDATE_TIMER + la %r14,__LC_LAST_UPDATE_TIMER(%r13) 2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_panic # no -> skip cleanup critical - tm %r8,0x0001 # interrupting from user ? - jz mcck_skip - UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER -mcck_skip: - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT - stm %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 - stm %r8,%r9,__PT_PSW(%r11) - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - l %r1,BASED(.Ldo_machine_check) - lr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # call s390_do_machine_check - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno mcck_return - l %r1,__LC_KERNEL_STACK # switch to kernel stack - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) +3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID + jno .Lmcck_panic + tmhh %r8,0x0001 # interrupting from user ? + jnz .Lmcck_user + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID + jno .Lmcck_panic +#if IS_ENABLED(CONFIG_KVM) + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff + jz .Lmcck_user + # Need to compare the address instead of __TI_SIE flag. + # Otherwise there would be a race between setting the flag + # and entering SIE (or leaving and clearing the flag). This + # would cause machine checks targeted at the guest to be + # handled by the host. + larl %r14,.Lsie_entry + clgrjl %r9,%r14, 4f + larl %r14,.Lsie_leave + clgrjhe %r9,%r14, 4f + lg %r10,__LC_PCPU(%r13) + oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST +4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 +#endif +.Lmcck_user: + lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - lr %r15,%r1 - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - jno mcck_return - TRACE_IRQS_OFF - l %r1,BASED(.Lhandle_mcck) - basr %r14,%r1 # call s390_handle_mcck - TRACE_IRQS_ON -mcck_return: - mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lay %r14,__LC_GPREGS_SAVE_AREA(%r13) + mvc __PT_R0(128,%r11),0(%r14) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 + stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,s390_do_machine_check + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? jno 0f - lm %r0,%r15,__PT_R0(%r11) - stpt __LC_EXIT_TIMER - lpsw __LC_RETURN_MCCK_PSW -0: lm %r0,%r15,__PT_R0(%r11) - lpsw __LC_RETURN_MCCK_PSW - -mcck_panic: - l %r14,__LC_PANIC_STACK - slr %r14,%r15 - sra %r14,PAGE_SHIFT + BPON + stpt __LC_EXIT_TIMER(%r13) +0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\ + ALT_FACILITY(193) + LBEAR 0(%r12) + lmg %r11,%r15,__PT_R11(%r11) + LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE + +.Lmcck_panic: + /* + * Iterate over all possible CPU addresses in the range 0..0xffff + * and stop each CPU using signal processor. Use compare and swap + * to allow just one CPU-stopper and prevent concurrent CPUs from + * stopping each other while leaving the others running. + */ + lhi %r5,0 + lhi %r6,1 + larl %r7,stop_lock + cs %r5,%r6,0(%r7) # single CPU-stopper only + jnz 4f + larl %r7,this_cpu + stap 0(%r7) # this CPU address + lh %r4,0(%r7) + nilh %r4,0 + lhi %r0,1 + sll %r0,16 # CPU counter + lhi %r3,0 # next CPU address +0: cr %r3,%r4 + je 2f +1: sigp %r1,%r3,SIGP_STOP # stop next CPU + brc SIGP_CC_BUSY,1b +2: ahi %r3,1 + brct %r0,0b +3: sigp %r1,%r4,SIGP_STOP # stop this CPU + brc SIGP_CC_BUSY,3b +4: j 4b +SYM_CODE_END(mcck_int_handler) + +SYM_CODE_START(restart_int_handler) + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) + stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f - l %r15,__LC_PANIC_STACK - j mcck_skip -0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j mcck_skip - -# -# PSW restart interrupt handler -# -ENTRY(restart_int_handler) - st %r15,__LC_SAVE_AREA_RESTART - l %r15,__LC_RESTART_STACK - ahi %r15,-__PT_SIZE # create pt_regs on stack - xc 0(__PT_SIZE,%r15),0(%r15) - stm %r0,%r14,__PT_R0(%r15) - mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART - mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw - ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA +0: larl %r15,daton_psw + lpswe 0(%r15) # turn dat on, keep irqs off +.Ldaton: + GET_LC %r15 + lg %r15,__LC_RESTART_STACK(%r15) + xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + GET_LC %r13 + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13) + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - l %r1,__LC_RESTART_FN # load fn, parm & source cpu - l %r2,__LC_RESTART_DATA - l %r3,__LC_RESTART_SOURCE - ltr %r3,%r3 # test source cpu address + lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA(%r13) + lgf %r3,__LC_RESTART_SOURCE(%r13) + ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu brc 10,0b # wait for status stored 1: basr %r14,%r1 # call function stap __SF_EMPTY(%r15) # store cpu address - lh %r3,__SF_EMPTY(%r15) + llgh %r3,__SF_EMPTY(%r15) 2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu brc 2,2b 3: j 3b +SYM_CODE_END(restart_int_handler) + + __INIT +SYM_CODE_START(early_pgm_check_handler) + STMG_LC %r8,%r15,__LC_SAVE_AREA + GET_LC %r13 + aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) + lgr %r2,%r11 + brasl %r14,__do_early_pgm_check + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(early_pgm_check_handler) + __FINIT .section .kprobes.text, "ax" -#ifdef CONFIG_CHECK_STACK /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -stack_overflow: - l %r15,__LC_PANIC_STACK # change to panic stack +SYM_CODE_START(stack_invalid) + GET_LC %r15 + lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) - stm %r0,%r7,__PT_R0(%r11) - stm %r8,%r9,__PT_PSW(%r11) - mvc __PT_R8(32,%r11),0(%r14) - l %r1,BASED(1f) - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - lr %r2,%r11 # pass pointer to pt_regs - br %r1 # branch to kernel_stack_overflow -1: .long kernel_stack_overflow -#endif - -cleanup_table: - .long system_call + 0x80000000 - .long sysc_do_svc + 0x80000000 - .long sysc_tif + 0x80000000 - .long sysc_restore + 0x80000000 - .long sysc_done + 0x80000000 - .long io_tif + 0x80000000 - .long io_restore + 0x80000000 - .long io_done + 0x80000000 - .long psw_idle + 0x80000000 - .long psw_idle_end + 0x80000000 - -cleanup_critical: - cl %r9,BASED(cleanup_table) # system_call - jl 0f - cl %r9,BASED(cleanup_table+4) # sysc_do_svc - jl cleanup_system_call - cl %r9,BASED(cleanup_table+8) # sysc_tif - jl 0f - cl %r9,BASED(cleanup_table+12) # sysc_restore - jl cleanup_sysc_tif - cl %r9,BASED(cleanup_table+16) # sysc_done - jl cleanup_sysc_restore - cl %r9,BASED(cleanup_table+20) # io_tif - jl 0f - cl %r9,BASED(cleanup_table+24) # io_restore - jl cleanup_io_tif - cl %r9,BASED(cleanup_table+28) # io_done - jl cleanup_io_restore - cl %r9,BASED(cleanup_table+32) # psw_idle - jl 0f - cl %r9,BASED(cleanup_table+36) # psw_idle_end - jl cleanup_idle -0: br %r14 - -cleanup_system_call: - # check if stpt has been executed - cl %r9,BASED(cleanup_system_call_insn) - jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER - chi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER -0: # check if stm has been executed - cl %r9,BASED(cleanup_system_call_insn+4) - jh 0f - mvc __LC_SAVE_AREA_SYNC(32),0(%r11) -0: # set up saved registers r12, and r13 - st %r12,16(%r11) # r12 thread-info pointer - st %r13,20(%r11) # r13 literal-pool pointer - # check if the user time calculation has been done - cl %r9,BASED(cleanup_system_call_insn+8) - jh 0f - l %r10,__LC_EXIT_TIMER - l %r15,__LC_EXIT_TIMER+4 - SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER - ADD64 %r10,%r15,__LC_USER_TIMER - st %r10,__LC_USER_TIMER - st %r15,__LC_USER_TIMER+4 -0: # check if the system time calculation has been done - cl %r9,BASED(cleanup_system_call_insn+12) - jh 0f - l %r10,__LC_LAST_UPDATE_TIMER - l %r15,__LC_LAST_UPDATE_TIMER+4 - SUB64 %r10,%r15,__LC_EXIT_TIMER - ADD64 %r10,%r15,__LC_SYSTEM_TIMER - st %r10,__LC_SYSTEM_TIMER - st %r15,__LC_SYSTEM_TIMER+4 -0: # update accounting time stamp - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - # set up saved register 11 - l %r15,__LC_KERNEL_STACK - la %r9,STACK_FRAME_OVERHEAD(%r15) - st %r9,12(%r11) # r11 pt_regs pointer - # fill pt_regs - mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC - stm %r0,%r7,__PT_R0(%r9) - mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC - # setup saved register 15 - st %r15,28(%r11) # r15 stack pointer - # set new psw address and exit - l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 - br %r14 -cleanup_system_call_insn: - .long system_call + 0x80000000 - .long sysc_stm + 0x80000000 - .long sysc_vtime + 0x80000000 + 36 - .long sysc_vtime + 0x80000000 + 76 - -cleanup_sysc_tif: - l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 - br %r14 - -cleanup_sysc_restore: - cl %r9,BASED(cleanup_sysc_restore_insn) - jhe 0f - l %r9,12(%r11) # get saved pointer to pt_regs - mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) - mvc 0(32,%r11),__PT_R8(%r9) - lm %r0,%r7,__PT_R0(%r9) -0: lm %r8,%r9,__LC_RETURN_PSW - br %r14 -cleanup_sysc_restore_insn: - .long sysc_done - 4 + 0x80000000 - -cleanup_io_tif: - l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 - br %r14 - -cleanup_io_restore: - cl %r9,BASED(cleanup_io_restore_insn) - jhe 0f - l %r9,12(%r11) # get saved r11 pointer to pt_regs - mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) - mvc 0(32,%r11),__PT_R8(%r9) - lm %r0,%r7,__PT_R0(%r9) -0: lm %r8,%r9,__LC_RETURN_PSW - br %r14 -cleanup_io_restore_insn: - .long io_done - 4 + 0x80000000 - -cleanup_idle: - # copy interrupt clock & cpu timer - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER - chi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER -0: # check if stck has been executed - cl %r9,BASED(cleanup_idle_insn) - jhe 1f - mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) - mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3) -1: # account system time going idle - lm %r9,%r10,__LC_STEAL_TIMER - ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2) - SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK - stm %r9,%r10,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - lm %r9,%r10,__LC_SYSTEM_TIMER - ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER - SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2) - stm %r9,%r10,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - # prepare return psw - n %r8,BASED(cleanup_idle_wait) # clear wait state bit - l %r9,24(%r11) # return from psw_idle - br %r14 -cleanup_idle_insn: - .long psw_idle_lpsw + 0x80000000 -cleanup_idle_wait: - .long 0xfffdffff - -/* - * Integer constants - */ - .align 4 -.Lnr_syscalls: - .long NR_syscalls -.Lvtimer_max: - .quad 0x7fffffffffffffff - -/* - * Symbol constants - */ -.Ldo_machine_check: .long s390_do_machine_check -.Lhandle_mcck: .long s390_handle_mcck -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Ldo_notify_resume: .long do_notify_resume -.Ldo_per_trap: .long do_per_trap -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule -#ifdef CONFIG_PREEMPT -.Lpreempt_irq: .long preempt_schedule_irq -#endif -.Ltrace_enter: .long do_syscall_trace_enter -.Ltrace_exit: .long do_syscall_trace_exit -.Lschedule_tail: .long schedule_tail -.Lsysc_per: .long sysc_per + 0x80000000 -#ifdef CONFIG_TRACE_IRQFLAGS -.Lhardirqs_on: .long trace_hardirqs_on_caller -.Lhardirqs_off: .long trace_hardirqs_off_caller -#endif -#ifdef CONFIG_LOCKDEP -.Llockdep_sys_exit: .long lockdep_sys_exit -#endif -.Lcritical_start: .long __critical_start + 0x80000000 -.Lcritical_length: .long __critical_end - __critical_start - - .section .rodata, "a" -#define SYSCALL(esa,esame,emu) .long esa - .globl sys_call_table -sys_call_table: -#include "syscalls.S" -#undef SYSCALL + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) + + .section .data, "aw" + .balign 4 +SYM_DATA_LOCAL(stop_lock, .long 0) +SYM_DATA_LOCAL(this_cpu, .short 0) + .balign 8 +SYM_DATA_START_LOCAL(daton_psw) + .quad PSW_KERNEL_BITS + .quad .Ldaton +SYM_DATA_END(daton_psw) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 3ddbc26d246e..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -1,13 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ENTRY_H #define _ENTRY_H +#include <linux/percpu.h> #include <linux/types.h> #include <linux/signal.h> +#include <asm/extable.h> #include <asm/ptrace.h> -#include <asm/cputime.h> +#include <asm/idle.h> extern void *restart_stack; -extern unsigned long suspend_zero_pages; void system_call(void); void pgm_check_handler(void); @@ -15,68 +17,59 @@ void ext_int_handler(void); void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -void restart_call_handler(void); -void psw_idle(struct s390_idle_data *, unsigned long); +void early_pgm_check_handler(void); -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +struct task_struct *__switch_to_asm(struct task_struct *prev, struct task_struct *next); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs); +void __do_pgm_check(struct pt_regs *regs); +void __do_syscall(struct pt_regs *regs, int per_trap); +void __do_early_pgm_check(struct pt_regs *regs); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); -void do_asce_exception(struct pt_regs *regs); +void do_secure_storage_access(struct pt_regs *regs); +void do_non_secure_storage_access(struct pt_regs *regs); +void do_secure_storage_violation(struct pt_regs *regs); +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); +void kernel_stack_invalid(struct pt_regs *regs); +void handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs); -void addressing_exception(struct pt_regs *regs); -void data_exception(struct pt_regs *regs); -void default_trap_handler(struct pt_regs *regs); -void divide_exception(struct pt_regs *regs); -void execute_exception(struct pt_regs *regs); -void hfp_divide_exception(struct pt_regs *regs); -void hfp_overflow_exception(struct pt_regs *regs); -void hfp_significance_exception(struct pt_regs *regs); -void hfp_sqrt_exception(struct pt_regs *regs); -void hfp_underflow_exception(struct pt_regs *regs); -void illegal_op(struct pt_regs *regs); -void operand_exception(struct pt_regs *regs); -void overflow_exception(struct pt_regs *regs); -void privileged_op(struct pt_regs *regs); -void space_switch_exception(struct pt_regs *regs); -void special_op_exception(struct pt_regs *regs); -void specification_exception(struct pt_regs *regs); -void transaction_exception(struct pt_regs *regs); -void translation_exception(struct pt_regs *regs); - -void do_per_trap(struct pt_regs *regs); -void syscall_trace(struct pt_regs *regs, int entryexit); -void kernel_stack_overflow(struct pt_regs * regs); -void do_signal(struct pt_regs *regs); -void handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); -void do_notify_resume(struct pt_regs *regs); - -struct ext_code; -void do_extint(struct pt_regs *regs); -void do_restart(void); +void do_io_irq(struct pt_regs *regs); +void do_ext_irq(struct pt_regs *regs); +void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); - -void __init time_init(void); +int setup_profiling_timer(unsigned int multiplier); struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -long sys_mmap2(struct s390_mmap_arg_struct __user *arg); -long sys_s390_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr); -long sys_s390_personality(unsigned int personality); -long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low, - size_t len, int advice); -long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); -long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, - u32 len_low); -long sys_sigreturn(void); long sys_rt_sigreturn(void); -long sys32_sigreturn(void); -long sys32_rt_sigreturn(void); +long sys_sigreturn(void); + +long sys_s390_personality(unsigned int personality); +long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); +long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); +long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); +long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags); + +DECLARE_PER_CPU(u64, mt_cycles[8]); + +unsigned long stack_alloc(void); +void stack_free(unsigned long stack); + +extern char kprobes_insn_page[]; + +extern char _samode31[], _eamode31[]; +extern char _stext_amode31[], _etext_amode31[]; +extern struct exception_table_entry _start_amode31_ex_table[]; +extern struct exception_table_entry _stop_amode31_ex_table[]; + +#define __amode31_data __section(".amode31.data") +#define __amode31_ref __section(".amode31.refs") +extern long _start_amode31_refs[], _end_amode31_refs[]; #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S deleted file mode 100644 index 1c039d0c24c7..000000000000 --- a/arch/s390/kernel/entry64.S +++ /dev/null @@ -1,1011 +0,0 @@ -/* - * S390 low-level entry points. - * - * Copyright IBM Corp. 1999, 2012 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/processor.h> -#include <asm/cache.h> -#include <asm/errno.h> -#include <asm/ptrace.h> -#include <asm/thread_info.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/page.h> -#include <asm/sigp.h> - -__PT_R0 = __PT_GPRS -__PT_R1 = __PT_GPRS + 8 -__PT_R2 = __PT_GPRS + 16 -__PT_R3 = __PT_GPRS + 24 -__PT_R4 = __PT_GPRS + 32 -__PT_R5 = __PT_GPRS + 40 -__PT_R6 = __PT_GPRS + 48 -__PT_R7 = __PT_GPRS + 56 -__PT_R8 = __PT_GPRS + 64 -__PT_R9 = __PT_GPRS + 72 -__PT_R10 = __PT_GPRS + 80 -__PT_R11 = __PT_GPRS + 88 -__PT_R12 = __PT_GPRS + 96 -__PT_R13 = __PT_GPRS + 104 -__PT_R14 = __PT_GPRS + 112 -__PT_R15 = __PT_GPRS + 120 - -STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER -STACK_SIZE = 1 << STACK_SHIFT -STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE - -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_PER_TRAP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) - -#define BASED(name) name-system_call(%r13) - - .macro TRACE_IRQS_ON -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_on_caller -#endif - .endm - - .macro TRACE_IRQS_OFF -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_off_caller -#endif - .endm - - .macro LOCKDEP_SYS_EXIT -#ifdef CONFIG_LOCKDEP - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jz .+10 - brasl %r14,lockdep_sys_exit -#endif - .endm - - .macro LPP newpp -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP - jz .+8 - .insn s,0xb2800000,\newpp -#endif - .endm - - .macro HANDLE_SIE_INTERCEPT scratch,reason -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tmhh %r8,0x0001 # interrupting from user ? - jnz .+62 - lgr \scratch,%r9 - slg \scratch,BASED(.Lsie_critical) - clg \scratch,BASED(.Lsie_critical_length) - .if \reason==1 - # Some program interrupts are suppressing (e.g. protection). - # We must also check the instruction after SIE in that case. - # do_protection_exception will rewind to rewind_pad - jh .+42 - .else - jhe .+42 - .endif - lg %r14,__SF_EMPTY(%r15) # get control block pointer - LPP __SF_EMPTY+16(%r15) # set host id - ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - larl %r9,sie_exit # skip forward to sie_exit - mvi __SF_EMPTY+31(%r15),\reason # set exit reason -#endif - .endm - - .macro CHECK_STACK stacksize,savearea -#ifdef CONFIG_CHECK_STACK - tml %r15,\stacksize - CONFIG_STACK_GUARD - lghi %r14,\savearea - jz stack_overflow -#endif - .endm - - .macro SWITCH_ASYNC savearea,stack,shift - tmhh %r8,0x0001 # interrupting from user ? - jnz 1f - lgr %r14,%r9 - slg %r14,BASED(.Lcritical_start) - clg %r14,BASED(.Lcritical_length) - jhe 0f - lghi %r11,\savearea # inside critical section, do cleanup - brasl %r14,cleanup_critical - tmhh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: lg %r14,\stack # are we already on the target stack? - slgr %r14,%r15 - srag %r14,%r14,\shift - jnz 1f - CHECK_STACK 1<<\shift,\savearea - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 2f -1: lg %r15,\stack # load target stack -2: la %r11,STACK_FRAME_OVERHEAD(%r15) - .endm - - .macro UPDATE_VTIME scratch,enter_timer - lg \scratch,__LC_EXIT_TIMER - slg \scratch,\enter_timer - alg \scratch,__LC_USER_TIMER - stg \scratch,__LC_USER_TIMER - lg \scratch,__LC_LAST_UPDATE_TIMER - slg \scratch,__LC_EXIT_TIMER - alg \scratch,__LC_SYSTEM_TIMER - stg \scratch,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer - .endm - - .macro LAST_BREAK scratch - srag \scratch,%r10,23 - jz .+10 - stg %r10,__TI_last_break(%r12) - .endm - - .macro REENABLE_IRQS - stg %r8,__LC_RETURN_PSW - ni __LC_RETURN_PSW,0xbf - ssm __LC_RETURN_PSW - .endm - - .macro STCK savearea -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - .insn s,0xb27c0000,\savearea # store clock fast -#else - .insn s,0xb2050000,\savearea # store clock -#endif - .endm - - .section .kprobes.text, "ax" - -/* - * Scheduler resume function, called by switch_to - * gpr2 = (task_struct *) prev - * gpr3 = (task_struct *) next - * Returns: - * gpr2 = prev - */ -ENTRY(__switch_to) - stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev - lg %r4,__THREAD_info(%r2) # get thread_info of prev - lg %r5,__THREAD_info(%r3) # get thread_info of next - lgr %r15,%r5 - aghi %r15,STACK_INIT # end of kernel stack of next - stg %r3,__LC_CURRENT # store task struct of next - stg %r5,__LC_THREAD_INFO # store thread info of next - stg %r15,__LC_KERNEL_STACK # store end of kernel stack - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next - lg %r15,__THREAD_ksp(%r3) # load kernel stack of next - tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? - jz 0f - ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next -0: lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - br %r14 - -__critical_start: -/* - * SVC interrupt handler routine. System calls are synchronous events and - * are executed with interrupts enabled. - */ - -ENTRY(system_call) - stpt __LC_SYNC_ENTER_TIMER -sysc_stmg: - stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r10,__LC_LAST_BREAK - lg %r12,__LC_THREAD_INFO -sysc_per: - lg %r15,__LC_KERNEL_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs -sysc_vtime: - UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER - LAST_BREAK %r13 - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC -sysc_do_svc: - oi __TI_flags+7(%r12),_TIF_SYSCALL - lg %r10,__TI_sysc_table(%r12) # address of system call table - llgh %r8,__PT_INT_CODE+2(%r11) - slag %r8,%r8,2 # shift and test for svc 0 - jnz sysc_nr_ok - # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 - cghi %r1,NR_syscalls - jnl sysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) - slag %r8,%r1,2 -sysc_nr_ok: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stg %r2,__PT_ORIG_GPR2(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lgf %r9,0(%r8,%r10) # get system call add. - tm __TI_flags+6(%r12),_TIF_TRACE >> 8 - jnz sysc_tracesys - basr %r14,%r9 # call sys_xxxx - stg %r2,__PT_R2(%r11) # store return value - -sysc_return: - LOCKDEP_SYS_EXIT -sysc_tif: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno sysc_restore - tm __TI_flags+7(%r12),_TIF_WORK_SVC - jnz sysc_work # check for work - ni __TI_flags+7(%r12),255-_TIF_SYSCALL -sysc_restore: - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW -sysc_done: - -# -# One of the work bits is on. Find out which one. -# -sysc_work: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING - jo sysc_mcck_pending - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED - jo sysc_reschedule - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - tm __TI_flags+7(%r12),_TIF_SIGPENDING - jo sysc_sigpending - tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME - jo sysc_notify_resume - j sysc_return # beware of critical section cleanup - -# -# _TIF_NEED_RESCHED is set, call schedule -# -sysc_reschedule: - larl %r14,sysc_return - jg schedule - -# -# _TIF_MCCK_PENDING is set, call handler -# -sysc_mcck_pending: - larl %r14,sysc_return - jg s390_handle_mcck # TIF bit will be cleared by handler - -# -# _TIF_SIGPENDING is set, call do_signal -# -sysc_sigpending: - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - tm __TI_flags+7(%r12),_TIF_SYSCALL - jno sysc_return - lmg %r2,%r7,__PT_R2(%r11) # load svc arguments - lghi %r8,0 # svc 0 returns -ENOSYS - llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number - cghi %r1,NR_syscalls - jnl sysc_nr_ok # invalid svc number -> do svc 0 - slag %r8,%r1,2 - j sysc_nr_ok # restart svc - -# -# _TIF_NOTIFY_RESUME is set, call do_notify_resume -# -sysc_notify_resume: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,sysc_return - jg do_notify_resume - -# -# _TIF_PER_TRAP is set, call do_per_trap -# -sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,sysc_return - jg do_per_trap - -# -# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before -# and after the system call -# -sysc_tracesys: - lgr %r2,%r11 # pass pointer to pt_regs - la %r3,0 - llgh %r0,__PT_INT_CODE+2(%r11) - stg %r0,__PT_R2(%r11) - brasl %r14,do_syscall_trace_enter - lghi %r0,NR_syscalls - clgr %r0,%r2 - jnh sysc_tracenogo - sllg %r8,%r2,2 - lgf %r9,0(%r8,%r10) -sysc_tracego: - lmg %r3,%r7,__PT_R3(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r2,__PT_ORIG_GPR2(%r11) - basr %r14,%r9 # call sys_xxx - stg %r2,__PT_R2(%r11) # store return value -sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_TRACE >> 8 - jz sysc_return - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,sysc_return - jg do_syscall_trace_exit - -# -# a new process exits the kernel with ret_from_fork -# -ENTRY(ret_from_fork) - la %r11,STACK_FRAME_OVERHEAD(%r15) - lg %r12,__LC_THREAD_INFO - brasl %r14,schedule_tail - TRACE_IRQS_ON - ssm __LC_SVC_NEW_PSW # reenable interrupts - tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? - jne sysc_tracenogo - # it's a kernel thread - lmg %r9,%r10,__PT_R9(%r11) # load gprs -ENTRY(kernel_thread_starter) - la %r2,0(%r10) - basr %r14,%r9 - j sysc_tracenogo - -/* - * Program check handler routine - */ - -ENTRY(pgm_check_handler) - stpt __LC_SYNC_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r10,__LC_LAST_BREAK - lg %r12,__LC_THREAD_INFO - larl %r13,system_call - lmg %r8,%r9,__LC_PGM_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,1 - tmhh %r8,0x0001 # test problem state bit - jnz 1f # -> fault in user space - tmhh %r8,0x4000 # PER bit set in old PSW ? - jnz 0f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception - jnz pgm_svcper # -> single stepped svc -0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 2f -1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER - LAST_BREAK %r14 - lg %r15,__LC_KERNEL_STACK - lg %r14,__TI_task(%r12) - lghi %r13,__LC_PGM_TDB - tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 2f - mvc __THREAD_trap_tdb(256,%r14),0(%r13) -2: la %r11,STACK_FRAME_OVERHEAD(%r15) - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC - mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - stg %r10,__PT_ARGS(%r11) - tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 0f - tmhh %r8,0x0001 # kernel per event ? - jz pgm_kprobe - oi __TI_flags+7(%r12),_TIF_PER_TRAP - mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE - mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID -0: REENABLE_IRQS - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - larl %r1,pgm_check_table - llgh %r10,__PT_INT_CODE+2(%r11) - nill %r10,0x007f - sll %r10,2 - je sysc_return - lgf %r1,0(%r10,%r1) # load address of handler routine - lgr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # branch to interrupt-handler - j sysc_return - -# -# PER event in supervisor state, must be kprobes -# -pgm_kprobe: - REENABLE_IRQS - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_per_trap - j sysc_return - -# -# single stepped system call -# -pgm_svcper: - oi __TI_flags+7(%r12),_TIF_PER_TRAP - mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW - larl %r14,sysc_per - stg %r14,__LC_RETURN_PSW+8 - lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs - -/* - * IO interrupt handler routine - */ -ENTRY(io_int_handler) - STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r10,__LC_LAST_BREAK - lg %r12,__LC_THREAD_INFO - larl %r13,system_call - lmg %r8,%r9,__LC_IO_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,2 - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT - tmhh %r8,0x0001 # interrupting from user? - jz io_skip - UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER - LAST_BREAK %r14 -io_skip: - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - TRACE_IRQS_OFF - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -io_loop: - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_IRQ - tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR - jz io_return - tpi 0 - jz io_return - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - j io_loop -io_return: - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON -io_tif: - tm __TI_flags+7(%r12),_TIF_WORK_INT - jnz io_work # there is work to do (signals etc.) -io_restore: - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW -io_done: - -# -# There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work -# 2) if we return to kernel code and kvm is enabled check if we need to -# modify the psw to leave SIE -# 3) if we return to kernel code and preemptive scheduling is enabled check -# the preemption counter and if it is zero call preempt_schedule_irq -# Before any work can be done, a switch to the kernel stack is required. -# -io_work: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jo io_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPT - # check for preemptive scheduling - icm %r0,15,__TI_precount(%r12) - jnz io_restore # preemption is disabled - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED - jno io_restore - # switch to kernel stack - lg %r1,__PT_R15(%r11) - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - # TRACE_IRQS_ON already done at io_return, call - # TRACE_IRQS_OFF to keep things symmetrical - TRACE_IRQS_OFF - brasl %r14,preempt_schedule_irq - j io_return -#else - j io_restore -#endif - -# -# Need to do work before returning to userspace, switch to kernel stack -# -io_work_user: - lg %r1,__LC_KERNEL_STACK - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - -# -# One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING -# -io_work_tif: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING - jo io_mcck_pending - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED - jo io_reschedule - tm __TI_flags+7(%r12),_TIF_SIGPENDING - jo io_sigpending - tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME - jo io_notify_resume - j io_return # beware of critical section cleanup - -# -# _TIF_MCCK_PENDING is set, call handler -# -io_mcck_pending: - # TRACE_IRQS_ON already done at io_return - brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - TRACE_IRQS_OFF - j io_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -io_reschedule: - # TRACE_IRQS_ON already done at io_return - ssm __LC_SVC_NEW_PSW # reenable interrupts - brasl %r14,schedule # call scheduler - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -# -# _TIF_SIGPENDING or is set, call do_signal -# -io_sigpending: - # TRACE_IRQS_ON already done at io_return - ssm __LC_SVC_NEW_PSW # reenable interrupts - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -# -# _TIF_NOTIFY_RESUME or is set, call do_notify_resume -# -io_notify_resume: - # TRACE_IRQS_ON already done at io_return - ssm __LC_SVC_NEW_PSW # reenable interrupts - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_notify_resume - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF - j io_return - -/* - * External interrupt handler routine - */ -ENTRY(ext_int_handler) - STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r10,__LC_LAST_BREAK - lg %r12,__LC_THREAD_INFO - larl %r13,system_call - lmg %r8,%r9,__LC_EXT_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,3 - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT - tmhh %r8,0x0001 # interrupting from user ? - jz ext_skip - UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER - LAST_BREAK %r14 -ext_skip: - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - stmg %r8,%r9,__PT_PSW(%r11) - lghi %r1,__LC_EXT_PARAMS2 - mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR - mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS - mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) - TRACE_IRQS_OFF - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_extint - j io_return - -/* - * Load idle PSW. The second "half" of this function is in cleanup_idle. - */ -ENTRY(psw_idle) - stg %r3,__SF_EMPTY(%r15) - larl %r1,psw_idle_lpsw+4 - stg %r1,__SF_EMPTY+8(%r15) - STCK __CLOCK_IDLE_ENTER(%r2) - stpt __TIMER_IDLE_ENTER(%r2) -psw_idle_lpsw: - lpswe __SF_EMPTY(%r15) - br %r14 -psw_idle_end: - -__critical_end: - -/* - * Machine check handler routines - */ -ENTRY(mcck_int_handler) - STCK __LC_MCCK_CLOCK - la %r1,4095 # revalidate r1 - spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - lg %r10,__LC_LAST_BREAK - lg %r12,__LC_THREAD_INFO - larl %r13,system_call - lmg %r8,%r9,__LC_MCK_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,4 - tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_panic # yes -> rest of mcck code invalid - lghi %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 3f - la %r14,__LC_SYNC_ENTER_TIMER - clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - jl 0f - la %r14,__LC_ASYNC_ENTER_TIMER -0: clc 0(8,%r14),__LC_EXIT_TIMER - jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - jl 2f - la %r14,__LC_LAST_UPDATE_TIMER -2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_panic # no -> skip cleanup critical - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT - tm %r8,0x0001 # interrupting from user ? - jz mcck_skip - UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER - LAST_BREAK %r14 -mcck_skip: - lghi %r14,__LC_GPREGS_SAVE_AREA+64 - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),0(%r14) - stmg %r8,%r9,__PT_PSW(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,s390_do_machine_check - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno mcck_return - lg %r1,__LC_KERNEL_STACK # switch to kernel stack - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING - jno mcck_return - TRACE_IRQS_OFF - brasl %r14,s390_handle_mcck - TRACE_IRQS_ON -mcck_return: - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? - jno 0f - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER -0: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_MCCK_PSW - -mcck_panic: - lg %r14,__LC_PANIC_STACK - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK -0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j mcck_skip - -# -# PSW restart interrupt handler -# -ENTRY(restart_int_handler) - stg %r15,__LC_SAVE_AREA_RESTART - lg %r15,__LC_RESTART_STACK - aghi %r15,-__PT_SIZE # create pt_regs on stack - xc 0(__PT_SIZE,%r15),0(%r15) - stmg %r0,%r14,__PT_R0(%r15) - mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw - aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack - xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - lg %r1,__LC_RESTART_FN # load fn, parm & source cpu - lg %r2,__LC_RESTART_DATA - lg %r3,__LC_RESTART_SOURCE - ltgr %r3,%r3 # test source cpu address - jm 1f # negative -> skip source stop -0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu - brc 10,0b # wait for status stored -1: basr %r14,%r1 # call function - stap __SF_EMPTY(%r15) # store cpu address - llgh %r3,__SF_EMPTY(%r15) -2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu - brc 2,2b -3: j 3b - - .section .kprobes.text, "ax" - -#ifdef CONFIG_CHECK_STACK -/* - * The synchronous or the asynchronous stack overflowed. We are dead. - * No need to properly save the registers, we are going to panic anyway. - * Setup a pt_regs so that show_trace can provide a good call trace. - */ -stack_overflow: - lg %r15,__LC_PANIC_STACK # change to panic stack - la %r11,STACK_FRAME_OVERHEAD(%r15) - stmg %r0,%r7,__PT_R0(%r11) - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -#endif - - .align 8 -cleanup_table: - .quad system_call - .quad sysc_do_svc - .quad sysc_tif - .quad sysc_restore - .quad sysc_done - .quad io_tif - .quad io_restore - .quad io_done - .quad psw_idle - .quad psw_idle_end - -cleanup_critical: - clg %r9,BASED(cleanup_table) # system_call - jl 0f - clg %r9,BASED(cleanup_table+8) # sysc_do_svc - jl cleanup_system_call - clg %r9,BASED(cleanup_table+16) # sysc_tif - jl 0f - clg %r9,BASED(cleanup_table+24) # sysc_restore - jl cleanup_sysc_tif - clg %r9,BASED(cleanup_table+32) # sysc_done - jl cleanup_sysc_restore - clg %r9,BASED(cleanup_table+40) # io_tif - jl 0f - clg %r9,BASED(cleanup_table+48) # io_restore - jl cleanup_io_tif - clg %r9,BASED(cleanup_table+56) # io_done - jl cleanup_io_restore - clg %r9,BASED(cleanup_table+64) # psw_idle - jl 0f - clg %r9,BASED(cleanup_table+72) # psw_idle_end - jl cleanup_idle -0: br %r14 - - -cleanup_system_call: - # check if stpt has been executed - clg %r9,BASED(cleanup_system_call_insn) - jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER -0: # check if stmg has been executed - clg %r9,BASED(cleanup_system_call_insn+8) - jh 0f - mvc __LC_SAVE_AREA_SYNC(64),0(%r11) -0: # check if base register setup + TIF bit load has been done - clg %r9,BASED(cleanup_system_call_insn+16) - jhe 0f - # set up saved registers r10 and r12 - stg %r10,16(%r11) # r10 last break - stg %r12,32(%r11) # r12 thread-info pointer -0: # check if the user time update has been done - clg %r9,BASED(cleanup_system_call_insn+24) - jh 0f - lg %r15,__LC_EXIT_TIMER - slg %r15,__LC_SYNC_ENTER_TIMER - alg %r15,__LC_USER_TIMER - stg %r15,__LC_USER_TIMER -0: # check if the system time update has been done - clg %r9,BASED(cleanup_system_call_insn+32) - jh 0f - lg %r15,__LC_LAST_UPDATE_TIMER - slg %r15,__LC_EXIT_TIMER - alg %r15,__LC_SYSTEM_TIMER - stg %r15,__LC_SYSTEM_TIMER -0: # update accounting time stamp - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - # do LAST_BREAK - lg %r9,16(%r11) - srag %r9,%r9,23 - jz 0f - mvc __TI_last_break(8,%r12),16(%r11) -0: # set up saved register r11 - lg %r15,__LC_KERNEL_STACK - la %r9,STACK_FRAME_OVERHEAD(%r15) - stg %r9,24(%r11) # r11 pt_regs pointer - # fill pt_regs - mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC - stmg %r0,%r7,__PT_R0(%r9) - mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC - # setup saved register r15 - stg %r15,56(%r11) # r15 stack pointer - # set new psw address and exit - larl %r9,sysc_do_svc - br %r14 -cleanup_system_call_insn: - .quad system_call - .quad sysc_stmg - .quad sysc_per - .quad sysc_vtime+18 - .quad sysc_vtime+42 - -cleanup_sysc_tif: - larl %r9,sysc_tif - br %r14 - -cleanup_sysc_restore: - clg %r9,BASED(cleanup_sysc_restore_insn) - je 0f - lg %r9,24(%r11) # get saved pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 -cleanup_sysc_restore_insn: - .quad sysc_done - 4 - -cleanup_io_tif: - larl %r9,io_tif - br %r14 - -cleanup_io_restore: - clg %r9,BASED(cleanup_io_restore_insn) - je 0f - lg %r9,24(%r11) # get saved r11 pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 -cleanup_io_restore_insn: - .quad io_done - 4 - -cleanup_idle: - # copy interrupt clock & cpu timer - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER -0: # check if stck & stpt have been executed - clg %r9,BASED(cleanup_idle_insn) - jhe 1f - mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) - mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) -1: # account system time going idle - lg %r9,__LC_STEAL_TIMER - alg %r9,__CLOCK_IDLE_ENTER(%r2) - slg %r9,__LC_LAST_UPDATE_CLOCK - stg %r9,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - lg %r9,__LC_SYSTEM_TIMER - alg %r9,__LC_LAST_UPDATE_TIMER - slg %r9,__TIMER_IDLE_ENTER(%r2) - stg %r9,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - # prepare return psw - nihh %r8,0xfffd # clear wait state bit - lg %r9,48(%r11) # return from psw_idle - br %r14 -cleanup_idle_insn: - .quad psw_idle_lpsw - -/* - * Integer constants - */ - .align 8 -.Lcritical_start: - .quad __critical_start -.Lcritical_length: - .quad __critical_end - __critical_start - - -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) -/* - * sie64a calling convention: - * %r2 pointer to sie control block - * %r3 guest register save area - */ -ENTRY(sie64a) - stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - stg %r2,__SF_EMPTY(%r15) # save control block pointer - stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 - lg %r14,__LC_GMAP # get gmap pointer - ltgr %r14,%r14 - jz sie_gmap - lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce -sie_gmap: - lg %r14,__SF_EMPTY(%r15) # get control block pointer - oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now - tm __SIE_PROG20+3(%r14),1 # last exit... - jnz sie_done - LPP __SF_EMPTY(%r15) # set guest id - sie 0(%r14) -sie_done: - LPP __SF_EMPTY+16(%r15) # set host id - ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions beween sie64a and sie_done should not cause program -# interrupts. So lets use a nop (47 00 00 00) as a landing pad. -# See also HANDLE_SIE_INTERCEPT -rewind_pad: - nop 0 - .globl sie_exit -sie_exit: - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_EMPTY+24(%r15) # return exit reason code - br %r14 -sie_fault: - lghi %r14,-EFAULT - stg %r14,__SF_EMPTY+24(%r15) # set exit reason code - j sie_exit - - .align 8 -.Lsie_critical: - .quad sie_gmap -.Lsie_critical_length: - .quad sie_done - sie_gmap - - EX_TABLE(rewind_pad,sie_fault) - EX_TABLE(sie_exit,sie_fault) -#endif - - .section .rodata, "a" -#define SYSCALL(esa,esame,emu) .long esame - .globl sys_call_table -sys_call_table: -#include "syscalls.S" -#undef SYSCALL - -#ifdef CONFIG_COMPAT - -#define SYSCALL(esa,esame,emu) .long emu - .globl sys_call_table_emu -sys_call_table_emu: -#include "syscalls.S" -#undef SYSCALL -#endif diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c new file mode 100644 index 000000000000..d028b0be5c1d --- /dev/null +++ b/arch/s390/kernel/facility.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2023 + */ + +#include <linux/export.h> +#include <asm/facility.h> + +unsigned int stfle_size(void) +{ + static unsigned int size; + unsigned int r; + u64 dummy; + + r = READ_ONCE(size); + if (!r) { + r = __stfle_asm(&dummy, 1) + 1; + WRITE_ONCE(size, r); + } + return r; +} +EXPORT_SYMBOL(stfle_size); diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 000000000000..03a8973aec3c --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <asm/fpu.h> + +void __kernel_fpu_begin(struct kernel_fpu *state, int flags) +{ + __vector128 *vxrs = state->vxrs; + int mask; + + /* + * Limit the save to the FPU/vector registers already + * in use by the previous context. + */ + flags &= state->hdr.mask; + if (flags & KERNEL_FPC) + fpu_stfpc(&state->hdr.fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_LOW) + save_fp_regs_vx(vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + vxrs += fpu_vstm(0, 15, vxrs); + vxrs += fpu_vstm(16, 31, vxrs); + return; + } + if (mask == KERNEL_VXR_MID) { + vxrs += fpu_vstm(8, 23, vxrs); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + vxrs += fpu_vstm(0, 15, vxrs); + else if (mask == KERNEL_VXR_V0V7) + vxrs += fpu_vstm(0, 7, vxrs); + else + vxrs += fpu_vstm(8, 15, vxrs); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + vxrs += fpu_vstm(16, 31, vxrs); + else if (mask == KERNEL_VXR_V16V23) + vxrs += fpu_vstm(16, 23, vxrs); + else + vxrs += fpu_vstm(24, 31, vxrs); + } +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state, int flags) +{ + __vector128 *vxrs = state->vxrs; + int mask; + + /* + * Limit the restore to the FPU/vector registers of the + * previous context that have been overwritten by the + * current context. + */ + flags &= state->hdr.mask; + if (flags & KERNEL_FPC) + fpu_lfpc(&state->hdr.fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_LOW) + load_fp_regs_vx(vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + vxrs += fpu_vlm(0, 15, vxrs); + vxrs += fpu_vlm(16, 31, vxrs); + return; + } + if (mask == KERNEL_VXR_MID) { + vxrs += fpu_vlm(8, 23, vxrs); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + vxrs += fpu_vlm(0, 15, vxrs); + else if (mask == KERNEL_VXR_V0V7) + vxrs += fpu_vlm(0, 7, vxrs); + else + vxrs += fpu_vlm(8, 15, vxrs); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + vxrs += fpu_vlm(16, 31, vxrs); + else if (mask == KERNEL_VXR_V16V23) + vxrs += fpu_vlm(16, 23, vxrs); + else + vxrs += fpu_vlm(24, 31, vxrs); + } +} +EXPORT_SYMBOL(__kernel_fpu_end); + +void load_fpu_state(struct fpu *state, int flags) +{ + __vector128 *vxrs = &state->vxrs[0]; + int mask; + + if (flags & KERNEL_FPC) + fpu_lfpc_safe(&state->fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_V0V7) + load_fp_regs_vx(state->vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + fpu_vlm(0, 15, &vxrs[0]); + fpu_vlm(16, 31, &vxrs[16]); + return; + } + if (mask == KERNEL_VXR_MID) { + fpu_vlm(8, 23, &vxrs[8]); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + fpu_vlm(0, 15, &vxrs[0]); + else if (mask == KERNEL_VXR_V0V7) + fpu_vlm(0, 7, &vxrs[0]); + else + fpu_vlm(8, 15, &vxrs[8]); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + fpu_vlm(16, 31, &vxrs[16]); + else if (mask == KERNEL_VXR_V16V23) + fpu_vlm(16, 23, &vxrs[16]); + else + fpu_vlm(24, 31, &vxrs[24]); + } +} + +void save_fpu_state(struct fpu *state, int flags) +{ + __vector128 *vxrs = &state->vxrs[0]; + int mask; + + if (flags & KERNEL_FPC) + fpu_stfpc(&state->fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_LOW) + save_fp_regs_vx(state->vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + fpu_vstm(0, 15, &vxrs[0]); + fpu_vstm(16, 31, &vxrs[16]); + return; + } + if (mask == KERNEL_VXR_MID) { + fpu_vstm(8, 23, &vxrs[8]); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + fpu_vstm(0, 15, &vxrs[0]); + else if (mask == KERNEL_VXR_V0V7) + fpu_vstm(0, 7, &vxrs[0]); + else + fpu_vstm(8, 15, &vxrs[8]); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + fpu_vstm(16, 31, &vxrs[16]); + else if (mask == KERNEL_VXR_V16V23) + fpu_vstm(16, 23, &vxrs[16]); + else + fpu_vstm(24, 31, &vxrs[24]); + } +} +EXPORT_SYMBOL(save_fpu_state); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index e3043aef87a9..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -1,10 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Dynamic function tracer architecture backend. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009,2014 * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ #include <linux/hardirq.h> @@ -12,182 +12,325 @@ #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> +#include <linux/execmem.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> +#include <asm/text-patching.h> +#include <asm/cacheflush.h> +#include <asm/ftrace.lds.h> +#include <asm/nospec-branch.h> +#include <asm/set_memory.h> +#include "entry.h" +#include "ftrace.h" -#ifdef CONFIG_DYNAMIC_FTRACE - -void ftrace_disable_code(void); -void ftrace_enable_insn(void); - -#ifdef CONFIG_64BIT -/* - * The 64-bit mcount code looks like this: - * stg %r14,8(%r15) # offset 0 - * > larl %r1,<&counter> # offset 6 - * > brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The middle two instructions of the mcount - * block get overwritten by ftrace_make_nop / ftrace_make_call. - * The 64-bit enabled ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 - * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The return points of the mcount/ftrace function have the same offset 18. - * The 64-bit disable ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 - * > jg .+18 # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The jg instruction branches to offset 24 to skip as many instructions - * as possible. - */ -asm( - " .align 4\n" - "ftrace_disable_code:\n" - " jg 0f\n" - " lgr %r0,%r0\n" - " basr %r14,%r1\n" - "0:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define FTRACE_INSN_SIZE 6 - -#else /* CONFIG_64BIT */ /* - * The 31-bit mcount code looks like this: - * st %r14,4(%r15) # offset 0 - * > bras %r1,0f # offset 4 - * > .long _mcount # offset 8 - * > .long <&counter> # offset 12 - * > 0: l %r14,0(%r1) # offset 16 - * > l %r1,4(%r1) # offset 20 - * basr %r14,%r14 # offset 24 - * l %r14,4(%r15) # offset 26 - * Total length is 30 bytes. The twenty bytes starting from offset 4 - * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. - * The 31-bit enabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > l %r14,__LC_FTRACE_FUNC # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The return points of the mcount/ftrace function have the same offset 26. - * The 31-bit disabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > j .+26 # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The j instruction branches to offset 30 to skip as many instructions - * as possible. + * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) + * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags + * (since gcc 9 / clang 10) is used. + * In both cases the original and also the disabled function prologue contains + * only a single six byte instruction and looks like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: + * > brasl %r0,ftrace_caller # offset 0 + * + * The instruction will be patched by ftrace_make_call / ftrace_make_nop. + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * For module code we can't directly jump to ftrace caller, but need a + * trampoline (ftrace_plt), which clobbers also r1. */ -asm( - " .align 4\n" - "ftrace_disable_code:\n" - " j 1f\n" - " j 0f\n" - " .fill 12,1,0x07\n" - "0: basr %r14,%r14\n" - "1:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); -#define FTRACE_INSN_SIZE 4 +void *ftrace_func __read_mostly = ftrace_stub; +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; + +static const char *ftrace_shared_hotpatch_trampoline(const char **end) +{ + const char *tstart, *tend; + + tstart = ftrace_shared_hotpatch_trampoline_br; + tend = ftrace_shared_hotpatch_trampoline_br_end; +#ifdef CONFIG_EXPOLINE + if (!nospec_disable) { + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; + } +#endif /* CONFIG_EXPOLINE */ + if (end) + *end = tend; + return tstart; +} + +bool ftrace_need_init_nop(void) +{ + return !cpu_has_seq_insn(); +} + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline = + __ftrace_hotpatch_trampolines_start; + static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 }; + static struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_hotpatch_trampoline **next_trampoline; + struct ftrace_hotpatch_trampoline *trampolines_end; + struct ftrace_hotpatch_trampoline tmp; + struct ftrace_insn *insn; + struct ftrace_insn old; + const char *shared; + s32 disp; + + BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) != + SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE); -#endif /* CONFIG_64BIT */ + next_trampoline = &next_vmlinux_trampoline; + trampolines_end = __ftrace_hotpatch_trampolines_end; + shared = ftrace_shared_hotpatch_trampoline(NULL); +#ifdef CONFIG_MODULES + if (mod) { + next_trampoline = &mod->arch.next_trampoline; + trampolines_end = mod->arch.trampolines_end; + } +#endif + if (WARN_ON_ONCE(*next_trampoline >= trampolines_end)) + return -ENOMEM; + trampoline = (*next_trampoline)++; + + if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old))) + return -EFAULT; + /* Check for the compiler-generated fentry nop (brcl 0, .). */ + if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old)))) + return -EINVAL; + + /* Generate the trampoline. */ + tmp.brasl_opc = 0xc015; /* brasl %r1, shared */ + tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2; + tmp.interceptor = FTRACE_ADDR; + tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn); + s390_kernel_write(trampoline, &tmp, sizeof(tmp)); + + /* Generate a jump to the trampoline. */ + disp = ((char *)trampoline - (char *)rec->ip) / 2; + insn = (struct ftrace_insn *)rec->ip; + s390_kernel_write(&insn->disp, &disp, sizeof(disp)); + + return 0; +} + +static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec) +{ + struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_insn insn; + s64 disp; + u16 opc; + + if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn))) + return ERR_PTR(-EFAULT); + disp = (s64)insn.disp * 2; + trampoline = (void *)(rec->ip + disp); + if (get_kernel_nofault(opc, &trampoline->brasl_opc)) + return ERR_PTR(-EFAULT); + if (opc != 0xc015) + return ERR_PTR(-EINVAL); + return trampoline; +} + +static inline struct ftrace_insn +ftrace_generate_branch_insn(unsigned long ip, unsigned long target) +{ + /* brasl r0,target or brcl 0,0 */ + return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004, + .disp = target ? (target - ip) / 2 : 0 }; +} + +static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target, + unsigned long target) +{ + struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target); + struct ftrace_insn new = ftrace_generate_branch_insn(ip, target); + struct ftrace_insn old; + + if (!IS_ALIGNED(ip, 8)) + return -EINVAL; + if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old))) + return -EFAULT; + /* Verify that the to be replaced code matches what we expect. */ + if (memcmp(&orig, &old, sizeof(old))) + return -EINVAL; + s390_kernel_write((void *)ip, &new, sizeof(new)); + return 0; +} + +static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, + unsigned long old_addr, + unsigned long addr) +{ + struct ftrace_hotpatch_trampoline *trampoline; + u64 old; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + if (get_kernel_nofault(old, &trampoline->interceptor)) + return -EFAULT; + if (old != old_addr) + return -EINVAL; + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); + return 0; +} + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + if (cpu_has_seq_insn()) + return ftrace_patch_branch_insn(rec->ip, old_addr, addr); + else + return ftrace_modify_trampoline_call(rec, old_addr, addr); +} + +static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) +{ + u16 old; + u8 op; + + if (get_kernel_nofault(old, addr)) + return -EFAULT; + if (old != expected) + return -EINVAL; + /* set mask field to all ones or zeroes */ + op = enable ? 0xf4 : 0x04; + s390_kernel_write((char *)addr + 1, &op, sizeof(op)); + return 0; +} int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, - MCOUNT_INSN_SIZE)) - return -EPERM; - return 0; + /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) + return ftrace_patch_branch_insn(rec->ip, addr, 0); + else + return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); +} + +static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr) +{ + struct ftrace_hotpatch_trampoline *trampoline; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); + /* Expect brcl 0x0,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true); } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, - FTRACE_INSN_SIZE)) - return -EPERM; - return 0; + if (cpu_has_seq_insn()) + return ftrace_patch_branch_insn(rec->ip, 0, addr); + else + return ftrace_make_trampoline_call(rec, addr); } int ftrace_update_ftrace_func(ftrace_func_t func) { + ftrace_func = func; return 0; } -int __init ftrace_dyn_arch_init(void *data) +void arch_ftrace_update_code(int command) { - *(unsigned long *) data = 0; - return 0; + ftrace_modify_all_code(command); } -#endif /* CONFIG_DYNAMIC_FTRACE */ +void ftrace_arch_code_modify_post_process(void) +{ + /* + * Flush any pre-fetched instructions on all + * CPUs to make the new code visible. + */ + text_poke_sync_lock(); +} #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * Hook the return address and push it in the stack of return addresses - * in current thread info. - */ -unsigned long __kprobes prepare_ftrace_return(unsigned long parent, - unsigned long ip) + +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct ftrace_graph_ent trace; + unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; + if (unlikely(ftrace_graph_is_dead())) + return; if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) + *parent = (unsigned long)&return_to_handler; +} + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KPROBES_ON_FTRACE +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) +{ + struct kprobe_ctlblk *kcb; + struct pt_regs *regs; + struct kprobe *p; + int bit; + + if (unlikely(kprobe_ftrace_disabled)) + return; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + kmsan_unpoison_memory(fregs, ftrace_regs_size()); + regs = ftrace_get_regs(fregs); + p = get_kprobe((kprobe_opcode_t *)ip); + if (!regs || unlikely(!p) || kprobe_disabled(p)) goto out; - ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) - goto out; - trace.func = ip; - /* Only trace if the calling function expects to. */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); goto out; } - parent = (unsigned long) return_to_handler; -out: - return parent; -} -#ifdef CONFIG_DYNAMIC_FTRACE -/* - * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative and save to prepare_ftrace_return. To disable - * the call to prepare_ftrace_return we patch the bras offset to point - * directly after the instructions. To enable the call we calculate - * the original offset to prepare_ftrace_return and put it back. - */ -int ftrace_enable_ftrace_graph_caller(void) -{ - unsigned short offset; + __this_cpu_write(current_kprobe, p); - offset = ((void *) prepare_ftrace_return - - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write(ftrace_graph_caller + 2, - &offset, sizeof(offset)); -} + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; -int ftrace_disable_ftrace_graph_caller(void) -{ - static unsigned short offset = 0x0002; + instruction_pointer_set(regs, ip); + + if (!p->pre_handler || !p->pre_handler(p, regs)) { - return probe_kernel_write(ftrace_graph_caller + 2, - &offset, sizeof(offset)); + instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE); + + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + } + __this_cpu_write(current_kprobe, NULL); +out: + ftrace_test_recursion_unlock(bit); } +NOKPROBE_SYMBOL(kprobe_ftrace_handler); -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + return 0; +} +#endif diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h new file mode 100644 index 000000000000..23337065f402 --- /dev/null +++ b/arch/s390/kernel/ftrace.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FTRACE_H +#define _FTRACE_H + +#include <asm/types.h> + +struct ftrace_hotpatch_trampoline { + u16 brasl_opc; + s32 brasl_disp; + s16: 16; + u64 rest_of_intercepted_function; + u64 interceptor; +} __packed; + +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; +extern const char ftrace_shared_hotpatch_trampoline_br[]; +extern const char ftrace_shared_hotpatch_trampoline_br_end[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; + +#endif /* _FTRACE_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..cf26d7a37425 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/guarded_storage.h> +#include "entry.h" + +void guarded_storage_release(struct task_struct *tsk) +{ + kfree(tsk->thread.gs_cb); + kfree(tsk->thread.gs_bc_cb); +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!cpu_has_gs()) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index fd8db63dfc94..7edb9ded199c 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -1,501 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 1999, 2010 * - * Author(s): Hartmut Penner <hp@de.ibm.com> - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * Rob van der Heij <rvdhei@iae.nl> - * Heiko Carstens <heiko.carstens@de.ibm.com> - * - * There are 5 different IPL methods - * 1) load the image directly into ram at address 0 and do an PSW restart - * 2) linload will load the image from address 0x10000 to memory 0x10000 - * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated) - * 3) generate the tape ipl header, store the generated image on a tape - * and ipl from it - * In case of SL tape you need to IPL 5 times to get past VOL1 etc - * 4) generate the vm reader ipl header, move the generated image to the - * VM reader (use option NOH!) and do a ipl from reader (VM only) - * 5) direct call of start by the SALIPL loader - * We use the cpuid to distinguish between VM and native ipl - * params for kernel are pushed to 0x10400 (see setup.h) + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> * */ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/lowcore.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> - -#ifdef CONFIG_64BIT -#define ARCH_OFFSET 4 -#else -#define ARCH_OFFSET 0 -#endif +#include <asm/ptrace.h> __HEAD - -#define IPL_BS 0x730 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x02000018,0x60000050 # by ipl to addresses 0-23. - .long 0x02000068,0x60000050 # (a PSW and two CCWs). - .fill 80-24,1,0x40 # bytes 24-79 are discarded !! - .long 0x020000f0,0x60000050 # The next 160 byte are loaded - .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 - .long 0x02000190,0x60000050 # They form the continuation - .long 0x020001e0,0x60000050 # of the CCW program started - .long 0x02000230,0x60000050 # by ipl and load the range - .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image - .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 - .long 0x02000320,0x60000050 # in memory. At the end of - .long 0x02000370,0x60000050 # the channel program the PSW - .long 0x020003c0,0x60000050 # at location 0 is loaded. - .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0x200 = iplstart. - .long 0x020004b0,0x60000050 - .long 0x02000500,0x60000050 - .long 0x02000550,0x60000050 - .long 0x020005a0,0x60000050 - .long 0x020005f0,0x60000050 - .long 0x02000640,0x60000050 - .long 0x02000690,0x60000050 - .long 0x020006e0,0x20000050 - - .org 0x200 -# -# subroutine to set architecture mode -# -.Lsetmode: -#ifdef CONFIG_64BIT - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode -#else - mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) -#endif - br %r14 - -# -# subroutine to wait for end I/O -# -.Lirqwait: -#ifdef CONFIG_64BIT - mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: - br %r14 - .align 8 -.Lnewpsw: - .quad 0x0000000080000000,.Lioint -#else - mvc 0x78(8),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: - br %r14 - .align 8 -.Lnewpsw: - .long 0x00080000,0x80000000+.Lioint -#endif -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -# -# subroutine for loading cards from the reader -# -.Lloader: - la %r4,0(%r14) - la %r3,.Lorb # r2 = address of orb into r2 - la %r5,.Lirb # r4 = address of irb - la %r6,.Lccws - la %r7,20 -.Linit: - st %r2,4(%r6) # initialize CCW data addresses - la %r2,0x50(%r2) - la %r6,8(%r6) - bct 7,.Linit - - lctl %c6,%c6,.Lcr6 # set IO subclass mask - slr %r2,%r2 -.Lldlp: - ssch 0(%r3) # load chunk of 1600 bytes - bnz .Llderr -.Lwait4irq: - bas %r14,.Lirqwait - c %r1,0xb8 # compare subchannel number - bne .Lwait4irq - tsch 0(%r5) - - slr %r0,%r0 - ic %r0,8(%r5) # get device status - chi %r0,8 # channel end ? - be .Lcont - chi %r0,12 # channel end + device end ? - be .Lcont - - l %r0,4(%r5) - s %r0,8(%r3) # r0/8 = number of ccws executed - mhi %r0,10 # *10 = number of bytes in ccws - lh %r3,10(%r5) # get residual count - sr %r0,%r3 # #ccws*80-residual=#bytes read - ar %r2,%r0 - - br %r4 # r2 contains the total size - -.Lcont: - ahi %r2,0x640 # add 0x640 to total size - la %r6,.Lccws - la %r7,20 -.Lincr: - l %r0,4(%r6) # update CCW data addresses - ahi %r0,0x640 - st %r0,4(%r6) - ahi %r6,8 - bct 7,.Lincr - - b .Lldlp -.Llderr: - lpsw .Lcrash - - .align 8 -.Lorb: .long 0x00000000,0x0080ff00,.Lccws -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 -.Lloadp:.long 0,0 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 - - .align 8 -.Lccws: .rept 19 - .long 0x02600050,0x00000000 - .endr - .long 0x02200050,0x00000000 - -iplstart: - bas %r14,.Lsetmode # Immediately switch to 64 bit mode - lh %r1,0xb8 # test if subchannel number - bct %r1,.Lnoload # is valid - l %r1,0xb8 # load ipl subchannel number - la %r2,IPL_BS # load start address - bas %r14,.Lloader # load rest of ipl image - l %r12,.Lparm # pointer to parameter area - st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number - -# -# load parameter file from ipl device -# -.Lagain1: - l %r2,.Linitrd # ramdisk loc. is temp - bas %r14,.Lloader # load parameter file - ltr %r2,%r2 # got anything ? - bz .Lnopf - chi %r2,895 - bnh .Lnotrunc - la %r2,895 -.Lnotrunc: - l %r4,.Linitrd - clc 0(3,%r4),.L_hdr # if it is HDRx - bz .Lagain1 # skip dataset header - clc 0(3,%r4),.L_eof # if it is EOFx - bz .Lagain1 # skip dateset trailer - la %r5,0(%r4,%r2) - lr %r3,%r2 - la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line - mvc 0(256,%r3),0(%r4) - mvc 256(256,%r3),256(%r4) - mvc 512(256,%r3),512(%r4) - mvc 768(122,%r3),768(%r4) - slr %r0,%r0 - b .Lcntlp -.Ldelspc: - ic %r0,0(%r2,%r3) - chi %r0,0x20 # is it a space ? - be .Lcntlp - ahi %r2,1 - b .Leolp -.Lcntlp: - brct %r2,.Ldelspc -.Leolp: - slr %r0,%r0 - stc %r0,0(%r2,%r3) # terminate buffer -.Lnopf: - -# -# load ramdisk from ipl device -# -.Lagain2: - l %r2,.Linitrd # addr of ramdisk - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) - bas %r14,.Lloader # load ramdisk - st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd - ltr %r2,%r2 - bnz .Lrdcont - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found -.Lrdcont: - l %r2,.Linitrd - - clc 0(3,%r2),.L_hdr # skip HDRx and EOFx - bz .Lagain2 - clc 0(3,%r2),.L_eof - bz .Lagain2 - -# -# reset files in VM reader -# - stidp .Lcpuid # store cpuid - tm .Lcpuid,0xff # running VM ? - bno .Lnoreset - la %r2,.Lreset - lhi %r3,26 - diag %r2,%r3,8 - la %r5,.Lirb - stsch 0(%r5) # check if irq is pending - tm 30(%r5),0x0f # by verifying if any of the - bnz .Lwaitforirq # activity or status control - tm 31(%r5),0xff # bits is set in the schib - bz .Lnoreset -.Lwaitforirq: - bas %r14,.Lirqwait # wait for IO interrupt - c %r1,0xb8 # compare subchannel number - bne .Lwaitforirq - la %r5,.Lirb - tsch 0(%r5) -.Lnoreset: - b .Lnoload - -# -# everything loaded, go for it -# -.Lnoload: - l %r1,.Lstartup - br %r1 - -.Linitrd:.long _end # default address of initrd -.Lparm: .long PARMAREA -.Lstartup: .long startup -.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 - .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 - .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" -.L_eof: .long 0xc5d6c600 /* C'EOF' */ -.L_hdr: .long 0xc8c4d900 /* C'HDR' */ - .align 8 -.Lcpuid:.fill 8,1,0 - +SYM_CODE_START(startup_continue) # -# SALIPL loader support. Based on a patch by Rob van der Heij. -# This entry point is called directly from the SALIPL loader and -# doesn't need a builtin ipl record. -# - .org 0x800 -ENTRY(start) - stm %r0,%r15,0x07b0 # store registers - bas %r14,.Lsetmode # Immediately switch to 64 bit mode - basr %r12,%r0 -.base: - l %r11,.parm - l %r8,.cmd # pointer to command buffer - - ltr %r9,%r9 # do we have SALIPL parameters? - bp .sk8x8 - - mvc 0(64,%r8),0x00b0 # copy saved registers - xc 64(240-64,%r8),0(%r8) # remainder of buffer - tr 0(64,%r8),.lowcase - b .gotr -.sk8x8: - mvc 0(240,%r8),0(%r9) # copy iplparms into buffer -.gotr: - slr %r0,%r0 - st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) - st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) - j startup # continue with startup -.cmd: .long COMMAND_LINE # address of command line buffer -.parm: .long PARMAREA -.lowcase: - .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 - .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17 - .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f - .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27 - .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f - .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37 - .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f - .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47 - .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f - .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57 - .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f - .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67 - .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f - .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77 - .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f - - .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87 - .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f - .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97 - .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f - .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 - .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf - .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7 - .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf - .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg - .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi - .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop - .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr - .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx - .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz - .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 - .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff - +# Setup stack # -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL + GET_LC %r2 + larl %r14,init_task + stg %r14,__LC_CURRENT(%r2) + larl %r15,init_thread_union+STACK_INIT_OFFSET + stg %r15,__LC_KERNEL_STACK(%r2) + brasl %r14,sclp_early_adjust_va # allow sclp_early_printk + brasl %r14,startup_init # s390 specific early init + brasl %r14,start_kernel # common init code # - .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal - .org 0x10008 +# We returned from start_kernel ?!? PANIK # -# This is a list of s390 kernel entry points. At address 0x1000f the number of -# valid entry points is stored. -# -# IMPORTANT: Do not change this table, it is s390 kernel ABI! -# - .ascii "S390EP" - .byte 0x00,0x01 -# -# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode -# - .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: -#ifdef CONFIG_64BIT - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode -#else - mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) -#endif - basr %r13,0 # get base -.LPG0: - xc 0x200(256),0x200 # partially clear lowcore - xc 0x300(256),0x300 - xc 0xe00(256),0xe00 - stck __LC_LAST_UPDATE_CLOCK - spt 6f-.LPG0(%r13) - mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST -#ifndef CONFIG_MARCH_G5 - # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list - tm __LC_STFL_FAC_LIST,0x01 # stfle available ? - jz 0f - la %r0,1 - .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended - # verify if all required facilities are supported by the machine -0: la %r1,__LC_STFL_FAC_LIST - la %r2,3f+8-.LPG0(%r13) - l %r3,0(%r2) -1: l %r0,0(%r1) - n %r0,4(%r2) - cl %r0,4(%r2) - jne 2f - la %r1,4(%r1) - la %r2,4(%r2) - ahi %r3,-1 - jnz 1b - j 4f -2: l %r15,.Lstack-.LPG0(%r13) - ahi %r15,-96 - la %r2,.Lals_string-.LPG0(%r13) - l %r3,.Lsclp_print-.LPG0(%r13) - basr %r14,%r3 - lpsw 3f-.LPG0(%r13) # machine type not good enough, crash -.Lals_string: - .asciz "The Linux kernel requires more recent processor hardware" -.Lsclp_print: - .long _sclp_print_early -.Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) - .align 16 -3: .long 0x000a0000,0x8badcccc - -# List of facilities that are required. If not all facilities are present -# the kernel will crash. Format is number of facility words with bits set, -# followed by the facility words. - -#if defined(CONFIG_64BIT) -#if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efe3, 0xf46ce000, 0x00400000 -#elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efe3, 0xf46c0000 -#elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efe3, 0xf0680000 -#elif defined(CONFIG_MARCH_Z9_109) - .long 1, 0xc100efc3 -#elif defined(CONFIG_MARCH_Z990) - .long 1, 0xc0002000 -#elif defined(CONFIG_MARCH_Z900) - .long 1, 0xc0000000 -#endif -#else -#if defined(CONFIG_MARCH_ZEC12) - .long 1, 0x8100c880 -#elif defined(CONFIG_MARCH_Z196) - .long 1, 0x8100c880 -#elif defined(CONFIG_MARCH_Z10) - .long 1, 0x8100c880 -#elif defined(CONFIG_MARCH_Z9_109) - .long 1, 0x8100c880 -#elif defined(CONFIG_MARCH_Z990) - .long 1, 0x80002000 -#elif defined(CONFIG_MARCH_Z900) - .long 1, 0x80000000 -#endif -#endif -4: -#endif - -#ifdef CONFIG_64BIT - /* Continue with 64bit startup code in head64.S */ - sam64 # switch to 64 bit mode - jg startup_continue -#else - /* Continue with 31bit startup code in head31.S */ - l %r13,5f-.LPG0(%r13) - b 0(%r13) - .align 8 -5: .long startup_continue -#endif - - .align 8 -6: .long 0x7fffffff,0xffffffff - -#include "head_kdump.S" - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .long 0,0 # IPL_DEVICE - .long 0,0 # INITRD_START - .long 0,0 # INITRD_SIZE - .long 0,0 # OLDMEM_BASE - .long 0,0 # OLDMEM_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 + basr %r13,0 + lpswe dw_psw-.(%r13) # load disabled wait psw +SYM_CODE_END(startup_continue) - .org 0x11000 + .balign 16 +SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S deleted file mode 100644 index 9a99856df1c9..000000000000 --- a/arch/s390/kernel/head31.S +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright IBM Corp. 2005, 2010 - * - * Author(s): Hartmut Penner <hp@de.ibm.com> - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * Rob van der Heij <rvdhei@iae.nl> - * Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/page.h> - -__HEAD -ENTRY(startup_continue) - basr %r13,0 # get base -.LPG1: - - l %r1,.Lbase_cc-.LPG1(%r13) - mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK - lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers - l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area - # move IPL device to lowcore -# -# Setup stack -# - l %r15,.Linittu-.LPG1(%r13) - st %r15,__LC_THREAD_INFO # cache thread info in lowcore - mvc __LC_CURRENT(4),__TI_task(%r15) - ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE - st %r15,__LC_KERNEL_STACK # set end of kernel stack - ahi %r15,-96 -# -# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, -# and create a kernel NSS if the SAVESYS= parm is defined -# - l %r14,.Lstartup_init-.LPG1(%r13) - basr %r14,%r14 - lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... - .align 8 -.Lentry:.long 0x00080000,0x80000000 + _stext -.Lctl: .long 0x04b50000 # cr0: various things - .long 0 # cr1: primary space segment table - .long .Lduct # cr2: dispatchable unit control table - .long 0 # cr3: instruction authorization - .long 0 # cr4: instruction authorization - .long .Lduct # cr5: primary-aste origin - .long 0 # cr6: I/O interrupts - .long 0 # cr7: secondary space segment table - .long 0 # cr8: access registers translation - .long 0 # cr9: tracing off - .long 0 # cr10: tracing off - .long 0 # cr11: tracing off - .long 0 # cr12: tracing off - .long 0 # cr13: home space segment table - .long 0xc0000000 # cr14: machine check handling off - .long 0 # cr15: linkage stack operations -.Lmchunk:.long memory_chunk -.Lbss_bgn: .long __bss_start -.Lbss_end: .long _end -.Lparmaddr: .long PARMAREA -.Linittu: .long init_thread_union -.Lstartup_init: - .long startup_init - .align 64 -.Lduct: .long 0,0,0,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr -.Lbase_cc: - .long sched_clock_base_cc - -ENTRY(_ehead) - - .org 0x100000 - 0x11000 # head.o ends at 0x11000 -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctl %c0,%c15,0(%r15) - oi 2(%r15),0x60 # enable sigp emergency & external call - oi 0(%r15),0x10 # switch on low address protection - lctl %c0,%c15,0(%r15) - -# - lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess - l %r14,.Lstart-.LPG3(%r13) - basr %r14,%r14 # call start_kernel -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpsw .Ldw-.(%r13) # load disabled wait psw -# - .align 8 -.Ldw: .long 0x000a0000,0x00000000 -.Lstart:.long start_kernel -.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S deleted file mode 100644 index b9e25ae2579c..000000000000 --- a/arch/s390/kernel/head64.S +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright IBM Corp. 1999, 2010 - * - * Author(s): Hartmut Penner <hp@de.ibm.com> - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * Rob van der Heij <rvdhei@iae.nl> - * Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/page.h> - -__HEAD -ENTRY(startup_continue) - larl %r1,sched_clock_base_cc - mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK - larl %r13,.LPG1 # get base - lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers - lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area - # move IPL device to lowcore - lghi %r0,__LC_PASTE - stg %r0,__LC_VDSO_PER_CPU -# -# Setup stack -# - larl %r15,init_thread_union - stg %r15,__LC_THREAD_INFO # cache thread info in lowcore - lg %r14,__TI_task(%r15) # cache current in lowcore - stg %r14,__LC_CURRENT - aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE - stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-160 -# -# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, -# and create a kernel NSS if the SAVESYS= parm is defined -# - brasl %r14,startup_init - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... - .align 16 -.LPG1: -.Lentry:.quad 0x0000000180000000,_stext -.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space - .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0 # cr4: instruction authorization - .quad .Lduct # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations -.Lpcmsk:.quad 0x0000000180000000 -.L4malign:.quad 0xffffffffffc00000 -.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 -.Lnop: .long 0x07000700 -.Lparmaddr: - .quad PARMAREA - .align 64 -.Lduct: .long 0,0,0,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr - -ENTRY(_ehead) - - .org 0x100000 - 0x11000 # head.o ends at 0x11000 -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw - - .align 8 -.Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S deleted file mode 100644 index 085a95eb315f..000000000000 --- a/arch/s390/kernel/head_kdump.S +++ /dev/null @@ -1,108 +0,0 @@ -/* - * S390 kdump lowlevel functions (new kernel) - * - * Copyright IBM Corp. 2011 - * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> - */ - -#include <asm/sigp.h> - -#define DATAMOVER_ADDR 0x4000 -#define COPY_PAGE_ADDR 0x6000 - -#ifdef CONFIG_CRASH_DUMP - -# -# kdump entry (new kernel - not yet relocated) -# -# Note: This code has to be position independent -# - -.align 2 -.Lep_startup_kdump: - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode - sam64 # Switch to 64 bit addressing - basr %r13,0 -.Lbase: - larl %r2,.Lbase_addr # Check, if we have been - lg %r2,0(%r2) # already relocated: - clgr %r2,%r13 # - jne .Lrelocate # No : Start data mover - lghi %r2,0 # Yes: Start kdump kernel - brasl %r14,startup_kdump_relocated - -.Lrelocate: - larl %r4,startup - lg %r2,0x418(%r4) # Get kdump base - lg %r3,0x420(%r4) # Get kdump size - - larl %r10,.Lcopy_start # Source of data mover - lghi %r8,DATAMOVER_ADDR # Target of data mover - mvc 0(256,%r8),0(%r10) # Copy data mover code - - agr %r8,%r2 # Copy data mover to - mvc 0(256,%r8),0(%r10) # reserved mem - - lghi %r14,DATAMOVER_ADDR # Jump to copied data mover - basr %r14,%r14 -.Lbase_addr: - .quad .Lbase - -# -# kdump data mover code (runs at address DATAMOVER_ADDR) -# -# r2: kdump base address -# r3: kdump size -# -.Lcopy_start: - basr %r13,0 # Base -0: - lgr %r11,%r2 # Save kdump base address - lgr %r12,%r2 - agr %r12,%r3 # Compute kdump end address - - lghi %r5,0 - lghi %r10,COPY_PAGE_ADDR # Load copy page address -1: - mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp - mvc 0(256,%r5),0(%r11) # Copy new kernel to old - mvc 0(256,%r11),0(%r10) # Copy tmp to new - aghi %r11,256 - aghi %r5,256 - clgr %r11,%r12 - jl 1b - - lg %r14,.Lstartup_kdump-0b(%r13) - basr %r14,%r14 # Start relocated kernel -.Lstartup_kdump: - .long 0x00000000,0x00000000 + startup_kdump_relocated -.Lcopy_end: - -# -# Startup of kdump (relocated new kernel) -# -.align 2 -startup_kdump_relocated: - basr %r13,0 -0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... -.align 8 -.Lrestart_psw: - .quad 0x0000000080000000,0x0000000000000000 + startup -#else -.align 2 -.Lep_startup_kdump: -#ifdef CONFIG_64BIT - larl %r13,startup_kdump_crash - lpswe 0(%r13) -.align 8 -startup_kdump_crash: - .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash -#else - basr %r13,0 -0: lpsw startup_kdump_crash-0b(%r13) -.align 8 -startup_kdump_crash: - .long 0x000a0000,0x00000000 + startup_kdump_crash -#endif /* CONFIG_64BIT */ -#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c new file mode 100644 index 000000000000..217206522266 --- /dev/null +++ b/arch/s390/kernel/hiperdispatch.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "hd: " fmt + +/* + * Hiperdispatch: + * Dynamically calculates the optimum number of high capacity COREs + * by considering the state the system is in. When hiperdispatch decides + * that a capacity update is necessary, it schedules a topology update. + * During topology updates the CPU capacities are always re-adjusted. + * + * There is two places where CPU capacities are being accessed within + * hiperdispatch. + * -> hiperdispatch's reoccuring work function reads CPU capacities to + * determine high capacity CPU count. + * -> during a topology update hiperdispatch's adjustment function + * updates CPU capacities. + * These two can run on different CPUs in parallel which can cause + * hiperdispatch to make wrong decisions. This can potentially cause + * some overhead by leading to extra rebuild_sched_domains() calls + * for correction. Access to capacities within hiperdispatch has to be + * serialized to prevent the overhead. + * + * Hiperdispatch decision making revolves around steal time. + * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time + * crosses the threshold value hiperdispatch falls back to giving high + * capacities to entitled CPUs. When steal time drops below the + * threshold boundary, hiperdispatch utilizes all CPUs by giving all + * of them high capacity. + * + * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread + * performance. Comparing the throughput of; + * - single CORE, with N threads, running N tasks + * - N separate COREs running N tasks, + * using individual COREs for individual tasks yield better + * performance. This performance difference is roughly ~30% (can change + * between machine generations) + * + * Hiperdispatch tries to hint scheduler to use individual COREs for + * each task, as long as steal time on those COREs are less than 30%, + * therefore delaying the throughput loss caused by using SMP threads. + */ + +#include <linux/cpufeature.h> +#include <linux/cpumask.h> +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/kernel_stat.h> +#include <linux/kstrtox.h> +#include <linux/ktime.h> +#include <linux/sysctl.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <asm/hiperdispatch.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/topology.h> + +#define CREATE_TRACE_POINTS +#include <asm/trace/hiperdispatch.h> + +#define HD_DELAY_FACTOR (4) +#define HD_DELAY_INTERVAL (HZ / 4) +#define HD_STEAL_THRESHOLD 10 +#define HD_STEAL_AVG_WEIGHT 16 + +static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */ +static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */ +static int hd_high_capacity_cores; /* Current CORE count with high capacity */ +static int hd_entitled_cores; /* Total vertical high and medium CORE count */ +static int hd_online_cores; /* Current online CORE count */ + +static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */ +static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */ +static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */ +static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */ + +static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD; +static unsigned int hd_delay_factor = HD_DELAY_FACTOR; +static int hd_enabled; + +static void hd_capacity_work_fn(struct work_struct *work); +static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); + +static int hd_set_hiperdispatch_mode(int enable) +{ + if (!cpu_has_topology()) + enable = 0; + if (hd_enabled == enable) + return 0; + hd_enabled = enable; + return 1; +} + +void hd_reset_state(void) +{ + cpumask_clear(&hd_vl_coremask); + cpumask_clear(&hd_vmvl_cpumask); + hd_entitled_cores = 0; + hd_online_cores = 0; +} + +void hd_add_core(int cpu) +{ + const struct cpumask *siblings; + int polarization; + + hd_online_cores++; + polarization = smp_cpu_get_polarization(cpu); + siblings = topology_sibling_cpumask(cpu); + switch (polarization) { + case POLARIZATION_VH: + hd_entitled_cores++; + break; + case POLARIZATION_VM: + hd_entitled_cores++; + cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings); + break; + case POLARIZATION_VL: + cpumask_set_cpu(cpu, &hd_vl_coremask); + cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings); + break; + } +} + +/* Serialize update and read operations of debug counters. */ +static DEFINE_MUTEX(hd_counter_mutex); + +static void hd_update_times(void) +{ + static ktime_t prev; + ktime_t now; + + /* + * Check if hiperdispatch is active, if not set the prev to 0. + * This way it is possible to differentiate the first update iteration after + * enabling hiperdispatch. + */ + if (hd_entitled_cores == 0 || hd_enabled == 0) { + prev = ktime_set(0, 0); + return; + } + now = ktime_get(); + if (ktime_after(prev, 0)) { + if (hd_high_capacity_cores == hd_online_cores) + hd_high_time += ktime_ms_delta(now, prev); + else + hd_low_time += ktime_ms_delta(now, prev); + } + prev = now; +} + +static void hd_update_capacities(void) +{ + int cpu, upscaling_cores; + unsigned long capacity; + + upscaling_cores = hd_high_capacity_cores - hd_entitled_cores; + capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW; + hd_high_capacity_cores = hd_entitled_cores; + for_each_cpu(cpu, &hd_vl_coremask) { + smp_set_core_capacity(cpu, capacity); + if (capacity != CPU_CAPACITY_HIGH) + continue; + hd_high_capacity_cores++; + upscaling_cores--; + if (upscaling_cores == 0) + capacity = CPU_CAPACITY_LOW; + } +} + +void hd_disable_hiperdispatch(void) +{ + cancel_delayed_work_sync(&hd_capacity_work); + hd_high_capacity_cores = hd_online_cores; + hd_previous_steal = 0; +} + +int hd_enable_hiperdispatch(void) +{ + mutex_lock(&hd_counter_mutex); + hd_update_times(); + mutex_unlock(&hd_counter_mutex); + if (hd_enabled == 0) + return 0; + if (hd_entitled_cores == 0) + return 0; + if (hd_online_cores <= hd_entitled_cores) + return 0; + mod_delayed_work(system_dfl_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor); + hd_update_capacities(); + return 1; +} + +static unsigned long hd_steal_avg(unsigned long new) +{ + static unsigned long steal; + + steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT; + return steal; +} + +static unsigned long hd_calculate_steal_percentage(void) +{ + unsigned long time_delta, steal_delta, steal, percentage; + static ktime_t prev; + int cpus, cpu; + ktime_t now; + + cpus = 0; + steal = 0; + percentage = 0; + for_each_cpu(cpu, &hd_vmvl_cpumask) { + steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + cpus++; + } + /* + * If there is no vertical medium and low CPUs steal time + * is 0 as vertical high CPUs shouldn't experience steal time. + */ + if (cpus == 0) + return percentage; + now = ktime_get(); + time_delta = ktime_to_ns(ktime_sub(now, prev)); + if (steal > hd_previous_steal && hd_previous_steal != 0) { + steal_delta = (steal - hd_previous_steal) * 100 / time_delta; + percentage = steal_delta / cpus; + } + hd_previous_steal = steal; + prev = now; + return percentage; +} + +static void hd_capacity_work_fn(struct work_struct *work) +{ + unsigned long steal_percentage, new_cores; + + mutex_lock(&smp_cpu_state_mutex); + /* + * If online cores are less or equal to entitled cores hiperdispatch + * does not need to make any adjustments, call a topology update to + * disable hiperdispatch. + * Normally this check is handled on topology update, but during cpu + * unhotplug, topology and cpu mask updates are done in reverse + * order, causing hd_enable_hiperdispatch() to get stale data. + */ + if (hd_online_cores <= hd_entitled_cores) { + topology_schedule_update(); + mutex_unlock(&smp_cpu_state_mutex); + return; + } + steal_percentage = hd_steal_avg(hd_calculate_steal_percentage()); + if (steal_percentage < hd_steal_threshold) + new_cores = hd_online_cores; + else + new_cores = hd_entitled_cores; + if (hd_high_capacity_cores != new_cores) { + trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores); + hd_high_capacity_cores = new_cores; + atomic64_inc(&hd_adjustments); + topology_schedule_update(); + } + trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores); + mutex_unlock(&smp_cpu_state_mutex); + schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL); +} + +static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int hiperdispatch; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &hiperdispatch, + .maxlen = sizeof(int), + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + hiperdispatch = hd_enabled; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + mutex_lock(&smp_cpu_state_mutex); + if (hd_set_hiperdispatch_mode(hiperdispatch)) + topology_schedule_update(); + mutex_unlock(&smp_cpu_state_mutex); + return 0; +} + +static const struct ctl_table hiperdispatch_ctl_table[] = { + { + .procname = "hiperdispatch", + .mode = 0644, + .proc_handler = hiperdispatch_ctl_handler, + }, +}; + +static ssize_t hd_steal_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%u\n", hd_steal_threshold); +} + +static ssize_t hd_steal_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + if (val > 100) + return -ERANGE; + hd_steal_threshold = val; + return count; +} + +static DEVICE_ATTR_RW(hd_steal_threshold); + +static ssize_t hd_delay_factor_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%u\n", hd_delay_factor); +} + +static ssize_t hd_delay_factor_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + if (!val) + return -ERANGE; + hd_delay_factor = val; + return count; +} + +static DEVICE_ATTR_RW(hd_delay_factor); + +static struct attribute *hd_attrs[] = { + &dev_attr_hd_steal_threshold.attr, + &dev_attr_hd_delay_factor.attr, + NULL, +}; + +static const struct attribute_group hd_attr_group = { + .name = "hiperdispatch", + .attrs = hd_attrs, +}; + +static int hd_greedy_time_get(void *unused, u64 *val) +{ + mutex_lock(&hd_counter_mutex); + hd_update_times(); + *val = hd_high_time; + mutex_unlock(&hd_counter_mutex); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n"); + +static int hd_conservative_time_get(void *unused, u64 *val) +{ + mutex_lock(&hd_counter_mutex); + hd_update_times(); + *val = hd_low_time; + mutex_unlock(&hd_counter_mutex); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n"); + +static int hd_adjustment_count_get(void *unused, u64 *val) +{ + *val = atomic64_read(&hd_adjustments); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n"); + +static void __init hd_create_debugfs_counters(void) +{ + struct dentry *dir; + + dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir); + debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops); + debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops); + debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops); +} + +static void __init hd_create_attributes(void) +{ + struct device *dev; + + dev = bus_get_dev_root(&cpu_subsys); + if (!dev) + return; + if (sysfs_create_group(&dev->kobj, &hd_attr_group)) + pr_warn("Unable to create hiperdispatch attribute group\n"); + put_device(dev); +} + +static int __init hd_init(void) +{ + if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) { + hd_set_hiperdispatch_mode(1); + topology_schedule_update(); + } + if (!register_sysctl("s390", hiperdispatch_ctl_table)) + pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n"); + hd_create_debugfs_counters(); + hd_create_attributes(); + return 0; +} +late_initcall(hd_init); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c new file mode 100644 index 000000000000..39cb8d0ae348 --- /dev/null +++ b/arch/s390/kernel/idle.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Idle functions for s390. + * + * Copyright IBM Corp. 2014 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <trace/events/power.h> +#include <asm/cpu_mf.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include "entry.h" + +static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct lowcore *lc = get_lowcore(); + unsigned long idle_time; + u64 cycles_new[8]; + int i; + + if (smp_cpu_mtid) { + stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); + for (i = 0; i < smp_cpu_mtid; i++) + this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + + idle_time = lc->int_clock - idle->clock_idle_enter; + + lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; + lc->last_update_clock = lc->int_clock; + + lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; + lc->last_update_timer = lc->sys_enter_timer; + + /* Account time spent with enabled wait psw loaded as idle time. */ + WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); + WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); + account_idle_time(cputime_to_nsecs(idle_time)); +} + +void noinstr arch_cpu_idle(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + unsigned long psw_mask; + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + clear_cpu_flag(CIF_NOHZ_DELAY); + set_cpu_flag(CIF_ENABLED_WAIT); + if (smp_cpu_mtid) + stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter); + idle->clock_idle_enter = get_tod_clock_fast(); + idle->timer_idle_enter = get_cpu_timer(); + bpon(); + __load_psw_mask(psw_mask); +} + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count)); +} +DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12); +} +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +void arch_cpu_idle_enter(void) +{ +} + +void arch_cpu_idle_exit(void) +{ +} + +void __noreturn arch_cpu_idle_dead(void) +{ + cpu_die(); +} diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c new file mode 100644 index 000000000000..f3c3e6e1c5d3 --- /dev/null +++ b/arch/s390/kernel/ima_arch.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ima.h> +#include <asm/boot_data.h> + +bool arch_ima_get_secureboot(void) +{ + return ipl_secure_flag; +} + +const char * const *arch_get_ima_policy(void) +{ + return NULL; +} diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index feb719d3c851..961a3d60a4dd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,45 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ipl/reipl/dump support for Linux on s390. * * Copyright IBM Corp. 2005, 2012 * Author(s): Michael Holzheu <holzheu@de.ibm.com> - * Heiko Carstens <heiko.carstens@de.ibm.com> * Volker Sameske <sameske@de.ibm.com> */ #include <linux/types.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/init.h> #include <linux/device.h> #include <linux/delay.h> +#include <linux/kstrtox.h> +#include <linux/panic_notifier.h> #include <linux/reboot.h> #include <linux/ctype.h> #include <linux/fs.h> #include <linux/gfp.h> #include <linux/crash_dump.h> #include <linux/debug_locks.h> +#include <linux/vmalloc.h> +#include <asm/asm-extable.h> +#include <asm/machine.h> +#include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> #include <asm/cpcmd.h> -#include <asm/cio.h> #include <asm/ebcdic.h> -#include <asm/reset.h> #include <asm/sclp.h> #include <asm/checksum.h> #include <asm/debug.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> +#include <asm/sections.h> +#include <asm/boot_data.h> #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 #define IPL_UNKNOWN_STR "unknown" #define IPL_CCW_STR "ccw" +#define IPL_ECKD_STR "eckd" +#define IPL_ECKD_DUMP_STR "eckd_dump" #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NVME_STR "nvme" +#define IPL_NVME_DUMP_STR "nvme_dump" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" +#define DUMP_ECKD_STR "eckd" #define DUMP_FCP_STR "fcp" +#define DUMP_NVME_STR "nvme" #define DUMP_NONE_STR "none" /* @@ -84,12 +98,20 @@ static char *ipl_type_str(enum ipl_type type) switch (type) { case IPL_TYPE_CCW: return IPL_CCW_STR; + case IPL_TYPE_ECKD: + return IPL_ECKD_STR; + case IPL_TYPE_ECKD_DUMP: + return IPL_ECKD_DUMP_STR; case IPL_TYPE_FCP: return IPL_FCP_STR; case IPL_TYPE_FCP_DUMP: return IPL_FCP_DUMP_STR; case IPL_TYPE_NSS: return IPL_NSS_STR; + case IPL_TYPE_NVME: + return IPL_NVME_STR; + case IPL_TYPE_NVME_DUMP: + return IPL_NVME_DUMP_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -100,6 +122,8 @@ enum dump_type { DUMP_TYPE_NONE = 1, DUMP_TYPE_CCW = 2, DUMP_TYPE_FCP = 4, + DUMP_TYPE_NVME = 8, + DUMP_TYPE_ECKD = 16, }; static char *dump_type_str(enum dump_type type) @@ -109,97 +133,120 @@ static char *dump_type_str(enum dump_type type) return DUMP_NONE_STR; case DUMP_TYPE_CCW: return DUMP_CCW_STR; + case DUMP_TYPE_ECKD: + return DUMP_ECKD_STR; case DUMP_TYPE_FCP: return DUMP_FCP_STR; + case DUMP_TYPE_NVME: + return DUMP_NVME_STR; default: return NULL; } } -/* - * Must be in data section since the bss section - * is not cleared when these are accessed. - */ -static u16 ipl_devno __attribute__((__section__(".data"))) = 0; -u32 ipl_flags __attribute__((__section__(".data"))) = 0; - -enum ipl_method { - REIPL_METHOD_CCW_CIO, - REIPL_METHOD_CCW_DIAG, - REIPL_METHOD_CCW_VM, - REIPL_METHOD_FCP_RO_DIAG, - REIPL_METHOD_FCP_RW_DIAG, - REIPL_METHOD_FCP_RO_VM, - REIPL_METHOD_FCP_DUMP, - REIPL_METHOD_NSS, - REIPL_METHOD_NSS_DIAG, - REIPL_METHOD_DEFAULT, -}; - -enum dump_method { - DUMP_METHOD_NONE, - DUMP_METHOD_CCW_CIO, - DUMP_METHOD_CCW_DIAG, - DUMP_METHOD_CCW_VM, - DUMP_METHOD_FCP_DIAG, -}; +int __bootdata_preserved(ipl_block_valid); +struct ipl_parameter_block __bootdata_preserved(ipl_block); +int __bootdata_preserved(ipl_secure_flag); -static int diag308_set_works = 0; +unsigned long __bootdata_preserved(ipl_cert_list_addr); +unsigned long __bootdata_preserved(ipl_cert_list_size); -static struct ipl_parameter_block ipl_block; +unsigned long __bootdata(early_ipl_comp_list_addr); +unsigned long __bootdata(early_ipl_comp_list_size); static int reipl_capabilities = IPL_TYPE_UNKNOWN; static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; -static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_nvme; static struct ipl_parameter_block *reipl_block_ccw; +static struct ipl_parameter_block *reipl_block_eckd; static struct ipl_parameter_block *reipl_block_nss; static struct ipl_parameter_block *reipl_block_actual; static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; -static enum dump_method dump_method = DUMP_METHOD_NONE; static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_nvme; static struct ipl_parameter_block *dump_block_ccw; +static struct ipl_parameter_block *dump_block_eckd; static struct sclp_ipl_info sclp_ipl_info; -int diag308(unsigned long subcode, void *addr) +static bool reipl_nvme_clear; +static bool reipl_fcp_clear; +static bool reipl_ccw_clear; +static bool reipl_eckd_clear; + +static unsigned long os_info_flags; + +static inline int __diag308(unsigned long subcode, unsigned long addr) { - register unsigned long _addr asm("0") = (unsigned long) addr; - register unsigned long _rc asm("1") = 0; + union register_pair r1; - asm volatile( - " diag %0,%2,0x308\n" - "0:\n" + r1.even = addr; + r1.odd = 0; + asm_inline volatile( + " diag %[r1],%[subcode],0x308\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) - : "+d" (_addr), "+d" (_rc) - : "d" (subcode) : "cc", "memory"); - return _rc; + : [r1] "+&d" (r1.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + return r1.odd; +} + +int diag308(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X308); + return __diag308(subcode, addr ? virt_to_phys(addr) : 0); } EXPORT_SYMBOL_GPL(diag308); /* SYSFS */ -#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...) \ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return sprintf(page, _format, _value); \ -} \ -static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL); + return sysfs_emit(page, _format, ##args); \ +} -#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ -static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ +#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ struct kobj_attribute *attr, \ - char *page) \ + const char *buf, size_t len) \ { \ - return sprintf(page, _fmt_out, \ - (unsigned long long) _value); \ -} \ + unsigned long long ssid, devno; \ + \ + if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \ + return -EINVAL; \ + \ + if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \ + return -EINVAL; \ + \ + _ipl_blk.ssid = ssid; \ + _ipl_blk.devno = devno; \ + return len; \ +} + +#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \ +IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \ + _ipl_blk.ssid, _ipl_blk.devno); \ +IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) \ + +#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL) + +#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ +IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ struct kobj_attribute *attr, \ const char *buf, size_t len) \ @@ -211,37 +258,83 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name,(S_IRUGO | S_IWUSR), \ + __ATTR(_name, 0644, \ sys_##_prefix##_##_name##_show, \ - sys_##_prefix##_##_name##_store); + sys_##_prefix##_##_name##_store) #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ -static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - return sprintf(page, _fmt_out, _value); \ -} \ +IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ struct kobj_attribute *attr, \ const char *buf, size_t len) \ { \ - strncpy(_value, buf, sizeof(_value) - 1); \ + if (len >= sizeof(_value)) \ + return -E2BIG; \ + len = strscpy(_value, buf); \ + if ((ssize_t)len < 0) \ + return len; \ strim(_value); \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name,(S_IRUGO | S_IWUSR), \ + __ATTR(_name, 0644, \ sys_##_prefix##_##_name##_show, \ - sys_##_prefix##_##_name##_store); - -static void make_attrs_ro(struct attribute **attrs) -{ - while (*attrs) { - (*attrs)->mode = S_IRUGO; - attrs++; - } -} + sys_##_prefix##_##_name##_store) + +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ +} + +#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ +} + +#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ +IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ + __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ + NULL, _size) + +#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ +IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ + __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ + sys_##_prefix##_scp_data_store, _size) /* * ipl section @@ -249,21 +342,29 @@ static void make_attrs_ro(struct attribute **attrs) static __init enum ipl_type get_ipl_type(void) { - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - - if (ipl_flags & IPL_NSS_VALID) - return IPL_TYPE_NSS; - if (!(ipl_flags & IPL_DEVNO_VALID)) + if (!ipl_block_valid) return IPL_TYPE_UNKNOWN; - if (!(ipl_flags & IPL_PARMBLOCK_VALID)) + + switch (ipl_block.pb0_hdr.pbt) { + case IPL_PBT_CCW: return IPL_TYPE_CCW; - if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) - return IPL_TYPE_UNKNOWN; - if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP) - return IPL_TYPE_UNKNOWN; - if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) - return IPL_TYPE_FCP_DUMP; - return IPL_TYPE_FCP; + case IPL_PBT_FCP: + if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return IPL_TYPE_FCP_DUMP; + else + return IPL_TYPE_FCP; + case IPL_PBT_NVME: + if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return IPL_TYPE_NVME_DUMP; + else + return IPL_TYPE_NVME; + case IPL_PBT_ECKD: + if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return IPL_TYPE_ECKD_DUMP; + else + return IPL_TYPE_ECKD; + } + return IPL_TYPE_UNKNOWN; } struct ipl_info ipl_info; @@ -272,204 +373,200 @@ EXPORT_SYMBOL_GPL(ipl_info); static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); + return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type)); } static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); -/* VM IPL PARM routines */ -static size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static ssize_t ipl_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) { - int i; - size_t len; - char has_lowercase = 0; - - len = 0; - if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && - (ipb->ipl_info.ccw.vm_parm_len > 0)) { - - len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); - memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); - /* If at least one character is lowercase, we assume mixed - * case; otherwise we convert everything to lowercase. - */ - for (i = 0; i < len; i++) - if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ - (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ - (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ - has_lowercase = 1; - break; - } - if (!has_lowercase) - EBC_TOLOWER(dest, len); - EBCASC(dest, len); - } - dest[len] = 0; - - return len; + return sysfs_emit(page, "%i\n", !!ipl_secure_flag); } -size_t append_ipl_vmparm(char *dest, size_t size) -{ - size_t rc; +static struct kobj_attribute sys_ipl_secure_attr = + __ATTR(secure, 0444, ipl_secure_show, NULL); - rc = 0; - if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) - rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); - else - dest[0] = 0; - return rc; +static ssize_t ipl_has_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sysfs_emit(page, "%i\n", !!sclp.has_sipl); } +static struct kobj_attribute sys_ipl_has_secure_attr = + __ATTR(has_secure, 0444, ipl_has_secure_show, NULL); + static ssize_t ipl_vm_parm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { char parm[DIAG308_VMPARM_SIZE + 1] = {}; - append_ipl_vmparm(parm, sizeof(parm)); - return sprintf(page, "%s\n", parm); + if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW)) + ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block); + return sysfs_emit(page, "%s\n", parm); } -static size_t scpdata_length(const char* buf, size_t count) -{ - while (count) { - if (buf[count - 1] != '\0' && buf[count - 1] != ' ') - break; - count--; - } - return count; -} - -static size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) -{ - size_t count; - size_t i; - int has_lowercase; - - count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, - ipb->ipl_info.fcp.scp_data_len)); - if (!count) - goto out; - - has_lowercase = 0; - for (i = 0; i < count; i++) { - if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { - count = 0; - goto out; - } - if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) - has_lowercase = 1; - } - - if (has_lowercase) - memcpy(dest, ipb->ipl_info.fcp.scp_data, count); - else - for (i = 0; i < count; i++) - dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); -out: - dest[count] = '\0'; - return count; -} - -size_t append_ipl_scpdata(char *dest, size_t len) -{ - size_t rc; - - rc = 0; - if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) - rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); - else - dest[0] = 0; - return rc; -} - - static struct kobj_attribute sys_ipl_vm_parm_attr = - __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); + __ATTR(parm, 0444, ipl_vm_parm_show, NULL); static ssize_t sys_ipl_device_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - switch (ipl_info.type) { case IPL_TYPE_CCW: - return sprintf(page, "0.0.%04x\n", ipl_devno); + return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid, + ipl_block.ccw.devno); + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid, + ipl_block.eckd.devno); case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: - return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno); + return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno); + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; } } static struct kobj_attribute sys_ipl_device_attr = - __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); + __ATTR(device, 0444, sys_ipl_device_show, NULL); -static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) +static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { - return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START, - IPL_PARMBLOCK_SIZE); + return memory_read_from_buffer(buf, count, &off, &ipl_block, + ipl_block.hdr.len); } +static const struct bin_attribute sys_ipl_parameter_attr = + __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, + PAGE_SIZE); -static struct bin_attribute ipl_parameter_attr = { - .attr = { - .name = "binary_parameter", - .mode = S_IRUGO, - }, - .size = PAGE_SIZE, - .read = &ipl_parameter_read, -}; +DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) -{ - unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; - void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { + &sys_ipl_parameter_attr, + &sys_ipl_fcp_scp_data_attr, + NULL, +}; - return memory_read_from_buffer(buf, count, &off, scp_data, size); -} +DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute ipl_scp_data_attr = { - .attr = { - .name = "scp_data", - .mode = S_IRUGO, - }, - .size = PAGE_SIZE, - .read = ipl_scp_data_read, +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { + &sys_ipl_parameter_attr, + &sys_ipl_nvme_scp_data_attr, + NULL, }; -/* FCP ipl device attributes */ - -DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->ipl_info.fcp.lun); -DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->ipl_info.fcp.bootprog); -DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct attribute *ipl_fcp_attrs[] = { - &sys_ipl_type_attr.attr, - &sys_ipl_device_attr.attr, - &sys_ipl_fcp_wwpn_attr.attr, - &sys_ipl_fcp_lun_attr.attr, - &sys_ipl_fcp_bootprog_attr.attr, - &sys_ipl_fcp_br_lba_attr.attr, +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { + &sys_ipl_parameter_attr, + &sys_ipl_eckd_scp_data_attr, NULL, }; -static struct attribute_group ipl_fcp_attr_group = { - .attrs = ipl_fcp_attrs, -}; +/* FCP ipl device attributes */ -/* CCW ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", + (unsigned long long)ipl_block.fcp.wwpn); +DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", + (unsigned long long)ipl_block.fcp.lun); +DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", + (unsigned long long)ipl_block.fcp.bootprog); +DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", + (unsigned long long)ipl_block.fcp.br_lba); + +/* NVMe ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.fid); +DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.nsid); +DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", + (unsigned long long)ipl_block.nvme.bootprog); +DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", + (unsigned long long)ipl_block.nvme.br_lba); + +/* ECKD ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n", + (unsigned long long)ipl_block.eckd.bootprog); + +#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + \ + if (!ipb->br_chr.cyl && \ + !ipb->br_chr.head && \ + !ipb->br_chr.record) \ + return sysfs_emit(buf, "auto\n"); \ + \ + return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \ + ipb->br_chr.cyl, \ + ipb->br_chr.head, \ + ipb->br_chr.record); \ +} + +#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + unsigned long args[3] = { 0 }; \ + char *p, *p1, *tmp = NULL; \ + int i, rc; \ + \ + if (!strncmp(buf, "auto", 4)) \ + goto out; \ + \ + tmp = kstrdup(buf, GFP_KERNEL); \ + p = tmp; \ + for (i = 0; i < 3; i++) { \ + p1 = strsep(&p, ", "); \ + if (!p1) { \ + rc = -EINVAL; \ + goto err; \ + } \ + rc = kstrtoul(p1, 0, args + i); \ + if (rc) \ + goto err; \ + } \ + \ + rc = -EINVAL; \ + if (i != 3) \ + goto err; \ + \ + if ((args[0] || args[1]) && !args[2]) \ + goto err; \ + \ + if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255) \ + goto err; \ + \ +out: \ + ipb->br_chr.cyl = args[0]; \ + ipb->br_chr.head = args[1]; \ + ipb->br_chr.record = args[2]; \ + rc = len; \ +err: \ + kfree(tmp); \ + return rc; \ +} + +IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd); +static struct kobj_attribute sys_ipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL); + +IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd); + +static struct kobj_attribute sys_reipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store); static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -477,97 +574,97 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, char loadparm[LOADPARM_LEN + 1] = {}; if (!sclp_ipl_info.is_valid) - return sprintf(page, "#unknown#\n"); + return sysfs_emit(page, "#unknown#\n"); memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); strim(loadparm); - return sprintf(page, "%s\n", loadparm); + return sysfs_emit(page, "%s\n", loadparm); } static struct kobj_attribute sys_ipl_ccw_loadparm_attr = __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); -static struct attribute *ipl_ccw_attrs_vm[] = { - &sys_ipl_type_attr.attr, +static struct attribute *ipl_fcp_attrs[] = { &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_vm_parm_attr.attr, NULL, }; -static struct attribute *ipl_ccw_attrs_lpar[] = { - &sys_ipl_type_attr.attr, - &sys_ipl_device_attr.attr, +static const struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, + .bin_attrs = ipl_fcp_bin_attrs, +}; + +static struct attribute *ipl_nvme_attrs[] = { + &sys_ipl_nvme_fid_attr.attr, + &sys_ipl_nvme_nsid_attr.attr, + &sys_ipl_nvme_bootprog_attr.attr, + &sys_ipl_nvme_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { - .attrs = ipl_ccw_attrs_vm, +static const struct attribute_group ipl_nvme_attr_group = { + .attrs = ipl_nvme_attrs, + .bin_attrs = ipl_nvme_bin_attrs, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { - .attrs = ipl_ccw_attrs_lpar +static struct attribute *ipl_eckd_attrs[] = { + &sys_ipl_eckd_bootprog_attr.attr, + &sys_ipl_eckd_br_chr_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_device_attr.attr, + NULL, }; -/* NSS ipl device attributes */ +static const struct attribute_group ipl_eckd_attr_group = { + .attrs = ipl_eckd_attrs, + .bin_attrs = ipl_eckd_bin_attrs, +}; -DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); +/* CCW ipl device attributes */ -static struct attribute *ipl_nss_attrs[] = { - &sys_ipl_type_attr.attr, - &sys_ipl_nss_name_attr.attr, +static struct attribute *ipl_ccw_attrs_vm[] = { + &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_vm_parm_attr.attr, NULL, }; -static struct attribute_group ipl_nss_attr_group = { - .attrs = ipl_nss_attrs, +static struct attribute *ipl_ccw_attrs_lpar[] = { + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static const struct attribute_group ipl_ccw_attr_group_vm = { + .attrs = ipl_ccw_attrs_vm, }; -/* UNKNOWN ipl device attributes */ +static const struct attribute_group ipl_ccw_attr_group_lpar = { + .attrs = ipl_ccw_attrs_lpar +}; -static struct attribute *ipl_unknown_attrs[] = { +static struct attribute *ipl_common_attrs[] = { &sys_ipl_type_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; -static struct attribute_group ipl_unknown_attr_group = { - .attrs = ipl_unknown_attrs, +static const struct attribute_group ipl_common_attr_group = { + .attrs = ipl_common_attrs, }; static struct kset *ipl_kset; -static int __init ipl_register_fcp_files(void) -{ - int rc; - - rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); - if (rc) - goto out; - rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); - if (rc) - goto out_ipl_parm; - rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr); - if (!rc) - goto out; - - sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); - -out_ipl_parm: - sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group); -out: - return rc; -} - static void __ipl_run(void *unused) { - diag308(DIAG308_IPL, NULL); - if (MACHINE_IS_VM) - __cpcmd("IPL", NULL, 0, NULL); - else if (ipl_info.type == IPL_TYPE_CCW) - reipl_ccw_dev(&ipl_info.data.ccw.dev_id); + diag308(DIAG308_LOAD_CLEAR, NULL); } static void ipl_run(struct shutdown_trigger *trigger) @@ -584,25 +681,31 @@ static int __init ipl_init(void) rc = -ENOMEM; goto out; } + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group); + if (rc) + goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_lpar); break; + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); + break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: - rc = ipl_register_fcp_files(); + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; - case IPL_TYPE_NSS: - rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group); + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); break; default: - rc = sysfs_create_group(&ipl_kset->kobj, - &ipl_unknown_attr_group); break; } out: @@ -628,8 +731,8 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, { char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; - reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); - return sprintf(page, "%s\n", vmparm); + ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + return sysfs_emit(page, "%s\n", vmparm); } static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, @@ -651,14 +754,14 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) return -EINVAL; - memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); - ipb->ipl_info.ccw.vm_parm_len = ip_len; + memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ccw.vm_parm_len = ip_len; if (ip_len > 0) { - ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; - memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len); - ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len); + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; + memcpy(ipb->ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ccw.vm_parm, ip_len); } else { - ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP; } return len; @@ -693,100 +796,39 @@ static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_nss_vmparm_attr = - __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show, - reipl_nss_vmparm_store); + __ATTR(parm, 0644, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); static struct kobj_attribute sys_reipl_ccw_vmparm_attr = - __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show, - reipl_ccw_vmparm_store); + __ATTR(parm, 0644, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); /* FCP reipl device attributes */ -static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, - char *buf, loff_t off, size_t count) -{ - size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; - void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; - - return memory_read_from_buffer(buf, count, &off, scp_data, size); -} - -static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, - char *buf, loff_t off, size_t count) -{ - size_t padding; - size_t scpdata_len; - - if (off < 0) - return -EINVAL; - - if (off >= DIAG308_SCPDATA_SIZE) - return -ENOSPC; - - if (count > DIAG308_SCPDATA_SIZE - off) - count = DIAG308_SCPDATA_SIZE - off; +DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, + reipl_block_fcp->fcp, + IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, + DIAG308_SCPDATA_SIZE); - memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); - scpdata_len = off + count; - - if (scpdata_len % 8) { - padding = 8 - (scpdata_len % 8); - memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, - 0, padding); - scpdata_len += padding; - } - - reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; - reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; - reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; - - return count; -} - -static struct bin_attribute sys_reipl_fcp_scp_data_attr = { - .attr = { - .name = "scp_data", - .mode = S_IRUGO | S_IWUSR, - }, - .size = PAGE_SIZE, - .read = reipl_fcp_scpdata_read, - .write = reipl_fcp_scpdata_write, +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { + &sys_reipl_fcp_scp_data_attr, + NULL, }; DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", - reipl_block_fcp->ipl_info.fcp.wwpn); + reipl_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", - reipl_block_fcp->ipl_info.fcp.lun); + reipl_block_fcp->fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", - reipl_block_fcp->ipl_info.fcp.bootprog); + reipl_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", - reipl_block_fcp->ipl_info.fcp.br_lba); + reipl_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", - reipl_block_fcp->ipl_info.fcp.devno); - -static struct attribute *reipl_fcp_attrs[] = { - &sys_reipl_fcp_device_attr.attr, - &sys_reipl_fcp_wwpn_attr.attr, - &sys_reipl_fcp_lun_attr.attr, - &sys_reipl_fcp_bootprog_attr.attr, - &sys_reipl_fcp_br_lba_attr.attr, - NULL, -}; - -static struct attribute_group reipl_fcp_attr_group = { - .attrs = reipl_fcp_attrs, -}; - -/* CCW reipl device attributes */ - -DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", - reipl_block_ccw->ipl_info.ccw.devno); + reipl_block_fcp->fcp.devno); static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) { - memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); + memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); loadparm[LOADPARM_LEN] = 0; strim(loadparm); @@ -798,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, char buf[LOADPARM_LEN + 1]; reipl_get_ascii_loadparm(buf, ipb); - return sprintf(page, "%s\n", buf); + return sysfs_emit(page, "%s\n", buf); } static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, @@ -821,55 +863,155 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, return -EINVAL; } /* initialize loadparm with blanks */ - memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN); + memset(ipb->common.loadparm, ' ', LOADPARM_LEN); /* copy and convert to ebcdic */ - memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len); - ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN); + memcpy(ipb->common.loadparm, buf, lp_len); + ASCEBC(ipb->common.loadparm, LOADPARM_LEN); + ipb->common.flags |= IPL_PB0_FLAG_LOADPARM; return len; } -/* NSS wrapper */ -static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) +#define DEFINE_GENERIC_LOADPARM(name) \ +static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *page) \ +{ \ + return reipl_generic_loadparm_show(reipl_block_##name, page); \ +} \ +static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \ +} \ +static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \ + __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \ + reipl_##name##_loadparm_store) + +DEFINE_GENERIC_LOADPARM(fcp); +DEFINE_GENERIC_LOADPARM(nvme); +DEFINE_GENERIC_LOADPARM(ccw); +DEFINE_GENERIC_LOADPARM(nss); +DEFINE_GENERIC_LOADPARM(eckd); + +static ssize_t reipl_fcp_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) { - return reipl_generic_loadparm_show(reipl_block_nss, page); + return sysfs_emit(page, "%u\n", reipl_fcp_clear); } -static ssize_t reipl_nss_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) +static ssize_t reipl_fcp_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) { - return reipl_generic_loadparm_store(reipl_block_nss, buf, len); + if (kstrtobool(buf, &reipl_fcp_clear) < 0) + return -EINVAL; + return len; } -/* CCW wrapper */ -static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + &sys_reipl_fcp_loadparm_attr.attr, + NULL, +}; + +static const struct attribute_group reipl_fcp_attr_group = { + .attrs = reipl_fcp_attrs, + .bin_attrs = reipl_fcp_bin_attrs, +}; + +static struct kobj_attribute sys_reipl_fcp_clear_attr = + __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store); + +/* NVME reipl device attributes */ + +DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, + reipl_block_nvme->nvme, + IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, + DIAG308_SCPDATA_SIZE); + +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { + &sys_reipl_nvme_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.br_lba); + +static struct attribute *reipl_nvme_attrs[] = { + &sys_reipl_nvme_fid_attr.attr, + &sys_reipl_nvme_nsid_attr.attr, + &sys_reipl_nvme_bootprog_attr.attr, + &sys_reipl_nvme_br_lba_attr.attr, + &sys_reipl_nvme_loadparm_attr.attr, + NULL, +}; + +static const struct attribute_group reipl_nvme_attr_group = { + .attrs = reipl_nvme_attrs, + .bin_attrs = reipl_nvme_bin_attrs +}; + +static ssize_t reipl_nvme_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sysfs_emit(page, "%u\n", reipl_nvme_clear); +} + +static ssize_t reipl_nvme_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_nvme_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_nvme_clear_attr = + __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store); + +/* CCW reipl device attributes */ +DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); + +static ssize_t reipl_ccw_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) { - return reipl_generic_loadparm_show(reipl_block_ccw, page); + return sysfs_emit(page, "%u\n", reipl_ccw_clear); } -static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) +static ssize_t reipl_ccw_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) { - return reipl_generic_loadparm_store(reipl_block_ccw, buf, len); + if (kstrtobool(buf, &reipl_ccw_clear) < 0) + return -EINVAL; + return len; } -static struct kobj_attribute sys_reipl_ccw_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, - reipl_ccw_loadparm_store); +static struct kobj_attribute sys_reipl_ccw_clear_attr = + __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store); static struct attribute *reipl_ccw_attrs_vm[] = { &sys_reipl_ccw_device_attr.attr, &sys_reipl_ccw_loadparm_attr.attr, &sys_reipl_ccw_vmparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, NULL, }; static struct attribute *reipl_ccw_attrs_lpar[] = { &sys_reipl_ccw_device_attr.attr, &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, NULL, }; @@ -883,12 +1025,58 @@ static struct attribute_group reipl_ccw_attr_group_lpar = { .attrs = reipl_ccw_attrs_lpar, }; +/* ECKD reipl device attributes */ + +DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, + reipl_block_eckd->eckd, + IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, + DIAG308_SCPDATA_SIZE); + +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { + &sys_reipl_eckd_scp_data_attr, + NULL, +}; + +DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", + reipl_block_eckd->eckd.bootprog); + +static struct attribute *reipl_eckd_attrs[] = { + &sys_reipl_eckd_device_attr.attr, + &sys_reipl_eckd_bootprog_attr.attr, + &sys_reipl_eckd_br_chr_attr.attr, + &sys_reipl_eckd_loadparm_attr.attr, + NULL, +}; + +static const struct attribute_group reipl_eckd_attr_group = { + .attrs = reipl_eckd_attrs, + .bin_attrs = reipl_eckd_bin_attrs +}; + +static ssize_t reipl_eckd_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sysfs_emit(page, "%u\n", reipl_eckd_clear); +} + +static ssize_t reipl_eckd_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_eckd_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_eckd_clear_attr = + __ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store); /* NSS reipl device attributes */ static void reipl_get_ascii_nss_name(char *dst, struct ipl_parameter_block *ipb) { - memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE); EBCASC(dst, NSS_NAME_SIZE); dst[NSS_NAME_SIZE] = 0; } @@ -899,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj, char nss_name[NSS_NAME_SIZE + 1] = {}; reipl_get_ascii_nss_name(nss_name, reipl_block_nss); - return sprintf(page, "%s\n", nss_name); + return sysfs_emit(page, "%s\n", nss_name); } static ssize_t reipl_nss_name_store(struct kobject *kobj, @@ -916,28 +1104,22 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj, if (nss_len > NSS_NAME_SIZE) return -EINVAL; - memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE); + memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE); if (nss_len > 0) { - reipl_block_nss->ipl_info.ccw.vm_flags |= - DIAG308_VM_FLAGS_NSS_VALID; - memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len); - ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); - EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS; + memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len); } else { - reipl_block_nss->ipl_info.ccw.vm_flags &= - ~DIAG308_VM_FLAGS_NSS_VALID; + reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS; } return len; } static struct kobj_attribute sys_reipl_nss_name_attr = - __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show, - reipl_nss_name_store); - -static struct kobj_attribute sys_reipl_nss_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show, - reipl_nss_loadparm_store); + __ATTR(name, 0644, reipl_nss_name_show, + reipl_nss_name_store); static struct attribute *reipl_nss_attrs[] = { &sys_reipl_nss_name_attr.attr, @@ -951,11 +1133,10 @@ static struct attribute_group reipl_nss_attr_group = { .attrs = reipl_nss_attrs, }; -static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +void set_os_info_reipl_block(void) { - reipl_block_actual = reipl_block; - os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, - reipl_block->hdr.len); + os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block_actual->hdr.len); } /* reipl type */ @@ -967,38 +1148,22 @@ static int reipl_set_type(enum ipl_type type) switch(type) { case IPL_TYPE_CCW: - if (diag308_set_works) - reipl_method = REIPL_METHOD_CCW_DIAG; - else if (MACHINE_IS_VM) - reipl_method = REIPL_METHOD_CCW_VM; - else - reipl_method = REIPL_METHOD_CCW_CIO; - set_reipl_block_actual(reipl_block_ccw); + reipl_block_actual = reipl_block_ccw; + break; + case IPL_TYPE_ECKD: + reipl_block_actual = reipl_block_eckd; break; case IPL_TYPE_FCP: - if (diag308_set_works) - reipl_method = REIPL_METHOD_FCP_RW_DIAG; - else if (MACHINE_IS_VM) - reipl_method = REIPL_METHOD_FCP_RO_VM; - else - reipl_method = REIPL_METHOD_FCP_RO_DIAG; - set_reipl_block_actual(reipl_block_fcp); + reipl_block_actual = reipl_block_fcp; break; - case IPL_TYPE_FCP_DUMP: - reipl_method = REIPL_METHOD_FCP_DUMP; + case IPL_TYPE_NVME: + reipl_block_actual = reipl_block_nvme; break; case IPL_TYPE_NSS: - if (diag308_set_works) - reipl_method = REIPL_METHOD_NSS_DIAG; - else - reipl_method = REIPL_METHOD_NSS; - set_reipl_block_actual(reipl_block_nss); - break; - case IPL_TYPE_UNKNOWN: - reipl_method = REIPL_METHOD_DEFAULT; + reipl_block_actual = reipl_block_nss; break; default: - BUG(); + break; } reipl_type = type; return 0; @@ -1007,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type) static ssize_t reipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", ipl_type_str(reipl_type)); + return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type)); } static ssize_t reipl_type_store(struct kobject *kobj, @@ -1018,8 +1183,12 @@ static ssize_t reipl_type_store(struct kobject *kobj, if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_ECKD); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NVME); else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) rc = reipl_set_type(IPL_TYPE_NSS); return (rc != 0) ? rc : len; @@ -1030,81 +1199,53 @@ static struct kobj_attribute reipl_type_attr = static struct kset *reipl_kset; static struct kset *reipl_fcp_kset; - -static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, - const enum ipl_method m) -{ - char loadparm[LOADPARM_LEN + 1] = {}; - char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; - char nss_name[NSS_NAME_SIZE + 1] = {}; - size_t pos = 0; - - reipl_get_ascii_loadparm(loadparm, ipb); - reipl_get_ascii_nss_name(nss_name, ipb); - reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); - - switch (m) { - case REIPL_METHOD_CCW_VM: - pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno); - break; - case REIPL_METHOD_NSS: - pos = sprintf(dst, "IPL %s", nss_name); - break; - default: - break; - } - if (strlen(loadparm) > 0) - pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm); - if (strlen(vmparm) > 0) - sprintf(dst + pos, " PARM %s", vmparm); -} +static struct kset *reipl_nvme_kset; +static struct kset *reipl_eckd_kset; static void __reipl_run(void *unused) { - struct ccw_dev_id devid; - static char buf[128]; - - switch (reipl_method) { - case REIPL_METHOD_CCW_CIO: - devid.devno = reipl_block_ccw->ipl_info.ccw.devno; - devid.ssid = 0; - reipl_ccw_dev(&devid); - break; - case REIPL_METHOD_CCW_VM: - get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM); - __cpcmd(buf, NULL, 0, NULL); - break; - case REIPL_METHOD_CCW_DIAG: + switch (reipl_type) { + case IPL_TYPE_CCW: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_IPL, NULL); + if (reipl_ccw_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); break; - case REIPL_METHOD_FCP_RW_DIAG: - diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_IPL, NULL); + case IPL_TYPE_ECKD: + diag308(DIAG308_SET, reipl_block_eckd); + if (reipl_eckd_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; - case REIPL_METHOD_FCP_RO_DIAG: - diag308(DIAG308_IPL, NULL); + case IPL_TYPE_FCP: + diag308(DIAG308_SET, reipl_block_fcp); + if (reipl_fcp_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; - case REIPL_METHOD_FCP_RO_VM: - __cpcmd("IPL", NULL, 0, NULL); + case IPL_TYPE_NVME: + diag308(DIAG308_SET, reipl_block_nvme); + if (reipl_nvme_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; - case REIPL_METHOD_NSS_DIAG: + case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); - diag308(DIAG308_IPL, NULL); - break; - case REIPL_METHOD_NSS: - get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); - __cpcmd(buf, NULL, 0, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; - case REIPL_METHOD_DEFAULT: - if (MACHINE_IS_VM) - __cpcmd("IPL", NULL, 0, NULL); - diag308(DIAG308_IPL, NULL); + case IPL_TYPE_UNKNOWN: + diag308(DIAG308_LOAD_CLEAR, NULL); break; - case REIPL_METHOD_FCP_DUMP: + case IPL_TYPE_FCP_DUMP: + case IPL_TYPE_NVME_DUMP: + case IPL_TYPE_ECKD_DUMP: break; } - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); } static void reipl_run(struct shutdown_trigger *trigger) @@ -1114,10 +1255,10 @@ static void reipl_run(struct shutdown_trigger *trigger) static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) { - ipb->hdr.len = IPL_PARM_BLK_CCW_LEN; + ipb->hdr.len = IPL_BP_CCW_LEN; ipb->hdr.version = IPL_PARM_BLOCK_VERSION; - ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; - ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW; + ipb->pb0_hdr.len = IPL_BP0_CCW_LEN; + ipb->pb0_hdr.pbt = IPL_PBT_CCW; } static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) @@ -1125,22 +1266,20 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) /* LOADPARM */ /* check if read scp info worked and set loadparm */ if (sclp_ipl_info.is_valid) - memcpy(ipb->ipl_info.ccw.load_parm, - &sclp_ipl_info.loadparm, LOADPARM_LEN); + memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); else /* read scp info failed: set empty loadparm (EBCDIC blanks) */ - memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN); - ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; + memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN); + ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && diag308_set_works && - (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { - - ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; - ipb->ipl_info.ccw.vm_parm_len = - ipl_block.ipl_info.ccw.vm_parm_len; - memcpy(ipb->ipl_info.ccw.vm_parm, - ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE); + if (machine_is_vm() && ipl_block_valid && + (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { + + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; + ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len; + memcpy(ipb->ccw.vm_parm, + ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE); } } @@ -1148,33 +1287,18 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_nss) return -ENOMEM; - if (!diag308_set_works) - sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO; - rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group); if (rc) return rc; reipl_block_ccw_init(reipl_block_nss); - if (ipl_info.type == IPL_TYPE_NSS) { - memset(reipl_block_nss->ipl_info.ccw.nss_name, - ' ', NSS_NAME_SIZE); - memcpy(reipl_block_nss->ipl_info.ccw.nss_name, - kernel_nss_name, strlen(kernel_nss_name)); - ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE); - reipl_block_nss->ipl_info.ccw.vm_flags |= - DIAG308_VM_FLAGS_NSS_VALID; - - reipl_block_ccw_fill_parms(reipl_block_nss); - } - reipl_capabilities |= IPL_TYPE_NSS; return 0; } @@ -1187,23 +1311,16 @@ static int __init reipl_ccw_init(void) if (!reipl_block_ccw) return -ENOMEM; - if (MACHINE_IS_VM) { - if (!diag308_set_works) - sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO; - rc = sysfs_create_group(&reipl_kset->kobj, - &reipl_ccw_attr_group_vm); - } else { - if(!diag308_set_works) - sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; - rc = sysfs_create_group(&reipl_kset->kobj, - &reipl_ccw_attr_group_lpar); - } + rc = sysfs_create_group(&reipl_kset->kobj, + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; reipl_block_ccw_init(reipl_block_ccw); if (ipl_info.type == IPL_TYPE_CCW) { - reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; + reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid; + reipl_block_ccw->ccw.devno = ipl_block.ccw.devno; reipl_block_ccw_fill_parms(reipl_block_ccw); } @@ -1215,14 +1332,6 @@ static int __init reipl_fcp_init(void) { int rc; - if (!diag308_set_works) { - if (ipl_info.type == IPL_TYPE_FCP) { - make_attrs_ro(reipl_fcp_attrs); - sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO; - } else - return 0; - } - reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_fcp) return -ENOMEM; @@ -1236,32 +1345,151 @@ static int __init reipl_fcp_init(void) } rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); - if (rc) { - kset_unregister(reipl_fcp_kset); - free_page((unsigned long) reipl_block_fcp); - return rc; - } + if (rc) + goto out1; - rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj, - &sys_reipl_fcp_scp_data_attr); - if (rc) { - sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); - kset_unregister(reipl_fcp_kset); - free_page((unsigned long) reipl_block_fcp); - return rc; + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_fcp_clear = true; } - if (ipl_info.type == IPL_TYPE_FCP) - memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); - else { - reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + if (ipl_info.type == IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the SCSI IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN; reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; - reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; - reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; - reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL; + reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + reipl_block_fcp->fcp.pbt = IPL_PBT_FCP; + reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL; } reipl_capabilities |= IPL_TYPE_FCP; return 0; + +out2: + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); +out1: + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; +} + +static int __init reipl_nvme_init(void) +{ + int rc; + + reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nvme) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL, + &reipl_kset->kobj); + if (!reipl_nvme_kset) { + free_page((unsigned long) reipl_block_nvme); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_nvme_kset->kobj, + &sys_reipl_nvme_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_nvme_clear = true; + } + + if (ipl_info.type == IPL_TYPE_NVME) { + memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN; + reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + reipl_block_nvme->nvme.pbt = IPL_PBT_NVME; + reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_NVME; + return 0; + +out2: + sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); +out1: + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; +} + +static int __init reipl_eckd_init(void) +{ + int rc; + + if (!sclp.has_sipl_eckd) + return 0; + + reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!reipl_block_eckd) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL, + &reipl_kset->kobj); + if (!reipl_eckd_kset) { + free_page((unsigned long)reipl_block_eckd); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_eckd_kset->kobj, + &sys_reipl_eckd_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_eckd_clear = true; + } + + if (ipl_info.type == IPL_TYPE_ECKD) { + memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block)); + } else { + reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD; + reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_ECKD; + return 0; + +out2: + sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); +out1: + kset_unregister(reipl_eckd_kset); + free_page((unsigned long)reipl_block_eckd); + return rc; } static int __init reipl_type_init(void) @@ -1276,12 +1504,18 @@ static int __init reipl_type_init(void) /* * If we have an OS info reipl block, this will be used */ - if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { memcpy(reipl_block_fcp, reipl_block, size); reipl_type = IPL_TYPE_FCP; - } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) { + memcpy(reipl_block_nvme, reipl_block, size); + reipl_type = IPL_TYPE_NVME; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) { + memcpy(reipl_block_eckd, reipl_block, size); + reipl_type = IPL_TYPE_ECKD; } out: return reipl_set_type(reipl_type); @@ -1302,9 +1536,15 @@ static int __init reipl_init(void) rc = reipl_ccw_init(); if (rc) return rc; + rc = reipl_eckd_init(); + if (rc) + return rc; rc = reipl_fcp_init(); if (rc) return rc; + rc = reipl_nvme_init(); + if (rc) + return rc; rc = reipl_nss_init(); if (rc) return rc; @@ -1324,15 +1564,20 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", - dump_block_fcp->ipl_info.fcp.wwpn); + dump_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", - dump_block_fcp->ipl_info.fcp.lun); + dump_block_fcp->fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", - dump_block_fcp->ipl_info.fcp.bootprog); + dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", - dump_block_fcp->ipl_info.fcp.br_lba); + dump_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", - dump_block_fcp->ipl_info.fcp.devno); + dump_block_fcp->fcp.devno); + +DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr, + dump_block_fcp->fcp, + IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, + DIAG308_SCPDATA_SIZE); static struct attribute *dump_fcp_attrs[] = { &sys_dump_fcp_device_attr.attr, @@ -1343,15 +1588,87 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { + &sys_dump_fcp_scp_data_attr, + NULL, +}; + +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, + .bin_attrs = dump_fcp_bin_attrs, }; -/* CCW dump device attributes */ +/* NVME dump device attributes */ +DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", + dump_block_nvme->nvme.br_lba); + +DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr, + dump_block_nvme->nvme, + IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, + DIAG308_SCPDATA_SIZE); + +static struct attribute *dump_nvme_attrs[] = { + &sys_dump_nvme_fid_attr.attr, + &sys_dump_nvme_nsid_attr.attr, + &sys_dump_nvme_bootprog_attr.attr, + &sys_dump_nvme_br_lba_attr.attr, + NULL, +}; + +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { + &sys_dump_nvme_scp_data_attr, + NULL, +}; + +static const struct attribute_group dump_nvme_attr_group = { + .name = IPL_NVME_STR, + .attrs = dump_nvme_attrs, + .bin_attrs = dump_nvme_bin_attrs, +}; + +/* ECKD dump device attributes */ +DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", + dump_block_eckd->eckd.bootprog); + +IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd); + +static struct kobj_attribute sys_dump_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store); + +DEFINE_IPL_ATTR_SCP_DATA_RW(dump_eckd, dump_block_eckd->hdr, + dump_block_eckd->eckd, + IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, + DIAG308_SCPDATA_SIZE); + +static struct attribute *dump_eckd_attrs[] = { + &sys_dump_eckd_device_attr.attr, + &sys_dump_eckd_bootprog_attr.attr, + &sys_dump_eckd_br_chr_attr.attr, + NULL, +}; + +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { + &sys_dump_eckd_scp_data_attr, + NULL, +}; + +static const struct attribute_group dump_eckd_attr_group = { + .name = IPL_ECKD_STR, + .attrs = dump_eckd_attrs, + .bin_attrs = dump_eckd_bin_attrs, +}; -DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", - dump_block_ccw->ipl_info.ccw.devno); +/* CCW dump device attributes */ +DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); static struct attribute *dump_ccw_attrs[] = { &sys_dump_ccw_device_attr.attr, @@ -1369,21 +1686,6 @@ static int dump_set_type(enum dump_type type) { if (!(dump_capabilities & type)) return -EINVAL; - switch (type) { - case DUMP_TYPE_CCW: - if (diag308_set_works) - dump_method = DUMP_METHOD_CCW_DIAG; - else if (MACHINE_IS_VM) - dump_method = DUMP_METHOD_CCW_VM; - else - dump_method = DUMP_METHOD_CCW_CIO; - break; - case DUMP_TYPE_FCP: - dump_method = DUMP_METHOD_FCP_DIAG; - break; - default: - dump_method = DUMP_METHOD_NONE; - } dump_type = type; return 0; } @@ -1391,7 +1693,7 @@ static int dump_set_type(enum dump_type type) static ssize_t dump_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", dump_type_str(dump_type)); + return sysfs_emit(page, "%s\n", dump_type_str(dump_type)); } static ssize_t dump_type_store(struct kobject *kobj, @@ -1404,49 +1706,63 @@ static ssize_t dump_type_store(struct kobject *kobj, rc = dump_set_type(DUMP_TYPE_NONE); else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0) rc = dump_set_type(DUMP_TYPE_CCW); + else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_ECKD); else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) rc = dump_set_type(DUMP_TYPE_FCP); + else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NVME); return (rc != 0) ? rc : len; } static struct kobj_attribute dump_type_attr = __ATTR(dump_type, 0644, dump_type_show, dump_type_store); +static ssize_t dump_area_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sysfs_emit(page, "%lu\n", sclp.hsa_size); +} + +static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size); + +static struct attribute *dump_attrs[] = { + &dump_type_attr.attr, + &dump_area_size_attr.attr, + NULL, +}; + +static struct attribute_group dump_attr_group = { + .attrs = dump_attrs, +}; + static struct kset *dump_kset; static void diag308_dump(void *dump_block) { diag308(DIAG308_SET, dump_block); while (1) { - if (diag308(DIAG308_DUMP, NULL) != 0x302) + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; - udelay_simple(USEC_PER_SEC); + udelay(USEC_PER_SEC); } } static void __dump_run(void *unused) { - struct ccw_dev_id devid; - static char buf[100]; - - switch (dump_method) { - case DUMP_METHOD_CCW_CIO: - devid.devno = dump_block_ccw->ipl_info.ccw.devno; - devid.ssid = 0; - reipl_ccw_dev(&devid); - break; - case DUMP_METHOD_CCW_VM: - sprintf(buf, "STORE STATUS"); - __cpcmd(buf, NULL, 0, NULL); - sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); - __cpcmd(buf, NULL, 0, NULL); - break; - case DUMP_METHOD_CCW_DIAG: + switch (dump_type) { + case DUMP_TYPE_CCW: diag308_dump(dump_block_ccw); break; - case DUMP_METHOD_FCP_DIAG: + case DUMP_TYPE_ECKD: + diag308_dump(dump_block_eckd); + break; + case DUMP_TYPE_FCP: diag308_dump(dump_block_fcp); break; + case DUMP_TYPE_NVME: + diag308_dump(dump_block_nvme); + break; default: break; } @@ -1454,7 +1770,7 @@ static void __dump_run(void *unused) static void dump_run(struct shutdown_trigger *trigger) { - if (dump_method == DUMP_METHOD_NONE) + if (dump_type == DUMP_TYPE_NONE) return; smp_send_stop(); smp_call_ipl_cpu(__dump_run, NULL); @@ -1472,10 +1788,10 @@ static int __init dump_ccw_init(void) free_page((unsigned long)dump_block_ccw); return rc; } - dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + dump_block_ccw->hdr.len = IPL_BP_CCW_LEN; dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; - dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; - dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN; + dump_block_ccw->ccw.pbt = IPL_PBT_CCW; dump_capabilities |= DUMP_TYPE_CCW; return 0; } @@ -1486,8 +1802,6 @@ static int __init dump_fcp_init(void) if (!sclp_ipl_info.has_dump) return 0; /* LDIPL DUMP is not installed */ - if (!diag308_set_works) - return 0; dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); if (!dump_block_fcp) return -ENOMEM; @@ -1496,15 +1810,61 @@ static int __init dump_fcp_init(void) free_page((unsigned long)dump_block_fcp); return rc; } - dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + dump_block_fcp->hdr.len = IPL_BP_FCP_LEN; dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; - dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; - dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; - dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP; + dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + dump_block_fcp->fcp.pbt = IPL_PBT_FCP; + dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP; dump_capabilities |= DUMP_TYPE_FCP; return 0; } +static int __init dump_nvme_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_nvme) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group); + if (rc) { + free_page((unsigned long)dump_block_nvme); + return rc; + } + dump_block_nvme->hdr.len = IPL_BP_NVME_LEN; + dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + dump_block_nvme->nvme.pbt = IPL_PBT_NVME; + dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_NVME; + return 0; +} + +static int __init dump_eckd_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd) + return 0; /* LDIPL DUMP is not installed */ + dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!dump_block_eckd) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group); + if (rc) { + free_page((unsigned long)dump_block_eckd); + return rc; + } + dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + dump_block_eckd->eckd.pbt = IPL_PBT_ECKD; + dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_ECKD; + return 0; +} + static int __init dump_init(void) { int rc; @@ -1512,7 +1872,7 @@ static int __init dump_init(void) dump_kset = kset_create_and_add("dump", NULL, firmware_kobj); if (!dump_kset) return -ENOMEM; - rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr); + rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group); if (rc) { kset_unregister(dump_kset); return rc; @@ -1520,9 +1880,15 @@ static int __init dump_init(void) rc = dump_ccw_init(); if (rc) return rc; + rc = dump_eckd_init(); + if (rc) + return rc; rc = dump_fcp_init(); if (rc) return rc; + rc = dump_nvme_init(); + if (rc) + return rc; dump_set_type(DUMP_TYPE_NONE); return 0; } @@ -1535,38 +1901,47 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { - unsigned long ipib = (unsigned long) reipl_block_actual; + struct lowcore *abs_lc; unsigned int csum; - csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - mem_assign_absolute(S390_lowcore.ipib, ipib); - mem_assign_absolute(S390_lowcore.ipib_checksum, csum); + /* + * Set REIPL_CLEAR flag in os_info flags entry indicating + * 'clear' sysfs attribute has been set on the panicked system + * for specified reipl type. + * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN. + */ + if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) || + (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) || + (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) || + (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) || + reipl_type == IPL_TYPE_NSS || + reipl_type == IPL_TYPE_UNKNOWN) + os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR; + os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags)); + csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0); + abs_lc = get_abs_lowcore(); + abs_lc->ipib = __pa(reipl_block_actual); + abs_lc->ipib_checksum = csum; + put_abs_lowcore(abs_lc); dump_run(trigger); } -static int __init dump_reipl_init(void) -{ - if (!diag308_set_works) - return -EOPNOTSUPP; - else - return 0; -} - static struct shutdown_action __refdata dump_reipl_action = { .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, .fn = dump_reipl_run, - .init = dump_reipl_init, }; /* * vmcmd shutdown action: Trigger vm command on shutdown. */ -static char vmcmd_on_reboot[128]; -static char vmcmd_on_panic[128]; -static char vmcmd_on_halt[128]; -static char vmcmd_on_poff[128]; -static char vmcmd_on_restart[128]; +#define VMCMD_MAX_SIZE 240 + +static char vmcmd_on_reboot[VMCMD_MAX_SIZE + 1]; +static char vmcmd_on_panic[VMCMD_MAX_SIZE + 1]; +static char vmcmd_on_halt[VMCMD_MAX_SIZE + 1]; +static char vmcmd_on_poff[VMCMD_MAX_SIZE + 1]; +static char vmcmd_on_restart[VMCMD_MAX_SIZE + 1]; DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); @@ -1613,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -1632,7 +2007,7 @@ static void stop_run(struct shutdown_trigger *trigger) { if (strcmp(trigger->name, ON_PANIC_STR) == 0 || strcmp(trigger->name, ON_RESTART_STR) == 0) - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); smp_stop_cpu(); } @@ -1678,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR, static ssize_t on_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_reboot_trigger.action->name); + return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name); } static ssize_t on_reboot_store(struct kobject *kobj, @@ -1687,9 +2062,7 @@ static ssize_t on_reboot_store(struct kobject *kobj, { return set_trigger(buf, &on_reboot_trigger, len); } - -static struct kobj_attribute on_reboot_attr = - __ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store); +static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot); static void do_machine_restart(char *__unused) { @@ -1706,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action}; static ssize_t on_panic_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_panic_trigger.action->name); + return sysfs_emit(page, "%s\n", on_panic_trigger.action->name); } static ssize_t on_panic_store(struct kobject *kobj, @@ -1715,9 +2088,7 @@ static ssize_t on_panic_store(struct kobject *kobj, { return set_trigger(buf, &on_panic_trigger, len); } - -static struct kobj_attribute on_panic_attr = - __ATTR(on_panic, 0644, on_panic_show, on_panic_store); +static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic); static void do_panic(void) { @@ -1734,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, static ssize_t on_restart_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_restart_trigger.action->name); + return sysfs_emit(page, "%s\n", on_restart_trigger.action->name); } static ssize_t on_restart_store(struct kobject *kobj, @@ -1743,13 +2114,10 @@ static ssize_t on_restart_store(struct kobject *kobj, { return set_trigger(buf, &on_restart_trigger, len); } - -static struct kobj_attribute on_restart_attr = - __ATTR(on_restart, 0644, on_restart_show, on_restart_store); +static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart); static void __do_restart(void *ignore) { - __arch_local_irq_stosm(0x04); /* enable DAT */ smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); @@ -1758,12 +2126,12 @@ static void __do_restart(void *ignore) stop_run(&on_restart_trigger); } -void do_restart(void) +void do_restart(void *arg) { tracing_off(); debug_locks_off(); lgr_info_log(); - smp_call_online_cpu(__do_restart, NULL); + smp_call_ipl_cpu(__do_restart, arg); } /* on halt */ @@ -1773,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; static ssize_t on_halt_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_halt_trigger.action->name); + return sysfs_emit(page, "%s\n", on_halt_trigger.action->name); } static ssize_t on_halt_store(struct kobject *kobj, @@ -1782,10 +2150,7 @@ static ssize_t on_halt_store(struct kobject *kobj, { return set_trigger(buf, &on_halt_trigger, len); } - -static struct kobj_attribute on_halt_attr = - __ATTR(on_halt, 0644, on_halt_show, on_halt_store); - +static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt); static void do_machine_halt(void) { @@ -1802,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action}; static ssize_t on_poff_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_poff_trigger.action->name); + return sysfs_emit(page, "%s\n", on_poff_trigger.action->name); } static ssize_t on_poff_store(struct kobject *kobj, @@ -1811,10 +2176,7 @@ static ssize_t on_poff_store(struct kobject *kobj, { return set_trigger(buf, &on_poff_trigger, len); } - -static struct kobj_attribute on_poff_attr = - __ATTR(on_poff, 0644, on_poff_show, on_poff_store); - +static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff); static void do_machine_power_off(void) { @@ -1824,26 +2186,27 @@ static void do_machine_power_off(void) } void (*_machine_power_off)(void) = do_machine_power_off; +static struct attribute *shutdown_action_attrs[] = { + &on_restart_attr.attr, + &on_reboot_attr.attr, + &on_panic_attr.attr, + &on_halt_attr.attr, + &on_poff_attr.attr, + NULL, +}; + +static struct attribute_group shutdown_action_attr_group = { + .attrs = shutdown_action_attrs, +}; + static void __init shutdown_triggers_init(void) { shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL, firmware_kobj); if (!shutdown_actions_kset) goto fail; - if (sysfs_create_file(&shutdown_actions_kset->kobj, - &on_reboot_attr.attr)) - goto fail; - if (sysfs_create_file(&shutdown_actions_kset->kobj, - &on_panic_attr.attr)) - goto fail; - if (sysfs_create_file(&shutdown_actions_kset->kobj, - &on_halt_attr.attr)) - goto fail; - if (sysfs_create_file(&shutdown_actions_kset->kobj, - &on_poff_attr.attr)) - goto fail; - if (sysfs_create_file(&shutdown_actions_kset->kobj, - &on_restart_attr.attr)) + if (sysfs_create_group(&shutdown_actions_kset->kobj, + &shutdown_action_attr_group)) goto fail; return; fail: @@ -1864,7 +2227,21 @@ static void __init shutdown_actions_init(void) static int __init s390_ipl_init(void) { - sclp_get_ipl_info(&sclp_ipl_info); + char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}; + + sclp_early_get_ipl_info(&sclp_ipl_info); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * returned by read SCP info is invalid (contains EBCDIC blanks) + * when the system has been booted via diag308. In that case we use + * the value from diag308, if available. + * + * There are also systems where diag308 store does not work in + * case the system is booted from HMC. Fortunately in this case + * READ SCP info provides the correct value. + */ + if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid) + memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN); shutdown_actions_init(); shutdown_triggers_init(); return 0; @@ -1872,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, 127); - vmcmd_on_reboot[127] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -1899,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, 127); - vmcmd_on_panic[127] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -1910,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, 127); - vmcmd_on_halt[127] = 0; + strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -1921,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, 127); - vmcmd_on_poff[127] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } @@ -1944,24 +2320,32 @@ static struct notifier_block on_panic_nb = { void __init setup_ipl(void) { + BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE); + ipl_info.type = get_ipl_type(); switch (ipl_info.type) { case IPL_TYPE_CCW: - ipl_info.data.ccw.dev_id.devno = ipl_devno; - ipl_info.data.ccw.dev_id.ssid = 0; + ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid; + ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno; + break; + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid; + ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno; break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: - ipl_info.data.fcp.dev_id.devno = - IPL_PARMBLOCK_START->ipl_info.fcp.devno; ipl_info.data.fcp.dev_id.ssid = 0; - ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn; - ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; + ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno; + ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; + ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; - case IPL_TYPE_NSS: - strncpy(ipl_info.data.nss.name, kernel_nss_name, - sizeof(ipl_info.data.nss.name)); + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + ipl_info.data.nvme.fid = ipl_block.nvme.fid; + ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; break; + case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: /* We have no info to copy */ break; @@ -1969,101 +2353,147 @@ void __init setup_ipl(void) atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); } -void __init ipl_update_parameters(void) +void s390_reset_system(void) { - int rc; + /* Disable prefixing */ + set_prefix(0); - rc = diag308(DIAG308_STORE, &ipl_block); - if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG)) - diag308_set_works = 1; + /* Disable lowcore protection */ + local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); + diag_amode31_ops.diag308_reset(); } -void __init ipl_save_parameters(void) +#ifdef CONFIG_KEXEC_FILE + +int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, + unsigned char flags, unsigned short cert) { - struct cio_iplinfo iplinfo; - void *src, *dst; + struct ipl_report_component *comp; - if (cio_get_iplinfo(&iplinfo)) - return; + comp = vzalloc(sizeof(*comp)); + if (!comp) + return -ENOMEM; + list_add_tail(&comp->list, &report->components); - ipl_devno = iplinfo.devno; - ipl_flags |= IPL_DEVNO_VALID; - if (!iplinfo.is_qdio) - return; - ipl_flags |= IPL_PARMBLOCK_VALID; - src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr; - dst = (void *)IPL_PARMBLOCK_ORIGIN; - memmove(dst, src, PAGE_SIZE); - S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; -} + comp->entry.addr = kbuf->mem; + comp->entry.len = kbuf->memsz; + comp->entry.flags = flags; + comp->entry.certificate_index = cert; -static LIST_HEAD(rcall); -static DEFINE_MUTEX(rcall_mutex); + report->size += sizeof(comp->entry); -void register_reset_call(struct reset_call *reset) -{ - mutex_lock(&rcall_mutex); - list_add(&reset->list, &rcall); - mutex_unlock(&rcall_mutex); + return 0; } -EXPORT_SYMBOL_GPL(register_reset_call); -void unregister_reset_call(struct reset_call *reset) +int ipl_report_add_certificate(struct ipl_report *report, void *key, + unsigned long addr, unsigned long len) { - mutex_lock(&rcall_mutex); - list_del(&reset->list); - mutex_unlock(&rcall_mutex); + struct ipl_report_certificate *cert; + + cert = vzalloc(sizeof(*cert)); + if (!cert) + return -ENOMEM; + list_add_tail(&cert->list, &report->certificates); + + cert->entry.addr = addr; + cert->entry.len = len; + cert->key = key; + + report->size += sizeof(cert->entry); + report->size += cert->entry.len; + + return 0; } -EXPORT_SYMBOL_GPL(unregister_reset_call); -static void do_reset_calls(void) +struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib) { - struct reset_call *reset; + struct ipl_report *report; -#ifdef CONFIG_64BIT - if (diag308_set_works) { - diag308_reset(); - return; - } -#endif - list_for_each_entry(reset, &rcall, list) - reset->fn(); -} + report = vzalloc(sizeof(*report)); + if (!report) + return ERR_PTR(-ENOMEM); -u32 dump_prefix_page; + report->ipib = ipib; + INIT_LIST_HEAD(&report->components); + INIT_LIST_HEAD(&report->certificates); -void s390_reset_system(void (*func)(void *), void *data) -{ - struct _lowcore *lc; + report->size = ALIGN(ipib->hdr.len, 8); + report->size += sizeof(struct ipl_rl_hdr); + report->size += sizeof(struct ipl_rb_components); + report->size += sizeof(struct ipl_rb_certificates); - lc = (struct _lowcore *)(unsigned long) store_prefix(); + return report; +} - /* Stack for interrupt/machine check handler */ - lc->panic_stack = S390_lowcore.panic_stack; +void *ipl_report_finish(struct ipl_report *report) +{ + struct ipl_report_certificate *cert; + struct ipl_report_component *comp; + struct ipl_rb_certificates *certs; + struct ipl_parameter_block *ipib; + struct ipl_rb_components *comps; + struct ipl_rl_hdr *rl_hdr; + void *buf, *ptr; - /* Save prefix page address for dump case */ - dump_prefix_page = (u32)(unsigned long) lc; + buf = vzalloc(report->size); + if (!buf) + goto out; + ptr = buf; + + memcpy(ptr, report->ipib, report->ipib->hdr.len); + ipib = ptr; + if (ipl_secure_flag) + ipib->hdr.flags |= IPL_PL_FLAG_SIPL; + ipib->hdr.flags |= IPL_PL_FLAG_IPLSR; + ptr += report->ipib->hdr.len; + ptr = PTR_ALIGN(ptr, 8); + + rl_hdr = ptr; + ptr += sizeof(*rl_hdr); + + comps = ptr; + comps->rbt = IPL_RBT_COMPONENTS; + ptr += sizeof(*comps); + list_for_each_entry(comp, &report->components, list) { + memcpy(ptr, &comp->entry, sizeof(comp->entry)); + ptr += sizeof(comp->entry); + } + comps->len = ptr - (void *)comps; + + certs = ptr; + certs->rbt = IPL_RBT_CERTIFICATES; + ptr += sizeof(*certs); + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, &cert->entry, sizeof(cert->entry)); + ptr += sizeof(cert->entry); + } + certs->len = ptr - (void *)certs; + rl_hdr->len = ptr - (void *)rl_hdr; - /* Disable prefixing */ - set_prefix(0); + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, cert->key, cert->entry.len); + ptr += cert->entry.len; + } - /* Disable lowcore protection */ - __ctl_clear_bit(0,28); + BUG_ON(ptr > buf + report->size); +out: + return buf; +} + +int ipl_report_free(struct ipl_report *report) +{ + struct ipl_report_component *comp, *ncomp; + struct ipl_report_certificate *cert, *ncert; - /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; - S390_lowcore.mcck_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; + list_for_each_entry_safe(comp, ncomp, &report->components, list) + vfree(comp); - /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; - S390_lowcore.program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + list_for_each_entry_safe(cert, ncert, &report->certificates, list) + vfree(cert); - /* Store status at absolute zero */ - store_status(); + vfree(report); - do_reset_calls(); - if (func) - func(data); + return 0; } + +#endif diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c new file mode 100644 index 000000000000..b5245fadcfb0 --- /dev/null +++ b/arch/s390/kernel/ipl_vmparm.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/minmax.h> +#include <linux/string.h> +#include <asm/ebcdic.h> +#include <asm/ipl.h> + +/* VM IPL PARM routines */ +size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) && + (ipb->ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len); + memcpy(dest, ipb->ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. + */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 54b0995514e8..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2004, 2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, @@ -8,26 +9,37 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/profile.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/ftrace.h> #include <linux/errno.h> #include <linux/slab.h> +#include <linux/init.h> #include <linux/cpu.h> +#include <linux/irq.h> +#include <linux/entry-common.h> #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> +#include <asm/hw_irq.h> +#include <asm/stacktrace.h> +#include <asm/softirq_stack.h> +#include <asm/vtime.h> +#include <asm/asm.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); struct irq_class { + int irq; char *name; char *desc; }; @@ -42,9 +54,10 @@ struct irq_class { * Since the external and I/O interrupt fields are already sums we would end * up with having a sum which accounts each interrupt twice. */ -static const struct irq_class irqclass_main_desc[NR_IRQS] = { - [EXTERNAL_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"} +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + {.irq = EXT_INTERRUPT, .name = "EXT"}, + {.irq = IO_INTERRUPT, .name = "I/O"}, + {.irq = THIN_INTERRUPT, .name = "AIO"}, }; /* @@ -52,151 +65,248 @@ static const struct irq_class irqclass_main_desc[NR_IRQS] = { * /proc/interrupts. * In addition this list contains non external / I/O events like NMIs. */ -static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { - [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, - [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, - [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, - [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, - [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, - [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, - [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, - [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, - [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, - [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, - [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, - [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, - [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, - [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, - [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, - [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, - [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, - [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, - [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, - [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, - [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, - [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, - [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, - [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, - [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, - [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, - [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, - [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, - [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, - [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, - [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, +static const struct irq_class irqclass_sub_desc[] = { + {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"}, + {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"}, + {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"}, + {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"}, + {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"}, + {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"}, + {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"}, + {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"}, + {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, + {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, + {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"}, + {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, + {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, + {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, + {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, + {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, + {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, + {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, + {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"}, + {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"}, + {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"}, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"}, + {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"}, + {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, + {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; +static void do_IRQ(struct pt_regs *regs, int irq) +{ + if (tod_after_eq(get_lowcore()->int_clock, + get_lowcore()->clock_comparator)) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); +} + +static int on_async_stack(void) +{ + unsigned long frame = current_frame_address(); + + return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; +} + +static void do_irq_async(struct pt_regs *regs, int irq) +{ + if (on_async_stack()) { + do_IRQ(regs, irq); + } else { + call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ, + struct pt_regs *, regs, int, irq); + } +} + +static int irq_pending(struct pt_regs *regs) +{ + int cc; + + asm volatile( + " tpi 0\n" + CC_IPM(cc) + : CC_OUT(cc, cc) + : + : CC_CLOBBER); + return CC_TRANSFORM(cc); +} + +void noinstr do_io_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + bool from_idle; + + irq_enter_rcu(); + + if (user_mode(regs)) { + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + } + + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + account_idle_time_irq(); + + set_cpu_flag(CIF_NOHZ_DELAY); + do { + regs->tpi_info = get_lowcore()->tpi_info; + if (get_lowcore()->tpi_info.adapter_IO) + do_irq_async(regs, THIN_INTERRUPT); + else + do_irq_async(regs, IO_INTERRUPT); + } while (machine_is_lpar() && irq_pending(regs)); + + irq_exit_rcu(); + + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +void noinstr do_ext_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + bool from_idle; + + irq_enter_rcu(); + + if (user_mode(regs)) { + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + } + + regs->int_code = get_lowcore()->ext_int_code_addr; + regs->int_parm = get_lowcore()->ext_params; + regs->int_parm_long = get_lowcore()->ext_params2; + + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + account_idle_time_irq(); + + do_irq_async(regs, EXT_INTERRUPT); + + irq_exit_rcu(); + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +static void show_msi_interrupt(struct seq_file *p, int irq) +{ + struct irq_desc *desc; + unsigned long flags; + int cpu; + + rcu_read_lock(); + desc = irq_to_desc(irq); + if (!desc) + goto out; + + raw_spin_lock_irqsave(&desc->lock, flags); + seq_printf(p, "%3d: ", irq); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu)); + + if (desc->irq_data.chip) + seq_printf(p, " %8s", desc->irq_data.chip->name); + + if (desc->action) + seq_printf(p, " %s", desc->action->name); + + seq_putc(p, '\n'); + raw_spin_unlock_irqrestore(&desc->lock, flags); +out: + rcu_read_unlock(); +} + /* * show_interrupts is needed by /proc/interrupts. */ int show_interrupts(struct seq_file *p, void *v) { - int irq = *(loff_t *) v; - int cpu; + int index = *(loff_t *) v; + int cpu, irq; - get_online_cpus(); - if (irq == 0) { + cpus_read_lock(); + if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) - seq_printf(p, "CPU%d ", cpu); + seq_printf(p, "CPU%-8d", cpu); seq_putc(p, '\n'); } - if (irq < NR_IRQS) { - seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + if (index < NR_IRQS_BASE) { + seq_printf(p, "%s: ", irqclass_main_desc[index].name); + irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); - goto skip_arch_irqs; + goto out; + } + if (index < irq_get_nr_irqs()) { + show_msi_interrupt(p, index); + goto out; } - for (irq = 0; irq < NR_ARCH_IRQS; irq++) { - seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for (index = 0; index < NR_ARCH_IRQS; index++) { + seq_printf(p, "%s: ", irqclass_sub_desc[index].name); + irq = irqclass_sub_desc[index].irq; for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); - if (irqclass_sub_desc[irq].desc) - seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); + if (irqclass_sub_desc[index].desc) + seq_printf(p, " %s", irqclass_sub_desc[index].desc); seq_putc(p, '\n'); } -skip_arch_irqs: - put_online_cpus(); +out: + cpus_read_unlock(); return 0; } -/* - * Switch to the asynchronous interrupt stack for softirq execution. - */ -asmlinkage void do_softirq(void) -{ - unsigned long flags, old, new; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); - /* Check against async. stack address range. */ - new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { - /* Need to switch to the async. stack. */ - new -= STACK_FRAME_OVERHEAD; - ((struct stack_frame *) new)->back_chain = old; - - asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" - " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) - : "0", "1", "2", "3", "4", "5", "14", - "cc", "memory" ); - } else { - /* We are already on the async stack. */ - __do_softirq(); - } - } - - local_irq_restore(flags); -} - -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) +unsigned int arch_dynirq_lower_bound(unsigned int from) { - if (proc_mkdir("irq", NULL)) - create_prof_cpu_mask(); + return from < NR_IRQS_BASE ? NR_IRQS_BASE : from; } -#endif /* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index. */ -static struct list_head ext_int_hash[256]; +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; struct ext_int_info { ext_int_handler_t handler; - u16 code; - struct list_head entry; + struct hlist_node entry; struct rcu_head rcu; + u16 code; }; /* ext_int_hash_lock protects the handler lists for external interrupts */ -DEFINE_SPINLOCK(ext_int_hash_lock); - -static void __init init_external_interrupts(void) -{ - int idx; - - for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) - INIT_LIST_HEAD(&ext_int_hash[idx]); -} +static DEFINE_SPINLOCK(ext_int_hash_lock); static inline int ext_hash(u16 code) { - return (code + (code >> 9)) & 0xff; + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); } -int register_external_interrupt(u16 code, ext_int_handler_t handler) +int register_external_irq(u16 code, ext_int_handler_t handler) { struct ext_int_info *p; unsigned long flags; @@ -210,172 +320,92 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler) index = ext_hash(code); spin_lock_irqsave(&ext_int_hash_lock, flags); - list_add_rcu(&p->entry, &ext_int_hash[index]); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; } -EXPORT_SYMBOL(register_external_interrupt); +EXPORT_SYMBOL(register_external_irq); -int unregister_external_interrupt(u16 code, ext_int_handler_t handler) +int unregister_external_irq(u16 code, ext_int_handler_t handler) { struct ext_int_info *p; unsigned long flags; int index = ext_hash(code); spin_lock_irqsave(&ext_int_hash_lock, flags); - list_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { if (p->code == code && p->handler == handler) { - list_del_rcu(&p->entry); + hlist_del_rcu(&p->entry); kfree_rcu(p, rcu); } } spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; } -EXPORT_SYMBOL(unregister_external_interrupt); +EXPORT_SYMBOL(unregister_external_irq); -void __irq_entry do_extint(struct pt_regs *regs) +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { + struct pt_regs *regs = get_irq_regs(); struct ext_code ext_code; - struct pt_regs *old_regs; struct ext_int_info *p; int index; - old_regs = set_irq_regs(regs); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { - /* Serve timer interrupts first. */ - clock_comparator_work(); - } - kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); - ext_code = *(struct ext_code *) ®s->int_code; - if (ext_code.code != 0x1004) - __get_cpu_var(s390_idle).nohz_delay = 1; + ext_code.int_code = regs->int_code; + if (ext_code.code != EXT_IRQ_CLK_COMP) + set_cpu_flag(CIF_NOHZ_DELAY); index = ext_hash(ext_code.code); rcu_read_lock(); - list_for_each_entry_rcu(p, &ext_int_hash[index], entry) - if (likely(p->code == ext_code.code)) - p->handler(ext_code, regs->int_parm, - regs->int_parm_long); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); + } rcu_read_unlock(); - irq_exit(); - set_irq_regs(old_regs); -} - -void __init init_IRQ(void) -{ - init_external_interrupts(); + return IRQ_HANDLED; } -static DEFINE_SPINLOCK(sc_irq_lock); -static int sc_irq_refcount; - -void service_subclass_irq_register(void) +static void __init init_ext_interrupts(void) { - spin_lock(&sc_irq_lock); - if (!sc_irq_refcount) - ctl_set_bit(0, 9); - sc_irq_refcount++; - spin_unlock(&sc_irq_lock); -} -EXPORT_SYMBOL(service_subclass_irq_register); - -void service_subclass_irq_unregister(void) -{ - spin_lock(&sc_irq_lock); - sc_irq_refcount--; - if (!sc_irq_refcount) - ctl_clear_bit(0, 9); - spin_unlock(&sc_irq_lock); -} -EXPORT_SYMBOL(service_subclass_irq_unregister); - -static DEFINE_SPINLOCK(ma_subclass_lock); -static int ma_subclass_refcount; - -void measurement_alert_subclass_register(void) -{ - spin_lock(&ma_subclass_lock); - if (!ma_subclass_refcount) - ctl_set_bit(0, 5); - ma_subclass_refcount++; - spin_unlock(&ma_subclass_lock); -} -EXPORT_SYMBOL(measurement_alert_subclass_register); - -void measurement_alert_subclass_unregister(void) -{ - spin_lock(&ma_subclass_lock); - ma_subclass_refcount--; - if (!ma_subclass_refcount) - ctl_clear_bit(0, 5); - spin_unlock(&ma_subclass_lock); -} -EXPORT_SYMBOL(measurement_alert_subclass_unregister); - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. - */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); -#endif - -#ifndef CONFIG_PCI - -/* Only PCI devices have dynamically-defined IRQ handlers */ - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(free_irq); + int idx; -void enable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(enable_irq); + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); -void disable_irq(unsigned int irq) -{ - WARN_ON(1); + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL)) + panic("Failed to register EXT interrupt\n"); } -EXPORT_SYMBOL_GPL(disable_irq); - -#endif /* !CONFIG_PCI */ -void disable_irq_nosync(unsigned int irq) +void __init init_IRQ(void) { - disable_irq(irq); + BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); } -EXPORT_SYMBOL_GPL(disable_irq_nosync); -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_on); +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; -int probe_irq_off(unsigned long val) +void irq_subclass_register(enum irq_subclass subclass) { - return 0; + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + system_ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); } -EXPORT_SYMBOL_GPL(probe_irq_off); +EXPORT_SYMBOL(irq_subclass_register); -unsigned int probe_irq_mask(unsigned long val) +void irq_subclass_unregister(enum irq_subclass subclass) { - return val; + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + system_ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); } -EXPORT_SYMBOL_GPL(probe_irq_mask); +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index b987ab2c1541..e808bb8bc0da 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -1,70 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Jump label s390 support * * Copyright IBM Corp. 2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ -#include <linux/module.h> #include <linux/uaccess.h> -#include <linux/stop_machine.h> #include <linux/jump_label.h> +#include <linux/module.h> +#include <asm/text-patching.h> #include <asm/ipl.h> -#ifdef HAVE_JUMP_LABEL - struct insn { u16 opcode; s32 offset; } __packed; -struct insn_args { - struct jump_entry *entry; - enum jump_label_type type; -}; +static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) +{ + /* brcl 0,offset */ + insn->opcode = 0xc004; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; +} -static void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) { - struct insn insn; - int rc; + /* brcl 15,offset */ + insn->opcode = 0xc0f4; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; +} - if (type == JUMP_LABEL_ENABLE) { - /* brcl 15,offset */ - insn.opcode = 0xc0f4; - insn.offset = (entry->target - entry->code) >> 1; - } else { - /* brcl 0,0 */ - insn.opcode = 0xc004; - insn.offset = 0; - } +static void jump_label_bug(struct jump_entry *entry, struct insn *expected, + struct insn *new) +{ + unsigned char *ipc = (unsigned char *)jump_entry_code(entry); + unsigned char *ipe = (unsigned char *)expected; + unsigned char *ipn = (unsigned char *)new; - rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); - WARN_ON_ONCE(rc < 0); + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); + pr_emerg("Found: %6ph\n", ipc); + pr_emerg("Expected: %6ph\n", ipe); + pr_emerg("New: %6ph\n", ipn); + panic("Corrupted kernel text"); } -static int __sm_arch_jump_label_transform(void *data) +static void jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { - struct insn_args *args = data; + void *code = (void *)jump_entry_code(entry); + struct insn old, new; - __jump_label_transform(args->entry, args->type); - return 0; + if (type == JUMP_LABEL_JMP) { + jump_label_make_nop(entry, &old); + jump_label_make_branch(entry, &new); + } else { + jump_label_make_branch(entry, &old); + jump_label_make_nop(entry, &new); + } + if (memcmp(code, &old, sizeof(old))) + jump_label_bug(entry, &old, &new); + s390_kernel_write(code, &new, sizeof(new)); } void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct insn_args args; - - args.entry = entry; - args.type = type; - - stop_machine(__sm_arch_jump_label_transform, &args, NULL); + jump_label_transform(entry, type); + text_poke_sync(); } -void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { - __jump_label_transform(entry, type); + jump_label_transform(entry, type); + return true; } -#endif +void arch_jump_label_transform_apply(void) +{ + text_poke_sync(); +} diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c new file mode 100644 index 000000000000..33130c7daf55 --- /dev/null +++ b/arch/s390/kernel/kdebugfs.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/init.h> + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("s390", NULL); + return 0; +} +postcore_initcall(arch_kdebugfs_init); diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c new file mode 100644 index 000000000000..143e34a4eca5 --- /dev/null +++ b/arch/s390/kernel/kexec_elf.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ELF loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +static int kexec_file_add_kernel_elf(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf = {}; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + Elf_Addr entry; + void *kernel; + int i, ret; + + kernel = image->kernel_buf; + ehdr = (Elf_Ehdr *)kernel; + buf.image = image; + if (image->type == KEXEC_TYPE_CRASH) + entry = STARTUP_KDUMP_OFFSET; + else + entry = ehdr->e_entry; + + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type != PT_LOAD) + continue; + + buf.buffer = kernel + phdr->p_offset; + buf.bufsz = phdr->p_filesz; + + buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; +#endif + buf.memsz = phdr->p_memsz; + data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; + + if (entry - phdr->p_paddr < phdr->p_memsz) { + data->kernel_buf = buf.buffer; + data->kernel_mem = buf.mem; + data->parm = buf.buffer + PARMAREA; + } + + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + } + + return data->memsz ? 0 : -EINVAL; +} + +static void *s390_elf_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + size_t size; + int i; + + /* image->fobs->probe already checked for valid ELF magic number. */ + ehdr = (Elf_Ehdr *)kernel; + + if (ehdr->e_type != ET_EXEC || + ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + !elf_check_arch(ehdr)) + return ERR_PTR(-EINVAL); + + if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr)) + return ERR_PTR(-EINVAL); + + size = ehdr->e_ehsize + ehdr->e_phoff; + size += ehdr->e_phentsize * ehdr->e_phnum; + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + phdr = (void *)ehdr + ehdr->e_phoff; + size = ALIGN(size, phdr->p_align); + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_INTERP) + return ERR_PTR(-EINVAL); + + if (phdr->p_offset > kernel_len) + return ERR_PTR(-EINVAL); + + size += ALIGN(phdr->p_filesz, phdr->p_align); + } + + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + return kexec_file_add_components(image, kexec_file_add_kernel_elf); +} + +static int s390_elf_probe(const char *buf, unsigned long len) +{ + const Elf_Ehdr *ehdr; + + if (len < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + ehdr = (Elf_Ehdr *)buf; + + /* Only check the ELF magic number here and do proper validity check + * in the loader. Any check here that fails would send the erroneous + * ELF file to the image loader that does not care what it gets. + * (Most likely) causing behavior not intended by the user. + */ + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) + return -ENOEXEC; + + return 0; +} + +const struct kexec_file_ops s390_kexec_elf_ops = { + .probe = s390_elf_probe, + .load = s390_elf_load, +#ifdef CONFIG_KEXEC_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_SIG */ +}; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c new file mode 100644 index 000000000000..9a439175723c --- /dev/null +++ b/arch/s390/kernel/kexec_image.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Image loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +static int kexec_file_add_kernel_image(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf = {}; + + buf.image = image; + + buf.buffer = image->kernel_buf; + buf.bufsz = image->kernel_buf_len; + + buf.mem = 0; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; +#endif + buf.memsz = buf.bufsz; + + data->kernel_buf = image->kernel_buf; + data->kernel_mem = buf.mem; + data->parm = image->kernel_buf + PARMAREA; + data->memsz += buf.memsz; + + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); + return kexec_add_buffer(&buf); +} + +static void *s390_image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + return kexec_file_add_components(image, kexec_file_add_kernel_image); +} + +static int s390_image_probe(const char *buf, unsigned long len) +{ + /* Can't reliably tell if an image is valid. Therefore give the + * user whatever he wants. + */ + return 0; +} + +const struct kexec_file_ops s390_kexec_image_ops = { + .probe = s390_image_probe, + .load = s390_image_load, +#ifdef CONFIG_KEXEC_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_SIG */ +}; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 3388b2b2a07d..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -1,244 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> */ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/sections.h> +#include <linux/extable.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hardirq.h> +#include <linux/ftrace.h> +#include <linux/execmem.h> +#include <asm/text-patching.h> +#include <asm/set_memory.h> +#include <asm/sections.h> +#include <asm/dis.h> +#include "entry.h" DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = { }; -static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) +void *alloc_insn_page(void) { - switch (insn[0] >> 8) { - case 0x0c: /* bassm */ - case 0x0b: /* bsm */ - case 0x83: /* diag */ - case 0x44: /* ex */ - case 0xac: /* stnsm */ - case 0xad: /* stosm */ - return -EINVAL; - } - switch (insn[0]) { - case 0x0101: /* pr */ - case 0xb25a: /* bsa */ - case 0xb240: /* bakr */ - case 0xb258: /* bsg */ - case 0xb218: /* pc */ - case 0xb228: /* pt */ - case 0xb98d: /* epsw */ - return -EINVAL; + void *page; + + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); + if (!page) + return NULL; + set_memory_rox((unsigned long)page, 1); + return page; +} + +static void copy_instruction(struct kprobe *p) +{ + kprobe_opcode_t insn[MAX_INSN_SIZE]; + s64 disp, new_disp; + u64 addr, new_addr; + unsigned int len; + + len = insn_length(*p->addr >> 8); + memcpy(&insn, p->addr, len); + p->opcode = insn[0]; + if (probe_is_insn_relative_long(&insn[0])) { + /* + * For pc-relative instructions in RIL-b or RIL-c format patch + * the RI2 displacement field. The insn slot for the to be + * patched instruction is within the same 4GB area like the + * original instruction. Therefore the new displacement will + * always fit. + */ + disp = *(s32 *)&insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&insn[1] = new_disp; } - return 0; + s390_kernel_write(p->ainsn.insn, &insn, len); } +NOKPROBE_SYMBOL(copy_instruction); -static int __kprobes get_fixup_type(kprobe_opcode_t *insn) +/* Check if paddr is at an instruction boundary */ +static bool can_probe(unsigned long paddr) { - /* default fixup method */ - int fixup = FIXUP_PSW_NORMAL; - - switch (insn[0] >> 8) { - case 0x05: /* balr */ - case 0x0d: /* basr */ - fixup = FIXUP_RETURN_REGISTER; - /* if r2 = 0, no branch will be taken */ - if ((insn[0] & 0x0f) == 0) - fixup |= FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x06: /* bctr */ - case 0x07: /* bcr */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x45: /* bal */ - case 0x4d: /* bas */ - fixup = FIXUP_RETURN_REGISTER; - break; - case 0x47: /* bc */ - case 0x46: /* bct */ - case 0x86: /* bxh */ - case 0x87: /* bxle */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x82: /* lpsw */ - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xb2: /* lpswe */ - if ((insn[0] & 0xff) == 0xb2) - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xa7: /* bras */ - if ((insn[0] & 0x0f) == 0x05) - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xc0: - if ((insn[0] & 0x0f) == 0x00 || /* larl */ - (insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0xe3: /* bctg */ - if ((insn[2] & 0xff) == 0x46) - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; + unsigned long addr, offset = 0; + kprobe_opcode_t insn; + struct kprobe *kp; + + if (paddr & 0x01) + return false; + + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) + return false; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(insn))) + return false; + + if (insn >> 8 == 0) { + if (insn != BREAKPOINT_INSTRUCTION) { + /* + * Note that QEMU inserts opcode 0x0000 to implement + * software breakpoints for guests. Since the size of + * the original instruction is unknown, stop following + * instructions and prevent setting a kprobe. + */ + return false; + } + /* + * Check if the instruction has been modified by another + * kprobe, in which case the original instruction is + * decoded. + */ + kp = get_kprobe((void *)addr); + if (!kp) { + /* not a kprobe */ + return false; + } + insn = kp->opcode; + } + addr += insn_length(insn >> 8); } - return fixup; + return addr == paddr; } -int __kprobes arch_prepare_kprobe(struct kprobe *p) +int arch_prepare_kprobe(struct kprobe *p) { - if ((unsigned long) p->addr & 0x01) + if (!can_probe((unsigned long)p->addr)) return -EINVAL; - /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode(p->addr)) + if (probe_is_prohibited_opcode(p->addr)) return -EINVAL; - - p->opcode = *p->addr; - memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); - + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + copy_instruction(p); return 0; } +NOKPROBE_SYMBOL(arch_prepare_kprobe); -struct ins_replace_args { - kprobe_opcode_t *ptr; - kprobe_opcode_t opcode; +struct swap_insn_args { + struct kprobe *p; + unsigned int arm_kprobe : 1; }; -static int __kprobes swap_instruction(void *aref) +static int swap_instruction(void *data) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long status = kcb->kprobe_status; - struct ins_replace_args *args = aref; + struct swap_insn_args *args = data; + struct kprobe *p = args->p; + u16 opc; - kcb->kprobe_status = KPROBE_SWAP_INST; - probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); - kcb->kprobe_status = status; + opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; + s390_kernel_write(p->addr, &opc, sizeof(opc)); return 0; } +NOKPROBE_SYMBOL(swap_instruction); -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - args.ptr = p->addr; - args.opcode = BREAKPOINT_INSTRUCTION; - stop_machine(swap_instruction, &args, NULL); + if (cpu_has_seq_insn()) { + swap_instruction(&args); + text_poke_sync(); + } else { + stop_machine_cpuslocked(swap_instruction, &args, NULL); + } } +NOKPROBE_SYMBOL(arch_arm_kprobe); -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - args.ptr = p->addr; - args.opcode = p->opcode; - stop_machine(swap_instruction, &args, NULL); + if (cpu_has_seq_insn()) { + swap_instruction(&args); + text_poke_sync(); + } else { + stop_machine_cpuslocked(swap_instruction, &args, NULL); + } } +NOKPROBE_SYMBOL(arch_disarm_kprobe); -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { + if (!p->ainsn.insn) + return; + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; } +NOKPROBE_SYMBOL(arch_remove_kprobe); -static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, - struct pt_regs *regs, - unsigned long ip) +static void enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { - struct per_regs per_kprobe; + union { + struct ctlreg regs[3]; + struct { + struct ctlreg control; + struct ctlreg start; + struct ctlreg end; + }; + } per_kprobe; /* Set up the PER control registers %cr9-%cr11 */ - per_kprobe.control = PER_EVENT_IFETCH; - per_kprobe.start = ip; - per_kprobe.end = ip; + per_kprobe.control.val = PER_EVENT_IFETCH; + per_kprobe.start.val = ip; + per_kprobe.end.val = ip; /* Save control regs and psw mask */ - __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + __local_ctl_store(9, 11, kcb->kprobe_saved_ctl); kcb->kprobe_saved_imask = regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); /* Set PER control regs, turns on single step for the given address */ - __ctl_load(per_kprobe, 9, 11); + __local_ctl_load(9, 11, per_kprobe.regs); regs->psw.mask |= PSW_MASK_PER; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); - regs->psw.addr = ip | PSW_ADDR_AMODE; + regs->psw.addr = ip; } +NOKPROBE_SYMBOL(enable_singlestep); -static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, - struct pt_regs *regs, - unsigned long ip) +static void disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { /* Restore control regs and psw mask, set new psw address */ - __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + __local_ctl_load(9, 11, kcb->kprobe_saved_ctl); regs->psw.mask &= ~PSW_MASK_PER; regs->psw.mask |= kcb->kprobe_saved_imask; - regs->psw.addr = ip | PSW_ADDR_AMODE; + regs->psw.addr = ip; } +NOKPROBE_SYMBOL(disable_singlestep); /* * Activate a kprobe by storing its pointer to current_kprobe. The * previous kprobe is stored in kcb->prev_kprobe. A stack of up to * two kprobes can be active, see KPROBE_REENTER. */ -static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) +static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) { - kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe); kcb->prev_kprobe.status = kcb->kprobe_status; - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } +NOKPROBE_SYMBOL(push_kprobe); /* * Deactivate a kprobe by backing up to the previous state. If the * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, * for any other state prev_kprobe.kp will be NULL. */ -static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) +static void pop_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->prev_kprobe.kp = NULL; } +NOKPROBE_SYMBOL(pop_kprobe); -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; - - /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long) &kretprobe_trampoline; -} - -static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, - struct kprobe *p) +static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: @@ -253,13 +269,14 @@ static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, * is a BUG. The code path resides in the .kprobes.text * section and is executed with interrupts disabled. */ - printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + pr_err("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); } } +NOKPROBE_SYMBOL(kprobe_reenter_check); -static int __kprobes kprobe_handler(struct pt_regs *regs) +static int kprobe_handler(struct pt_regs *regs) { struct kprobe_ctlblk *kcb; struct kprobe *p; @@ -271,7 +288,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) */ preempt_disable(); kcb = get_kprobe_ctlblk(); - p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); + p = get_kprobe((void *)(regs->psw.addr - 2)); if (p) { if (kprobe_running()) { @@ -292,38 +309,20 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) * If we have no pre-handler or it returned 0, we * continue with single stepping. If we have a * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry - * for jprobe processing, so get out doing nothing - * more here. + * for changing execution path, so get out doing + * nothing more here. */ push_kprobe(kcb, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (p->pre_handler && p->pre_handler(p, regs)) + if (p->pre_handler && p->pre_handler(p, regs)) { + pop_kprobe(kcb); + preempt_enable_no_resched(); return 1; + } kcb->kprobe_status = KPROBE_HIT_SS; } enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); return 1; - } else if (kprobe_running()) { - p = __get_cpu_var(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - /* - * Continuation after the jprobe completed and - * caused the jprobe_return trap. The jprobe - * break_handler "returns" to the original - * function that still has the kprobe breakpoint - * installed. We continue with single stepping. - */ - kcb->kprobe_status = KPROBE_HIT_SS; - enable_singlestep(kcb, regs, - (unsigned long) p->ainsn.insn); - return 1; - } /* else: - * No kprobe at this address and the current kprobe - * has no break handler (no jprobe!). The kernel just - * exploded, let the standard trap handler pick up the - * pieces. - */ } /* else: * No kprobe at this address and no active kprobe. The trap has * not been caused by a kprobe breakpoint. The race of breakpoint @@ -333,111 +332,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 0; } - -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline: bcr 0,0\n"); -} - -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int __kprobes trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) -{ - struct kretprobe_instance *ri; - struct hlist_head *head, empty_rp; - struct hlist_node *tmp; - unsigned long flags, orig_ret_address; - unsigned long trampoline_address; - kprobe_opcode_t *correct_ret_addr; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more than one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - ri = NULL; - orig_ret_address = 0; - correct_ret_addr = NULL; - trampoline_address = (unsigned long) &kretprobe_trampoline; - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long) ri->ret_addr; - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long) ri->ret_addr; - - if (ri->rp && ri->rp->handler) { - ri->ret_addr = correct_ret_addr; - ri->rp->handler(ri, regs); - } - - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; - - pop_kprobe(get_kprobe_ctlblk()); - kretprobe_hash_unlock(current, &flags); - preempt_enable_no_resched(); - - hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; -} +NOKPROBE_SYMBOL(kprobe_handler); /* * Called after single-stepping. p->addr is the address of the @@ -447,17 +342,17 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +static void resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; - int fixup = get_fixup_type(p->ainsn.insn); + unsigned long ip = regs->psw.addr; + int fixup = probe_get_fixup_type(p->ainsn.insn); if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; if (fixup & FIXUP_BRANCH_NOT_TAKEN) { - int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2; + int ilen = insn_length(p->ainsn.insn[0] >> 8); if (ip - (unsigned long) p->ainsn.insn == ilen) ip = (unsigned long) p->addr + ilen; } @@ -470,8 +365,9 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) disable_singlestep(kcb, regs, ip); } +NOKPROBE_SYMBOL(resume_execution); -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +static int post_kprobe_handler(struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe *p = kprobe_running(); @@ -479,12 +375,11 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) if (!p) return 0; + resume_execution(p, regs); if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } - - resume_execution(p, regs); pop_kprobe(kcb); preempt_enable_no_resched(); @@ -498,17 +393,14 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(post_kprobe_handler); -static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) +static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe *p = kprobe_running(); - const struct exception_table_entry *entry; switch(kcb->kprobe_status) { - case KPROBE_SWAP_INST: - /* We are here because the instruction replacement failed */ - return 0; case KPROBE_HIT_SS: case KPROBE_REENTER: /* @@ -525,32 +417,11 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(p); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (p->fault_handler && p->fault_handler(p, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ - entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); - if (entry) { - regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; + if (fixup_exception(regs)) return 1; - } - /* * fixup_exception() could not handle it, * Let do_page_fault() fix it. @@ -561,8 +432,9 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) } return 0; } +NOKPROBE_SYMBOL(kprobe_trap_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { int ret; @@ -573,12 +445,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); return ret; } +NOKPROBE_SYMBOL(kprobe_fault_handler); /* * Wrapper routine to for handling exceptions. */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *) data; struct pt_regs *regs = args->regs; @@ -610,62 +483,21 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long stack; - - memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); - - /* setup return addr to the jprobe handler routine */ - regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; - regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); - - /* r15 is the stack pointer */ - stack = (unsigned long) regs->gprs[15]; - - memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); - return 1; -} - -void __kprobes jprobe_return(void) -{ - asm volatile(".word 0x0002"); -} - -static void __used __kprobes jprobe_return_end(void) -{ - asm volatile("bcr 0,0"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __init arch_init_kprobes(void) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long stack; - - stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; - - /* Put the regs back */ - memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); - /* put the stack back */ - memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack)); - preempt_enable_no_resched(); - return 1; + return 0; } -static struct kprobe trampoline = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) +int __init arch_populate_kprobe_blacklist(void) { - return register_kprobe(&trampoline); + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { - return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; + return 0; } +NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 6ea6d69339b5..6d1ffca5f798 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux Guest Relocation (LGR) detection * @@ -5,7 +6,8 @@ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/timer.h> #include <linux/slab.h> #include <asm/facility.h> @@ -86,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) if (stsi(si, 2, 2, 2)) return; cpascii(lgr_info->name, si->name, sizeof(si->name)); - memcpy(&lgr_info->lpar_number, &si->lpar_number, - sizeof(lgr_info->lpar_number)); + lgr_info->lpar_number = si->lpar_number; } /* @@ -152,21 +153,20 @@ static void lgr_timer_set(void); /* * LGR timer callback */ -static void lgr_timer_fn(unsigned long ignored) +static void lgr_timer_fn(struct timer_list *unused) { lgr_info_log(); lgr_timer_set(); } -static struct timer_list lgr_timer = - TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); +static struct timer_list lgr_timer; /* * Setup next LGR timer */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); + mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS)); } /* @@ -180,7 +180,8 @@ static int __init lgr_init(void) debug_register_view(lgr_dbf, &debug_hex_ascii_view); lgr_info_get(&lgr_info_last); debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE); lgr_timer_set(); return 0; } -module_init(lgr_init); +device_initcall(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index ac2178161ec3..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, - * Heiko Carstens <heiko.carstens@de.ibm.com> * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ @@ -13,20 +13,28 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> -#include <linux/suspend.h> +#include <linux/cpufeature.h> +#include <asm/guarded_storage.h> +#include <asm/machine.h> +#include <asm/pfault.h> #include <asm/cio.h> +#include <asm/fpu.h> #include <asm/setup.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/smp.h> -#include <asm/reset.h> #include <asm/ipl.h> #include <asm/diag.h> #include <asm/elf.h> #include <asm/asm-offsets.h> +#include <asm/cacheflush.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> +#include <asm/set_memory.h> +#include <asm/stacktrace.h> +#include <asm/nmi.h> +#include <asm/sclp.h> -typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); +typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long); +typedef int (*purgatory_t)(int); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; @@ -34,146 +42,145 @@ extern const unsigned long long relocate_kernel_len; #ifdef CONFIG_CRASH_DUMP /* - * Create ELF notes for one CPU + * Reset the system, copy boot CPU registers to absolute zero, + * and jump to the kdump image */ -static void add_elf_notes(int cpu) +static void __do_machine_kdump(void *data) { - struct save_area *sa = (void *) 4608 + store_prefix(); - void *ptr; + struct kimage *image = data; + purgatory_t purgatory; + unsigned long prefix; + + purgatory = (purgatory_t)image->start; + + /* store_status() saved the prefix register to lowcore */ + prefix = (unsigned long)get_lowcore()->prefixreg_save_area; + + /* Now do the reset */ + s390_reset_system(); + + /* + * Copy dump CPU store status info to absolute zero. + * This need to be done *after* s390_reset_system set the + * prefix register of this CPU to zero + */ + memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area), + phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); + + call_nodat(1, int, purgatory, int, 1); - memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); - ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa); - memset(ptr, 0, sizeof(struct elf_note)); + /* Die if kdump returns */ + disabled_wait(); } /* - * Initialize CPU ELF notes + * Start kdump: create a LGR log entry, store status of all CPUs and + * branch to __do_machine_kdump. */ -void setup_regs(void) +static noinline void __machine_kdump(void *image) { - unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; - int cpu, this_cpu; + struct mcesa *mcesa; + union ctlreg2 cr2_old, cr2_new; + int this_cpu, cpu; + lgr_info_log(); + /* Get status of the other CPUs */ this_cpu = smp_find_processor_id(stap()); - add_elf_notes(this_cpu); for_each_online_cpu(cpu) { if (cpu == this_cpu) continue; if (smp_store_status(cpu)) continue; - add_elf_notes(cpu); } - /* Copy dump CPU store status info to absolute zero */ - memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); -} - -/* - * PM notifier callback for kdump - */ -static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, - void *ptr) -{ - switch (action) { - case PM_SUSPEND_PREPARE: - case PM_HIBERNATION_PREPARE: - if (crashk_res.start) - crash_map_reserved_pages(); - break; - case PM_POST_SUSPEND: - case PM_POST_HIBERNATION: - if (crashk_res.start) - crash_unmap_reserved_pages(); - break; - default: - return NOTIFY_DONE; + /* Store status of the boot CPU */ + mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); + if (cpu_has_vx()) + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (cpu_has_gs()) { + local_ctl_store(2, &cr2_old.reg); + cr2_new = cr2_old; + cr2_new.gse = 1; + local_ctl_load(2, &cr2_new.reg); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + local_ctl_load(2, &cr2_old.reg); } - return NOTIFY_OK; + /* + * To create a good backchain for this CPU in the dump store_status + * is passed the address of a function. The address is saved into + * the PSW save area of the boot CPU and the function is invoked as + * a tail call of store_status. The backchain in the dump will look + * like this: + * restart_int_handler -> __machine_kexec -> __do_machine_kdump + * The call to store_status() will not return. + */ + store_status(__do_machine_kdump, image); } -static int __init machine_kdump_pm_init(void) -{ - pm_notifier(machine_kdump_pm_cb, 0); - return 0; -} -arch_initcall(machine_kdump_pm_init); -#endif +#endif /* CONFIG_CRASH_DUMP */ /* - * Start kdump: We expect here that a store status has been done on our CPU + * Check if kdump checksums are valid: We call purgatory with parameter "0" */ -static void __do_machine_kdump(void *image) +static bool kdump_csum_valid(struct kimage *image) { #ifdef CONFIG_CRASH_DUMP - int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + purgatory_t purgatory = (purgatory_t)image->start; + int rc; - setup_regs(); - __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); - start_kdump(1); + rc = call_nodat(1, int, purgatory, int, 0); + return rc == 0; +#else + return false; #endif } -/* - * Check if kdump checksums are valid: We call purgatory with parameter "0" - */ -static int kdump_csum_valid(struct kimage *image) -{ #ifdef CONFIG_CRASH_DUMP - int (*start_kdump)(int) = (void *)image->start; - int rc; - __arch_local_irq_stnsm(0xfb); /* disable DAT */ - rc = start_kdump(0); - __arch_local_irq_stosm(0x04); /* enable DAT */ - return rc ? 0 : -EINVAL; -#else - return -EINVAL; -#endif +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); } -/* - * Map or unmap crashkernel memory - */ -static void crash_map_pages(int enable) +static void crash_protect_pages(int protect) { - unsigned long size = resource_size(&crashk_res); - - BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || - size % KEXEC_CRASH_MEM_ALIGN); - if (enable) - vmem_add_mapping(crashk_res.start, size); - else { - vmem_remove_mapping(crashk_res.start, size); - if (size) - os_info_crashkernel_add(crashk_res.start, size); - else - os_info_crashkernel_add(0, 0); - } + unsigned long size; + + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); + else + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); } -/* - * Map crashkernel memory - */ -void crash_map_reserved_pages(void) +void arch_kexec_protect_crashkres(void) { - crash_map_pages(1); + crash_protect_pages(1); } -/* - * Unmap crashkernel memory - */ -void crash_unmap_reserved_pages(void) +void arch_kexec_unprotect_crashkres(void) { - crash_map_pages(0); + crash_protect_pages(0); } +#endif + /* * Give back memory to hypervisor before new kdump is loaded */ static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; @@ -186,10 +193,6 @@ int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; - /* Can't replace kernel image since it is read-only. */ - if (ipl_flags & IPL_NSS_VALID) - return -EOPNOTSUPP; - if (image->type == KEXEC_TYPE_CRASH) return machine_kexec_prepare_kdump(); @@ -198,7 +201,7 @@ int machine_kexec_prepare(struct kimage *image) return -EINVAL; /* Get the destination where the assembler code should be copied to.*/ - reboot_code_buffer = (void *) page_to_phys(image->control_code_page); + reboot_code_buffer = page_to_virt(image->control_code_page); /* Then copy it */ memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); @@ -209,19 +212,13 @@ void machine_kexec_cleanup(struct kimage *image) { } -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_SYMBOL(lowcore_ptr); - VMCOREINFO_SYMBOL(high_memory); - VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); -} - void machine_shutdown(void) { } void machine_crash_shutdown(struct pt_regs *regs) { + set_os_info_reipl_block(); } /* @@ -229,13 +226,23 @@ void machine_crash_shutdown(struct pt_regs *regs) */ static void __do_machine_kexec(void *data) { - relocate_kernel_t data_mover; + unsigned long data_mover, entry, diag308_subcode; struct kimage *image = data; - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); + data_mover = page_to_phys(image->control_code_page); + entry = virt_to_phys(&image->head); + diag308_subcode = DIAG308_CLEAR_RESET; + if (sclp.has_iplcc) + diag308_subcode |= DIAG308_FLAG_EI; + s390_reset_system(); - /* Call the moving routine */ - (*data_mover)(&image->head, image->start); + call_nodat(3, void, (relocate_kernel_t)data_mover, + unsigned long, entry, + unsigned long, image->start, + unsigned long, diag308_subcode); + + /* Die if kexec returns */ + disabled_wait(); } /* @@ -243,19 +250,14 @@ static void __do_machine_kexec(void *data) */ static void __machine_kexec(void *data) { - struct kimage *image = data; - - __arch_local_irq_stosm(0x04); /* enable DAT */ pfault_fini(); tracing_off(); debug_locks_off(); - if (image->type == KEXEC_TYPE_CRASH) { - lgr_info_log(); - s390_reset_system(__do_machine_kdump, data); - } else { - s390_reset_system(__do_machine_kexec, data); - } - disabled_wait((unsigned long) __builtin_return_address(0)); +#ifdef CONFIG_CRASH_DUMP + if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) + __machine_kdump(data); +#endif + __do_machine_kexec(data); } /* diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c new file mode 100644 index 000000000000..a36d7311c668 --- /dev/null +++ b/arch/s390/kernel/machine_kexec_file.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 code for kexec_file_load system call + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#define pr_fmt(fmt) "kexec: " fmt + +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kexec.h> +#include <linux/module_signature.h> +#include <linux/verification.h> +#include <linux/vmalloc.h> +#include <asm/boot_data.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &s390_kexec_elf_ops, + &s390_kexec_image_ops, + NULL, +}; + +#ifdef CONFIG_KEXEC_SIG +int s390_verify_sig(const char *kernel, unsigned long kernel_len) +{ + const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; + struct module_signature *ms; + unsigned long sig_len; + int ret; + + /* Skip signature verification when not secure IPLed. */ + if (!ipl_secure_flag) + return 0; + + if (marker_len > kernel_len) + return -EKEYREJECTED; + + if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING, + marker_len)) + return -EKEYREJECTED; + kernel_len -= marker_len; + + ms = (void *)kernel + kernel_len - sizeof(*ms); + kernel_len -= sizeof(*ms); + + sig_len = be32_to_cpu(ms->sig_len); + if (sig_len >= kernel_len) + return -EKEYREJECTED; + kernel_len -= sig_len; + + if (ms->id_type != PKEY_ID_PKCS7) + return -EKEYREJECTED; + + if (ms->algo != 0 || + ms->hash != 0 || + ms->signer_len != 0 || + ms->key_id_len != 0 || + ms->__pad[0] != 0 || + ms->__pad[1] != 0 || + ms->__pad[2] != 0) { + return -EBADMSG; + } + + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + return ret; +} +#endif /* CONFIG_KEXEC_SIG */ + +static int kexec_file_update_purgatory(struct kimage *image, + struct s390_load_data *data) +{ + u64 entry, type; + int ret; + + if (image->type == KEXEC_TYPE_CRASH) { + entry = STARTUP_KDUMP_OFFSET; + type = KEXEC_TYPE_CRASH; + } else { + entry = STARTUP_NORMAL_OFFSET; + type = KEXEC_TYPE_DEFAULT; + } + + ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry, + sizeof(entry), false); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type, + sizeof(type), false); + if (ret) + return ret; + +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) { + u64 crash_size; + + ret = kexec_purgatory_get_set_symbol(image, "crash_start", + &crashk_res.start, + sizeof(crashk_res.start), + false); + if (ret) + return ret; + + crash_size = crashk_res.end - crashk_res.start + 1; + ret = kexec_purgatory_get_set_symbol(image, "crash_size", + &crash_size, + sizeof(crash_size), + false); + } +#endif + return ret; +} + +static int kexec_file_add_purgatory(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf = {}; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; +#endif + + ret = kexec_load_purgatory(image, &buf); + if (ret) + return ret; + data->memsz += buf.memsz; + + return kexec_file_update_purgatory(image, data); +} + +static int kexec_file_add_initrd(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf = {}; + int ret; + + buf.image = image; + + buf.buffer = image->initrd_buf; + buf.bufsz = image->initrd_buf_len; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; +#endif + buf.memsz = buf.bufsz; + + data->parm->initrd_start = data->memsz; + data->parm->initrd_size = buf.memsz; + data->memsz += buf.memsz; + + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + + return ipl_report_add_component(data->report, &buf, 0, 0); +} + +static int kexec_file_add_ipl_report(struct kimage *image, + struct s390_load_data *data) +{ + __u32 *lc_ipl_parmblock_ptr; + unsigned int len, ncerts; + struct kexec_buf buf = {}; + unsigned long addr; + void *ptr, *end; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + + ptr = __va(ipl_cert_list_addr); + end = ptr + ipl_cert_list_size; + ncerts = 0; + while (ptr < end) { + ncerts++; + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ptr += len; + } + + addr = data->memsz + data->report->size; + addr += ncerts * sizeof(struct ipl_rb_certificate_entry); + ptr = __va(ipl_cert_list_addr); + while (ptr < end) { + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ipl_report_add_certificate(data->report, ptr, addr, len); + addr += len; + ptr += len; + } + + ret = -ENOMEM; + buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; + buf.bufsz = data->report->size; + buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; + + data->memsz += buf.memsz; + + lc_ipl_parmblock_ptr = + data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); + *lc_ipl_parmblock_ptr = (__u32)buf.mem; + +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; +#endif + + ret = kexec_add_buffer(&buf); +out: + return ret; +} + +void *kexec_file_add_components(struct kimage *image, + int (*add_kernel)(struct kimage *image, + struct s390_load_data *data)) +{ + unsigned long max_command_line_size = LEGACY_COMMAND_LINE_SIZE; + struct s390_load_data data = {0}; + unsigned long minsize; + int ret; + + data.report = ipl_report_init(&ipl_block); + if (IS_ERR(data.report)) + return data.report; + + ret = add_kernel(image, &data); + if (ret) + goto out; + + ret = -EINVAL; + minsize = PARMAREA + offsetof(struct parmarea, command_line); + if (image->kernel_buf_len < minsize) + goto out; + + if (data.parm->max_command_line_size) + max_command_line_size = data.parm->max_command_line_size; + + if (minsize + max_command_line_size < minsize) + goto out; + + if (image->kernel_buf_len < minsize + max_command_line_size) + goto out; + + if (image->cmdline_buf_len >= max_command_line_size) + goto out; + + memcpy(data.parm->command_line, image->cmdline_buf, + image->cmdline_buf_len); + +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) { + data.parm->oldmem_base = crashk_res.start; + data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1; + } +#endif + + if (image->initrd_buf) { + ret = kexec_file_add_initrd(image, &data); + if (ret) + goto out; + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + goto out; + + if (data.kernel_mem == 0) { + unsigned long restart_psw = 0x0008000080000000UL; + restart_psw += image->start; + memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw)); + image->start = 0; + } + + ret = kexec_file_add_ipl_report(image, &data); +out: + ipl_report_free(data.report); + return ERR_PTR(ret); +} + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf_Rela *relas; + int i, r_type; + int ret; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx >= pi->ehdr->e_shnum && + sym->st_shndx != SHN_ABS) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + val = sym->st_value; + if (sym->st_shndx != SHN_ABS) + val += pi->sechdrs[sym->st_shndx].sh_addr; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + if (r_type == R_390_PLT32DBL) + r_type = R_390_PC32DBL; + + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c new file mode 100644 index 000000000000..b7182cec48dc --- /dev/null +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/elf.h> +#include <asm/kexec.h> + +int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, + unsigned long addr) +{ + switch (r_type) { + case R_390_NONE: + break; + case R_390_8: /* Direct 8 bit. */ + *(u8 *)loc = val; + break; + case R_390_12: /* Direct 12 bit. */ + *(u16 *)loc &= 0xf000; + *(u16 *)loc |= val & 0xfff; + break; + case R_390_16: /* Direct 16 bit. */ + *(u16 *)loc = val; + break; + case R_390_20: /* Direct 20 bit. */ + *(u32 *)loc &= 0xf00000ff; + *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ + *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ + break; + case R_390_32: /* Direct 32 bit. */ + *(u32 *)loc = val; + break; + case R_390_64: /* Direct 64 bit. */ + case R_390_GLOB_DAT: + case R_390_JMP_SLOT: + *(u64 *)loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + *(u16 *)loc = (val - addr); + break; + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + *(u16 *)loc = (val - addr) >> 1; + break; + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + *(u32 *)loc = (val - addr) >> 1; + break; + case R_390_PC32: /* PC relative 32 bit. */ + *(u32 *)loc = (val - addr); + break; + case R_390_PC64: /* PC relative 64 bit. */ + *(u64 *)loc = (val - addr); + break; + case R_390_RELATIVE: + *(unsigned long *) loc = val; + break; + default: + return 1; + } + return 0; +} diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 08dcf21cb8df..1fec370fecf4 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -1,73 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 2008, 2009 * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * */ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#include <asm/nospec-insn.h> +#include <asm/ptrace.h> +#include <asm/march.h> + +#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + +#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE) +#define STACK_FREGS (STACK_FRAME_OVERHEAD) +#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS) +#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS) +#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW) +#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2) +#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS) + +/* packed stack: allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 + +#ifdef CONFIG_FUNCTION_TRACER + + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 .section .kprobes.text, "ax" -ENTRY(ftrace_stub) - br %r14 +SYM_FUNC_START(ftrace_stub) + BR_EX %r14 +SYM_FUNC_END(ftrace_stub) + +SYM_CODE_START(ftrace_stub_direct_tramp) + lgr %r1, %r0 + BR_EX %r1 +SYM_CODE_END(ftrace_stub_direct_tramp) + + .macro ftrace_regs_entry, allregs=0 + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller + + .if \allregs == 1 + # save psw mask + # don't put any instructions clobbering CC before this point + epsw %r1,%r14 + risbg %r14,%r1,0,31,32 + .endif + + lgr %r1,%r15 + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate ftrace_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE_FREGS + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + + .if \allregs == 1 + stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15) + mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS + .else + xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15) + .endif + + lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + .endm -ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE - br %r14 +SYM_CODE_START(ftrace_regs_caller) + ftrace_regs_entry 1 + j ftrace_common +SYM_CODE_END(ftrace_regs_caller) -ENTRY(ftrace_caller) +SYM_CODE_START(ftrace_caller) + ftrace_regs_entry 0 + j ftrace_common +SYM_CODE_END(ftrace_caller) + +SYM_CODE_START(ftrace_common) +#ifdef MARCH_HAS_Z196_FEATURES + aghik %r2,%r0,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r1,ftrace_func +#else + lgr %r2,%r0 + aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) + larl %r1,ftrace_func + lg %r1,0(%r1) #endif - stm %r2,%r5,16(%r15) - bras %r1,2f -0: .long ftrace_trace_function -1: .long function_trace_stop -2: l %r2,1b-0b(%r1) - icm %r2,0xf,0(%r2) - jnz 3f - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - ahi %r2,-MCOUNT_INSN_SIZE - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - l %r2,100(%r15) - l %r3,152(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: st %r2,100(%r15) + lgr %r3,%r14 + la %r5,STACK_FREGS(%r15) + BASR_EX %r14,%r1 + lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) +#ifdef MARCH_HAS_Z196_FEATURES + ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + locgrz %r1,%r0 +#else + lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + ltgr %r1,%r1 + jnz 0f + lgr %r1,%r0 #endif - ahi %r15,96 - l %r14,56(%r15) -3: lm %r2,%r5,16(%r15) - br %r14 +0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + BR_EX %r1 +SYM_CODE_END(ftrace_common) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(return_to_handler) - stm %r2,%r5,16(%r15) - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - st %r0,__SF_BACKCHAIN(%r15) - bras %r1,0f - .long ftrace_return_to_handler -0: l %r2,0b-0b(%r1) - basr %r14,%r2 - lr %r14,%r2 - ahi %r15,96 - lm %r2,%r5,16(%r15) - br %r14 +SYM_FUNC_START(return_to_handler) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + # allocate ftrace_regs and stack frame for ftrace_return_to_handler + aghi %r15,-STACK_FRAME_SIZE_FREGS + stg %r1,__SF_BACKCHAIN(%r15) + stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + la %r2,STACK_FRAME_OVERHEAD(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,STACK_FRAME_SIZE_FREGS + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + BR_EX %r14 +SYM_FUNC_END(return_to_handler) #endif +#endif /* CONFIG_FUNCTION_TRACER */ + +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br) + lmg %r0,%r1,2(%r1) + br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br) + +#ifdef CONFIG_EXPOLINE +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl) + lmg %r0,%r1,2(%r1) + exrl %r0,0f + j . +0: br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl) +#endif /* CONFIG_EXPOLINE */ + +#ifdef CONFIG_RETHOOK + +SYM_CODE_START(arch_rethook_trampoline) + stg %r14,(__SF_GPRS+8*8)(%r15) + lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15) + stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) + + # store original stack pointer in backchain and pt_regs + lay %r7,STACK_FRAME_SIZE_PTREGS(%r15) + stg %r7,__SF_BACKCHAIN(%r15) + stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) + + # store full psw + epsw %r2,%r3 + risbg %r3,%r2,0,31,32 + stg %r3,STACK_PTREGS_PSW(%r15) + larl %r1,arch_rethook_trampoline + stg %r1,STACK_PTREGS_PSW+8(%r15) + + lay %r2,STACK_PTREGS(%r15) + brasl %r14,arch_rethook_trampoline_callback + + mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) + lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) + lpswe __SF_EMPTY(%r15) +SYM_CODE_END(arch_rethook_trampoline) + +#endif /* CONFIG_RETHOOK */ diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S deleted file mode 100644 index 1c52eae3396a..000000000000 --- a/arch/s390/kernel/mcount64.S +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - br %r14 - -ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE - br %r14 - -ENTRY(ftrace_caller) -#endif - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - aghi %r2,-MCOUNT_INSN_SIZE - larl %r14,ftrace_trace_function - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - lg %r2,168(%r15) - lg %r3,272(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: stg %r2,168(%r15) -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) - br %r14 - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -ENTRY(return_to_handler) - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,160 - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7845e15a17df..9d1f8a50f5a4 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel module help for s390. * @@ -8,29 +9,27 @@ * * based on i386 version * Copyright (C) 2001 Rusty Russell. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/module.h> #include <linux/elf.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <linux/ftrace.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/kasan.h> #include <linux/moduleloader.h> #include <linux/bug.h> +#include <linux/memory.h> +#include <linux/execmem.h> +#include <asm/arch-stackprotector.h> +#include <asm/alternative.h> +#include <asm/nospec-branch.h> +#include <asm/facility.h> +#include <asm/ftrace.lds.h> +#include <asm/set_memory.h> +#include <asm/setup.h> +#include <asm/asm-offsets.h> #if 0 #define DEBUGP printk @@ -38,31 +37,23 @@ #define DEBUGP(fmt , ...) #endif -#ifndef CONFIG_64BIT -#define PLT_ENTRY_SIZE 12 -#else /* CONFIG_64BIT */ -#define PLT_ENTRY_SIZE 20 -#endif /* CONFIG_64BIT */ +#define PLT_ENTRY_SIZE 22 -#ifdef CONFIG_64BIT -void *module_alloc(unsigned long size) +#ifdef CONFIG_FUNCTION_TRACER +void module_arch_cleanup(struct module *mod) { - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, -1, - __builtin_return_address(0)); + execmem_free(mod->arch.trampolines_start); } #endif -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_arch_freeing_init(struct module *mod) { - if (mod) { - vfree(mod->arch.syminfo); - mod->arch.syminfo = NULL; - } - vfree(module_region); + if (is_livepatch_module(mod) && + mod->state == MODULE_STATE_LIVE) + return; + + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; } static void check_rela(Elf_Rela *rela, struct module *me) @@ -122,6 +113,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, Elf_Rela *rela; char *strings; int nrela, i, j; + struct module_memory *mod_mem; /* Find symbol table and string table. */ symtab = NULL; @@ -138,8 +130,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Allocate one syminfo structure per symbol. */ me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); - me->arch.syminfo = vmalloc(me->arch.nsyms * - sizeof(struct mod_arch_syminfo)); + me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo), + me->arch.nsyms)); if (!me->arch.syminfo) return -ENOMEM; symbols = (void *) hdr + symtab->sh_offset; @@ -169,19 +161,26 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + mod_mem = &me->mem[MOD_TEXT]; + mod_mem->size = ALIGN(mod_mem->size, 4); + me->arch.got_offset = mod_mem->size; + mod_mem->size += me->arch.got_size; + me->arch.plt_offset = mod_mem->size; + if (me->arch.plt_size) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) + me->arch.plt_size += PLT_ENTRY_SIZE; + mod_mem->size += me->arch.plt_size; + } return 0; } static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, - int sign, int bits, int shift) + int sign, int bits, int shift, + void *(*write)(void *dest, const void *src, size_t len)) { unsigned long umax; long min, max; + void *dest = (void *)loc; if (val & ((1UL << shift) - 1)) return -ENOEXEC; @@ -198,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, return -ENOEXEC; } - if (bits == 8) - *(unsigned char *) loc = val; - else if (bits == 12) - *(unsigned short *) loc = (val & 0xfff) | + if (bits == 8) { + unsigned char tmp = val; + write(dest, &tmp, 1); + } else if (bits == 12) { + unsigned short tmp = (val & 0xfff) | (*(unsigned short *) loc & 0xf000); - else if (bits == 16) - *(unsigned short *) loc = val; - else if (bits == 20) - *(unsigned int *) loc = (val & 0xfff) << 16 | - (val & 0xff000) >> 4 | - (*(unsigned int *) loc & 0xf00000ff); - else if (bits == 32) - *(unsigned int *) loc = val; - else if (bits == 64) - *(unsigned long *) loc = val; + write(dest, &tmp, 2); + } else if (bits == 16) { + unsigned short tmp = val; + write(dest, &tmp, 2); + } else if (bits == 20) { + unsigned int tmp = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff); + write(dest, &tmp, 4); + } else if (bits == 32) { + unsigned int tmp = val; + write(dest, &tmp, 4); + } else if (bits == 64) { + unsigned long tmp = val; + write(dest, &tmp, 8); + } return 0; } static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - const char *strtab, struct module *me) + const char *strtab, struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { struct mod_arch_syminfo *info; Elf_Addr loc, val; @@ -245,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - rc = apply_rela_bits(loc, val, 0, 8, 0); + rc = apply_rela_bits(loc, val, 0, 8, 0, write); else if (r_type == R_390_12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -264,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - rc = apply_rela_bits(loc, val, 1, 16, 0); + rc = apply_rela_bits(loc, val, 1, 16, 0, write); else if (r_type == R_390_PC16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PC32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PC32) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_PC64) - rc = apply_rela_bits(loc, val, 1, 64, 0); + rc = apply_rela_bits(loc, val, 1, 64, 0, write); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -287,33 +293,34 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */ if (info->got_initialized == 0) { - Elf_Addr *gotent; + Elf_Addr *gotent = me->mem[MOD_TEXT].base + + me->arch.got_offset + + info->got_offset; - gotent = me->module_core + me->arch.got_offset + - info->got_offset; - *gotent = val; + write(gotent, &val, sizeof(*gotent)); info->got_initialized = 1; } val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->module_core - loc; - rc = apply_rela_bits(loc, val, 1, 32, 1); + val += (Elf_Addr)me->mem[MOD_TEXT].base + + me->arch.got_offset - loc; + rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ @@ -324,20 +331,28 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + - info->plt_offset; -#ifndef CONFIG_64BIT - ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ - ip[1] = 0x100607f1; - ip[2] = val; -#else /* CONFIG_64BIT */ - ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ - ip[1] = 0x100a0004; - ip[2] = 0x07f10000; - ip[3] = (unsigned int) (val >> 32); - ip[4] = (unsigned int) val; -#endif /* CONFIG_64BIT */ + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; + } else { + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ + } + *(long *)&insn[14] = val; + + write(ip, insn, sizeof(insn)); info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || @@ -350,44 +365,44 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; } if (r_type == R_390_PLT16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PLTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_PLT32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_GOTPCDBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -411,9 +426,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, return 0; } -int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, +static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, - struct module *me) + struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { Elf_Addr base; Elf_Sym *symtab; @@ -429,18 +445,103 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, strtab, me); + rc = apply_rela(rela, base, symtab, strtab, me, write); if (rc) return rc; } return 0; } +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + bool early = me->state == MODULE_STATE_UNFORMED; + void *(*write)(void *, const void *, size_t) = memcpy; + + if (!early) + write = s390_kernel_write; + + return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me, + write); +} + +#ifdef CONFIG_FUNCTION_TRACER +static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, + const Elf_Shdr *s) +{ + char *start, *end; + int numpages; + size_t size; + + size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); + numpages = DIV_ROUND_UP(size, PAGE_SIZE); + start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE); + if (!start) + return -ENOMEM; + set_memory_rox((unsigned long)start, numpages); + end = start + size; + + me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start; + me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end; + me->arch.next_trampoline = me->arch.trampolines_start; + + return 0; +} +#endif /* CONFIG_FUNCTION_TRACER */ + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - vfree(me->arch.syminfo); - me->arch.syminfo = NULL; - return 0; + const Elf_Shdr *s; + char *secstrings, *secname; + void *aseg; + int rc = 0; + + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_disable && me->arch.plt_size) { + unsigned int *ij; + + ij = me->mem[MOD_TEXT].base + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . */ + ij[2] = 0x000007f1; /* br %r1 */ + } + + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + aseg = (void *) s->sh_addr; + secname = secstrings + s->sh_name; + + if (!strcmp(".altinstructions", secname)) + /* patch .altinstructions */ + apply_alternatives(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (str_has_prefix(secname, ".s390_indirect"))) + nospec_revert(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (str_has_prefix(secname, ".s390_return"))) + nospec_revert(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_STACKPROTECTOR) && + (str_has_prefix(secname, "__stack_protector_loc"))) { + rc = stack_protector_apply(aseg, aseg + s->sh_size); + if (rc) + break; + } + +#ifdef CONFIG_FUNCTION_TRACER + if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { + rc = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (rc) + break; + } +#endif /* CONFIG_FUNCTION_TRACER */ + } + + return rc; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 504175ebf8b0..a55abbf65333 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Machine check handler * @@ -5,59 +6,235 @@ * Author(s): Ingo Adlung <adlung@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Cornelia Huck <cornelia.huck@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, */ #include <linux/kernel_stat.h> +#include <linux/utsname.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> +#include <linux/entry-common.h> #include <linux/hardirq.h> +#include <linux/log2.h> +#include <linux/kprobes.h> +#include <linux/kmemleak.h> #include <linux/time.h> #include <linux/module.h> +#include <linux/sched/signal.h> +#include <linux/kvm_host.h> #include <asm/lowcore.h> +#include <asm/ctlreg.h> +#include <asm/fpu.h> #include <asm/smp.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> +#include <asm/asm-offsets.h> +#include <asm/pai.h> +#include <asm/vtime.h> struct mcck_struct { - int kill_task; - int channel_report; - int warning; - unsigned long long mcck_code; + unsigned int kill_task : 1; + unsigned int channel_report : 1; + unsigned int warning : 1; + unsigned int stp_queue : 1; + unsigned long mcck_code; }; static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static void s390_handle_damage(char *msg) +static inline int nmi_needs_mcesa(void) { - smp_send_stop(); - disabled_wait((unsigned long) __builtin_return_address(0)); - while (1); + return cpu_has_vx() || cpu_has_gs(); } /* - * Main machine check handler function. Will be called with interrupts enabled - * or disabled and machine checks enabled or disabled. + * The initial machine check extended save area for the boot CPU. + * It will be replaced on the boot CPU reinit with an allocated + * structure. The structure is required for machine check happening + * early in the boot process. + */ +static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE); + +void __init nmi_alloc_mcesa_early(u64 *mcesad) +{ + if (!nmi_needs_mcesa()) + return; + *mcesad = __pa(&boot_mcesa); + if (cpu_has_gs()) + *mcesad |= ilog2(MCESA_MAX_SIZE); +} + +int nmi_alloc_mcesa(u64 *mcesad) +{ + unsigned long size; + void *origin; + + *mcesad = 0; + if (!nmi_needs_mcesa()) + return 0; + size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + origin = kmalloc(size, GFP_KERNEL); + if (!origin) + return -ENOMEM; + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak(origin); + *mcesad = __pa(origin); + if (cpu_has_gs()) + *mcesad |= ilog2(MCESA_MAX_SIZE); + return 0; +} + +void nmi_free_mcesa(u64 *mcesad) +{ + if (!nmi_needs_mcesa()) + return; + kfree(__va(*mcesad & MCESA_ORIGIN_MASK)); +} + +static __always_inline char *nmi_puts(char *dest, const char *src) +{ + while (*src) + *dest++ = *src++; + *dest = 0; + return dest; +} + +static __always_inline char *u64_to_hex(char *dest, u64 val) +{ + int i, num; + + for (i = 1; i <= 16; i++) { + num = (val >> (64 - 4 * i)) & 0xf; + if (num >= 10) + *dest++ = 'A' + num - 10; + else + *dest++ = '0' + num; + } + *dest = 0; + return dest; +} + +static notrace void nmi_print_info(void) +{ + struct lowcore *lc = get_lowcore(); + char message[100]; + char *ptr; + int i; + + ptr = nmi_puts(message, "Unrecoverable machine check, code: "); + ptr = u64_to_hex(ptr, lc->mcck_interruption_code); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, init_utsname()->release); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, arch_hw_string); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "PSW: "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask); + ptr = nmi_puts(ptr, " "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr); + ptr = nmi_puts(ptr, " PFX: "); + ptr = u64_to_hex(ptr, (u64)get_lowcore()); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "LBA: "); + ptr = u64_to_hex(ptr, lc->last_break_save_area); + ptr = nmi_puts(ptr, " EDC: "); + ptr = u64_to_hex(ptr, lc->external_damage_code); + ptr = nmi_puts(ptr, " FSA: "); + ptr = u64_to_hex(ptr, lc->failing_storage_address); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "CRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "GPRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "System stopped\n"); + sclp_emergency_printk(message); +} + +static notrace void __noreturn s390_handle_damage(void) +{ + struct lowcore *lc = get_lowcore(); + union ctlreg0 cr0, cr0_new; + psw_t psw_save; + + smp_emergency_stop(); + diag_amode31_ops.diag308_reset(); + + /* + * Disable low address protection and make machine check new PSW a + * disabled wait PSW. Any additional machine check cannot be handled. + */ + local_ctl_store(0, &cr0.reg); + cr0_new = cr0; + cr0_new.lap = 0; + local_ctl_load(0, &cr0_new.reg); + psw_save = lc->mcck_new_psw; + psw_bits(lc->mcck_new_psw).io = 0; + psw_bits(lc->mcck_new_psw).ext = 0; + psw_bits(lc->mcck_new_psw).wait = 1; + nmi_print_info(); + + /* + * Restore machine check new PSW and control register 0 to original + * values. This makes possible system dump analysis easier. + */ + lc->mcck_new_psw = psw_save; + local_ctl_load(0, &cr0.reg); + disabled_wait(); +} +NOKPROBE_SYMBOL(s390_handle_damage); + +/* + * Main machine check handler function. Will be called with interrupts disabled + * and machine checks enabled. */ void s390_handle_mcck(void) { - unsigned long flags; struct mcck_struct mcck; + unsigned long mflags; /* * Disable machine checks and get the current state of accumulated * machine checks. Afterwards delete the old state and enable machine * checks again. */ - local_irq_save(flags); - local_mcck_disable(); - mcck = __get_cpu_var(cpu_mcck); - memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); - clear_thread_flag(TIF_MCCK_PENDING); - local_mcck_enable(); - local_irq_restore(flags); + local_mcck_save(mflags); + mcck = *this_cpu_ptr(&cpu_mcck); + memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); + local_mcck_restore(mflags); if (mcck.channel_report) crw_handle_channel_report(); @@ -74,171 +251,112 @@ void s390_handle_mcck(void) static int mchchk_wng_posted = 0; /* Use single cpu clear, as we cannot handle smp here. */ - __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT); if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } + if (mcck.stp_queue) + stp_queue_work(); if (mcck.kill_task) { - local_irq_enable(); printk(KERN_EMERG "mcck: Terminating task because of machine " - "malfunction (code 0x%016llx).\n", mcck.mcck_code); + "malfunction (code 0x%016lx).\n", mcck.mcck_code); printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", current->comm, current->pid); - do_exit(SIGSEGV); + if (is_global_init(current)) + panic("mcck: Attempting to kill init!\n"); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID); } } -EXPORT_SYMBOL_GPL(s390_handle_mcck); -/* - * returns 0 if all registers could be validated - * returns 1 otherwise +/** + * nmi_registers_valid - verify if registers are valid + * @mci: machine check interruption code + * + * Inspect a machine check interruption code and verify if all required + * registers are valid. For some registers the corresponding validity bit is + * ignored and the registers are set to the expected value. + * Returns true if all registers are valid, otherwise false. */ -static int notrace s390_revalidate_registers(struct mci *mci) +static bool notrace nmi_registers_valid(union mci mci) { - int kill_task; - u64 zero; - void *fpt_save_area, *fpt_creg_save_area; - - kill_task = 0; - zero = 0; + union ctlreg2 cr2; - if (!mci->gr) { - /* - * General purpose registers couldn't be restored and have - * unknown contents. Process needs to be terminated. - */ - kill_task = 1; - } - if (!mci->fp) { - /* - * Floating point registers can't be restored and - * therefore the process needs to be terminated. - */ - kill_task = 1; - } -#ifndef CONFIG_64BIT - asm volatile( - " ld 0,0(%0)\n" - " ld 2,8(%0)\n" - " ld 4,16(%0)\n" - " ld 6,24(%0)" - : : "a" (&S390_lowcore.floating_pt_save_area)); -#endif - - if (MACHINE_HAS_IEEE) { -#ifdef CONFIG_64BIT - fpt_save_area = &S390_lowcore.floating_pt_save_area; - fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; -#else - fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; - fpt_creg_save_area = fpt_save_area + 128; -#endif - if (!mci->fc) { - /* - * Floating point control register can't be restored. - * Task will be terminated. - */ - asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); - kill_task = 1; - - } else - asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - } - /* Revalidate access registers */ - asm volatile( - " lam 0,15,0(%0)" - : : "a" (&S390_lowcore.access_regs_save_area)); - if (!mci->ar) { - /* - * Access registers have unknown contents. - * Terminating task. - */ - kill_task = 1; - } - /* Revalidate control registers */ - if (!mci->cr) { - /* - * Control registers have unknown contents. - * Can't recover and therefore stopping machine. - */ - s390_handle_damage("invalid control registers."); - } else { -#ifdef CONFIG_64BIT - asm volatile( - " lctlg 0,15,0(%0)" - : : "a" (&S390_lowcore.cregs_save_area)); -#else - asm volatile( - " lctl 0,15,0(%0)" - : : "a" (&S390_lowcore.cregs_save_area)); -#endif - } /* - * We don't even try to revalidate the TOD register, since we simply - * can't write something sensible into that register. + * The getcpu vdso syscall reads the CPU number from the programmable + * field of the TOD clock. Disregard the TOD programmable register + * validity bit and load the CPU number into the TOD programmable field + * unconditionally. */ -#ifdef CONFIG_64BIT + set_tod_programmable_field(raw_smp_processor_id()); /* - * See if we can revalidate the TOD programmable register with its - * old contents (should be zero) otherwise set it to zero. + * Set the clock comparator register to the next expected value. */ - if (!mci->pr) - asm volatile( - " sr 0,0\n" - " sckpf" - : : : "0", "cc"); - else - asm volatile( - " l 0,0(%0)\n" - " sckpf" - : : "a" (&S390_lowcore.tod_progreg_save_area) - : "0", "cc"); -#endif - /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); - /* Check if old PSW is valid */ - if (!mci->wp) - /* - * Can't tell if we come from user or kernel mode - * -> stopping machine. - */ - s390_handle_damage("old psw invalid."); + set_clock_comparator(get_lowcore()->clock_comparator); + if (!mci.gr || !mci.fp || !mci.fc) + return false; + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) + return false; + if (!mci.ar) + return false; + /* + * Two cases for guarded storage registers: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of guarded storage + * control can not be recreated, the process must be terminated. + * For SIE the guest values of guarded storage can not be recreated. + * This is either due to a bug or due to GS being disabled in the + * guest. The guest will be notified by KVM code and the guests machine + * check handling must take care of this. The host values are saved by + * KVM and are not affected. + */ + cr2.reg = get_lowcore()->cregs_save_area[2]; + if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST)) + return false; + if (!mci.ms || !mci.pm || !mci.ia) + return false; + return true; +} +NOKPROBE_SYMBOL(nmi_registers_valid); - if (!mci->ms || !mci->pm || !mci->ia) - kill_task = 1; +/* + * Backup the guest's machine check info to its description block + */ +static void notrace s390_backup_mcck_info(struct pt_regs *regs) +{ + struct mcck_volatile_info *mcck_backup; + struct sie_page *sie_page; + + /* r14 contains the sie block, which was set in sie64a */ + struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]); - return kill_task; + if (sie_block == NULL) + /* Something's seriously wrong, stop system. */ + s390_handle_damage(); + + sie_page = container_of(sie_block, struct sie_page, sie_block); + mcck_backup = &sie_page->mcck_info; + mcck_backup->mcic = get_lowcore()->mcck_interruption_code & + ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE); + mcck_backup->ext_damage_code = get_lowcore()->external_damage_code; + mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address; } +NOKPROBE_SYMBOL(s390_backup_mcck_info); #define MAX_IPD_COUNT 29 #define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ -#define ED_ETR_SYNC 12 /* External damage ETR sync check */ -#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ + +#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE) /* * machine check handler. @@ -248,44 +366,41 @@ void notrace s390_do_machine_check(struct pt_regs *regs) static int ipd_count; static DEFINE_SPINLOCK(ipd_lock); static unsigned long long last_ipd; + struct lowcore *lc = get_lowcore(); struct mcck_struct *mcck; unsigned long long tmp; - struct mci *mci; - int umode; + irqentry_state_t irq_state; + union mci mci; + unsigned long mcck_dam_code; + int mcck_pending = 0; + + irq_state = irqentry_nmi_enter(regs); - nmi_enter(); + if (user_mode(regs)) + update_timer_mcck(); inc_irq_stat(NMI_NMI); - mci = (struct mci *) &S390_lowcore.mcck_interruption_code; - mcck = &__get_cpu_var(cpu_mcck); - umode = user_mode(regs); + mci.val = lc->mcck_interruption_code; + mcck = this_cpu_ptr(&cpu_mcck); - if (mci->sd) { - /* System damage -> stopping machine */ - s390_handle_damage("received system damage machine check."); - } - if (mci->pd) { - if (mci->b) { + /* + * Reinject the instruction processing damages' machine checks + * including Delayed Access Exception into the guest + * instead of damaging the host if they happen in the guest. + */ + if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) { + if (mci.b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; -#ifdef CONFIG_64BIT z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); -#else - z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | - 1ULL<<29); - o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | - 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | - 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); -#endif - t_mcic = *(u64 *)mci; + t_mcic = mci.val; if (((t_mcic & z_mcic) != 0) || ((t_mcic & o_mcic) != o_mcic)) { - s390_handle_damage("processing backup machine " - "check with damage."); + s390_handle_damage(); } /* @@ -300,76 +415,95 @@ void notrace s390_do_machine_check(struct pt_regs *regs) ipd_count = 1; last_ipd = tmp; if (ipd_count == MAX_IPD_COUNT) - s390_handle_damage("too many ipd retries."); + s390_handle_damage(); spin_unlock(&ipd_lock); } else { /* Processing damage -> stopping machine */ - s390_handle_damage("received instruction processing " - "damage machine check."); + s390_handle_damage(); } } - if (s390_revalidate_registers(mci)) { - if (umode) { - /* - * Couldn't restore all register contents while in - * user mode -> mark task for termination. - */ - mcck->kill_task = 1; - mcck->mcck_code = *(unsigned long long *) mci; - set_thread_flag(TIF_MCCK_PENDING); - } else { - /* - * Couldn't restore all register contents while in - * kernel mode -> stopping machine. - */ - s390_handle_damage("unable to revalidate registers."); - } + if (!nmi_registers_valid(mci)) { + if (!user_mode(regs)) + s390_handle_damage(); + /* + * Couldn't restore all register contents for the + * user space process -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = mci.val; + mcck_pending = 1; } - if (mci->cd) { + + /* + * Backup the machine check's info if it happens when the guest + * is running. + */ + if (test_cpu_flag(CIF_MCCK_GUEST)) + s390_backup_mcck_info(regs); + + if (mci.cd) { /* Timing facility damage */ - s390_handle_damage("TOD clock damaged"); + s390_handle_damage(); } - if (mci->ed && mci->ec) { + if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - etr_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - etr_switch_to_local(); - if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) - stp_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) - stp_island_check(); + if (lc->external_damage_code & (1U << ED_STP_SYNC)) + mcck->stp_queue |= stp_sync_check(); + if (lc->external_damage_code & (1U << ED_STP_ISLAND)) + mcck->stp_queue |= stp_island_check(); + mcck_pending = 1; } - if (mci->se) + /* + * Reinject storage related machine checks into the guest if they + * happen when the guest is running. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) { /* Storage error uncorrected */ - s390_handle_damage("received storage error uncorrected " - "machine check."); - if (mci->ke) + if (mci.se) + s390_handle_damage(); /* Storage key-error uncorrected */ - s390_handle_damage("received storage key-error uncorrected " - "machine check."); - if (mci->ds && mci->fa) + if (mci.ke) + s390_handle_damage(); /* Storage degradation */ - s390_handle_damage("received storage degradation machine " - "check."); - if (mci->cp) { + if (mci.ds && mci.fa) + s390_handle_damage(); + } + if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; - set_thread_flag(TIF_MCCK_PENDING); + mcck_pending = 1; } - if (mci->w) { + if (mci.w) { /* Warning pending */ mcck->warning = 1; - set_thread_flag(TIF_MCCK_PENDING); + mcck_pending = 1; } - nmi_exit(); + + /* + * If there are only Channel Report Pending and External Damage + * machine checks, they will not be reinjected into the guest + * because they refer to host conditions only. + */ + mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); + if (test_cpu_flag(CIF_MCCK_GUEST) && + (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { + /* Set exit reason code for host's later handling */ + *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + } + clear_cpu_flag(CIF_MCCK_GUEST); + + if (mcck_pending) + schedule_mcck_handler(); + + irqentry_nmi_exit(regs, irq_state); } +NOKPROBE_SYMBOL(s390_do_machine_check); static int __init machine_check_init(void) { - ctl_set_bit(14, 25); /* enable external damage MCH */ - ctl_set_bit(14, 27); /* enable system recovery MCH */ - ctl_set_bit(14, 24); /* enable warning MCH */ + system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT); + system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT); + system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT); return 0; } -arch_initcall(machine_check_init); +early_initcall(machine_check_init); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c new file mode 100644 index 000000000000..e11ec15960a1 --- /dev/null +++ b/arch/s390/kernel/nospec-branch.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/nospec-branch.h> + +int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP); + +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) { + /* + * The user explicitly requested nobp=1, enable it and + * disable the expoline support. + */ + nobp = 1; + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_disable = 1; + } else { + nobp = 0; + } + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + nobp = 0; + return 0; +} +early_param("nospec", nospec_setup_early); + +static int __init nospec_report(void) +{ + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); + if (nospec_uses_trampoline()) + pr_info("Spectre V2 mitigation: execute trampolines\n"); + if (nobp_enabled()) + pr_info("Spectre V2 mitigation: limited branch prediction\n"); + return 0; +} +arch_initcall(nospec_report); + +#ifdef CONFIG_EXPOLINE + +int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); + +static int __init nospectre_v2_setup_early(char *str) +{ + nospec_disable = 1; + return 0; +} +early_param("nospectre_v2", nospectre_v2_setup_early); + +void __init nospec_auto_detect(void) +{ + if (test_facility(156) || cpu_mitigations_off()) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (__is_defined(CC_USING_EXPOLINE)) + nospec_disable = 1; + nobp = 0; + } else if (__is_defined(CC_USING_EXPOLINE)) { + /* + * The kernel has been compiled with expolines. + * Keep expolines enabled and disable nobp. + */ + nospec_disable = 0; + nobp = 0; + } + /* + * If the kernel has not been compiled with expolines the + * nobp setting decides what is done, this depends on the + * CONFIG_KERNEL_NP option and the nobp/nospec parameters. + */ +} + +static int __init spectre_v2_setup_early(char *str) +{ + if (str && !strncmp(str, "on", 2)) { + nospec_disable = 0; + nobp = 0; + } + if (str && !strncmp(str, "off", 3)) + nospec_disable = 1; + if (str && !strncmp(str, "auto", 4)) + nospec_auto_detect(); + return 0; +} +early_param("spectre_v2", spectre_v2_setup_early); + +static void __init_or_module __nospec_revert(s32 *start, s32 *end) +{ + enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type; + static const u8 branch[] = { 0x47, 0x00, 0x07, 0x00 }; + u8 *instr, *thunk, *br; + u8 insnbuf[6]; + s32 *epo; + + /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, branch, sizeof(branch)); + for (epo = start; epo < end; epo++) { + instr = (u8 *) epo + *epo; + if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) + type = BRCL_EXPOLINE; /* brcl instruction */ + else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05) + type = BRASL_EXPOLINE; /* brasl instruction */ + else + continue; + thunk = instr + (long)(*(int *)(instr + 2)) * 2; + if (thunk[0] == 0xc6 && thunk[1] == 0x00) + /* exrl %r0,<target-br> */ + br = thunk + (long)(*(int *)(thunk + 2)) * 2; + else + continue; + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + continue; + switch (type) { + case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ + insnbuf[0] = br[0]; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + } + + s390_kernel_write(instr, insnbuf, 6); + } +} + +void __init_or_module nospec_revert(s32 *start, s32 *end) +{ + if (nospec_disable) + __nospec_revert(start, end); +} + +extern s32 __nospec_call_start[], __nospec_call_end[]; +extern s32 __nospec_return_start[], __nospec_return_end[]; +void __init nospec_init_branches(void) +{ + nospec_revert(__nospec_call_start, __nospec_call_end); + nospec_revert(__nospec_return_start, __nospec_return_end); +} + +#endif /* CONFIG_EXPOLINE */ diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 000000000000..5970dd3ee7c5 --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (test_facility(156)) + return sysfs_emit(buf, "Mitigation: etokens\n"); + if (nospec_uses_trampoline()) + return sysfs_emit(buf, "Mitigation: execute trampolines\n"); + if (nobp_enabled()) + return sysfs_emit(buf, "Mitigation: limited branch prediction\n"); + return sysfs_emit(buf, "Vulnerable\n"); +} diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c new file mode 100644 index 000000000000..2fc40f97c0ad --- /dev/null +++ b/arch/s390/kernel/numa.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NUMA support for s390 + * + * Implement NUMA core code. + * + * Copyright IBM Corp. 2015 + */ + +#include <linux/kernel.h> +#include <linux/mmzone.h> +#include <linux/cpumask.h> +#include <linux/memblock.h> +#include <linux/node.h> +#include <asm/numa.h> + +void __init numa_setup(void) +{ + int nid; + + nodes_clear(node_possible_map); + node_set(0, node_possible_map); + node_set_online(0); + for (nid = 0; nid < MAX_NUMNODES; nid++) + NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8); + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; + NODE_DATA(0)->node_id = 0; +} diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index d112fc66f993..94fa44776d0c 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * OS info memory interface * @@ -5,15 +6,19 @@ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ -#define KMSG_COMPONENT "os_info" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "os_info: " fmt #include <linux/crash_dump.h> #include <linux/kernel.h> #include <linux/slab.h> #include <asm/checksum.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/os_info.h> +#include <asm/physmem_info.h> +#include <asm/maccess.h> +#include <asm/asm-offsets.h> +#include <asm/sections.h> +#include <asm/ipl.h> /* * OS info structure has to be page aligned @@ -26,7 +31,7 @@ static struct os_info os_info __page_aligned_data; u32 os_info_csum(struct os_info *os_info) { int size = sizeof(*os_info) - offsetof(struct os_info, version_major); - return csum_partial(&os_info->version_major, size, 0); + return (__force u32)cksm(&os_info->version_major, size, 0); } /* @@ -40,28 +45,51 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size) } /* - * Add OS info entry and update checksum + * Add OS info data entry and update checksum */ -void os_info_entry_add(int nr, void *ptr, u64 size) +void os_info_entry_add_data(int nr, void *ptr, u64 size) { - os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].addr = __pa(ptr); os_info.entry[nr].size = size; - os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.entry[nr].csum = (__force u32)cksm(ptr, size, 0); os_info.csum = os_info_csum(&os_info); } /* - * Initialize OS info struture and set lowcore pointer + * Add OS info value entry and update checksum + */ +void os_info_entry_add_val(int nr, u64 value) +{ + os_info.entry[nr].val = value; + os_info.entry[nr].size = 0; + os_info.entry[nr].csum = 0; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info structure and set lowcore pointer */ void __init os_info_init(void) { - void *ptr = &os_info; + struct lowcore *abs_lc; + BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE); os_info.version_major = OS_INFO_VERSION_MAJOR; os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; + os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base); + os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset()); + os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys); + os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap); + os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START); + os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END); + os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext); + os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end); + os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext)); os_info.csum = os_info_csum(&os_info); - mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); + abs_lc = get_abs_lowcore(); + abs_lc->os_info = __pa(&os_info); + put_abs_lowcore(abs_lc); } #ifdef CONFIG_CRASH_DUMP @@ -89,11 +117,11 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_from_oldmem(buf_align, (void *) addr, size)) { + if (copy_oldmem_kernel(buf_align, addr, size)) { msg = "copy failed"; goto fail_free; } - csum = csum_partial(buf_align, size, 0); + csum = (__force u32)cksm(buf_align, size, 0); if (csum != os_info_old->entry[nr].csum) { msg = "checksum failed"; goto fail_free; @@ -120,16 +148,16 @@ static void os_info_old_init(void) if (os_info_init) return; - if (!OLDMEM_BASE) + if (!oldmem_data.start && !is_ipl_type_dump()) goto fail; - if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; @@ -152,7 +180,7 @@ fail: } /* - * Return pointer to os infor entry and its size + * Return pointer to os info entry and its size */ void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 390d9ae57bb2..408ab93112bf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,40 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. + * Copyright IBM Corp. 2012, 2023 + * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> + * Thomas Richter <tmricht@linux.ibm.com> */ -#define KMSG_COMPONENT "cpum_cf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpum_cf: " fmt #include <linux/kernel.h> #include <linux/kernel_stat.h> -#include <linux/perf_event.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/init.h> -#include <linux/export.h> -#include <asm/ctl_reg.h> -#include <asm/irq.h> +#include <linux/miscdevice.h> +#include <linux/perf_event.h> + #include <asm/cpu_mf.h> +#include <asm/hwctrset.h> +#include <asm/debug.h> + +/* Perf PMU definitions for the counter facility */ +#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */ +#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */ -/* CPU-measurement counter facility supports these CPU counter sets: - * For CPU counter sets: - * Basic counter set: 0-31 - * Problem-state counter set: 32-63 - * Crypto-activity counter set: 64-127 - * Extented counter set: 128-159 - */ enum cpumf_ctr_set { - /* CPU counter sets */ - CPUMF_CTR_SET_BASIC = 0, - CPUMF_CTR_SET_USER = 1, - CPUMF_CTR_SET_CRYPTO = 2, - CPUMF_CTR_SET_EXT = 3, + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ /* Maximum number of counter sets */ CPUMF_CTR_SET_MAX, @@ -42,261 +37,681 @@ enum cpumf_ctr_set { #define CPUMF_LCCTL_ENABLE_SHIFT 16 #define CPUMF_LCCTL_ACTCTL_SHIFT 0 -static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { - [CPUMF_CTR_SET_BASIC] = 0x02, - [CPUMF_CTR_SET_USER] = 0x04, - [CPUMF_CTR_SET_CRYPTO] = 0x08, - [CPUMF_CTR_SET_EXT] = 0x01, -}; -static void ctr_set_enable(u64 *state, int ctr_set) +static inline void ctr_set_enable(u64 *state, u64 ctrsets) { - *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; + *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; } -static void ctr_set_disable(u64 *state, int ctr_set) + +static inline void ctr_set_disable(u64 *state, u64 ctrsets) { - *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); + *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); } -static void ctr_set_start(u64 *state, int ctr_set) + +static inline void ctr_set_start(u64 *state, u64 ctrsets) { - *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; + *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; } -static void ctr_set_stop(u64 *state, int ctr_set) + +static inline void ctr_set_stop(u64 *state, u64 ctrsets) { - *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); + *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); } -/* Local CPUMF event structure */ -struct cpu_hw_events { - struct cpumf_ctr_info info; +static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) +{ + switch (set) { + case CPUMF_CTR_SET_BASIC: + return stcctm(BASIC, range, dest); + case CPUMF_CTR_SET_USER: + return stcctm(PROBLEM_STATE, range, dest); + case CPUMF_CTR_SET_CRYPTO: + return stcctm(CRYPTO_ACTIVITY, range, dest); + case CPUMF_CTR_SET_EXT: + return stcctm(EXTENDED, range, dest); + case CPUMF_CTR_SET_MT_DIAG: + return stcctm(MT_DIAG_CLEARING, range, dest); + case CPUMF_CTR_SET_MAX: + return 3; + } + return 3; +} + +struct cpu_cf_events { + refcount_t refcnt; /* Reference count */ atomic_t ctr_set[CPUMF_CTR_SET_MAX]; - u64 state, tx_state; + u64 state; /* For perf_event_open SVC */ + u64 dev_state; /* For /dev/hwctr */ unsigned int flags; + size_t used; /* Bytes used in data */ + size_t usedss; /* Bytes used in start/stop */ + unsigned char start[PAGE_SIZE]; /* Counter set at event add */ + unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ + unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ + unsigned int sets; /* # Counter set saved in memory */ }; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { - .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), - }, - .state = 0, - .flags = 0, + +static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ +static debug_info_t *cf_dbg; + +/* + * The CPU Measurement query counter information instruction contains + * information which varies per machine generation, but is constant and + * does not change when running on a particular machine, such as counter + * first and second version number. This is needed to determine the size + * of counter sets. Extract this information at device driver initialization. + */ +static struct cpumf_ctr_info cpumf_ctr_info; + +struct cpu_cf_ptr { + struct cpu_cf_events *cpucf; }; -static int get_counter_set(u64 event) +static struct cpu_cf_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct cpu_cf_ptr __percpu *cfptr; +} cpu_cf_root; + +/* + * Serialize event initialization and event removal. Both are called from + * user space in task context with perf_event_open() and close() + * system calls. + * + * This mutex serializes functions cpum_cf_alloc_cpu() called at event + * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu() + * called at event removal via call back function hw_perf_event_destroy() + * when the event is deleted. They are serialized to enforce correct + * bookkeeping of pointer and reference counts anchored by + * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the + * per CPU pointers stored in cpu_cf_root::cfptr. + */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Get pointer to per-cpu structure. + * + * Function get_cpu_cfhw() is called from + * - cfset_copy_all(): This function is protected by cpus_read_lock(), so + * CPU hot plug remove can not happen. Event removal requires a close() + * first. + * + * Function this_cpu_cfhw() is called from perf common code functions: + * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): + * All functions execute with interrupts disabled on that particular CPU. + * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). + * + * Therefore it is safe to access the CPU specific pointer to the event. + */ +static struct cpu_cf_events *get_cpu_cfhw(int cpu) { - int set = -1; + struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; - if (event < 32) - set = CPUMF_CTR_SET_BASIC; - else if (event < 64) - set = CPUMF_CTR_SET_USER; - else if (event < 128) - set = CPUMF_CTR_SET_CRYPTO; - else if (event < 256) - set = CPUMF_CTR_SET_EXT; + if (p) { + struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); - return set; + return q->cpucf; + } + return NULL; } -static int validate_event(const struct hw_perf_event *hwc) +static struct cpu_cf_events *this_cpu_cfhw(void) { - switch (hwc->config_base) { - case CPUMF_CTR_SET_BASIC: - case CPUMF_CTR_SET_USER: - case CPUMF_CTR_SET_CRYPTO: - case CPUMF_CTR_SET_EXT: - /* check for reserved counters */ - if ((hwc->config >= 6 && hwc->config <= 31) || - (hwc->config >= 38 && hwc->config <= 63) || - (hwc->config >= 80 && hwc->config <= 127)) - return -EOPNOTSUPP; - break; - default: - return -EINVAL; - } - - return 0; + return get_cpu_cfhw(smp_processor_id()); } -static int validate_ctr_version(const struct hw_perf_event *hwc) +/* Disable counter sets on dedicated CPU */ +static void cpum_cf_reset_cpu(void *flags) { - struct cpu_hw_events *cpuhw; - int err = 0; + lcctl(0); +} - cpuhw = &get_cpu_var(cpu_hw_events); +/* Free per CPU data when the last event is removed. */ +static void cpum_cf_free_root(void) +{ + if (!refcount_dec_and_test(&cpu_cf_root.refcnt)) + return; + free_percpu(cpu_cf_root.cfptr); + cpu_cf_root.cfptr = NULL; + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n", + __func__, refcount_read(&cpu_cf_root.refcnt), + !cpu_cf_root.cfptr); +} - /* check required version for counter sets */ - switch (hwc->config_base) { - case CPUMF_CTR_SET_BASIC: - case CPUMF_CTR_SET_USER: - if (cpuhw->info.cfvn < 1) - err = -EOPNOTSUPP; - break; - case CPUMF_CTR_SET_CRYPTO: - case CPUMF_CTR_SET_EXT: - if (cpuhw->info.csvn < 1) - err = -EOPNOTSUPP; - if ((cpuhw->info.csvn == 1 && hwc->config > 159) || - (cpuhw->info.csvn == 2 && hwc->config > 175) || - (cpuhw->info.csvn > 2 && hwc->config > 255)) - err = -EOPNOTSUPP; - break; +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int cpum_cf_alloc_root(void) +{ + int rc = 0; + + if (refcount_inc_not_zero(&cpu_cf_root.refcnt)) + return rc; + + /* The memory is already zeroed. */ + cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); + if (cpu_cf_root.cfptr) { + refcount_set(&cpu_cf_root.refcnt, 1); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + } else { + rc = -ENOMEM; } - put_cpu_var(cpu_hw_events); - return err; + return rc; } -static int validate_ctr_auth(const struct hw_perf_event *hwc) +/* Free CPU counter data structure for a PMU */ +static void cpum_cf_free_cpu(int cpu) { - struct cpu_hw_events *cpuhw; - u64 ctrs_state; - int err = 0; + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + + mutex_lock(&pmc_reserve_mutex); + /* + * When invoked via CPU hotplug handler, there might be no events + * installed or that particular CPU might not have an + * event installed. This anchor pointer can be NULL! + */ + if (!cpu_cf_root.cfptr) + goto out; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + /* + * Might be zero when called from CPU hotplug handler and no event + * installed on that CPU, but on different CPUs. + */ + if (!cpuhw) + goto out; - cpuhw = &get_cpu_var(cpu_hw_events); + if (refcount_dec_and_test(&cpuhw->refcnt)) { + kfree(cpuhw); + p->cpucf = NULL; + } + cpum_cf_free_root(); +out: + mutex_unlock(&pmc_reserve_mutex); +} - /* check authorization for cpu counter sets */ - ctrs_state = cpumf_state_ctl[hwc->config_base]; - if (!(ctrs_state & cpuhw->info.auth_ctl)) - err = -EPERM; +/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ +static int cpum_cf_alloc_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + int rc; - put_cpu_var(cpu_hw_events); - return err; + mutex_lock(&pmc_reserve_mutex); + rc = cpum_cf_alloc_root(); + if (rc) + goto unlock; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + + if (!cpuhw) { + cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL); + if (cpuhw) { + p->cpucf = cpuhw; + refcount_set(&cpuhw->refcnt, 1); + } else { + rc = -ENOMEM; + } + } else { + refcount_inc(&cpuhw->refcnt); + } + if (rc) { + /* + * Error in allocation of event, decrement anchor. Since + * cpu_cf_event in not created, its destroy() function is not + * invoked. Adjust the reference counter for the anchor. + */ + cpum_cf_free_root(); + } +unlock: + mutex_unlock(&pmc_reserve_mutex); + return rc; } /* - * Change the CPUMF state to active. - * Enable and activate the CPU-counter sets according - * to the per-cpu control state. + * Create/delete per CPU data structures for /dev/hwctr interface and events + * created by perf_event_open(). + * If cpu is -1, track task on all available CPUs. This requires + * allocation of hardware data structures for all CPUs. This setup handles + * perf_event_open() with task context and /dev/hwctr interface. + * If cpu is non-zero install event on this CPU only. This setup handles + * perf_event_open() with CPU context. */ -static void cpumf_pmu_enable(struct pmu *pmu) +static int cpum_cf_alloc(int cpu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - int err; + cpumask_var_t mask; + int rc; - if (cpuhw->flags & PMU_F_ENABLED) - return; + if (cpu == -1) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + for_each_online_cpu(cpu) { + rc = cpum_cf_alloc_cpu(cpu); + if (rc) { + for_each_cpu(cpu, mask) + cpum_cf_free_cpu(cpu); + break; + } + cpumask_set_cpu(cpu, mask); + } + free_cpumask_var(mask); + } else { + rc = cpum_cf_alloc_cpu(cpu); + } + return rc; +} - err = lcctl(cpuhw->state); - if (err) { - pr_err("Enabling the performance measuring unit " - "failed with rc=%x\n", err); - return; +static void cpum_cf_free(int cpu) +{ + if (cpu == -1) { + for_each_online_cpu(cpu) + cpum_cf_free_cpu(cpu); + } else { + cpum_cf_free_cpu(cpu); } +} + +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ + /* interval in seconds */ + +/* Counter sets are stored as data stream in a page sized memory buffer and + * exported to user space via raw data attached to the event sample data. + * Each counter set starts with an eight byte header consisting of: + * - a two byte eye catcher (0xfeef) + * - a one byte counter set number + * - a two byte counter set size (indicates the number of counters in this set) + * - a three byte reserved value (must be zero) to make the header the same + * size as a counter value. + * All counter values are eight byte in size. + * + * All counter sets are followed by a 64 byte trailer. + * The trailer consists of a: + * - flag field indicating valid fields when corresponding bit set + * - the counter facility first and second version number + * - the CPU speed if nonzero + * - the time stamp the counter sets have been collected + * - the time of day (TOD) base value + * - the machine type. + * + * The counter sets are saved when the process is prepared to be executed on a + * CPU and saved again when the process is going to be removed from a CPU. + * The difference of both counter sets are calculated and stored in the event + * sample data area. + */ +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-31 Counter set identifier */ + unsigned int ctr:16; /* 32-47 Number of stored counters */ + unsigned int res1:16; /* 48-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base set */ + unsigned int speed:1; /* CPU speed set */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; - cpuhw->flags |= PMU_F_ENABLED; +/* Create the trailer data at the end of a page. */ +static void cfdiag_trailer(struct cf_trailer_entry *te) +{ + struct cpuid cpuid; + + te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */ + te->csvn = cpumf_ctr_info.csvn; + + get_cpu_id(&cpuid); /* Machine type */ + te->mach_type = cpuid.machine; + te->cpu_speed = cfdiag_cpu_speed; + if (te->cpu_speed) + te->speed = 1; + te->clock_base = 1; /* Save clock base */ + te->tod_base = tod_clock_base.tod; + te->timestamp = get_tod_clock_fast(); } /* - * Change the CPUMF state to inactive. - * Disable and enable (inactive) the CPU-counter sets according - * to the per-cpu control state. + * The number of counters per counter set varies between machine generations, + * but is constant when running on a particular machine generation. + * Determine each counter set size at device driver initialization and + * retrieve it later. */ -static void cpumf_pmu_disable(struct pmu *pmu) +static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX]; +static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - int err; - u64 inactive; + size_t ctrset_size = 0; - if (!(cpuhw->flags & PMU_F_ENABLED)) - return; + switch (ctrset) { + case CPUMF_CTR_SET_BASIC: + if (cpumf_ctr_info.cfvn >= 1) + ctrset_size = 6; + break; + case CPUMF_CTR_SET_USER: + if (cpumf_ctr_info.cfvn == 1) + ctrset_size = 6; + else if (cpumf_ctr_info.cfvn >= 3) + ctrset_size = 2; + break; + case CPUMF_CTR_SET_CRYPTO: + if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5) + ctrset_size = 16; + else if (cpumf_ctr_info.csvn >= 6) + ctrset_size = 20; + break; + case CPUMF_CTR_SET_EXT: + if (cpumf_ctr_info.csvn == 1) + ctrset_size = 32; + else if (cpumf_ctr_info.csvn == 2) + ctrset_size = 48; + else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) + ctrset_size = 128; + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) + ctrset_size = 160; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpumf_ctr_info.csvn > 3) + ctrset_size = 48; + break; + case CPUMF_CTR_SET_MAX: + break; + } + cpumf_ctr_setsizes[ctrset] = ctrset_size; +} - inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); - err = lcctl(inactive); - if (err) { - pr_err("Disabling the performance measuring unit " - "failed with rc=%x\n", err); - return; +/* + * Return the maximum possible counter set size (in number of 8 byte counters) + * depending on type and model number. + */ +static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset) +{ + return cpumf_ctr_setsizes[ctrset]; +} + +/* Read a counter set. The counter set number determines the counter set and + * the CPUM-CF first and second version number determine the number of + * available counters in each counter set. + * Each counter set starts with header containing the counter set number and + * the number of eight byte counters. + * + * The functions returns the number of bytes occupied by this counter set + * including the header. + * If there is no counter in the counter set, this counter set is useless and + * zero is returned on this case. + * + * Note that the counter sets may not be enabled or active and the stcctm + * instruction might return error 3. Depending on error_ok value this is ok, + * for example when called from cpumf_pmu_start() call back function. + */ +static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, + size_t room, bool error_ok) +{ + size_t ctrset_size, need = 0; + int rc = 3; /* Assume write failure */ + + ctrdata->def = CF_DIAG_CTRSET_DEF; + ctrdata->set = ctrset; + ctrdata->res1 = 0; + ctrset_size = cpum_cf_read_setsize(ctrset); + + if (ctrset_size) { /* Save data */ + need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); + if (need <= room) { + rc = ctr_stcctm(ctrset, ctrset_size, + (u64 *)(ctrdata + 1)); + } + if (rc != 3 || error_ok) + ctrdata->ctr = ctrset_size; + else + need = 0; } - cpuhw->flags &= ~PMU_F_ENABLED; + return need; } +static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, +}; -/* Number of perf events counting hardware events */ -static atomic_t num_events = ATOMIC_INIT(0); -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); +/* Read out all counter sets and save them in the provided data buffer. + * The last 64 byte host an artificial trailer entry. + */ +static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, + bool error_ok) +{ + struct cf_trailer_entry *trailer; + size_t offset = 0, done; + int i; -/* CPU-measurement alerts for the counter facility */ -static void cpumf_measurement_alert(struct ext_code ext_code, - unsigned int alert, unsigned long unused) + memset(data, 0, sz); + sz -= sizeof(*trailer); /* Always room for trailer */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + struct cf_ctrset_entry *ctrdata = data + offset; + + if (!(auth & cpumf_ctr_ctl[i])) + continue; /* Counter set not authorized */ + + done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); + offset += done; + } + trailer = data + offset; + cfdiag_trailer(trailer); + return offset + sizeof(*trailer); +} + +/* Calculate the difference for each counter in a counter set. */ +static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) { - struct cpu_hw_events *cpuhw; + for (; --counters >= 0; ++pstart, ++pstop) + if (*pstop >= *pstart) + *pstop -= *pstart; + else + *pstop = *pstart - *pstop + 1; +} - if (!(alert & CPU_MF_INT_CF_MASK)) - return; +/* Scan the counter sets and calculate the difference of each counter + * in each set. The result is the increment of each counter during the + * period the counter set has been activated. + * + * Return true on success. + */ +static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) +{ + struct cf_trailer_entry *trailer_start, *trailer_stop; + struct cf_ctrset_entry *ctrstart, *ctrstop; + size_t offset = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); + ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + + /* Counter set not authorized */ + if (!(auth & cpumf_ctr_ctl[i])) + continue; + /* Counter set size zero was not saved */ + if (!cpum_cf_read_setsize(i)) + continue; + + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { + pr_err_once("cpum_cf_diag counter set compare error " + "in set %i\n", ctrstart->set); + return 0; + } + if (ctrstart->def == CF_DIAG_CTRSET_DEF) { + cfdiag_diffctrset((u64 *)(ctrstart + 1), + (u64 *)(ctrstop + 1), ctrstart->ctr); + offset += ctrstart->ctr * sizeof(u64) + + sizeof(*ctrstart); + } + } - inc_irq_stat(IRQEXT_CMC); - cpuhw = &__get_cpu_var(cpu_hw_events); + /* Save time_stamp from start of event in stop's trailer */ + trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); + trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); + trailer_stop->progusage[0] = trailer_start->timestamp; - /* Measurement alerts are shared and might happen when the PMU - * is not reserved. Ignore these alerts in this case. */ - if (!(cpuhw->flags & PMU_F_RESERVED)) - return; + return 1; +} - /* counter authorization change alert */ - if (alert & CPU_MF_INT_CF_CACA) - qctri(&cpuhw->info); +static enum cpumf_ctr_set get_counter_set(u64 event) +{ + int set = CPUMF_CTR_SET_MAX; - /* loss of counter data alert */ - if (alert & CPU_MF_INT_CF_LCDA) - pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 288) + set = CPUMF_CTR_SET_EXT; + else if (event >= 448 && event < 496) + set = CPUMF_CTR_SET_MT_DIAG; + + return set; } -#define PMC_INIT 0 -#define PMC_RELEASE 1 -static void setup_pmc_cpu(void *flags) +static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u16 mtdiag_ctl; + int err = 0; - switch (*((int *) flags)) { - case PMC_INIT: - memset(&cpuhw->info, 0, sizeof(cpuhw->info)); - qctri(&cpuhw->info); - cpuhw->flags |= PMU_F_RESERVED; + /* check required version for counter sets */ + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpumf_ctr_info.cfvn < 1) + err = -EOPNOTSUPP; break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; + case CPUMF_CTR_SET_CRYPTO: + if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && + config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83)) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_EXT: + if (cpumf_ctr_info.csvn < 1) + err = -EOPNOTSUPP; + if ((cpumf_ctr_info.csvn == 1 && config > 159) || + (cpumf_ctr_info.csvn == 2 && config > 175) || + (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 && + config > 255) || + (cpumf_ctr_info.csvn >= 6 && config > 287)) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpumf_ctr_info.csvn <= 3) + err = -EOPNOTSUPP; + /* + * MT-diagnostic counters are read-only. The counter set + * is automatically enabled and activated on all CPUs with + * multithreading (SMT). Deactivation of multithreading + * also disables the counter set. State changes are ignored + * by lcctl(). Because Linux controls SMT enablement through + * a kernel parameter only, the counter set is either disabled + * or enabled and active. + * + * Thus, the counters can only be used if SMT is on and the + * counter set is enabled and active. + */ + mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; + if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) && + (cpumf_ctr_info.enable_ctl & mtdiag_ctl) && + (cpumf_ctr_info.act_ctl & mtdiag_ctl))) + err = -EOPNOTSUPP; break; + case CPUMF_CTR_SET_MAX: + err = -EOPNOTSUPP; } - /* Disable CPU counter sets */ - lcctl(0); + return err; } -/* Initialize the CPU-measurement facility */ -static int reserve_pmc_hardware(void) +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) { - int flags = PMC_INIT; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int err; - on_each_cpu(setup_pmc_cpu, &flags, 1); - measurement_alert_subclass_register(); + if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) + return; - return 0; + err = lcctl(cpuhw->state | cpuhw->dev_state); + if (err) + pr_err("Enabling the performance measuring unit failed with rc=%x\n", err); + else + cpuhw->flags |= PMU_F_ENABLED; } -/* Release the CPU-measurement facility */ -static void release_pmc_hardware(void) +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) { - int flags = PMC_RELEASE; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + u64 inactive; + int err; - on_each_cpu(setup_pmc_cpu, &flags, 1); - measurement_alert_subclass_unregister(); + if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + inactive |= cpuhw->dev_state; + err = lcctl(inactive); + if (err) + pr_err("Disabling the performance measuring unit failed with rc=%x\n", err); + else + cpuhw->flags &= ~PMU_F_ENABLED; } /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { - if (!atomic_add_unless(&num_events, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_events) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } + cpum_cf_free(event->cpu); } /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ @@ -320,14 +735,20 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int is_userspace_event(u64 ev) +{ + return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; +} + +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; - int err; + enum cpumf_ctr_set set; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -339,21 +760,26 @@ static int __hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = attr->config; - /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { - if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) - return -EOPNOTSUPP; - ev = cpumf_generic_events_user[ev]; - - /* No support for kernel space counters only */ + /* + * Count user space (problem-state) only + * Handle events 32 and 33 as 0:u and 1:u + */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + } } else if (!attr->exclude_kernel && attr->exclude_user) { + /* No support for kernel space counters only */ return -EOPNOTSUPP; - - /* Count user and kernel space */ } else { - if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) - return -EOPNOTSUPP; - ev = cpumf_generic_events_basic[ev]; + /* Count user and kernel space, incl. events 32 + 33 */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } } break; @@ -364,67 +790,79 @@ static int __hw_perf_event_init(struct perf_event *event) if (ev == -1) return -ENOENT; - if (ev >= PERF_CPUM_CF_MAX_CTR) - return -EINVAL; - - /* Use the hardware perf event structure to store the counter number - * in 'config' member and the counter set to which the counter belongs - * in the 'config_base'. The counter set (config_base) is then used - * to enable/disable the counters. - */ - hwc->config = ev; - hwc->config_base = get_counter_set(ev); + if (ev > PERF_CPUM_CF_MAX_CTR) + return -ENOENT; - /* Validate the counter that is assigned to this event. - * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicity - * validate event groups (event->group_leader != event). - */ - err = validate_event(hwc); - if (err) - return err; + /* Obtain the counter set to which the specified counter belongs */ + set = get_counter_set(ev); + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + case CPUMF_CTR_SET_MT_DIAG: + /* + * Use the hardware perf event structure to store the + * counter number in the 'config' member and the counter + * set number in the 'config_base' as bit mask. + * It is later used to enable/disable the counter(s). + */ + hwc->config = ev; + hwc->config_base = cpumf_ctr_ctl[set]; + break; + case CPUMF_CTR_SET_MAX: + /* The counter could not be associated to a counter set */ + return -EINVAL; + } /* Initialize for using the CPU-measurement counter facility */ - if (!atomic_inc_not_zero(&num_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) - err = -EBUSY; - else - atomic_inc(&num_events); - mutex_unlock(&pmc_reserve_mutex); - } + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; - /* Finally, validate version and authorization of the counter set */ - err = validate_ctr_auth(hwc); - if (!err) - err = validate_ctr_version(hwc); + /* + * Finally, validate version and authorization of the counter set. + * If the particular CPU counter set is not authorized, + * return with -ENOENT in order to fall back to other + * PMUs that might suffice the event request. + */ + if (!(hwc->config_base & cpumf_ctr_info.auth_ctl)) + return -ENOENT; + return validate_ctr_version(hwc->config, set); +} - return err; +/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; } static int cpumf_pmu_event_init(struct perf_event *event) { - int err; - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - if (is_sampling_event(event)) - return -ENOENT; - err = __hw_perf_event_init(event); - break; - default: - return -ENOENT; - } + unsigned int type = event->attr.type; + int err = -ENOENT; - if (unlikely(err) && event->destroy) - event->destroy(event); + if (is_sampling_event(event)) /* No sampling support */ + return err; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); return err; } @@ -434,8 +872,8 @@ static int hw_perf_event_reset(struct perf_event *event) u64 prev, new; int err; + prev = local64_read(&event->hw.prev_count); do { - prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) { if (err != 3) @@ -447,28 +885,26 @@ static int hw_perf_event_reset(struct perf_event *event) */ new = 0; } - } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new)); return err; } -static int hw_perf_event_update(struct perf_event *event) +static void hw_perf_event_update(struct perf_event *event) { u64 prev, new, delta; int err; + prev = local64_read(&event->hw.prev_count); do { - prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) - goto out; - } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + return; + } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new)); delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1; /* overflow */ local64_add(delta, &event->count); -out: - return err; } static void cpumf_pmu_read(struct perf_event *event) @@ -481,18 +917,13 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; + int i; - if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) - return; - - if (WARN_ON_ONCE(hwc->config == -1)) + if (!(hwc->state & PERF_HES_STOPPED)) return; - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - hwc->state = 0; /* (Re-)enable and activate the counter set */ @@ -504,45 +935,93 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) * needs to be synchronized. At this point, the counter set can be in * the inactive or disabled state. */ - hw_perf_event_reset(event); + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + cpuhw->usedss = cfdiag_getctr(cpuhw->start, + sizeof(cpuhw->start), + hwc->config_base, true); + } else { + hw_perf_event_reset(event); + } - /* increment refcount for this counter set */ - atomic_inc(&cpuhw->ctr_set[hwc->config_base]); + /* Increment refcount for counter sets */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if ((hwc->config_base & cpumf_ctr_ctl[i])) + atomic_inc(&cpuhw->ctr_set[i]); +} + +/* Create perf event sample with the counter sets as raw data. The sample + * is then pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int cfdiag_push_sample(struct perf_event *event, + struct cpu_cf_events *cpuhw) +{ + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = event->cpu; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = cpuhw->usedss; + raw.frag.data = cpuhw->stop; + perf_sample_save_raw_data(&data, event, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + + perf_event_update_userpage(event); + return overflow; } static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; + int i; if (!(hwc->state & PERF_HES_STOPPED)) { /* Decrement reference count for this counter set and if this * is the last used counter in the set, clear activation * control and set the counter set state to inactive. */ - if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) - ctr_set_stop(&cpuhw->state, hwc->config_base); - event->hw.state |= PERF_HES_STOPPED; + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(hwc->config_base & cpumf_ctr_ctl[i])) + continue; + if (!atomic_dec_return(&cpuhw->ctr_set[i])) + ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); + } + hwc->state |= PERF_HES_STOPPED; } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - hw_perf_event_update(event); - event->hw.state |= PERF_HES_UPTODATE; + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + local64_inc(&event->count); + cpuhw->usedss = cfdiag_getctr(cpuhw->stop, + sizeof(cpuhw->stop), + event->hw.config_base, + false); + if (cfdiag_diffctr(cpuhw, event->hw.config_base)) + cfdiag_push_sample(event, cpuhw); + } else { + hw_perf_event_update(event); + } + hwc->state |= PERF_HES_UPTODATE; } } static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - /* Check authorization for the counter set to which this - * counter belongs. - * For group events transaction, the authorization check is - * done in cpumf_pmu_commit_txn(). - */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) - if (validate_ctr_auth(&event->hw)) - return -EPERM; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -550,14 +1029,13 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) if (flags & PERF_EF_START) cpumf_pmu_start(event, PERF_EF_RELOAD); - perf_event_update_userpage(event); - return 0; } static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int i; cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -567,127 +1045,903 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) * * When a new perf event has been added but not yet started, this can * clear enable control and resets all counters in a set. Therefore, - * cpumf_pmu_start() always has to reenable a counter set. + * cpumf_pmu_start() always has to re-enable a counter set. */ - if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) - ctr_set_disable(&cpuhw->state, event->hw.config_base); - - perf_event_update_userpage(event); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if (!atomic_read(&cpuhw->ctr_set[i])) + ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); } +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, +}; + +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ +/* + * Synchronize access to device /dev/hwc. This mutex protects against + * concurrent access to functions cfset_open() and cfset_release(). + * Same for CPU hotplug add and remove events triggering + * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). + * It also serializes concurrent device ioctl access from multiple + * processes accessing /dev/hwc. + * + * The mutex protects concurrent access to the /dev/hwctr session management + * struct cfset_session and reference counting variable cfset_opencnt. + */ +static DEFINE_MUTEX(cfset_ctrset_mutex); + /* - * Start group events scheduling transaction. - * Set flags to perform a single test at commit time. + * CPU hotplug handles only /dev/hwctr device. + * For perf_event_open() the CPU hotplug handling is done on kernel common + * code: + * - CPU add: Nothing is done since a file descriptor can not be created + * and returned to the user. + * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and + * pmu_delete(). The event itself is removed when the file descriptor is + * closed. */ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static int cfset_online_cpu(unsigned int cpu); + +static int cpum_cf_online_cpu(unsigned int cpu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int rc = 0; - perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; - cpuhw->tx_state = cpuhw->state; + /* + * Ignore notification for perf_event_open(). + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + rc = cpum_cf_alloc_cpu(cpu); + if (!rc) + cfset_online_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return rc; } -/* - * Stop and cancel a group events scheduling tranctions. - * Assumes cpumf_pmu_del() is called for each successful added - * cpumf_pmu_add() during the transaction. +static int cfset_offline_cpu(unsigned int cpu); + +static int cpum_cf_offline_cpu(unsigned int cpu) +{ + /* + * During task exit processing of grouped perf events triggered by CPU + * hotplug processing, pmu_disable() is called as part of perf context + * removal process. Therefore do not trigger event removal now for + * perf_event_open() created events. Perf common code triggers event + * destruction when the event file descriptor is closed. + * + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + cfset_offline_cpu(cpu); + cpum_cf_free_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; +} + +/* Return true if store counter set multiple instruction is available */ +static inline int stccm_avail(void) +{ + return test_facility(142); +} + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_cf_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + + /* + * Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. + */ + cpuhw = this_cpu_cfhw(); + if (!cpuhw) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpumf_ctr_info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); +} + +static int cfset_init(void); +static int __init cpumf_pmu_init(void) +{ + int rc; + + /* Extract counter measurement facility information */ + if (!cpum_cf_avail() || qctri(&cpumf_ctr_info)) + return -ENODEV; + + /* Determine and store counter set sizes for later reference */ + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + cpum_cf_make_setsize(rc); + + /* + * Clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters + */ + system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc); + return rc; + } + + /* Setup s390dbf facility */ + cf_dbg = debug_register("cpum_cf", 2, 1, 128); + if (!cf_dbg) { + pr_err("Registration of s390dbf(cpum_cf) failed\n"); + rc = -ENOMEM; + goto out1; + } + debug_register_view(cf_dbg, &debug_sprintf_view); + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + goto out2; + } else if (stccm_avail()) { /* Setup counter set device */ + cfset_init(); + } + + rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "perf/s390/cf:online", + cpum_cf_online_cpu, cpum_cf_offline_cpu); + return rc; + +out2: + debug_unregister_view(cf_dbg, &debug_sprintf_view); + debug_unregister(cf_dbg); +out1: + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); + return rc; +} + +/* Support for the CPU Measurement Facility counter set extraction using + * device /dev/hwctr. This allows user space programs to extract complete + * counter set via normal file operations. */ -static void cpumf_pmu_cancel_txn(struct pmu *pmu) + +struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ + unsigned int sets; /* Counter set bit mask */ + atomic_t cpus_ack; /* # CPUs successfully executed func */ +}; + +struct cfset_request { /* CPUs and counter set bit mask */ + unsigned long ctrset; /* Bit mask of counter set to read */ + cpumask_t mask; /* CPU mask to read from */ + struct list_head node; /* Chain to cfset_session.head */ +}; + +static void cfset_session_init(void) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + INIT_LIST_HEAD(&cfset_session.head); +} - WARN_ON(cpuhw->tx_state != cpuhw->state); +/* Remove current request from global bookkeeping. Maintain a counter set bit + * mask on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_del(struct cfset_request *p) +{ + list_del(&p->node); +} - cpuhw->flags &= ~PERF_EVENT_TXN; - perf_pmu_enable(pmu); +/* Add current request to global bookkeeping. Maintain a counter set bit mask + * on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_add(struct cfset_request *p) +{ + list_add(&p->node, &cfset_session.head); +} + +/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access + * path is currently used. + * The cpu_cf_events::dev_state is used to denote counter sets in use by this + * interface. It is always or'ed in. If this interface is not active, its + * value is zero and no additional counter sets will be included. + * + * The cpu_cf_events::state is used by the perf_event_open SVC and remains + * unchanged. + * + * perf_pmu_enable() and perf_pmu_enable() and its call backs + * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the + * performance measurement subsystem to enable per process + * CPU Measurement counter facility. + * The XXX_enable() and XXX_disable functions are used to turn off + * x86 performance monitoring interrupt (PMI) during scheduling. + * s390 uses these calls to temporarily stop and resume the active CPU + * counters sets during scheduling. + * + * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr + * device access. The perf_event_open() SVC interface makes a lot of effort + * to only run the counters while the calling process is actively scheduled + * to run. + * When /dev/hwctr interface is also used at the same time, the counter sets + * will keep running, even when the process is scheduled off a CPU. + * However this is not a problem and does not lead to wrong counter values + * for the perf_event_open() SVC. The current counter value will be recorded + * during schedule-in. At schedule-out time the current counter value is + * extracted again and the delta is calculated and added to the event. + */ +/* Stop all counter sets via ioctl interface */ +static void cfset_ioctl_off(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int rc; + + /* Check if any counter set used by /dev/hwctr */ + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) { + if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { + ctr_set_disable(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + ctr_set_stop(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + } + } + /* Keep perf_event_open counter sets */ + rc = lcctl(cpuhw->dev_state | cpuhw->state); + if (rc) + pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); + if (!cpuhw->dev_state) + cpuhw->flags &= ~PMU_F_IN_USE; +} + +/* Start counter sets on particular CPU */ +static void cfset_ioctl_on(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int rc; + + cpuhw->flags |= PMU_F_IN_USE; + ctr_set_enable(&cpuhw->dev_state, p->sets); + ctr_set_start(&cpuhw->dev_state, p->sets); + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) + atomic_inc(&cpuhw->ctr_set[rc]); + rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ + if (!rc) + atomic_inc(&p->cpus_ack); + else + pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", + cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); +} + +static void cfset_release_cpu(void *p) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int rc; + + cpuhw->dev_state = 0; + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ + if (rc) + pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); +} + +/* This modifies the process CPU mask to adopt it to the currently online + * CPUs. Offline CPUs can not be addresses. This call terminates the access + * and is usually followed by close() or a new iotcl(..., START, ...) which + * creates a new request structure. + */ +static void cfset_all_stop(struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p = { + .sets = req->ctrset, + }; + + cpumask_and(&req->mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); +} + +/* Release function is also called when application gets terminated without + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. + */ +static int cfset_release(struct inode *inode, struct file *file) +{ + mutex_lock(&cfset_ctrset_mutex); + /* Open followed by close/exit has no private_data */ + if (file->private_data) { + cfset_all_stop(file->private_data); + cfset_session_del(file->private_data); + kfree(file->private_data); + file->private_data = NULL; + } + if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ + on_each_cpu(cfset_release_cpu, NULL, 1); + cpum_cf_free(-1); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; } /* - * Commit the group events scheduling transaction. On success, the - * transaction is closed. On error, the transaction is kept open - * until cpumf_pmu_cancel_txn() is called. + * Open via /dev/hwctr device. Allocate all per CPU resources on the first + * open of the device. The last close releases all per CPU resources. + * Parallel perf_event_open system calls also use per CPU resources. + * These invocations are handled via reference counting on the per CPU data + * structures. */ -static int cpumf_pmu_commit_txn(struct pmu *pmu) +static int cfset_open(struct inode *inode, struct file *file) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - u64 state; + int rc = 0; - /* check if the updated state can be scheduled */ - state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); - state >>= CPUMF_LCCTL_ENABLE_SHIFT; - if ((state & cpuhw->info.auth_ctl) != state) + if (!perfmon_capable()) return -EPERM; + file->private_data = NULL; + + mutex_lock(&cfset_ctrset_mutex); + if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */ + rc = cpum_cf_alloc(-1); + if (!rc) { + cfset_session_init(); + refcount_set(&cfset_opencnt, 1); + } + } + mutex_unlock(&cfset_ctrset_mutex); - cpuhw->flags &= ~PERF_EVENT_TXN; - perf_pmu_enable(pmu); + /* nonseekable_open() never fails */ + return rc ?: nonseekable_open(inode, file); +} + +static int cfset_all_start(struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p = { + .sets = req->ctrset, + .cpus_ack = ATOMIC_INIT(0), + }; + cpumask_var_t mask; + int rc = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); + rc = -EIO; + } + free_cpumask_var(mask); + return rc; +} + +/* Return the maximum required space for all possible CPUs in case one + * CPU will be onlined during the START, READ, STOP cycles. + * To find out the size of the counter sets, any one CPU will do. They + * all have the same counter sets. + */ +static size_t cfset_needspace(unsigned int sets) +{ + size_t bytes = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(sets & cpumf_ctr_ctl[i])) + continue; + bytes += cpum_cf_read_setsize(i) * sizeof(u64) + + sizeof(((struct s390_ctrset_setdata *)0)->set) + + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); + } + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); + return bytes; +} + +static int cfset_all_copy(unsigned long arg, cpumask_t *mask) +{ + struct s390_ctrset_read __user *ctrset_read; + unsigned int cpu, cpus, rc = 0; + void __user *uptr; + + ctrset_read = (struct s390_ctrset_read __user *)arg; + uptr = ctrset_read->data; + for_each_cpu(cpu, mask) { + struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); + struct s390_ctrset_cpudata __user *ctrset_cpudata; + + ctrset_cpudata = uptr; + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); + rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); + rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, + cpuhw->used); + if (rc) { + rc = -EFAULT; + goto out; + } + uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; + cond_resched(); + } + cpus = cpumask_weight(mask); + if (put_user(cpus, &ctrset_read->no_cpus)) + rc = -EFAULT; +out: + return rc; +} + +static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, + int ctrset_size, size_t room) +{ + size_t need = 0; + int rc = -1; + + need = sizeof(*p) + sizeof(u64) * ctrset_size; + if (need <= room) { + p->set = cpumf_ctr_ctl[ctrset]; + p->no_cnts = ctrset_size; + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); + if (rc == 3) /* Nothing stored */ + need = 0; + } + return need; +} + +/* Read all counter sets. */ +static void cfset_cpu_read(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int set, set_size; + size_t space; + + /* No data saved yet */ + cpuhw->used = 0; + cpuhw->sets = 0; + memset(cpuhw->data, 0, sizeof(cpuhw->data)); + + /* Scan the counter sets */ + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { + struct s390_ctrset_setdata *sp = (void *)cpuhw->data + + cpuhw->used; + + if (!(p->sets & cpumf_ctr_ctl[set])) + continue; /* Counter set not in list */ + set_size = cpum_cf_read_setsize(set); + space = sizeof(cpuhw->data) - cpuhw->used; + space = cfset_cpuset_read(sp, set, set_size, space); + if (space) { + cpuhw->used += space; + cpuhw->sets += 1; + } + } +} + +static int cfset_all_read(unsigned long arg, struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p; + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + p.sets = req->ctrset; + cpumask_and(mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); + rc = cfset_all_copy(arg, mask); + free_cpumask_var(mask); + return rc; +} + +static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) +{ + int ret = -ENODATA; + + if (req && req->ctrset) + ret = cfset_all_read(arg, req); + return ret; +} + +static long cfset_ioctl_stop(struct file *file) +{ + struct cfset_request *req = file->private_data; + int ret = -ENXIO; + + if (req) { + cfset_all_stop(req); + cfset_session_del(req); + kfree(req); + file->private_data = NULL; + ret = 0; + } + return ret; +} + +static long cfset_ioctl_start(unsigned long arg, struct file *file) +{ + struct s390_ctrset_start __user *ustart; + struct s390_ctrset_start start; + struct cfset_request *preq; + void __user *umask; + unsigned int len; + int ret = 0; + size_t need; + + if (file->private_data) + return -EBUSY; + ustart = (struct s390_ctrset_start __user *)arg; + if (copy_from_user(&start, ustart, sizeof(start))) + return -EFAULT; + if (start.version != S390_HWCTR_START_VERSION) + return -EINVAL; + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) + return -EINVAL; /* Invalid counter set */ + if (!start.counter_sets) + return -EINVAL; /* No counter set at all? */ + + preq = kzalloc(sizeof(*preq), GFP_KERNEL); + if (!preq) + return -ENOMEM; + cpumask_clear(&preq->mask); + len = min_t(u64, start.cpumask_len, cpumask_size()); + umask = (void __user *)start.cpumask; + if (copy_from_user(&preq->mask, umask, len)) { + kfree(preq); + return -EFAULT; + } + if (cpumask_empty(&preq->mask)) { + kfree(preq); + return -EINVAL; + } + need = cfset_needspace(start.counter_sets); + if (put_user(need, &ustart->data_bytes)) { + kfree(preq); + return -EFAULT; + } + preq->ctrset = start.counter_sets; + ret = cfset_all_start(preq); + if (!ret) { + cfset_session_add(preq); + file->private_data = preq; + } else { + kfree(preq); + } + return ret; +} + +/* Entry point to the /dev/hwctr device interface. + * The ioctl system call supports three subcommands: + * S390_HWCTR_START: Start the specified counter sets on a CPU list. The + * counter set keeps running until explicitly stopped. Returns the number + * of bytes needed to store the counter values. If another S390_HWCTR_START + * ioctl subcommand is called without a previous S390_HWCTR_STOP stop + * command on the same file descriptor, -EBUSY is returned. + * S390_HWCTR_READ: Read the counter set values from specified CPU list given + * with the S390_HWCTR_START command. + * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the + * previous S390_HWCTR_START subcommand. + */ +static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret; + + cpus_read_lock(); + mutex_lock(&cfset_ctrset_mutex); + switch (cmd) { + case S390_HWCTR_START: + ret = cfset_ioctl_start(arg, file); + break; + case S390_HWCTR_STOP: + ret = cfset_ioctl_stop(file); + break; + case S390_HWCTR_READ: + ret = cfset_ioctl_read(arg, file->private_data); + break; + default: + ret = -ENOTTY; + break; + } + mutex_unlock(&cfset_ctrset_mutex); + cpus_read_unlock(); + return ret; +} + +static const struct file_operations cfset_fops = { + .owner = THIS_MODULE, + .open = cfset_open, + .release = cfset_release, + .unlocked_ioctl = cfset_ioctl, +}; + +static struct miscdevice cfset_dev = { + .name = S390_HWCTR_DEVICE, + .minor = MISC_DYNAMIC_MINOR, + .fops = &cfset_fops, + .mode = 0666, +}; + +/* Hotplug add of a CPU. Scan through all active processes and add + * that CPU to the list of CPUs supplied with ioctl(..., START, ...). + */ +static int cfset_online_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; + + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_on(&p); + cpumask_set_cpu(cpu, &rp->mask); + } + } return 0; } -/* Performance monitoring unit for s390x */ -static struct pmu cpumf_pmu = { +/* Hotplug remove of a CPU. Scan through all active processes and clear + * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + * Adjust reference counts. + */ +static int cfset_offline_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; + + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_off(&p); + cpumask_clear_cpu(cpu, &rp->mask); + } + } + return 0; +} + +static void cfdiag_read(struct perf_event *event) +{ +} + +static int get_authctrsets(void) +{ + unsigned long auth = 0; + enum cpumf_ctr_set i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i]) + auth |= cpumf_ctr_ctl[i]; + } + return auth; +} + +/* Setup the event. Test for authorized counter sets and only include counter + * sets which are authorized at the time of the setup. Including unauthorized + * counter sets result in specification exception (and panic). + */ +static int cfdiag_event_init2(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = 0; + + /* Set sample_period to indicate sampling */ + event->hw.config = attr->config; + event->hw.sample_period = attr->sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + local64_set(&event->count, 0); + event->hw.last_period = event->hw.sample_period; + + /* Add all authorized counter sets to config_base. The + * the hardware init function is either called per-cpu or just once + * for all CPUS (event->cpu == -1). This depends on the whether + * counting is started for all CPUs or on a per workload base where + * the perf event moves from one CPU to another CPU. + * Checking the authorization on any CPU is fine as the hardware + * applies the same authorization settings to all CPUs. + */ + event->hw.config_base = get_authctrsets(); + + /* No authorized counter sets, nothing to count/sample */ + if (!event->hw.config_base) + err = -EINVAL; + + return err; +} + +static int cfdiag_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = -ENOENT; + + if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || + event->attr.type != event->pmu->type) + goto out; + + /* Raw events are used to access counters directly, + * hence do not permit excludes. + * This event is useless without PERF_SAMPLE_RAW to return counter set + * values as raw data. + */ + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || + !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { + err = -EOPNOTSUPP; + goto out; + } + + /* Initialize for using the CPU-measurement counter facility */ + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; + event->destroy = hw_perf_event_destroy; + + err = cfdiag_event_init2(event); +out: + return err; +} + +/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used + * to collect the complete counter sets for a scheduled process. Target + * are complete counter sets attached as raw data to the artificial event. + * This results in complete counter sets available when a process is + * scheduled. Contains the delta of every counter while the process was + * running. + */ +CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); + +static struct attribute *cfdiag_events_attr[] = { + CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cfdiag_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cfdiag_events_group = { + .name = "events", + .attrs = cfdiag_events_attr, +}; +static struct attribute_group cfdiag_format_group = { + .name = "format", + .attrs = cfdiag_format_attr, +}; +static const struct attribute_group *cfdiag_attr_groups[] = { + &cfdiag_events_group, + &cfdiag_format_group, + NULL, +}; + +/* Performance monitoring unit for event CF_DIAG. Since this event + * is also started and stopped via the perf_event_open() system call, use + * the same event enable/disable call back functions. They do not + * have a pointer to the perf_event structure as first parameter. + * + * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. + * Reuse them and distinguish the event (always first parameter) via + * 'config' member. + */ +static struct pmu cf_diag = { + .task_ctx_nr = perf_sw_context, + .event_init = cfdiag_event_init, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, - .event_init = cpumf_pmu_event_init, .add = cpumf_pmu_add, .del = cpumf_pmu_del, .start = cpumf_pmu_start, .stop = cpumf_pmu_stop, - .read = cpumf_pmu_read, - .start_txn = cpumf_pmu_start_txn, - .commit_txn = cpumf_pmu_commit_txn, - .cancel_txn = cpumf_pmu_cancel_txn, + .read = cfdiag_read, + + .attr_groups = cfdiag_attr_groups }; -static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +/* Calculate memory needed to store all counter sets together with header and + * trailer data. This is independent of the counter set authorization which + * can vary depending on the configuration. + */ +static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) { - unsigned int cpu = (long) hcpu; - int flags; + size_t max_size = sizeof(struct cf_trailer_entry); + enum cpumf_ctr_set i; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - flags = PMC_INIT; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); - break; - case CPU_DOWN_PREPARE: - flags = PMC_RELEASE; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); - break; - default: - break; + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + size_t size = cpum_cf_read_setsize(i); + + if (size) + max_size += size * sizeof(u64) + + sizeof(struct cf_ctrset_entry); + } + return max_size; +} + +/* Get the CPU speed, try sampling facility first and CPU attributes second. */ +static void cfdiag_get_cpu_speed(void) +{ + unsigned long mhz; + + if (cpum_sf_avail()) { /* Sampling facility first */ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) { + cfdiag_cpu_speed = si.cpu_speed; + return; + } } - return NOTIFY_OK; + /* Fallback: CPU speed extract static part. Used in case + * CPU Measurement Sampling Facility is turned off. + */ + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; } -static int __init cpumf_pmu_init(void) +static int cfset_init(void) { + size_t need; int rc; - if (!cpum_cf_avail()) - return -ENODEV; - - /* clear bit 15 of cr0 to unauthorize problem-state to - * extract measurement counters */ - ctl_clear_bit(0, 48); + cfdiag_get_cpu_speed(); + /* Make sure the counter set data fits into predefined buffer. */ + need = cfdiag_maxsize(&cpumf_ctr_info); + if (need > sizeof(((struct cpu_cf_events *)0)->start)) { + pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", + need); + return -ENOMEM; + } - /* register handler for measurement-alert interruptions */ - rc = register_external_interrupt(0x1407, cpumf_measurement_alert); + rc = misc_register(&cfset_dev); if (rc) { - pr_err("Registering for CPU-measurement alerts " - "failed with rc=%i\n", rc); + pr_err("Registration of /dev/%s failed rc=%i\n", + cfset_dev.name, rc); goto out; } - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { - pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); - unregister_external_interrupt(0x1407, cpumf_measurement_alert); - goto out; + misc_deregister(&cfset_dev); + pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", + rc); } - perf_cpu_notifier(cpumf_pmu_notifier); out: return rc; } -early_initcall(cpumf_pmu_init); + +device_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 000000000000..7ace1f9e4ccf --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,1065 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_mf.h> + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7); +CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); + +static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z14, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z15, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z15, DFLT_CC), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z16, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z16, SORTL), + CPUMF_EVENT_PTR(cf_z16, DFLT_CC), + CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumcf_pmu_events_group = { + .name = "events", +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumcf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumcf_pmu_format_group = { + .name = "format", + .attrs = cpumcf_pmu_format_attr, +}; + +static const struct attribute_group *cpumcf_pmu_attr_groups[] = { + &cpumcf_pmu_events_group, + &cpumcf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b, + struct attribute **c) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + for (i = 0; c[i]; i++) + j++; + j++; + + new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + for (i = 0; c[i]; i++) + new[j++] = c[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model, **cfvn, **csvn; + struct attribute *none[] = { NULL }; + struct cpumf_ctr_info ci; + struct cpuid cpu_id; + + /* Determine generic counters set(s) */ + qctri(&ci); + switch (ci.cfvn) { + case 1: + cfvn = cpumcf_fvn1_pmu_event_attr; + break; + case 3: + cfvn = cpumcf_fvn3_pmu_event_attr; + break; + default: + cfvn = none; + } + + /* Determine version specific crypto set */ + csvn = none; + if (ci.csvn >= 1 && ci.csvn <= 5) + csvn = cpumcf_svn_12345_pmu_event_attr; + else if (ci.csvn >= 6) + csvn = cpumcf_svn_678_pmu_event_attr; + + /* Determine model-specific counter set(s) */ + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + case 0x2964: + case 0x2965: + model = cpumcf_z13_pmu_event_attr; + break; + case 0x3906: + case 0x3907: + model = cpumcf_z14_pmu_event_attr; + break; + case 0x8561: + case 0x8562: + model = cpumcf_z15_pmu_event_attr; + break; + case 0x3931: + case 0x3932: + model = cpumcf_z16_pmu_event_attr; + break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; + default: + model = none; + break; + } + + combined = merge_attr(cfvn, csvn, model); + if (combined) + cpumcf_pmu_events_group.attrs = combined; + return cpumcf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 000000000000..459af23a47a5 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,2103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013, 2018 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define pr_fmt(fmt) "cpum_sf: " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/pid.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> +#include <asm/debug.h> +#include <asm/timex.h> +#include <linux/io.h> + +/* Perf PMU definitions for the sampling facility */ +#define PERF_CPUM_SF_MAX_CTR 2 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ +#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ +#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */ + +#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) +#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) +#define TEAR_REG(hwc) ((hwc)->last_tag) +#define SAMPL_RATE(hwc) ((hwc)->event_base) +#define SAMPL_FLAGS(hwc) ((hwc)->config_base) +#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) +#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE) + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. + */ +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) + +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. + * + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs + */ +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; + +struct sf_buffer { + unsigned long *sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ +}; + +struct aux_buffer { + struct sf_buffer sfb; + unsigned long head; /* index of SDB of buffer head */ + unsigned long alert_mark; /* index of SDB of alert request position */ + unsigned long empty_mark; /* mark of SDB not marked full */ + unsigned long *sdb_index; /* SDB address for fast lookup */ + unsigned long *sdbt_index; /* SDBT address for fast lookup */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ + struct perf_output_handle handle; /* AUX buffer output handle */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* Sampling control helper functions */ +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + +/* Return pointer to trailer entry of an sample data block */ +static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return ret; +} + +/* + * Return true if the entry in the sample data block table (sdbt) + * is a link to the next sdbt + */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1UL ? 1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return phys_to_virt(*s & ~0x1UL); +} + +/* + * sf_disable() - Switch off sampling facility + */ +static void sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + lsctl(&sreq); +} + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return !!cpuhw->sfb.sdbt; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long *sdbt, *curr, *head; + + sdbt = sfb->sdbt; + if (!sdbt) + return; + sfb->sdbt = NULL; + /* Free the SDBT after all SDBs are processed... */ + head = sdbt; + curr = sdbt; + do { + if (is_link_entry(curr)) { + /* Process table-link entries */ + curr = get_next_sdbt(curr); + free_page((unsigned long)sdbt); + sdbt = curr; + } else { + /* Process SDB pointer */ + free_page((unsigned long)phys_to_virt(*curr)); + curr++; + } + } while (curr != head); + memset(sfb, 0, sizeof(*sfb)); +} + +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + struct hws_trailer_entry *te; + unsigned long sdb; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + te = trailer_entry_ptr(sdb); + te->header.a = 1; + + /* Link SDB into the sample-data-block-table */ + *sdbt = virt_to_phys((void *)sdb); + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail, *tail_prev = NULL; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n", + __func__, (unsigned long)sfb->sdbt, + (unsigned long)tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *)get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = virt_to_phys((void *)new) + 1; + tail_prev = tail; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long)new); + tail = tail_prev; + } + break; + } + sfb->num_sdb++; + tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ + } + + /* Link sampling buffer to its origin */ + *tail = virt_to_phys(sfb->sdbt) + 1; + sfb->tail = tail; + + return rc; +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int rc; + + if (sfb->sdbt) + return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; + sfb->num_sdb = 0; + sfb->num_sdbt = 1; + + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1; + + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) + free_sampling_buffer(sfb); + return rc; +} + +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + struct hws_qsi_info_block si; + + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + qsi(&si); + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; +} + +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} + +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; +} + +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (sf_buffer_available(cpuhw)) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq; + + /* Calculate sampling buffers using 4K pages + * + * 1. The sampling size is 32 bytes for basic sampling. This size + * is the same for all machine types. Diagnostic + * sampling uses auxlilary data buffer setup which provides the + * memory for SDBs using linux common code auxiliary trace + * setup. + * + * 2. Function alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger a measurement-alert to harvest + * sample-data-blocks (SDB). This is done per SDB. This + * measurement alert interrupt fires quick enough to handle + * one SDB, on very high frequency and work loads there might + * be 2 to 3 SBDs available for sample processing. + * Currently there is no need for setup alert request on every + * n-th page. This is counterproductive as one IRQ triggers + * a very high number of samples to be processed at one IRQ. + * + * 3. Use the sampling frequency as input. + * Compute the number of SDBs and ensure a minimum + * of CPUM_SF_MIN_SDB. Depending on frequency add some more + * SDBs to handle a higher sampling rate. + * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples + * (one SDB) for every 10000 HZ frequency increment. + * + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). + */ + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000); + + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. + */ + sfb_init_allocs(n_sdb, hwc); + if (sf_buffer_available(cpuhw)) + return 0; + + return alloc_sampling_buffer(&cpuhw->sfb, + sfb_pending_allocs(&cpuhw->sfb, hwc)); +} + +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} + +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} + +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. + */ +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + unsigned long num; + + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + realloc_sampling_buffer(sfb, num, GFP_ATOMIC); +} + +/* Number of perf events counting hardware events */ +static refcount_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + sf_disable(); + switch (*((int *)flags)) { + case PMC_INIT: + memset(cpuhw, 0, sizeof(*cpuhw)); + qsi(&cpuhw->qsi); + cpuhw->flags |= PMU_F_RESERVED; + break; + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + deallocate_buffers(cpuhw); + break; + } +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); +} + +static void reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Release PMC if this is the last perf event */ + if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); +} + +static u32 cpumsf_pid_type(struct perf_event *event, + u32 pid, enum pid_type type) +{ + struct task_struct *tsk; + + /* Idle process */ + if (!pid) + goto out; + + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + pid = -1; + if (tsk) { + /* + * Only top level events contain the pid namespace in which + * they are created. + */ + if (event->parent) + event = event->parent; + pid = __task_pid_nr_ns(tsk, type, event->ns); + /* + * See also 1d953111b648 + * "perf/core: Don't report zero PIDs for exiting tasks". + */ + if (!pid && !pid_alive(tsk)) + pid = -1; + } +out: + return pid; +} + +static void cpumsf_output_event_pid(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + u32 pid; + struct perf_event_header header; + struct perf_output_handle handle; + + /* + * Obtain the PID from the basic-sampling data entry and + * correct the data->tid_entry.pid value. + */ + pid = data->tid_entry.pid; + + /* Protect callchain buffers, tasks */ + rcu_read_lock(); + + perf_prepare_sample(data, event, regs); + perf_prepare_header(&header, data, event, regs); + if (perf_output_begin(&handle, data, event, header.size)) + goto out; + + /* Update the process ID (see also kernel/events/core.c) */ + data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID); + data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); + + perf_output_sample(&handle, &header, data, event); + perf_output_end(&handle); +out: + rcu_read_unlock(); +} + +static unsigned long getrate(bool freq, unsigned long sample, + struct hws_qsi_info_block *si) +{ + unsigned long rate; + + if (freq) { + rate = freq_to_sample_rate(si, sample); + rate = hw_limit_rate(si, rate); + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(si, sample); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(si, rate) > + sysctl_perf_event_sample_rate) { + rate = 0; + } + } + return rate; +} + +/* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + * + * Since the CPU Measurement sampling facility can not handle frequency + * calculate the sampling interval when frequency is specified using + * this formula: + * interval := cpu_speed * 1000000 / sample_freq + * + * Returns errno on bad input and zero on success with parameter interval + * set to the correct sampling rate. + * + * Note: This function turns off freq bit to avoid calling function + * perf_adjust_period(). This causes frequency adjustment in the common + * code part which causes tremendous variations in the counter values. + */ +static int __hw_perf_event_init_rate(struct perf_event *event, + struct hws_qsi_info_block *si) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + + if (attr->freq) { + if (!attr->sample_freq) + return -EINVAL; + rate = getrate(attr->freq, attr->sample_freq, si); + attr->freq = 0; /* Don't call perf_adjust_period() */ + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE; + } else { + rate = getrate(attr->freq, attr->sample_period, si); + if (!rate) + return -EINVAL; + } + attr->sample_period = rate; + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int cpu, err = 0; + + /* Reserve CPU-measurement sampling facility */ + mutex_lock(&pmc_reserve_mutex); + if (!refcount_inc_not_zero(&num_events)) { + reserve_pmc_hardware(); + refcount_set(&num_events, 1); + } + event->destroy = hw_perf_event_destroy; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) { + qsi(&si); + } else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + + err = __hw_perf_event_init_rate(event, &si); + if (err) + goto out; + + /* Use AUX buffer. No need to allocate it by ourself */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) + goto out; + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_buffers(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_buffers(cpuhw, hwc); + if (err) + break; + } + } + + /* If PID/TID sampling is active, replace the default overflow + * handler to extract and resolve the PIDs from the basic-sampling + * data entries. + */ + if (event->attr.sample_type & PERF_SAMPLE_TID) + if (is_default_overflow_handler(event)) + event->overflow_handler = cpumsf_output_event_pid; +out: + mutex_unlock(&pmc_reserve_mutex); + return err; +} + +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + /* No support for taken branch sampling */ + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: + return -ENOENT; + } + + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + + err = __hw_perf_event_init(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct hw_perf_event *hwc; + int err; + + /* + * Event must be + * - added/started on this CPU (PMU_F_IN_USE set) + * - and CPU must be available (PMU_F_RESERVED set) + * - and not already enabled (PMU_F_ENABLED not set) + * - and not in error condition (PMU_F_ERR_MASK not set) + */ + if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED)) + return; + + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + hwc = &cpuhw->event->hw; + if (!(SAMPL_DIAG_MODE(hwc))) { + /* + * Account number of overflow-designated buffer extents + */ + sfb_account_overflows(cpuhw, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + /* Rate may be adjusted with ioctl() */ + cpuhw->lsctl.interval = SAMPL_RATE(hwc); + + /* (Re)enable the PMU and sampling facility */ + err = lsctl(&cpuhw->lsctl); + if (err) { + pr_err("Loading sampling controls failed: op 1 err %i\n", err); + return; + } + + /* Load current program parameter */ + lpp(&get_lowcore()->lpp); + cpuhw->flags |= PMU_F_ENABLED; +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + inactive.cd = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op 2 err %i\n", err); + return; + } + + /* + * Save state of TEAR and DEAR register contents. + * TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + qsi(&si); + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_event_exclude() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, + struct hws_basic_entry *basic) +{ + int overflow; + struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; + struct perf_sample_data data; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ + memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + + psw_bits(regs.psw).ia = basic->ia; + psw_bits(regs.psw).dat = basic->T; + psw_bits(regs.psw).wait = basic->W; + psw_bits(regs.psw).pstate = basic->P; + psw_bits(regs.psw).as = basic->AS; + + /* + * Use the hardware provided configuration level to decide if the + * sample belongs to a guest or host. If that is not available, + * fall back to the following heuristics: + * A non-zero guest program parameter always indicates a guest + * sample. Some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as an addon heuristic to detect most of these guest samples. + * If the value differs from 0xffff (the host value), we assume to + * be a KVM guest. + */ + switch (basic->CL) { + case 1: /* logical partition */ + sde_regs->in_guest = 0; + break; + case 2: /* virtual machine */ + sde_regs->in_guest = 1; + break; + default: /* old machine, use heuristics */ + if (basic->gpp || basic->prim_asn != 0xffff) + sde_regs->in_guest = 1; + break; + } + + /* + * Store the PID value from the sample-data-entry to be + * processed and resolved by cpumsf_output_event_pid(). + */ + data.tid_entry.pid = basic->hpp & LPP_PID_MASK; + + overflow = 0; + if (perf_event_exclude(event, ®s, sde_regs)) + goto out; + overflow = perf_event_overflow(event, &data, ®s); + perf_event_update_userpage(event); +out: + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + struct hws_trailer_entry *te; + struct hws_basic_entry *sample; + + te = trailer_entry_ptr((unsigned long)sdbt); + sample = (struct hws_basic_entry *)sdbt; + while ((unsigned long *)sample < (unsigned long *)te) { + /* Check for an empty sample */ + if (!sample->def || sample->LS) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check whether sample is valid */ + if (sample->def == 0x0001) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) { + /* Check whether sample is consistent */ + if (sample->I == 0 && sample->W == 0) { + /* Deliver sample data to perf */ + *overflow = perf_push_sample(event, + sample); + } + } else + /* Count discarded samples */ + *overflow += 1; + } else { + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->header.f) + break; + } + + /* Reset sample slot and advance to next sample */ + sample->def = 0; + sample++; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flush_all flag + * to also walk through partially filled sample-data-blocks. + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + unsigned long long event_overflow, sampl_overflow, num_sdb; + struct hw_perf_event *hwc = &event->hw; + union hws_trailer_header prev, new; + struct hws_trailer_entry *te; + unsigned long *sdbt, sdb; + int done; + + /* + * AUX buffer is used when in diagnostic sampling mode. + * No perf events/samples are created. + */ + if (SAMPL_DIAG_MODE(hwc)) + return; + + sdbt = (unsigned long *)TEAR_REG(hwc); + done = event_overflow = sampl_overflow = num_sdb = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + sdb = (unsigned long)phys_to_virt(*sdbt); + te = trailer_entry_ptr(sdb); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->header.f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ + if (te->header.overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ + sampl_overflow += te->header.overflow; + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, (unsigned long *)sdb, &event_overflow); + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long)sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + } + + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); +} + +static inline unsigned long aux_sdb_index(struct aux_buffer *aux, + unsigned long i) +{ + return i % aux->sfb.num_sdb; +} + +static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end) +{ + return end >= start ? end - start + 1 : 0; +} + +static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->alert_mark); +} + +static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->empty_mark); +} + +/* + * Get trailer entry by index of SDB. + */ +static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, + unsigned long index) +{ + unsigned long sdb; + + index = aux_sdb_index(aux, index); + sdb = aux->sdb_index[index]; + return trailer_entry_ptr(sdb); +} + +/* + * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu + * disabled. Collect the full SDBs in AUX buffer which have not reached + * the point of alert indicator. And ignore the SDBs which are not + * full. + * + * 1. Scan SDBs to see how much data is there and consume them. + * 2. Remove alert indicator in the buffer. + */ +static void aux_output_end(struct perf_output_handle *handle) +{ + unsigned long i, range_scan, idx; + struct aux_buffer *aux; + struct hws_trailer_entry *te; + + aux = perf_get_aux(handle); + if (!aux) + return; + + range_scan = aux_sdb_num_alert(aux); + for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + if (!te->header.f) + break; + } + /* i is num of SDBs which are full */ + perf_aux_output_end(handle, i << PAGE_SHIFT); + + /* Remove alert indicators in the buffer */ + te = aux_sdb_trailer(aux, aux->alert_mark); + te->header.a = 0; +} + +/* + * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event + * is first added to the CPU or rescheduled again to the CPU. It is called + * with pmu disabled. + * + * 1. Reset the trailer of SDBs to get ready for new data. + * 2. Tell the hardware where to put the data by reset the SDBs buffer + * head(tear/dear). + */ +static int aux_output_begin(struct perf_output_handle *handle, + struct aux_buffer *aux, + struct cpu_hw_sf *cpuhw) +{ + unsigned long range, i, range_scan, idx, head, base, offset; + struct hws_trailer_entry *te; + + if (handle->head & ~PAGE_MASK) + return -EINVAL; + + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range <= 1) + return -ENOMEM; + + /* + * SDBs between aux->head and aux->empty_mark are already ready + * for new data. range_scan is num of SDBs not within them. + */ + if (range > aux_sdb_num_empty(aux)) { + range_scan = range - aux_sdb_num_empty(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; + } + /* Save the position of empty SDBs */ + aux->empty_mark = aux->head + range - 1; + } + + /* Set alert indicator */ + aux->alert_mark = aux->head + range/2 - 1; + te = aux_sdb_trailer(aux, aux->alert_mark); + te->header.a = 1; + + /* Reset hardware buffer head */ + head = aux_sdb_index(aux, aux->head); + base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; + offset = head % CPUM_SF_SDB_PER_TABLE; + cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long); + cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]); + + return 0; +} + +/* + * Set alert indicator on SDB at index @alert_index while sampler is running. + * + * Return true if successfully. + * Return false if full indicator is already set by hardware sampler. + */ +static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + unsigned long long *overflow) +{ + union hws_trailer_header prev, new; + struct hws_trailer_entry *te; + + te = aux_sdb_trailer(aux, alert_index); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + new.val = prev.val; + *overflow = prev.overflow; + if (prev.f) { + /* + * SDB is already set by hardware. + * Abort and try to set somewhere + * behind. + */ + return false; + } + new.a = 1; + new.overflow = 0; + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); + return true; +} + +/* + * aux_reset_buffer() - Scan and setup SDBs for new samples + * @aux: The AUX buffer to set + * @range: The range of SDBs to scan started from aux->head + * @overflow: Set to overflow count + * + * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is + * marked as empty, check if it is already set full by the hardware sampler. + * If yes, that means new data is already there before we can set an alert + * indicator. Caller should try to set alert indicator to some position behind. + * + * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used + * previously and have already been consumed by user space. Reset these SDBs + * (clear full indicator and alert indicator) for new data. + * If aux->alert_mark fall in this area, just set it. Overflow count is + * recorded while scanning. + * + * SDBs between aux->head and aux->empty_mark are already reset at last time. + * and ready for new samples. So scanning on this area could be skipped. + * + * Return true if alert indicator is set successfully and false if not. + */ +static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + unsigned long long *overflow) +{ + union hws_trailer_header prev, new; + unsigned long i, range_scan, idx; + unsigned long long orig_overflow; + struct hws_trailer_entry *te; + + if (range <= aux_sdb_num_empty(aux)) + /* + * No need to scan. All SDBs in range are marked as empty. + * Just set alert indicator. Should check race with hardware + * sampler. + */ + return aux_set_alert(aux, aux->alert_mark, overflow); + + if (aux->alert_mark <= aux->empty_mark) + /* + * Set alert indicator on empty SDB. Should check race + * with hardware sampler. + */ + if (!aux_set_alert(aux, aux->alert_mark, overflow)) + return false; + + /* + * Scan the SDBs to clear full and alert indicator used previously. + * Start scanning from one SDB behind empty_mark. If the new alert + * indicator fall into this range, set it. + */ + range_scan = range - aux_sdb_num_empty(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + new.val = prev.val; + orig_overflow = prev.overflow; + new.f = 0; + new.overflow = 0; + if (idx == aux->alert_mark) + new.a = 1; + else + new.a = 0; + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); + *overflow += orig_overflow; + } + + /* Update empty_mark to new position */ + aux->empty_mark = aux->head + range - 1; + + return true; +} + +/* + * Measurement alert handler for diagnostic mode sampling. + */ +static void hw_collect_aux(struct cpu_hw_sf *cpuhw) +{ + struct aux_buffer *aux; + int done = 0; + unsigned long range = 0, size; + unsigned long long overflow = 0; + struct perf_output_handle *handle = &cpuhw->handle; + unsigned long num_sdb; + + aux = perf_get_aux(handle); + if (!aux) + return; + + /* Inform user space new data arrived */ + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; + debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__, + size >> PAGE_SHIFT); + perf_aux_output_end(handle, size); + + num_sdb = aux->sfb.num_sdb; + while (!done) { + /* Get an output handle */ + aux = perf_aux_output_begin(handle, cpuhw->event); + if (handle->size == 0) { + pr_err("The AUX buffer with %lu pages for the " + "diagnostic-sampling mode is full\n", + num_sdb); + break; + } + if (!aux) + return; + + /* Update head and alert_mark to new position */ + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range == 1) + aux->alert_mark = aux->head; + else + aux->alert_mark = aux->head + range/2 - 1; + + if (aux_reset_buffer(aux, range, &overflow)) { + if (!overflow) { + done = 1; + break; + } + size = range << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + pr_err("Sample data caused the AUX buffer with %lu " + "pages to overflow\n", aux->sfb.num_sdb); + } else { + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + } + } +} + +/* + * Callback when freeing AUX buffers. + */ +static void aux_buffer_free(void *data) +{ + struct aux_buffer *aux = data; + unsigned long i, num_sdbt; + + if (!aux) + return; + + /* Free SDBT. SDB is freed by the caller */ + num_sdbt = aux->sfb.num_sdbt; + for (i = 0; i < num_sdbt; i++) + free_page(aux->sdbt_index[i]); + + kfree(aux->sdbt_index); + kfree(aux->sdb_index); + kfree(aux); +} + +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + te->progusage2 = tod_clock_base.tod; +} + +/* + * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling + * @event: Event the buffer is setup for, event->cpu == -1 means current + * @pages: Array of pointers to buffer pages passed from perf core + * @nr_pages: Total pages + * @snapshot: Flag for snapshot mode + * + * This is the callback when setup an event using AUX buffer. Perf tool can + * trigger this by an additional mmap() call on the event. Unlike the buffer + * for basic samples, AUX buffer belongs to the event. It is scheduled with + * the task among online cpus when it is a per-thread event. + * + * Return the private AUX buffer structure if success or NULL if fails. + */ +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) +{ + struct sf_buffer *sfb; + struct aux_buffer *aux; + unsigned long *new, *tail; + int i, n_sdbt; + + if (!nr_pages || !pages) + return NULL; + + if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is larger than the " + "maximum sampling buffer limit\n", + nr_pages); + return NULL; + } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is less than the " + "minimum sampling buffer limit\n", + nr_pages); + return NULL; + } + + /* Allocate aux_buffer struct for the event */ + aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL); + if (!aux) + goto no_aux; + sfb = &aux->sfb; + + /* Allocate sdbt_index for fast reference */ + n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE); + aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); + if (!aux->sdbt_index) + goto no_sdbt_index; + + /* Allocate sdb_index for fast reference */ + aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); + if (!aux->sdb_index) + goto no_sdb_index; + + /* Allocate the first SDBT */ + sfb->num_sdbt = 0; + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; + tail = sfb->tail = sfb->sdbt; + + /* + * Link the provided pages of AUX buffer to SDBT. + * Allocate SDBT if needed. + */ + for (i = 0; i < nr_pages; i++, tail++) { + if (require_table_link(tail)) { + new = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!new) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; + /* Link current page to tail of chain */ + *tail = virt_to_phys(new) + 1; + tail = new; + } + /* Tail is the entry in a SDBT */ + *tail = virt_to_phys(pages[i]); + aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); + } + sfb->num_sdb = nr_pages; + + /* Link the last entry in the SDBT to the first SDBT */ + *tail = virt_to_phys(sfb->sdbt) + 1; + sfb->tail = tail; + + /* + * Initial all SDBs are zeroed. Mark it as empty. + * So there is no need to clear the full indicator + * when this event is first added. + */ + aux->empty_mark = sfb->num_sdb - 1; + + return aux; + +no_sdbt: + /* SDBs (AUX buffer pages) are freed by caller */ + for (i = 0; i < sfb->num_sdbt; i++) + free_page(aux->sdbt_index[i]); + kfree(aux->sdb_index); +no_sdb_index: + kfree(aux->sdbt_index); +no_sdbt_index: + kfree(aux); +no_aux: + return NULL; +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Check if the new sampling period/frequency is appropriate. + * + * Return non-zero on error and zero on passed checks. + */ +static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) +{ + struct hws_qsi_info_block si; + unsigned long rate; + bool do_freq; + + memset(&si, 0, sizeof(si)); + if (event->cpu == -1) { + qsi(&si); + } else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + + si = cpuhw->qsi; + } + + do_freq = !!SAMPL_FREQ_MODE(&event->hw); + rate = getrate(do_freq, value, &si); + if (!rate) + return -EINVAL; + + event->attr.sample_period = rate; + SAMPL_RATE(&event->hw) = rate; + hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); + return 0; +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + /* CPU hotplug off removes SDBs. No samples to extract. */ + if (cpuhw->flags & PMU_F_RESERVED) + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct aux_buffer *aux; + int err = 0; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw)) + return -EINVAL; + + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + if (!SAMPL_DIAG_MODE(&event->hw)) { + cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt); + cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt; + TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt; + } + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { + err = -EAGAIN; + goto out; + } + if (SAMPL_DIAG_MODE(&event->hw)) { + aux = perf_aux_output_begin(&cpuhw->handle, event); + if (!aux) { + err = -EINVAL; + goto out; + } + err = aux_output_begin(&cpuhw->handle, aux, cpuhw); + if (err) + goto out; + cpuhw->lsctl.ed = 1; + } + cpuhw->lsctl.es = 1; + + /* Set in_use flag and store event */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + if (SAMPL_DIAG_MODE(&event->hw)) + aux_output_end(&cpuhw->handle); + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); + +/* Attribute list for CPU_SF. + * + * The availablitiy depends on the CPU_MF sampling facility authorization + * for basic + diagnositic samples. This is determined at initialization + * time by the sampling facility device driver. + * If the authorization for basic samples is turned off, it should be + * also turned off for diagnostic sampling. + * + * During initialization of the device driver, check the authorization + * level for diagnostic sampling and installs the attribute + * file for diagnostic sampling if necessary. + * + * For now install a placeholder to reference all possible attributes: + * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG. + * Add another entry for the final NULL pointer. + */ +enum { + SF_CYCLES_BASIC_ATTR_IDX = 0, + SF_CYCLES_BASIC_DIAG_ATTR_IDX, + SF_CYCLES_ATTR_MAX +}; + +static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = { + [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC) +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .attr_groups = cpumsf_pmu_attr_groups, + + .setup_aux = aux_buffer_setup, + .free_aux = aux_buffer_free, + + .check_period = cpumsf_pmu_check_period, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = this_cpu_ptr(&cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) { + if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) + hw_collect_aux(cpuhw); + else + hw_perf_event_update(cpuhw->event, 0); + } + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__, + alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=%#x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int cpusf_pmu_setup(unsigned int cpu, int flags) +{ + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!refcount_read(&num_events)) + return 0; + + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); + return 0; +} + +static int s390_pmu_sf_online_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_INIT); +} + +static int s390_pmu_sf_offline_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_RELEASE); +} + +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("The sampling buffer limits have changed to: " + "min %lu max %lu (diag %lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static const struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + +enum { + RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */ + RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */ + RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */ +}; + +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason %#x\n", reason); +} + +static int __init init_cpum_sampling_pmu(void) +{ + struct hws_qsi_info_block si; + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + memset(&si, 0, sizeof(si)); + qsi(&si); + if (!si.as && !si.ad) + return -ENODEV; + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) { + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + /* Sampling of diagnostic data authorized, + * install event into attribute list of PMU device. + */ + cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } + + sfdbg = debug_register("cpum_sf", 2, 1, 80); + if (!sfdbg) { + pr_err("Registering for s390dbf failed\n"); + return -ENOMEM; + } + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + debug_unregister(sfdbg); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_PERF); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + debug_unregister(sfdbg); + goto out; + } + + cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online", + s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu); +out: + return err; +} + +arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index a6fc037671b1..606750bae508 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,44 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ -#define KMSG_COMPONENT "perf" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "perf: " fmt #include <linux/kernel.h> #include <linux/perf_event.h> #include <linux/kvm_host.h> #include <linux/percpu.h> -#include <linux/export.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/sysfs.h> +#include <asm/stacktrace.h> #include <asm/irq.h> #include <asm/cpu_mf.h> #include <asm/lowcore.h> #include <asm/processor.h> - -const char *perf_pmu_name(void) -{ - if (cpum_cf_avail() || cpum_sf_avail()) - return "CPU-measurement facilities (CPUMF)"; - return "pmu"; -} -EXPORT_SYMBOL(perf_pmu_name); - -int perf_num_counters(void) -{ - int num = 0; - - if (cpum_cf_avail()) - num += PERF_CPUM_CF_MAX_CTR; - - return num; -} -EXPORT_SYMBOL(perf_num_counters); +#include <asm/sysinfo.h> +#include <asm/unwind.h> static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) { @@ -47,17 +30,18 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) if (!stack) return NULL; - return (struct kvm_s390_sie_block *) stack->empty1[0]; + return (struct kvm_s390_sie_block *)stack->sie_control_block; } static bool is_in_guest(struct pt_regs *regs) { - unsigned long ip = instruction_pointer(regs); - if (user_mode(regs)) return false; - - return ip == (unsigned long) &sie_exit; +#if IS_ENABLED(CONFIG_KVM) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif } static unsigned long guest_is_user_mode(struct pt_regs *regs) @@ -67,10 +51,10 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs) static unsigned long instruction_pointer_guest(struct pt_regs *regs) { - return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; + return sie_block(regs)->gpsw.addr; } -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { return is_in_guest(regs) ? instruction_pointer_guest(regs) : instruction_pointer(regs); @@ -82,8 +66,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs) : PERF_RECORD_MISC_GUEST_KERNEL; } -unsigned long perf_misc_flags(struct pt_regs *regs) +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + +unsigned long perf_arch_misc_flags(struct pt_regs *regs) { + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + if (is_in_guest(regs)) return perf_misc_guest_flags(regs); @@ -91,86 +98,132 @@ unsigned long perf_misc_flags(struct pt_regs *regs) : PERF_RECORD_MISC_KERNEL; } -void perf_event_print_debug(void) +static void print_debug_cf(void) { struct cpumf_ctr_info cf_info; - unsigned long flags; - int cpu; + int cpu = smp_processor_id(); - if (!cpum_cf_avail()) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); memset(&cf_info, 0, sizeof(cf_info)); - if (!qctri(&cf_info)) { + if (!qctri(&cf_info)) pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", cpu, cf_info.cfvn, cf_info.csvn, cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); - print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET, - &cf_info, sizeof(cf_info)); - } +} - local_irq_restore(flags); +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); } -/* See also arch/s390/kernel/traps.c */ -static unsigned long __store_trace(struct perf_callchain_entry *entry, - unsigned long sp, - unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, - sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } +void perf_event_print_debug(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); + local_irq_restore(flags); } -void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) { - unsigned long head; - struct stack_frame *head_sf; + struct cpumf_ctr_info ci; - if (user_mode(regs)) + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) return; - head = regs->gprs[15]; - head_sf = (struct stack_frame *) head; + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} - if (!head_sf || !head_sf->back_chain) +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) return; - head = head_sf->back_chain; - head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + arch_stack_walk_user_common(NULL, NULL, entry, regs, true); +} + +/* Perf definitions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; - __store_trace(entry, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id); } diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c new file mode 100644 index 000000000000..810f5b6c5e01 --- /dev/null +++ b/arch/s390/kernel/perf_pai.c @@ -0,0 +1,1230 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support - Processor Activity Instrumentation Facility + * + * Copyright IBM Corp. 2026 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ +#define pr_fmt(fmt) "pai: " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/perf_event.h> +#include <asm/ctlreg.h> +#include <asm/pai.h> +#include <asm/debug.h> + +static debug_info_t *paidbg; + +DEFINE_STATIC_KEY_FALSE(pai_key); + +enum { + PAI_PMU_CRYPTO, /* Index of PMU pai_crypto */ + PAI_PMU_EXT, /* Index of PMU pai_ext */ + PAI_PMU_MAX /* # of PAI PMUs */ +}; + +enum { + PAIE1_CB_SZ = 0x200, /* Size of PAIE1 control block */ + PAIE1_CTRBLOCK_SZ = 0x400 /* Size of PAIE1 counter blocks */ +}; + +struct pai_userdata { + u16 num; + u64 value; +} __packed; + +/* Create the PAI extension 1 control block area. + * The PAI extension control block 1 is pointed to by lowcore + * address 0x1508 for each CPU. This control block is 512 bytes in size + * and requires a 512 byte boundary alignment. + */ +struct paiext_cb { /* PAI extension 1 control block */ + u64 header; /* Not used */ + u64 reserved1; + u64 acc; /* Addr to analytics counter control block */ + u8 reserved2[PAIE1_CTRBLOCK_SZ - 3 * sizeof(u64)]; +} __packed; + +struct pai_map { + unsigned long *area; /* Area for CPU to store counters */ + struct pai_userdata *save; /* Page to store no-zero counters */ + unsigned int active_events; /* # of PAI crypto users */ + refcount_t refcnt; /* Reference count mapped buffers */ + struct perf_event *event; /* Perf event for sampling */ + struct list_head syswide_list; /* List system-wide sampling events */ + struct paiext_cb *paiext_cb; /* PAI extension control block area */ + bool fullpage; /* True: counter area is a full page */ +}; + +struct pai_mapptr { + struct pai_map *mapptr; +}; + +static struct pai_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct pai_mapptr __percpu *mapptr; +} pai_root[PAI_PMU_MAX]; + +/* This table defines the different parameters of the PAI PMUs. During + * initialization the machine dependent values are extracted and saved. + * However most of the values are static and do not change. + * There is one table entry per PAI PMU. + */ +struct pai_pmu { /* Define PAI PMU characteristics */ + const char *pmuname; /* Name of PMU */ + const int facility_nr; /* Facility number to check for support */ + unsigned int num_avail; /* # Counters defined by hardware */ + unsigned int num_named; /* # Counters known by name */ + unsigned long base; /* Counter set base number */ + unsigned long kernel_offset; /* Offset to kernel part in counter page */ + unsigned long area_size; /* Size of counter area */ + const char * const *names; /* List of counter names */ + struct pmu *pmu; /* Ptr to supporting PMU */ + int (*init)(struct pai_pmu *p); /* PMU support init function */ + void (*exit)(struct pai_pmu *p); /* PMU support exit function */ + struct attribute_group *event_group; /* Ptr to attribute of events */ +}; + +static struct pai_pmu pai_pmu[]; /* Forward declaration */ + +/* Free per CPU data when the last event is removed. */ +static void pai_root_free(int idx) +{ + if (refcount_dec_and_test(&pai_root[idx].refcnt)) { + free_percpu(pai_root[idx].mapptr); + pai_root[idx].mapptr = NULL; + } + debug_sprintf_event(paidbg, 5, "%s root[%d].refcount %d\n", __func__, + idx, refcount_read(&pai_root[idx].refcnt)); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int pai_root_alloc(int idx) +{ + if (!refcount_inc_not_zero(&pai_root[idx].refcnt)) { + /* The memory is already zeroed. */ + pai_root[idx].mapptr = alloc_percpu(struct pai_mapptr); + if (!pai_root[idx].mapptr) + return -ENOMEM; + refcount_set(&pai_root[idx].refcnt, 1); + } + return 0; +} + +/* Release the PMU if event is the last perf event */ +static DEFINE_MUTEX(pai_reserve_mutex); + +/* Free all memory allocated for event counting/sampling setup */ +static void pai_free(struct pai_mapptr *mp) +{ + if (mp->mapptr->fullpage) + free_page((unsigned long)mp->mapptr->area); + else + kfree(mp->mapptr->area); + kfree(mp->mapptr->paiext_cb); + kvfree(mp->mapptr->save); + kfree(mp->mapptr); + mp->mapptr = NULL; +} + +/* Adjust usage counters and remove allocated memory when all users are + * gone. + */ +static void pai_event_destroy_cpu(struct perf_event *event, int cpu) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = per_cpu_ptr(pai_root[idx].mapptr, cpu); + struct pai_map *cpump = mp->mapptr; + + mutex_lock(&pai_reserve_mutex); + debug_sprintf_event(paidbg, 5, "%s event %#llx idx %d cpu %d users %d " + "refcnt %u\n", __func__, event->attr.config, idx, + event->cpu, cpump->active_events, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) + pai_free(mp); + pai_root_free(idx); + mutex_unlock(&pai_reserve_mutex); +} + +static void pai_event_destroy(struct perf_event *event) +{ + int cpu; + + free_page(PAI_SAVE_AREA(event)); + if (event->cpu == -1) { + struct cpumask *mask = PAI_CPU_MASK(event); + + for_each_cpu(cpu, mask) + pai_event_destroy_cpu(event, cpu); + kfree(mask); + } else { + pai_event_destroy_cpu(event, event->cpu); + } +} + +static void paicrypt_event_destroy(struct perf_event *event) +{ + static_branch_dec(&pai_key); + pai_event_destroy(event); +} + +static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset) +{ + if (offset) + nr += offset / sizeof(*page); + return page[nr]; +} + +/* Read the counter values. Return value from location in CMP. For base + * event xxx_ALL sum up all events. Returns counter value. + */ +static u64 pai_getdata(struct perf_event *event, bool kernel) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_pmu *pp = &pai_pmu[idx]; + struct pai_map *cpump = mp->mapptr; + unsigned int i; + u64 sum = 0; + + if (event->attr.config != pp->base) { + return pai_getctr(cpump->area, + event->attr.config - pp->base, + kernel ? pp->kernel_offset : 0); + } + + for (i = 1; i <= pp->num_avail; i++) { + u64 val = pai_getctr(cpump->area, i, + kernel ? pp->kernel_offset : 0); + + if (!val) + continue; + sum += val; + } + return sum; +} + +static u64 paicrypt_getall(struct perf_event *event) +{ + u64 sum = 0; + + if (!event->attr.exclude_kernel) + sum += pai_getdata(event, true); + if (!event->attr.exclude_user) + sum += pai_getdata(event, false); + + return sum; +} + +/* Check concurrent access of counting and sampling for crypto events. + * This function is called in process context and it is save to block. + * When the event initialization functions fails, no other call back will + * be invoked. + * + * Allocate the memory for the event. + */ +static int pai_alloc_cpu(struct perf_event *event, int cpu) +{ + int rc, idx = PAI_PMU_IDX(event); + struct pai_map *cpump = NULL; + bool need_paiext_cb = false; + struct pai_mapptr *mp; + + mutex_lock(&pai_reserve_mutex); + /* Allocate root node */ + rc = pai_root_alloc(idx); + if (rc) + goto unlock; + + /* Allocate node for this event */ + mp = per_cpu_ptr(pai_root[idx].mapptr, cpu); + cpump = mp->mapptr; + if (!cpump) { /* Paicrypt_map allocated? */ + rc = -ENOMEM; + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); + if (!cpump) + goto undo; + /* Allocate memory for counter page and counter extraction. + * Only the first counting event has to allocate a page. + */ + mp->mapptr = cpump; + if (idx == PAI_PMU_CRYPTO) { + cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL); + /* free_page() can handle 0x0 address */ + cpump->fullpage = true; + } else { /* PAI_PMU_EXT */ + /* + * Allocate memory for counter area and counter extraction. + * These are + * - a 512 byte block and requires 512 byte boundary + * alignment. + * - a 1KB byte block and requires 1KB boundary + * alignment. + * Only the first counting event has to allocate the area. + * + * Note: This works with commit 59bb47985c1d by default. + * Backporting this to kernels without this commit might + * needs adjustment. + */ + cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL); + cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); + need_paiext_cb = true; + } + cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1, + sizeof(struct pai_userdata), + GFP_KERNEL); + if (!cpump->area || !cpump->save || + (need_paiext_cb && !cpump->paiext_cb)) { + pai_free(mp); + goto undo; + } + INIT_LIST_HEAD(&cpump->syswide_list); + refcount_set(&cpump->refcnt, 1); + rc = 0; + } else { + refcount_inc(&cpump->refcnt); + } + +undo: + if (rc) { + /* Error in allocation of event, decrement anchor. Since + * the event in not created, its destroy() function is never + * invoked. Adjust the reference counter for the anchor. + */ + pai_root_free(idx); + } +unlock: + mutex_unlock(&pai_reserve_mutex); + /* If rc is non-zero, no increment of counter/sampler was done. */ + return rc; +} + +static int pai_alloc(struct perf_event *event) +{ + struct cpumask *maskptr; + int cpu, rc = -ENOMEM; + + maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); + if (!maskptr) + goto out; + + for_each_online_cpu(cpu) { + rc = pai_alloc_cpu(event, cpu); + if (rc) { + for_each_cpu(cpu, maskptr) + pai_event_destroy_cpu(event, cpu); + kfree(maskptr); + goto out; + } + cpumask_set_cpu(cpu, maskptr); + } + + /* + * On error all cpumask are freed and all events have been destroyed. + * Save of which CPUs data structures have been allocated for. + * Release them in pai_event_destroy call back function + * for this event. + */ + PAI_CPU_MASK(event) = maskptr; + rc = 0; +out: + return rc; +} + +/* Validate event number and return error if event is not supported. + * On successful return, PAI_PMU_IDX(event) is set to the index of + * the supporting paing_support[] array element. + */ +static int pai_event_valid(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + struct pai_pmu *pp = &pai_pmu[idx]; + + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* Allow only CRYPTO_ALL/NNPA_ALL for sampling */ + if (a->sample_period && a->config != pp->base) + return -EINVAL; + /* PAI crypto event must be in valid range, try others if not */ + if (a->config < pp->base || a->config > pp->base + pp->num_avail) + return -ENOENT; + if (idx == PAI_PMU_EXT && a->exclude_user) + return -EINVAL; + PAI_PMU_IDX(event) = idx; + return 0; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int pai_event_init(struct perf_event *event, int idx) +{ + struct perf_event_attr *a = &event->attr; + int rc; + + /* PAI event must be valid and in supported range */ + rc = pai_event_valid(event, idx); + if (rc) + goto out; + /* Get a page to store last counter values for sampling */ + if (a->sample_period) { + PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); + if (!PAI_SAVE_AREA(event)) { + rc = -ENOMEM; + goto out; + } + } + + if (event->cpu >= 0) + rc = pai_alloc_cpu(event, event->cpu); + else + rc = pai_alloc(event); + if (rc) { + free_page(PAI_SAVE_AREA(event)); + goto out; + } + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which contain the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } +out: + return rc; +} + +static int paicrypt_event_init(struct perf_event *event) +{ + int rc = pai_event_init(event, PAI_PMU_CRYPTO); + + if (!rc) { + event->destroy = paicrypt_event_destroy; + static_branch_inc(&pai_key); + } + return rc; +} + +static void pai_read(struct perf_event *event, + u64 (*fct)(struct perf_event *event)) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = fct(event); + local64_set(&event->hw.prev_count, new); + delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1; + local64_add(delta, &event->count); +} + +static void paicrypt_read(struct perf_event *event) +{ + pai_read(event, paicrypt_getall); +} + +static void pai_start(struct perf_event *event, int flags, + u64 (*fct)(struct perf_event *event)) +{ + int idx = PAI_PMU_IDX(event); + struct pai_pmu *pp = &pai_pmu[idx]; + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + u64 sum; + + if (!event->attr.sample_period) { /* Counting */ + sum = fct(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } else { /* Sampling */ + memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size); + /* Enable context switch callback for system-wide sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); + perf_sched_cb_inc(event->pmu); + } else { + cpump->event = event; + } + } +} + +static void paicrypt_start(struct perf_event *event, int flags) +{ + pai_start(event, flags, paicrypt_getall); +} + +static int pai_add(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + unsigned long ccd; + + if (++cpump->active_events == 1) { + if (!pcb) { /* PAI crypto */ + ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET; + WRITE_ONCE(get_lowcore()->ccd, ccd); + local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); + } else { /* PAI extension 1 */ + ccd = virt_to_phys(pcb); + WRITE_ONCE(get_lowcore()->aicd, ccd); + pcb->acc = virt_to_phys(cpump->area) | 0x1; + /* Enable CPU instruction lookup for PAIE1 control block */ + local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); + } + } + if (flags & PERF_EF_START) + pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD); + event->hw.state = 0; + return 0; +} + +static int paicrypt_add(struct perf_event *event, int flags) +{ + return pai_add(event, flags); +} + +static void pai_have_sample(struct perf_event *, struct pai_map *); +static void pai_stop(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ + pai_pmu[idx].pmu->read(event); + } else { /* Sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + perf_sched_cb_dec(event->pmu); + list_del(PAI_SWLIST(event)); + } else { + pai_have_sample(event, cpump); + cpump->event = NULL; + } + } + event->hw.state = PERF_HES_STOPPED; +} + +static void paicrypt_stop(struct perf_event *event, int flags) +{ + pai_stop(event, flags); +} + +static void pai_del(struct perf_event *event, int flags) +{ + int idx = PAI_PMU_IDX(event); + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE); + if (--cpump->active_events == 0) { + if (!pcb) { /* PAI crypto */ + local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); + WRITE_ONCE(get_lowcore()->ccd, 0); + } else { /* PAI extension 1 */ + /* Disable CPU instruction lookup for PAIE1 control block */ + local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); + pcb->acc = 0; + WRITE_ONCE(get_lowcore()->aicd, 0); + } + } +} + +static void paicrypt_del(struct perf_event *event, int flags) +{ + pai_del(event, flags); +} + +/* Create raw data and save it in buffer. Calculate the delta for each + * counter between this invocation and the last invocation. + * Returns number of bytes copied. + * Saves only entries with positive counter difference of the form + * 2 bytes: Number of counter + * 8 bytes: Value of counter + */ +static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page, + struct pai_pmu *pp, unsigned long *page_old, + bool exclude_user, bool exclude_kernel) +{ + int i, outidx = 0; + + for (i = 1; i <= pp->num_avail; i++) { + u64 val = 0, val_old = 0; + + if (!exclude_kernel) { + val += pai_getctr(page, i, pp->kernel_offset); + val_old += pai_getctr(page_old, i, pp->kernel_offset); + } + if (!exclude_user) { + val += pai_getctr(page, i, 0); + val_old += pai_getctr(page_old, i, 0); + } + if (val >= val_old) + val -= val_old; + else + val = (~0ULL - val_old) + val + 1; + if (val) { + userdata[outidx].num = i; + userdata[outidx].value = val; + outidx++; + } + } + return outidx * sizeof(*userdata); +} + +/* Write sample when one or more counters values are nonzero. + * + * Note: The function paicrypt_sched_task() and pai_push_sample() are not + * invoked after function paicrypt_del() has been called because of function + * perf_sched_cb_dec(). Both functions are only + * called when sampling is active. Function perf_sched_cb_inc() + * has been invoked to install function paicrypt_sched_task() as call back + * to run at context switch time. + * + * This causes function perf_event_context_sched_out() and + * perf_event_context_sched_in() to check whether the PMU has installed an + * sched_task() callback. That callback is not active after paicrypt_del() + * returns and has deleted the event on that CPU. + */ +static int pai_push_sample(size_t rawsize, struct pai_map *cpump, + struct perf_event *event) +{ + int idx = PAI_PMU_IDX(event); + struct pai_pmu *pp = &pai_pmu[idx]; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + memset(&data, 0, sizeof(data)); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (event->attr.sample_type & PERF_SAMPLE_TID) { + data.tid_entry.pid = task_tgid_nr(current); + data.tid_entry.tid = task_pid_nr(current); + } + if (event->attr.sample_type & PERF_SAMPLE_TIME) + data.time = event->clock(); + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) + data.id = event->id; + if (event->attr.sample_type & PERF_SAMPLE_CPU) { + data.cpu_entry.cpu = smp_processor_id(); + data.cpu_entry.reserved = 0; + } + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = rawsize; + raw.frag.data = cpump->save; + perf_sample_save_raw_data(&data, event, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + perf_event_update_userpage(event); + /* Save crypto counter lowcore page after reading event data. */ + memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size); + return overflow; +} + +/* Check if there is data to be saved on schedule out of a task. */ +static void pai_have_sample(struct perf_event *event, struct pai_map *cpump) +{ + struct pai_pmu *pp; + size_t rawsize; + + if (!event) /* No event active */ + return; + pp = &pai_pmu[PAI_PMU_IDX(event)]; + rawsize = pai_copy(cpump->save, cpump->area, pp, + (unsigned long *)PAI_SAVE_AREA(event), + event->attr.exclude_user, + event->attr.exclude_kernel); + if (rawsize) /* No incremented counters */ + pai_push_sample(rawsize, cpump, event); +} + +/* Check if there is data to be saved on schedule out of a task. */ +static void pai_have_samples(int idx) +{ + struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr); + struct pai_map *cpump = mp->mapptr; + struct perf_event *event; + + list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) + pai_have_sample(event, cpump); +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event CRYPTO_ALL is allowed. + */ +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, save old values. + */ + if (!sched_in) + pai_have_samples(PAI_PMU_CRYPTO); +} + +/* ============================= paiext ====================================*/ + +static void paiext_event_destroy(struct perf_event *event) +{ + pai_event_destroy(event); +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int paiext_event_init(struct perf_event *event) +{ + int rc = pai_event_init(event, PAI_PMU_EXT); + + if (!rc) { + event->attr.exclude_kernel = true; /* No kernel space part */ + event->destroy = paiext_event_destroy; + /* Offset of NNPA in paiext_cb */ + event->hw.config_base = offsetof(struct paiext_cb, acc); + } + return rc; +} + +static u64 paiext_getall(struct perf_event *event) +{ + return pai_getdata(event, false); +} + +static void paiext_read(struct perf_event *event) +{ + pai_read(event, paiext_getall); +} + +static void paiext_start(struct perf_event *event, int flags) +{ + pai_start(event, flags, paiext_getall); +} + +static int paiext_add(struct perf_event *event, int flags) +{ + return pai_add(event, flags); +} + +static void paiext_stop(struct perf_event *event, int flags) +{ + pai_stop(event, flags); +} + +static void paiext_del(struct perf_event *event, int flags) +{ + pai_del(event, flags); +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event NNPA_ALL is allowed. + */ +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, save old values. + */ + if (!sched_in) + pai_have_samples(PAI_PMU_EXT); +} + +/* Attribute definitions for paicrypt interface. As with other CPU + * Measurement Facilities, there is one attribute per mapped counter. + * The number of mapped counters may vary per machine generation. Use + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction + * to determine the number of mapped counters. The instructions returns + * a positive number, which is the highest number of supported counters. + * All counters less than this number are also supported, there are no + * holes. A returned number of zero means no support for mapped counters. + * + * The identification of the counter is a unique number. The chosen range + * is 0x1000 + offset in mapped kernel page. + * All CPU Measurement Facility counters identifiers must be unique and + * the numbers from 0 to 496 are already used for the CPU Measurement + * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already + * used for the CPU Measurement Sampling facility. + */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *paicrypt_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paicrypt_events_group = { + .name = "events", + .attrs = NULL /* Filled in attr_event_init() */ +}; + +static struct attribute_group paicrypt_format_group = { + .name = "format", + .attrs = paicrypt_format_attr, +}; + +static const struct attribute_group *paicrypt_attr_groups[] = { + &paicrypt_events_group, + &paicrypt_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paicrypt = { + .task_ctx_nr = perf_hw_context, + .event_init = paicrypt_event_init, + .add = paicrypt_add, + .del = paicrypt_del, + .start = paicrypt_start, + .stop = paicrypt_stop, + .read = paicrypt_read, + .sched_task = paicrypt_sched_task, + .attr_groups = paicrypt_attr_groups +}; + +/* List of symbolic PAI counter names. */ +static const char * const paicrypt_ctrnames[] = { + [0] = "CRYPTO_ALL", + [1] = "KM_DEA", + [2] = "KM_TDEA_128", + [3] = "KM_TDEA_192", + [4] = "KM_ENCRYPTED_DEA", + [5] = "KM_ENCRYPTED_TDEA_128", + [6] = "KM_ENCRYPTED_TDEA_192", + [7] = "KM_AES_128", + [8] = "KM_AES_192", + [9] = "KM_AES_256", + [10] = "KM_ENCRYPTED_AES_128", + [11] = "KM_ENCRYPTED_AES_192", + [12] = "KM_ENCRYPTED_AES_256", + [13] = "KM_XTS_AES_128", + [14] = "KM_XTS_AES_256", + [15] = "KM_XTS_ENCRYPTED_AES_128", + [16] = "KM_XTS_ENCRYPTED_AES_256", + [17] = "KMC_DEA", + [18] = "KMC_TDEA_128", + [19] = "KMC_TDEA_192", + [20] = "KMC_ENCRYPTED_DEA", + [21] = "KMC_ENCRYPTED_TDEA_128", + [22] = "KMC_ENCRYPTED_TDEA_192", + [23] = "KMC_AES_128", + [24] = "KMC_AES_192", + [25] = "KMC_AES_256", + [26] = "KMC_ENCRYPTED_AES_128", + [27] = "KMC_ENCRYPTED_AES_192", + [28] = "KMC_ENCRYPTED_AES_256", + [29] = "KMC_PRNG", + [30] = "KMA_GCM_AES_128", + [31] = "KMA_GCM_AES_192", + [32] = "KMA_GCM_AES_256", + [33] = "KMA_GCM_ENCRYPTED_AES_128", + [34] = "KMA_GCM_ENCRYPTED_AES_192", + [35] = "KMA_GCM_ENCRYPTED_AES_256", + [36] = "KMF_DEA", + [37] = "KMF_TDEA_128", + [38] = "KMF_TDEA_192", + [39] = "KMF_ENCRYPTED_DEA", + [40] = "KMF_ENCRYPTED_TDEA_128", + [41] = "KMF_ENCRYPTED_TDEA_192", + [42] = "KMF_AES_128", + [43] = "KMF_AES_192", + [44] = "KMF_AES_256", + [45] = "KMF_ENCRYPTED_AES_128", + [46] = "KMF_ENCRYPTED_AES_192", + [47] = "KMF_ENCRYPTED_AES_256", + [48] = "KMCTR_DEA", + [49] = "KMCTR_TDEA_128", + [50] = "KMCTR_TDEA_192", + [51] = "KMCTR_ENCRYPTED_DEA", + [52] = "KMCTR_ENCRYPTED_TDEA_128", + [53] = "KMCTR_ENCRYPTED_TDEA_192", + [54] = "KMCTR_AES_128", + [55] = "KMCTR_AES_192", + [56] = "KMCTR_AES_256", + [57] = "KMCTR_ENCRYPTED_AES_128", + [58] = "KMCTR_ENCRYPTED_AES_192", + [59] = "KMCTR_ENCRYPTED_AES_256", + [60] = "KMO_DEA", + [61] = "KMO_TDEA_128", + [62] = "KMO_TDEA_192", + [63] = "KMO_ENCRYPTED_DEA", + [64] = "KMO_ENCRYPTED_TDEA_128", + [65] = "KMO_ENCRYPTED_TDEA_192", + [66] = "KMO_AES_128", + [67] = "KMO_AES_192", + [68] = "KMO_AES_256", + [69] = "KMO_ENCRYPTED_AES_128", + [70] = "KMO_ENCRYPTED_AES_192", + [71] = "KMO_ENCRYPTED_AES_256", + [72] = "KIMD_SHA_1", + [73] = "KIMD_SHA_256", + [74] = "KIMD_SHA_512", + [75] = "KIMD_SHA3_224", + [76] = "KIMD_SHA3_256", + [77] = "KIMD_SHA3_384", + [78] = "KIMD_SHA3_512", + [79] = "KIMD_SHAKE_128", + [80] = "KIMD_SHAKE_256", + [81] = "KIMD_GHASH", + [82] = "KLMD_SHA_1", + [83] = "KLMD_SHA_256", + [84] = "KLMD_SHA_512", + [85] = "KLMD_SHA3_224", + [86] = "KLMD_SHA3_256", + [87] = "KLMD_SHA3_384", + [88] = "KLMD_SHA3_512", + [89] = "KLMD_SHAKE_128", + [90] = "KLMD_SHAKE_256", + [91] = "KMAC_DEA", + [92] = "KMAC_TDEA_128", + [93] = "KMAC_TDEA_192", + [94] = "KMAC_ENCRYPTED_DEA", + [95] = "KMAC_ENCRYPTED_TDEA_128", + [96] = "KMAC_ENCRYPTED_TDEA_192", + [97] = "KMAC_AES_128", + [98] = "KMAC_AES_192", + [99] = "KMAC_AES_256", + [100] = "KMAC_ENCRYPTED_AES_128", + [101] = "KMAC_ENCRYPTED_AES_192", + [102] = "KMAC_ENCRYPTED_AES_256", + [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", + [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", + [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", + [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", + [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", + [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", + [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", + [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", + [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", + [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", + [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", + [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", + [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", + [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", + [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", + [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", + [119] = "PCC_SCALAR_MULTIPLY_P256", + [120] = "PCC_SCALAR_MULTIPLY_P384", + [121] = "PCC_SCALAR_MULTIPLY_P521", + [122] = "PCC_SCALAR_MULTIPLY_ED25519", + [123] = "PCC_SCALAR_MULTIPLY_ED448", + [124] = "PCC_SCALAR_MULTIPLY_X25519", + [125] = "PCC_SCALAR_MULTIPLY_X448", + [126] = "PRNO_SHA_512_DRNG", + [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", + [128] = "PRNO_TRNG", + [129] = "KDSA_ECDSA_VERIFY_P256", + [130] = "KDSA_ECDSA_VERIFY_P384", + [131] = "KDSA_ECDSA_VERIFY_P521", + [132] = "KDSA_ECDSA_SIGN_P256", + [133] = "KDSA_ECDSA_SIGN_P384", + [134] = "KDSA_ECDSA_SIGN_P521", + [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", + [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", + [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", + [138] = "KDSA_EDDSA_VERIFY_ED25519", + [139] = "KDSA_EDDSA_VERIFY_ED448", + [140] = "KDSA_EDDSA_SIGN_ED25519", + [141] = "KDSA_EDDSA_SIGN_ED448", + [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", + [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", + [144] = "PCKMO_ENCRYPT_DEA_KEY", + [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", + [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", + [147] = "PCKMO_ENCRYPT_AES_128_KEY", + [148] = "PCKMO_ENCRYPT_AES_192_KEY", + [149] = "PCKMO_ENCRYPT_AES_256_KEY", + [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", + [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", + [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", + [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", + [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", + [155] = "IBM_RESERVED_155", + [156] = "IBM_RESERVED_156", + [157] = "KM_FULL_XTS_AES_128", + [158] = "KM_FULL_XTS_AES_256", + [159] = "KM_FULL_XTS_ENCRYPTED_AES_128", + [160] = "KM_FULL_XTS_ENCRYPTED_AES_256", + [161] = "KMAC_HMAC_SHA_224", + [162] = "KMAC_HMAC_SHA_256", + [163] = "KMAC_HMAC_SHA_384", + [164] = "KMAC_HMAC_SHA_512", + [165] = "KMAC_HMAC_ENCRYPTED_SHA_224", + [166] = "KMAC_HMAC_ENCRYPTED_SHA_256", + [167] = "KMAC_HMAC_ENCRYPTED_SHA_384", + [168] = "KMAC_HMAC_ENCRYPTED_SHA_512", + [169] = "PCKMO_ENCRYPT_HMAC_512_KEY", + [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY", + [171] = "PCKMO_ENCRYPT_AES_XTS_128", + [172] = "PCKMO_ENCRYPT_AES_XTS_256", +}; + +static struct attribute *paiext_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paiext_events_group = { + .name = "events", + .attrs = NULL, /* Filled in attr_event_init() */ +}; + +static struct attribute_group paiext_format_group = { + .name = "format", + .attrs = paiext_format_attr, +}; + +static const struct attribute_group *paiext_attr_groups[] = { + &paiext_events_group, + &paiext_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paiext = { + .task_ctx_nr = perf_hw_context, + .event_init = paiext_event_init, + .add = paiext_add, + .del = paiext_del, + .start = paiext_start, + .stop = paiext_stop, + .read = paiext_read, + .sched_task = paiext_sched_task, + .attr_groups = paiext_attr_groups, +}; + +/* List of symbolic PAI extension 1 NNPA counter names. */ +static const char * const paiext_ctrnames[] = { + [0] = "NNPA_ALL", + [1] = "NNPA_ADD", + [2] = "NNPA_SUB", + [3] = "NNPA_MUL", + [4] = "NNPA_DIV", + [5] = "NNPA_MIN", + [6] = "NNPA_MAX", + [7] = "NNPA_LOG", + [8] = "NNPA_EXP", + [9] = "NNPA_IBM_RESERVED_9", + [10] = "NNPA_RELU", + [11] = "NNPA_TANH", + [12] = "NNPA_SIGMOID", + [13] = "NNPA_SOFTMAX", + [14] = "NNPA_BATCHNORM", + [15] = "NNPA_MAXPOOL2D", + [16] = "NNPA_AVGPOOL2D", + [17] = "NNPA_LSTMACT", + [18] = "NNPA_GRUACT", + [19] = "NNPA_CONVOLUTION", + [20] = "NNPA_MATMUL_OP", + [21] = "NNPA_MATMUL_OP_BCAST23", + [22] = "NNPA_SMALLBATCH", + [23] = "NNPA_LARGEDIM", + [24] = "NNPA_SMALLTENSOR", + [25] = "NNPA_1MFRAME", + [26] = "NNPA_2GFRAME", + [27] = "NNPA_ACCESSEXCEPT", + [28] = "NNPA_TRANSFORM", + [29] = "NNPA_GELU", + [30] = "NNPA_MOMENTS", + [31] = "NNPA_LAYERNORM", + [32] = "NNPA_MATMUL_OP_BCAST1", + [33] = "NNPA_SQRT", + [34] = "NNPA_INVSQRT", + [35] = "NNPA_NORM", + [36] = "NNPA_REDUCE", +}; + +static void __init attr_event_free(struct attribute **attrs) +{ + struct perf_pmu_events_attr *pa; + unsigned int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dap; + + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static struct attribute * __init attr_event_init_one(int num, + unsigned long base, + const char *name) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return NULL; + + sysfs_attr_init(&pa->attr.attr); + pa->id = base + num; + pa->attr.attr.name = name; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + return &pa->attr.attr; +} + +static struct attribute ** __init attr_event_init(struct pai_pmu *p) +{ + unsigned int min_attr = min_t(unsigned int, p->num_named, p->num_avail); + struct attribute **attrs; + unsigned int i; + + attrs = kmalloc_array(min_attr + 1, sizeof(*attrs), GFP_KERNEL | __GFP_ZERO); + if (!attrs) + goto out; + for (i = 0; i < min_attr; i++) { + attrs[i] = attr_event_init_one(i, p->base, p->names[i]); + if (!attrs[i]) { + attr_event_free(attrs); + attrs = NULL; + goto out; + } + } + attrs[i] = NULL; +out: + return attrs; +} + +static void __init pai_pmu_exit(struct pai_pmu *p) +{ + attr_event_free(p->event_group->attrs); + p->event_group->attrs = NULL; +} + +/* Add a PMU. Install its events and register the PMU device driver + * call back functions. + */ +static int __init pai_pmu_init(struct pai_pmu *p) +{ + int rc = -ENOMEM; + + + /* Export known PAI events */ + p->event_group->attrs = attr_event_init(p); + if (!p->event_group->attrs) { + pr_err("Creation of PMU %s /sysfs failed\n", p->pmuname); + goto out; + } + + rc = perf_pmu_register(p->pmu, p->pmuname, -1); + if (rc) { + pai_pmu_exit(p); + pr_err("Registering PMU %s failed with rc=%i\n", p->pmuname, + rc); + } +out: + return rc; +} + +/* PAI PMU characteristics table */ +static struct pai_pmu pai_pmu[] __refdata = { + [PAI_PMU_CRYPTO] = { + .pmuname = "pai_crypto", + .facility_nr = 196, + .num_named = ARRAY_SIZE(paicrypt_ctrnames), + .names = paicrypt_ctrnames, + .base = PAI_CRYPTO_BASE, + .kernel_offset = PAI_CRYPTO_KERNEL_OFFSET, + .area_size = PAGE_SIZE, + .init = pai_pmu_init, + .exit = pai_pmu_exit, + .pmu = &paicrypt, + .event_group = &paicrypt_events_group + }, + [PAI_PMU_EXT] = { + .pmuname = "pai_ext", + .facility_nr = 197, + .num_named = ARRAY_SIZE(paiext_ctrnames), + .names = paiext_ctrnames, + .base = PAI_NNPA_BASE, + .kernel_offset = 0, + .area_size = PAIE1_CTRBLOCK_SZ, + .init = pai_pmu_init, + .exit = pai_pmu_exit, + .pmu = &paiext, + .event_group = &paiext_events_group + } +}; + +/* + * Check if the PMU (via facility) is supported by machine. Try all of the + * supported PAI PMUs. + * Return number of successfully installed PMUs. + */ +static int __init paipmu_setup(void) +{ + struct qpaci_info_block ib; + int install_ok = 0, rc; + struct pai_pmu *p; + size_t i; + + for (i = 0; i < ARRAY_SIZE(pai_pmu); ++i) { + p = &pai_pmu[i]; + + if (!test_facility(p->facility_nr)) + continue; + + qpaci(&ib); + switch (i) { + case PAI_PMU_CRYPTO: + p->num_avail = ib.num_cc; + if (p->num_avail >= PAI_CRYPTO_MAXCTR) { + pr_err("Too many PMU %s counters %d\n", + p->pmuname, p->num_avail); + continue; + } + break; + case PAI_PMU_EXT: + p->num_avail = ib.num_nnpa; + break; + } + p->num_avail += 1; /* Add xxx_ALL event */ + if (p->init) { + rc = p->init(p); + if (!rc) + ++install_ok; + } + } + return install_ok; +} + +static int __init pai_init(void) +{ + /* Setup s390dbf facility */ + paidbg = debug_register("pai", 32, 256, 128); + if (!paidbg) { + pr_err("Registration of s390dbf pai failed\n"); + return -ENOMEM; + } + debug_register_view(paidbg, &debug_sprintf_view); + + if (!paipmu_setup()) { + /* No PMU registration, no need for debug buffer */ + debug_unregister_view(paidbg, &debug_sprintf_view); + debug_unregister(paidbg); + return -ENODEV; + } + return 0; +} + +device_initcall(pai_init); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c new file mode 100644 index 000000000000..7b305f1456f8 --- /dev/null +++ b/arch/s390/kernel/perf_regs.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/perf_event.h> +#include <linux/perf_regs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/bug.h> +#include <asm/ptrace.h> +#include <asm/fpu.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + freg_t fp; + + if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) + return regs->gprs[idx]; + + if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) { + if (!user_mode(regs)) + return 0; + + idx -= PERF_REG_S390_FP0; + fp = *(freg_t *)(current->thread.ufpu.vxrs + idx); + return fp.ui; + } + + if (idx == PERF_REG_S390_MASK) + return regs->psw.mask; + if (idx == PERF_REG_S390_PC) + return regs->psw.addr; + + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; +} + +#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + /* + * Use the regs from the first interruption and let + * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()). + * + * Also save FPU registers for user-space tasks only. + */ + regs_user->regs = task_pt_regs(current); + if (user_mode(regs_user->regs)) + save_user_fpu_regs(); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S deleted file mode 100644 index 14bdecb61923..000000000000 --- a/arch/s390/kernel/pgm_check.S +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Program check table. - * - * Copyright IBM Corp. 2012 - */ - -#include <linux/linkage.h> - -#ifdef CONFIG_32BIT -#define PGM_CHECK_64BIT(handler) .long default_trap_handler -#else -#define PGM_CHECK_64BIT(handler) .long handler -#endif - -#define PGM_CHECK(handler) .long handler -#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler) - -/* - * The program check table contains exactly 128 (0x00-0x7f) entries. Each - * line defines the 31 and/or 64 bit function to be called corresponding - * to the program check interruption code. - */ -.section .rodata, "a" -ENTRY(pgm_check_table) -PGM_CHECK_DEFAULT /* 00 */ -PGM_CHECK(illegal_op) /* 01 */ -PGM_CHECK(privileged_op) /* 02 */ -PGM_CHECK(execute_exception) /* 03 */ -PGM_CHECK(do_protection_exception) /* 04 */ -PGM_CHECK(addressing_exception) /* 05 */ -PGM_CHECK(specification_exception) /* 06 */ -PGM_CHECK(data_exception) /* 07 */ -PGM_CHECK(overflow_exception) /* 08 */ -PGM_CHECK(divide_exception) /* 09 */ -PGM_CHECK(overflow_exception) /* 0a */ -PGM_CHECK(divide_exception) /* 0b */ -PGM_CHECK(hfp_overflow_exception) /* 0c */ -PGM_CHECK(hfp_underflow_exception) /* 0d */ -PGM_CHECK(hfp_significance_exception) /* 0e */ -PGM_CHECK(hfp_divide_exception) /* 0f */ -PGM_CHECK(do_dat_exception) /* 10 */ -PGM_CHECK(do_dat_exception) /* 11 */ -PGM_CHECK(translation_exception) /* 12 */ -PGM_CHECK(special_op_exception) /* 13 */ -PGM_CHECK_DEFAULT /* 14 */ -PGM_CHECK(operand_exception) /* 15 */ -PGM_CHECK_DEFAULT /* 16 */ -PGM_CHECK_DEFAULT /* 17 */ -PGM_CHECK_64BIT(transaction_exception) /* 18 */ -PGM_CHECK_DEFAULT /* 19 */ -PGM_CHECK_DEFAULT /* 1a */ -PGM_CHECK_DEFAULT /* 1b */ -PGM_CHECK(space_switch_exception) /* 1c */ -PGM_CHECK(hfp_sqrt_exception) /* 1d */ -PGM_CHECK_DEFAULT /* 1e */ -PGM_CHECK_DEFAULT /* 1f */ -PGM_CHECK_DEFAULT /* 20 */ -PGM_CHECK_DEFAULT /* 21 */ -PGM_CHECK_DEFAULT /* 22 */ -PGM_CHECK_DEFAULT /* 23 */ -PGM_CHECK_DEFAULT /* 24 */ -PGM_CHECK_DEFAULT /* 25 */ -PGM_CHECK_DEFAULT /* 26 */ -PGM_CHECK_DEFAULT /* 27 */ -PGM_CHECK_DEFAULT /* 28 */ -PGM_CHECK_DEFAULT /* 29 */ -PGM_CHECK_DEFAULT /* 2a */ -PGM_CHECK_DEFAULT /* 2b */ -PGM_CHECK_DEFAULT /* 2c */ -PGM_CHECK_DEFAULT /* 2d */ -PGM_CHECK_DEFAULT /* 2e */ -PGM_CHECK_DEFAULT /* 2f */ -PGM_CHECK_DEFAULT /* 30 */ -PGM_CHECK_DEFAULT /* 31 */ -PGM_CHECK_DEFAULT /* 32 */ -PGM_CHECK_DEFAULT /* 33 */ -PGM_CHECK_DEFAULT /* 34 */ -PGM_CHECK_DEFAULT /* 35 */ -PGM_CHECK_DEFAULT /* 36 */ -PGM_CHECK_DEFAULT /* 37 */ -PGM_CHECK_64BIT(do_asce_exception) /* 38 */ -PGM_CHECK_64BIT(do_dat_exception) /* 39 */ -PGM_CHECK_64BIT(do_dat_exception) /* 3a */ -PGM_CHECK_64BIT(do_dat_exception) /* 3b */ -PGM_CHECK_DEFAULT /* 3c */ -PGM_CHECK_DEFAULT /* 3d */ -PGM_CHECK_DEFAULT /* 3e */ -PGM_CHECK_DEFAULT /* 3f */ -PGM_CHECK_DEFAULT /* 40 */ -PGM_CHECK_DEFAULT /* 41 */ -PGM_CHECK_DEFAULT /* 42 */ -PGM_CHECK_DEFAULT /* 43 */ -PGM_CHECK_DEFAULT /* 44 */ -PGM_CHECK_DEFAULT /* 45 */ -PGM_CHECK_DEFAULT /* 46 */ -PGM_CHECK_DEFAULT /* 47 */ -PGM_CHECK_DEFAULT /* 48 */ -PGM_CHECK_DEFAULT /* 49 */ -PGM_CHECK_DEFAULT /* 4a */ -PGM_CHECK_DEFAULT /* 4b */ -PGM_CHECK_DEFAULT /* 4c */ -PGM_CHECK_DEFAULT /* 4d */ -PGM_CHECK_DEFAULT /* 4e */ -PGM_CHECK_DEFAULT /* 4f */ -PGM_CHECK_DEFAULT /* 50 */ -PGM_CHECK_DEFAULT /* 51 */ -PGM_CHECK_DEFAULT /* 52 */ -PGM_CHECK_DEFAULT /* 53 */ -PGM_CHECK_DEFAULT /* 54 */ -PGM_CHECK_DEFAULT /* 55 */ -PGM_CHECK_DEFAULT /* 56 */ -PGM_CHECK_DEFAULT /* 57 */ -PGM_CHECK_DEFAULT /* 58 */ -PGM_CHECK_DEFAULT /* 59 */ -PGM_CHECK_DEFAULT /* 5a */ -PGM_CHECK_DEFAULT /* 5b */ -PGM_CHECK_DEFAULT /* 5c */ -PGM_CHECK_DEFAULT /* 5d */ -PGM_CHECK_DEFAULT /* 5e */ -PGM_CHECK_DEFAULT /* 5f */ -PGM_CHECK_DEFAULT /* 60 */ -PGM_CHECK_DEFAULT /* 61 */ -PGM_CHECK_DEFAULT /* 62 */ -PGM_CHECK_DEFAULT /* 63 */ -PGM_CHECK_DEFAULT /* 64 */ -PGM_CHECK_DEFAULT /* 65 */ -PGM_CHECK_DEFAULT /* 66 */ -PGM_CHECK_DEFAULT /* 67 */ -PGM_CHECK_DEFAULT /* 68 */ -PGM_CHECK_DEFAULT /* 69 */ -PGM_CHECK_DEFAULT /* 6a */ -PGM_CHECK_DEFAULT /* 6b */ -PGM_CHECK_DEFAULT /* 6c */ -PGM_CHECK_DEFAULT /* 6d */ -PGM_CHECK_DEFAULT /* 6e */ -PGM_CHECK_DEFAULT /* 6f */ -PGM_CHECK_DEFAULT /* 70 */ -PGM_CHECK_DEFAULT /* 71 */ -PGM_CHECK_DEFAULT /* 72 */ -PGM_CHECK_DEFAULT /* 73 */ -PGM_CHECK_DEFAULT /* 74 */ -PGM_CHECK_DEFAULT /* 75 */ -PGM_CHECK_DEFAULT /* 76 */ -PGM_CHECK_DEFAULT /* 77 */ -PGM_CHECK_DEFAULT /* 78 */ -PGM_CHECK_DEFAULT /* 79 */ -PGM_CHECK_DEFAULT /* 7a */ -PGM_CHECK_DEFAULT /* 7b */ -PGM_CHECK_DEFAULT /* 7c */ -PGM_CHECK_DEFAULT /* 7d */ -PGM_CHECK_DEFAULT /* 7e */ -PGM_CHECK_DEFAULT /* 7f */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bc3eddae34a..0df95dcb2101 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file handles the architecture dependent parts of process handling. * @@ -7,9 +8,13 @@ * Denis Joseph Barrow, */ +#include <linux/elf-randomize.h> #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/elfcore.h> @@ -19,93 +24,90 @@ #include <linux/tick.h> #include <linux/personality.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> -#include <linux/module.h> -#include <asm/io.h> +#include <linux/init_task.h> +#include <linux/entry-common.h> +#include <linux/io.h> +#include <asm/guarded_storage.h> +#include <asm/access-regs.h> +#include <asm/switch_to.h> +#include <asm/cpu_mf.h> #include <asm/processor.h> +#include <asm/ptrace.h> #include <asm/vtimer.h> #include <asm/exec.h> +#include <asm/fpu.h> #include <asm/irq.h> #include <asm/nmi.h> #include <asm/smp.h> -#include <asm/switch_to.h> +#include <asm/stacktrace.h> #include <asm/runtime_instr.h> +#include <asm/unwind.h> #include "entry.h" -asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); +void ret_from_fork(void) asm("ret_from_fork"); -/* - * Return saved PC of a blocked thread. used in kernel/sched. - * resume in entry.S does not create a new stack frame, it - * just stores the registers %r6-%r15 to the frame given by - * schedule. We want to return the address of the caller of - * schedule, so we have to walk the backchain one time to - * find the frame schedule() store its return address. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs) { - struct stack_frame *sf, *low, *high; + void (*func)(void *arg); - if (!tsk || !task_stack_page(tsk)) - return 0; - low = task_stack_page(tsk); - high = (struct stack_frame *) task_pt_regs(tsk); - sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN); - if (sf <= low || sf > high) - return 0; - sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); - if (sf <= low || sf > high) - return 0; - return sf->gprs[8]; -} + schedule_tail(prev); -void arch_cpu_idle(void) -{ - local_mcck_disable(); - if (test_thread_flag(TIF_MCCK_PENDING)) { - local_mcck_enable(); - local_irq_enable(); - return; + if (!user_mode(regs)) { + /* Kernel thread */ + func = (void *)regs->gprs[9]; + func((void *)regs->gprs[10]); } - /* Halt the cpu and keep track of cpu time accounting. */ - vtime_stop_cpu(); + clear_pt_regs_flag(regs, PIF_SYSCALL); + syscall_exit_to_user_mode(regs); } -void arch_cpu_idle_exit(void) +void flush_thread(void) { - if (test_thread_flag(TIF_MCCK_PENDING)) - s390_handle_mcck(); } -void arch_cpu_idle_dead(void) +void arch_setup_new_exec(void) { - cpu_die(); + if (get_lowcore()->current_pid != current->pid) { + get_lowcore()->current_pid = current->pid; + if (test_facility(40)) + lpp(&get_lowcore()->lpp); + } } -extern void __kprobes kernel_thread_starter(void); - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) +void arch_release_task_struct(struct task_struct *tsk) { - exit_thread_runtime_instr(); + runtime_instr_release(tsk); + guarded_storage_release(tsk); } -void flush_thread(void) +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { -} + save_user_fpu_regs(); -void release_thread(struct task_struct *dead_task) -{ + *dst = *src; + dst->thread.kfpu_flags = 0; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + + return 0; } -int copy_thread(unsigned long clone_flags, unsigned long new_stackp, - unsigned long arg, struct task_struct *p) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { - struct thread_info *ti; + u64 clone_flags = args->flags; + unsigned long new_stackp = args->stack; + unsigned long tls = args->tls; struct fake_frame { struct stack_frame sf; @@ -117,158 +119,130 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); - clear_tsk_thread_flag(p, TIF_PER_TRAP); + p->thread.per_flags = 0; /* Initialize per thread user and system timer values */ - ti = task_thread_info(p); - ti->user_timer = 0; - ti->system_timer = 0; + p->thread.user_timer = 0; + p->thread.guest_timer = 0; + p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; + p->thread.last_break = 1; frame->sf.back_chain = 0; + frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs; + frame->sf.gprs[12 - 6] = (unsigned long)p; /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork; /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + frame->sf.gprs[15 - 6] = (unsigned long)frame; /* Store access registers to kernel stack of new process. */ - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(args->fn)) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); - frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - frame->childregs.psw.addr = PSW_ADDR_AMODE | - (unsigned long) kernel_thread_starter; - frame->childregs.gprs[9] = new_stackp; /* function */ - frame->childregs.gprs[10] = arg; - frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | + PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.gprs[9] = (unsigned long)args->fn; + frame->childregs.gprs[10] = (unsigned long)args->fn_arg; frame->childregs.orig_gpr2 = -1; - + frame->childregs.last_break = 1; return 0; } frame->childregs = *current_pt_regs(); frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; - - /* Don't copy runtime instrumentation info */ - p->thread.ri_cb = NULL; - p->thread.ri_signum = 0; - frame->childregs.psw.mask &= ~PSW_MASK_RI; - -#ifndef CONFIG_64BIT /* - * save fprs to current->thread.fp_regs to merge them with - * the emulated registers and then copy the result to the child. + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). */ - save_fp_regs(¤t->thread.fp_regs); - memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); - /* Set a new TLS ? */ - if (clone_flags & CLONE_SETTLS) - p->thread.acrs[0] = frame->childregs.gprs[6]; -#else /* CONFIG_64BIT */ - /* Save the fpu registers to new thread structure. */ - save_fp_regs(&p->thread.fp_regs); + frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { - unsigned long tls = frame->childregs.gprs[6]; - if (is_compat_task()) { - p->thread.acrs[0] = (unsigned int)tls; - } else { - p->thread.acrs[0] = (unsigned int)(tls >> 32); - p->thread.acrs[1] = (unsigned int)tls; - } + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } -#endif /* CONFIG_64BIT */ + /* + * s390 stores the svc return address in arch_data when calling + * sigreturn()/restart_syscall() via vdso. 1 means no valid address + * stored. + */ + p->restart_block.arch_data = 1; return 0; } -asmlinkage void execve_tail(void) +void execve_tail(void) { - current->thread.fp_regs.fpc = 0; - if (MACHINE_HAS_IEEE) - asm volatile("sfpc %0,%0" : : "d" (0)); + current->thread.ufpu.fpc = 0; + fpu_sfpc(0); } -/* - * fill in the FPU structure for a core dump. - */ -int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) +struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) { -#ifndef CONFIG_64BIT - /* - * save fprs to current->thread.fp_regs to merge them with - * the emulated registers and then copy the result to the dump. - */ - save_fp_regs(¤t->thread.fp_regs); - memcpy(fpregs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); -#else /* CONFIG_64BIT */ - save_fp_regs(fpregs); -#endif /* CONFIG_64BIT */ - return 1; + save_user_fpu_regs(); + save_kernel_fpu_regs(&prev->thread); + save_access_regs(&prev->thread.acrs[0]); + save_ri_cb(prev->thread.ri_cb); + save_gs_cb(prev->thread.gs_cb); + update_cr_regs(next); + restore_kernel_fpu_regs(&next->thread); + restore_access_regs(&next->thread.acrs[0]); + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); + restore_gs_cb(next->thread.gs_cb); + return __switch_to_asm(prev, next); } -EXPORT_SYMBOL(dump_fpu); -unsigned long get_wchan(struct task_struct *p) +unsigned long __get_wchan(struct task_struct *p) { - struct stack_frame *sf, *low, *high; - unsigned long return_address; - int count; + struct unwind_state state; + unsigned long ip = 0; - if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) + if (!task_stack_page(p)) return 0; - low = task_stack_page(p); - high = (struct stack_frame *) task_pt_regs(p); - sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN); - if (sf <= low || sf > high) + + if (!try_get_task_stack(p)) return 0; - for (count = 0; count < 16; count++) { - sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); - if (sf <= low || sf > high) - return 0; - return_address = sf->gprs[8] & PSW_ADDR_INSN; - if (!in_sched_functions(return_address)) - return return_address; + + unwind_for_each_frame(&state, p, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) { + ip = 0; + break; + } + + ip = unwind_get_return_address(&state); + if (!ip) + break; + + if (!in_sched_functions(ip)) + break; } - return 0; + + put_task_stack(p); + return ip; } unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= get_random_u32_below(PAGE_SIZE); return sp & ~0xf; } static inline unsigned long brk_rnd(void) { - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; - else - return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; + return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT; } unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - return ret; -} - -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + unsigned long ret; - if (!(current->flags & PF_RANDOMIZE)) - return base; - if (ret < base) - return base; - return ret; + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 753c41d0ffd3..e33a3eccda56 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -1,91 +1,381 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt +#include <linux/stop_machine.h> +#include <linux/cpufeature.h> +#include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/random.h> +#include <linux/sched/mm.h> #include <linux/init.h> -#include <linux/smp.h> #include <linux/seq_file.h> +#include <linux/mm_types.h> #include <linux/delay.h> #include <linux/cpu.h> +#include <linux/smp.h> +#include <asm/text-patching.h> +#include <asm/machine.h> +#include <asm/diag.h> +#include <asm/facility.h> #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> +#include <asm/sclp.h> +#include <asm/smp.h> + +unsigned long __read_mostly elf_hwcap; +char elf_platform[ELF_PLATFORM_SIZE]; + +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); +static DEFINE_PER_CPU(int, cpu_relax_retry); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = true; +} + +static void update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} + +void notrace stop_machine_yield(const struct cpumask *cpumask) +{ + int cpu, this_cpu; + + this_cpu = smp_processor_id(); + if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { + __this_cpu_write(cpu_relax_retry, 0); + cpu = cpumask_next_wrap(this_cpu, cpumask); + if (cpu >= nr_cpu_ids) + return; + if (arch_vcpu_is_preempted(cpu)) + smp_yield_cpu(cpu); + } +} + +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} -static DEFINE_PER_CPU(struct cpuid, cpu_id); +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} /* * cpu_init - initializes state that is per-CPU. */ -void __cpuinit cpu_init(void) +void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct cpuid *id = &__get_cpu_var(cpu_id); + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); get_cpu_id(id); - atomic_inc(&init_mm.mm_count); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - memset(idle, 0, sizeof(*idle)); } -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_facilities(struct seq_file *m) +{ + unsigned int bit; + + seq_puts(m, "facilities :"); + for_each_set_bit_inv(bit, (long *)&stfle_fac_list, MAX_FACILITY_BIT) + seq_printf(m, " %d", bit); + seq_putc(m, '\n'); +} + +static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te" + [HWCAP_NR_ESAN3] = "esan3", + [HWCAP_NR_ZARCH] = "zarch", + [HWCAP_NR_STFLE] = "stfle", + [HWCAP_NR_MSA] = "msa", + [HWCAP_NR_LDISP] = "ldisp", + [HWCAP_NR_EIMM] = "eimm", + [HWCAP_NR_DFP] = "dfp", + [HWCAP_NR_HPAGE] = "edat", + [HWCAP_NR_ETF3EH] = "etf3eh", + [HWCAP_NR_HIGH_GPRS] = "highgprs", + [HWCAP_NR_TE] = "te", + [HWCAP_NR_VXRS] = "vx", + [HWCAP_NR_VXRS_BCD] = "vxd", + [HWCAP_NR_VXRS_EXT] = "vxe", + [HWCAP_NR_GS] = "gs", + [HWCAP_NR_VXRS_EXT2] = "vxe2", + [HWCAP_NR_VXRS_PDE] = "vxp", + [HWCAP_NR_SORT] = "sort", + [HWCAP_NR_DFLT] = "dflt", + [HWCAP_NR_VXRS_PDE2] = "vxp2", + [HWCAP_NR_NNPA] = "nnpa", + [HWCAP_NR_PCI_MIO] = "pcimio", + [HWCAP_NR_SIE] = "sie", }; - unsigned long n = (unsigned long) v - 1; - int i; - - if (!n) { - s390_adjust_jiffies(); - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - seq_puts(m, "\n"); - show_cacheinfo(m); - } - get_online_cpus(); - if (cpu_online(n)) { - struct cpuid *id = &per_cpu(cpu_id, n); - seq_printf(m, "processor %li: " + int i, cpu; + + BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + show_facilities(m); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); + + seq_printf(m, "processor %d: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, id->version, id->ident, id->machine); + cpu, id->version, id->ident, id->machine); + } +} + +static int __init setup_hwcaps(void) +{ + /* instructions named N3, "backported" to esa-mode */ + elf_hwcap |= HWCAP_ESAN3; + + /* z/Architecture mode active */ + elf_hwcap |= HWCAP_ZARCH; + + /* store-facility-list-extended */ + if (test_facility(7)) + elf_hwcap |= HWCAP_STFLE; + + /* message-security assist */ + if (test_facility(17)) + elf_hwcap |= HWCAP_MSA; + + /* long-displacement */ + if (test_facility(19)) + elf_hwcap |= HWCAP_LDISP; + + /* extended-immediate */ + elf_hwcap |= HWCAP_EIMM; + + /* extended-translation facility 3 enhancement */ + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_ETF3EH; + + /* decimal floating point & perform floating point operation */ + if (test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_DFP; + + /* huge page support */ + if (cpu_has_edat1()) + elf_hwcap |= HWCAP_HPAGE; + + /* 64-bit register support for 31-bit processes */ + elf_hwcap |= HWCAP_HIGH_GPRS; + + /* transactional execution */ + if (machine_has_tx()) + elf_hwcap |= HWCAP_TE; + + /* vector */ + if (test_facility(129)) { + elf_hwcap |= HWCAP_VXRS; + if (test_facility(134)) + elf_hwcap |= HWCAP_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_VXRS_EXT; + if (test_facility(148)) + elf_hwcap |= HWCAP_VXRS_EXT2; + if (test_facility(152)) + elf_hwcap |= HWCAP_VXRS_PDE; + if (test_facility(192)) + elf_hwcap |= HWCAP_VXRS_PDE2; + } + + if (test_facility(150)) + elf_hwcap |= HWCAP_SORT; + + if (test_facility(151)) + elf_hwcap |= HWCAP_DFLT; + + if (test_facility(165)) + elf_hwcap |= HWCAP_NNPA; + + /* guarded storage */ + if (cpu_has_gs()) + elf_hwcap |= HWCAP_GS; + + if (test_machine_feature(MFEATURE_PCI_MIO)) + elf_hwcap |= HWCAP_PCI_MIO; + + /* virtualization support */ + if (sclp.has_sief2) + elf_hwcap |= HWCAP_SIE; + + return 0; +} +arch_initcall(setup_hwcaps); + +static int __init setup_elf_platform(void) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + add_device_randomness(&cpu_id, sizeof(cpu_id)); + switch (cpu_id.machine) { + default: /* Use "z10" as default. */ + strscpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strscpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strscpy(elf_platform, "zEC12"); + break; + case 0x2964: + case 0x2965: + strscpy(elf_platform, "z13"); + break; + case 0x3906: + case 0x3907: + strscpy(elf_platform, "z14"); + break; + case 0x8561: + case 0x8562: + strscpy(elf_platform, "z15"); + break; + case 0x3931: + case 0x3932: + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); + break; } - put_online_cpus(); return 0; } +arch_initcall(setup_elf_platform); + +static void show_cpu_topology(struct seq_file *m, unsigned long n) +{ +#ifdef CONFIG_SCHED_TOPOLOGY + seq_printf(m, "physical id : %d\n", topology_physical_package_id(n)); + seq_printf(m, "core id : %d\n", topology_core_id(n)); + seq_printf(m, "book id : %d\n", topology_book_id(n)); + seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); + seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); + seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n)); + seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n))); + seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n)); +#endif /* CONFIG_SCHED_TOPOLOGY */ +} + +static void show_cpu_ids(struct seq_file *m, unsigned long n) +{ + struct cpuid *id = &per_cpu(cpu_info.cpu_id, n); + + seq_printf(m, "version : %02X\n", id->version); + seq_printf(m, "identification : %06X\n", id->ident); + seq_printf(m, "machine : %04X\n", id->machine); +} + +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + if (!machine_has_cpu_mhz) + return; + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); + + if (n == first) + show_cpu_summary(m, v); + seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_topology(m, n); + show_cpu_ids(m, n); + show_cpu_mhz(m, n); + return 0; +} + +static inline void *c_update(loff_t *pos) +{ + if (*pos) + *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); + return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; +} static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + cpus_read_lock(); + return c_update(pos); } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { ++*pos; - return c_start(m, pos); + return c_update(pos); } static void c_stop(struct seq_file *m, void *v) { + cpus_read_unlock(); } const struct seq_operations cpuinfo_op = { @@ -94,4 +384,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a314c57f4e94..ceaa1726e328 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Ptrace user space interface. * @@ -8,6 +9,8 @@ #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -18,96 +21,117 @@ #include <linux/signal.h> #include <linux/elf.h> #include <linux/regset.h> -#include <linux/tracehook.h> #include <linux/seccomp.h> -#include <linux/compat.h> #include <trace/syscall.h> -#include <asm/segment.h> +#include <asm/guarded_storage.h> +#include <asm/access-regs.h> #include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unistd.h> -#include <asm/switch_to.h> -#include "entry.h" - -#ifdef CONFIG_COMPAT -#include "compat_ptrace.h" -#endif +#include <asm/runtime_instr.h> +#include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> +#include <asm/fpu.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - -enum s390_regset { - REGSET_GENERAL, - REGSET_FP, - REGSET_LAST_BREAK, - REGSET_TDB, - REGSET_SYSTEM_CALL, - REGSET_GENERAL_EXTENDED, -}; +#include "entry.h" -void update_per_regs(struct task_struct *task) +void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - struct per_regs old, new; - -#ifdef CONFIG_64BIT + union ctlreg0 cr0_old, cr0_new; + union ctlreg2 cr2_old, cr2_new; + int cr0_changed, cr2_changed; + union { + struct ctlreg regs[3]; + struct { + struct ctlreg control; + struct ctlreg start; + struct ctlreg end; + }; + } old, new; + + local_ctl_store(0, &cr0_old.reg); + local_ctl_store(2, &cr2_old.reg); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { - unsigned long cr0, cr0_new; - - __ctl_store(cr0, 0, 0); - /* set or clear transaction execution bits 8 and 9. */ + if (machine_has_tx()) { + /* Set or clear transaction execution TXC bit 8. */ + cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) - cr0_new = cr0 & ~(3UL << 54); - else - cr0_new = cr0 | (3UL << 54); - /* Only load control register 0 if necessary. */ - if (cr0 != cr0_new) - __ctl_load(cr0_new, 0, 0); + cr0_new.tcx = 0; + /* Set or clear transaction execution TDC bits 62 and 63. */ + cr2_new.tdc = 0; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr2_new.tdc = 1; + else + cr2_new.tdc = 2; + } + } + /* Take care of enable/disable of guarded storage. */ + if (cpu_has_gs()) { + cr2_new.gse = 0; + if (task->thread.gs_cb) + cr2_new.gse = 1; } -#endif + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new.val != cr0_old.val; + cr2_changed = cr2_new.val != cr2_old.val; + if (cr0_changed) + local_ctl_load(0, &cr0_new.reg); + if (cr2_changed) + local_ctl_load(2, &cr2_new.reg); /* Copy user specified PER registers */ - new.control = thread->per_user.control; - new.start = thread->per_user.start; - new.end = thread->per_user.end; + new.control.val = thread->per_user.control; + new.start.val = thread->per_user.start; + new.end.val = thread->per_user.end; /* merge TIF_SINGLE_STEP into user specified PER registers. */ - if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { - new.control |= PER_EVENT_IFETCH; -#ifdef CONFIG_64BIT - new.control |= PER_CONTROL_SUSPENSION; - new.control |= PER_EVENT_TRANSACTION_END; -#endif - new.start = 0; - new.end = PSW_ADDR_INSN; + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || + test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control.val |= PER_EVENT_BRANCH; + else + new.control.val |= PER_EVENT_IFETCH; + new.control.val |= PER_CONTROL_SUSPENSION; + new.control.val |= PER_EVENT_TRANSACTION_END; + if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) + new.control.val |= PER_EVENT_IFETCH; + new.start.val = 0; + new.end.val = -1UL; } /* Take care of the PER enablement bit in the PSW. */ - if (!(new.control & PER_EVENT_MASK)) { + if (!(new.control.val & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; - __ctl_store(old, 9, 11); + __local_ctl_store(9, 11, old.regs); if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(new, 9, 11); + __local_ctl_load(9, 11, new.regs); } void user_enable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); set_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_per_regs(task); } void user_disable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_per_regs(task); +} + +void user_enable_block_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); } /* @@ -124,47 +148,41 @@ void ptrace_disable(struct task_struct *task) task->thread.per_flags = 0; } -#ifndef CONFIG_64BIT -# define __ADDR_MASK 3 -#else -# define __ADDR_MASK 7 -#endif +#define __ADDR_MASK 7 static inline unsigned long __peek_user_per(struct task_struct *child, addr_t addr) { - struct per_struct_kernel *dummy = NULL; - - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* Control bits of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy->cr10) + else if (addr == offsetof(struct per_struct_kernel, cr10)) /* Start address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy->cr11) + else if (addr == offsetof(struct per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? - PSW_ADDR_INSN : child->thread.per_user.end; - else if (addr == (addr_t) &dummy->bits) + -1UL : child->thread.per_user.end; + else if (addr == offsetof(struct per_struct_kernel, bits)) /* Single-step bit. */ return test_thread_flag(TIF_SINGLE_STEP) ? (1UL << (BITS_PER_LONG - 1)) : 0; - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return child->thread.per_user.start; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* End address of the user specified per set. */ return child->thread.per_user.end; - else if (addr == (addr_t) &dummy->perc_atmid) + else if (addr == offsetof(struct per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (unsigned long) child->thread.per_event.cause << (BITS_PER_LONG - 16); - else if (addr == (addr_t) &dummy->address) + else if (addr == offsetof(struct per_struct_kernel, address)) /* Address of the last PER trap */ return child->thread.per_event.address; - else if (addr == (addr_t) &dummy->access_id) + else if (addr == offsetof(struct per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (unsigned long) child->thread.per_event.paid << (BITS_PER_LONG - 8); @@ -182,63 +200,65 @@ static inline unsigned long __peek_user_per(struct task_struct *child, */ static unsigned long __peek_user(struct task_struct *child, addr_t addr) { - struct user *dummy = NULL; addr_t offset, tmp; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) + if (addr == offsetof(struct user, regs.psw.mask)) { /* Return a clean psw mask. */ - tmp = psw_user_bits | (tmp & PSW_MASK_USER); + tmp &= PSW_MASK_USER | PSW_MASK_RI; + tmp |= PSW_USER_BITS; + } - } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; -#ifdef CONFIG_64BIT + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb reading * from acrs[15]. Result is a 64 bit value. Read the * 32 bit acrs[15] value and shift it by 32. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) tmp = ((unsigned long) child->thread.acrs[15]) << 32; else -#endif - tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); + tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { - /* - * floating point regs. are stored in the thread structure + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { + /* + * floating point control reg. is in the thread structure */ - offset = addr - (addr_t) &dummy->regs.fp_regs; - tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); - if (addr == (addr_t) &dummy->regs.fp_regs.fpc) - tmp &= (unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32); + tmp = child->thread.ufpu.fpc; + tmp <<= BITS_PER_LONG - 32; - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. are in the child->thread.ufpu.vxrs array + */ + offset = addr - offsetof(struct user, regs.fp_regs.fprs); + tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset); + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); tmp = __peek_user_per(child, addr); } else @@ -257,11 +277,9 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; -#ifdef CONFIG_64BIT - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; -#endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -272,8 +290,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) static inline void __poke_user_per(struct task_struct *child, addr_t addr, addr_t data) { - struct per_struct_kernel *dummy = NULL; - /* * There are only three fields in the per_info struct that the * debugger user can write to. @@ -286,14 +302,14 @@ static inline void __poke_user_per(struct task_struct *child, * addresses are used only if single stepping is not in effect. * Writes to any other field in per_info are ignored. */ - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. */ child->thread.per_user.end = data; } @@ -306,67 +322,84 @@ static inline void __poke_user_per(struct task_struct *child, */ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t offset; - if (addr < (addr_t) &dummy->regs.acrs) { + + if (addr < offsetof(struct user, regs.acrs)) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask && - ((data & ~PSW_MASK_USER) != psw_user_bits || - ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) - /* Invalid psw mask. */ - return -EINVAL; - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + if (addr == offsetof(struct user, regs.psw.mask)) { + unsigned long mask = PSW_MASK_USER; + + mask |= is_ri_task(child) ? PSW_MASK_RI : 0; + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ + return -EINVAL; + } - } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } + *(addr_t *)((addr_t) ®s->psw + addr) = data; + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; -#ifdef CONFIG_64BIT + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb writing * to acrs[15] with a 64 bit value. Ignore the lower * half of the value and write the upper 32 bit to * acrs[15]. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) child->thread.acrs[15] = (unsigned int) (data >> 32); else -#endif - *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; + *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ task_pt_regs(child)->orig_gpr2 = data; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent writes of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* - * floating point regs. are stored in the thread structure + * floating point control reg. is in the thread structure */ - if (addr == (addr_t) &dummy->regs.fp_regs.fpc && - (data & ~((unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32))) != 0) + if ((unsigned int)data != 0) return -EINVAL; - offset = addr - (addr_t) &dummy->regs.fp_regs; - *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; + child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32); - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. are in the child->thread.ufpu.vxrs array + */ + offset = addr - offsetof(struct user, regs.fp_regs.fprs); + *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data; + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); __poke_user_per(child, addr, data); } @@ -383,11 +416,9 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell indeed... */ mask = __ADDR_MASK; -#ifdef CONFIG_64BIT - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; -#endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -435,365 +466,40 @@ long arch_ptrace(struct task_struct *child, long request, } return 0; case PTRACE_GET_LAST_BREAK: - put_user(task_thread_info(child)->last_break, - (unsigned long __user *) data); - return 0; + return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; - default: - /* Removing high order bit from addr (only for 31 bit). */ - addr &= PSW_ADDR_INSN; - return ptrace_request(child, request, addr, data); - } -} - -#ifdef CONFIG_COMPAT -/* - * Now the fun part starts... a 31 bit program running in the - * 31 bit emulation tracing another program. PTRACE_PEEKTEXT, - * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy - * to handle, the difference to the 64 bit versions of the requests - * is that the access is done in multiples of 4 byte instead of - * 8 bytes (sizeof(unsigned long) on 31/64 bit). - * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA, - * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program - * is a 31 bit program too, the content of struct user can be - * emulated. A 31 bit program peeking into the struct user of - * a 64 bit program is a no-no. - */ - -/* - * Same as peek_user_per but for a 31 bit program. - */ -static inline __u32 __peek_user_per_compat(struct task_struct *child, - addr_t addr) -{ - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) - /* Control bits of the active per set. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy32->cr10) - /* Start address of the active per set. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->cr11) - /* End address of the active per set. */ - return test_thread_flag(TIF_SINGLE_STEP) ? - PSW32_ADDR_INSN : child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->bits) - /* Single-step bit. */ - return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? - 0x80000000 : 0; - else if (addr == (addr_t) &dummy32->starting_addr) - /* Start address of the user specified per set. */ - return (__u32) child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->ending_addr) - /* End address of the user specified per set. */ - return (__u32) child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->perc_atmid) - /* PER code, ATMID and AI of the last PER trap */ - return (__u32) child->thread.per_event.cause << 16; - else if (addr == (addr_t) &dummy32->address) - /* Address of the last PER trap */ - return (__u32) child->thread.per_event.address; - else if (addr == (addr_t) &dummy32->access_id) - /* Access id of the last PER trap */ - return (__u32) child->thread.per_event.paid << 24; - return 0; -} - -/* - * Same as peek_user but for a 31 bit program. - */ -static u32 __peek_user_compat(struct task_struct *child, addr_t addr) -{ - struct compat_user *dummy32 = NULL; - addr_t offset; - __u32 tmp; - - if (addr < (addr_t) &dummy32->regs.acrs) { - struct pt_regs *regs = task_pt_regs(child); - /* - * psw and gprs are stored on the stack - */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { - /* Fake a 31 bit psw mask. */ - tmp = (__u32)(regs->psw.mask >> 32); - tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { - /* Fake a 31 bit psw address. */ - tmp = (__u32) regs->psw.addr | - (__u32)(regs->psw.mask & PSW_MASK_BA); - } else { - /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); - } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { - /* - * access registers are stored in the thread structure - */ - offset = addr - (addr_t) &dummy32->regs.acrs; - tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); - - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { - /* - * orig_gpr2 is stored on the kernel stack - */ - tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); - - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { - /* - * prevent reads of padding hole between - * orig_gpr2 and fp_regs on s390. - */ - tmp = 0; - - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { - /* - * floating point regs. are stored in the thread structure - */ - offset = addr - (addr_t) &dummy32->regs.fp_regs; - tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset); - - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { - /* - * Handle access to the per_info structure. - */ - addr -= (addr_t) &dummy32->regs.per_info; - tmp = __peek_user_per_compat(child, addr); - - } else - tmp = 0; - - return tmp; -} - -static int peek_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - __u32 tmp; - - if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) - return -EIO; - - tmp = __peek_user_compat(child, addr); - return put_user(tmp, (__u32 __user *) data); -} - -/* - * Same as poke_user_per but for a 31 bit program. - */ -static inline void __poke_user_per_compat(struct task_struct *child, - addr_t addr, __u32 data) -{ - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) - /* PER event mask of the user specified per set. */ - child->thread.per_user.control = - data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy32->starting_addr) - /* Starting address of the user specified per set. */ - child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy32->ending_addr) - /* Ending address of the user specified per set. */ - child->thread.per_user.end = data; -} - -/* - * Same as poke_user but for a 31 bit program. - */ -static int __poke_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - struct compat_user *dummy32 = NULL; - __u32 tmp = (__u32) data; - addr_t offset; - - if (addr < (addr_t) &dummy32->regs.acrs) { - struct pt_regs *regs = task_pt_regs(child); - /* - * psw, gprs, acrs and orig_gpr2 are stored on the stack - */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { - /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) - /* Invalid psw mask. */ - return -EINVAL; - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (regs->psw.mask & PSW_MASK_BA) | - (__u64)(tmp & PSW32_MASK_USER) << 32; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { - /* Build a 64 bit psw address from 31 bit address. */ - regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; - /* Transfer 31 bit amode bit to psw mask. */ - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | - (__u64)(tmp & PSW32_ADDR_AMODE); - } else { - /* gpr 0-15 */ - *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; - } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { - /* - * access registers are stored in the thread structure - */ - offset = addr - (addr_t) &dummy32->regs.acrs; - *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; - - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { - /* - * orig_gpr2 is stored on the kernel stack - */ - *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; - - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { - /* - * prevent writess of padding hole between - * orig_gpr2 and fp_regs on s390. - */ - return 0; - - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { - /* - * floating point regs. are stored in the thread structure - */ - if (addr == (addr_t) &dummy32->regs.fp_regs.fpc && - (tmp & ~FPC_VALID_MASK) != 0) - /* Invalid floating point control. */ + case PTRACE_TE_ABORT_RAND: + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: return -EINVAL; - offset = addr - (addr_t) &dummy32->regs.fp_regs; - *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp; - - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { - /* - * Handle access to the per_info structure. - */ - addr -= (addr_t) &dummy32->regs.per_info; - __poke_user_per_compat(child, addr, data); - } - - return 0; -} - -static int poke_user_compat(struct task_struct *child, - addr_t addr, addr_t data) -{ - if (!is_compat_task() || (addr & 3) || - addr > sizeof(struct compat_user) - 3) - return -EIO; - - return __poke_user_compat(child, addr, data); -} - -long compat_arch_ptrace(struct task_struct *child, compat_long_t request, - compat_ulong_t caddr, compat_ulong_t cdata) -{ - unsigned long addr = caddr; - unsigned long data = cdata; - compat_ptrace_area parea; - int copied, ret; - - switch (request) { - case PTRACE_PEEKUSR: - /* read the word at location addr in the USER area. */ - return peek_user_compat(child, addr, data); - - case PTRACE_POKEUSR: - /* write the word at location addr in the USER area */ - return poke_user_compat(child, addr, data); - - case PTRACE_PEEKUSR_AREA: - case PTRACE_POKEUSR_AREA: - if (copy_from_user(&parea, (void __force __user *) addr, - sizeof(parea))) - return -EFAULT; - addr = parea.kernel_addr; - data = parea.process_addr; - copied = 0; - while (copied < parea.len) { - if (request == PTRACE_PEEKUSR_AREA) - ret = peek_user_compat(child, addr, data); - else { - __u32 utmp; - if (get_user(utmp, - (__u32 __force __user *) data)) - return -EFAULT; - ret = poke_user_compat(child, addr, utmp); - } - if (ret) - return ret; - addr += sizeof(unsigned int); - data += sizeof(unsigned int); - copied += sizeof(unsigned int); } return 0; - case PTRACE_GET_LAST_BREAK: - put_user(task_thread_info(child)->last_break, - (unsigned int __user *) data); - return 0; - } - return compat_ptrace_request(child, request, addr, data); -} -#endif - -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) -{ - long ret = 0; - - /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { - /* seccomp failures shouldn't expose any additional code. */ - ret = -1; - goto out; - } - - /* - * The sysc_tracesys code in entry.S stored the system - * call number to gprs[2]. - */ - if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { - /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip - * the system call and the system call restart handling. - */ - clear_thread_flag(TIF_SYSCALL); - ret = -1; + default: + return ptrace_request(child, request, addr, data); } - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); - - audit_syscall_entry(is_compat_task() ? - AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, - regs->gprs[3], regs->gprs[4], - regs->gprs[5]); -out: - return ret ?: regs->gprs[2]; -} - -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->gprs[2]); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); } /* @@ -802,28 +508,14 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) static int s390_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned pos; if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - unsigned long *k = kbuf; - while (count > 0) { - *k++ = __peek_user(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long)) + membuf_store(&to, __peek_user(target, pos)); return 0; } @@ -864,14 +556,18 @@ static int s390_regs_set(struct task_struct *target, } static int s390_fpregs_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { + _s390_fp_regs fp_regs; + if (target == current) - save_fp_regs(&target->thread.fp_regs); + save_user_fpu_regs(); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_regs, 0, -1); + fp_regs.fpc = target->thread.ufpu.fpc; + fpregs_store(&fp_regs, &target->thread.ufpu); + + return membuf_write(&to, &fp_regs, sizeof(fp_regs)); } static int s390_fpregs_set(struct task_struct *target, @@ -880,51 +576,36 @@ static int s390_fpregs_set(struct task_struct *target, const void __user *ubuf) { int rc = 0; + freg_t fprs[__NUM_FPRS]; if (target == current) - save_fp_regs(&target->thread.fp_regs); - - /* If setting FPC, must validate it first. */ + save_user_fpu_regs(); + convert_vx_to_fp(fprs, target->thread.ufpu.vxrs); if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { - u32 fpc[2] = { target->thread.fp_regs.fpc, 0 }; - rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc, + u32 ufpc[2] = { target->thread.ufpu.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, 0, offsetof(s390_fp_regs, fprs)); if (rc) return rc; - if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0) + if (ufpc[1] != 0) return -EINVAL; - target->thread.fp_regs.fpc = fpc[0]; + target->thread.ufpu.fpc = ufpc[0]; } if (rc == 0 && count > 0) rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.fp_regs.fprs, - offsetof(s390_fp_regs, fprs), -1); - - if (rc == 0 && target == current) - restore_fp_regs(&target->thread.fp_regs); - + fprs, offsetof(s390_fp_regs, fprs), -1); + if (rc) + return rc; + convert_fp_to_vx(target->thread.ufpu.vxrs, fprs); return rc; } -#ifdef CONFIG_64BIT - static int s390_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (count > 0) { - if (kbuf) { - unsigned long *k = kbuf; - *k = task_thread_info(target)->last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(task_thread_info(target)->last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, target->thread.last_break); } static int s390_last_break_set(struct task_struct *target, @@ -937,16 +618,15 @@ static int s390_last_break_set(struct task_struct *target, static int s390_tdb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); - unsigned char *data; + size_t size; if (!(regs->int_code & 0x200)) return -ENODATA; - data = target->thread.trap_tdb; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); + size = sizeof(target->thread.trap_tdb.data); + return membuf_write(&to, target->thread.trap_tdb.data, size); } static int s390_tdb_set(struct task_struct *target, @@ -957,16 +637,80 @@ static int s390_tdb_set(struct task_struct *target, return 0; } -#endif +static int s390_vxrs_low_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + if (!cpu_has_vx()) + return -ENODEV; + if (target == current) + save_user_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = target->thread.ufpu.vxrs[i].low; + return membuf_write(&to, vxrs, sizeof(vxrs)); +} + +static int s390_vxrs_low_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i, rc; + + if (!cpu_has_vx()) + return -ENODEV; + if (target == current) + save_user_fpu_regs(); + + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = target->thread.ufpu.vxrs[i].low; + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + if (rc == 0) + for (i = 0; i < __NUM_VXRS_LOW; i++) + target->thread.ufpu.vxrs[i].low = vxrs[i]; + + return rc; +} + +static int s390_vxrs_high_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!cpu_has_vx()) + return -ENODEV; + if (target == current) + save_user_fpu_regs(); + return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW, + __NUM_VXRS_HIGH * sizeof(__vector128)); +} + +static int s390_vxrs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc; + + if (!cpu_has_vx()) + return -ENODEV; + if (target == current) + save_user_fpu_regs(); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1); + return rc; +} static int s390_system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int *data = &task_thread_info(target)->system_call; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(unsigned int)); + return membuf_store(&to, target->thread.system_call); } static int s390_system_call_set(struct task_struct *target, @@ -974,282 +718,280 @@ static int s390_system_call_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - unsigned int *data = &task_thread_info(target)->system_call; + unsigned int *data = &target->thread.system_call; return user_regset_copyin(&pos, &count, &kbuf, &ubuf, data, 0, sizeof(unsigned int)); } -static const struct user_regset s390_regsets[] = { - [REGSET_GENERAL] = { - .core_note_type = NT_PRSTATUS, - .n = sizeof(s390_regs) / sizeof(long), - .size = sizeof(long), - .align = sizeof(long), - .get = s390_regs_get, - .set = s390_regs_set, - }, - [REGSET_FP] = { - .core_note_type = NT_PRFPREG, - .n = sizeof(s390_fp_regs) / sizeof(long), - .size = sizeof(long), - .align = sizeof(long), - .get = s390_fpregs_get, - .set = s390_fpregs_set, - }, -#ifdef CONFIG_64BIT - [REGSET_LAST_BREAK] = { - .core_note_type = NT_S390_LAST_BREAK, - .n = 1, - .size = sizeof(long), - .align = sizeof(long), - .get = s390_last_break_get, - .set = s390_last_break_set, - }, - [REGSET_TDB] = { - .core_note_type = NT_S390_TDB, - .n = 1, - .size = 256, - .align = 1, - .get = s390_tdb_get, - .set = s390_tdb_set, - }, -#endif - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(unsigned int), - .align = sizeof(unsigned int), - .get = s390_system_call_get, - .set = s390_system_call_set, - }, -}; +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct gs_cb *data = target->thread.gs_cb; -static const struct user_regset_view user_s390_view = { - .name = UTS_MACHINE, - .e_machine = EM_S390, - .regsets = s390_regsets, - .n = ARRAY_SIZE(s390_regsets) -}; + if (!cpu_has_gs()) + return -ENODEV; + if (!data) + return -ENODATA; + if (target == current) + save_gs_cb(data); + return membuf_write(&to, data, sizeof(struct gs_cb)); +} -#ifdef CONFIG_COMPAT -static int s390_compat_regs_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - if (target == current) - save_access_regs(target->thread.acrs); + struct gs_cb gs_cb = { }, *data = NULL; + int rc; - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = __peek_user_compat(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user_compat(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } + if (!cpu_has_gs()) + return -ENODEV; + if (!target->thread.gs_cb) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; } - return 0; + if (!target->thread.gs_cb) + gs_cb.gsd = 25; + else if (target == current) + save_gs_cb(&gs_cb); + else + gs_cb = *target->thread.gs_cb; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &gs_cb, 0, sizeof(gs_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } + preempt_disable(); + if (!target->thread.gs_cb) + target->thread.gs_cb = data; + *target->thread.gs_cb = gs_cb; + if (target == current) { + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); + restore_gs_cb(target->thread.gs_cb); + } + preempt_enable(); + return rc; } -static int s390_compat_regs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) { - int rc = 0; + struct gs_cb *data = target->thread.gs_bc_cb; - if (target == current) - save_access_regs(target->thread.acrs); + if (!cpu_has_gs()) + return -ENODEV; + if (!data) + return -ENODATA; + return membuf_write(&to, data, sizeof(struct gs_cb)); +} - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0 && !rc) { - rc = __poke_user_compat(target, pos, *k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - compat_ulong_t word; - rc = __get_user(word, u++); - if (rc) - break; - rc = __poke_user_compat(target, pos, word); - count -= sizeof(*u); - pos += sizeof(*u); - } +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!cpu_has_gs()) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static bool is_ri_cb_valid(struct runtime_instr_cb *cb) +{ + return (cb->rca & 0x1f) == 0 && + (cb->roa & 0xfff) == 0 && + (cb->rla & 0xfff) == 0xfff && + cb->s == 1 && + cb->k == 1 && + cb->h == 0 && + cb->reserved1 == 0 && + cb->ps == 1 && + cb->qs == 0 && + cb->pc == 1 && + cb->qc == 0 && + cb->reserved2 == 0 && + cb->reserved3 == 0 && + cb->reserved4 == 0 && + cb->reserved5 == 0 && + cb->reserved6 == 0 && + cb->reserved7 == 0 && + cb->reserved8 == 0 && + cb->rla >= cb->roa && + cb->rca >= cb->roa && + cb->rca <= cb->rla+1 && + cb->m < 3; +} + +static int s390_runtime_instr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct runtime_instr_cb *data = target->thread.ri_cb; - if (rc == 0 && target == current) - restore_access_regs(target->thread.acrs); + if (!test_facility(64)) + return -ENODEV; + if (!data) + return -ENODATA; - return rc; + return membuf_write(&to, data, sizeof(struct runtime_instr_cb)); } -static int s390_compat_regs_high_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +static int s390_runtime_instr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - compat_ulong_t *gprs_high; + struct runtime_instr_cb ri_cb = { }, *data = NULL; + int rc; - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = *gprs_high; - gprs_high += 2; - count -= sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(*gprs_high, u++)) - return -EFAULT; - gprs_high += 2; - count -= sizeof(*u); - } - } - return 0; -} + if (!test_facility(64)) + return -ENODEV; -static int s390_compat_regs_high_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - compat_ulong_t *gprs_high; - int rc = 0; + if (!target->thread.ri_cb) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + } - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count > 0) { - *gprs_high = *k++; - *gprs_high += 2; - count -= sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count > 0 && !rc) { - unsigned long word; - rc = __get_user(word, u++); - if (rc) - break; - *gprs_high = word; - *gprs_high += 2; - count -= sizeof(*u); - } + if (target->thread.ri_cb) { + if (target == current) + store_runtime_instr_cb(&ri_cb); + else + ri_cb = *target->thread.ri_cb; } - return rc; -} + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &ri_cb, 0, sizeof(struct runtime_instr_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } -static int s390_compat_last_break_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - compat_ulong_t last_break; - - if (count > 0) { - last_break = task_thread_info(target)->last_break; - if (kbuf) { - unsigned long *k = kbuf; - *k = last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(last_break, u)) - return -EFAULT; - } + if (!is_ri_cb_valid(&ri_cb)) { + kfree(data); + return -EINVAL; } - return 0; -} + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; + preempt_disable(); + if (!target->thread.ri_cb) + target->thread.ri_cb = data; + *target->thread.ri_cb = ri_cb; + if (target == current) + load_runtime_instr_cb(target->thread.ri_cb); + preempt_enable(); -static int s390_compat_last_break_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ return 0; } -static const struct user_regset s390_compat_regsets[] = { - [REGSET_GENERAL] = { - .core_note_type = NT_PRSTATUS, - .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .get = s390_compat_regs_get, - .set = s390_compat_regs_set, +static const struct user_regset s390_regsets[] = { + { + USER_REGSET_NOTE_TYPE(PRSTATUS), + .n = sizeof(s390_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_regs_get, + .set = s390_regs_set, }, - [REGSET_FP] = { - .core_note_type = NT_PRFPREG, - .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .get = s390_fpregs_get, + { + USER_REGSET_NOTE_TYPE(PRFPREG), + .n = sizeof(s390_fp_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, - [REGSET_LAST_BREAK] = { - .core_note_type = NT_S390_LAST_BREAK, + { + USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL), + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .regset_get = s390_system_call_get, + .set = s390_system_call_set, + }, + { + USER_REGSET_NOTE_TYPE(S390_LAST_BREAK), .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_compat_last_break_get, - .set = s390_compat_last_break_set, + .regset_get = s390_last_break_get, + .set = s390_last_break_set, }, - [REGSET_TDB] = { - .core_note_type = NT_S390_TDB, + { + USER_REGSET_NOTE_TYPE(S390_TDB), .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + USER_REGSET_NOTE_TYPE(S390_VXRS_LOW), + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH), + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .regset_get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, }, - [REGSET_GENERAL_EXTENDED] = { - .core_note_type = NT_S390_HIGH_GPRS, - .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), - .size = sizeof(compat_long_t), - .align = sizeof(compat_long_t), - .get = s390_compat_regs_high_get, - .set = s390_compat_regs_high_set, + { + USER_REGSET_NOTE_TYPE(S390_GS_CB), + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, + { + USER_REGSET_NOTE_TYPE(S390_GS_BC), + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, + { + USER_REGSET_NOTE_TYPE(S390_RI_CB), + .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_runtime_instr_get, + .set = s390_runtime_instr_set, }, }; -static const struct user_regset_view user_s390_compat_view = { - .name = "s390", +static const struct user_regset_view user_s390_view = { + .name = "s390x", .e_machine = EM_S390, - .regsets = s390_compat_regsets, - .n = ARRAY_SIZE(s390_compat_regsets) + .regsets = s390_regsets, + .n = ARRAY_SIZE(s390_regsets) }; -#endif const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) - return &user_s390_compat_view; -#endif return &user_s390_view; } @@ -1258,20 +1000,13 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; @@ -1284,29 +1019,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. - */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index dd8016b0477e..69fcaf54d5ca 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -1,92 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * S390 version - * Copyright IBM Corp. 2000 - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) + * Copyright IBM Corp 2000, 2011 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, */ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/sigp.h> +#include <asm/lowcore.h> -# -# store_status: Empty implementation until kdump is supported on 31 bit -# -ENTRY(store_status) - br %r14 + GEN_BR_THUNK %r9 # -# do_reipl_asm -# Parameter: r2 = schid of reipl device +# Issue "store status" for the current CPU to its prefix page +# and call passed function afterwards +# +# r2 = Function to be called after store status +# r3 = Parameter for function # -ENTRY(do_reipl_asm) - basr %r13,0 -.Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) -.Lpg1: # do store status of all registers +SYM_CODE_START(store_status) + STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA + /* General purpose registers */ + GET_LC %r13 + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13) + /* Access registers */ + stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13) + /* Floating point registers */ + lay %r1,__LC_FPREGS_SAVE_AREA(%r13) + std %f0, 0x00(%r1) + std %f1, 0x08(%r1) + std %f2, 0x10(%r1) + std %f3, 0x18(%r1) + std %f4, 0x20(%r1) + std %f5, 0x28(%r1) + std %f6, 0x30(%r1) + std %f7, 0x38(%r1) + std %f8, 0x40(%r1) + std %f9, 0x48(%r1) + std %f10,0x50(%r1) + std %f11,0x58(%r1) + std %f12,0x60(%r1) + std %f13,0x68(%r1) + std %f14,0x70(%r1) + std %f15,0x78(%r1) + /* Floating point control register */ + lay %r1,__LC_FP_CREG_SAVE_AREA(%r13) + stfpc 0(%r1) + /* CPU timer */ + lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13) + stpt 0(%r1) + /* Store prefix register */ + lay %r1,__LC_PREFIX_SAVE_AREA(%r13) + stpx 0(%r1) + /* Clock comparator - seven bytes */ + larl %r4,clkcmp + stckc 0(%r4) + lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13) + mvc 1(7,%r1),1(%r4) + /* Program status word */ + lay %r1,__LC_PSW_SAVE_AREA(%r13) + epsw %r4,%r5 + st %r4,0(%r1) + st %r5,4(%r1) + stg %r2,8(%r1) + lgr %r9,%r2 + lgr %r2,%r3 + BR_EX %r9 +SYM_CODE_END(store_status) - stm %r0,%r15,__LC_GPREGS_SAVE_AREA - stctl %c0,%c15,__LC_CREGS_SAVE_AREA - stam %a0,%a15,__LC_AREGS_SAVE_AREA - l %r10,.Ldump_pfx-.Lpg0(%r13) - mvc __LC_PREFIX_SAVE_AREA(4),0(%r10) - stckc .Lclkcmp-.Lpg0(%r13) - mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13) - stpt __LC_CPU_TIMER_SAVE_AREA - st %r13, __LC_PSW_SAVE_AREA+4 - lctl %c6,%c6,.Lall-.Lpg0(%r13) - lr %r1,%r2 - mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13) - stsch .Lschib-.Lpg0(%r13) - oi .Lschib+5-.Lpg0(%r13),0x84 -.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 - msch .Lschib-.Lpg0(%r13) - lhi %r0,5 -.Lssch: ssch .Liplorb-.Lpg0(%r13) - jz .L001 - brct %r0,.Lssch - bas %r14,.Ldisab-.Lpg0(%r13) -.L001: mvc __LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13) -.Ltpi: lpsw .Lwaitpsw-.Lpg0(%r13) -.Lcont: c %r1,__LC_SUBCHANNEL_ID - jnz .Ltpi - clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) - jnz .Ltpi - tsch .Liplirb-.Lpg0(%r13) - tm .Liplirb+9-.Lpg0(%r13),0xbf - jz .L002 - bas %r14,.Ldisab-.Lpg0(%r13) -.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 - jz .L003 - bas %r14,.Ldisab-.Lpg0(%r13) -.L003: st %r1,__LC_SUBCHANNEL_ID - lpsw 0 - sigp 0,0,SIGP_RESTART -.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) - lpsw .Ldispsw-.Lpg0(%r13) - .align 8 -.Lclkcmp: .quad 0x0000000000000000 -.Lall: .long 0xff000000 -.Ldump_pfx: .long dump_prefix_page - .align 8 -.Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 -.Lpcnew: .long 0x00080000,0x80000000+.Lecs -.Lionew: .long 0x00080000,0x80000000+.Lcont -.Lwaitpsw: .long 0x020a0000,0x00000000+.Ltpi -.Ldispsw: .long 0x000a0000,0x00000000 -.Liplccws: .long 0x02000000,0x60000018 - .long 0x08000008,0x20000001 -.Liplorb: .long 0x0049504c,0x0040ff80 - .long 0x00000000+.Liplccws -.Lschib: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 -.Liplirb: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 + .section .bss + .balign 8 +SYM_DATA_LOCAL(clkcmp, .quad 0x0000000000000000) + .previous diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S deleted file mode 100644 index dc3b1273c4dc..000000000000 --- a/arch/s390/kernel/reipl64.S +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright IBM Corp 2000, 2011 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Denis Joseph Barrow, - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/sigp.h> - -# -# store_status -# -# Prerequisites to run this function: -# - Prefix register is set to zero -# - Original prefix register is stored in "dump_prefix_page" -# - Lowcore protection is off -# -ENTRY(store_status) - /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_RESTART - lghi %r1,SAVE_AREA_BASE - /* General purpose registers */ - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_RESTART - stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) - /* Control registers */ - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Access registers */ - stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Floating point registers */ - std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Floating point control register */ - stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* CPU timer */ - stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Saved prefix register */ - larl %r2,dump_prefix_page - mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) - /* Clock comparator - seven bytes */ - larl %r2,.Lclkcmp - stckc 0(%r2) - mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) - /* Program status word */ - epsw %r2,%r3 - st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) - st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) - larl %r2,store_status - stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) - br %r14 - - .section .bss - .align 8 -.Lclkcmp: .quad 0x0000000000000000 - .previous - -# -# do_reipl_asm -# Parameter: r2 = schid of reipl device -# - -ENTRY(do_reipl_asm) - basr %r13,0 -.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: brasl %r14,store_status - - lctlg %c6,%c6,.Lall-.Lpg0(%r13) - lgr %r1,%r2 - mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) - stsch .Lschib-.Lpg0(%r13) - oi .Lschib+5-.Lpg0(%r13),0x84 -.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 - msch .Lschib-.Lpg0(%r13) - lghi %r0,5 -.Lssch: ssch .Liplorb-.Lpg0(%r13) - jz .L001 - brct %r0,.Lssch - bas %r14,.Ldisab-.Lpg0(%r13) -.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13) -.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13) -.Lcont: c %r1,__LC_SUBCHANNEL_ID - jnz .Ltpi - clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) - jnz .Ltpi - tsch .Liplirb-.Lpg0(%r13) - tm .Liplirb+9-.Lpg0(%r13),0xbf - jz .L002 - bas %r14,.Ldisab-.Lpg0(%r13) -.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 - jz .L003 - bas %r14,.Ldisab-.Lpg0(%r13) -.L003: st %r1,__LC_SUBCHANNEL_ID - lhi %r1,0 # mode 0 = esa - slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode - lpsw 0 -.Ldisab: sll %r14,1 - srl %r14,1 # need to kill hi bit to avoid specification exceptions. - st %r14,.Ldispsw+12-.Lpg0(%r13) - lpswe .Ldispsw-.Lpg0(%r13) - .align 8 -.Lall: .quad 0x00000000ff000000 - .align 16 -/* - * These addresses have to be 31 bit otherwise - * the sigp will throw a specifcation exception - * when switching to ESA mode as bit 31 be set - * in the ESA psw. - * Bit 31 of the addresses has to be 0 for the - * 31bit lpswe instruction a fact they appear to have - * omitted from the pop. - */ -.Lnewpsw: .quad 0x0000000080000000 - .quad .Lpg1 -.Lpcnew: .quad 0x0000000080000000 - .quad .Lecs -.Lionew: .quad 0x0000000080000000 - .quad .Lcont -.Lwaitpsw: .quad 0x0202000080000000 - .quad .Ltpi -.Ldispsw: .quad 0x0002000080000000 - .quad 0x0000000000000000 -.Liplccws: .long 0x02000000,0x60000018 - .long 0x08000008,0x20000001 -.Liplorb: .long 0x0049504c,0x0040ff80 - .long 0x00000000+.Liplccws -.Lschib: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 -.Liplirb: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index f4e6f20e117a..0ae297c82afd 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -1,118 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 2005 * - * Author(s): Rolf Adelsberger, - * Heiko Carstens <heiko.carstens@de.ibm.com> + * Author(s): Rolf Adelsberger * */ #include <linux/linkage.h> +#include <asm/page.h> #include <asm/sigp.h> /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t * %r3 = start address - where to jump to after the job is done... + * %r4 = subcode * * %r5 will be used as temp. storage * %r6 holds the destination address * %r7 = PAGE_SIZE * %r8 holds the source address * %r9 = PAGE_SIZE - * %r10 is a page mask + * + * 0xf000 is a page_mask */ .text -ENTRY(relocate_kernel) - basr %r13,0 # base address - .base: - stnsm sys_msk-.base(%r13),0xfb # disable DAT - stctl %c0,%c15,ctlregs-.base(%r13) - stm %r0,%r15,gprregs-.base(%r13) - la %r1,load_psw-.base(%r13) - mvc 0(8,%r0),0(%r1) - la %r0,.back-.base(%r13) - st %r0,4(%r0) - oi 4(%r0),0x80 - mvc 0x68(8,%r0),0(%r1) - la %r0,.back_pgm-.base(%r13) - st %r0,0x6c(%r0) - oi 0x6c(%r0),0x80 - lhi %r0,0 - diag %r0,%r0,0x308 - .back: - basr %r13,0 - .back_base: - oi have_diag308-.back_base(%r13),0x01 - lctl %c0,%c15,ctlregs-.back_base(%r13) - lm %r0,%r15,gprregs-.back_base(%r13) - j .start_reloc - .back_pgm: - lm %r0,%r15,gprregs-.base(%r13) - .start_reloc: - lhi %r10,-1 # preparing the mask - sll %r10,12 # shift it such that it becomes 0xf000 - .top: - lhi %r7,4096 # load PAGE_SIZE in r7 - lhi %r9,4096 # load PAGE_SIZE in r9 - l %r5,0(%r2) # read another word for indirection page - ahi %r2,4 # increment pointer - tml %r5,0x1 # is it a destination page? - je .indir_check # NO, goto "indir_check" - lr %r6,%r5 # r6 = r5 - nr %r6,%r10 # mask it out and... - j .top # ...next iteration - .indir_check: - tml %r5,0x2 # is it a indirection page? - je .done_test # NO, goto "done_test" - nr %r5,%r10 # YES, mask out, - lr %r2,%r5 # move it into the right register, - j .top # and read next... - .done_test: - tml %r5,0x4 # is it the done indicator? - je .source_test # NO! Well, then it should be the source indicator... - j .done # ok, lets finish it here... - .source_test: - tml %r5,0x8 # it should be a source indicator... - je .top # NO, ignore it... - lr %r8,%r5 # r8 = r5 - nr %r8,%r10 # masking - 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 - jo 0b - j .top - .done: - sr %r0,%r0 # clear register r0 - la %r4,load_psw-.base(%r13) # load psw-address into the register - o %r3,4(%r4) # or load address into psw - st %r3,4(%r4) - mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 - tm have_diag308-.base(%r13),0x01 - jno .no_diag308 - diag %r0,%r0,0x308 - .no_diag308: - sr %r1,%r1 # clear %r1 - sr %r2,%r2 # clear %r2 - sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero - lpsw 0 # hopefully start new kernel... +SYM_CODE_START(relocate_kernel) + basr %r13,0 # base address +.base: + lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 + lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 + lg %r5,0(%r2) # read another word for indirection page + aghi %r2,8 # increment pointer + tml %r5,0x1 # is it a destination page? + je .indir_check # NO, goto "indir_check" + lgr %r6,%r5 # r6 = r5 + nill %r6,0xf000 # mask it out and... + j .base # ...next iteration +.indir_check: + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nill %r5,0xf000 # YES, mask out, + lgr %r2,%r5 # move it into the right register, + j .base # and read next... +.done_test: + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... +.source_test: + tml %r5,0x8 # it should be a source indicator... + je .base # NO, ignore it... + lgr %r8,%r5 # r8 = r5 + nill %r8,0xf000 # masking +0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .base +.done: + lgr %r0,%r4 # subcode + cghi %r3,0 + je .diag + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 +.diag: + diag %r0,%r0,0x308 +SYM_CODE_END(relocate_kernel) - .align 8 - load_psw: - .long 0x00080000,0x80000000 - sys_msk: - .quad 0 - ctlregs: - .rept 16 - .long 0 - .endr - gprregs: - .rept 16 - .long 0 - .endr - have_diag308: - .byte 0 - .align 8 - relocate_kernel_end: - .align 8 - .globl relocate_kernel_len - relocate_kernel_len: - .quad relocate_kernel_end - relocate_kernel + .balign 8 +SYM_DATA_START_LOCAL(load_psw) + .long 0x00080000,0x80000000 +SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end) + .balign 8 +SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel) diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S deleted file mode 100644 index cfac28330b03..000000000000 --- a/arch/s390/kernel/relocate_kernel64.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright IBM Corp. 2005 - * - * Author(s): Rolf Adelsberger, - * Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/linkage.h> -#include <asm/sigp.h> - -/* - * moves the new kernel to its destination... - * %r2 = pointer to first kimage_entry_t - * %r3 = start address - where to jump to after the job is done... - * - * %r5 will be used as temp. storage - * %r6 holds the destination address - * %r7 = PAGE_SIZE - * %r8 holds the source address - * %r9 = PAGE_SIZE - * - * 0xf000 is a page_mask - */ - - .text -ENTRY(relocate_kernel) - basr %r13,0 # base address - .base: - stnsm sys_msk-.base(%r13),0xfb # disable DAT - stctg %c0,%c15,ctlregs-.base(%r13) - stmg %r0,%r15,gprregs-.base(%r13) - lghi %r0,3 - sllg %r0,%r0,31 - stg %r0,0x1d0(%r0) - la %r0,.back_pgm-.base(%r13) - stg %r0,0x1d8(%r0) - la %r1,load_psw-.base(%r13) - mvc 0(8,%r0),0(%r1) - la %r0,.back-.base(%r13) - st %r0,4(%r0) - oi 4(%r0),0x80 - lghi %r0,0 - diag %r0,%r0,0x308 - .back: - lhi %r1,1 # mode 1 = esame - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode - sam64 # switch to 64 bit addressing mode - basr %r13,0 - .back_base: - oi have_diag308-.back_base(%r13),0x01 - lctlg %c0,%c15,ctlregs-.back_base(%r13) - lmg %r0,%r15,gprregs-.back_base(%r13) - j .top - .back_pgm: - lmg %r0,%r15,gprregs-.base(%r13) - .top: - lghi %r7,4096 # load PAGE_SIZE in r7 - lghi %r9,4096 # load PAGE_SIZE in r9 - lg %r5,0(%r2) # read another word for indirection page - aghi %r2,8 # increment pointer - tml %r5,0x1 # is it a destination page? - je .indir_check # NO, goto "indir_check" - lgr %r6,%r5 # r6 = r5 - nill %r6,0xf000 # mask it out and... - j .top # ...next iteration - .indir_check: - tml %r5,0x2 # is it a indirection page? - je .done_test # NO, goto "done_test" - nill %r5,0xf000 # YES, mask out, - lgr %r2,%r5 # move it into the right register, - j .top # and read next... - .done_test: - tml %r5,0x4 # is it the done indicator? - je .source_test # NO! Well, then it should be the source indicator... - j .done # ok, lets finish it here... - .source_test: - tml %r5,0x8 # it should be a source indicator... - je .top # NO, ignore it... - lgr %r8,%r5 # r8 = r5 - nill %r8,0xf000 # masking - 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 - jo 0b - j .top - .done: - sgr %r0,%r0 # clear register r0 - la %r4,load_psw-.base(%r13) # load psw-address into the register - o %r3,4(%r4) # or load address into psw - st %r3,4(%r4) - mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 - tm have_diag308-.base(%r13),0x01 - jno .no_diag308 - diag %r0,%r0,0x308 - .no_diag308: - sam31 # 31 bit mode - sr %r1,%r1 # erase register r1 - sr %r2,%r2 # erase register r2 - sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero - lpsw 0 # hopefully start new kernel... - - .align 8 - load_psw: - .long 0x00080000,0x80000000 - sys_msk: - .quad 0 - ctlregs: - .rept 16 - .quad 0 - .endr - gprregs: - .rept 16 - .quad 0 - .endr - have_diag308: - .byte 0 - .align 8 - relocate_kernel_end: - .align 8 - .globl relocate_kernel_len - relocate_kernel_len: - .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c new file mode 100644 index 000000000000..af10e6bdd34e --- /dev/null +++ b/arch/s390/kernel/rethook.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/rethook.h> +#include <linux/kprobes.h> +#include "rethook.h" + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->gprs[14]; + rh->frame = regs->gprs[15]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long)&arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + /* Replace fake return address with real one. */ + regs->gprs[14] = correct_ret_addr; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +/* + * Called from arch_rethook_trampoline + */ +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->gprs[15]); +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* assembler function that handles the rethook must not be probed itself */ +NOKPROBE_SYMBOL(arch_rethook_trampoline); diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h new file mode 100644 index 000000000000..32f069eed3f3 --- /dev/null +++ b/arch/s390/kernel/rethook.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __S390_RETHOOK_H +#define __S390_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); + +#endif diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 077a99389b07..1788a5454b6f 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> @@ -11,23 +12,35 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/kernel_stat.h> +#include <linux/sched/task_stack.h> + #include <asm/runtime_instr.h> #include <asm/cpu_mf.h> #include <asm/irq.h> +#include "entry.h" + /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; -static int runtime_instr_avail(void) +void runtime_instr_release(struct task_struct *tsk) { - return test_facility(64); + kfree(tsk->thread.ri_cb); } static void disable_runtime_instr(void) { - struct pt_regs *regs = task_pt_regs(current); + struct task_struct *task = current; + struct pt_regs *regs; + if (!task->thread.ri_cb) + return; + regs = task_pt_regs(task); + preempt_disable(); load_runtime_instr_cb(&runtime_instr_empty_cb); + kfree(task->thread.ri_cb); + task->thread.ri_cb = NULL; + preempt_enable(); /* * Make sure the RI bit is deleted from the PSW. If the user did not @@ -39,77 +52,34 @@ static void disable_runtime_instr(void) static void init_runtime_instr_cb(struct runtime_instr_cb *cb) { - cb->buf_limit = 0xfff; - if (s390_user_mode == HOME_SPACE_MODE) - cb->home_space = 1; - cb->int_requested = 1; - cb->pstate = 1; - cb->pstate_set_buf = 1; - cb->pstate_sample = 1; - cb->pstate_collect = 1; - cb->key = PAGE_DEFAULT_KEY; - cb->valid = 1; -} - -void exit_thread_runtime_instr(void) -{ - struct task_struct *task = current; - - if (!task->thread.ri_cb) - return; - disable_runtime_instr(); - kfree(task->thread.ri_cb); - task->thread.ri_signum = 0; - task->thread.ri_cb = NULL; -} - -static void runtime_instr_int_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct siginfo info; - - if (!(param32 & CPU_MF_INT_RI_MASK)) - return; - - inc_irq_stat(IRQEXT_CMR); - - if (!current->thread.ri_cb) - return; - if (current->thread.ri_signum < SIGRTMIN || - current->thread.ri_signum > SIGRTMAX) { - WARN_ON_ONCE(1); - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = current->thread.ri_signum; - info.si_code = SI_QUEUE; - if (param32 & CPU_MF_INT_RI_BUF_FULL) - info.si_int = ENOBUFS; - else if (param32 & CPU_MF_INT_RI_HALTED) - info.si_int = ECANCELED; - else - return; /* unknown reason */ - - send_sig_info(current->thread.ri_signum, &info, current); + cb->rla = 0xfff; + cb->s = 1; + cb->k = 1; + cb->ps = 1; + cb->pc = 1; + cb->key = PAGE_DEFAULT_KEY >> 4; + cb->v = 1; } +/* + * The signum argument is unused. In older kernels it was used to + * specify a real-time signal. For backwards compatibility user space + * should pass a valid real-time signal number (the signum argument + * was checked in older kernels). + */ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) { struct runtime_instr_cb *cb; - if (!runtime_instr_avail()) + if (!test_facility(64)) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { - preempt_disable(); - exit_thread_runtime_instr(); - preempt_enable(); + disable_runtime_instr(); return 0; } - if (command != S390_RUNTIME_INSTR_START || - (signum < SIGRTMIN || signum > SIGRTMAX)) + if (command != S390_RUNTIME_INSTR_START) return -EINVAL; if (!current->thread.ri_cb) { @@ -122,7 +92,6 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) } init_runtime_instr_cb(cb); - current->thread.ri_signum = signum; /* now load the control block to make it available */ preempt_disable(); @@ -131,20 +100,3 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) preempt_enable(); return 0; } - -static int __init runtime_instr_init(void) -{ - int rc; - - if (!runtime_instr_avail()) - return 0; - - measurement_alert_subclass_register(); - rc = register_external_interrupt(0x1407, runtime_instr_int_handler); - if (rc) - measurement_alert_subclass_unregister(); - else - pr_info("Runtime instrumentation facility initialized\n"); - return rc; -} -device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c deleted file mode 100644 index 3bac589844a7..000000000000 --- a/arch/s390/kernel/s390_ksyms.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <linux/module.h> -#include <linux/kvm_host.h> -#include <asm/ftrace.h> - -#ifdef CONFIG_FUNCTION_TRACER -EXPORT_SYMBOL(_mcount); -#endif -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) -EXPORT_SYMBOL(sie64a); -EXPORT_SYMBOL(sie_exit); -#endif -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S deleted file mode 100644 index 29bd7bec4176..000000000000 --- a/arch/s390/kernel/sclp.S +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Mini SCLP driver. - * - * Copyright IBM Corp. 2004, 2009 - * - * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, - * - */ - -#include <linux/linkage.h> - -LC_EXT_NEW_PSW = 0x58 # addr of ext int handler -LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit -LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter -LC_EXT_INT_CODE = 0x86 # addr of ext int code -LC_AR_MODE_ID = 0xa3 - -# -# Subroutine which waits synchronously until either an external interruption -# or a timeout occurs. -# -# Parameters: -# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds -# -# Returns: -# R2 = 0 on interrupt, 2 on timeout -# R3 = external interruption parameter if R2=0 -# - -_sclp_wait_int: - stm %r6,%r15,24(%r15) # save registers - basr %r13,0 # get base register -.LbaseS1: - ahi %r15,-96 # create stack frame - la %r8,LC_EXT_NEW_PSW # register int handler - la %r9,.LextpswS1-.LbaseS1(%r13) -#ifdef CONFIG_64BIT - tm LC_AR_MODE_ID,1 - jno .Lesa1 - la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit - la %r9,.LextpswS1_64-.LbaseS1(%r13) -.Lesa1: -#endif - mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) - mvc 0(16,%r8),0(%r9) -#ifdef CONFIG_64BIT - epsw %r6,%r7 # set current addressing mode - nill %r6,0x1 # in new psw (31 or 64 bit mode) - nilh %r7,0x8000 - stm %r6,%r7,0(%r8) -#endif - lhi %r6,0x0200 # cr mask for ext int (cr0.54) - ltr %r2,%r2 - jz .LsetctS1 - ahi %r6,0x0800 # cr mask for clock int (cr0.52) - stck .LtimeS1-.LbaseS1(%r13) # initiate timeout - al %r2,.LtimeS1-.LbaseS1(%r13) - st %r2,.LtimeS1-.LbaseS1(%r13) - sckc .LtimeS1-.LbaseS1(%r13) - -.LsetctS1: - stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts - l %r0,.LctlS1-.LbaseS1(%r13) - lhi %r1,~(0x200 | 0x800) # clear old values - nr %r1,%r0 - or %r1,%r6 # set new value - st %r1,.LctlS1-.LbaseS1(%r13) - lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) - st %r0,.LctlS1-.LbaseS1(%r13) - lhi %r2,2 # return code for timeout -.LloopS1: - lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt -.LwaitS1: - lh %r7,LC_EXT_INT_CODE - chi %r7,0x1004 # timeout? - je .LtimeoutS1 - chi %r7,0x2401 # service int? - jne .LloopS1 - sr %r2,%r2 - l %r3,LC_EXT_INT_PARAM -.LtimeoutS1: - lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting - # restore old handler - mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13) - lm %r6,%r15,120(%r15) # restore registers - br %r14 # return to caller - - .align 8 -.LoldpswS1: - .long 0, 0, 0, 0 # old ext int PSW -.LextpswS1: - .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int -#ifdef CONFIG_64BIT -.LextpswS1_64: - .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit -#endif -.LwaitpswS1: - .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int -.LtimeS1: - .quad 0 # current time -.LctlS1: - .long 0 # CT0 contents - -# -# Subroutine to synchronously issue a service call. -# -# Parameters: -# R2 = command word -# R3 = sccb address -# -# Returns: -# R2 = 0 on success, 1 on failure -# R3 = sccb response code if R2 = 0 -# - -_sclp_servc: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - lr %r6,%r2 # save command word - lr %r7,%r3 # save sccb address -.LretryS2: - lhi %r2,1 # error return code - .insn rre,0xb2200000,%r6,%r7 # servc - brc 1,.LendS2 # exit if not operational - brc 8,.LnotbusyS2 # go on if not busy - sr %r2,%r2 # wait until no longer busy - bras %r14,_sclp_wait_int - j .LretryS2 # retry -.LnotbusyS2: - sr %r2,%r2 # wait until result - bras %r14,_sclp_wait_int - sr %r2,%r2 - lh %r3,6(%r7) -.LendS2: - lm %r6,%r15,120(%r15) # restore registers - br %r14 - -# -# Subroutine to set up the SCLP interface. -# -# Parameters: -# R2 = 0 to activate, non-zero to deactivate -# -# Returns: -# R2 = 0 on success, non-zero on failure -# - -_sclp_setup: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - basr %r13,0 # get base register -.LbaseS3: - l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb - mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13) - ltr %r2,%r2 # initialization? - jz .LdoinitS3 # go ahead - # clear masks - xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6) -.LdoinitS3: - l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word - lr %r3,%r6 # get sccb address - bras %r14,_sclp_servc # issue service call - ltr %r2,%r2 # servc successful? - jnz .LerrorS3 - chi %r3,0x20 # write mask successful? - jne .LerrorS3 - # check masks - la %r2,.LinitmaskS3-.LinitsccbS3(%r6) - l %r1,0(%r2) # receive mask ok? - n %r1,12(%r2) - cl %r1,0(%r2) - jne .LerrorS3 - l %r1,4(%r2) # send mask ok? - n %r1,8(%r2) - cl %r1,4(%r2) - sr %r2,%r2 - je .LendS3 -.LerrorS3: - lhi %r2,1 # error return code -.LendS3: - lm %r6,%r15,120(%r15) # restore registers - br %r14 -.LwritemaskS3: - .long 0x00780005 # SCLP command for write mask -.LinitsccbS3: - .word .LinitendS3-.LinitsccbS3 - .byte 0,0,0,0 - .word 0 - .word 0 - .word 4 -.LinitmaskS3: - .long 0x80000000 - .long 0x40000000 - .long 0 - .long 0 -.LinitendS3: - -# -# Subroutine which prints a given text to the SCLP console. -# -# Parameters: -# R2 = address of nil-terminated ASCII text -# -# Returns: -# R2 = 0 on success, 1 on failure -# - -_sclp_print: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - basr %r13,0 # get base register -.LbaseS4: - l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb - mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13) - la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr - sr %r0,%r0 - l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table -.LinitmtoS4: - # initialize mto - mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13) - lhi %r6,.LmtoendS4-.LmtoS4 # current mto length -.LloopS4: - ic %r0,0(%r2) # get character - ahi %r2,1 - ltr %r0,%r0 # end of string? - jz .LfinalizemtoS4 - chi %r0,0x0a # end of line (NL)? - jz .LfinalizemtoS4 - stc %r0,0(%r6,%r7) # copy to mto - la %r11,0(%r6,%r7) - tr 0(1,%r11),0(%r10) # translate to EBCDIC - ahi %r6,1 - j .LloopS4 -.LfinalizemtoS4: - sth %r6,0(%r7) # update mto length - lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length - ar %r9,%r6 - sth %r9,.LmdbS4-.LwritesccbS4(%r8) - lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length - ar %r9,%r6 - sth %r9,.LevbufS4-.LwritesccbS4(%r8) - lh %r9,0(%r8) # update sccb length - ar %r9,%r6 - sth %r9,0(%r8) - ar %r7,%r6 # update current mto address - ltr %r0,%r0 # more characters? - jnz .LinitmtoS4 - l %r2,.LwritedataS4-.LbaseS4(%r13)# write data - lr %r3,%r8 - bras %r14,_sclp_servc - ltr %r2,%r2 # servc successful? - jnz .LendS4 - chi %r3,0x20 # write data successful? - je .LendS4 - lhi %r2,1 # error return code -.LendS4: - lm %r6,%r15,120(%r15) # restore registers - br %r14 - -# -# Function which prints a given text to the SCLP console. -# -# Parameters: -# R2 = address of nil-terminated ASCII text -# -# Returns: -# R2 = 0 on success, 1 on failure -# - -ENTRY(_sclp_print_early) - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame -#ifdef CONFIG_64BIT - tm LC_AR_MODE_ID,1 - jno .Lesa2 - ahi %r15,-80 - stmh %r6,%r15,96(%r15) # store upper register halves -.Lesa2: -#endif - lr %r10,%r2 # save string pointer - lhi %r2,0 - bras %r14,_sclp_setup # enable console - ltr %r2,%r2 - jnz .LendS5 - lr %r2,%r10 - bras %r14,_sclp_print # print string - ltr %r2,%r2 - jnz .LendS5 - lhi %r2,1 - bras %r14,_sclp_setup # disable console -.LendS5: -#ifdef CONFIG_64BIT - tm LC_AR_MODE_ID,1 - jno .Lesa3 - lmh %r6,%r15,96(%r15) # store upper register halves - ahi %r15,80 -.Lesa3: -#endif - lm %r6,%r15,120(%r15) # restore registers - br %r14 - -.LwritedataS4: - .long 0x00760005 # SCLP command for write data -.LwritesccbS4: - # sccb - .word .LmtoS4-.LwritesccbS4 - .byte 0 - .byte 0,0,0 - .word 0 - - # evbuf -.LevbufS4: - .word .LmtoS4-.LevbufS4 - .byte 0x02 - .byte 0 - .word 0 - -.LmdbS4: - # mdb - .word .LmtoS4-.LmdbS4 - .word 1 - .long 0xd4c4c240 - .long 1 - - # go -.LgoS4: - .word .LmtoS4-.LgoS4 - .word 1 - .long 0 - .byte 0,0,0,0,0,0,0,0 - .byte 0,0,0 - .byte 0 - .byte 0,0,0,0,0,0,0 - .byte 0 - .word 0 - .byte 0,0,0,0,0,0,0,0,0,0 - .byte 0,0,0,0,0,0,0,0 - .byte 0,0,0,0,0,0,0,0 - -.LmtoS4: - .word .LmtoendS4-.LmtoS4 - .word 4 - .word 0x1000 - .byte 0 - .byte 0,0,0 -.LmtoendS4: - - # Global constants -.LsccbS0: - .long _sclp_work_area -.Lascebc: - .long _ascebc - -.section .data,"aw",@progbits - .balign 4096 -_sclp_work_area: - .fill 4096 -.previous diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 497451ec5e26..c1fe0b53c5ac 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2012 @@ -12,70 +13,72 @@ * This file handles the architecture-dependent parts of initialization */ -#define KMSG_COMPONENT "setup" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "setup: " fmt #include <linux/errno.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> +#include <linux/random.h> #include <linux/user.h> #include <linux/tty.h> #include <linux/ioport.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/initrd.h> -#include <linux/bootmem.h> #include <linux/root_dev.h> #include <linux/console.h> #include <linux/kernel_stat.h> +#include <linux/dma-map-ops.h> #include <linux/device.h> #include <linux/notifier.h> #include <linux/pfn.h> #include <linux/ctype.h> #include <linux/reboot.h> #include <linux/topology.h> -#include <linux/ftrace.h> #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/memory.h> -#include <linux/compat.h> +#include <linux/start_kernel.h> +#include <linux/hugetlb.h> +#include <linux/kmemleak.h> +#include <asm/archrandom.h> +#include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> -#include <asm/uaccess.h> #include <asm/facility.h> #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> +#include <asm/nmi.h> #include <asm/irq.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/ebcdic.h> -#include <asm/kvm_virtio.h> #include <asm/diag.h> #include <asm/os_info.h> #include <asm/sclp.h> +#include <asm/stacktrace.h> +#include <asm/sysinfo.h> +#include <asm/numa.h> +#include <asm/alternative.h> +#include <asm/nospec-branch.h> +#include <asm/physmem_info.h> +#include <asm/maccess.h> +#include <asm/uv.h> +#include <asm/asm-offsets.h> #include "entry.h" -long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | - PSW_MASK_EA | PSW_MASK_BA; -long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_ASC_HOME; - -/* - * User copy operations. - */ -struct uaccess_ops uaccess; -EXPORT_SYMBOL(uaccess); - /* * Machine setup.. */ @@ -88,33 +91,101 @@ EXPORT_SYMBOL(console_devno); unsigned int console_irq = -1; EXPORT_SYMBOL(console_irq); -unsigned long elf_hwcap = 0; -char elf_platform[ELF_PLATFORM_SIZE]; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .amode31 section. + */ +char __amode31_ref *__samode31 = _samode31; +char __amode31_ref *__eamode31 = _eamode31; +char __amode31_ref *__stext_amode31 = _stext_amode31; +char __amode31_ref *__etext_amode31 = _etext_amode31; +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; -struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the AMODE31 + * sections. + * Because the AMODE31 sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head.S and then gets updated again after AMODE31 + * relocation. We must access the relevant AMODE31 tables indirectly via + * pointers placed in the .amode31.refs linker section. Those pointers get + * updated automatically during AMODE31 relocation and always contain a valid + * address within AMODE31 sections. + */ -int __initdata memory_end_set; -unsigned long __initdata memory_end; +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64); -unsigned long VMALLOC_START; +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; + +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31; +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; + +unsigned long __bootdata_preserved(max_mappable); +struct physmem_info __bootdata(physmem_info); + +struct vm_layout __bootdata_preserved(vm_layout); +EXPORT_SYMBOL(vm_layout); +int __bootdata_preserved(__kaslr_enabled); +unsigned int __bootdata_preserved(zlib_dfltcc_support); +EXPORT_SYMBOL(zlib_dfltcc_support); +u64 __bootdata_preserved(stfle_fac_list[16]); +EXPORT_SYMBOL(stfle_fac_list); +struct oldmem_data __bootdata_preserved(oldmem_data); + +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); +unsigned long __bootdata_preserved(vmemmap_size); -#ifdef CONFIG_64BIT -unsigned long MODULES_VADDR; -unsigned long MODULES_END; -#endif +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ -struct _lowcore *lowcore_ptr[NR_CPUS]; +struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); /* + * The Write Back bit position in the physaddr is given by the SLPC PCI. + * Leaving the mask zero always uses write through which is safe + */ +unsigned long mio_wb_bit_mask __ro_after_init; + +/* * This is set up by the setup-routine at boot-time * for S390 need to find out, what we have to setup * using address 0x10400 ... @@ -142,31 +213,28 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { - if (sclp_has_vt220()) - add_preferred_console("ttyS", 1, NULL); - else if (sclp_has_linemode()) - add_preferred_console("ttyS", 0, NULL); - else - add_preferred_console("hvc", 0, NULL); - } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttysclp", 0, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); } static int __init conmode_setup(char *str) { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) - if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0) + if (!strcmp(str, "hwc") || !strcmp(str, "sclp")) SET_CONSOLE_SCLP; #endif #if defined(CONFIG_TN3215_CONSOLE) - if (strncmp(str, "3215", 5) == 0) + if (!strcmp(str, "3215")) SET_CONSOLE_3215; #endif #if defined(CONFIG_TN3270_CONSOLE) - if (strncmp(str, "3270", 5) == 0) + if (!strcmp(str, "3270")) SET_CONSOLE_3270; #endif set_preferred_console(); @@ -180,7 +248,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -201,7 +269,7 @@ static void __init conmode_default(void) #endif return; } - if (strncmp(ptr + 8, "3270", 4) == 0) { + if (str_has_prefix(ptr + 8, "3270")) { #if defined(CONFIG_TN3270_CONSOLE) SET_CONSOLE_3270; #elif defined(CONFIG_TN3215_CONSOLE) @@ -209,7 +277,7 @@ static void __init conmode_default(void) #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif - } else if (strncmp(ptr + 8, "3215", 4) == 0) { + } else if (str_has_prefix(ptr + 8, "3215")) { #if defined(CONFIG_TN3215_CONSOLE) SET_CONSOLE_3215; #elif defined(CONFIG_TN3270_CONSOLE) @@ -218,6 +286,13 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + } else if (machine_is_kvm()) { + if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; @@ -225,19 +300,19 @@ static void __init conmode_default(void) } } -#ifdef CONFIG_ZFCPDUMP +#ifdef CONFIG_CRASH_DUMP static void __init setup_zfcpdump(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return; - if (OLDMEM_BASE) + if (oldmem_data.start) return; - strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); + strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE); console_loglevel = 2; } #else static inline void setup_zfcpdump(void) {} -#endif /* CONFIG_ZFCPDUMP */ +#endif /* CONFIG_CRASH_DUMP */ /* * Reboot, halt and power_off stubs. They just call _machine_restart, @@ -283,163 +358,127 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -static int __init early_parse_mem(char *p) -{ - memory_end = memparse(p, &p); - memory_end_set = 1; - return 0; -} -early_param("mem", early_parse_mem); +void *restart_stack; -static int __init parse_vmalloc(char *arg) +unsigned long stack_alloc(void) { - if (!arg) - return -EINVAL; - VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; - return 0; -} -early_param("vmalloc", parse_vmalloc); + void *stack; -unsigned int s390_user_mode = PRIMARY_SPACE_MODE; -EXPORT_SYMBOL_GPL(s390_user_mode); - -static void __init set_user_mode_primary(void) -{ - psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; - psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; -#ifdef CONFIG_COMPAT - psw32_user_bits = - (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; -#endif - uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos_switch : uaccess_pt; + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } -static int __init early_parse_user_mode(char *p) +void stack_free(unsigned long stack) { - if (p && strcmp(p, "primary") == 0) - s390_user_mode = PRIMARY_SPACE_MODE; - else if (!p || strcmp(p, "home") == 0) - s390_user_mode = HOME_SPACE_MODE; - else - return 1; - return 0; + vfree((void *)stack); } -early_param("user_mode", early_parse_user_mode); -static void __init setup_addressing_mode(void) +static unsigned long __init stack_alloc_early(void) { - if (s390_user_mode != PRIMARY_SPACE_MODE) - return; - set_user_mode_primary(); - if (MACHINE_HAS_MVCOS) - pr_info("Address spaces switched, mvcos available\n"); - else - pr_info("Address spaces switched, mvcos not available\n"); -} + unsigned long stack; -void *restart_stack __attribute__((__section__(".data"))); + stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE); + return stack; +} static void __init setup_lowcore(void) { - struct _lowcore *lc; + struct lowcore *lc, *abs_lc; /* * Setup lowcore for boot cpu */ - BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); - lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = psw_kernel_bits; - lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - lc->external_new_psw.mask = psw_kernel_bits | - PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->external_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits | - PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; - lc->mcck_new_psw.mask = psw_kernel_bits; - lc->mcck_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits | - PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; - lc->clock_comparator = -1ULL; - lc->kernel_stack = ((unsigned long) &init_thread_union) - + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) - + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) - + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->current_task = (unsigned long) init_thread_union.thread_info.task; - lc->thread_info = (unsigned long) &init_thread_union; - lc->machine_flags = S390_lowcore.machine_flags; - lc->stfl_fac_list = S390_lowcore.stfl_fac_list; - memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - lc->extended_save_area_addr = (__u32) - __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0); - /* enable extended save area */ - __ctl_set_bit(14, 29); - } -#else - lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; -#endif - lc->sync_enter_timer = S390_lowcore.sync_enter_timer; - lc->async_enter_timer = S390_lowcore.async_enter_timer; - lc->exit_timer = S390_lowcore.exit_timer; - lc->user_timer = S390_lowcore.user_timer; - lc->system_timer = S390_lowcore.system_timer; - lc->steal_timer = S390_lowcore.steal_timer; - lc->last_update_timer = S390_lowcore.last_update_timer; - lc->last_update_clock = S390_lowcore.last_update_clock; - lc->ftrace_func = S390_lowcore.ftrace_func; - - restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); - restart_stack += ASYNC_SIZE; - + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); + lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc)); + if (!lc) + panic("%s: Failed to allocate %zu bytes align=%zx\n", + __func__, sizeof(*lc), sizeof(*lc)); + + lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0); + lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; + lc->restart_psw.addr = __pa(restart_int_handler); + lc->external_new_psw.mask = PSW_KERNEL_BITS; + lc->external_new_psw.addr = (unsigned long) ext_int_handler; + lc->svc_new_psw.mask = PSW_KERNEL_BITS; + lc->svc_new_psw.addr = (unsigned long) system_call; + lc->program_new_psw.mask = PSW_KERNEL_BITS; + lc->program_new_psw.addr = (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; + lc->io_new_psw.mask = PSW_KERNEL_BITS; + lc->io_new_psw.addr = (unsigned long) io_int_handler; + lc->clock_comparator = clock_comparator_max; + lc->current_task = (unsigned long)&init_task; + lc->lpp = LPP_MAGIC; + lc->preempt_count = get_lowcore()->preempt_count; + nmi_alloc_mcesa_early(&lc->mcesad); + lc->sys_enter_timer = get_lowcore()->sys_enter_timer; + lc->exit_timer = get_lowcore()->exit_timer; + lc->user_timer = get_lowcore()->user_timer; + lc->system_timer = get_lowcore()->system_timer; + lc->steal_timer = get_lowcore()->steal_timer; + lc->last_update_timer = get_lowcore()->last_update_timer; + lc->last_update_clock = get_lowcore()->last_update_clock; + /* + * Allocate the global restart stack which is the same for + * all CPUs in case *one* of them does a PSW restart. + */ + restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET); + lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->kernel_stack = get_lowcore()->kernel_stack; /* * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant - * restart data to the absolute zero lowcore. This is necesary if + * restart data to the absolute zero lowcore. This is necessary if * PSW restart is done on an offline CPU that has lowcore zero. */ lc->restart_stack = (unsigned long) restart_stack; lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; - lc->restart_source = -1UL; - - /* Setup absolute zero lowcore */ - mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); - mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); - mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); - mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); - mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); - - set_prefix((u32)(unsigned long) lc); + lc->restart_source = -1U; + lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; + arch_spin_lock_setup(0); + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; + lc->kernel_asce = get_lowcore()->kernel_asce; + lc->user_asce = get_lowcore()->user_asce; + + system_ctlreg_init_save_area(lc); + abs_lc = get_abs_lowcore(); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + abs_lc->program_new_psw = lc->program_new_psw; + abs_lc->mcesad = lc->mcesad; + put_abs_lowcore(abs_lc); + + set_prefix(__pa(lc)); lowcore_ptr[0] = lc; + if (abs_lowcore_map(0, lowcore_ptr[0], false)) + panic("Couldn't setup absolute lowcore"); } static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata *standard_resources[] = { @@ -451,33 +490,29 @@ static struct resource __initdata *standard_resources[] = { static void __init setup_resources(void) { struct resource *res, *std_res, *sub_res; - int i, j; - - code_resource.start = (unsigned long) &_text; - code_resource.end = (unsigned long) &_etext - 1; - data_resource.start = (unsigned long) &_etext; - data_resource.end = (unsigned long) &_edata - 1; - bss_resource.start = (unsigned long) &__bss_start; - bss_resource.end = (unsigned long) &__bss_stop - 1; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; - res = alloc_bootmem_low(sizeof(*res)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = res->start + memory_chunk[i].size - 1; + phys_addr_t start, end; + int j; + u64 i; + + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext) - 1; + data_resource.start = __pa_symbol(_etext); + data_resource.end = __pa_symbol(_edata) - 1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop) - 1; + + for_each_mem_range(i, &start, &end) { + res = memblock_alloc_or_panic(sizeof(*res), 8); + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; + + res->name = "System RAM"; + res->start = start; + /* + * In memblock, end points to the first byte after the + * range while in resources, end points to the last byte in + * the range. + */ + res->end = end - 1; request_resource(&iomem_resource, res); for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { @@ -486,7 +521,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = alloc_bootmem_low(sizeof(*sub_res)); + sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; @@ -496,203 +531,47 @@ static void __init setup_resources(void) } } } -} - -static void __init setup_memory_end(void) -{ - unsigned long vmax, vmalloc_size, tmp; - unsigned long real_memory_size = 0; - int i; - - -#ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { - memory_end = ZFCPDUMP_HSA_SIZE; - memory_end_set = 1; - } -#endif - memory_end &= PAGE_MASK; - +#ifdef CONFIG_CRASH_DUMP /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start, end; - struct mem_chunk *chunk; - unsigned long align; - - chunk = &memory_chunk[i]; - if (!chunk->size) - continue; - align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); - start = (chunk->addr + align - 1) & ~(align - 1); - end = (chunk->addr + chunk->size) & ~(align - 1); - if (start >= end) - memset(chunk, 0, sizeof(*chunk)); - else { - chunk->addr = start; - chunk->size = end - start; - } - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); + if (crashk_res.end) { + memblock_add_node(crashk_res.start, resource_size(&crashk_res), + 0, MEMBLOCK_NONE); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); } - - /* Choose kernel address space layout: 2, 3, or 4 levels. */ -#ifdef CONFIG_64BIT - vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; - tmp = (memory_end ?: real_memory_size) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; - if (tmp <= (1UL << 42)) - vmax = 1UL << 42; /* 3-level kernel page table */ - else - vmax = 1UL << 53; /* 4-level kernel page table */ - /* module area is at the end of the kernel address space. */ - MODULES_END = vmax; - MODULES_VADDR = MODULES_END - MODULES_LEN; - VMALLOC_END = MODULES_VADDR; -#else - vmalloc_size = VMALLOC_END ?: 96UL << 20; - vmax = 1UL << 31; /* 2-level kernel page table */ - /* vmalloc area is at the end of the kernel address space. */ - VMALLOC_END = vmax; #endif - VMALLOC_START = vmax - vmalloc_size; - - /* Split remaining virtual space between 1:1 mapping & vmemmap array */ - tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); - /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ - tmp = SECTION_ALIGN_UP(tmp); - tmp = VMALLOC_START - tmp * sizeof(struct page); - tmp &= ~((vmax >> 11) - 1); /* align to page table level */ - tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); - vmemmap = (struct page *) tmp; - - /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: real_memory_size, tmp); - - /* Fixup memory chunk array to fit into 0..memory_end */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; - - if (!chunk->size) - continue; - if (chunk->addr >= memory_end) { - memset(chunk, 0, sizeof(*chunk)); - continue; - } - if (chunk->addr + chunk->size > memory_end) - chunk->size = memory_end - chunk->addr; - } } -static void __init setup_vmcoreinfo(void) +static void __init setup_memory_end(void) { - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } #ifdef CONFIG_CRASH_DUMP /* - * Find suitable location for crashkernel memory - */ -static unsigned long __init find_crash_base(unsigned long crash_size, - char **msg) -{ - unsigned long crash_base; - struct mem_chunk *chunk; - int i; - - if (memory_chunk[0].size < crash_size) { - *msg = "first memory chunk must be at least crashkernel size"; - return 0; - } - if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) - return OLDMEM_BASE; - - for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { - chunk = &memory_chunk[i]; - if (chunk->size == 0) - continue; - if (chunk->type != CHUNK_READ_WRITE) - continue; - if (chunk->size < crash_size) - continue; - crash_base = (chunk->addr + chunk->size) - crash_size; - if (crash_base < crash_size) - continue; - if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) - continue; - if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) - continue; - return crash_base; - } - *msg = "no suitable area found"; - return 0; -} - -/* - * Check if crash_base and crash_size is valid - */ -static int __init verify_crash_base(unsigned long crash_base, - unsigned long crash_size, - char **msg) -{ - struct mem_chunk *chunk; - int i; - - /* - * Because we do the swap to zero, we must have at least 'crash_size' - * bytes free space before crash_base - */ - if (crash_size > crash_base) { - *msg = "crashkernel offset must be greater than size"; - return -EINVAL; - } - - /* First memory chunk must be at least crash_size */ - if (memory_chunk[0].size < crash_size) { - *msg = "first memory chunk must be at least crashkernel size"; - return -EINVAL; - } - /* Check if we fit into the respective memory chunk */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - chunk = &memory_chunk[i]; - if (chunk->size == 0) - continue; - if (crash_base < chunk->addr) - continue; - if (crash_base >= chunk->addr + chunk->size) - continue; - /* we have found the memory chunk */ - if (crash_base + crash_size > chunk->addr + chunk->size) { - *msg = "selected memory chunk is too small for " - "crashkernel memory"; - return -EINVAL; - } - return 0; - } - *msg = "invalid memory range specified"; - return -EINVAL; -} - -/* - * When kdump is enabled, we have to ensure that no memory from - * the area [0 - crashkernel memory size] and - * [crashk_res.start - crashk_res.end] is set offline. + * When kdump is enabled, we have to ensure that no memory from the area + * [0 - crashkernel memory size] is set offline - it will be exchanged with + * the crashkernel memory region when kdump is triggered. The crashkernel + * memory region can never get offlined (pages are unmovable). */ static int kdump_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct memory_notify *arg = data; + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; - if (arg->start_pfn > PFN_DOWN(crashk_res.end)) - return NOTIFY_OK; - if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) - return NOTIFY_OK; - return NOTIFY_BAD; + return NOTIFY_OK; } static struct notifier_block kdump_mem_nb = { @@ -702,24 +581,15 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that oldmem, where the dump is stored, is protected + * Reserve page tables created by decompressor */ -static void reserve_oldmem(void) +static void __init reserve_pgtables(void) { -#ifdef CONFIG_CRASH_DUMP - unsigned long real_size = 0; - int i; + unsigned long start, end; + struct reserved_range *range; - if (!OLDMEM_BASE) - return; - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; - - real_size = max(real_size, chunk->addr + chunk->size); - } - create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); - create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); -#endif + for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end) + memblock_reserve(start, end - start); } /* @@ -729,277 +599,297 @@ static void __init reserve_crashkernel(void) { #ifdef CONFIG_CRASH_DUMP unsigned long long crash_base, crash_size; - char *msg = NULL; + phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, - &crash_base); - if (rc || crash_size == 0) - return; + rc = parse_crashkernel(boot_command_line, ident_map_size, + &crash_size, &crash_base, NULL, NULL, NULL); + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); - if (register_memory_notifier(&kdump_mem_nb)) + if (rc || crash_size == 0) return; - if (!crash_base) - crash_base = find_crash_base(crash_size, &msg); + + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); + return; + } + + low = crash_base ?: oldmem_data.start; + high = low + crash_size; + if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp.hsa_size); + high = crash_base ? crash_base + crash_size : ULONG_MAX; + + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; + } + low = crash_base ?: low; + crash_base = memblock_phys_alloc_range(crash_size, + KEXEC_CRASH_MEM_ALIGN, + low, high); + } + if (!crash_base) { - pr_info("crashkernel reservation failed: %s\n", msg); - unregister_memory_notifier(&kdump_mem_nb); + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); return; } - if (verify_crash_base(crash_base, crash_size, &msg)) { - pr_info("crashkernel reservation failed: %s\n", msg); - unregister_memory_notifier(&kdump_mem_nb); + + if (register_memory_notifier(&kdump_mem_nb)) { + memblock_phys_free(crash_base, crash_size); return; } - if (!OLDMEM_BASE && MACHINE_IS_VM) + + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); - create_mem_hole(memory_chunk, crash_base, crash_size); + memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", - crash_size >> 20, crash_base >> 20, memory_end >> 20); + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); os_info_crashkernel_add(crash_base, crash_size); #endif } -static void __init setup_memory(void) +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) { - unsigned long bootmap_size; - unsigned long start_pfn, end_pfn; - int i; + unsigned long addr, size; - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - start_pfn = PFN_UP(__pa(&_end)); - end_pfn = max_pfn = PFN_DOWN(memory_end); + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + initrd_start = (unsigned long)__va(addr); + initrd_end = initrd_start + size; + memblock_reserve(addr, size); +} -#ifdef CONFIG_BLK_DEV_INITRD - /* - * Move the initrd in case the bitmap of the bootmem allocater - * would overwrite it. - */ +/* + * Reserve the memory area used to pass the certificate lists + */ +static void __init reserve_certificate_list(void) +{ + if (ipl_cert_list_addr) + memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); +} - if (INITRD_START && INITRD_SIZE) { - unsigned long bmap_size; - unsigned long start; +static void __init reserve_physmem_info(void) +{ + unsigned long addr, size; - bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); - bmap_size = PFN_PHYS(bmap_size); + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) + memblock_reserve(addr, size); +} - if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { - start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +static void __init free_physmem_info(void) +{ + unsigned long addr, size; -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - /* Move initrd behind kdump oldmem */ - if (start + INITRD_SIZE > OLDMEM_BASE && - start < OLDMEM_BASE + OLDMEM_SIZE) - start = OLDMEM_BASE + OLDMEM_SIZE; - } -#endif - if (start + INITRD_SIZE > memory_end) { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - start + INITRD_SIZE, memory_end); - INITRD_START = INITRD_SIZE = 0; - } else { - pr_info("Moving initrd (0x%08lx -> " - "0x%08lx, size: %ld)\n", - INITRD_START, start, INITRD_SIZE); - memmove((void *) start, (void *) INITRD_START, - INITRD_SIZE); - INITRD_START = start; - } - } - } -#endif + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) + memblock_phys_free(addr, size); +} - /* - * Initialize the boot-time allocator - */ - bootmap_size = init_bootmem(start_pfn, end_pfn); +static void __init memblock_add_physmem_info(void) +{ + unsigned long start, end; + int i; - /* - * Register RAM areas with the bootmem allocator. - */ + pr_debug("physmem info source: %s (%hhd)\n", + get_physmem_info_source(), physmem_info.info_source); + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); + for_each_physmem_usable_range(i, &start, &end) + memblock_add(start, end - start); + for_each_physmem_online_range(i, &start, &end) + memblock_physmem_add(start, end - start); + memblock_set_bottom_up(false); + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); +} - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start_chunk, end_chunk, pfn; - - if (!memory_chunk[i].size) - continue; - start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); - end_chunk = min(end_chunk, end_pfn); - if (start_chunk >= end_chunk) - continue; - memblock_add_node(PFN_PHYS(start_chunk), - PFN_PHYS(end_chunk - start_chunk), 0); - pfn = max(start_chunk, start_pfn); - storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk)); +static void __init setup_high_memory(void) +{ + high_memory = __va(ident_map_size); +} + +/* + * Reserve memory used for lowcore. + */ +static void __init reserve_lowcore(void) +{ + void *lowcore_start = get_lowcore(); + void *lowcore_end = lowcore_start + sizeof(struct lowcore); + void *start, *end; + + if (absolute_pointer(__identity_base) < lowcore_end) { + start = max(lowcore_start, (void *)__identity_base); + end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); + memblock_reserve(__pa(start), __pa(end)); } +} - psw_set_key(PAGE_DEFAULT_KEY); +/* + * Reserve memory used for absolute lowcore/command line/kernel image. + */ +static void __init reserve_kernel(void) +{ + memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); + memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31); + memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); + memblock_reserve(__pa(_stext), _end - _stext); +} - free_bootmem_with_active_regions(0, max_pfn); +static void __init setup_memory(void) +{ + phys_addr_t start, end; + u64 i; /* - * Reserve memory used for lowcore/command line/kernel image. + * Init storage key for present memory */ - reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT); - reserve_bootmem((unsigned long)_stext, - PFN_PHYS(start_pfn) - (unsigned long)_stext, - BOOTMEM_DEFAULT); - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, - BOOTMEM_DEFAULT); + for_each_mem_range(i, &start, &end) + storage_key_init_range(start, end); -#ifdef CONFIG_CRASH_DUMP - if (crashk_res.start) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1, - BOOTMEM_DEFAULT); - if (is_kdump_kernel()) - reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, - PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); -#endif -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - if (INITRD_START + INITRD_SIZE <= memory_end) { - reserve_bootmem(INITRD_START, INITRD_SIZE, - BOOTMEM_DEFAULT); - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - } else { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; - } - } -#endif + psw_set_key(PAGE_DEFAULT_KEY); +} + +static void __init relocate_amode31_section(void) +{ + unsigned long amode31_size = __eamode31 - __samode31; + long amode31_offset, *ptr; + + amode31_offset = AMODE31_START - (unsigned long)__samode31; + pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); + + /* Move original AMODE31 section to the new one */ + memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size); + /* Zero out the old AMODE31 section to catch invalid accesses within it */ + memset(__samode31, 0, amode31_size); + + /* Update all AMODE31 region references */ + for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) + *ptr += amode31_offset; +} + +/* This must be called after AMODE31 relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + local_ctl_store(2, &cr2.reg); + local_ctl_store(5, &cr5.reg); + local_ctl_store(15, &cr15.reg); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + system_ctl_load(2, &cr2.reg); + system_ctl_load(5, &cr5.reg); + system_ctl_load(15, &cr15.reg); } /* - * Setup hardware capabilities. + * Add system information as device randomness */ -static void __init setup_hwcaps(void) +static void __init setup_randomness(void) { - static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuid cpu_id; - int i; + struct sysinfo_3_2_2 *vmms; - /* - * The store facility list bits numbers as found in the principles - * of operation are numbered with bit 1UL<<31 as number 0 to - * bit 1UL<<0 as number 31. - * Bit 0: instructions named N3, "backported" to esa-mode - * Bit 2: z/Architecture mode is active - * Bit 7: the store-facility-list-extended facility is installed - * Bit 17: the message-security assist is installed - * Bit 19: the long-displacement facility is installed - * Bit 21: the extended-immediate facility is installed - * Bit 22: extended-translation facility 3 is installed - * Bit 30: extended-translation facility 3 enhancement facility - * These get translated to: - * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, - * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and - * HWCAP_S390_ETF3EH bit 8 (22 && 30). - */ - for (i = 0; i < 6; i++) - if (test_facility(stfl_bits[i])) - elf_hwcap |= 1UL << i; + vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); + if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) + add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); + memblock_free(vmms, PAGE_SIZE); - if (test_facility(22) && test_facility(30)) - elf_hwcap |= HWCAP_S390_ETF3EH; + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +} - /* - * Check for additional facilities with store-facility-list-extended. - * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 - * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information - * as stored by stfl, bits 32-xxx contain additional facilities. - * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilities - * are: - * Bit 42: decimal floating point facility is installed - * Bit 44: perform floating point operation facility is installed - * translated to: - * HWCAP_S390_DFP bit 6 (42 && 44). - */ - if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) - elf_hwcap |= HWCAP_S390_DFP; +/* + * Issue diagnose 318 to set the control program name and + * version codes. + */ +static void __init setup_control_program_code(void) +{ + union diag318_info diag318_info = { + .cpnc = CPNC_LINUX, + .cpvc = 0, + }; - /* - * Huge page support HWCAP_S390_HPAGE is bit 7. - */ - if (MACHINE_HAS_HPAGE) - elf_hwcap |= HWCAP_S390_HPAGE; + if (!sclp.has_diag318) + return; -#if defined(CONFIG_64BIT) - /* - * 64-bit register support for 31-bit processes - * HWCAP_S390_HIGH_GPRS is bit 9. - */ - elf_hwcap |= HWCAP_S390_HIGH_GPRS; + diag_stat_inc(DIAG_STAT_X318); + asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val)); +} - /* - * Transactional execution support HWCAP_S390_TE is bit 10. - */ - if (test_facility(50) && test_facility(73)) - elf_hwcap |= HWCAP_S390_TE; -#endif +/* + * Print the component list from the IPL report + */ +static void __init log_component_list(void) +{ + struct ipl_rb_component_entry *ptr, *end; + char *str; - get_cpu_id(&cpu_id); - switch (cpu_id.machine) { - case 0x9672: -#if !defined(CONFIG_64BIT) - default: /* Use "g5" as default for 31 bit kernels. */ -#endif - strcpy(elf_platform, "g5"); - break; - case 0x2064: - case 0x2066: -#if defined(CONFIG_64BIT) - default: /* Use "z900" as default for 64 bit kernels. */ -#endif - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: - strcpy(elf_platform, "z10"); - break; - case 0x2817: - case 0x2818: - strcpy(elf_platform, "z196"); - break; - case 0x2827: - strcpy(elf_platform, "zEC12"); - break; + if (!early_ipl_comp_list_addr) + return; + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) + pr_info("Linux is running with Secure-IPL enabled\n"); + else + pr_info("Linux is running with Secure-IPL disabled\n"); + ptr = __va(early_ipl_comp_list_addr); + end = (void *) ptr + early_ipl_comp_list_size; + pr_info("The IPL report contains the following components:\n"); + while (ptr < end) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED) + str = "signed, verified"; + else + str = "signed, verification failed"; + } else { + str = "not signed"; + } + pr_info("%016llx - %016llx (%s)\n", + ptr->addr, ptr->addr + ptr->len, str); + ptr++; } } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. + */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. */ @@ -1009,27 +899,23 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ -#ifndef CONFIG_64BIT - if (MACHINE_IS_VM) - pr_info("Linux is running as a z/VM " - "guest operating system in 31-bit mode\n"); - else if (MACHINE_IS_LPAR) - pr_info("Linux is running natively in 31-bit mode\n"); - if (MACHINE_HAS_IEEE) - pr_info("The hardware system has IEEE compatible " - "floating point units\n"); - else - pr_info("The hardware system has no IEEE compatible " - "floating point units\n"); -#else /* CONFIG_64BIT */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); -#endif /* CONFIG_64BIT */ + else + pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); + + if (machine_has_relocated_lowcore()) + pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); + + log_component_list(); /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ @@ -1037,46 +923,96 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; + setup_initial_init_mm(_text, _etext, _edata, _end); - if (MACHINE_HAS_MVCOS) - memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); - else - memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); + if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) + nospec_auto_detect(); + jump_label_init(); parse_early_param(); - detect_memory_layout(memory_chunk, memory_end); +#ifdef CONFIG_CRASH_DUMP + /* Deactivate elfcorehdr= kernel parameter */ + elfcorehdr_addr = ELFCORE_ADDR_MAX; +#endif + os_info_init(); setup_ipl(); - reserve_oldmem(); + setup_control_program_code(); + + /* Do some memory reservations *before* memory is added to memblock */ + reserve_pgtables(); + reserve_lowcore(); + reserve_kernel(); + reserve_initrd(); + reserve_certificate_list(); + reserve_physmem_info(); + memblock_set_current_limit(ident_map_size); + memblock_allow_resize(); + + /* Get information about *all* installed memory */ + memblock_add_physmem_info(); + + free_physmem_info(); setup_memory_end(); - setup_addressing_mode(); - reserve_crashkernel(); + setup_high_memory(); + memblock_dump_all(); setup_memory(); - setup_resources(); - setup_vmcoreinfo(); - setup_lowcore(); - cpu_init(); - s390_init_cpu_topology(); + relocate_amode31_section(); + setup_cr(); + setup_uv(); + dma_contiguous_reserve(ident_map_size); + vmcp_cma_reserve(); + if (cpu_has_edat2()) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + reserve_crashkernel(); +#ifdef CONFIG_CRASH_DUMP /* - * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). + * Be aware that smp_save_dump_secondary_cpus() triggers a system reset. + * Therefore CPU and device initialization should be done afterwards. */ - setup_hwcaps(); + smp_save_dump_secondary_cpus(); +#endif + setup_resources(); + setup_lowcore(); + smp_fill_possible_mask(); + cpu_detect_mhz_feature(); + cpu_init(); + numa_setup(); + smp_detect_cpus(); + topology_init_early(); + setup_protection_map(); /* - * Create kernel page tables and switch to virtual addressing. + * Create kernel page tables. */ paging_init(); + /* + * After paging_init created the kernel page table, the new PSWs + * in lowcore can now run with DAT enabled. + */ +#ifdef CONFIG_CRASH_DUMP + smp_save_dump_ipl_cpu(); +#endif + /* Setup default console */ conmode_default(); set_preferred_console(); - /* Setup zfcpdump support */ + apply_alternative_instructions(); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_init_branches(); + + /* Setup zfcp/nvme dump support */ setup_zfcpdump(); + + /* Add system specific data to the random pool */ + setup_randomness(); +} + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index c45becf82e01..4874de5edea0 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 1999, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) @@ -10,10 +11,13 @@ */ #include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/rseq.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/kernel.h> #include <linux/signal.h> +#include <linux/entry-common.h> #include <linux/errno.h> #include <linux/wait.h> #include <linux/ptrace.h> @@ -22,75 +26,135 @@ #include <linux/tty.h> #include <linux/personality.h> #include <linux/binfmts.h> -#include <linux/tracehook.h> #include <linux/syscalls.h> -#include <linux/compat.h> #include <asm/ucontext.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <asm/vdso-symbols.h> +#include <asm/access-regs.h> #include <asm/lowcore.h> -#include <asm/switch_to.h> #include "entry.h" -typedef struct +/* + * Layout of an old-style signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | struct sigcontext | + * | oldmask | + * | _sigregs * | + * ----------------------------------------- + * | _sigregs with | + * | _s390_regs_common | + * | _s390_fp_regs | + * ----------------------------------------- + * | int signo | + * ----------------------------------------- + * | _sigregs_ext with | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt) | + * | reserved 128 byte (opt) | + * ----------------------------------------- + * | __u16 svc_insn | + * ----------------------------------------- + * The svc_insn entry with the sigreturn system call opcode does not + * have a fixed position and moves if gprs_high or vxrs exist. + * Future extensions will be added to _sigregs_ext. + */ +struct sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; struct sigcontext sc; _sigregs sregs; int signo; - __u8 retcode[S390_SYSCALL_SIZE]; -} sigframe; + _sigregs_ext sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ +}; -typedef struct +/* + * Layout of an rt signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | svc __NR_rt_sigreturn 2 byte | + * ----------------------------------------- + * | struct siginfo | + * ----------------------------------------- + * | struct ucontext_extended with | + * | unsigned long uc_flags | + * | struct ucontext *uc_link | + * | stack_t uc_stack | + * | _sigregs uc_mcontext with | + * | _s390_regs_common | + * | _s390_fp_regs | + * | sigset_t uc_sigmask | + * | _sigregs_ext uc_mcontext_ext | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt)| + * | reserved 128 byte (opt) | + * ----------------------------------------- + * Future extensions will be added to _sigregs_ext. + */ +struct rt_sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; struct siginfo info; - struct ucontext uc; -} rt_sigframe; + struct ucontext_extended uc; +}; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_user_fpu_regs(); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); +} /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - save_access_regs(current->thread.acrs); - /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = psw_user_bits | - (regs->psw.mask & PSW_MASK_USER); + user_sregs.regs.psw.mask = PSW_USER_BITS | + (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, - sizeof(sregs->regs.acrs)); - /* - * We have to store the fp registers to current->thread.fp_regs - * to merge them with the emulated registers. - */ - save_fp_regs(¤t->thread.fp_regs); - memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); - return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); + sizeof(user_sregs.regs.acrs)); + fpregs_store(&user_sregs.fpregs, ¤t->thread.ufpu); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) + return -EFAULT; + return 0; } -/* Returns positive number on error */ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { - int err; _sigregs user_sregs; - /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + /* Always make any pending restarted system call return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); - if (err) - return err; - /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ - regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (user_sregs.regs.psw.mask & PSW_MASK_USER); + if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); /* Check for invalid user address space control. */ - if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC)) - regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) | + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | (regs->psw.mask & ~PSW_MASK_ASC); /* Check for invalid amode */ if (regs->psw.mask & PSW_MASK_EA) @@ -98,60 +162,102 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, - sizeof(sregs->regs.acrs)); - restore_access_regs(current->thread.acrs); + sizeof(current->thread.acrs)); + + fpregs_load(&user_sregs.fpregs, ¤t->thread.ufpu); + + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + return 0; +} - memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, - sizeof(s390_fp_regs)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; +/* Returns non-zero on fault. */ +static int save_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save vector registers to signal stack */ + if (cpu_has_vx()) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = current->thread.ufpu.vxrs[i].low; + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.ufpu.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } + return 0; +} - restore_fp_regs(¤t->thread.fp_regs); - clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ +static int restore_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore vector registers from signal stack */ + if (cpu_has_vx()) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + current->thread.ufpu.vxrs[i].low = vxrs[i]; + } return 0; } SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); - sigframe __user *frame = (sigframe __user *)regs->gprs[15]; + struct sigframe __user *frame = + (struct sigframe __user *) regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; set_current_blocked(&set); + save_user_fpu_regs(); if (restore_sigregs(regs, &frame->sregs)) goto badframe; + if (restore_sigregs_ext(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); - rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + save_user_fpu_regs(); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_altstack(&frame->uc.uc_stack)) + if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } /* - * Set up a signal frame. - */ - - -/* * Determine which stack to use.. */ static inline void __user * @@ -175,60 +281,63 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) return (void __user *)((sp - frame_size) & -8ul); } -static inline int map_signal(int sig) -{ - if (current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32) - return current_thread_info()->exec_domain->signal_invmap[sig]; - else - return sig; -} - static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { - sigframe __user *frame; - - frame = get_sigframe(ka, regs, sizeof(sigframe)); + struct sigframe __user *frame; + struct sigcontext sc; + unsigned long restorer; + size_t frame_size; + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); + if (cpu_has_vx()) + frame_size += sizeof(frame->sregs_ext); + frame = get_sigframe(ka, regs, frame_size); if (frame == (void __user *) -1UL) - goto give_sigsegv; + return -EFAULT; + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); + sc.sregs = (_sigregs __user __force *) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) - goto give_sigsegv; + /* Store registers needed to create the signal frame */ + store_sigregs(); + /* Create _sigregs on the signal stack */ if (save_sigregs(regs, &frame->sregs)) - goto give_sigsegv; - if (__put_user(&frame->sregs, &frame->sc.sregs)) - goto give_sigsegv; + return -EFAULT; + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext on the signal stack */ + if (save_sigregs_ext(regs, &frame->sregs_ext)) + return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; - } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) - goto give_sigsegv; - } - - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) - goto give_sigsegv; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ka->sa.sa_restorer; + else + restorer = VDSO_SYMBOL(current, sigreturn); /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | - (psw_user_bits & PSW_MASK_ASC) | + (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ka->sa.sa_handler; - regs->gprs[2] = map_signal(sig); + regs->gprs[2] = sig; regs->gprs[3] = (unsigned long) &frame->sc; /* We forgot to include these in the sigcontext. @@ -238,93 +347,83 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; - regs->gprs[6] = task_thread_info(current)->last_break; + regs->gprs[6] = current->thread.last_break; } - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) - goto give_sigsegv; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } -static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) +static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) { - int err = 0; - rt_sigframe __user *frame; - - frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); - + struct rt_sigframe __user *frame; + unsigned long uc_flags, restorer; + size_t frame_size; + + frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); + uc_flags = 0; + if (cpu_has_vx()) { + frame_size += sizeof(_sigregs_ext); + uc_flags |= UC_VXRS; + } + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) - goto give_sigsegv; - - if (copy_siginfo_to_user(&frame->info, info)) - goto give_sigsegv; + return -EFAULT; - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(NULL, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs(regs, &frame->uc.uc_mcontext); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto give_sigsegv; + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; - } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode))) - goto give_sigsegv; - } + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ksig->ka.sa.sa_restorer; + else + restorer = VDSO_SYMBOL(current, rt_sigreturn); - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) - goto give_sigsegv; + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(NULL, &frame->uc.uc_link) || + __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) + return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | - (psw_user_bits & PSW_MASK_ASC) | + (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler; - regs->gprs[2] = map_signal(sig); + regs->gprs[2] = ksig->sig; regs->gprs[3] = (unsigned long) &frame->info; regs->gprs[4] = (unsigned long) &frame->uc; - regs->gprs[5] = task_thread_info(current)->last_break; + regs->gprs[5] = current->thread.last_break; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } -static void handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, - struct pt_regs *regs) +static void handle_signal(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs) { int ret; /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(sig, ka, info, oldset, regs); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(ksig, oldset, regs); else - ret = setup_frame(sig, ka, oldset, regs); - if (ret) - return; - signal_delivered(sig, info, ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs); + + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP)); } /* @@ -336,11 +435,10 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -void do_signal(struct pt_regs *regs) + +void arch_do_signal_or_restart(struct pt_regs *regs) { - siginfo_t info; - int signr; - struct k_sigaction ka; + struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); /* @@ -348,14 +446,13 @@ void do_signal(struct pt_regs *regs) * the debugger may change all our registers, including the system * call information. */ - current_thread_info()->system_call = - test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); + current->thread.system_call = + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (signr > 0) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ - if (current_thread_info()->system_call) { - regs->int_code = current_thread_info()->system_call; + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; /* Check for system call restarting. */ switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: @@ -363,11 +460,11 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = -EINTR; break; case -ERESTARTSYS: - if (!(ka.sa.sa_flags & SA_RESTART)) { + if (!(ksig.ka.sa.sa_flags & SA_RESTART)) { regs->gprs[2] = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->gprs[2] = regs->orig_gpr2; regs->psw.addr = @@ -377,32 +474,33 @@ void do_signal(struct pt_regs *regs) } } /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL); - if (is_compat_task()) - handle_signal32(signr, &ka, &info, oldset, regs); - else - handle_signal(signr, &ka, &info, oldset, regs); + rseq_signal_deliver(&ksig, regs); + handle_signal(&ksig, oldset, regs); return; } /* No handlers present - check for system call restart */ - clear_thread_flag(TIF_SYSCALL); - if (current_thread_info()->system_call) { - regs->int_code = current_thread_info()->system_call; + clear_pt_regs_flag(regs, PIF_SYSCALL); + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ - regs->int_code = __NR_restart_syscall; - /* fallthrough */ + regs->gprs[2] = regs->orig_gpr2; + current->restart_block.arch_data = regs->psw.addr; + regs->psw.addr = VDSO_SYMBOL(current, restart_syscall); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_thread_flag(TIF_PER_TRAP); + break; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_thread_flag(TIF_SYSCALL); + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); if (test_thread_flag(TIF_SINGLE_STEP)) - set_thread_flag(TIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); break; } } @@ -412,9 +510,3 @@ void do_signal(struct pt_regs *regs) */ restore_saved_sigmask(); } - -void do_notify_resume(struct pt_regs *regs) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); -} diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c new file mode 100644 index 000000000000..cc869de6e3a5 --- /dev/null +++ b/arch/s390/kernel/skey.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/rwonce.h> +#include <asm/page.h> +#include <asm/skey.h> + +int skey_regions_initialized; + +static inline unsigned long load_real_address(unsigned long address) +{ + unsigned long real; + + asm volatile( + " lra %[real],0(%[address])" + : [real] "=d" (real) + : [address] "a" (address) + : "cc"); + return real; +} + +/* + * Initialize storage keys of registered memory regions with the + * default key. This is useful for code which is executed with a + * non-default access key. + */ +void __skey_regions_initialize(void) +{ + unsigned long address, real; + struct skey_region *r, *end; + + r = __skey_region_start; + end = __skey_region_end; + while (r < end) { + address = r->start & PAGE_MASK; + do { + real = load_real_address(address); + page_set_storage_key(real, PAGE_DEFAULT_KEY, 1); + address += PAGE_SIZE; + } while (address < r->end); + r++; + } + /* + * Make sure storage keys are initialized before + * skey_regions_initialized is changed. + */ + barrier(); + WRITE_ONCE(skey_regions_initialized, 1); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 15a016c10563..b7429f30afc1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,10 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SMP related functions * * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky <schwidefsky@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> @@ -15,11 +15,12 @@ * operates on physical cpu numbers needs to go into smp.c. */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> -#include <linux/module.h> +#include <linux/memblock.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/err.h> @@ -28,29 +29,45 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqflags.h> +#include <linux/irq_work.h> #include <linux/cpu.h> #include <linux/slab.h> +#include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> #include <linux/crash_dump.h> +#include <linux/kprobes.h> +#include <asm/access-regs.h> #include <asm/asm-offsets.h> -#include <asm/switch_to.h> +#include <asm/machine.h> +#include <asm/ctlreg.h> +#include <asm/pfault.h> +#include <asm/diag.h> #include <asm/facility.h> +#include <asm/fpu.h> #include <asm/ipl.h> #include <asm/setup.h> #include <asm/irq.h> #include <asm/tlbflush.h> #include <asm/vtimer.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/sclp.h> -#include <asm/vdso.h> #include <asm/debug.h> #include <asm/os_info.h> #include <asm/sigp.h> +#include <asm/idle.h> +#include <asm/nmi.h> +#include <asm/stacktrace.h> +#include <asm/topology.h> +#include <asm/vdso.h> +#include <asm/maccess.h> #include "entry.h" enum { ec_schedule = 0, ec_call_function_single, ec_stop_cpu, + ec_mcck_pending, + ec_irq_work, }; enum { @@ -58,46 +75,46 @@ enum { CPU_STATE_CONFIGURED, }; -struct pcpu { - struct cpu cpu; - struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ - unsigned long async_stack; /* async stack for the cpu */ - unsigned long panic_stack; /* panic stack for the cpu */ - unsigned long ec_mask; /* bit mask for ec_xxx functions */ - int state; /* physical cpu state */ - int polarization; /* physical polarization */ - u16 address; /* physical cpu address */ -}; +static u8 boot_core_type; +DEFINE_PER_CPU(struct pcpu, pcpu_devices); +/* + * Pointer to the pcpu area of the boot CPU. This is required when a restart + * interrupt is triggered on an offline CPU. For that case accessing percpu + * data with the common primitives does not work, since the percpu offset is + * stored in a non existent lowcore. + */ +static struct pcpu *ipl_pcpu; + +unsigned int smp_cpu_mt_shift; +EXPORT_SYMBOL(smp_cpu_mt_shift); + +unsigned int smp_cpu_mtid; +EXPORT_SYMBOL(smp_cpu_mtid); -static u8 boot_cpu_type; -static u16 boot_cpu_address; -static struct pcpu pcpu_devices[NR_CPUS]; +#ifdef CONFIG_CRASH_DUMP +__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +#endif + +static unsigned int smp_max_threads __initdata = -1U; +cpumask_t cpu_setup_mask; + +static int __init early_smt(char *s) +{ + get_option(&s, &smp_max_threads); + return 0; +} +early_param("smt", early_smt); /* * The smp_cpu_state_mutex must be held when changing the state or polarization - * member of a pcpu data structure within the pcpu_devices arreay. + * member of a pcpu data structure within the pcpu_devices array. */ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) -{ - register unsigned int reg1 asm ("1") = parm; - int cc; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); - if (status && cc == 1) - *status = reg1; - return cc; -} - -static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm) { int cc; @@ -125,7 +142,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) static inline int pcpu_stopped(struct pcpu *pcpu) { - u32 uninitialized_var(status); + u32 status; if (__pcpu_sigp(pcpu->address, SIGP_SENSE, 0, &status) != SIGP_CC_STATUS_STORED) @@ -145,159 +162,187 @@ static inline int pcpu_running(struct pcpu *pcpu) /* * Find struct pcpu by cpu address. */ -static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) { int cpu; for_each_cpu(cpu, mask) - if (pcpu_devices[cpu].address == address) - return pcpu_devices + cpu; + if (per_cpu(pcpu_devices, cpu).address == address) + return &per_cpu(pcpu_devices, cpu); return NULL; } static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { - int order; - - set_bit(ec_bit, &pcpu->ec_mask); - order = pcpu_running(pcpu) ? - SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; - pcpu_sigp_retry(pcpu, order, 0); + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + pcpu->ec_clk = get_tod_clock_fast(); + pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0); } -static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { - struct _lowcore *lc; - - if (pcpu != &pcpu_devices[0]) { - pcpu->lowcore = (struct _lowcore *) - __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - pcpu->panic_stack = __get_free_page(GFP_KERNEL); - if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) - goto out; - } - lc = pcpu->lowcore; - memcpy(lc, &S390_lowcore, 512); + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc; + + lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + async_stack = stack_alloc(); + mcck_stack = stack_alloc(); + if (!lc || !nodat_stack || !async_stack || !mcck_stack) + goto out; + memcpy(lc, get_lowcore(), 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = pcpu->async_stack + ASYNC_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->panic_stack = pcpu->panic_stack + PAGE_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->async_stack = async_stack + STACK_INIT_OFFSET; + lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; lc->cpu_nr = cpu; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); - if (!lc->extended_save_area_addr) - goto out; - } -#else - if (vdso_alloc_per_cpu(lc)) + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->spinlock_index = 0; + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; + if (nmi_alloc_mcesa(&lc->mcesad)) goto out; -#endif + if (abs_lowcore_map(cpu, lc, true)) + goto out_mcesa; lowcore_ptr[cpu] = lc; - pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); return 0; + +out_mcesa: + nmi_free_mcesa(&lc->mcesad); out: - if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); - } + stack_free(mcck_stack); + stack_free(async_stack); + free_pages(nodat_stack, THREAD_SIZE_ORDER); + free_pages((unsigned long) lc, LC_ORDER); return -ENOMEM; } -#ifdef CONFIG_HOTPLUG_CPU - -static void pcpu_free_lowcore(struct pcpu *pcpu) +static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu) { - pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); - lowcore_ptr[pcpu - pcpu_devices] = NULL; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - struct _lowcore *lc = pcpu->lowcore; + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc; - free_page((unsigned long) lc->extended_save_area_addr); - lc->extended_save_area_addr = 0; - } -#else - vdso_free_per_cpu(pcpu->lowcore); -#endif - if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); - } + lc = lowcore_ptr[cpu]; + nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; + async_stack = lc->async_stack - STACK_INIT_OFFSET; + mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); + lowcore_ptr[cpu] = NULL; + abs_lowcore_unmap(cpu); + nmi_free_mcesa(&lc->mcesad); + stack_free(async_stack); + stack_free(mcck_stack); + free_pages(nodat_stack, THREAD_SIZE_ORDER); + free_pages((unsigned long) lc, LC_ORDER); } -#endif /* CONFIG_HOTPLUG_CPU */ - static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc, *abs_lc; - atomic_inc(&init_mm.context.attach_count); + lc = lowcore_ptr[cpu]; + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->pcpu = (unsigned long)pcpu; + lc->restart_flags = RESTART_FLAG_CTLREGS; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; - lc->kernel_asce = S390_lowcore.kernel_asce; - lc->machine_flags = S390_lowcore.machine_flags; - lc->ftrace_func = S390_lowcore.ftrace_func; - lc->user_timer = lc->system_timer = lc->steal_timer = 0; - __ctl_store(lc->cregs_save_area, 0, 15); + lc->kernel_asce = get_lowcore()->kernel_asce; + lc->user_asce = s390_invalid_asce; + lc->user_timer = lc->system_timer = + lc->steal_timer = lc->avg_steal_timer = 0; + abs_lc = get_abs_lowcore(); + memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); + put_abs_lowcore(abs_lc); + lc->cregs_save_area[1] = lc->user_asce; + lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); - memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); + arch_spin_lock_setup(cpu); } -static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +static void pcpu_attach_task(int cpu, struct task_struct *tsk) { - struct _lowcore *lc = pcpu->lowcore; - struct thread_info *ti = task_thread_info(tsk); - - lc->kernel_stack = (unsigned long) task_stack_page(tsk) - + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->thread_info = (unsigned long) task_thread_info(tsk); - lc->current_task = (unsigned long) tsk; - lc->user_timer = ti->user_timer; - lc->system_timer = ti->system_timer; + struct lowcore *lc; + + lc = lowcore_ptr[cpu]; + lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; + lc->current_task = (unsigned long)tsk; + lc->lpp = LPP_MAGIC; + lc->current_pid = tsk->pid; + lc->user_timer = tsk->thread.user_timer; + lc->guest_timer = tsk->thread.guest_timer; + lc->system_timer = tsk->thread.system_timer; + lc->hardirq_timer = tsk->thread.hardirq_timer; + lc->softirq_timer = tsk->thread.softirq_timer; lc->steal_timer = 0; +#ifdef CONFIG_STACKPROTECTOR + lc->stack_canary = tsk->stack_canary; +#endif } -static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc; + lc = lowcore_ptr[cpu]; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; - lc->restart_source = -1UL; - pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); + lc->restart_source = -1U; + pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0); } +typedef void (pcpu_delegate_fn)(void *); + /* * Call function via PSW restart on pcpu and stop the current cpu. */ -static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), - void *data, unsigned long stack) +static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) +{ + func(data); /* should not return */ +} + +static void __noreturn pcpu_delegate(struct pcpu *pcpu, int cpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) { - struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned long source_cpu = stap(); + struct lowcore *lc, *abs_lc; + unsigned int source_cpu; - __load_psw_mask(psw_kernel_bits); - if (pcpu->address == source_cpu) - func(data); /* should not return */ + lc = lowcore_ptr[cpu]; + source_cpu = stap(); + + if (pcpu->address == source_cpu) { + call_on_stack(2, stack, void, __pcpu_delegate, + pcpu_delegate_fn *, func, void *, data); + } /* Stop target cpu (if func returns this stops the current cpu). */ pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0); /* Restart func on the target cpu and stop the current cpu. */ - mem_assign_absolute(lc->restart_stack, stack); - mem_assign_absolute(lc->restart_fn, (unsigned long) func); - mem_assign_absolute(lc->restart_data, (unsigned long) data); - mem_assign_absolute(lc->restart_source, source_cpu); + if (lc) { + lc->restart_stack = stack; + lc->restart_fn = (unsigned long)func; + lc->restart_data = (unsigned long)data; + lc->restart_source = source_cpu; + } else { + abs_lc = get_abs_lowcore(); + abs_lc->restart_stack = stack; + abs_lc->restart_fn = (unsigned long)func; + abs_lc->restart_data = (unsigned long)data; + abs_lc->restart_source = source_cpu; + put_abs_lowcore(abs_lc); + } asm volatile( "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" "1: sigp 0,%1,%3 # sigp stop to current cpu\n" - " brc 2,1b # busy, try again\n" + " brc 2,1b # busy, try again" : : "d" (pcpu->address), "d" (source_cpu), "K" (SIGP_RESTART), "K" (SIGP_STOP) : "0", "1", "cc"); @@ -305,27 +350,36 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), } /* - * Call function on an online CPU. + * Enable additional logical cpus for multi-threading. */ -void smp_call_online_cpu(void (*func)(void *), void *data) +static int pcpu_set_smt(unsigned int mtid) { - struct pcpu *pcpu; + int cc; - /* Use the current cpu if it is online. */ - pcpu = pcpu_find_address(cpu_online_mask, stap()); - if (!pcpu) - /* Use the first online cpu. */ - pcpu = pcpu_devices + cpumask_first(cpu_online_mask); - pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); + if (smp_cpu_mtid == mtid) + return 0; + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); + if (cc == 0) { + smp_cpu_mtid = mtid; + smp_cpu_mt_shift = 0; + while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) + smp_cpu_mt_shift++; + per_cpu(pcpu_devices, 0).address = stap(); + } + return cc; } /* * Call function on the ipl CPU. */ -void smp_call_ipl_cpu(void (*func)(void *), void *data) +void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data) { - pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->panic_stack + PAGE_SIZE); + struct lowcore *lc = lowcore_ptr[0]; + + if (ipl_pcpu->address == stap()) + lc = get_lowcore(); + + pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -333,81 +387,94 @@ int smp_find_processor_id(u16 address) int cpu; for_each_present_cpu(cpu) - if (pcpu_devices[cpu].address == address) + if (per_cpu(pcpu_devices, cpu).address == address) return cpu; return -1; } -int smp_vcpu_scheduled(int cpu) +void schedule_mcck_handler(void) { - return pcpu_running(pcpu_devices + cpu); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending); } -void smp_yield(void) +bool notrace arch_vcpu_is_preempted(int cpu) { - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return false; + if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu))) + return false; + return true; } +EXPORT_SYMBOL(arch_vcpu_is_preempted); -void smp_yield_cpu(int cpu) +void notrace smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) - asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); - else if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); + if (!machine_has_diag9c()) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (per_cpu(pcpu_devices, cpu).address)); } +EXPORT_SYMBOL_GPL(smp_yield_cpu); /* * Send cpus emergency shutdown signal. This gives the cpus the * opportunity to complete outstanding interrupts. */ -void smp_emergency_stop(cpumask_t *cpumask) +void notrace smp_emergency_stop(void) { + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + static cpumask_t cpumask; u64 end; int cpu; - end = get_tod_clock() + (1000000UL << 12); - for_each_cpu(cpu, cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + arch_spin_lock(&lock); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + end = get_tod_clock_monotonic() + (1000000UL << 12); + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && - get_tod_clock() < end) + get_tod_clock_monotonic() < end) cpu_relax(); } - while (get_tod_clock() < end) { - for_each_cpu(cpu, cpumask) - if (pcpu_stopped(pcpu_devices + cpu)) - cpumask_clear_cpu(cpu, cpumask); - if (cpumask_empty(cpumask)) + while (get_tod_clock_monotonic() < end) { + for_each_cpu(cpu, &cpumask) + if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) break; cpu_relax(); } + arch_spin_unlock(&lock); } +NOKPROBE_SYMBOL(smp_emergency_stop); /* * Stop all cpus but the current one. */ void smp_send_stop(void) { - cpumask_t cpumask; + struct pcpu *pcpu; int cpu; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + __load_psw_mask(PSW_KERNEL_BITS); trace_hardirqs_off(); debug_set_critical(); - cpumask_copy(&cpumask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &cpumask); if (oops_in_progress) - smp_emergency_stop(&cpumask); + smp_emergency_stop(); /* stop all processors */ - for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); pcpu_sigp_retry(pcpu, SIGP_STOP, 0); while (!pcpu_stopped(pcpu)) cpu_relax(); @@ -415,15 +482,6 @@ void smp_send_stop(void) } /* - * Stop the current cpu. - */ -void smp_stop_cpu(void) -{ - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); - for (;;) ; -} - -/* * This is the main routine where commands issued by other * cpus are handled. */ @@ -432,13 +490,17 @@ static void smp_handle_ext_call(void) unsigned long bits; /* handle bit signal external calls */ - bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + bits = this_cpu_xchg(pcpu_devices.ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) scheduler_ipi(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); + if (test_bit(ec_mcck_pending, &bits)) + s390_handle_mcck(); + if (test_bit(ec_irq_work, &bits)) + irq_work_run(); } static void do_ext_call_interrupt(struct ext_code ext_code, @@ -453,267 +515,349 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } -#ifndef CONFIG_64BIT -/* - * this function sends a 'purge tlb' signal to another CPU. - */ -static void smp_ptlb_callback(void *info) -{ - __tlb_flush_local(); -} - -void smp_ptlb_all(void) -{ - on_each_cpu(smp_ptlb_callback, NULL, 1); -} -EXPORT_SYMBOL(smp_ptlb_all); -#endif /* ! CONFIG_64BIT */ - /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing * anything. Worst case is that we lose a reschedule ... */ -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_schedule); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule); } -/* - * parameter area for the set/clear control bit callbacks - */ -struct ec_creg_mask_parms { - unsigned long orval; - unsigned long andval; - int cr; -}; - -/* - * callback for setting/clearing control bits - */ -static void smp_ctl_bit_callback(void *info) +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) { - struct ec_creg_mask_parms *pp = info; - unsigned long cregs[16]; - - __ctl_store(cregs, 0, 15); - cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; - __ctl_load(cregs, 0, 15); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work); } +#endif -/* - * Set a bit in a control register of all cpus - */ -void smp_ctl_set_bit(int cr, int bit) +#ifdef CONFIG_CRASH_DUMP + +int smp_store_status(int cpu) { - struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; + struct lowcore *lc; + struct pcpu *pcpu; + unsigned long pa; - on_each_cpu(smp_ctl_bit_callback, &parms, 1); + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + lc = lowcore_ptr[cpu]; + pa = __pa(&lc->floating_pt_save_area); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + if (!cpu_has_vx() && !cpu_has_gs()) + return 0; + pa = lc->mcesad & MCESA_ORIGIN_MASK; + if (cpu_has_gs()) + pa |= lc->mcesad & MCESA_LC_MASK; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + return 0; } -EXPORT_SYMBOL(smp_ctl_set_bit); /* - * Clear a bit in a control register of all cpus + * Collect CPU state of the previous, crashed system. + * There are three cases: + * 1) standard zfcp/nvme dump + * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The boot CPU state is located in + * the absolute lowcore of the memory stored in the HSA. The zcore code + * will copy the boot CPU state from the HSA. + * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory) + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The firmware or the boot-loader + * stored the registers of the boot CPU in the absolute lowcore in the + * memory of the old system. + * 3) kdump or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The kexec code or the boot-loader + * stored the registers of the boot CPU in the memory of the old system. + * + * Note that the legacy kdump mode where the old kernel stored the CPU states + * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr= + * kernel parameter. The is_kdump_kernel() implementation on s390 is independent + * of the elfcorehdr= parameter. */ -void smp_ctl_clear_bit(int cr, int bit) +static bool dump_available(void) { - struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - - on_each_cpu(smp_ctl_bit_callback, &parms, 1); + return oldmem_data.start || is_ipl_type_dump(); } -EXPORT_SYMBOL(smp_ctl_clear_bit); -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) +void __init smp_save_dump_ipl_cpu(void) +{ + struct save_area *sa; + void *regs; -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); + if (!dump_available()) + return; + sa = save_area_alloc(true); + regs = memblock_alloc_or_panic(512, 8); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); + save_area_add_regs(sa, regs); + memblock_free(regs, 512); + if (cpu_has_vx()) + save_area_add_vxrs(sa, boot_cpu_vector_save_area); +} -static void __init smp_get_save_area(int cpu, u16 address) +void __init smp_save_dump_secondary_cpus(void) { - void *lc = pcpu_devices[0].lowcore; - struct save_area *save_area; + int addr, boot_cpu_addr, max_cpu_addr; + struct save_area *sa; + void *page; - if (is_kdump_kernel()) - return; - if (!OLDMEM_BASE && (address == boot_cpu_address || - ipl_info.type != IPL_TYPE_FCP_DUMP)) - return; - if (cpu >= NR_CPUS) { - pr_warning("CPU %i exceeds the maximum %i and is excluded " - "from the dump\n", cpu, NR_CPUS - 1); - return; - } - save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL); - if (!save_area) - panic("could not allocate memory for save area\n"); - zfcpdump_save_areas[cpu] = save_area; -#ifdef CONFIG_CRASH_DUMP - if (address == boot_cpu_address) { - /* Copy the registers of the boot cpu. */ - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), - SAVE_AREA_BASE - PAGE_SIZE, 0); + if (!dump_available()) return; + /* Allocate a page as dumping area for the store status sigps */ + page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!page) + panic("ERROR: Failed to allocate %lx bytes below %lx\n", + PAGE_SIZE, 1UL << 31); + + /* Set multi-threading state to the previous system. */ + pcpu_set_smt(sclp.mtid_prev); + boot_cpu_addr = stap(); + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (addr = 0; addr <= max_cpu_addr; addr++) { + if (addr == boot_cpu_addr) + continue; + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == + SIGP_CC_NOT_OPERATIONAL) + continue; + sa = save_area_alloc(false); + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); + save_area_add_regs(sa, page); + if (cpu_has_vx()) { + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page)); + save_area_add_vxrs(sa, page); + } } -#endif - /* Get the registers of a non-boot cpu. */ - __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); + memblock_free(page, PAGE_SIZE); + diag_amode31_ops.diag308_reset(); + pcpu_set_smt(0); } +#endif /* CONFIG_CRASH_DUMP */ -int smp_store_status(int cpu) +void smp_cpu_set_polarization(int cpu, int val) { - struct pcpu *pcpu; - - pcpu = pcpu_devices + cpu; - if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, - 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) - return -EIO; - return 0; + per_cpu(pcpu_devices, cpu).polarization = val; } -#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +int smp_cpu_get_polarization(int cpu) +{ + return per_cpu(pcpu_devices, cpu).polarization; +} -static inline void smp_get_save_area(int cpu, u16 address) { } +void smp_cpu_set_capacity(int cpu, unsigned long val) +{ + per_cpu(pcpu_devices, cpu).capacity = val; +} -#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +unsigned long smp_cpu_get_capacity(int cpu) +{ + return per_cpu(pcpu_devices, cpu).capacity; +} -void smp_cpu_set_polarization(int cpu, int val) +void smp_set_core_capacity(int cpu, unsigned long val) { - pcpu_devices[cpu].polarization = val; + int i; + + cpu = smp_get_base_cpu(cpu); + for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++) + smp_cpu_set_capacity(i, val); } -int smp_cpu_get_polarization(int cpu) +int smp_cpu_get_cpu_address(int cpu) { - return pcpu_devices[cpu].polarization; + return per_cpu(pcpu_devices, cpu).address; } -static struct sclp_cpu_info *smp_get_cpu_info(void) +static void __ref smp_get_core_info(struct sclp_core_info *info, int early) { static int use_sigp_detection; - struct sclp_cpu_info *info; int address; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + if (use_sigp_detection || sclp_get_core_info(info, early)) { use_sigp_detection = 1; - for (address = 0; address <= MAX_CPU_ADDRESS; address++) { - if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + for (address = 0; + address < (SCLP_MAX_CORES << smp_cpu_mt_shift); + address += (1U << smp_cpu_mt_shift)) { + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; - info->cpu[info->configured].address = address; + info->core[info->configured].core_id = + address >> smp_cpu_mt_shift; + info->core[info->configured].type = boot_core_type; info->configured++; } info->combined = info->configured; } - return info; } -static int __cpuinit smp_add_present_cpu(int cpu); - -static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info, - int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; int cpu, nr, i; + u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) - continue; - if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - pcpu = pcpu_devices + cpu; - pcpu->address = info->cpu[i].address; - pcpu->state = (i >= info->configured) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH); set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) + if (!early && arch_register_cpu(cpu)) set_cpu_present(cpu, false); else nr++; - cpu = cpumask_next(cpu, &avail); + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); } return nr; } -static void __init smp_detect_cpus(void) +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) { - unsigned int cpu, c_cpus, s_cpus; - struct sclp_cpu_info *info; + struct sclp_core_entry *core; + static cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; - info = smp_get_cpu_info(); - if (!info) - panic("smp_detect_cpus failed to allocate memory\n"); - if (info->has_cpu_type) { - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address != boot_cpu_address) - continue; - /* The boot cpu dictates the cpu type. */ - boot_cpu_type = info->cpu[cpu].type; - break; + cpus_read_lock(); + mutex_lock(&smp_cpu_state_mutex); + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); + break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } + mutex_unlock(&smp_cpu_state_mutex); + cpus_read_unlock(); + return nr; +} + +void __init smp_detect_cpus(void) +{ + unsigned int cpu, mtid, c_cpus, s_cpus; + struct sclp_core_info *info; + u16 address; + + /* Get CPU information */ + info = memblock_alloc_or_panic(sizeof(*info), 8); + smp_get_core_info(info, 1); + /* Find boot CPU type */ + if (sclp.has_core_type) { + address = stap(); + for (cpu = 0; cpu < info->combined; cpu++) + if (info->core[cpu].core_id == address) { + /* The boot cpu dictates the cpu type. */ + boot_core_type = info->core[cpu].type; + break; + } + if (cpu >= info->combined) + panic("Could not find boot CPU type"); + } + + /* Set multi-threading state for the current system */ + mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; + mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; + pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); + + /* Print number of CPUs */ c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) + if (sclp.has_core_type && + info->core[cpu].type != boot_core_type) continue; - if (cpu < info->configured) { - smp_get_save_area(c_cpus, info->cpu[cpu].address); - c_cpus++; - } else - s_cpus++; + if (cpu < info->configured) + c_cpus += smp_cpu_mtid + 1; + else + s_cpus += smp_cpu_mtid + 1; } pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); - get_online_cpus(); - __smp_rescan_cpus(info, 0); - put_online_cpus(); - kfree(info); + memblock_free(info, sizeof(*info)); } /* * Activate a secondary processor. */ -static void __cpuinit smp_start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { - S390_lowcore.last_update_clock = get_tod_clock(); - S390_lowcore.restart_stack = (unsigned long) restart_stack; - S390_lowcore.restart_fn = (unsigned long) do_restart; - S390_lowcore.restart_data = 0; - S390_lowcore.restart_source = -1UL; - restore_access_regs(S390_lowcore.access_regs_save_area); - __ctl_load(S390_lowcore.cregs_save_area, 0, 15); - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + struct lowcore *lc = get_lowcore(); + int cpu = raw_smp_processor_id(); + + lc->last_update_clock = get_tod_clock(); + lc->restart_stack = (unsigned long)restart_stack; + lc->restart_fn = (unsigned long)do_restart; + lc->restart_data = 0; + lc->restart_source = -1U; + lc->restart_flags = 0; + restore_access_regs(lc->access_regs_save_area); cpu_init(); - preempt_disable(); + rcutree_report_cpu_starting(cpu); init_cpu_timer(); - init_cpu_vtimer(); + vtime_init(); + vdso_getcpu_init(); pfault_init(); - notify_cpu_starting(smp_processor_id()); - set_cpu_online(smp_processor_id(), true); + cpumask_set_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); + notify_cpu_starting(cpu); + if (topology_cpu_dedicated(cpu)) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); + set_cpu_online(cpu, true); inc_irq_stat(CPU_RST); local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != @@ -723,44 +867,50 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) rc = pcpu_alloc_lowcore(pcpu, cpu); if (rc) return rc; + /* + * Make sure global control register contents do not change + * until new CPU has initialized control registers. + */ + system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, tidle); - pcpu_start_fn(pcpu, smp_start_secondary, NULL); + pcpu_attach_task(cpu, tidle); + pcpu_start_fn(cpu, smp_start_secondary, NULL); + /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); + system_ctlreg_unlock(); return 0; } -static int __init setup_possible_cpus(char *s) -{ - int max, cpu; +static unsigned int setup_possible_cpus __initdata; - if (kstrtoint(s, 0, &max) < 0) - return 0; - init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) - set_cpu_possible(cpu, true); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); - -#ifdef CONFIG_HOTPLUG_CPU +early_param("possible_cpus", _setup_possible_cpus); int __cpu_disable(void) { - unsigned long cregs[16]; + struct ctlreg cregs[16]; + int cpu; /* Handle possible pending IPIs */ smp_handle_ext_call(); - set_cpu_online(smp_processor_id(), false); + cpu = smp_processor_id(); + set_cpu_online(cpu, false); + cpumask_clear_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); /* Disable pseudo page faults on this cpu. */ pfault_fini(); /* Disable interrupt sources via control register. */ - __ctl_store(cregs, 0, 15); - cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ - cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ - cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ - __ctl_load(cregs, 0, 15); + __local_ctl_store(0, 15, cregs); + cregs[0].val &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6].val &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14].val &= ~0x1f000000UL; /* disable most machine checks */ + __local_ctl_load(0, 15, cregs); + clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } @@ -769,58 +919,67 @@ void __cpu_die(unsigned int cpu) struct pcpu *pcpu; /* Wait until target cpu is down */ - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); while (!pcpu_stopped(pcpu)) cpu_relax(); - pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); + pcpu_free_lowcore(pcpu, cpu); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + pcpu->flags = 0; } void __noreturn cpu_die(void) { idle_task_exit(); - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0); for (;;) ; } -#endif /* CONFIG_HOTPLUG_CPU */ +void __init smp_fill_possible_mask(void) +{ + unsigned int possible, sclp_max, cpu; + + sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; + sclp_max = min(smp_max_threads, sclp_max); + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp_max); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} void __init smp_prepare_cpus(unsigned int max_cpus) { - /* request the 0x1201 emergency signal external interrupt */ - if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); - /* request the 0x1202 external call external interrupt */ - if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + system_ctl_set_bit(0, 14); + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); - smp_detect_cpus(); + system_ctl_set_bit(0, 13); + smp_rescan_cpus(true); } void __init smp_prepare_boot_cpu(void) { - struct pcpu *pcpu = pcpu_devices; - - boot_cpu_address = stap(); - pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = boot_cpu_address; - pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); - pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - S390_lowcore.percpu_offset = __per_cpu_offset[0]; - smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); - set_cpu_present(0, true); - set_cpu_online(0, true); -} + struct lowcore *lc = get_lowcore(); -void __init smp_cpus_done(unsigned int max_cpus) -{ + WARN_ON(!cpu_present(0) || !cpu_online(0)); + lc->percpu_offset = __per_cpu_offset[0]; + ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); + ipl_pcpu->state = CPU_STATE_CONFIGURED; + lc->pcpu = (unsigned long)ipl_pcpu; + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); + smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH); } void __init smp_setup_processor_id(void) { - S390_lowcore.cpu_nr = 0; + struct lowcore *lc = get_lowcore(); + + lc->cpu_nr = 0; + per_cpu(pcpu_devices, 0).address = stap(); + lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; } /* @@ -834,14 +993,13 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_HOTPLUG_CPU static ssize_t cpu_configure_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -851,41 +1009,53 @@ static ssize_t cpu_configure_store(struct device *dev, const char *buf, size_t count) { struct pcpu *pcpu; - int cpu, val, rc; + int cpu, val, rc, i; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; - /* disallow configuration changes of online cpus and cpu 0 */ + /* disallow configuration changes of online cpus */ cpu = dev->id; - if (cpu_online(cpu) || cpu == 0) - goto out; - pcpu = pcpu_devices + cpu; + cpu = smp_get_base_cpu(cpu); + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_online(cpu + i)) + goto out; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); rc = 0; switch (val) { case 0: if (pcpu->state != CPU_STATE_CONFIGURED) break; - rc = sclp_cpu_deconfigure(pcpu->address); + rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_STANDBY; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; case 1: if (pcpu->state != CPU_STATE_STANDBY) break; - rc = sclp_cpu_configure(pcpu->address); + rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; default: @@ -893,23 +1063,20 @@ static ssize_t cpu_configure_store(struct device *dev, } out: mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return rc ? rc : count; } static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); -#endif /* CONFIG_HOTPLUG_CPU */ static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); + return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { -#ifdef CONFIG_HOTPLUG_CPU &dev_attr_configure.attr, -#endif &dev_attr_address.attr, NULL, }; @@ -918,42 +1085,6 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; - unsigned int sequence; - - do { - sequence = ACCESS_ONCE(idle->sequence); - idle_count = ACCESS_ONCE(idle->idle_count); - if (ACCESS_ONCE(idle->clock_idle_enter)) - idle_count++; - } while ((sequence & 1) || (idle->sequence != sequence)); - return sprintf(buf, "%llu\n", idle_count); -} -static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - -static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_time = ACCESS_ONCE(idle->idle_time); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (idle->sequence != sequence)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; - return sprintf(buf, "%llu\n", idle_time >> 12); -} -static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); - static struct attribute *cpu_online_attrs[] = { &dev_attr_idle_count.attr, &dev_attr_idle_time_us.attr, @@ -964,76 +1095,61 @@ static struct attribute_group cpu_online_attr_group = { .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_online(unsigned int cpu) { - unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; - int err = 0; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - break; - case CPU_DEAD: - sysfs_remove_group(&s->kobj, &cpu_online_attr_group); - break; - } - return notifier_from_errno(err); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); + + return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); +} + +static int smp_cpu_pre_down(unsigned int cpu) +{ + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); + + sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); + return 0; } -static int __cpuinit smp_add_present_cpu(int cpu) +bool arch_cpu_is_hotpluggable(int cpu) { - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + return !!cpu; +} + +int arch_register_cpu(int cpu) +{ + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); int rc; - c->hotpluggable = 1; + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); rc = register_cpu(c, cpu); if (rc) goto out; - rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); + rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group); if (rc) goto out_cpu; - if (cpu_online(cpu)) { - rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - if (rc) - goto out_online; - } rc = topology_cpu_init(c); if (rc) goto out_topology; return 0; out_topology: - if (cpu_online(cpu)) - sysfs_remove_group(&s->kobj, &cpu_online_attr_group); -out_online: - sysfs_remove_group(&s->kobj, &cpu_common_attr_group); + sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group); out_cpu: -#ifdef CONFIG_HOTPLUG_CPU unregister_cpu(c); -#endif out: return rc; } -#ifdef CONFIG_HOTPLUG_CPU - -int __ref smp_rescan_cpus(void) +int __ref smp_rescan_cpus(bool early) { - struct sclp_cpu_info *info; + struct sclp_core_info *info; int nr; - info = smp_get_cpu_info(); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + smp_get_core_info(info, 0); + nr = __smp_rescan_cpus(info, early); kfree(info); if (nr) topology_schedule_update(); @@ -1047,27 +1163,30 @@ static ssize_t __ref rescan_store(struct device *dev, { int rc; - rc = smp_rescan_cpus(); + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; + rc = smp_rescan_cpus(false); + unlock_device_hotplug(); return rc ? rc : count; } -static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); -#endif /* CONFIG_HOTPLUG_CPU */ +static DEVICE_ATTR_WO(rescan); static int __init s390_smp_init(void) { - int cpu, rc; + struct device *dev_root; + int rc; - hotcpu_notifier(smp_cpu_notify, 0); -#ifdef CONFIG_HOTPLUG_CPU - rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); - if (rc) - return rc; -#endif - for_each_present_cpu(cpu) { - rc = smp_add_present_cpu(cpu); + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_rescan); + put_device(dev_root); if (rc) return rc; } - return 0; + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", + smp_cpu_online, smp_cpu_pre_down); + rc = rc <= 0 ? rc : 0; + return rc; } subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c new file mode 100644 index 000000000000..d4e40483f008 --- /dev/null +++ b/arch/s390/kernel/stackprotector.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef pr_fmt +#define pr_fmt(fmt) "stackprot: " fmt +#endif + +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/printk.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> +#include <asm/machine.h> +#include <asm/asm-offsets.h> +#include <asm/arch-stackprotector.h> + +#ifdef __DECOMPRESSOR + +#define DEBUGP boot_debug +#define EMERGP boot_emerg +#define PANIC boot_panic + +#else /* __DECOMPRESSOR */ + +#define DEBUGP pr_debug +#define EMERGP pr_emerg +#define PANIC panic + +#endif /* __DECOMPRESSOR */ + +int __bootdata_preserved(stack_protector_debug); + +unsigned long __stack_chk_guard; +EXPORT_SYMBOL(__stack_chk_guard); + +struct insn_ril { + u8 opc1 : 8; + u8 r1 : 4; + u8 opc2 : 4; + u32 imm; +} __packed; + +/* + * Convert a virtual instruction address to a real instruction address. The + * decompressor needs to patch instructions within the kernel image based on + * their virtual addresses, while dynamic address translation is still + * disabled. Therefore a translation from virtual kernel image addresses to + * the corresponding physical addresses is required. + * + * After dynamic address translation is enabled and when the kernel needs to + * patch instructions such a translation is not required since the addresses + * are identical. + */ +static struct insn_ril *vaddress_to_insn(unsigned long vaddress) +{ +#ifdef __DECOMPRESSOR + return (struct insn_ril *)__kernel_pa(vaddress); +#else + return (struct insn_ril *)vaddress; +#endif +} + +static unsigned long insn_to_vaddress(struct insn_ril *insn) +{ +#ifdef __DECOMPRESSOR + return (unsigned long)__kernel_va(insn); +#else + return (unsigned long)insn; +#endif +} + +#define INSN_RIL_STRING_SIZE (sizeof(struct insn_ril) * 2 + 1) + +static void insn_ril_to_string(char *str, struct insn_ril *insn) +{ + u8 *ptr = (u8 *)insn; + int i; + + for (i = 0; i < sizeof(*insn); i++) + hex_byte_pack(&str[2 * i], ptr[i]); + str[2 * i] = 0; +} + +static void stack_protector_dump(struct insn_ril *old, struct insn_ril *new) +{ + char ostr[INSN_RIL_STRING_SIZE]; + char nstr[INSN_RIL_STRING_SIZE]; + + insn_ril_to_string(ostr, old); + insn_ril_to_string(nstr, new); + DEBUGP("%016lx: %s -> %s\n", insn_to_vaddress(old), ostr, nstr); +} + +static int stack_protector_verify(struct insn_ril *insn, unsigned long kernel_start) +{ + char istr[INSN_RIL_STRING_SIZE]; + unsigned long vaddress, offset; + + /* larl */ + if (insn->opc1 == 0xc0 && insn->opc2 == 0x0) + return 0; + /* lgrl */ + if (insn->opc1 == 0xc4 && insn->opc2 == 0x8) + return 0; + insn_ril_to_string(istr, insn); + vaddress = insn_to_vaddress(insn); + if (__is_defined(__DECOMPRESSOR)) { + offset = (unsigned long)insn - kernel_start + TEXT_OFFSET; + EMERGP("Unexpected instruction at %016lx/%016lx: %s\n", vaddress, offset, istr); + PANIC("Stackprotector error\n"); + } else { + EMERGP("Unexpected instruction at %016lx: %s\n", vaddress, istr); + } + return -EINVAL; +} + +int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start) +{ + unsigned long canary, *loc; + struct insn_ril *insn, new; + int rc; + + /* + * Convert LARL/LGRL instructions to LLILF so register R1 contains the + * address of the per-cpu / per-process stack canary: + * + * LARL/LGRL R1,__stack_chk_guard => LLILF R1,__lc_stack_canary + */ + canary = __LC_STACK_CANARY; + if (machine_has_relocated_lowcore()) + canary += LOWCORE_ALT_ADDRESS; + for (loc = start; loc < end; loc++) { + insn = vaddress_to_insn(*loc); + rc = stack_protector_verify(insn, kernel_start); + if (rc) + return rc; + new = *insn; + new.opc1 = 0xc0; + new.opc2 = 0xf; + new.imm = canary; + if (stack_protector_debug) + stack_protector_dump(insn, &new); + s390_kernel_write(insn, &new, sizeof(*insn)); + } + return 0; +} + +#ifdef __DECOMPRESSOR +void __stack_protector_apply_early(unsigned long kernel_start) +{ + unsigned long *start, *end; + + start = (unsigned long *)vmlinux.stack_prot_start; + end = (unsigned long *)vmlinux.stack_prot_end; + __stack_protector_apply(start, end, kernel_start); +} +#endif diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 1785cd82253c..3aae7f70e6ab 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,96 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack trace management functions * * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sched.h> +#include <linux/perf_event.h> #include <linux/stacktrace.h> -#include <linux/kallsyms.h> -#include <linux/module.h> - -static unsigned long save_context_stack(struct stack_trace *trace, - unsigned long sp, - unsigned long low, - unsigned long high, - int savesched) +#include <linux/uaccess.h> +#include <asm/asm-offsets.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> +#include <asm/kprobes.h> +#include <asm/ptrace.h> + +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - struct stack_frame *sf; - struct pt_regs *regs; + struct unwind_state state; unsigned long addr; - while(1) { - sp &= PSW_ADDR_INSN; - if (sp < low || sp > high) - return sp; - sf = (struct stack_frame *)sp; - while(1) { - addr = sf->gprs[8] & PSW_ADDR_INSN; - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *)sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long)(sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *)sp; - addr = regs->psw.addr & PSW_ADDR_INSN; - if (savesched || !in_sched_functions(addr)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - } - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = regs->gprs[15]; + unwind_for_each_frame(&state, task, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || !consume_entry(cookie, addr)) + break; + } +} + +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, task, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) + return -EINVAL; + + if (state.regs) + return -EINVAL; + + addr = unwind_get_return_address(&state); + if (!addr) + return -EINVAL; + +#ifdef CONFIG_RETHOOK + /* + * Mark stacktraces with krethook functions on them + * as unreliable. + */ + if (state.ip == (unsigned long)arch_rethook_trampoline) + return -EINVAL; +#endif + + if (!consume_entry(cookie, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + return 0; +} + +static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie, + struct perf_callchain_entry_ctx *entry, bool perf, + unsigned long ip) +{ +#ifdef CONFIG_PERF_EVENTS + if (perf) { + if (perf_callchain_store(entry, ip)) + return false; + return true; } +#endif + return consume_entry(cookie, ip); +} + +static inline bool ip_invalid(unsigned long ip) +{ + /* + * Perform some basic checks if an instruction address taken + * from unreliable source is invalid. + */ + if (ip & 1) + return true; + if (ip < mmap_min_addr) + return true; + if (ip >= current->mm->context.asce_limit) + return true; + return false; } -void save_stack_trace(struct stack_trace *trace) +static inline bool ip_within_vdso(unsigned long ip) { - register unsigned long sp asm ("15"); - unsigned long orig_sp, new_sp; - - orig_sp = sp & PSW_ADDR_INSN; - new_sp = save_context_stack(trace, orig_sp, - S390_lowcore.panic_stack - PAGE_SIZE, - S390_lowcore.panic_stack, 1); - if (new_sp != orig_sp) + return in_range(ip, current->mm->context.vdso_base, vdso_text_size()); +} + +void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie, + struct perf_callchain_entry_ctx *entry, + const struct pt_regs *regs, bool perf) +{ + struct stack_frame_vdso_wrapper __user *sf_vdso; + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (!current->mm) return; - new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack, 1); - if (new_sp != orig_sp) + ip = instruction_pointer(regs); + if (!store_ip(consume_entry, cookie, entry, perf, ip)) return; - save_context_stack(trace, new_sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 1); + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (1) { + if (__get_user(sp, &sf->back_chain)) + break; + /* + * VDSO entry code has a non-standard stack frame layout. + * See VDSO user wrapper code for details. + */ + if (!sp && ip_within_vdso(ip)) { + sf_vdso = (void __user *)sf; + if (__get_user(ip, &sf_vdso->return_address)) + break; + sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD; + sf = (void __user *)sp; + if (__get_user(sp, &sf->back_chain)) + break; + } else { + sf = (void __user *)sp; + if (__get_user(ip, &sf->gprs[8])) + break; + } + /* Sanity check: ABI requires SP to be 8 byte aligned. */ + if (sp & 0x7) + break; + if (ip_invalid(ip)) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (!first) + break; + ip = regs->gprs[14]; + if (ip_invalid(ip)) + break; + } + if (!store_ip(consume_entry, cookie, entry, perf, ip)) + break; + first = false; + } + pagefault_enable(); } -EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) { - unsigned long sp, low, high; - - sp = tsk->thread.ksp & PSW_ADDR_INSN; - low = (unsigned long) task_stack_page(tsk); - high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 0); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; + arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false); } -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c new file mode 100644 index 000000000000..5eae2e25997a --- /dev/null +++ b/arch/s390/kernel/sthyi.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * store hypervisor information instruction emulation functions. + * + * Copyright IBM Corp. 2016 + * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> +#include <linux/syscalls.h> +#include <linux/mutex.h> +#include <asm/asm-offsets.h> +#include <asm/sclp.h> +#include <asm/diag.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/facility.h> +#include <asm/sthyi.h> +#include <asm/asm.h> +#include "entry.h" + +#define DED_WEIGHT 0xffff +/* + * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string + * as they are justified with spaces. + */ +#define CP 0xc3d7404040404040UL +#define IFL 0xc9c6d34040404040UL + +enum hdr_flags { + HDR_NOT_LPAR = 0x10, + HDR_STACK_INCM = 0x20, + HDR_STSI_UNAV = 0x40, + HDR_PERF_UNAV = 0x80, +}; + +enum mac_validity { + MAC_NAME_VLD = 0x20, + MAC_ID_VLD = 0x40, + MAC_CNT_VLD = 0x80, +}; + +enum par_flag { + PAR_MT_EN = 0x80, +}; + +enum par_validity { + PAR_GRP_VLD = 0x08, + PAR_ID_VLD = 0x10, + PAR_ABS_VLD = 0x20, + PAR_WGHT_VLD = 0x40, + PAR_PCNT_VLD = 0x80, +}; + +struct hdr_sctn { + u8 infhflg1; + u8 infhflg2; /* reserved */ + u8 infhval1; /* reserved */ + u8 infhval2; /* reserved */ + u8 reserved[3]; + u8 infhygct; + u16 infhtotl; + u16 infhdln; + u16 infmoff; + u16 infmlen; + u16 infpoff; + u16 infplen; + u16 infhoff1; + u16 infhlen1; + u16 infgoff1; + u16 infglen1; + u16 infhoff2; + u16 infhlen2; + u16 infgoff2; + u16 infglen2; + u16 infhoff3; + u16 infhlen3; + u16 infgoff3; + u16 infglen3; + u8 reserved2[4]; +} __packed; + +struct mac_sctn { + u8 infmflg1; /* reserved */ + u8 infmflg2; /* reserved */ + u8 infmval1; + u8 infmval2; /* reserved */ + u16 infmscps; + u16 infmdcps; + u16 infmsifl; + u16 infmdifl; + char infmname[8]; + char infmtype[4]; + char infmmanu[16]; + char infmseq[16]; + char infmpman[4]; + u8 reserved[4]; +} __packed; + +struct par_sctn { + u8 infpflg1; + u8 infpflg2; /* reserved */ + u8 infpval1; + u8 infpval2; /* reserved */ + u16 infppnum; + u16 infpscps; + u16 infpdcps; + u16 infpsifl; + u16 infpdifl; + u16 reserved; + char infppnam[8]; + u32 infpwbcp; + u32 infpabcp; + u32 infpwbif; + u32 infpabif; + char infplgnm[8]; + u32 infplgcp; + u32 infplgif; +} __packed; + +struct sthyi_sctns { + struct hdr_sctn hdr; + struct mac_sctn mac; + struct par_sctn par; +} __packed; + +struct cpu_inf { + u64 lpar_cap; + u64 lpar_grp_cap; + u64 lpar_weight; + u64 all_weight; + int cpu_num_ded; + int cpu_num_shd; +}; + +struct lpar_cpu_inf { + struct cpu_inf cp; + struct cpu_inf ifl; +}; + +/* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * cache the retrieved data whose valid period is 1s. + */ +#define CACHE_VALID_JIFFIES HZ + +struct sthyi_info { + void *info; + unsigned long end; +}; + +static DEFINE_MUTEX(sthyi_mutex); +static struct sthyi_info sthyi_cache; + +static inline u64 cpu_id(u8 ctidx, void *diag224_buf) +{ + return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); +} + +/* + * Scales the cpu capping from the lpar range to the one expected in + * sthyi data. + * + * diag204 reports a cap in hundredths of processor units. + * z/VM's range for one core is 0 - 0x10000. + */ +static u32 scale_cap(u32 in) +{ + return (0x10000 * in) / 100; +} + +static void fill_hdr(struct sthyi_sctns *sctns) +{ + sctns->hdr.infhdln = sizeof(sctns->hdr); + sctns->hdr.infmoff = sizeof(sctns->hdr); + sctns->hdr.infmlen = sizeof(sctns->mac); + sctns->hdr.infplen = sizeof(sctns->par); + sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; + sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; +} + +static void fill_stsi_mac(struct sthyi_sctns *sctns, + struct sysinfo_1_1_1 *sysinfo) +{ + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + if (*(u64 *)sctns->mac.infmname != 0) + sctns->mac.infmval1 |= MAC_NAME_VLD; + + if (stsi(sysinfo, 1, 1, 1)) + return; + + memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); + memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); + memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); + memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); + + sctns->mac.infmval1 |= MAC_ID_VLD; +} + +static void fill_stsi_par(struct sthyi_sctns *sctns, + struct sysinfo_2_2_2 *sysinfo) +{ + if (stsi(sysinfo, 2, 2, 2)) + return; + + sctns->par.infppnum = sysinfo->lpar_number; + memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); + + sctns->par.infpval1 |= PAR_ID_VLD; +} + +static void fill_stsi(struct sthyi_sctns *sctns) +{ + void *sysinfo; + + /* Errors are handled through the validity bits in the response. */ + sysinfo = (void *)__get_free_page(GFP_KERNEL); + if (!sysinfo) + return; + + fill_stsi_mac(sctns, sysinfo); + fill_stsi_par(sctns, sysinfo); + + free_pages((unsigned long)sysinfo, 0); +} + +static void fill_diag_mac(struct sthyi_sctns *sctns, + struct diag204_x_phys_block *block, + void *diag224_buf) +{ + int i; + + for (i = 0; i < block->hdr.cpus; i++) { + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdcps++; + else + sctns->mac.infmscps++; + break; + case IFL: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdifl++; + else + sctns->mac.infmsifl++; + break; + } + } + sctns->mac.infmval1 |= MAC_CNT_VLD; +} + +/* Returns a pointer to the next partition block. */ +static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, + bool this_lpar, + void *diag224_buf, + struct diag204_x_part_block *block) +{ + int i, capped = 0, weight_cp = 0, weight_ifl = 0; + struct cpu_inf *cpu_inf; + + for (i = 0; i < block->hdr.rcpus; i++) { + if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) + continue; + + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + cpu_inf = &part_inf->cp; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_cp |= block->cpus[i].cur_weight; + break; + case IFL: + cpu_inf = &part_inf->ifl; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_ifl |= block->cpus[i].cur_weight; + break; + default: + continue; + } + + if (!this_lpar) + continue; + + capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; + cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; + cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; + + if (block->cpus[i].weight == DED_WEIGHT) + cpu_inf->cpu_num_ded += 1; + else + cpu_inf->cpu_num_shd += 1; + } + + if (this_lpar && capped) { + part_inf->cp.lpar_weight = weight_cp; + part_inf->ifl.lpar_weight = weight_ifl; + } + part_inf->cp.all_weight += weight_cp; + part_inf->ifl.all_weight += weight_ifl; + return (struct diag204_x_part_block *)&block->cpus[i]; +} + +static void *diag204_get_data(bool diag204_allow_busy) +{ + unsigned long subcode; + void *diag204_buf; + int pages, rc; + + subcode = DIAG204_SUBC_RSI; + subcode |= DIAG204_INFO_EXT; + pages = diag204(subcode, 0, NULL); + if (pages < 0) + return ERR_PTR(pages); + if (pages == 0) + return ERR_PTR(-ENODATA); + diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + subcode = DIAG204_SUBC_STIB7; + subcode |= DIAG204_INFO_EXT; + if (diag204_has_bif() && diag204_allow_busy) + subcode |= DIAG204_BIF_BIT; + rc = diag204(subcode, pages, diag204_buf); + if (rc < 0) { + vfree(diag204_buf); + return ERR_PTR(rc); + } + return diag204_buf; +} + +static bool is_diag204_cached(struct sthyi_sctns *sctns) +{ + /* + * Check if validity bits are set when diag204 data + * is gathered. + */ + if (sctns->par.infpval1) + return true; + return false; +} + +static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf) +{ + int i; + bool this_lpar; + void *diag224_buf = NULL; + struct diag204_x_info_blk_hdr *ti_hdr; + struct diag204_x_part_block *part_block; + struct diag204_x_phys_block *phys_block; + struct lpar_cpu_inf lpar_inf = {}; + + diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!diag224_buf || diag224(diag224_buf)) + goto out; + + ti_hdr = diag204_buf; + part_block = diag204_buf + sizeof(*ti_hdr); + + for (i = 0; i < ti_hdr->npar; i++) { + /* + * For the calling lpar we also need to get the cpu + * caps and weights. The time information block header + * specifies the offset to the partition block of the + * caller lpar, so we know when we process its data. + */ + this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; + part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, + part_block); + } + + phys_block = (struct diag204_x_phys_block *)part_block; + part_block = diag204_buf + ti_hdr->this_part; + if (part_block->hdr.mtid) + sctns->par.infpflg1 = PAR_MT_EN; + + sctns->par.infpval1 |= PAR_GRP_VLD; + sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); + sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); + memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, + sizeof(sctns->par.infplgnm)); + + sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; + sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; + sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; + sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; + sctns->par.infpval1 |= PAR_PCNT_VLD; + + sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); + sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); + sctns->par.infpval1 |= PAR_ABS_VLD; + + /* + * Everything below needs global performance data to be + * meaningful. + */ + if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { + sctns->hdr.infhflg1 |= HDR_PERF_UNAV; + goto out; + } + + fill_diag_mac(sctns, phys_block, diag224_buf); + + if (lpar_inf.cp.lpar_weight) { + sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * + lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; + } + + if (lpar_inf.ifl.lpar_weight) { + sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * + lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; + } + sctns->par.infpval1 |= PAR_WGHT_VLD; + +out: + free_page((unsigned long)diag224_buf); +} + +static int sthyi(u64 vaddr, u64 *rc) +{ + union register_pair r1 = { .even = 0, }; /* subcode */ + union register_pair r2 = { .even = vaddr, }; + int cc; + + asm volatile( + ".insn rre,0xB2560000,%[r1],%[r2]\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [r2] "+&d" (r2.pair) + : [r1] "d" (r1.pair) + : CC_CLOBBER_LIST("memory")); + *rc = r2.odd; + return CC_TRANSFORM(cc); +} + +static int fill_dst(void *dst, u64 *rc) +{ + void *diag204_buf; + + struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; + + /* + * If the facility is on, we don't want to emulate the instruction. + * We ask the hypervisor to provide the data. + */ + if (test_facility(74)) { + memset(dst, 0, PAGE_SIZE); + return sthyi((u64)dst, rc); + } + /* + * When emulating, if diag204 returns BUSY don't reset dst buffer + * and use cached data. + */ + *rc = 0; + diag204_buf = diag204_get_data(is_diag204_cached(sctns)); + if (IS_ERR(diag204_buf)) + return PTR_ERR(diag204_buf); + memset(dst, 0, PAGE_SIZE); + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns, diag204_buf); + vfree(diag204_buf); + return 0; +} + +static int sthyi_init_cache(void) +{ + if (sthyi_cache.info) + return 0; + sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); + if (!sthyi_cache.info) + return -ENOMEM; + sthyi_cache.end = jiffies - 1; /* expired */ + return 0; +} + +static int sthyi_update_cache(u64 *rc) +{ + int r; + + r = fill_dst(sthyi_cache.info, rc); + if (r == 0) { + sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; + } else if (r == -EBUSY) { + /* mark as expired and return 0 to keep using cached data */ + sthyi_cache.end = jiffies - 1; + r = 0; + } + return r; +} + +/* + * sthyi_fill - Fill page with data returned by the STHYI instruction + * + * @dst: Pointer to zeroed page + * @rc: Pointer for storing the return code of the instruction + * + * Fills the destination with system information returned by the STHYI + * instruction. The data is generated by emulation or execution of STHYI, + * if available. The return value is either a negative error value or + * the condition code that would be returned, the rc parameter is the + * return code which is passed in register R2 + 1. + */ +int sthyi_fill(void *dst, u64 *rc) +{ + int r; + + mutex_lock(&sthyi_mutex); + r = sthyi_init_cache(); + if (r) + goto out; + + if (time_is_before_jiffies(sthyi_cache.end)) { + /* cache expired */ + r = sthyi_update_cache(rc); + if (r) + goto out; + } + *rc = 0; + memcpy(dst, sthyi_cache.info, PAGE_SIZE); +out: + mutex_unlock(&sthyi_mutex); + return r; +} +EXPORT_SYMBOL_GPL(sthyi_fill); + +SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, + u64 __user *, return_code, unsigned long, flags) +{ + u64 sthyi_rc; + void *info; + int r; + + if (flags) + return -EINVAL; + if (function_code != STHYI_FC_CP_IFL_CAP) + return -EOPNOTSUPP; + info = (void *)get_zeroed_page(GFP_KERNEL); + if (!info) + return -ENOMEM; + r = sthyi_fill(info, &sthyi_rc); + if (r < 0) + goto out; + if (return_code && put_user(sthyi_rc, return_code)) { + r = -EFAULT; + goto out; + } + if (copy_to_user(buffer, info, PAGE_SIZE)) + r = -EFAULT; +out: + free_page((unsigned long)info); + return r; +} diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c deleted file mode 100644 index c479d2f9605b..000000000000 --- a/arch/s390/kernel/suspend.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Suspend support specific for s390. - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> - */ - -#include <linux/pfn.h> -#include <linux/suspend.h> -#include <linux/mm.h> -#include <asm/ctl_reg.h> - -/* - * References to section boundaries - */ -extern const void __nosave_begin, __nosave_end; - -/* - * The restore of the saved pages in an hibernation image will set - * the change and referenced bits in the storage key for each page. - * Overindication of the referenced bits after an hibernation cycle - * does not cause any harm but the overindication of the change bits - * would cause trouble. - * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each - * page to the most significant byte of the associated page frame - * number in the hibernation image. - */ - -/* - * Key storage is allocated as a linked list of pages. - * The size of the keys array is (PAGE_SIZE - sizeof(long)) - */ -struct page_key_data { - struct page_key_data *next; - unsigned char data[]; -}; - -#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) - -static struct page_key_data *page_key_data; -static struct page_key_data *page_key_rp, *page_key_wp; -static unsigned long page_key_rx, page_key_wx; -unsigned long suspend_zero_pages; - -/* - * For each page in the hibernation image one additional byte is - * stored in the most significant byte of the page frame number. - * On suspend no additional memory is required but on resume the - * keys need to be memorized until the page data has been restored. - * Only then can the storage keys be set to their old state. - */ -unsigned long page_key_additional_pages(unsigned long pages) -{ - return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); -} - -/* - * Free page_key_data list of arrays. - */ -void page_key_free(void) -{ - struct page_key_data *pkd; - - while (page_key_data) { - pkd = page_key_data; - page_key_data = pkd->next; - free_page((unsigned long) pkd); - } -} - -/* - * Allocate page_key_data list of arrays with enough room to store - * one byte for each page in the hibernation image. - */ -int page_key_alloc(unsigned long pages) -{ - struct page_key_data *pk; - unsigned long size; - - size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); - while (size--) { - pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); - if (!pk) { - page_key_free(); - return -ENOMEM; - } - pk->next = page_key_data; - page_key_data = pk; - } - page_key_rp = page_key_wp = page_key_data; - page_key_rx = page_key_wx = 0; - return 0; -} - -/* - * Save the storage key into the upper 8 bits of the page frame number. - */ -void page_key_read(unsigned long *pfn) -{ - unsigned long addr; - - addr = (unsigned long) page_address(pfn_to_page(*pfn)); - *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); -} - -/* - * Extract the storage key from the upper 8 bits of the page frame number - * and store it in the page_key_data list of arrays. - */ -void page_key_memorize(unsigned long *pfn) -{ - page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; - *(unsigned char *) pfn = 0; - if (++page_key_wx < PAGE_KEY_DATA_SIZE) - return; - page_key_wp = page_key_wp->next; - page_key_wx = 0; -} - -/* - * Get the next key from the page_key_data list of arrays and set the - * storage key of the page referred by @address. If @address refers to - * a "safe" page the swsusp_arch_resume code will transfer the storage - * key from the buffer page to the original page. - */ -void page_key_write(void *address) -{ - page_set_storage_key((unsigned long) address, - page_key_rp->data[page_key_rx], 0); - if (++page_key_rx >= PAGE_KEY_DATA_SIZE) - return; - page_key_rp = page_key_rp->next; - page_key_rx = 0; -} - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); - unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); - - /* Always save lowcore pages (LC protection might be enabled). */ - if (pfn <= LC_PAGES) - return 0; - if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) - return 1; - /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ - if (tprot(PFN_PHYS(pfn))) - return 1; - return 0; -} - -/* - * PM notifier callback for suspend - */ -static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, - void *ptr) -{ - switch (action) { - case PM_SUSPEND_PREPARE: - case PM_HIBERNATION_PREPARE: - suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); - if (!suspend_zero_pages) - return NOTIFY_BAD; - break; - case PM_POST_SUSPEND: - case PM_POST_HIBERNATION: - free_pages(suspend_zero_pages, LC_ORDER); - break; - default: - return NOTIFY_DONE; - } - return NOTIFY_OK; -} - -static int __init suspend_pm_init(void) -{ - pm_notifier(suspend_pm_cb, 0); - return 0; -} -arch_initcall(suspend_pm_init); - -void save_processor_state(void) -{ - /* swsusp_arch_suspend() actually saves all cpu register contents. - * Machine checks must be disabled since swsusp_arch_suspend() stores - * register contents to their lowcore save areas. That's the same - * place where register contents on machine checks would be saved. - * To avoid register corruption disable machine checks. - * We must also disable machine checks in the new psw mask for - * program checks, since swsusp_arch_suspend() may generate program - * checks. Disabling machine checks for all other new psw masks is - * just paranoia. - */ - local_mcck_disable(); - /* Disable lowcore protection */ - __ctl_clear_bit(0,28); - S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; -} - -void restore_processor_state(void) -{ - S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; - /* Enable lowcore protection */ - __ctl_set_bit(0,28); - local_mcck_enable(); -} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S deleted file mode 100644 index c487be4cfc81..000000000000 --- a/arch/s390/kernel/swsusp_asm64.S +++ /dev/null @@ -1,309 +0,0 @@ -/* - * S390 64-bit swsusp implementation - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> - * Michael Holzheu <holzheu@linux.vnet.ibm.com> - */ - -#include <linux/linkage.h> -#include <asm/page.h> -#include <asm/ptrace.h> -#include <asm/thread_info.h> -#include <asm/asm-offsets.h> -#include <asm/sigp.h> - -/* - * Save register context in absolute 0 lowcore and call swsusp_save() to - * create in-memory kernel image. The context is saved in the designated - * "store status" memory locations (see POP). - * We return from this function twice. The first time during the suspend to - * disk process. The second time via the swsusp_arch_resume() function - * (see below) in the resume process. - * This function runs with disabled interrupts. - */ - .section .text -ENTRY(swsusp_arch_suspend) - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Store prefix register on stack */ - stpx __SF_EMPTY(%r15) - - /* Save prefix register contents for lowcore copy */ - llgf %r10,__SF_EMPTY(%r15) - - /* Get pointer to save area */ - lghi %r1,0x1000 - - /* Save CPU address */ - stap __LC_EXT_CPU_ADDR(%r0) - - /* Store registers */ - mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ - stfpc 0x31c(%r1) /* store fpu control */ - std 0,0x200(%r1) /* store f0 */ - std 1,0x208(%r1) /* store f1 */ - std 2,0x210(%r1) /* store f2 */ - std 3,0x218(%r1) /* store f3 */ - std 4,0x220(%r1) /* store f4 */ - std 5,0x228(%r1) /* store f5 */ - std 6,0x230(%r1) /* store f6 */ - std 7,0x238(%r1) /* store f7 */ - std 8,0x240(%r1) /* store f8 */ - std 9,0x248(%r1) /* store f9 */ - std 10,0x250(%r1) /* store f10 */ - std 11,0x258(%r1) /* store f11 */ - std 12,0x260(%r1) /* store f12 */ - std 13,0x268(%r1) /* store f13 */ - std 14,0x270(%r1) /* store f14 */ - std 15,0x278(%r1) /* store f15 */ - stam %a0,%a15,0x340(%r1) /* store access registers */ - stctg %c0,%c15,0x380(%r1) /* store control registers */ - stmg %r0,%r15,0x280(%r1) /* store general registers */ - - stpt 0x328(%r1) /* store timer */ - stck __SF_EMPTY(%r15) /* store clock */ - stckc 0x330(%r1) /* store clock comparator */ - - /* Update cputime accounting before going to sleep */ - lg %r0,__LC_LAST_UPDATE_TIMER - slg %r0,0x328(%r1) - alg %r0,__LC_SYSTEM_TIMER - stg %r0,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1) - lg %r0,__LC_LAST_UPDATE_CLOCK - slg %r0,__SF_EMPTY(%r15) - alg %r0,__LC_STEAL_TIMER - stg %r0,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15) - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - /* Save absolute zero pages */ - larl %r2,suspend_zero_pages - lg %r2,0(%r2) - lghi %r4,0 - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Copy lowcore to absolute zero lowcore */ - lghi %r2,0 - lgr %r4,%r10 - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Save image */ - brasl %r14,swsusp_save - - /* Restore prefix register and return */ - lghi %r1,0x1000 - spx 0x318(%r1) - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - br %r14 - -/* - * Restore saved memory image to correct place and restore register context. - * Then we return to the function that called swsusp_arch_suspend(). - * swsusp_arch_resume() runs with disabled interrupts. - */ -ENTRY(swsusp_arch_resume) - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - - /* Make all free pages stable */ - lghi %r2,1 - brasl %r14,arch_set_page_states - - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - /* Restore saved image */ - larl %r1,restore_pblist - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 2f -0: - lg %r2,8(%r1) - lg %r4,0(%r1) - iske %r0,%r4 - lghi %r3,PAGE_SIZE - lghi %r5,PAGE_SIZE -1: - mvcle %r2,%r4,0 - jo 1b - lg %r2,8(%r1) - sske %r0,%r2 - lg %r1,16(%r1) - ltgr %r1,%r1 - jnz 0b -2: - ptlb /* flush tlb */ - - /* Reset System */ - larl %r1,restart_entry - larl %r2,.Lrestart_diag308_psw - og %r1,0(%r2) - stg %r1,0(%r0) - larl %r1,.Lnew_pgm_check_psw - epsw %r2,%r3 - stm %r2,%r3,0(%r1) - mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1) - lghi %r0,0 - diag %r0,%r0,0x308 -restart_entry: - lhi %r1,1 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - sam64 - larl %r1,.Lnew_pgm_check_psw - lpswe 0(%r1) -pgm_check_entry: - - /* Switch to original suspend CPU */ - larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ - stap 0(%r1) - llgh %r2,0(%r1) - llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ - cgr %r1,%r2 - je restore_registers /* r1 = r2 -> nothing to do */ - larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ - mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) -3: - sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */ - brc 8,4f /* accepted */ - brc 2,3b /* busy, try again */ - - /* Suspend CPU not available -> panic */ - larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) - larl %r2,.Lpanic_string - larl %r3,_sclp_print_early - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - basr %r14,%r3 - larl %r3,.Ldisabled_wait_31 - lpsw 0(%r3) -4: - /* Switch to suspend CPU */ - sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */ - brc 2,4b /* busy, try again */ -5: - sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */ - brc 2,5b /* busy, try again */ -6: j 6b - -restart_suspend: - larl %r1,.Lresume_cpu - llgh %r2,0(%r1) -7: - sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */ - brc 8,7b /* accepted, status 0, still running */ - brc 2,7b /* busy, try again */ - tmll %r9,0x40 /* Test if resume CPU is stopped */ - jz 7b - -restore_registers: - /* Restore registers */ - lghi %r13,0x1000 /* %r1 = pointer to save area */ - - /* Ignore time spent in suspended state. */ - llgf %r1,0x318(%r13) - stck __LC_LAST_UPDATE_CLOCK(%r1) - spt 0x328(%r13) /* reprogram timer */ - //sckc 0x330(%r13) /* set clock comparator */ - - lctlg %c0,%c15,0x380(%r13) /* load control registers */ - lam %a0,%a15,0x340(%r13) /* load access registers */ - - lfpc 0x31c(%r13) /* load fpu control */ - ld 0,0x200(%r13) /* load f0 */ - ld 1,0x208(%r13) /* load f1 */ - ld 2,0x210(%r13) /* load f2 */ - ld 3,0x218(%r13) /* load f3 */ - ld 4,0x220(%r13) /* load f4 */ - ld 5,0x228(%r13) /* load f5 */ - ld 6,0x230(%r13) /* load f6 */ - ld 7,0x238(%r13) /* load f7 */ - ld 8,0x240(%r13) /* load f8 */ - ld 9,0x248(%r13) /* load f9 */ - ld 10,0x250(%r13) /* load f10 */ - ld 11,0x258(%r13) /* load f11 */ - ld 12,0x260(%r13) /* load f12 */ - ld 13,0x268(%r13) /* load f13 */ - ld 14,0x270(%r13) /* load f14 */ - ld 15,0x278(%r13) /* load f15 */ - - /* Load old stack */ - lg %r15,0x2f8(%r13) - - /* Save prefix register */ - mvc __SF_EMPTY(4,%r15),0x318(%r13) - - /* Restore absolute zero pages */ - lghi %r2,0 - larl %r4,suspend_zero_pages - lg %r4,0(%r4) - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Restore prefix register */ - spx __SF_EMPTY(%r15) - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Make all free pages unstable */ - lghi %r2,0 - brasl %r14,arch_set_page_states - - /* Log potential guest relocation */ - brasl %r14,lgr_info_log - - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit - - /* Return 0 */ - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - br %r14 - - .section .data..nosave,"aw",@progbits - .align 8 -.Ldisabled_wait_31: - .long 0x000a0000,0x00000000 -.Lpanic_string: - .asciz "Resume not possible because suspend CPU is no longer available" - .align 8 -.Lrestart_diag308_psw: - .long 0x00080000,0x80000000 -.Lrestart_suspend_psw: - .quad 0x0000000180000000,restart_suspend -.Lnew_pgm_check_psw: - .quad 0,pgm_check_entry -.Lresume_cpu: - .byte 0,0 diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c deleted file mode 100644 index 23eb222c1658..000000000000 --- a/arch/s390/kernel/sys_s390.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * S390 version - * Copyright IBM Corp. 1999, 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) - * - * Derived from "arch/i386/kernel/sys_i386.c" - * - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/s390 - * platform. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/stat.h> -#include <linux/syscalls.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/utsname.h> -#include <linux/personality.h> -#include <linux/unistd.h> -#include <linux/ipc.h> -#include <asm/uaccess.h> -#include "entry.h" - -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. - */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls. - */ -SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, - unsigned long, third, void __user *, ptr) -{ - if (call >> 16) - return -EINVAL; - /* The s390 sys_ipc variant has only five parameters instead of six - * like the generic variant. The only difference is the handling of - * the SEMTIMEDOP subcall where on s390 the third parameter is used - * as a pointer to a struct timespec where the generic variant uses - * the fifth parameter. - * Therefore we can call the generic variant by simply passing the - * third parameter also as fifth parameter. - */ - return sys_ipc(call, first, second, third, ptr, third); -} - -#ifdef CONFIG_64BIT -SYSCALL_DEFINE1(s390_personality, unsigned int, personality) -{ - unsigned int ret; - - if (personality(current->personality) == PER_LINUX32 && - personality(personality) == PER_LINUX) - personality |= PER_LINUX32; - ret = sys_personality(personality); - if (personality(ret) == PER_LINUX32) - ret &= ~PER_LINUX32; - - return ret; -} -#endif /* CONFIG_64BIT */ - -/* - * Wrapper function for sys_fadvise64/fadvise64_64 - */ -#ifndef CONFIG_64BIT - -SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low, - size_t, len, int, advice) -{ - return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low, - len, advice); -} - -struct fadvise64_64_args { - int fd; - long long offset; - long long len; - int advice; -}; - -SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) -{ - struct fadvise64_64_args a; - - if ( copy_from_user(&a, args, sizeof(a)) ) - return -EFAULT; - return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); -} - -/* - * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last - * 64 bit argument "len" is split into the upper and lower 32 bits. The - * system call wrapper in the user space loads the value to %r6/%r7. - * The code in entry.S keeps the values in %r2 - %r6 where they are and - * stores %r7 to 96(%r15). But the standard C linkage requires that - * the whole 64 bit value for len is stored on the stack and doesn't - * use %r6 at all. So s390_fallocate has to convert the arguments from - * %r2: fd, %r3: mode, %r4/%r5: offset, %r6/96(%r15)-99(%r15): len - * to - * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len - */ -SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, - u32, len_high, u32, len_low) -{ - return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); -} -#endif diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c new file mode 100644 index 000000000000..795b6cca74c9 --- /dev/null +++ b/arch/s390/kernel/syscall.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Thomas Spatzier (tspat@de.ibm.com) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/s390 + * platform. + */ + +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/utsname.h> +#include <linux/personality.h> +#include <linux/unistd.h> +#include <linux/ipc.h> +#include <linux/uaccess.h> +#include <linux/string.h> +#include <linux/thread_info.h> +#include <linux/entry-common.h> + +#include <asm/ptrace.h> +#include <asm/vtime.h> + +#include "entry.h" + +#define __SYSCALL(nr, sym) long __s390x_##sym(struct pt_regs *); +#include <asm/syscall_table.h> +#undef __SYSCALL + +#define __SYSCALL(nr, sym) [nr] = (__s390x_##sym), +const sys_call_ptr_t sys_call_table[__NR_syscalls] = { +#include <asm/syscall_table.h> +}; +#undef __SYSCALL + +#ifdef CONFIG_SYSVIPC +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls. + */ +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) +{ + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return ksys_ipc(call, first, second, third, ptr, third); +} +#endif /* CONFIG_SYSVIPC */ + +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) +{ + unsigned int ret = current->personality; + + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; + + if (personality != 0xffffffff) + set_personality(personality); + + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; + + return ret; +} + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + unsigned long nr; + + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + nr = regs->int_code & 0xffff; + if (likely(!nr)) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + regs->gprs[2] = nr; + if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { + regs->psw.addr = current->restart_block.arch_data; + current->restart_block.arch_data = 1; + } + nr = syscall_enter_from_user_mode_work(regs, nr); + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. + */ + if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) + goto out; + regs->gprs[2] = -ENOSYS; + if (likely(nr < NR_syscalls)) + regs->gprs[2] = sys_call_table[nr](regs); +out: + syscall_exit_to_user_mode(regs); +} diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S deleted file mode 100644 index 913410bd74a3..000000000000 --- a/arch/s390/kernel/syscalls.S +++ /dev/null @@ -1,355 +0,0 @@ -/* - * definitions for sys_call_table, each line represents an - * entry in the table in the form - * SYSCALL(31 bit syscall, 64 bit syscall, 31 bit emulated syscall) - * - * this file is meant to be included from entry.S and entry64.S - */ - -#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall) - -NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) -SYSCALL(sys_fork,sys_fork,sys_fork) -SYSCALL(sys_read,sys_read,sys32_read_wrapper) -SYSCALL(sys_write,sys_write,sys32_write_wrapper) -SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ -SYSCALL(sys_close,sys_close,sys32_close_wrapper) -SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) -SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper) -SYSCALL(sys_link,sys_link,sys32_link_wrapper) -SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */ -SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper) -SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) -SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ -SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) -SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ -SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ -NI_SYSCALL /* old break syscall holder */ -NI_SYSCALL /* old stat syscall holder */ -SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) -SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ -SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) -SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) -SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ -SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ -SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ -SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) -SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) -NI_SYSCALL /* old fstat syscall */ -SYSCALL(sys_pause,sys_pause,sys_pause) -SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ -NI_SYSCALL /* old stty syscall */ -NI_SYSCALL /* old gtty syscall */ -SYSCALL(sys_access,sys_access,sys32_access_wrapper) -SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) -NI_SYSCALL /* 35 old ftime syscall */ -SYSCALL(sys_sync,sys_sync,sys_sync) -SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) -SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) -SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ -SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) -SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) -SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) -NI_SYSCALL /* old prof syscall */ -SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ -SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ -SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ -SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) -SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ -SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */ -SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) -SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) -NI_SYSCALL /* old lock syscall */ -SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) -SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ -NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) -NI_SYSCALL /* old ulimit syscall */ -NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ -SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) -SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) -SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) -SYSCALL(sys_getppid,sys_getppid,sys_getppid) -SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ -SYSCALL(sys_setsid,sys_setsid,sys_setsid) -SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) -NI_SYSCALL /* old sgetmask syscall*/ -NI_SYSCALL /* old ssetmask syscall*/ -SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ -SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ -SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) -SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) -SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) -SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ -SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) -SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) -SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) -SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ -SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ -NI_SYSCALL /* old select syscall */ -SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) -NI_SYSCALL /* old lstat syscall */ -SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ -SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) -SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) -SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) -SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ -SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) -SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) -SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) -SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) -SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) -SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) -NI_SYSCALL /* old profil syscall */ -SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) -SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ -NI_SYSCALL /* ioperm for i386 */ -SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) -SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) -SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) -SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ -SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) -SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) -SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) -NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ -SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) -NI_SYSCALL /* old "idle" system call */ -NI_SYSCALL /* vm86old for i386 */ -SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) -SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ -SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) -SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) -SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) -SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ -SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper) -NI_SYSCALL /* modify_ldt for i386 */ -SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) -SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ -SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) -NI_SYSCALL /* old "create module" */ -SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper) -SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper) -NI_SYSCALL /* 130: old get_kernel_syms */ -SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) -SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) -SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) -SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) -SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) -NI_SYSCALL /* for afs_syscall */ -SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ -SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ -SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ -SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) -SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) -SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) -SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) -SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) -SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) -SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ -SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) -SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) -SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) -SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) -SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ -SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) -SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) -SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ -SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) -SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) -SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) -SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ -SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ -NI_SYSCALL /* for vm86 */ -NI_SYSCALL /* old sys_query_module */ -SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -NI_SYSCALL /* old nfsservctl */ -SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ -SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ -SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) -SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) -SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) -SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ -SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) -SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait) -SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) -SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) -SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ -SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) -SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ -SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) -SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) -SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ -SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) -SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) -NI_SYSCALL /* streams1 */ -NI_SYSCALL /* streams2 */ -SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ -SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) -SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) -SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) -SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) -SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ -SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) -SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) -SYSCALL(sys_getuid,sys_getuid,sys_getuid) -SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ -SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) -SYSCALL(sys_getegid,sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) -SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) -SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ -SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) -SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) -SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) -SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) -SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ -SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) -SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) -SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) -SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) -SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) -SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) -SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) -SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) -SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ -SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) -SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) -SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) -SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) -SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) -SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) -SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) -SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) -SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) -SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) -SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) -SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ -SYSCALL(sys_gettid,sys_gettid,sys_gettid) -SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper) -SYSCALL(sys_futex,sys_futex,compat_sys_futex) -SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) -SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ -SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper) -NI_SYSCALL /* reserved for TUX */ -SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) -SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) -SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ -SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) -SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) -SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) -SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) -SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ -SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) -SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) -SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) -SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ -SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) -SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) -SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) -SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) -SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ -SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) -SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ -SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) -SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) -SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) -SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) -NI_SYSCALL /* 268 sys_mbind */ -NI_SYSCALL /* 269 sys_get_mempolicy */ -NI_SYSCALL /* 270 sys_set_mempolicy */ -SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) -SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) -SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) -SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) -SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) -SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) -SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) -SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */ -SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) -SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) -SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) -SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) -SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */ -SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) -NI_SYSCALL /* 287 sys_migrate_pages */ -SYSCALL(sys_openat,sys_openat,compat_sys_openat) -SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) -SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ -SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) -SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) -SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) -SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ -SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) -SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper) -SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper) -SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) -SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ -SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) -SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) -SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) -SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) -SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) -SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) -SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) -SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) -SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) -NI_SYSCALL /* 310 sys_move_pages */ -SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) -SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) -SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) -SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ -SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) -NI_SYSCALL /* 317 old sys_timer_fd */ -SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) -SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) -SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ -SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) -SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) -SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) -SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) -SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ -SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) -SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) -SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) -SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) -SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ -SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) -SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper) -SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) -SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper) -SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */ -SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) -SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) -SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) -SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) -SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ -SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) -SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper) -SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper) -SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper) diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile new file mode 100644 index 000000000000..d5fca0ca0890 --- /dev/null +++ b/arch/s390/kernel/syscalls/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm + +$(shell mkdir -p $(uapi) $(kapi)) + +syscall := $(src)/syscall.tbl +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh + +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis common,$* $< $@ + +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis common,$* $< $@ + +$(uapi)/unistd_%.h: $(syscall) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE + $(call if_changed,systbl) + +uapisyshdr-y += unistd_64.h +kapisyshdr-y += syscall_table.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) + +PHONY += all +all: $(uapisyshdr-y) $(kapisyshdr-y) + @: diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl new file mode 100644 index 000000000000..417ed16b3c63 --- /dev/null +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -0,0 +1,399 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# System call table for s390 +# +# Format: +# <nr> <abi> <syscall> <entry> +# +# <abi> is always common. + +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +5 common open sys_open +6 common close sys_close +7 common restart_syscall sys_restart_syscall +8 common creat sys_creat +9 common link sys_link +10 common unlink sys_unlink +11 common execve sys_execve +12 common chdir sys_chdir +14 common mknod sys_mknod +15 common chmod sys_chmod +19 common lseek sys_lseek +20 common getpid sys_getpid +21 common mount sys_mount +22 common umount sys_oldumount +26 common ptrace sys_ptrace +27 common alarm sys_alarm +29 common pause sys_pause +30 common utime sys_utime +33 common access sys_access +34 common nice sys_nice +36 common sync sys_sync +37 common kill sys_kill +38 common rename sys_rename +39 common mkdir sys_mkdir +40 common rmdir sys_rmdir +41 common dup sys_dup +42 common pipe sys_pipe +43 common times sys_times +45 common brk sys_brk +48 common signal sys_signal +51 common acct sys_acct +52 common umount2 sys_umount +54 common ioctl sys_ioctl +55 common fcntl sys_fcntl +57 common setpgid sys_setpgid +60 common umask sys_umask +61 common chroot sys_chroot +62 common ustat sys_ustat +63 common dup2 sys_dup2 +64 common getppid sys_getppid +65 common getpgrp sys_getpgrp +66 common setsid sys_setsid +67 common sigaction sys_sigaction +72 common sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending +74 common sethostname sys_sethostname +75 common setrlimit sys_setrlimit +77 common getrusage sys_getrusage +78 common gettimeofday sys_gettimeofday +79 common settimeofday sys_settimeofday +83 common symlink sys_symlink +85 common readlink sys_readlink +86 common uselib sys_uselib +87 common swapon sys_swapon +88 common reboot sys_reboot +89 common readdir sys_ni_syscall +90 common mmap sys_old_mmap +91 common munmap sys_munmap +92 common truncate sys_truncate +93 common ftruncate sys_ftruncate +94 common fchmod sys_fchmod +96 common getpriority sys_getpriority +97 common setpriority sys_setpriority +99 common statfs sys_statfs +100 common fstatfs sys_fstatfs +102 common socketcall sys_socketcall +103 common syslog sys_syslog +104 common setitimer sys_setitimer +105 common getitimer sys_getitimer +106 common stat sys_newstat +107 common lstat sys_newlstat +108 common fstat sys_newfstat +110 common lookup_dcookie sys_ni_syscall +111 common vhangup sys_vhangup +112 common idle sys_ni_syscall +114 common wait4 sys_wait4 +115 common swapoff sys_swapoff +116 common sysinfo sys_sysinfo +117 common ipc sys_s390_ipc +118 common fsync sys_fsync +119 common sigreturn sys_sigreturn +120 common clone sys_clone +121 common setdomainname sys_setdomainname +122 common uname sys_newuname +124 common adjtimex sys_adjtimex +125 common mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask +127 common create_module sys_ni_syscall +128 common init_module sys_init_module +129 common delete_module sys_delete_module +130 common get_kernel_syms sys_ni_syscall +131 common quotactl sys_quotactl +132 common getpgid sys_getpgid +133 common fchdir sys_fchdir +134 common bdflush sys_ni_syscall +135 common sysfs sys_sysfs +136 common personality sys_s390_personality +137 common afs_syscall sys_ni_syscall +141 common getdents sys_getdents +142 common select sys_select +143 common flock sys_flock +144 common msync sys_msync +145 common readv sys_readv +146 common writev sys_writev +147 common getsid sys_getsid +148 common fdatasync sys_fdatasync +149 common _sysctl sys_ni_syscall +150 common mlock sys_mlock +151 common munlock sys_munlock +152 common mlockall sys_mlockall +153 common munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep +163 common mremap sys_mremap +167 common query_module sys_ni_syscall +168 common poll sys_poll +169 common nfsservctl sys_ni_syscall +172 common prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend +180 common pread64 sys_pread64 +181 common pwrite64 sys_pwrite64 +183 common getcwd sys_getcwd +184 common capget sys_capget +185 common capset sys_capset +186 common sigaltstack sys_sigaltstack +187 common sendfile sys_sendfile64 +188 common getpmsg sys_ni_syscall +189 common putpmsg sys_ni_syscall +190 common vfork sys_vfork +191 common getrlimit sys_getrlimit +198 common lchown sys_lchown +199 common getuid sys_getuid +200 common getgid sys_getgid +201 common geteuid sys_geteuid +202 common getegid sys_getegid +203 common setreuid sys_setreuid +204 common setregid sys_setregid +205 common getgroups sys_getgroups +206 common setgroups sys_setgroups +207 common fchown sys_fchown +208 common setresuid sys_setresuid +209 common getresuid sys_getresuid +210 common setresgid sys_setresgid +211 common getresgid sys_getresgid +212 common chown sys_chown +213 common setuid sys_setuid +214 common setgid sys_setgid +215 common setfsuid sys_setfsuid +216 common setfsgid sys_setfsgid +217 common pivot_root sys_pivot_root +218 common mincore sys_mincore +219 common madvise sys_madvise +220 common getdents64 sys_getdents64 +222 common readahead sys_readahead +224 common setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr +231 common llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr +233 common removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr +236 common gettid sys_gettid +237 common tkill sys_tkill +238 common futex sys_futex +239 common sched_setaffinity sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity +241 common tgkill sys_tgkill +243 common io_setup sys_io_setup +244 common io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents +246 common io_submit sys_io_submit +247 common io_cancel sys_io_cancel +248 common exit_group sys_exit_group +249 common epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 +254 common timer_create sys_timer_create +255 common timer_settime sys_timer_settime +256 common timer_gettime sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime +260 common clock_gettime sys_clock_gettime +261 common clock_getres sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep +265 common statfs64 sys_statfs64 +266 common fstatfs64 sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open +272 common mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive +275 common mq_notify sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr +277 common kexec_load sys_kexec_load +278 common add_key sys_add_key +279 common request_key sys_request_key +280 common keyctl sys_keyctl +281 common waitid sys_waitid +282 common ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages +288 common openat sys_openat +289 common mkdirat sys_mkdirat +290 common mknodat sys_mknodat +291 common fchownat sys_fchownat +292 common futimesat sys_futimesat +293 common newfstatat sys_newfstatat +294 common unlinkat sys_unlinkat +295 common renameat sys_renameat +296 common linkat sys_linkat +297 common symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat +300 common faccessat sys_faccessat +301 common pselect6 sys_pselect6 +302 common ppoll sys_ppoll +303 common unshare sys_unshare +304 common set_robust_list sys_set_robust_list +305 common get_robust_list sys_get_robust_list +306 common splice sys_splice +307 common sync_file_range sys_sync_file_range +308 common tee sys_tee +309 common vmsplice sys_vmsplice +310 common move_pages sys_move_pages +311 common getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait +313 common utimes sys_utimes +314 common fallocate sys_fallocate +315 common utimensat sys_utimensat +316 common signalfd sys_signalfd +317 common timerfd sys_ni_syscall +318 common eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 +323 common eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 +326 common dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv +329 common pwritev sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark +334 common prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime +338 common syncfs sys_syncfs +339 common setns sys_setns +340 common process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp +344 common finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 +348 common seccomp sys_seccomp +349 common getrandom sys_getrandom +350 common memfd_create sys_memfd_create +351 common bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat +355 common userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg +358 common sendmmsg sys_sendmmsg +359 common socket sys_socket +360 common socketpair sys_socketpair +361 common bind sys_bind +362 common connect sys_connect +363 common listen sys_listen +364 common accept4 sys_accept4 +365 common getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt +367 common getsockname sys_getsockname +368 common getpeername sys_getpeername +369 common sendto sys_sendto +370 common sendmsg sys_sendmsg +371 common recvfrom sys_recvfrom +372 common recvmsg sys_recvmsg +373 common shutdown sys_shutdown +374 common mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 +377 common pwritev2 sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx +380 common s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents +383 common rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free +# room for arch specific syscalls +392 common semtimedop sys_semtimedop +393 common semget sys_semget +394 common semctl sys_semctl +395 common shmget sys_shmget +396 common shmctl sys_shmctl +397 common shmat sys_shmat +398 common shmdt sys_shmdt +399 common msgget sys_msgget +400 common msgsnd sys_msgsnd +401 common msgrcv sys_msgrcv +402 common msgctl sys_msgctl +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 62f89d98e880..33ca3e47a0e6 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -1,62 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2001, 2009 * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> +#include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> +#include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> +#include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> - -/* Sigh, math-emu. Don't ask. */ -#include <asm/sfp-util.h> -#include <math-emu/soft-fp.h> -#include <math-emu/single.h> +#include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) +#ifdef CONFIG_PROC_FS + +static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { - register int r0 asm("0") = (fc << 28) | sel1; - register int r1 asm("1") = sel2; - int rc = 0; - - asm volatile( - " stsi 0(%3)\n" - "0: jz 2f\n" - "1: lhi %1,%4\n" - "2:\n" - EX_TABLE(0b, 1b) - : "+d" (r0), "+d" (rc) - : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) - : "cc", "memory"); - if (rc) - return rc; - return fc ? 0 : ((unsigned int) r0) >> 28; + switch (encoding) { + case 1: /* EBCDIC */ + EBCASC(name, len); + break; + case 2: /* UTF-8 */ + break; + default: + return false; + } + return true; } -EXPORT_SYMBOL(stsi); static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) { + bool has_var_cap; int i; if (stsi(info, 1, 1, 1)) return; + has_var_cap = !!info->model_var_cap[0]; EBCASC(info->manufacturer, sizeof(info->manufacturer)); EBCASC(info->type, sizeof(info->type)); EBCASC(info->model, sizeof(info->model)); @@ -65,8 +59,12 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) EBCASC(info->model_capacity, sizeof(info->model_capacity)); EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); + if (has_var_cap) + EBCASC(info->model_var_cap, sizeof(info->model_var_cap)); seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); seq_printf(m, "Type: %-4.4s\n", info->type); + if (info->lic) + seq_printf(m, "LIC Identifier: %016lx\n", info->lic); /* * Sigh: the model field has been renamed with System z9 * to model_capacity and a new model field has been added @@ -90,12 +88,18 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", info->model_temp_cap, info->model_temp_cap_rating); + if (has_var_cap && info->model_var_cap_rating) + seq_printf(m, "Model Var. Capacity: %-16.16s %08u\n", + info->model_var_cap, + info->model_var_cap_rating); if (info->ncr) seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); if (info->npr) seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr); if (info->ntr) seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); + if (has_var_cap && info->nvr) + seq_printf(m, "Nominal Var. Rating: %08u\n", info->nvr); if (info->cai) { seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); @@ -111,11 +115,10 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) { - static int max_mnest; - int i, rc; + int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -123,7 +126,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) for (i = 0; i < TOPOLOGY_NR_MAG; i++) seq_printf(m, " %d", info->mag[i]); seq_putc(m, '\n'); -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_TOPOLOGY store_topology(info); seq_printf(m, "CPU Topology SW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) @@ -145,6 +148,10 @@ static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + if (info->mt_installed) { + seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid); + } /* * Sigh 2. According to the specification the alternate * capability field is a 32 bit floating point number @@ -194,6 +201,33 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); + if (info->mt_installed) { + seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid); + seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid); + } + if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) { + seq_printf(m, "LPAR Extended Name: %-.256s\n", info->ext_name); + seq_printf(m, "LPAR UUID: %pUb\n", &info->uuid); + } +} + +static void print_ext_name(struct seq_file *m, int lvl, + struct sysinfo_3_2_2 *info) +{ + size_t len = sizeof(info->ext_names[lvl]); + + if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len)) + return; + seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl, + info->ext_names[lvl]); +} + +static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) +{ + if (uuid_is_null(&info->vm[i].uuid)) + return; + seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); } static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) @@ -213,6 +247,8 @@ static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); + print_ext_name(m, i, info); + print_uuid(m, i, info); } } @@ -238,25 +274,15 @@ static int sysinfo_show(struct seq_file *m, void *v) return 0; } -static int sysinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, sysinfo_show, NULL); -} - -static const struct file_operations sysinfo_fops = { - .open = sysinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init sysinfo_create_proc(void) { - proc_create("sysinfo", 0444, NULL, &sysinfo_fops); + proc_create_single("sysinfo", 0444, NULL, sysinfo_show); return 0; } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ @@ -330,24 +356,12 @@ static const struct seq_operations service_level_seq_ops = { .show = service_level_show }; -static int service_level_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &service_level_seq_ops); -} - -static const struct file_operations service_level_ops = { - .open = service_level_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void service_level_vm_print(struct seq_file *m, struct service_level *slr) { char *query_buffer, *str; - query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + query_buffer = kmalloc(1024, GFP_KERNEL); if (!query_buffer) return; cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); @@ -364,8 +378,8 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { - proc_create("service_levels", 0, NULL, &service_level_ops); - if (MACHINE_IS_VM) + proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -376,11 +390,9 @@ subsys_initcall(create_proc_service_level); */ void s390_adjust_jiffies(void) { + DECLARE_KERNEL_FPU_ONSTACK16(fpu); struct sysinfo_1_2_2 *info; - const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ - FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); - FP_DECL_EX; - unsigned int capability; + unsigned long capability; info = (void *) get_zeroed_page(GFP_KERNEL); if (!info) @@ -396,15 +408,18 @@ void s390_adjust_jiffies(void) * higher cpu capacity. Bogomips are the other way round. * To get to a halfway suitable number we divide 1e7 * by the cpu capability number. Yes, that means a floating - * point division .. math-emu here we come :-) + * point division .. */ - FP_UNPACK_SP(SA, &fmil); - if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, (long) info->capability, 64, long); + kernel_fpu_begin(&fpu, KERNEL_FPR); + fpu_sfpc(0); + if (info->capability & 0xff800000) + fpu_ldgr(2, info->capability); else - FP_UNPACK_SP(SB, &info->capability); - FP_DIV_S(SR, SA, SB); - FP_TO_INT_S(capability, SR, 32, 0); + fpu_cefbr(2, info->capability); + fpu_cefbr(0, 10000000); + fpu_debr(0, 2); + capability = fpu_cgebr(0, 5); + kernel_fpu_end(&fpu, KERNEL_FPR); } else /* * Really old machine without stsi block for basic @@ -418,7 +433,7 @@ void s390_adjust_jiffies(void) /* * calibrate the delay loop */ -void __cpuinit calibrate_delay(void) +void calibrate_delay(void) { s390_adjust_jiffies(); /* Print the good old Bogomips line .. */ @@ -426,3 +441,96 @@ void __cpuinit calibrate_delay(void) "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); } + +#ifdef CONFIG_DEBUG_FS + +#define STSI_FILE(fc, s1, s2) \ +static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\ +{ \ + file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \ + if (!file->private_data) \ + return -ENOMEM; \ + if (stsi(file->private_data, fc, s1, s2)) { \ + free_page((unsigned long)file->private_data); \ + file->private_data = NULL; \ + return -EACCES; \ + } \ + return nonseekable_open(inode, file); \ +} \ + \ +static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ + .open = stsi_open_##fc##_##s1##_##s2, \ + .release = stsi_release, \ + .read = stsi_read, \ +}; + +static int stsi_release(struct inode *inode, struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + +static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) +{ + return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); +} + +STSI_FILE( 1, 1, 1); +STSI_FILE( 1, 2, 1); +STSI_FILE( 1, 2, 2); +STSI_FILE( 2, 2, 1); +STSI_FILE( 2, 2, 2); +STSI_FILE( 3, 2, 2); +STSI_FILE(15, 1, 2); +STSI_FILE(15, 1, 3); +STSI_FILE(15, 1, 4); +STSI_FILE(15, 1, 5); +STSI_FILE(15, 1, 6); + +struct stsi_file { + const struct file_operations *fops; + char *name; +}; + +static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"}, + {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"}, + {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"}, + {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"}, + {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"}, + {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"}, + {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, + {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"}, + {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, + {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"}, + {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, +}; + +static u8 stsi_0_0_0; + +static __init int stsi_init_debugfs(void) +{ + struct dentry *stsi_root; + struct stsi_file *sf; + int lvl, i; + + stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); + lvl = stsi(NULL, 0, 0, 0); + if (lvl > 0) + stsi_0_0_0 = lvl; + debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0); + for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { + sf = &stsi_file[i]; + debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); + } + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { + char link_to[10]; + + snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit()); + debugfs_create_symlink("topology", stsi_root, link_to); + } + return 0; +} +device_initcall(stsi_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S new file mode 100644 index 000000000000..26f2981aa09e --- /dev/null +++ b/arch/s390/kernel/text_amode31.S @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Code that needs to run below 2 GB. + * + * Copyright IBM Corp. 2019 + */ + +#include <linux/linkage.h> +#include <asm/asm-extable.h> +#include <asm/errno.h> +#include <asm/sigp.h> + + .section .amode31.text,"ax" +/* + * Simplified version of expoline thunk. The normal thunks can not be used here, + * because they might be more than 2 GB away, and not reachable by the relative + * branch. No comdat, exrl, etc. optimizations used here, because it only + * affects a few functions that are not performance-relevant. + */ + .macro BR_EX_AMODE31_r14 + exrl 0,0f + j . +0: br %r14 + .endm + +/* + * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode) + */ +SYM_FUNC_START(_diag14_amode31) + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + lhi %r5,-EIO + sam31 + diag %r1,%r2,0x14 +.Ldiag14_ex: + ipm %r5 + srl %r5,28 +.Ldiag14_fault: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault) +SYM_FUNC_END(_diag14_amode31) + +/* + * int _diag210_amode31(struct diag210 *addr) + */ +SYM_FUNC_START(_diag210_amode31) + lgr %r1,%r2 + lhi %r2,-1 + sam31 + diag %r1,%r0,0x210 +.Ldiag210_ex: + ipm %r2 + srl %r2,28 +.Ldiag210_fault: + sam64 + lgfr %r2,%r2 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault) +SYM_FUNC_END(_diag210_amode31) + +/* + * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len) +*/ +SYM_FUNC_START(_diag8c_amode31) + llgf %r3,0(%r3) + sam31 + diag %r2,%r4,0x8c +.Ldiag8c_ex: + sam64 + lgfr %r2,%r3 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex) +SYM_FUNC_END(_diag8c_amode31) +/* + * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode) + */ +SYM_FUNC_START(_diag26c_amode31) + lghi %r5,-EOPNOTSUPP + sam31 + diag %r2,%r4,0x26c +.Ldiag26c_ex: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex) +SYM_FUNC_END(_diag26c_amode31) + +/* + * void _diag0c_amode31(unsigned long rx) + */ +SYM_FUNC_START(_diag0c_amode31) + sam31 + diag %r2,%r2,0x0c + sam64 + BR_EX_AMODE31_r14 +SYM_FUNC_END(_diag0c_amode31) + +/* + * void _diag308_reset_amode31(void) + * + * Calls diag 308 subcode 1 and continues execution + */ +SYM_FUNC_START(_diag308_reset_amode31) + larl %r4,ctlregs # Save control registers + stctg %c0,%c15,0(%r4) + lg %r2,0(%r4) # Disable lowcore protection + nilh %r2,0xefff + larl %r4,ctlreg0 + stg %r2,0(%r4) + lctlg %c0,%c0,0(%r4) + larl %r4,fpctl # Floating point control register + stfpc 0(%r4) + larl %r4,prefix # Save prefix register + stpx 0(%r4) + larl %r4,prefix_zero # Set prefix register to 0 + spx 0(%r4) + larl %r4,continue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0 + larl %r3,restart_diag308_psw + og %r4,0(%r3) # Save PSW + lghi %r3,0 + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + lghi %r0,0 + diag %r0,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,ctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,fpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,prefix # Restore prefix register + spx 0(%r4) + larl %r4,continue_psw # Restore PSW flags + larl %r2,.Lcontinue + stg %r2,8(%r4) + lpswe 0(%r4) +.Lcontinue: + BR_EX_AMODE31_r14 +SYM_FUNC_END(_diag308_reset_amode31) + + .section .amode31.data,"aw",@progbits + .balign 8 +SYM_DATA_LOCAL(restart_diag308_psw, .long 0x00080000,0x80000000) +SYM_DATA_LOCAL(continue_psw, .quad 0,0) +SYM_DATA_LOCAL(ctlreg0, .quad 0) +SYM_DATA_LOCAL(ctlregs, .fill 16,8,0) +SYM_DATA_LOCAL(fpctl, .long 0) +SYM_DATA_LOCAL(prefix, .long 0) +SYM_DATA_LOCAL(prefix_zero, .long 0) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 876546b9cfa1..bd0df61d1907 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Time of day based timer functions. * @@ -11,13 +12,13 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ -#define KMSG_COMPONENT "time" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "time: " fmt #include <linux/kernel_stat.h> #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/kernel.h> #include <linux/param.h> #include <linux/string.h> @@ -34,104 +35,107 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> -#include <linux/timekeeper_internal.h> #include <linux/clockchips.h> #include <linux/gfp.h> #include <linux/kprobes.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <vdso/vsyscall.h> +#include <vdso/clocksource.h> +#include <vdso/helpers.h> +#include <asm/facility.h> #include <asm/delay.h> #include <asm/div64.h> #include <asm/vdso.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/vtimer.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/cio.h> #include "entry.h" -/* change this if you have some constant time drift */ -#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) -#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) +union tod_clock __bootdata_preserved(tod_clock_base); +EXPORT_SYMBOL_GPL(tod_clock_base); -u64 sched_clock_base_cc = -1; /* Force to data section. */ -EXPORT_SYMBOL_GPL(sched_clock_base_cc); +u64 __bootdata_preserved(clock_comparator_max); +EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); +ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); +EXPORT_SYMBOL(s390_epoch_delta_notifier); + +unsigned char ptff_function_mask[16]; + +static unsigned long lpar_offset; +static unsigned long initial_leap_seconds; + /* - * Scheduler clock - returns current time in nanosec units. + * Get time offsets with PTFF */ -unsigned long long notrace __kprobes sched_clock(void) +void __init time_early_init(void) { - return tod_to_ns(get_tod_clock_monotonic()); + struct ptff_qto qto; + struct ptff_qui qui; + + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; + + if (!test_facility(28)) + return; + + ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF); + + /* get LPAR offset */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + + /* get initial leap seconds */ + if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) + initial_leap_seconds = (unsigned long) + ((long) qui.old_leap * 4096000000L); +} + +unsigned long long noinstr sched_clock_noinstr(void) +{ + return tod_to_ns(__get_tod_clock_monotonic()); } /* - * Monotonic_clock - returns # of nanoseconds passed since time_init() + * Scheduler clock - returns current time in nanosec units. */ -unsigned long long monotonic_clock(void) +unsigned long long notrace sched_clock(void) { - return sched_clock(); + return tod_to_ns(get_tod_clock_monotonic()); } -EXPORT_SYMBOL(monotonic_clock); +NOKPROBE_SYMBOL(sched_clock); -void tod_to_timeval(__u64 todval, struct timespec *xt) +static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt) { - unsigned long long sec; + unsigned long rem, sec, nsec; - sec = todval >> 12; - do_div(sec, 1000000); + sec = clk->us; + rem = do_div(sec, 1000000); + nsec = ((clk->sus + (rem << 12)) * 125) >> 9; xt->tv_sec = sec; - todval -= (sec * 1000000) << 12; - xt->tv_nsec = ((todval * 1000) >> 12); + xt->tv_nsec = nsec; } -EXPORT_SYMBOL(tod_to_timeval); void clock_comparator_work(void) { struct clock_event_device *cd; - S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); - cd = &__get_cpu_var(comparators); + get_lowcore()->clock_comparator = clock_comparator_max; + cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } -/* - * Fixup the clock comparator. - */ -static void fixup_clock_comparator(unsigned long long delta) -{ - /* If nobody is waiting there's nothing to fix. */ - if (S390_lowcore.clock_comparator == -1ULL) - return; - S390_lowcore.clock_comparator += delta; - set_clock_comparator(S390_lowcore.clock_comparator); -} - -static int s390_next_ktime(ktime_t expires, +static int s390_next_event(unsigned long delta, struct clock_event_device *evt) { - struct timespec ts; - u64 nsecs; - - ts.tv_sec = ts.tv_nsec = 0; - monotonic_to_bootbased(&ts); - nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires)); - do_div(nsecs, 125); - S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9); - /* Program the maximum value if we have an overflow (== year 2042) */ - if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc)) - S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); + get_lowcore()->clock_comparator = get_tod_clock() + delta; + set_clock_comparator(get_lowcore()->clock_comparator); return 0; } -static void s390_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ -} - /* * Set up lowcore and control register of the current cpu to * enable TOD clock and clock comparator interrupts. @@ -141,30 +145,30 @@ void init_cpu_timer(void) struct clock_event_device *cd; int cpu; - S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); + get_lowcore()->clock_comparator = clock_comparator_max; + set_clock_comparator(get_lowcore()->clock_comparator); cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_KTIME; + cd->features = CLOCK_EVT_FEAT_ONESHOT; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; + cd->min_delta_ticks = 1; cd->max_delta_ns = LONG_MAX; + cd->max_delta_ticks = ULONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_ktime = s390_next_ktime; - cd->set_mode = s390_set_mode; + cd->set_next_event = s390_next_event; clockevents_register_device(cd); /* Enable clock comparator timer interrupt. */ - __ctl_set_bit(0,11); + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT); /* Always allow the timing alert external interrupt. */ - __ctl_set_bit(0, 4); + local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT); } static void clock_comparator_interrupt(struct ext_code ext_code, @@ -172,49 +176,64 @@ static void clock_comparator_interrupt(struct ext_code ext_code, unsigned long param64) { inc_irq_stat(IRQEXT_CLK); - if (S390_lowcore.clock_comparator == -1ULL) - set_clock_comparator(S390_lowcore.clock_comparator); + if (get_lowcore()->clock_comparator == clock_comparator_max) + set_clock_comparator(get_lowcore()->clock_comparator); } -static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { inc_irq_stat(IRQEXT_TLA); - if (param32 & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) stp_timing_alert((struct stp_irq_parm *) ¶m32); } -static void etr_reset(void); static void stp_reset(void); -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { - tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); + union tod_clock clk; + u64 delta; + + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + store_tod_clock_ext(&clk); + clk.eitod -= delta; + ext_to_timespec64(&clk, ts); } -void read_boot_clock(struct timespec *ts) +void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, + struct timespec64 *boot_offset) { - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); + struct timespec64 boot_time; + union tod_clock clk; + u64 delta; + + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + clk = tod_clock_base; + clk.eitod -= delta; + ext_to_timespec64(&clk, &boot_time); + + read_persistent_clock64(wall_time); + *boot_offset = timespec64_sub(*wall_time, boot_time); } -static cycle_t read_tod_clock(struct clocksource *cs) +static u64 read_tod_clock(struct clocksource *cs) { - return get_tod_clock(); + return get_tod_clock_monotonic(); } static struct clocksource clocksource_tod = { .name = "tod", .rating = 400, .read = read_tod_clock, - .mask = -1ULL, - .mult = 1000, - .shift = 12, + .mask = CLOCKSOURCE_MASK(64), + .mult = 4096000, + .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vdso_clock_mode = VDSO_CLOCKMODE_TOD, + .id = CSID_S390_TOD, }; struct clocksource * __init clocksource_default_clock(void) @@ -222,38 +241,6 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) -{ - if (clock != &clocksource_tod) - return; - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_wmb(); - vdso_data->xtime_tod_stamp = clock->cycle_last; - vdso_data->xtime_clock_sec = wall_time->tv_sec; - vdso_data->xtime_clock_nsec = wall_time->tv_nsec; - vdso_data->wtom_clock_sec = wtm->tv_sec; - vdso_data->wtom_clock_nsec = wtm->tv_nsec; - vdso_data->ntp_mult = mult; - smp_wmb(); - ++vdso_data->tb_update_count; -} - -extern struct timezone sys_tz; - -void update_vsyscall_tz(void) -{ - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_wmb(); - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; - smp_wmb(); - ++vdso_data->tb_update_count; -} - /* * Initialize the TOD clock and the CPU timer of * the boot cpu. @@ -261,18 +248,17 @@ void update_vsyscall_tz(void) void __init time_init(void) { /* Reset time synchronization interfaces. */ - etr_reset(); stp_reset(); /* request the clock comparator external interrupt */ - if (register_external_interrupt(0x1004, clock_comparator_interrupt)) - panic("Couldn't request external interrupt 0x1004"); + if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt)) + panic("Couldn't request external interrupt 0x1004"); /* request the timing alert external interrupt */ - if (register_external_interrupt(0x1406, timing_alert_interrupt)) + if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) panic("Couldn't request external interrupt 0x1406"); - if (clocksource_register(&clocksource_tod) != 0) + if (__clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); /* Enable TOD clock interrupts on the boot cpu. */ @@ -282,106 +268,66 @@ void __init time_init(void) vtime_init(); } -/* - * The time is "clock". old is what we think the time is. - * Adjust the value by a multiple of jiffies and add the delta to ntp. - * "delay" is an approximation how long the synchronization took. If - * the time correction is positive, then "delay" is subtracted from - * the time difference and only the remaining part is passed to ntp. - */ -static unsigned long long adjust_time(unsigned long long old, - unsigned long long clock, - unsigned long long delay) -{ - unsigned long long delta, ticks; - struct timex adjust; - - if (clock > old) { - /* It is later than we thought. */ - delta = ticks = clock - old; - delta = ticks = (delta < delay) ? 0 : delta - delay; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - adjust.offset = ticks * (1000000 / HZ); - } else { - /* It is earlier than we thought. */ - delta = ticks = old - clock; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - delta = -delta; - adjust.offset = -ticks * (1000000 / HZ); - } - sched_clock_base_cc += delta; - if (adjust.offset != 0) { - pr_notice("The ETR interface has adjusted the clock " - "by %li microseconds\n", adjust.offset); - adjust.modes = ADJ_OFFSET_SINGLESHOT; - do_adjtimex(&adjust); - } - return delta; -} - static DEFINE_PER_CPU(atomic_t, clock_sync_word); -static DEFINE_MUTEX(clock_sync_mutex); +static DEFINE_MUTEX(stp_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_ETR 0 -#define CLOCK_SYNC_HAS_STP 1 -#define CLOCK_SYNC_ETR 2 -#define CLOCK_SYNC_STP 3 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* - * The synchronous get_clock function. It will write the current clock - * value to the clock pointer and return 0 if the clock is in sync with - * the external time source. If the clock mode is local it will return - * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external - * reference. + * The get_clock function for the physical clock. It will get the current + * TOD clock, subtract the LPAR offset and write the result to *clock. + * The function returns 0 if the clock is in sync with the external time + * source. If the clock mode is local it will return -EOPNOTSUPP and + * -EAGAIN if the clock is not in sync with the external reference. */ -int get_sync_clock(unsigned long long *clock) +int get_phys_clock(unsigned long *clock) { atomic_t *sw_ptr; unsigned int sw0, sw1; sw_ptr = &get_cpu_var(clock_sync_word); sw0 = atomic_read(sw_ptr); - *clock = get_tod_clock(); + *clock = get_tod_clock() - lpar_offset; sw1 = atomic_read(sw_ptr); put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) /* Success: time is in sync. */ return 0; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && - !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; - if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && - !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) + if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return -EACCES; return -EAGAIN; } -EXPORT_SYMBOL(get_sync_clock); +EXPORT_SYMBOL(get_phys_clock); /* - * Make get_sync_clock return -EAGAIN. + * Make get_phys_clock() return -EAGAIN. */ static void disable_sync_clock(void *dummy) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); /* - * Clear the in-sync bit 2^31. All get_sync_clock calls will + * Clear the in-sync bit 2^31. All get_phys_clock calls will * fail until the sync bit is turned back on. In addition * increase the "sequence" counter to avoid the race of an - * etr event and the complete recovery against get_sync_clock. + * stp event and the complete recovery against get_phys_clock. */ - atomic_clear_mask(0x80000000, sw_ptr); + atomic_andnot(0x80000000, sw_ptr); atomic_inc(sw_ptr); } /* - * Make get_sync_clock return 0 again. + * Make get_phys_clock() return 0 again. * Needs to be called from a context disabled for preemption. */ static void enable_sync_clock(void) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); - atomic_set_mask(0x80000000, sw_ptr); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); + atomic_or(0x80000000, sw_ptr); } /* @@ -398,1039 +344,69 @@ static inline int check_sync_clock(void) return rc; } -/* Single threaded workqueue used for etr and stp sync events */ -static struct workqueue_struct *time_sync_wq; - -static void __init time_init_wq(void) -{ - if (time_sync_wq) - return; - time_sync_wq = create_singlethread_workqueue("timesync"); -} - -/* - * External Time Reference (ETR) code. - */ -static int etr_port0_online; -static int etr_port1_online; -static int etr_steai_available; - -static int __init early_parse_etr(char *p) -{ - if (strncmp(p, "off", 3) == 0) - etr_port0_online = etr_port1_online = 0; - else if (strncmp(p, "port0", 5) == 0) - etr_port0_online = 1; - else if (strncmp(p, "port1", 5) == 0) - etr_port1_online = 1; - else if (strncmp(p, "on", 2) == 0) - etr_port0_online = etr_port1_online = 1; - return 0; -} -early_param("etr", early_parse_etr); - -enum etr_event { - ETR_EVENT_PORT0_CHANGE, - ETR_EVENT_PORT1_CHANGE, - ETR_EVENT_PORT_ALERT, - ETR_EVENT_SYNC_CHECK, - ETR_EVENT_SWITCH_LOCAL, - ETR_EVENT_UPDATE, -}; - /* - * Valid bit combinations of the eacr register are (x = don't care): - * e0 e1 dp p0 p1 ea es sl - * 0 0 x 0 0 0 0 0 initial, disabled state - * 0 0 x 0 1 1 0 0 port 1 online - * 0 0 x 1 0 1 0 0 port 0 online - * 0 0 x 1 1 1 0 0 both ports online - * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode - * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode - * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync - * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync - * 0 1 x 1 1 1 0 0 both ports online, port 1 usable - * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync - * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync - * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode - * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode - * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync - * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync - * 1 0 x 1 1 1 0 0 both ports online, port 0 usable - * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync - * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync - * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync - * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync + * Apply clock delta to the global data structures. + * This is called once on the CPU that performed the clock sync. */ -static struct etr_eacr etr_eacr; -static u64 etr_tolec; /* time of last eacr update */ -static struct etr_aib etr_port0; -static int etr_port0_uptodate; -static struct etr_aib etr_port1; -static int etr_port1_uptodate; -static unsigned long etr_events; -static struct timer_list etr_timer; - -static void etr_timeout(unsigned long dummy); -static void etr_work_fn(struct work_struct *work); -static DEFINE_MUTEX(etr_work_mutex); -static DECLARE_WORK(etr_work, etr_work_fn); - -/* - * Reset ETR attachment. - */ -static void etr_reset(void) +static void clock_sync_global(long delta) { - etr_eacr = (struct etr_eacr) { - .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, - .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, - .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_tod_clock(); - set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - } else if (etr_port0_online || etr_port1_online) { - pr_warning("The real or virtual hardware system does " - "not provide an ETR interface\n"); - etr_port0_online = etr_port1_online = 0; - } -} - -static int __init etr_init(void) -{ - struct etr_aib aib; + struct ptff_qto qto; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return 0; - time_init_wq(); - /* Check if this machine has the steai instruction. */ - if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) - etr_steai_available = 1; - setup_timer(&etr_timer, etr_timeout, 0UL); - if (etr_port0_online) { - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - if (etr_port1_online) { - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - return 0; + /* Fixup the monotonic sched clock. */ + tod_clock_base.eitod += delta; + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; + /* Update LPAR offset. */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + /* Call the TOD clock change notifier. */ + atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta); } -arch_initcall(etr_init); - -/* - * Two sorts of ETR machine checks. The architecture reads: - * "When a machine-check niterruption occurs and if a switch-to-local or - * ETR-sync-check interrupt request is pending but disabled, this pending - * disabled interruption request is indicated and is cleared". - * Which means that we can get etr_switch_to_local events from the machine - * check handler although the interruption condition is disabled. Lovely.. - */ - /* - * Switch to local machine check. This is called when the last usable - * ETR port goes inactive. After switch to local the clock is not in sync. + * Apply clock delta to the per-CPU data structures of this CPU. + * This is called for each online CPU after the call to clock_sync_global. */ -void etr_switch_to_local(void) +static void clock_sync_local(long delta) { - if (!etr_eacr.sl) - return; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { - etr_eacr.es = etr_eacr.sl = 0; - etr_setr(&etr_eacr); - queue_work(time_sync_wq, &etr_work); + /* Add the delta to the clock comparator. */ + if (get_lowcore()->clock_comparator != clock_comparator_max) { + get_lowcore()->clock_comparator += delta; + set_clock_comparator(get_lowcore()->clock_comparator); } + /* Adjust the last_update_clock time-stamp. */ + get_lowcore()->last_update_clock += delta; } -/* - * ETR sync check machine check. This is called when the ETR OTE and the - * local clock OTE are farther apart than the ETR sync check tolerance. - * After a ETR sync check the clock is not in sync. The machine check - * is broadcasted to all cpus at the same time. - */ -void etr_sync_check(void) -{ - if (!etr_eacr.es) - return; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { - etr_eacr.es = 0; - etr_setr(&etr_eacr); - queue_work(time_sync_wq, &etr_work); - } -} - -/* - * ETR timing alert. There are two causes: - * 1) port state change, check the usability of the port - * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the - * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) - * or ETR-data word 4 (edf4) has changed. - */ -static void etr_timing_alert(struct etr_irq_parm *intparm) -{ - if (intparm->pc0) - /* ETR port 0 state change. */ - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - if (intparm->pc1) - /* ETR port 1 state change. */ - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - if (intparm->eai) - /* - * ETR port alert on either port 0, 1 or both. - * Both ports are not up-to-date now. - */ - set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -static void etr_timeout(unsigned long dummy) -{ - set_bit(ETR_EVENT_UPDATE, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -/* - * Check if the etr mode is pss. - */ -static inline int etr_mode_is_pps(struct etr_eacr eacr) -{ - return eacr.es && !eacr.sl; -} - -/* - * Check if the etr mode is etr. - */ -static inline int etr_mode_is_etr(struct etr_eacr eacr) -{ - return eacr.es && eacr.sl; -} - -/* - * Check if the port can be used for TOD synchronization. - * For PPS mode the port has to receive OTEs. For ETR mode - * the port has to receive OTEs, the ETR stepping bit has to - * be zero and the validity bits for data frame 1, 2, and 3 - * have to be 1. - */ -static int etr_port_valid(struct etr_aib *aib, int port) -{ - unsigned int psc; - - /* Check that this port is receiving OTEs. */ - if (aib->tsp == 0) - return 0; - - psc = port ? aib->esw.psc1 : aib->esw.psc0; - if (psc == etr_lpsc_pps_mode) - return 1; - if (psc == etr_lpsc_operational_step) - return !aib->esw.y && aib->slsw.v1 && - aib->slsw.v2 && aib->slsw.v3; - return 0; -} - -/* - * Check if two ports are on the same network. - */ -static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) -{ - // FIXME: any other fields we have to compare? - return aib1->edf1.net_id == aib2->edf1.net_id; -} - -/* - * Wrapper for etr_stei that converts physical port states - * to logical port states to be consistent with the output - * of stetr (see etr_psc vs. etr_lpsc). - */ -static void etr_steai_cv(struct etr_aib *aib, unsigned int func) -{ - BUG_ON(etr_steai(aib, func) != 0); - /* Convert port state to logical port state. */ - if (aib->esw.psc0 == 1) - aib->esw.psc0 = 2; - else if (aib->esw.psc0 == 0 && aib->esw.p == 0) - aib->esw.psc0 = 1; - if (aib->esw.psc1 == 1) - aib->esw.psc1 = 2; - else if (aib->esw.psc1 == 0 && aib->esw.p == 1) - aib->esw.psc1 = 1; -} +/* Single threaded workqueue used for stp sync events */ +static struct workqueue_struct *time_sync_wq; -/* - * Check if the aib a2 is still connected to the same attachment as - * aib a1, the etv values differ by one and a2 is valid. - */ -static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) +static void __init time_init_wq(void) { - int state_a1, state_a2; - - /* Paranoia check: e0/e1 should better be the same. */ - if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || - a1->esw.eacr.e1 != a2->esw.eacr.e1) - return 0; - - /* Still connected to the same etr ? */ - state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; - state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; - if (state_a1 == etr_lpsc_operational_step) { - if (state_a2 != etr_lpsc_operational_step || - a1->edf1.net_id != a2->edf1.net_id || - a1->edf1.etr_id != a2->edf1.etr_id || - a1->edf1.etr_pn != a2->edf1.etr_pn) - return 0; - } else if (state_a2 != etr_lpsc_pps_mode) - return 0; - - /* The ETV value of a2 needs to be ETV of a1 + 1. */ - if (a1->edf2.etv + 1 != a2->edf2.etv) - return 0; - - if (!etr_port_valid(a2, p)) - return 0; - - return 1; + if (time_sync_wq) + return; + time_sync_wq = create_singlethread_workqueue("timesync"); } struct clock_sync_data { atomic_t cpus; int in_sync; - unsigned long long fixup_cc; - int etr_port; - struct etr_aib *etr_aib; + long clock_delta; }; -static void clock_sync_cpu(struct clock_sync_data *sync) -{ - atomic_dec(&sync->cpus); - enable_sync_clock(); - /* - * This looks like a busy wait loop but it isn't. etr_sync_cpus - * is called on all other cpus while the TOD clocks is stopped. - * __udelay will stop the cpu on an enabled wait psw until the - * TOD is running again. - */ - while (sync->in_sync == 0) { - __udelay(1); - /* - * A different cpu changes *in_sync. Therefore use - * barrier() to force memory access. - */ - barrier(); - } - if (sync->in_sync != 1) - /* Didn't work. Clear per-cpu in sync bit again. */ - disable_sync_clock(NULL); - /* - * This round of TOD syncing is done. Set the clock comparator - * to the next tick and let the processor continue. - */ - fixup_clock_comparator(sync->fixup_cc); -} - -/* - * Sync the TOD clock using the port referred to by aibp. This port - * has to be enabled and the other port has to be disabled. The - * last eacr update has to be more than 1.6 seconds in the past. - */ -static int etr_sync_clock(void *data) -{ - static int first; - unsigned long long clock, old_clock, delay, delta; - struct clock_sync_data *etr_sync; - struct etr_aib *sync_port, *aib; - int port; - int rc; - - etr_sync = data; - - if (xchg(&first, 1) == 1) { - /* Slave */ - clock_sync_cpu(etr_sync); - return 0; - } - - /* Wait until all other cpus entered the sync function. */ - while (atomic_read(&etr_sync->cpus) != 0) - cpu_relax(); - - port = etr_sync->etr_port; - aib = etr_sync->etr_aib; - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - enable_sync_clock(); - - /* Set clock to next OTE. */ - __ctl_set_bit(14, 21); - __ctl_set_bit(0, 29); - clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - old_clock = get_tod_clock(); - if (set_tod_clock(clock) == 0) { - __udelay(1); /* Wait for the clock to start. */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - etr_stetr(aib); - /* Adjust Linux timing variables. */ - delay = (unsigned long long) - (aib->edf2.etv - sync_port->edf2.etv) << 32; - delta = adjust_time(old_clock, clock, delay); - etr_sync->fixup_cc = delta; - fixup_clock_comparator(delta); - /* Verify that the clock is properly set. */ - if (!etr_aib_follows(sync_port, aib, port)) { - /* Didn't work. */ - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } else { - etr_sync->in_sync = 1; - rc = 0; - } - } else { - /* Could not set the clock ?!? */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } - xchg(&first, 0); - return rc; -} - -static int etr_sync_clock_stop(struct etr_aib *aib, int port) -{ - struct clock_sync_data etr_sync; - struct etr_aib *sync_port; - int follows; - int rc; - - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; - memset(&etr_sync, 0, sizeof(etr_sync)); - etr_sync.etr_aib = aib; - etr_sync.etr_port = port; - get_online_cpus(); - atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); - put_online_cpus(); - return rc; -} - -/* - * Handle the immediate effects of the different events. - * The port change event is used for online/offline changes. - */ -static struct etr_eacr etr_handle_events(struct etr_eacr eacr) -{ - if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) - eacr.es = 0; - if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) - eacr.es = eacr.sl = 0; - if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events)) - etr_port0_uptodate = etr_port1_uptodate = 0; - - if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) { - if (eacr.e0) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p0 = etr_port0_online; - if (!eacr.p0) - eacr.e0 = 0; - etr_port0_uptodate = 0; - } - if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) { - if (eacr.e1) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p1 = etr_port1_online; - if (!eacr.p1) - eacr.e1 = 0; - etr_port1_uptodate = 0; - } - clear_bit(ETR_EVENT_UPDATE, &etr_events); - return eacr; -} - -/* - * Set up a timer that expires after the etr_tolec + 1.6 seconds if - * one of the ports needs an update. - */ -static void etr_set_tolec_timeout(unsigned long long now) -{ - unsigned long micros; - - if ((!etr_eacr.p0 || etr_port0_uptodate) && - (!etr_eacr.p1 || etr_port1_uptodate)) - return; - micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; - micros = (micros > 1600000) ? 0 : 1600000 - micros; - mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); -} - -/* - * Set up a time that expires after 1/2 second. - */ -static void etr_set_sync_timeout(void) -{ - mod_timer(&etr_timer, jiffies + HZ/2); -} - -/* - * Update the aib information for one or both ports. - */ -static struct etr_eacr etr_handle_update(struct etr_aib *aib, - struct etr_eacr eacr) -{ - /* With both ports disabled the aib information is useless. */ - if (!eacr.e0 && !eacr.e1) - return eacr; - - /* Update port0 or port1 with aib stored in etr_work_fn. */ - if (aib->esw.q == 0) { - /* Information for port 0 stored. */ - if (eacr.p0 && !etr_port0_uptodate) { - etr_port0 = *aib; - if (etr_port0_online) - etr_port0_uptodate = 1; - } - } else { - /* Information for port 1 stored. */ - if (eacr.p1 && !etr_port1_uptodate) { - etr_port1 = *aib; - if (etr_port0_online) - etr_port1_uptodate = 1; - } - } - - /* - * Do not try to get the alternate port aib if the clock - * is not in sync yet. - */ - if (!eacr.es || !check_sync_clock()) - return eacr; - - /* - * If steai is available we can get the information about - * the other port immediately. If only stetr is available the - * data-port bit toggle has to be used. - */ - if (etr_steai_available) { - if (eacr.p0 && !etr_port0_uptodate) { - etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); - etr_port0_uptodate = 1; - } - if (eacr.p1 && !etr_port1_uptodate) { - etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); - etr_port1_uptodate = 1; - } - } else { - /* - * One port was updated above, if the other - * port is not uptodate toggle dp bit. - */ - if ((eacr.p0 && !etr_port0_uptodate) || - (eacr.p1 && !etr_port1_uptodate)) - eacr.dp ^= 1; - else - eacr.dp = 0; - } - return eacr; -} - -/* - * Write new etr control register if it differs from the current one. - * Return 1 if etr_tolec has been updated as well. - */ -static void etr_update_eacr(struct etr_eacr eacr) -{ - int dp_changed; - - if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) - /* No change, return. */ - return; - /* - * The disable of an active port of the change of the data port - * bit can/will cause a change in the data port. - */ - dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || - (etr_eacr.dp ^ eacr.dp) != 0; - etr_eacr = eacr; - etr_setr(&etr_eacr); - if (dp_changed) - etr_tolec = get_tod_clock(); -} - -/* - * ETR work. In this function you'll find the main logic. In - * particular this is the only function that calls etr_update_eacr(), - * it "controls" the etr control register. - */ -static void etr_work_fn(struct work_struct *work) -{ - unsigned long long now; - struct etr_eacr eacr; - struct etr_aib aib; - int sync_port; - - /* prevent multiple execution. */ - mutex_lock(&etr_work_mutex); - - /* Create working copy of etr_eacr. */ - eacr = etr_eacr; - - /* Check for the different events and their immediate effects. */ - eacr = etr_handle_events(eacr); - - /* Check if ETR is supposed to be active. */ - eacr.ea = eacr.p0 || eacr.p1; - if (!eacr.ea) { - /* Both ports offline. Reset everything. */ - eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(disable_sync_clock, NULL, 1); - del_timer_sync(&etr_timer); - etr_update_eacr(eacr); - goto out_unlock; - } - - /* Store aib to get the current ETR status word. */ - BUG_ON(etr_stetr(&aib) != 0); - etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_tod_clock(); - - /* - * Update the port information if the last stepping port change - * or data port change is older than 1.6 seconds. - */ - if (now >= etr_tolec + (1600000 << 12)) - eacr = etr_handle_update(&aib, eacr); - - /* - * Select ports to enable. The preferred synchronization mode is PPS. - * If a port can be enabled depends on a number of things: - * 1) The port needs to be online and uptodate. A port is not - * disabled just because it is not uptodate, but it is only - * enabled if it is uptodate. - * 2) The port needs to have the same mode (pps / etr). - * 3) The port needs to be usable -> etr_port_valid() == 1 - * 4) To enable the second port the clock needs to be in sync. - * 5) If both ports are useable and are ETR ports, the network id - * has to be the same. - * The eacr.sl bit is used to indicate etr mode vs. pps mode. - */ - if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) - eacr.e1 = 0; - // FIXME: uptodate checks ? - else if (etr_port0_uptodate && etr_port1_uptodate) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || - aib.esw.psc1 != etr_lpsc_operational_alt) - eacr.e1 = 0; - else if (etr_port0_uptodate && etr_port1_uptodate && - etr_compare_network(&etr_port0, &etr_port1)) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else { - /* Both ports not usable. */ - eacr.es = eacr.sl = 0; - sync_port = -1; - } - - /* - * If the clock is in sync just update the eacr and return. - * If there is no valid sync port wait for a port update. - */ - if ((eacr.es && check_sync_clock()) || sync_port < 0) { - etr_update_eacr(eacr); - etr_set_tolec_timeout(now); - goto out_unlock; - } - - /* - * Prepare control register for clock syncing - * (reset data port bit, set sync check control. - */ - eacr.dp = 0; - eacr.es = 1; - - /* - * Update eacr and try to synchronize the clock. If the update - * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occurred) or the - * clock syncing failed, reset the sync check control bit - * and set up a timer to try again after 0.5 seconds - */ - etr_update_eacr(eacr); - if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock_stop(&aib, sync_port) != 0) { - /* Sync failed. Try again in 1/2 second. */ - eacr.es = 0; - etr_update_eacr(eacr); - etr_set_sync_timeout(); - } else - etr_set_tolec_timeout(now); -out_unlock: - mutex_unlock(&etr_work_mutex); -} - -/* - * Sysfs interface functions - */ -static struct bus_type etr_subsys = { - .name = "etr", - .dev_name = "etr", -}; - -static struct device etr_port0_dev = { - .id = 0, - .bus = &etr_subsys, -}; - -static struct device etr_port1_dev = { - .id = 1, - .bus = &etr_subsys, -}; - -/* - * ETR subsys attributes - */ -static ssize_t etr_stepping_port_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", etr_port0.esw.p); -} - -static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); - -static ssize_t etr_stepping_mode_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - char *mode_str; - - if (etr_mode_is_pps(etr_eacr)) - mode_str = "pps"; - else if (etr_mode_is_etr(etr_eacr)) - mode_str = "etr"; - else - mode_str = "local"; - return sprintf(buf, "%s\n", mode_str); -} - -static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); - -/* - * ETR port attributes - */ -static inline struct etr_aib *etr_aib_from_dev(struct device *dev) -{ - if (dev == &etr_port0_dev) - return etr_port0_online ? &etr_port0 : NULL; - else - return etr_port1_online ? &etr_port1 : NULL; -} - -static ssize_t etr_online_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned int online; - - online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; - return sprintf(buf, "%i\n", online); -} - -static ssize_t etr_online_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - unsigned int value; - - value = simple_strtoul(buf, NULL, 0); - if (value != 0 && value != 1) - return -EINVAL; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); - if (dev == &etr_port0_dev) { - if (etr_port0_online == value) - goto out; /* Nothing to do. */ - etr_port0_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } else { - if (etr_port1_online == value) - goto out; /* Nothing to do. */ - etr_port1_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } -out: - mutex_unlock(&clock_sync_mutex); - return count; -} - -static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); - -static ssize_t etr_stepping_control_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_eacr.e0 : etr_eacr.e1); -} - -static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); - -static ssize_t etr_mode_code_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (!etr_port0_online && !etr_port1_online) - /* Status word is not uptodate if both ports are offline. */ - return -ENODATA; - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_port0.esw.psc0 : etr_port0.esw.psc1); -} - -static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); - -static ssize_t etr_untuned_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.u); -} - -static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); - -static ssize_t etr_network_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.net_id); -} - -static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); - -static ssize_t etr_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_id); -} - -static DEVICE_ATTR(id, 0400, etr_id_show, NULL); - -static ssize_t etr_port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_pn); -} - -static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); - -static ssize_t etr_coupled_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.c); -} - -static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); - -static ssize_t etr_local_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.blto); -} - -static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); - -static ssize_t etr_utc_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.buo); -} - -static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct device_attribute *etr_port_attributes[] = { - &dev_attr_online, - &dev_attr_stepping_control, - &dev_attr_state_code, - &dev_attr_untuned, - &dev_attr_network, - &dev_attr_id, - &dev_attr_port, - &dev_attr_coupled, - &dev_attr_local_time, - &dev_attr_utc_offset, - NULL -}; - -static int __init etr_register_port(struct device *dev) -{ - struct device_attribute **attr; - int rc; - - rc = device_register(dev); - if (rc) - goto out; - for (attr = etr_port_attributes; *attr; attr++) { - rc = device_create_file(dev, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= etr_port_attributes; attr--) - device_remove_file(dev, *attr); - device_unregister(dev); -out: - return rc; -} - -static void __init etr_unregister_port(struct device *dev) -{ - struct device_attribute **attr; - - for (attr = etr_port_attributes; *attr; attr++) - device_remove_file(dev, *attr); - device_unregister(dev); -} - -static int __init etr_init_sysfs(void) -{ - int rc; - - rc = subsys_system_register(&etr_subsys, NULL); - if (rc) - goto out; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); - if (rc) - goto out_unreg_subsys; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); - if (rc) - goto out_remove_stepping_port; - rc = etr_register_port(&etr_port0_dev); - if (rc) - goto out_remove_stepping_mode; - rc = etr_register_port(&etr_port1_dev); - if (rc) - goto out_remove_port0; - return 0; - -out_remove_port0: - etr_unregister_port(&etr_port0_dev); -out_remove_stepping_mode: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); -out_remove_stepping_port: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); -out_unreg_subsys: - bus_unregister(&etr_subsys); -out: - return rc; -} - -device_initcall(etr_init_sysfs); - /* * Server Time Protocol (STP) code. */ -static int stp_online; +static bool stp_online = true; static struct stp_sstpi stp_info; static void *stp_page; static void stp_work_fn(struct work_struct *work); -static DEFINE_MUTEX(stp_work_mutex); static DECLARE_WORK(stp_work, stp_work_fn); static struct timer_list stp_timer; static int __init early_parse_stp(char *p) { - if (strncmp(p, "off", 3) == 0) - stp_online = 0; - else if (strncmp(p, "on", 2) == 0) - stp_online = 1; - return 0; + return kstrtobool(p, &stp_online); } early_param("stp", early_parse_stp); @@ -1442,19 +418,23 @@ static void __init stp_reset(void) int rc; stp_page = (void *) get_zeroed_page(GFP_ATOMIC); - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warning("The real or virtual hardware system does " - "not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; - stp_online = 0; + stp_online = false; } } -static void stp_timeout(unsigned long dummy) +bool stp_enabled(void) +{ + return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online; +} +EXPORT_SYMBOL(stp_enabled); + +static void stp_timeout(struct timer_list *unused) { queue_work(time_sync_wq, &stp_work); } @@ -1463,7 +443,7 @@ static int __init stp_init(void) { if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return 0; - setup_timer(&stp_timer, stp_timeout, 0UL); + timer_setup(&stp_timer, stp_timeout, 0); time_init_wq(); if (!stp_online) return 0; @@ -1493,10 +473,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm) * After a STP sync check the clock is not in sync. The machine check * is broadcasted to all cpus at the same time. */ -void stp_sync_check(void) +int stp_sync_check(void) { disable_sync_clock(NULL); - queue_work(time_sync_wq, &stp_work); + return 1; } /* @@ -1505,55 +485,74 @@ void stp_sync_check(void) * have matching CTN ids and have a valid stratum-1 configuration * but the configurations do not match. */ -void stp_island_check(void) +int stp_island_check(void) { disable_sync_clock(NULL); + return 1; +} + +void stp_queue_work(void) +{ queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} static int stp_sync_clock(void *data) { + struct clock_sync_data *sync = data; + long clock_delta, flags; static int first; - unsigned long long old_clock, delta; - struct clock_sync_data *stp_sync; int rc; - stp_sync = data; - - if (xchg(&first, 1) == 1) { - /* Slave */ - clock_sync_cpu(stp_sync); - return 0; - } - - /* Wait until all other cpus entered the sync function. */ - while (atomic_read(&stp_sync->cpus) != 0) - cpu_relax(); - enable_sync_clock(); - - rc = 0; - if (stp_info.todoff[0] || stp_info.todoff[1] || - stp_info.todoff[2] || stp_info.todoff[3] || - stp_info.tmd != 2) { - old_clock = get_tod_clock(); - rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); - if (rc == 0) { - delta = adjust_time(old_clock, get_tod_clock(), 0); - fixup_clock_comparator(delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); - if (rc == 0 && stp_info.tmd != 2) - rc = -EAGAIN; + if (xchg(&first, 1) == 0) { + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&sync->cpus) != 0) + cpu_relax(); + rc = 0; + if (stp_info.todoff || stp_info.tmd != 2) { + flags = vdso_update_begin(); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, + &clock_delta); + if (rc == 0) { + sync->clock_delta = clock_delta; + clock_sync_global(clock_delta); + rc = __store_stpinfo(); + if (rc == 0 && stp_info.tmd != 2) + rc = -EAGAIN; + } + vdso_update_end(flags); } + sync->in_sync = rc ? -EAGAIN : 1; + xchg(&first, 0); + } else { + /* Slave */ + atomic_dec(&sync->cpus); + /* Wait for in_sync to be set. */ + while (READ_ONCE(sync->in_sync) == 0) + ; } - if (rc) { + if (sync->in_sync != 1) + /* Didn't work. Clear per-cpu in sync bit again. */ disable_sync_clock(NULL); - stp_sync->in_sync = -EAGAIN; - } else - stp_sync->in_sync = 1; - xchg(&first, 0); + /* Apply clock delta to per-CPU fields of this CPU. */ + clock_sync_local(sync->clock_delta); + return 0; } @@ -1567,160 +566,222 @@ static void stp_work_fn(struct work_struct *work) int rc; /* prevent multiple execution. */ - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); - del_timer_sync(&stp_timer); + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); + timer_delete_sync(&stp_timer); goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL); if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; /* Skip synchronization if the clock is already in sync. */ - if (check_sync_clock()) - goto out_unlock; - - memset(&stp_sync, 0, sizeof(stp_sync)); - get_online_cpus(); - atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); - put_online_cpus(); + if (!check_sync_clock()) { + memset(&stp_sync, 0, sizeof(stp_sync)); + cpus_read_lock(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); + cpus_read_unlock(); + } if (!check_sync_clock()) /* - * There is a usable clock but the synchonization failed. + * There is a usable clock but the synchronization failed. * Retry after a second. */ - mod_timer(&stp_timer, jiffies + HZ); + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); out_unlock: - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); } /* * STP subsys sysfs interface functions */ -static struct bus_type stp_subsys = { +static const struct bus_type stp_subsys = { .name = "stp", .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct device *dev, +static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sysfs_emit(buf, "%016lx\n", + *(unsigned long *)stp_info.ctnid); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR_RO(ctn_id); -static ssize_t stp_ctn_type_show(struct device *dev, +static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sysfs_emit(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR_RO(ctn_type); -static ssize_t stp_dst_offset_show(struct device *dev, +static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR_RO(dst_offset); -static ssize_t stp_leap_seconds_show(struct device *dev, +static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(leap_seconds); + +static ssize_t leap_seconds_scheduled_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stp_stzi stzi; + ssize_t ret; + + mutex_lock(&stp_mutex); + if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { + mutex_unlock(&stp_mutex); return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + } + + ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); + mutex_unlock(&stp_mutex); + if (ret < 0) + return ret; + + if (!stzi.lsoib.p) + return sysfs_emit(buf, "0,0\n"); + + return sysfs_emit(buf, "%lu,%d\n", + tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, + stzi.lsoib.nlso - stzi.lsoib.also); } -static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR_RO(leap_seconds_scheduled); -static ssize_t stp_stratum_show(struct device *dev, +static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR_RO(stratum); -static ssize_t stp_time_offset_show(struct device *dev, +static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return -ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR_RO(time_offset); -static ssize_t stp_time_zone_offset_show(struct device *dev, +static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(time_zone_offset, 0400, - stp_time_zone_offset_show, NULL); +static DEVICE_ATTR_RO(time_zone_offset); -static ssize_t stp_timing_mode_show(struct device *dev, +static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sysfs_emit(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR_RO(timing_mode); -static ssize_t stp_timing_state_show(struct device *dev, +static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sysfs_emit(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_mutex); + return ret; } -static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR_RO(timing_state); -static ssize_t stp_online_show(struct device *dev, +static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%i\n", stp_online); + return sysfs_emit(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct device *dev, +static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -1731,14 +792,14 @@ static ssize_t stp_online_store(struct device *dev, return -EINVAL; if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); + mutex_lock(&stp_mutex); stp_online = value; if (stp_online) set_bit(CLOCK_SYNC_STP, &clock_sync_flags); else clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); queue_work(time_sync_wq, &stp_work); - mutex_unlock(&clock_sync_mutex); + mutex_unlock(&stp_mutex); return count; } @@ -1746,46 +807,27 @@ static ssize_t stp_online_store(struct device *dev, * Can't use DEVICE_ATTR because the attribute should be named * stp/online but dev_attr_online already exists in this file .. */ -static struct device_attribute dev_attr_stp_online = { - .attr = { .name = "online", .mode = 0600 }, - .show = stp_online_show, - .store = stp_online_store, -}; - -static struct device_attribute *stp_attributes[] = { - &dev_attr_ctn_id, - &dev_attr_ctn_type, - &dev_attr_dst_offset, - &dev_attr_leap_seconds, - &dev_attr_stp_online, - &dev_attr_stratum, - &dev_attr_time_offset, - &dev_attr_time_zone_offset, - &dev_attr_timing_mode, - &dev_attr_timing_state, +static DEVICE_ATTR_RW(online); + +static struct attribute *stp_dev_attrs[] = { + &dev_attr_ctn_id.attr, + &dev_attr_ctn_type.attr, + &dev_attr_dst_offset.attr, + &dev_attr_leap_seconds.attr, + &dev_attr_online.attr, + &dev_attr_leap_seconds_scheduled.attr, + &dev_attr_stratum.attr, + &dev_attr_time_offset.attr, + &dev_attr_time_zone_offset.attr, + &dev_attr_timing_mode.attr, + &dev_attr_timing_state.attr, NULL }; +ATTRIBUTE_GROUPS(stp_dev); static int __init stp_init_sysfs(void) { - struct device_attribute **attr; - int rc; - - rc = subsys_system_register(&stp_subsys, NULL); - if (rc) - goto out; - for (attr = stp_attributes; *attr; attr++) { - rc = device_create_file(stp_subsys.dev_root, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= stp_attributes; attr--) - device_remove_file(stp_subsys.dev_root, *attr); - bus_unregister(&stp_subsys); -out: - return rc; + return subsys_system_register(&stp_subsys, stp_dev_groups); } device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b2e3e317004..1913a5566ac2 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,92 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ -#define KMSG_COMPONENT "cpu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "cpu: " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/uaccess.h> +#include <linux/sysctl.h> #include <linux/cpuset.h> #include <linux/device.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/init.h> +#include <linux/sched/topology.h> #include <linux/delay.h> +#include <linux/init.h> +#include <linux/slab.h> #include <linux/cpu.h> #include <linux/smp.h> #include <linux/mm.h> +#include <linux/nodemask.h> +#include <linux/node.h> +#include <asm/hiperdispatch.h> #include <asm/sysinfo.h> +#include <asm/asm.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) #define PTF_CHECK (2UL) +enum { + TOPOLOGY_MODE_HW, + TOPOLOGY_MODE_SINGLE, + TOPOLOGY_MODE_PACKAGE, + TOPOLOGY_MODE_UNINITIALIZED +}; + struct mask_info { struct mask_info *next; unsigned char id; cpumask_t mask; }; +static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; +static int cpu_management; -static int topology_enabled = 1; static DECLARE_WORK(topology_work, topology_work_fn); -/* topology_lock protects the socket and book linked lists */ -static DEFINE_SPINLOCK(topology_lock); +/* + * Socket/Book linked lists and cpu_topology updates are + * protected by "sched_domains_mutex". + */ static struct mask_info socket_info; static struct mask_info book_info; +static struct mask_info drawer_info; struct cpu_topology_s390 cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); -static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) { - cpumask_t mask; + static cpumask_t mask; + + cpumask_clear(&mask); + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) + goto out; + cpumask_set_cpu(cpu, &mask); + switch (topology_mode) { + case TOPOLOGY_MODE_HW: + while (info) { + if (cpumask_test_cpu(cpu, &info->mask)) { + cpumask_copy(&mask, &info->mask); + break; + } + info = info->next; + } + break; + case TOPOLOGY_MODE_PACKAGE: + cpumask_copy(&mask, cpu_present_mask); + break; + default: + fallthrough; + case TOPOLOGY_MODE_SINGLE: + break; + } + cpumask_and(&mask, &mask, &cpu_setup_mask); +out: + cpumask_copy(dst, &mask); +} + +static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) +{ + static cpumask_t mask; + unsigned int max_cpu; - cpumask_copy(&mask, cpumask_of(cpu)); - if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) - return mask; - for (; info; info = info->next) { - if (cpumask_test_cpu(cpu, &info->mask)) - return info->mask; + cpumask_clear(&mask); + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) + goto out; + cpumask_set_cpu(cpu, &mask); + if (topology_mode != TOPOLOGY_MODE_HW) + goto out; + cpu -= cpu % (smp_cpu_mtid + 1); + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + if (cpumask_test_cpu(cpu, &cpu_setup_mask)) + cpumask_set_cpu(cpu, &mask); } - return mask; +out: + cpumask_copy(dst, &mask); } -static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, - struct mask_info *socket, - int one_socket_per_cpu) +#define TOPOLOGY_CORE_BITS 64 + +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) { - unsigned int cpu; + struct cpu_topology_s390 *topo; + unsigned int core; - for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { - unsigned int rcpu; - int lcpu; + for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { + unsigned int max_cpu, rcore; + int cpu; - rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; - lcpu = smp_find_processor_id(rcpu); - if (lcpu < 0) + rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; + cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); + if (cpu < 0) continue; - cpumask_set_cpu(lcpu, &book->mask); - cpu_topology[lcpu].book_id = book->id; - cpumask_set_cpu(lcpu, &socket->mask); - cpu_topology[lcpu].core_id = rcpu; - if (one_socket_per_cpu) { - cpu_topology[lcpu].socket_id = rcpu; - socket = socket->next; - } else { - cpu_topology[lcpu].socket_id = socket->id; + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + topo = &cpu_topology[cpu]; + topo->drawer_id = drawer->id; + topo->book_id = book->id; + topo->socket_id = socket->id; + topo->core_id = rcore; + topo->thread_id = cpu; + topo->dedicated = tl_core->d; + cpumask_set_cpu(cpu, &drawer->mask); + cpumask_set_cpu(cpu, &book->mask); + cpumask_set_cpu(cpu, &socket->mask); + smp_cpu_set_polarization(cpu, tl_core->pp); + smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH); } - smp_cpu_set_polarization(lcpu, tl_cpu->pp); } - return socket; } static void clear_masks(void) @@ -103,25 +166,36 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } } static union topology_entry *next_tle(union topology_entry *tle) { if (!tle->nl) - return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_core *)tle + 1); return (union topology_entry *)((struct topology_container *)tle + 1); } -static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +static void tl_to_masks(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; + clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; case 2: book = book->next; book->id = tle->container.id; @@ -131,32 +205,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, socket, 0); - break; - default: - clear_masks(); - return; - } - tle = next_tle(tle); - } -} - -static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) -{ - struct mask_info *socket = &socket_info; - struct mask_info *book = &book_info; - union topology_entry *tle, *end; - - tle = info->tle; - end = (union topology_entry *)((unsigned long)info + info->length); - while (tle < end) { - switch (tle->nl) { - case 1: - book = book->next; - book->id = tle->container.id; - break; - case 0: - socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + add_cpus_to_mask(&tle->cpu, drawer, book, socket); break; default: clear_masks(); @@ -166,52 +215,32 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) } } -static void tl_to_masks(struct sysinfo_15_1_x *info) -{ - struct cpuid cpu_id; - - spin_lock_irq(&topology_lock); - get_cpu_id(&cpu_id); - clear_masks(); - switch (cpu_id.machine) { - case 0x2097: - case 0x2098: - __tl_to_masks_z10(info); - break; - default: - __tl_to_masks_generic(info); - } - spin_unlock_irq(&topology_lock); -} - static void topology_update_polarization_simple(void) { int cpu; - mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); - mutex_unlock(&smp_cpu_state_mutex); } static int ptf(unsigned long fc) { - int rc; + int cc; asm volatile( - " .insn rre,0xb9a20000,%1,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc) - : "d" (fc) : "cc"); - return rc; + " .insn rre,0xb9a20000,%[fc],%[fc]\n" + CC_IPM(cc) + : CC_OUT(cc, cc) + : [fc] "d" (fc) + : CC_CLOBBER); + return CC_TRANSFORM(cc); } int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -224,51 +253,91 @@ int topology_set_cpu_management(int fc) return rc; } -static void update_cpu_masks(void) +void update_cpu_masks(void) { - unsigned long flags; - int cpu; + struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; + int cpu, sibling, pkg_first, smt_first, id; - spin_lock_irqsave(&topology_lock, flags); for_each_possible_cpu(cpu) { - cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu); - cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu); - if (!MACHINE_HAS_TOPOLOGY) { - cpu_topology[cpu].core_id = cpu; - cpu_topology[cpu].socket_id = cpu; - cpu_topology[cpu].book_id = cpu; + topo = &cpu_topology[cpu]; + cpu_thread_map(&topo->thread_mask, cpu); + cpu_group_map(&topo->core_mask, &socket_info, cpu); + cpu_group_map(&topo->book_mask, &book_info, cpu); + cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); + topo->booted_cores = 0; + if (topology_mode != TOPOLOGY_MODE_HW) { + id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; + topo->thread_id = cpu; + topo->core_id = cpu; + topo->socket_id = id; + topo->book_id = id; + topo->drawer_id = id; + } + } + hd_reset_state(); + for_each_online_cpu(cpu) { + topo = &cpu_topology[cpu]; + pkg_first = cpumask_first(&topo->core_mask); + topo_package = &cpu_topology[pkg_first]; + if (cpu == pkg_first) { + for_each_cpu(sibling, &topo->core_mask) { + topo_sibling = &cpu_topology[sibling]; + smt_first = cpumask_first(&topo_sibling->thread_mask); + if (sibling == smt_first) { + topo_package->booted_cores++; + hd_add_core(sibling); + } + } + } else { + topo->booted_cores = topo_package->booted_cores; } } - spin_unlock_irqrestore(&topology_lock, flags); } void store_topology(struct sysinfo_15_1_x *info) { - if (topology_max_mnest >= 3) - stsi(info, 15, 1, 3); + stsi(info, 15, 1, topology_mnest_limit()); +} + +static void __arch_update_dedicated_flag(void *arg) +{ + if (topology_cpu_dedicated(smp_processor_id())) + set_cpu_flag(CIF_DEDICATED_CPU); else - stsi(info, 15, 1, 2); + clear_cpu_flag(CIF_DEDICATED_CPU); } -int arch_update_cpu_topology(void) +static int __arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; - struct device *dev; - int cpu; + int rc, hd_status; - if (!MACHINE_HAS_TOPOLOGY) { - update_cpu_masks(); - topology_update_polarization_simple(); - return 0; + hd_status = 0; + rc = 0; + mutex_lock(&smp_cpu_state_mutex); + if (cpu_has_topology()) { + rc = 1; + store_topology(info); + tl_to_masks(info); } - store_topology(info); - tl_to_masks(info); update_cpu_masks(); - for_each_online_cpu(cpu) { - dev = get_cpu_device(cpu); - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - } - return 1; + if (!cpu_has_topology()) + topology_update_polarization_simple(); + if (cpu_management == 1) + hd_status = hd_enable_hiperdispatch(); + mutex_unlock(&smp_cpu_state_mutex); + if (hd_status == 0) + hd_disable_hiperdispatch(); + return rc; +} + +int arch_update_cpu_topology(void) +{ + int rc; + + rc = __arch_update_cpu_topology(); + on_each_cpu(__arch_update_dedicated_flag, NULL, 0); + return rc; } static void topology_work_fn(struct work_struct *work) @@ -281,29 +350,33 @@ void topology_schedule_update(void) schedule_work(&topology_work); } -static void topology_timer_fn(unsigned long ignored) +static void topology_flush_work(void) +{ + flush_work(&topology_work); +} + +static void topology_timer_fn(struct timer_list *unused) { if (ptf(PTF_CHECK)) topology_schedule_update(); set_topology_timer(); } -static struct timer_list topology_timer = - TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); +static struct timer_list topology_timer; static atomic_t topology_poll = ATOMIC_INIT(0); static void set_topology_timer(void) { if (atomic_add_unless(&topology_poll, -1, 0)) - mod_timer(&topology_timer, jiffies + HZ / 10); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + HZ * 60); + mod_timer(&topology_timer, jiffies + secs_to_jiffies(60)); } void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. @@ -314,50 +387,25 @@ void topology_expect_change(void) set_topology_timer(); } -static int __init early_parse_topology(char *p) +static int set_polarization(int polarization) { - if (strncmp(p, "off", 3)) - return 0; - topology_enabled = 0; - return 0; -} -early_param("topology", early_parse_topology); - -static void __init alloc_masks(struct sysinfo_15_1_x *info, - struct mask_info *mask, int offset) -{ - int i, nr_masks; + int rc = 0; - nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; - for (i = 0; i < info->mnest - offset; i++) - nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; - nr_masks = max(nr_masks, 1); - for (i = 0; i < nr_masks; i++) { - mask->next = alloc_bootmem(sizeof(struct mask_info)); - mask = mask->next; - } -} - -void __init s390_init_cpu_topology(void) -{ - struct sysinfo_15_1_x *info; - int i; - - if (!MACHINE_HAS_TOPOLOGY) - return; - tl_info = alloc_bootmem_pages(PAGE_SIZE); - info = tl_info; - store_topology(info); - pr_info("The CPU configuration topology of the machine is:"); - for (i = 0; i < TOPOLOGY_NR_MAG; i++) - printk(KERN_CONT " %d", info->mag[i]); - printk(KERN_CONT " / %d\n", info->mnest); - alloc_masks(info, &socket_info, 1); - alloc_masks(info, &book_info, 2); + cpus_read_lock(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == polarization) + goto out; + rc = topology_set_cpu_management(polarization); + if (rc) + goto out; + cpu_management = polarization; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + cpus_read_unlock(); + return rc; } -static int cpu_management; - static ssize_t dispatching_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -365,7 +413,7 @@ static ssize_t dispatching_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", cpu_management); + count = sysfs_emit(buf, "%d\n", cpu_management); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -382,23 +430,10 @@ static ssize_t dispatching_store(struct device *dev, return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - rc = 0; - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); - if (cpu_management == val) - goto out; - rc = topology_set_cpu_management(val); - if (rc) - goto out; - cpu_management = val; - topology_expect_change(); -out: - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + rc = set_polarization(val); return rc ? rc : count; } -static DEVICE_ATTR(dispatching, 0644, dispatching_show, - dispatching_store); +static DEVICE_ATTR_RW(dispatching); static ssize_t cpu_polarization_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -409,19 +444,19 @@ static ssize_t cpu_polarization_show(struct device *dev, mutex_lock(&smp_cpu_state_mutex); switch (smp_cpu_get_polarization(cpu)) { case POLARIZATION_HRZ: - count = sprintf(buf, "horizontal\n"); + count = sysfs_emit(buf, "horizontal\n"); break; case POLARIZATION_VL: - count = sprintf(buf, "vertical:low\n"); + count = sysfs_emit(buf, "vertical:low\n"); break; case POLARIZATION_VM: - count = sprintf(buf, "vertical:medium\n"); + count = sysfs_emit(buf, "vertical:medium\n"); break; case POLARIZATION_VH: - count = sprintf(buf, "vertical:high\n"); + count = sysfs_emit(buf, "vertical:high\n"); break; default: - count = sprintf(buf, "unknown\n"); + count = sysfs_emit(buf, "unknown\n"); break; } mutex_unlock(&smp_cpu_state_mutex); @@ -438,20 +473,226 @@ static struct attribute_group topology_cpu_attr_group = { .attrs = topology_cpu_attrs, }; +static ssize_t cpu_dedicated_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu)); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); + +static struct attribute *topology_extra_cpu_attrs[] = { + &dev_attr_dedicated.attr, + NULL, +}; + +static struct attribute_group topology_extra_cpu_attr_group = { + .attrs = topology_extra_cpu_attrs, +}; + int topology_cpu_init(struct cpu *cpu) { - return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); + int rc; + + rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); + if (rc || !cpu_has_topology()) + return rc; + rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); + if (rc) + sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); + return rc; +} + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_mask; +} + +static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return &cpu_topology[cpu].book_mask; +} + +static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return &cpu_topology[cpu].drawer_mask; +} + +static struct sched_domain_topology_level s390_topology[] = { + SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT), + SDTL_INIT(tl_mc_mask, cpu_core_flags, MC), + SDTL_INIT(tl_book_mask, NULL, BOOK), + SDTL_INIT(tl_drawer_mask, NULL, DRAWER), + SDTL_INIT(tl_pkg_mask, NULL, PKG), + { NULL, }, +}; + +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8); + mask = mask->next; + } +} + +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + +void __init topology_init_early(void) +{ + struct sysinfo_15_1_x *info; + + set_sched_topology(s390_topology); + if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { + if (cpu_has_topology()) + topology_mode = TOPOLOGY_MODE_HW; + else + topology_mode = TOPOLOGY_MODE_SINGLE; + } + if (!cpu_has_topology()) + goto out; + tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); + info = tl_info; + store_topology(info); + cpu_management = detect_polarization(info->tle); + pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", + info->mag[0], info->mag[1], info->mag[2], info->mag[3], + info->mag[4], info->mag[5], info->mnest); + alloc_masks(info, &socket_info, 1); + alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); +out: + cpumask_set_cpu(0, &cpu_setup_mask); + __arch_update_cpu_topology(); + __arch_update_dedicated_flag(NULL); +} + +static inline int topology_get_mode(int enabled) +{ + if (!enabled) + return TOPOLOGY_MODE_SINGLE; + return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } +static inline int topology_is_enabled(void) +{ + return topology_mode != TOPOLOGY_MODE_SINGLE; +} + +static int __init topology_setup(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + topology_mode = topology_get_mode(enabled); + return 0; +} +early_param("topology", topology_setup); + +static int topology_ctl_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int enabled = topology_is_enabled(); + int new_mode; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + mutex_lock(&smp_cpu_state_mutex); + new_mode = topology_get_mode(enabled); + if (topology_mode != new_mode) { + topology_mode = new_mode; + topology_schedule_update(); + } + mutex_unlock(&smp_cpu_state_mutex); + topology_flush_work(); + + return rc; +} + +static int polarization_ctl_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int polarization; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &polarization, + .maxlen = sizeof(int), + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + polarization = cpu_management; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + return set_polarization(polarization); +} + +static const struct ctl_table topology_ctl_table[] = { + { + .procname = "topology", + .mode = 0644, + .proc_handler = topology_ctl_handler, + }, + { + .procname = "polarization", + .mode = 0644, + .proc_handler = polarization_ctl_handler, + }, +}; + static int __init topology_init(void) { - if (!MACHINE_HAS_TOPOLOGY) { + struct device *dev_root; + int rc = 0; + + timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); + if (cpu_has_topology()) + set_topology_timer(); + else topology_update_polarization_simple(); - goto out; + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL)) + set_polarization(1); + register_sysctl("s390", topology_ctl_table); + + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_dispatching); + put_device(dev_root); } - set_topology_timer(); -out: - update_cpu_masks(); - return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); + return rc; } device_initcall(topology_init); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c new file mode 100644 index 000000000000..11a669f3cc93 --- /dev/null +++ b/arch/s390/kernel/trace.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tracepoint definitions for s390 + * + * Copyright IBM Corp. 2015 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/percpu.h> +#define CREATE_TRACE_POINTS +#include <asm/trace/diag.h> + +EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); + +static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); + +void notrace trace_s390_diagnose_norecursion(int diag_nr) +{ + unsigned long flags; + unsigned int *depth; + + /* Avoid lockdep recursion. */ + if (IS_ENABLED(CONFIG_LOCKDEP)) + return; + local_irq_save(flags); + depth = this_cpu_ptr(&diagnose_trace_depth); + if (*depth == 0) { + (*depth)++; + trace_s390_diagnose(diag_nr); + (*depth)--; + } + local_irq_restore(flags); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c5762324d9ee..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -1,161 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <linux/module.h> +#include <linux/randomize_kstack.h> +#include <linux/extable.h> #include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/mm.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/cpu.h> +#include <linux/entry-common.h> +#include <linux/kmsan.h> +#include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> +#include <asm/vtime.h> +#include <asm/fpu.h> +#include <asm/fault.h> #include "entry.h" -int show_unhandled_signals = 1; - static inline void __user *get_trap_ip(struct pt_regs *regs) { -#ifdef CONFIG_64BIT unsigned long address; if (regs->int_code & 0x200) - address = *(unsigned long *)(current->thread.trap_tdb + 24); + address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) - ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); -#else - return (void __user *) - ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); -#endif -} - -static inline void report_user_fault(struct pt_regs *regs, int signr) -{ - if ((task_pid_nr(current) > 1) && !show_unhandled_signals) - return; - if (!unhandled_signal(current, signr)) - return; - if (!printk_ratelimit()) - return; - printk("User process fault: interruption code 0x%X ", regs->int_code); - print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); - printk("\n"); - show_regs(regs); + return (void __user *)(address - (regs->int_code >> 16)); } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return 1; } +#endif -static void __kprobes do_trap(struct pt_regs *regs, - int si_signo, int si_code, char *str) +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - siginfo_t info; - - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) - return; - if (user_mode(regs)) { - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = get_trap_ip(regs); - force_sig_info(si_signo, &info, current); - report_user_fault(regs, si_signo); - } else { - const struct exception_table_entry *fixup; - fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); - if (fixup) - regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; - else { - enum bug_trap_type btt; - - btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); - if (btt == BUG_TRAP_TYPE_WARN) - return; + force_sig_fault(si_signo, si_code, get_trap_ip(regs)); + report_user_fault(regs, si_signo, 0); + } else { + if (!fixup_exception(regs)) die(regs, str); - } - } + } } -void __kprobes do_per_trap(struct pt_regs *regs) +static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - siginfo_t info; + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) + return; + do_report_trap(regs, si_signo, si_code, str); +} +NOKPROBE_SYMBOL(do_trap); +void do_per_trap(struct pt_regs *regs) +{ if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; if (!current->ptrace) return; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = - (void __force __user *) current->thread.per_event.address; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } +NOKPROBE_SYMBOL(do_per_trap); -void default_trap_handler(struct pt_regs *regs) +static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { - report_user_fault(regs, SIGSEGV); - do_exit(SIGSEGV); + report_user_fault(regs, SIGSEGV, 0); + force_exit_sig(SIGSEGV); } else die(regs, "Unknown program exception"); } #define DO_ERROR_INFO(name, signr, sicode, str) \ -void name(struct pt_regs *regs) \ +static void name(struct pt_regs *regs) \ { \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, - "translation exception") - -#ifdef CONFIG_64BIT -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") -#endif - -static inline void do_fp_trap(struct pt_regs *regs, int fpc) +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") + +static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -173,201 +129,89 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc) do_trap(regs, SIGFPE, si_code, "floating point exception"); } -void __kprobes illegal_op(struct pt_regs *regs) +static void translation_specification_exception(struct pt_regs *regs) { - siginfo_t info; - __u8 opcode[6]; - __u16 __user *location; + /* May never happen. */ + panic("Translation-Specification Exception"); +} + +static void illegal_op(struct pt_regs *regs) +{ + int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (current->ptrace) { - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = location; - force_sig_info(SIGTRAP, &info, current); - } else + if (opcode == S390_BREAKPOINT_U16) { + if (current->ptrace) + force_sig_fault(SIGTRAP, TRAP_BRKPT, location); + else signal = SIGILL; -#ifdef CONFIG_MATHEMU - } else if (opcode[0] == 0xb3) { - if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; - signal = math_emu_b3(opcode, regs); - } else if (opcode[0] == 0xed) { - if (get_user(*((__u32 *) (opcode+2)), - (__u32 __user *)(location+1))) - return; - signal = math_emu_ed(opcode, regs); - } else if (*((__u16 *) opcode) == 0xb299) { - if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; - signal = math_emu_srnm(opcode, regs); - } else if (*((__u16 *) opcode) == 0xb29c) { - if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; - signal = math_emu_stfpc(opcode, regs); - } else if (*((__u16 *) opcode) == 0xb29d) { - if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; - signal = math_emu_lfpc(opcode, regs); +#ifdef CONFIG_UPROBES + } else if (opcode == UPROBE_SWBP_INSN) { + is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; - } else { - /* - * If we get an illegal op in kernel mode, send it through the - * kprobes notifier. If kprobes doesn't pick it up, SIGILL - */ - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + } + } + /* + * This is either an illegal op in kernel mode, or user space trapped + * on a uprobes illegal instruction. See if kprobes or uprobes picks + * it up. If not, SIGILL. + */ + if (is_uprobe_insn || !user_mode(regs)) { + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } - -#ifdef CONFIG_MATHEMU - if (signal == SIGFPE) - do_fp_trap(regs, current->thread.fp_regs.fpc); - else if (signal == SIGSEGV) - do_trap(regs, signal, SEGV_MAPERR, "user address fault"); - else -#endif if (signal) do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } +NOKPROBE_SYMBOL(illegal_op); - -#ifdef CONFIG_MATHEMU -void specification_exception(struct pt_regs *regs) +static void vector_exception(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location = NULL; - int signal = 0; - - location = (__u16 __user *) get_trap_ip(regs); - - if (user_mode(regs)) { - get_user(*((__u16 *) opcode), location); - switch (opcode[0]) { - case 0x28: /* LDR Rx,Ry */ - signal = math_emu_ldr(opcode); - break; - case 0x38: /* LER Rx,Ry */ - signal = math_emu_ler(opcode); - break; - case 0x60: /* STD R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_std(opcode, regs); - break; - case 0x68: /* LD R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_ld(opcode, regs); - break; - case 0x70: /* STE R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_ste(opcode, regs); - break; - case 0x78: /* LE R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_le(opcode, regs); - break; - default: - signal = SIGILL; - break; - } - } else - signal = SIGILL; - - if (signal == SIGFPE) - do_fp_trap(regs, current->thread.fp_regs.fpc); - else if (signal) - do_trap(regs, signal, ILL_ILLOPN, "specification exception"); + int si_code, vic; + + /* get vector interrupt code from fpc */ + save_user_fpu_regs(); + vic = (current->thread.ufpu.fpc & 0xf00) >> 8; + switch (vic) { + case 1: /* invalid vector operation */ + si_code = FPE_FLTINV; + break; + case 2: /* division by zero */ + si_code = FPE_FLTDIV; + break; + case 3: /* overflow */ + si_code = FPE_FLTOVF; + break; + case 4: /* underflow */ + si_code = FPE_FLTUND; + break; + case 5: /* inexact */ + si_code = FPE_FLTRES; + break; + default: /* unknown cause */ + si_code = 0; + } + do_trap(regs, SIGFPE, si_code, "vector exception"); } -#else -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); -#endif -void data_exception(struct pt_regs *regs) +static void data_exception(struct pt_regs *regs) { - __u16 __user *location; - int signal = 0; - - location = get_trap_ip(regs); - - if (MACHINE_HAS_IEEE) - asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); - -#ifdef CONFIG_MATHEMU - else if (user_mode(regs)) { - __u8 opcode[6]; - get_user(*((__u16 *) opcode), location); - switch (opcode[0]) { - case 0x28: /* LDR Rx,Ry */ - signal = math_emu_ldr(opcode); - break; - case 0x38: /* LER Rx,Ry */ - signal = math_emu_ler(opcode); - break; - case 0x60: /* STD R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_std(opcode, regs); - break; - case 0x68: /* LD R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_ld(opcode, regs); - break; - case 0x70: /* STE R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_ste(opcode, regs); - break; - case 0x78: /* LE R,D(X,B) */ - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_le(opcode, regs); - break; - case 0xb3: - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_b3(opcode, regs); - break; - case 0xed: - get_user(*((__u32 *) (opcode+2)), - (__u32 __user *)(location+1)); - signal = math_emu_ed(opcode, regs); - break; - case 0xb2: - if (opcode[1] == 0x99) { - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_srnm(opcode, regs); - } else if (opcode[1] == 0x9c) { - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_stfpc(opcode, regs); - } else if (opcode[1] == 0x9d) { - get_user(*((__u16 *) (opcode+2)), location+1); - signal = math_emu_lfpc(opcode, regs); - } else - signal = SIGILL; - break; - default: - signal = SIGILL; - break; - } - } -#endif - if (current->thread.fp_regs.fpc & FPC_DXC_MASK) - signal = SIGFPE; + save_user_fpu_regs(); + if (current->thread.ufpu.fpc & FPC_DXC_MASK) + do_fp_trap(regs, current->thread.ufpu.fpc); else - signal = SIGILL; - if (signal == SIGFPE) - do_fp_trap(regs, current->thread.fp_regs.fpc); - else if (signal) - do_trap(regs, signal, ILL_ILLOPN, "data exception"); + do_trap(regs, SIGILL, ILL_ILLOPN, "data exception"); } -void space_switch_exception(struct pt_regs *regs) +static void space_switch_exception(struct pt_regs *regs) { /* Set user psw back to home space mode. */ if (user_mode(regs)) @@ -376,16 +220,186 @@ void space_switch_exception(struct pt_regs *regs) do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } -void __kprobes kernel_stack_overflow(struct pt_regs * regs) +static void monitor_event_exception(struct pt_regs *regs) { + if (user_mode(regs)) + return; + switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { + case BUG_TRAP_TYPE_NONE: + fixup_exception(regs); + break; + case BUG_TRAP_TYPE_WARN: + break; + case BUG_TRAP_TYPE_BUG: + die(regs, "monitor event"); + break; + } +} + +void kernel_stack_invalid(struct pt_regs *regs) +{ + /* + * Normally regs are unpoisoned by the generic entry code, but + * kernel_stack_overflow() is a rare case that is called bypassing it. + */ + kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); +} +NOKPROBE_SYMBOL(kernel_stack_invalid); + +static void __init test_monitor_call(void) +{ + int val = 1; + + if (!IS_ENABLED(CONFIG_BUG)) + return; + asm_inline volatile( + " mc 0,0\n" + "0: lhi %[val],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [val] "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); } void __init trap_init(void) { + struct lowcore *lc = get_lowcore(); + unsigned long flags; + struct ctlreg cr0; + + local_irq_save(flags); + cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); + psw_bits(lc->external_new_psw).mcheck = 1; + psw_bits(lc->program_new_psw).mcheck = 1; + psw_bits(lc->svc_new_psw).mcheck = 1; + psw_bits(lc->io_new_psw).mcheck = 1; + local_ctl_load(0, &cr0); + local_irq_restore(flags); local_mcck_enable(); + test_monitor_call(); +} + +static void (*pgm_check_table[128])(struct pt_regs *regs); + +void noinstr __do_pgm_check(struct pt_regs *regs) +{ + struct lowcore *lc = get_lowcore(); + irqentry_state_t state; + unsigned int trapnr; + union teid teid; + + teid.val = lc->trans_exc_code; + regs->int_code = lc->pgm_int_code; + regs->int_parm_long = teid.val; + /* + * In case of a guest fault, short-circuit the fault handler and return. + * This way the sie64a() function will return 0; fault address and + * other relevant bits are saved in current->thread.gmap_teid, and + * the fault number in current->thread.gmap_int_code. KVM will be + * able to use this information to handle the fault. + */ + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) { + current->thread.gmap_teid.val = regs->int_parm_long; + current->thread.gmap_int_code = regs->int_code & 0xffff; + return; + } + state = irqentry_enter(regs); + if (user_mode(regs)) { + update_timer_sys(); + if (!cpu_has_bear()) { + if (regs->last_break < 4096) + regs->last_break = 1; + } + current->thread.last_break = regs->last_break; + } + if (lc->pgm_code & 0x0200) { + /* transaction abort */ + current->thread.trap_tdb = lc->pgm_tdb; + } + if (lc->pgm_code & PGM_INT_CODE_PER) { + if (user_mode(regs)) { + struct per_event *ev = ¤t->thread.per_event; + + set_thread_flag(TIF_PER_TRAP); + ev->address = lc->per_address; + ev->cause = lc->per_code_combined; + ev->paid = lc->per_access_id; + } else { + /* PER event in kernel is kprobes */ + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + do_per_trap(regs); + goto out; + } + } + if (!irqs_disabled_flags(regs->psw.mask)) + trace_hardirqs_on(); + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + trapnr = regs->int_code & PGM_INT_CODE_MASK; + if (trapnr) + pgm_check_table[trapnr](regs); +out: + local_irq_disable(); + irqentry_exit(regs, state); } + +/* + * The program check table contains exactly 128 (0x00-0x7f) entries. Each + * line defines the function to be called corresponding to the program check + * interruption code. + */ +static void (*pgm_check_table[128])(struct pt_regs *regs) = { + [0x00] = default_trap_handler, + [0x01] = illegal_op, + [0x02] = privileged_op, + [0x03] = execute_exception, + [0x04] = do_protection_exception, + [0x05] = addressing_exception, + [0x06] = specification_exception, + [0x07] = data_exception, + [0x08] = overflow_exception, + [0x09] = divide_exception, + [0x0a] = overflow_exception, + [0x0b] = divide_exception, + [0x0c] = hfp_overflow_exception, + [0x0d] = hfp_underflow_exception, + [0x0e] = hfp_significance_exception, + [0x0f] = hfp_divide_exception, + [0x10] = do_dat_exception, + [0x11] = do_dat_exception, + [0x12] = translation_specification_exception, + [0x13] = special_op_exception, + [0x14] = default_trap_handler, + [0x15] = operand_exception, + [0x16] = default_trap_handler, + [0x17] = default_trap_handler, + [0x18] = transaction_exception, + [0x19] = default_trap_handler, + [0x1a] = default_trap_handler, + [0x1b] = vector_exception, + [0x1c] = space_switch_exception, + [0x1d] = hfp_sqrt_exception, + [0x1e ... 0x37] = default_trap_handler, + [0x38] = do_dat_exception, + [0x39] = do_dat_exception, + [0x3a] = do_dat_exception, + [0x3b] = do_dat_exception, + [0x3c] = default_trap_handler, + [0x3d] = do_secure_storage_access, + [0x3e] = default_trap_handler, + [0x3f] = default_trap_handler, + [0x40] = monitor_event_exception, + [0x41 ... 0x7f] = default_trap_handler, +}; + +#define COND_TRAP(x) asm( \ + ".weak " __stringify(x) "\n\t" \ + ".set " __stringify(x) "," \ + __stringify(default_trap_handler)) + +COND_TRAP(do_secure_storage_access); diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c new file mode 100644 index 000000000000..0f88caca4eaf --- /dev/null +++ b/arch/s390/kernel/unwind_bc.c @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/interrupt.h> +#include <asm/sections.h> +#include <asm/ptrace.h> +#include <asm/bitops.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + return __kernel_text_address(state->ip) ? state->ip : 0; +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +static bool outside_of_stack(struct unwind_state *state, unsigned long sp) +{ + return (sp <= state->sp) || + (sp > state->stack_info.end - sizeof(struct stack_frame)); +} + +static bool update_stack_info(struct unwind_state *state, unsigned long sp) +{ + struct stack_info *info = &state->stack_info; + unsigned long *mask = &state->stack_mask; + + /* New stack pointer leaves the current stack */ + if (get_stack_info(sp, state->task, info, mask) != 0 || + !on_stack(info, sp, sizeof(struct stack_frame))) + /* 'sp' does not point to a valid stack */ + return false; + return true; +} + +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; +} + +/* Avoid KMSAN false positives from touching uninitialized frames. */ +__no_kmsan_checks +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long sp, ip; + bool reliable; + + regs = state->regs; + if (unlikely(regs)) { + sp = state->sp; + sf = (struct stack_frame *) sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + reliable = false; + regs = NULL; + /* skip bogus %r14 or if is the same as regs->psw.addr */ + if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) { + state->regs = NULL; + return unwind_next_frame(state); + } + } else { + sf = (struct stack_frame *) state->sp; + sp = READ_ONCE_NOCHECK(sf->back_chain); + if (likely(sp)) { + /* Non-zero back-chain points to the previous frame */ + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + sf = (struct stack_frame *) sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + reliable = true; + } else { + /* No back-chain, look for a pt_regs structure */ + sp = state->sp + STACK_FRAME_OVERHEAD; + if (!on_stack(info, sp, sizeof(struct pt_regs))) + goto out_err; + regs = (struct pt_regs *) sp; + if (is_final_pt_regs(state, regs)) + goto out_stop; + ip = READ_ONCE_NOCHECK(regs->psw.addr); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + reliable = true; + } + } + + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto out_err; + + /* Update unwind state */ + state->sp = sp; + state->regs = regs; + state->reliable = reliable; + state->ip = unwind_recover_ret_addr(state, ip); + return true; + +out_err: + state->error = true; +out_stop: + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +/* Avoid KMSAN false positives from touching uninitialized frames. */ +__no_kmsan_checks +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long first_frame) +{ + struct stack_info *info = &state->stack_info; + struct stack_frame *sf; + unsigned long ip, sp; + + memset(state, 0, sizeof(*state)); + state->task = task; + state->regs = regs; + + /* Don't even attempt to start from user mode regs: */ + if (regs && user_mode(regs)) { + info->type = STACK_TYPE_UNKNOWN; + return; + } + + /* Get the instruction pointer from pt_regs or the stack frame */ + if (regs) { + ip = regs->psw.addr; + sp = regs->gprs[15]; + } else if (task == current) { + sp = current_frame_address(); + } else { + sp = task->thread.ksp; + } + + /* Get current stack pointer and initialize stack info */ + if (!update_stack_info(state, sp)) { + /* Something is wrong with the stack pointer */ + info->type = STACK_TYPE_UNKNOWN; + state->error = true; + return; + } + + if (!regs) { + /* Stack frame is within valid stack */ + sf = (struct stack_frame *)sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + } + + /* Update unwind state */ + state->sp = sp; + state->reliable = true; + state->ip = unwind_recover_ret_addr(state, ip); + + if (!first_frame) + return; + /* Skip through the call chain to the specified starting frame */ + while (!unwind_done(state)) { + if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) { + if (state->sp >= first_frame) + break; + } + unwind_next_frame(state); + } +} +EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c new file mode 100644 index 000000000000..c624f3361e43 --- /dev/null +++ b/arch/s390/kernel/uprobes.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#include <linux/uaccess.h> +#include <linux/uprobes.h> +#include <linux/kdebug.h> +#include <linux/sched/task_stack.h> + +#include <asm/facility.h> +#include <asm/kprobes.h> +#include <asm/dis.h> +#include "entry.h" + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + return probe_is_prohibited_opcode(auprobe->insn); +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) + return -EINVAL; + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) + return -EINVAL; + clear_thread_flag(TIF_PER_TRAP); + auprobe->saved_per = psw_bits(regs->psw).per; + auprobe->saved_int_code = regs->int_code; + regs->int_code = UPROBE_TRAP_NR; + regs->psw.addr = current->utask->xol_vaddr; + set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + + if (regs->int_code != UPROBE_TRAP_NR) + return true; + return false; +} + +static int check_per_event(unsigned short cause, unsigned long control, + struct pt_regs *regs) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return 0; + /* user space single step */ + if (control == 0) + return 1; + /* over indication for storage alteration */ + if ((control & 0x20200000) && (cause & 0x2000)) + return 1; + if (cause & 0x8000) { + /* all branches */ + if ((control & 0x80800000) == 0x80000000) + return 1; + /* branch into selected range */ + if (((control & 0x80800000) == 0x80800000) && + regs->psw.addr >= current->thread.per_user.start && + regs->psw.addr <= current->thread.per_user.end) + return 1; + } + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int fixup = probe_get_fixup_type(auprobe->insn); + struct uprobe_task *utask = current->utask; + + clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + psw_bits(regs->psw).per = auprobe->saved_per; + regs->int_code = auprobe->saved_int_code; + + if (fixup & FIXUP_PSW_NORMAL) + regs->psw.addr += utask->vaddr - utask->xol_vaddr; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (auprobe->insn[0] & 0xf0) >> 4; + + regs->gprs[reg] += utask->vaddr - utask->xol_vaddr; + } + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(auprobe->insn[0] >> 8); + + if (regs->psw.addr - utask->xol_vaddr == ilen) + regs->psw.addr = utask->vaddr + ilen; + } + if (check_per_event(current->thread.per_event.cause, + current->thread.per_user.control, regs)) { + /* fix per address */ + current->thread.per_event.address = utask->vaddr; + /* trigger per event */ + set_thread_flag(TIF_PER_TRAP); + } + return 0; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!user_mode(regs)) + return NOTIFY_DONE; + if (regs->int_code & 0x200) /* Trap during transaction */ + return NOTIFY_DONE; + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + default: + break; + } + return NOTIFY_DONE; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + clear_thread_flag(TIF_UPROBE_SINGLESTEP); + regs->int_code = auprobe->saved_int_code; + regs->psw.addr = current->utask->vaddr; + current->thread.per_event.address = current->utask->vaddr; +} + +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs) +{ + unsigned long orig; + + orig = regs->gprs[14]; + regs->gprs[14] = trampoline; + return orig; +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return user_stack_pointer(regs) <= ret->stack; + else + return user_stack_pointer(regs) < ret->stack; +} + +/* Instruction Emulation */ + +#define EMU_ILLEGAL_OP 1 +#define EMU_SPECIFICATION 2 +#define EMU_ADDRESSING 3 + +#define emu_load_ril(ptr, output) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else \ + *(output) = input; \ + __rc; \ +}) + +#define emu_store_ril(regs, ptr, input) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(ptr) __ptr = (ptr); \ + int __rc = 0; \ + \ + if ((u64 __force)__ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (put_user(*(input), __ptr)) \ + __rc = EMU_ADDRESSING; \ + if (__rc == 0) \ + sim_stor_event(regs, \ + (void __force *)__ptr, \ + mask + 1); \ + __rc; \ +}) + +#define emu_cmp_ril(regs, ptr, cmp) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else if (input > *(cmp)) \ + psw_bits((regs)->psw).cc = 1; \ + else if (input < *(cmp)) \ + psw_bits((regs)->psw).cc = 2; \ + else \ + psw_bits((regs)->psw).cc = 0; \ + __rc; \ +}) + +struct insn_ril { + u8 opc0; + u8 reg : 4; + u8 opc1 : 4; + s32 disp; +} __packed; + +union split_register { + u64 u64; + u32 u32[2]; + u16 u16[4]; + s64 s64; + s32 s32[2]; + s16 s16[4]; +}; + +/* + * If user per registers are setup to trace storage alterations and an + * emulated store took place on a fitting address a user trap is generated. + */ +static void sim_stor_event(struct pt_regs *regs, void *addr, int len) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return; + if (!(current->thread.per_user.control & PER_EVENT_STORE)) + return; + if ((void *)current->thread.per_user.start > (addr + len)) + return; + if ((void *)current->thread.per_user.end < addr) + return; + current->thread.per_event.address = regs->psw.addr; + current->thread.per_event.cause = PER_EVENT_STORE >> 16; + set_thread_flag(TIF_PER_TRAP); +} + +/* + * pc relative instructions are emulated, since parameters may not be + * accessible from the xol area due to range limitations. + */ +static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union split_register *rx; + struct insn_ril *insn; + unsigned int ilen; + void *uptr; + int rc = 0; + + insn = (struct insn_ril *) &auprobe->insn; + rx = (union split_register *) ®s->gprs[insn->reg]; + uptr = (void *)(regs->psw.addr + (insn->disp * 2)); + ilen = insn_length(insn->opc0); + + switch (insn->opc0) { + case 0xc0: + switch (insn->opc1) { + case 0x00: /* larl */ + rx->u64 = (unsigned long)uptr; + break; + } + break; + case 0xc4: + switch (insn->opc1) { + case 0x02: /* llhrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]); + break; + case 0x04: /* lghrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u64); + break; + case 0x05: /* lhrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]); + break; + case 0x06: /* llghrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u64); + break; + case 0x08: /* lgrl */ + rc = emu_load_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* lgfrl */ + rc = emu_load_ril((s32 __user *)uptr, &rx->u64); + break; + case 0x0d: /* lrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]); + break; + case 0x0e: /* llgfrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u64); + break; + case 0x07: /* sthrl */ + rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]); + break; + case 0x0b: /* stgrl */ + rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0f: /* strl */ + rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + case 0xc6: + switch (insn->opc1) { + case 0x04: /* cghrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); + break; + case 0x05: /* chrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]); + break; + case 0x06: /* clghrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64); + break; + case 0x07: /* clhrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]); + break; + case 0x08: /* cgrl */ + rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64); + break; + case 0x0a: /* clgrl */ + rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* cgfrl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64); + break; + case 0x0d: /* crl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]); + break; + case 0x0e: /* clgfrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64); + break; + case 0x0f: /* clrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + } + regs->psw.addr = __forward_psw(regs->psw, ilen); + switch (rc) { + case EMU_ILLEGAL_OP: + regs->int_code = ilen << 16 | 0x0001; + do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL); + break; + case EMU_SPECIFICATION: + regs->int_code = ilen << 16 | 0x0006; + do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL); + break; + case EMU_ADDRESSING: + regs->int_code = ilen << 16 | 0x0005; + do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL); + break; + } +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || + (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)) { + regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); + do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); + return true; + } + if (probe_is_insn_relative_long(auprobe->insn)) { + handle_insn_ril(auprobe, regs); + return true; + } + return false; +} diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c new file mode 100644 index 000000000000..ed46950be86f --- /dev/null +++ b/arch/s390/kernel/uv.c @@ -0,0 +1,947 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Ultravisor functions and initialization + * + * Copyright IBM Corp. 2019, 2024 + */ +#define pr_fmt(fmt) "prot_virt: " fmt + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sizes.h> +#include <linux/bitmap.h> +#include <linux/memblock.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/pagewalk.h> +#include <linux/backing-dev.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/uv.h> + +/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ +int __bootdata_preserved(prot_virt_guest); +EXPORT_SYMBOL(prot_virt_guest); + +/* + * uv_info contains both host and guest information but it's currently only + * expected to be used within modules if it's the KVM module or for + * any PV guest module. + * + * The kernel itself will write these values once in uv_query_info() + * and then make some of them readable via a sysfs interface. + */ +struct uv_info __bootdata_preserved(uv_info); +EXPORT_SYMBOL(uv_info); + +int __bootdata_preserved(prot_virt_host); +EXPORT_SYMBOL(prot_virt_host); + +static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len) +{ + struct uv_cb_init uvcb = { + .header.cmd = UVC_CMD_INIT_UV, + .header.len = sizeof(uvcb), + .stor_origin = stor_base, + .stor_len = stor_len, + }; + + if (uv_call(0, (uint64_t)&uvcb)) { + pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n", + uvcb.header.rc, uvcb.header.rrc); + return -1; + } + return 0; +} + +void __init setup_uv(void) +{ + void *uv_stor_base; + + if (!is_prot_virt_host()) + return; + + uv_stor_base = memblock_alloc_try_nid( + uv_info.uv_base_stor_len, SZ_1M, SZ_2G, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); + if (!uv_stor_base) { + pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n", + uv_info.uv_base_stor_len); + goto fail; + } + + if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) { + memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + goto fail; + } + + pr_info("Reserving %luMB as ultravisor base storage\n", + uv_info.uv_base_stor_len >> 20); + return; +fail: + pr_info("Disabling support for protected virtualization"); + prot_virt_host = 0; +} + +/* + * Requests the Ultravisor to pin the page in the shared state. This will + * cause an intercept when the guest attempts to unshare the pinned page. + */ +int uv_pin_shared(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_PIN_PAGE_SHARED, + .header.len = sizeof(uvcb), + .paddr = paddr, + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(uv_pin_shared); + +/* + * Requests the Ultravisor to destroy a guest page and make it + * accessible to the host. The destroy clears the page instead of + * exporting. + * + * @paddr: Absolute host address of page to be destroyed + */ +static int uv_destroy(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_DESTR_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) { + /* + * Older firmware uses 107/d as an indication of a non secure + * page. Let us emulate the newer variant (no-op). + */ + if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd) + return 0; + return -EINVAL; + } + return 0; +} + +/* + * The caller must already hold a reference to the folio + */ +int uv_destroy_folio(struct folio *folio) +{ + int rc; + + /* Large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) + return 0; + + folio_get(folio); + rc = uv_destroy(folio_to_phys(folio)); + if (!rc) + clear_bit(PG_arch_1, &folio->flags.f); + folio_put(folio); + return rc; +} +EXPORT_SYMBOL(uv_destroy_folio); + +/* + * The present PTE still indirectly holds a folio reference through the mapping. + */ +int uv_destroy_pte(pte_t pte) +{ + VM_WARN_ON(!pte_present(pte)); + return uv_destroy_folio(pfn_folio(pte_pfn(pte))); +} + +/* + * Requests the Ultravisor to encrypt a guest page and make it + * accessible to the host for paging (export). + * + * @paddr: Absolute host address of page to be exported + */ +int uv_convert_from_secure(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(uv_convert_from_secure); + +/* + * The caller must already hold a reference to the folio. + */ +int uv_convert_from_secure_folio(struct folio *folio) +{ + int rc; + + /* Large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) + return 0; + + folio_get(folio); + rc = uv_convert_from_secure(folio_to_phys(folio)); + if (!rc) + clear_bit(PG_arch_1, &folio->flags.f); + folio_put(folio); + return rc; +} +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); + +/* + * The present PTE still indirectly holds a folio reference through the mapping. + */ +int uv_convert_from_secure_pte(pte_t pte) +{ + VM_WARN_ON(!pte_present(pte)); + return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); +} + +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + +/* + * Calculate the expected ref_count for a folio that would otherwise have no + * further pins. This was cribbed from similar functions in other places in + * the kernel, but with some slight modifications. We know that a secure + * folio can not be a large folio, for example. + */ +static int expected_folio_refs(struct folio *folio) +{ + int res; + + res = folio_mapcount(folio); + if (folio_test_swapcache(folio)) { + res++; + } else if (folio_mapping(folio)) { + res++; + if (folio->private) + res++; + } + return res; +} + +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +{ + int expected, cc = 0; + + if (folio_test_writeback(folio)) + return -EBUSY; + expected = expected_folio_refs(folio) + 1; + if (!folio_ref_freeze(folio, expected)) + return -EBUSY; + set_bit(PG_arch_1, &folio->flags.f); + /* + * If the UVC does not succeed or fail immediately, we don't want to + * loop for long, or we might get stall notifications. + * On the other hand, this is a complex scenario and we are holding a lot of + * locks, so we can't easily sleep and reschedule. We try only once, + * and if the UVC returned busy or partial completion, we return + * -EAGAIN and we let the callers deal with it. + */ + cc = __uv_call(0, (u64)uvcb); + folio_ref_unfreeze(folio, expected); + /* + * Return -ENXIO if the folio was not mapped, -EINVAL for other errors. + * If busy or partially completed, return -EAGAIN. + */ + if (cc == UVC_CC_OK) + return 0; + else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL) + return -EAGAIN; + return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; +} + +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) +{ + int rc; + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; +} + +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). + */ +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) +{ + int rc, tried_splits; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { + folio_unlock(folio); + return rc; + } + + /* + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. + */ + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; + } + + /* + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. + */ + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); + + if (unlikely(!inode)) + break; + + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; +} + +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) +{ + struct vm_area_struct *vma; + struct folio_walk fw; + struct folio *folio; + int rc; + + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } + + folio_get(folio); + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); + folio_walk_end(&fw, vma); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + + return rc; +} +EXPORT_SYMBOL_GPL(make_hva_secure); + +/* + * To be called with the folio locked or with an extra reference! This will + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. + */ +int arch_make_folio_accessible(struct folio *folio) +{ + int rc = 0; + + /* Large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) + return 0; + + /* + * PG_arch_1 is used in 2 places: + * 1. for storage keys of hugetlb folios and KVM + * 2. As an indication that this small folio might be secure. This can + * overindicate, e.g. we set the bit before calling + * convert_to_secure. + * As secure pages are never large folios, both variants can co-exists. + */ + if (!test_bit(PG_arch_1, &folio->flags.f)) + return 0; + + rc = uv_pin_shared(folio_to_phys(folio)); + if (!rc) { + clear_bit(PG_arch_1, &folio->flags.f); + return 0; + } + + rc = uv_convert_from_secure(folio_to_phys(folio)); + if (!rc) { + clear_bit(PG_arch_1, &folio->flags.f); + return 0; + } + + return rc; +} +EXPORT_SYMBOL_GPL(arch_make_folio_accessible); + +static ssize_t uv_query_facilities(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n", + uv_info.inst_calls_list[0], + uv_info.inst_calls_list[1], + uv_info.inst_calls_list[2], + uv_info.inst_calls_list[3]); +} + +static struct kobj_attribute uv_query_facilities_attr = + __ATTR(facilities, 0444, uv_query_facilities, NULL); + +static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver); +} + +static struct kobj_attribute uv_query_supp_se_hdr_ver_attr = + __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL); + +static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf); +} + +static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr = + __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL); + +static ssize_t uv_query_dump_cpu_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len); +} + +static struct kobj_attribute uv_query_dump_cpu_len_attr = + __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL); + +static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len); +} + +static struct kobj_attribute uv_query_dump_storage_state_len_attr = + __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL); + +static ssize_t uv_query_dump_finalize_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len); +} + +static struct kobj_attribute uv_query_dump_finalize_len_attr = + __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL); + +static ssize_t uv_query_feature_indications(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications); +} + +static struct kobj_attribute uv_query_feature_indications_attr = + __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); + +static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1); +} + +static struct kobj_attribute uv_query_max_guest_cpus_attr = + __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); + +static ssize_t uv_query_max_guest_vms(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf); +} + +static struct kobj_attribute uv_query_max_guest_vms_attr = + __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); + +static ssize_t uv_query_max_guest_addr(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr); +} + +static struct kobj_attribute uv_query_max_guest_addr_attr = + __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); + +static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver); +} + +static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr = + __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL); + +static ssize_t uv_query_supp_att_pflags(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags); +} + +static struct kobj_attribute uv_query_supp_att_pflags_attr = + __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL); + +static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver); +} + +static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr = + __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL); + +static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf); +} + +static struct kobj_attribute uv_query_supp_add_secret_pcf_attr = + __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL); + +static ssize_t uv_query_supp_secret_types(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types); +} + +static struct kobj_attribute uv_query_supp_secret_types_attr = + __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL); + +static ssize_t uv_query_max_secrets(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", + uv_info.max_assoc_secrets + uv_info.max_retr_secrets); +} + +static struct kobj_attribute uv_query_max_secrets_attr = + __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL); + +static ssize_t uv_query_max_retr_secrets(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets); +} + +static struct kobj_attribute uv_query_max_retr_secrets_attr = + __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL); + +static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets); +} + +static struct kobj_attribute uv_query_max_assoc_secrets_attr = + __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL); + +static struct attribute *uv_query_attrs[] = { + &uv_query_facilities_attr.attr, + &uv_query_feature_indications_attr.attr, + &uv_query_max_guest_cpus_attr.attr, + &uv_query_max_guest_vms_attr.attr, + &uv_query_max_guest_addr_attr.attr, + &uv_query_supp_se_hdr_ver_attr.attr, + &uv_query_supp_se_hdr_pcf_attr.attr, + &uv_query_dump_storage_state_len_attr.attr, + &uv_query_dump_finalize_len_attr.attr, + &uv_query_dump_cpu_len_attr.attr, + &uv_query_supp_att_req_hdr_ver_attr.attr, + &uv_query_supp_att_pflags_attr.attr, + &uv_query_supp_add_secret_req_ver_attr.attr, + &uv_query_supp_add_secret_pcf_attr.attr, + &uv_query_supp_secret_types_attr.attr, + &uv_query_max_secrets_attr.attr, + &uv_query_max_assoc_secrets_attr.attr, + &uv_query_max_retr_secrets_attr.attr, + NULL, +}; + +static inline struct uv_cb_query_keys uv_query_keys(void) +{ + struct uv_cb_query_keys uvcb = { + .header.cmd = UVC_CMD_QUERY_KEYS, + .header.len = sizeof(uvcb) + }; + + uv_call(0, (uint64_t)&uvcb); + return uvcb; +} + +static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at) +{ + return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n", + hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]); +} + +static ssize_t uv_keys_host_key(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + + return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0); +} + +static struct kobj_attribute uv_keys_host_key_attr = + __ATTR(host_key, 0444, uv_keys_host_key, NULL); + +static ssize_t uv_keys_backup_host_key(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + + return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0); +} + +static struct kobj_attribute uv_keys_backup_host_key_attr = + __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL); + +static ssize_t uv_keys_all(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + ssize_t len = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++) + len += emit_hash(uvcb.key_hashes + i, buf, len); + + return len; +} + +static struct kobj_attribute uv_keys_all_attr = + __ATTR(all, 0444, uv_keys_all, NULL); + +static struct attribute_group uv_query_attr_group = { + .attrs = uv_query_attrs, +}; + +static struct attribute *uv_keys_attrs[] = { + &uv_keys_host_key_attr.attr, + &uv_keys_backup_host_key_attr.attr, + &uv_keys_all_attr.attr, + NULL, +}; + +static struct attribute_group uv_keys_attr_group = { + .attrs = uv_keys_attrs, +}; + +static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", prot_virt_guest); +} + +static ssize_t uv_is_prot_virt_host(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", prot_virt_host); +} + +static struct kobj_attribute uv_prot_virt_guest = + __ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL); + +static struct kobj_attribute uv_prot_virt_host = + __ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL); + +static const struct attribute *uv_prot_virt_attrs[] = { + &uv_prot_virt_guest.attr, + &uv_prot_virt_host.attr, + NULL, +}; + +static struct kset *uv_query_kset; +static struct kset *uv_keys_kset; +static struct kobject *uv_kobj; + +static int __init uv_sysfs_dir_init(const struct attribute_group *grp, + struct kset **uv_dir_kset, const char *name) +{ + struct kset *kset; + int rc; + + kset = kset_create_and_add(name, NULL, uv_kobj); + if (!kset) + return -ENOMEM; + *uv_dir_kset = kset; + + rc = sysfs_create_group(&kset->kobj, grp); + if (rc) + kset_unregister(kset); + return rc; +} + +static int __init uv_sysfs_init(void) +{ + int rc = -ENOMEM; + + if (!test_facility(158)) + return 0; + + uv_kobj = kobject_create_and_add("uv", firmware_kobj); + if (!uv_kobj) + return -ENOMEM; + + rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs); + if (rc) + goto out_kobj; + + rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query"); + if (rc) + goto out_ind_files; + + /* Get installed key hashes if available, ignore any errors */ + if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list)) + uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys"); + + return 0; + +out_ind_files: + sysfs_remove_files(uv_kobj, uv_prot_virt_attrs); +out_kobj: + kobject_del(uv_kobj); + kobject_put(uv_kobj); + return rc; +} +device_initcall(uv_sysfs_init); + +/* + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. + * + * Context: might sleep. + */ +static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], + const struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) +{ + u16 i; + + for (i = 0; i < list->total_num_secrets; i++) { + if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) { + *secret = list->secrets[i].hdr; + return 0; + } + } + return -ENOENT; +} + +/** + * uv_find_secret() - search secret metadata for a given secret id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Context: might sleep. + */ +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) +{ + u16 start_idx = 0; + u16 list_rc; + int ret; + + do { + uv_list_secrets(list, start_idx, &list_rc, NULL); + if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) { + if (list_rc == UVC_RC_INV_CMD) + return -ENODEV; + else + return -EIO; + } + ret = find_secret_in_page(secret_id, list, secret); + if (ret == 0) + return ret; + start_idx = list->next_secret_idx; + } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx); + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(uv_find_secret); + +/** + * uv_retrieve_secret() - get the secret value for the secret index. + * @secret_idx: Secret index for which the secret should be retrieved. + * @buf: Buffer to store retrieved secret. + * @buf_size: Size of the buffer. The correct buffer size is reported as part of + * the result from `uv_get_secret_metadata`. + * + * Calls the Retrieve Secret UVC and translates the UV return code into an errno. + * + * Context: might sleep. + * + * Return: + * * %0 - Entry found; buffer contains a valid secret. + * * %ENOENT: - No entry found or secret at the index is non-retrievable. + * * %ENODEV: - Not supported: UV not available or command not available. + * * %EINVAL: - Buffer too small for content. + * * %EIO: - Other unexpected UV error. + */ +int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size) +{ + struct uv_cb_retr_secr uvcb = { + .header.len = sizeof(uvcb), + .header.cmd = UVC_CMD_RETR_SECRET, + .secret_idx = secret_idx, + .buf_addr = (u64)buf, + .buf_size = buf_size, + }; + + uv_call_sched(0, (u64)&uvcb); + + switch (uvcb.header.rc) { + case UVC_RC_EXECUTED: + return 0; + case UVC_RC_INV_CMD: + return -ENODEV; + case UVC_RC_RETR_SECR_STORE_EMPTY: + case UVC_RC_RETR_SECR_INV_SECRET: + case UVC_RC_RETR_SECR_INV_IDX: + return -ENOENT; + case UVC_RC_RETR_SECR_BUF_SMALL: + return -EINVAL; + default: + return -EIO; + } +} +EXPORT_SYMBOL_GPL(uv_retrieve_secret); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d7776281cb60..a27a90a199be 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -1,334 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 /* * vdso setup for s390 * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ -#include <linux/module.h> +#include <linux/binfmts.h> +#include <linux/elf.h> #include <linux/errno.h> -#include <linux/sched.h> +#include <linux/init.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/user.h> -#include <linux/elf.h> -#include <linux/security.h> -#include <linux/bootmem.h> -#include <linux/compat.h> -#include <asm/asm-offsets.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/sections.h> +#include <linux/smp.h> +#include <linux/random.h> +#include <linux/vdso_datastore.h> +#include <vdso/datapage.h> +#include <asm/vdso/vsyscall.h> +#include <asm/alternative.h> #include <asm/vdso.h> -#include <asm/facility.h> - -#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) -extern char vdso32_start, vdso32_end; -static void *vdso32_kbase = &vdso32_start; -static unsigned int vdso32_pages; -static struct page **vdso32_pagelist; -#endif - -#ifdef CONFIG_64BIT -extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; -#endif /* CONFIG_64BIT */ -/* - * Should the kernel map a VDSO page into processes and pass its - * address down to glibc upon exec()? - */ -unsigned int __read_mostly vdso_enabled = 1; +extern char vdso_start[], vdso_end[]; -static int __init vdso_setup(char *s) +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) { - unsigned long val; - int rc; - - rc = 0; - if (strncmp(s, "on", 3) == 0) - vdso_enabled = 1; - else if (strncmp(s, "off", 4) == 0) - vdso_enabled = 0; - else { - rc = strict_strtoul(s, 0, &val); - vdso_enabled = rc ? 0 : !!val; - } - return !rc; + current->mm->context.vdso_base = vma->vm_start; + return 0; } -__setup("vdso=", vdso_setup); -/* - * The vdso data page - */ -static union { - struct vdso_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +static struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; -/* - * Setup vdso data page. - */ -static void vdso_init_data(struct vdso_data *vd) +int vdso_getcpu_init(void) { - vd->ectg_available = - s390_user_mode != HOME_SPACE_MODE && test_facility(31); + set_tod_programmable_field(smp_processor_id()); + return 0; } +early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ -#ifdef CONFIG_64BIT -/* - * Allocate/free per cpu vdso data. - */ -#define SEGMENT_ORDER 2 - -int vdso_alloc_per_cpu(struct _lowcore *lowcore) +static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) { - unsigned long segment_table, page_table, page_frame; - u32 *psal, *aste; - int i; - - lowcore->vdso_per_cpu_data = __LC_PASTE; + unsigned long vvar_start, vdso_text_start, vdso_text_len; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; - if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) - return 0; + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); + if (mmap_write_lock_killable(mm)) + return -EINTR; - segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); - page_frame = get_zeroed_page(GFP_KERNEL); - if (!segment_table || !page_table || !page_frame) + vdso_text_len = vdso_end - vdso_start; + vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0); + rc = vvar_start; + if (IS_ERR_VALUE(vvar_start)) goto out; - - clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, - PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, - 256*sizeof(unsigned long)); - - *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; - - psal = (u32 *) (page_table + 256*sizeof(unsigned long)); - aste = psal + 32; - - for (i = 4; i < 32; i += 4) - psal[i] = 0x80000000; - - lowcore->paste[4] = (u32)(addr_t) psal; - psal[0] = 0x20000000; - psal[2] = (u32)(addr_t) aste; - *(unsigned long *) (aste + 2) = segment_table + - _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - aste[4] = (u32)(addr_t) psal; - lowcore->vdso_per_cpu_data = page_frame; - - return 0; - + vma = vdso_install_vvar_mapping(mm, vvar_start); + rc = PTR_ERR(vma); + if (IS_ERR(vma)) + goto out; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_mapping); + if (IS_ERR(vma)) { + do_munmap(mm, vvar_start, PAGE_SIZE, NULL); + rc = PTR_ERR(vma); + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; + } out: - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); - return -ENOMEM; + mmap_write_unlock(mm); + return rc; } -void vdso_free_per_cpu(struct _lowcore *lowcore) +static unsigned long vdso_addr(unsigned long start, unsigned long len) { - unsigned long segment_table, page_table, page_frame; - u32 *psal, *aste; - - if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) - return; + unsigned long addr, end, offset; - psal = (u32 *)(addr_t) lowcore->paste[4]; - aste = (u32 *)(addr_t) psal[2]; - segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; - page_table = *(unsigned long *) segment_table; - page_frame = *(unsigned long *) page_table; - - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; + if (end >= VDSO_BASE) + end = VDSO_BASE; + end -= len; + + if (end > start) { + offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } + return addr; } -static void vdso_init_cr5(void) +unsigned long vdso_text_size(void) { - unsigned long cr5; - - if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) - return; - cr5 = offsetof(struct _lowcore, paste); - __ctl_load(cr5, 5, 5); + return PAGE_ALIGN(vdso_end - vdso_start); } -#endif /* CONFIG_64BIT */ -/* - * This is called from binfmt_elf, we create the special vma for the - * vDSO and insert it into the mm struct tree - */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +unsigned long vdso_size(void) { - struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; - unsigned long vdso_pages; - unsigned long vdso_base; - int rc; - - if (!vdso_enabled) - return 0; - /* - * Only map the vdso for dynamically linked elf binaries. - */ - if (!uses_interp) - return 0; - -#ifdef CONFIG_64BIT - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - } -#endif -#else - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; -#endif - - /* - * vDSO has a problem and was disabled, just don't "enable" it for - * the process - */ - if (vdso_pages == 0) - return 0; - - current->mm->context.vdso_base = 0; - - /* - * pick a base address for the vDSO in process space. We try to put - * it at vdso_base which is the "natural" base for it, but we might - * fail and end up putting it elsewhere. - */ - down_write(&mm->mmap_sem); - vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto out_up; - } - - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). - */ - current->mm->context.vdso_base = vdso_base; - - /* - * our vma flags don't have VM_WRITE so by default, the process - * isn't allowed to write those pages. - * gdb can break that with ptrace interface, and thus trigger COW - * on those pages but it's then your responsibility to never do that - * on the "data" page of the vDSO or you'll stop getting kernel - * updates and your nice userland gettimeofday will be totally dead. - * It's fine to use that for setting breakpoints in the vDSO code - * pages though. - */ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) - current->mm->context.vdso_base = 0; -out_up: - up_write(&mm->mmap_sem); - return rc; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } -const char *arch_vma_name(struct vm_area_struct *vma) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; + unsigned long addr = VDSO_BASE; + unsigned long size = vdso_size(); + + if (current->flags & PF_RANDOMIZE) + addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size); + return map_vdso(addr, size); } -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { + int pages = (end - start) >> PAGE_SHIFT; + struct page **pagelist; int i; - if (!vdso_enabled) - return 0; - vdso_init_data(vdso_data); -#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) - /* Calculate the size of the 32 bit vDSO */ - vdso32_pages = ((&vdso32_end - &vdso32_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; - - /* Make sure pages are in the correct state */ - vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages - 1; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); - vdso32_pagelist[vdso32_pages] = NULL; -#endif - -#ifdef CONFIG_64BIT - /* Calculate the size of the 64 bit vDSO */ - vdso64_pages = ((&vdso64_end - &vdso64_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; - - /* Make sure pages are in the correct state */ - vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages - 1; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso64_pagelist[i] = pg; - } - vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); - vdso64_pagelist[vdso64_pages] = NULL; - if (vdso_alloc_per_cpu(&S390_lowcore)) - BUG(); - vdso_init_cr5(); -#endif /* CONFIG_64BIT */ - - get_page(virt_to_page(vdso_data)); - - smp_wmb(); - - return 0; + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + return pagelist; } -early_initcall(vdso_init); -int in_gate_area_no_mm(unsigned long addr) +static void vdso_apply_alternatives(void) { - return 0; + const struct elf64_shdr *alt, *shdr; + struct alt_instr *start, *end; + const struct elf64_hdr *hdr; + + hdr = (struct elf64_hdr *)vdso_start; + shdr = (void *)hdr + hdr->e_shoff; + alt = find_section(hdr, shdr, ".altinstructions"); + if (!alt) + return; + start = (void *)hdr + alt->sh_offset; + end = (void *)hdr + alt->sh_offset + alt->sh_size; + apply_alternatives(start, end); } -int in_gate_area(struct mm_struct *mm, unsigned long addr) +static int __init vdso_init(void) { + vdso_apply_alternatives(); + vdso_mapping.pages = vdso_setup_pages(vdso_start, vdso_end); return 0; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} +arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso/.gitignore b/arch/s390/kernel/vdso/.gitignore new file mode 100644 index 000000000000..652e31d82582 --- /dev/null +++ b/arch/s390/kernel/vdso/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso.lds diff --git a/arch/s390/kernel/vdso/Makefile b/arch/s390/kernel/vdso/Makefile new file mode 100644 index 000000000000..2fa12d4ac106 --- /dev/null +++ b/arch/s390/kernel/vdso/Makefile @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: GPL-2.0 +# List of files in the vdso + +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile.include +obj-vdso = vdso_user_wrapper.o note.o vgetrandom-chacha.o +obj-cvdso = vdso_generic.o getcpu.o vgetrandom.o +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) +CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vdso_generic.o = $(VDSO_CFLAGS_REMOVE) + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + +# Build rules + +targets := $(obj-vdso) $(obj-cvdso) vdso.so vdso.so.dbg +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +obj-cvdso := $(addprefix $(obj)/, $(obj-cvdso)) + +KBUILD_AFLAGS_VDSO := $(KBUILD_AFLAGS) -DBUILD_VDSO + +KBUILD_CFLAGS_VDSO := $(KBUILD_CFLAGS) -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_VDSO := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_VDSO)) +KBUILD_CFLAGS_VDSO += -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables +KBUILD_CFLAGS_VDSO += -fno-stack-protector +ldflags-y := -shared -soname=linux-vdso.so.1 \ + --hash-style=both --build-id=sha1 -T + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_VDSO) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_VDSO) + +obj-y += vdso_wrapper.o +targets += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso_wrapper.o : $(obj)/vdso.so + +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + +# link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) $(obj-cvdso) FORCE + $(call if_changed,vdso_and_check) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +$(obj-cvdso): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) + +# actual build commands +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso/gen_vdso_offsets.sh b/arch/s390/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000000..359982fb002d --- /dev/null +++ b/arch/s390/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso/getcpu.c b/arch/s390/kernel/vdso/getcpu.c new file mode 100644 index 000000000000..5c5d4a848b76 --- /dev/null +++ b/arch/s390/kernel/vdso/getcpu.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright IBM Corp. 2020 */ + +#include <linux/compiler.h> +#include <linux/getcpu.h> +#include <asm/timex.h> +#include "vdso.h" + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + union tod_clock clk; + + /* CPU number is stored in the programmable field of the TOD clock */ + store_tod_clock_ext(&clk); + if (cpu) + *cpu = clk.pf; + /* NUMA node is always zero */ + if (node) + *node = 0; + return 0; +} diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso/note.S index 79a071e4357e..db19d0680a0a 100644 --- a/arch/s390/kernel/vdso64/note.S +++ b/arch/s390/kernel/vdso/note.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. * Here we can supply some information useful to userland. diff --git a/arch/s390/kernel/vdso/vdso.h b/arch/s390/kernel/vdso/vdso.h new file mode 100644 index 000000000000..8cff033dd854 --- /dev/null +++ b/arch/s390/kernel/vdso/vdso.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_VDSO_VDSO_H +#define __ARCH_S390_KERNEL_VDSO_VDSO_H + +#include <vdso/datapage.h> + +struct getcpu_cache; + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); + +#endif /* __ARCH_S390_KERNEL_VDSO_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso/vdso.lds.S index 9f5979d102a9..7bec4de0e8e0 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso/vdso.lds.S @@ -1,16 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This is the infamous ld script for the 64 bits vdso * library */ + +#include <asm/vdso/vsyscall.h> +#include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(_start) SECTIONS { - . = VDSO64_LBASE + SIZEOF_HEADERS; + VDSO_VVAR_SYMS + + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -36,6 +43,10 @@ SECTIONS .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } + . = ALIGN(8); + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + .dynamic : { *(.dynamic) } :text :dynamic .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr @@ -44,56 +55,16 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } + STABS_DEBUG + DWARF_DEBUG .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - . = ALIGN(4096); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) @@ -132,7 +103,11 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; - + __kernel_getcpu; + __kernel_restart_syscall; + __kernel_rt_sigreturn; + __kernel_sigreturn; + __kernel_getrandom; local: *; }; } diff --git a/arch/s390/kernel/vdso/vdso_generic.c b/arch/s390/kernel/vdso/vdso_generic.c new file mode 100644 index 000000000000..a9aa75643c08 --- /dev/null +++ b/arch/s390/kernel/vdso/vdso_generic.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/vdso/gettimeofday.c" +#include "vdso.h" + +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_getres(clock, ts); +} diff --git a/arch/s390/kernel/vdso/vdso_user_wrapper.S b/arch/s390/kernel/vdso/vdso_user_wrapper.S new file mode 100644 index 000000000000..aa06c85bcbd3 --- /dev/null +++ b/arch/s390/kernel/vdso/vdso_user_wrapper.S @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/vdso.h> +#include <asm/unistd.h> +#include <asm/asm-offsets.h> +#include <asm/dwarf.h> +#include <asm/ptrace.h> + +/* + * Older glibc version called vdso without allocating a stackframe. This wrapper + * is just used to allocate a stackframe. See + * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e + * for details. + */ +.macro vdso_func func +SYM_FUNC_START(__kernel_\func) + CFI_STARTPROC + aghi %r15,-STACK_FRAME_VDSO_OVERHEAD + CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD) + CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD + stg %r14,__SFVDSO_RETURN_ADDRESS(%r15) + CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS + xc __SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15) + brasl %r14,__s390_vdso_\func + lg %r14,__SFVDSO_RETURN_ADDRESS(%r15) + CFI_RESTORE 14 + aghi %r15,STACK_FRAME_VDSO_OVERHEAD + CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD + CFI_RESTORE 15 + br %r14 + CFI_ENDPROC +SYM_FUNC_END(__kernel_\func) +.endm + +vdso_func gettimeofday +vdso_func clock_getres +vdso_func clock_gettime +vdso_func getcpu + +.macro vdso_syscall func,syscall +SYM_FUNC_START(__kernel_\func) + CFI_STARTPROC + svc \syscall + /* Make sure we notice when a syscall returns, which shouldn't happen */ + .word 0 + CFI_ENDPROC +SYM_FUNC_END(__kernel_\func) +.endm + +vdso_syscall restart_syscall,__NR_restart_syscall +vdso_syscall sigreturn,__NR_sigreturn +vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso/vdso_wrapper.S index c245842b516f..f69e62a14978 100644 --- a/arch/s390/kernel/vdso64/vdso64_wrapper.S +++ b/arch/s390/kernel/vdso/vdso_wrapper.S @@ -1,14 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/init.h> #include <linux/linkage.h> #include <asm/page.h> __PAGE_ALIGNED_DATA - .globl vdso64_start, vdso64_end + .globl vdso_start, vdso_end .balign PAGE_SIZE -vdso64_start: - .incbin "arch/s390/kernel/vdso64/vdso64.so" +vdso_start: + .incbin "arch/s390/kernel/vdso/vdso.so" .balign PAGE_SIZE -vdso64_end: +vdso_end: .previous diff --git a/arch/s390/kernel/vdso/vgetrandom-chacha.S b/arch/s390/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..09c034c2f853 --- /dev/null +++ b/arch/s390/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/stringify.h> +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/dwarf.h> +#include <asm/fpu-insn.h> + +#define STATE0 %v0 +#define STATE1 %v1 +#define STATE2 %v2 +#define STATE3 %v3 +#define COPY0 %v4 +#define COPY1 %v5 +#define COPY2 %v6 +#define COPY3 %v7 +#define BEPERM %v19 +#define TMP0 %v20 +#define TMP1 %v21 +#define TMP2 %v22 +#define TMP3 %v23 + + .section .rodata + + .balign 32 +SYM_DATA_START_LOCAL(chacha20_constants) + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral + .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap +SYM_DATA_END(chacha20_constants) + + .text +/* + * s390 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonce 0, taking an input key and 8-bytes + * counter. Does not spill to the stack. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + CFI_STARTPROC + larl %r1,chacha20_constants + + /* COPY0 = "expand 32-byte k" */ + VL COPY0,0,,%r1 + + /* BEPERM = byte selectors for VPERM */ + ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148) + + /* COPY1,COPY2 = key */ + VLM COPY1,COPY2,0,%r3 + + /* COPY3 = counter || zero nonce */ + lg %r3,0(%r4) + VZERO COPY3 + VLVGG COPY3,%r3,0 + + lghi %r1,0 +.Lblock: + VLR STATE0,COPY0 + VLR STATE1,COPY1 + VLR STATE2,COPY2 + VLR STATE3,COPY3 + + lghi %r0,10 +.Ldoubleround: + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,16 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,8 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,7 + + /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */ + VSLDB STATE1,STATE1,STATE1,4 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ + VSLDB STATE2,STATE2,STATE2,8 + /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */ + VSLDB STATE3,STATE3,STATE3,12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,16 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,8 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,7 + + /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */ + VSLDB STATE1,STATE1,STATE1,12 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ + VSLDB STATE2,STATE2,STATE2,8 + /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */ + VSLDB STATE3,STATE3,STATE3,4 + brctg %r0,.Ldoubleround + + /* OUTPUT0 = STATE0 + COPY0 */ + VAF STATE0,STATE0,COPY0 + /* OUTPUT1 = STATE1 + COPY1 */ + VAF STATE1,STATE1,COPY1 + /* OUTPUT2 = STATE2 + COPY2 */ + VAF STATE2,STATE2,COPY2 + /* OUTPUT3 = STATE3 + COPY3 */ + VAF STATE3,STATE3,COPY3 + + ALTERNATIVE \ + __stringify( \ + /* Convert STATE to little endian and store to OUTPUT */\ + VPERM TMP0,STATE0,STATE0,BEPERM; \ + VPERM TMP1,STATE1,STATE1,BEPERM; \ + VPERM TMP2,STATE2,STATE2,BEPERM; \ + VPERM TMP3,STATE3,STATE3,BEPERM; \ + VSTM TMP0,TMP3,0,%r2), \ + __stringify( \ + /* 32 bit wise little endian store to OUTPUT */ \ + VSTBRF STATE0,0,,%r2; \ + VSTBRF STATE1,16,,%r2; \ + VSTBRF STATE2,32,,%r2; \ + VSTBRF STATE3,48,,%r2; \ + brcl 0,0), \ + ALT_FACILITY(148) + + /* ++COPY3.COUNTER */ + /* alsih %r3,1 */ + .insn rilu,0xcc0a00000000,%r3,1 + alcr %r3,%r1 + VLVGG COPY3,%r3,0 + + /* OUTPUT += 64, --NBLOCKS */ + aghi %r2,64 + brctg %r5,.Lblock + + /* COUNTER = COPY3.COUNTER */ + stg %r3,0(%r4) + + /* Zero out potentially sensitive regs */ + VZERO STATE0 + VZERO STATE1 + VZERO STATE2 + VZERO STATE3 + VZERO COPY1 + VZERO COPY2 + + /* Early exit if TMP0-TMP3 have not been used */ + ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148) + + VZERO TMP0 + VZERO TMP1 + VZERO TMP2 + VZERO TMP3 + + br %r14 + CFI_ENDPROC +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/s390/kernel/vdso/vgetrandom.c b/arch/s390/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..b5268b507fb5 --- /dev/null +++ b/arch/s390/kernel/vdso/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/facility.h> +#include <uapi/asm-generic/errno.h> +#include "vdso.h" + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + if (test_facility(129)) + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) + return -ENOSYS; + return getrandom_syscall(buffer, len, flags); +} diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore deleted file mode 100644 index e45fba9d0ced..000000000000 --- a/arch/s390/kernel/vdso32/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile deleted file mode 100644 index 8ad2b34ad151..000000000000 --- a/arch/s390/kernel/vdso32/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# List of files in the vdso, has to be asm only for now - -obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o - -# Build rules - -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_31 += -m31 -s - -KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) - -obj-y += vdso32_wrapper.o -extra-y += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) - -# Disable gcov profiling for VDSO code -GCOV_PROFILE := n - -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so - -# link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) - $(call if_changed,vdso32ld) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -# assembly rules for the .S files -$(obj-vdso32): %.o: %.S - $(call if_changed_dep,vdso32as) - -# actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S deleted file mode 100644 index 36aaa25d05da..000000000000 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Userland implementation of clock_getres() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - .cfi_startproc - chi %r2,__CLOCK_REALTIME - je 0f - chi %r2,__CLOCK_MONOTONIC - jne 3f -0: ltr %r3,%r3 - jz 2f /* res == NULL */ - basr %r1,0 -1: l %r0,4f-1b(%r1) - xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ - st %r0,4(%r3) /* store tp->tv_usec */ -2: lhi %r2,0 - br %r14 -3: lhi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 -4: .long __CLOCK_REALTIME_RES - .cfi_endproc - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S deleted file mode 100644 index b2224e0b974c..000000000000 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Userland implementation of clock_gettime() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - .cfi_startproc - basr %r5,0 -0: al %r5,21f-0b(%r5) /* get &_vdso_data */ - chi %r2,__CLOCK_REALTIME - je 10f - chi %r2,__CLOCK_MONOTONIC - jne 19f - - /* CLOCK_MONOTONIC */ - ltr %r3,%r3 - jz 9f /* tp == NULL */ -1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) - s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,2f - ahi %r0,-1 -2: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ - lr %r2,%r0 - l %r0,__VDSO_NTP_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 3f - a %r0,__VDSO_NTP_MULT(%r5) -3: alr %r0,%r2 - srdl %r0,12 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,4f - ahi %r0,1 -4: l %r2,__VDSO_XTIME_SEC+4(%r5) - al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ - al %r1,__VDSO_WTOM_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: al %r2,__VDSO_WTOM_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,20f-6b(%r5) - jl 8f -7: ahi %r2,1 - sl %r1,20f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ -9: lhi %r2,0 - br %r14 - - /* CLOCK_REALTIME */ -10: ltr %r3,%r3 /* tp == NULL */ - jz 18f -11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 11b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) - s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,12f - ahi %r0,-1 -12: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ - lr %r2,%r0 - l %r0,__VDSO_NTP_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 13f - a %r0,__VDSO_NTP_MULT(%r5) -13: alr %r0,%r2 - srdl %r0,12 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,14f - ahi %r0,1 -14: l %r2,__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 11b - basr %r5,0 -15: ltr %r0,%r0 - jnz 16f - cl %r1,20f-15b(%r5) - jl 17f -16: ahi %r2,1 - sl %r1,20f-15b(%r5) - brc 3,15b - ahi %r0,-1 - j 15b -17: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ -18: lhi %r2,0 - br %r14 - - /* Fallback to system call */ -19: lhi %r1,__NR_clock_gettime - svc 0 - br %r14 - -20: .long 1000000000 -21: .long _vdso_data - 0b - .cfi_endproc - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S deleted file mode 100644 index 2d3633175e3b..000000000000 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - .cfi_startproc - basr %r5,0 -0: al %r5,13f-0b(%r5) /* get &_vdso_data */ -1: ltr %r3,%r3 /* check if tz is NULL */ - je 2f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -2: ltr %r2,%r2 /* check if tv is NULL */ - je 10f - l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) - s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,3f - ahi %r0,-1 -3: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ - st %r0,24(%r15) - l %r0,__VDSO_NTP_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 4f - a %r0,__VDSO_NTP_MULT(%r5) -4: al %r0,24(%r15) - srdl %r0,12 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - l %r4,24(%r15) /* get tv_sec from stack */ - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,11f-6b(%r5) - jl 8f -7: ahi %r4,1 - sl %r1,11f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r4,0(%r2) /* store tv->tv_sec */ - ltr %r1,%r1 - m %r0,12f-6b(%r5) - jnm 9f - al %r0,12f-6b(%r5) -9: srl %r0,6 - st %r0,4(%r2) /* store tv->tv_usec */ -10: slr %r2,%r2 - br %r14 -11: .long 1000000000 -12: .long 274877907 -13: .long _vdso_data - 0b - .cfi_endproc - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S deleted file mode 100644 index 79a071e4357e..000000000000 --- a/arch/s390/kernel/vdso32/note.S +++ /dev/null @@ -1,12 +0,0 @@ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include <linux/uts.h> -#include <linux/version.h> -#include <linux/elfnote.h> - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S deleted file mode 100644 index a8c379fa1247..000000000000 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ /dev/null @@ -1,138 +0,0 @@ -/* - * This is the infamous ld script for the 32 bits vdso - * library - */ -#include <asm/vdso.h> - -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) -ENTRY(_start) - -SECTIONS -{ - . = VDSO32_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) - } :text - PROVIDE(__etext = .); - PROVIDE(_etext = .); - PROVIDE(etext = .); - - /* - * Other stuff is appended to the text segment: - */ - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - - .dynamic : { *(.dynamic) } :text :dynamic - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } - - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .got ALIGN(8) : { *(.got .toc) } - - _end = .; - PROVIDE(end = .); - - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - - . = ALIGN(4096); - PROVIDE(_vdso_data = .); - - /DISCARD/ : { - *(.note.GNU-stack) - *(.branch_lt) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * Very old versions of ld do not recognize this name token; use the constant. - */ -#define PT_GNU_EH_FRAME 0x6474e550 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - VDSO_VERSION_STRING { - global: - /* - * Has to be there for the kernel to find - */ - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - - local: *; - }; -} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S deleted file mode 100644 index ae42f8ce350b..000000000000 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ /dev/null @@ -1,14 +0,0 @@ -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/page.h> - - __PAGE_ALIGNED_DATA - - .globl vdso32_start, vdso32_end - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/s390/kernel/vdso32/vdso32.so" - .balign PAGE_SIZE -vdso32_end: - - .previous diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore deleted file mode 100644 index 3fd18cf9fec2..000000000000 --- a/arch/s390/kernel/vdso64/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile deleted file mode 100644 index 2a8ddfd12a5b..000000000000 --- a/arch/s390/kernel/vdso64/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# List of files in the vdso, has to be asm only for now - -obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o - -# Build rules - -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) - -KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_64 += -m64 -s - -KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) - -obj-y += vdso64_wrapper.o -extra-y += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# Disable gcov profiling for VDSO code -GCOV_PROFILE := n - -# Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) - $(call if_changed,vdso64ld) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -# assembly rules for the .S files -$(obj-vdso64): %.o: %.S - $(call if_changed_dep,vdso64as) - -# actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ -quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso64.so: $(obj)/vdso64.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S deleted file mode 100644 index 176e1f75f9aa..000000000000 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Userland implementation of clock_getres() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - .cfi_startproc - cghi %r2,__CLOCK_REALTIME - je 0f - cghi %r2,__CLOCK_MONOTONIC - je 0f - cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ - jne 2f - larl %r5,_vdso_data - icm %r0,15,__LC_ECTG_OK(%r5) - jz 2f -0: ltgr %r3,%r3 - jz 1f /* res == NULL */ - larl %r1,3f - lg %r0,0(%r1) - xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ - stg %r0,8(%r3) /* store tp->tv_usec */ -1: lghi %r2,0 - br %r14 -2: lghi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 -3: .quad __CLOCK_REALTIME_RES - .cfi_endproc - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S deleted file mode 100644 index d46c95ed5f19..000000000000 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Userland implementation of clock_gettime() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - .cfi_startproc - larl %r5,_vdso_data - cghi %r2,__CLOCK_REALTIME - je 4f - cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ - je 9f - cghi %r2,__CLOCK_MONOTONIC - jne 12f - - /* CLOCK_MONOTONIC */ - ltgr %r3,%r3 - jz 3f /* tp == NULL */ -0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - lg %r0,__VDSO_XTIME_SEC(%r5) - alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ - alg %r0,__VDSO_WTOM_SEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - larl %r5,13f -1: clg %r1,0(%r5) - jl 2f - slg %r1,0(%r5) - aghi %r0,1 - j 1b -2: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ -3: lghi %r2,0 - br %r14 - - /* CLOCK_REALTIME */ -4: ltr %r3,%r3 /* tp == NULL */ - jz 8f -5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 5b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - lg %r0,__VDSO_XTIME_SEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 5b - larl %r5,13f -6: clg %r1,0(%r5) - jl 7f - slg %r1,0(%r5) - aghi %r0,1 - j 6b -7: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ -8: lghi %r2,0 - br %r14 - - /* CLOCK_THREAD_CPUTIME_ID for this thread */ -9: icm %r0,15,__VDSO_ECTG_OK(%r5) - jz 12f - ear %r2,%a4 - llilh %r4,0x0100 - sar %a4,%r4 - lghi %r4,0 - epsw %r5,0 - sacf 512 /* Magic ectg instruction */ - .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - tml %r5,0x4000 - jo 11f - tml %r5,0x8000 - jno 10f - sacf 256 - j 11f -10: sacf 0 -11: sar %a4,%r2 - algr %r1,%r0 /* r1 = cputime as TOD value */ - mghi %r1,1000 /* convert to nanoseconds */ - srlg %r1,%r1,12 /* r1 = cputime in nanosec */ - lgr %r4,%r1 - larl %r5,13f - srlg %r1,%r1,9 /* divide by 1000000000 */ - mlg %r0,8(%r5) - srlg %r0,%r0,11 /* r0 = tv_sec */ - stg %r0,0(%r3) - msg %r0,0(%r5) /* calculate tv_nsec */ - slgr %r4,%r0 /* r4 = tv_nsec */ - stg %r4,8(%r3) - lghi %r2,0 - br %r14 - - /* Fallback to system call */ -12: lghi %r1,__NR_clock_gettime - svc 0 - br %r14 - -13: .quad 1000000000 -14: .quad 19342813113834067 - .cfi_endproc - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S deleted file mode 100644 index 36ee674722ec..000000000000 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - .cfi_startproc - larl %r5,_vdso_data -0: ltgr %r3,%r3 /* check if tz is NULL */ - je 1f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -1: ltgr %r2,%r2 /* check if tv is NULL */ - je 4f - lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - larl %r5,5f -2: clg %r1,0(%r5) - jl 3f - slg %r1,0(%r5) - aghi %r0,1 - j 2b -3: stg %r0,0(%r2) /* store tv->tv_sec */ - slgr %r0,%r0 /* tv_nsec -> tv_usec */ - ml %r0,8(%r5) - srlg %r0,%r0,6 - stg %r0,8(%r2) /* store tv->tv_usec */ -4: lghi %r2,0 - br %r14 -5: .quad 1000000000 - .long 274877907 - .cfi_endproc - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c new file mode 100644 index 000000000000..cc8933e04ff7 --- /dev/null +++ b/arch/s390/kernel/vmcore_info.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/vmcore_info.h> +#include <linux/mm.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> +#include <asm/setup.h> + +void arch_crash_save_vmcoreinfo(void) +{ + struct lowcore *abs_lc; + + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); + vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31); + vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys); + abs_lc = get_abs_lowcore(); + abs_lc->vmcore_info = paddr_vmcoreinfo_note(); + put_abs_lowcore(abs_lc); +} diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 35b13ed0af5f..53bcbb91bb9b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -1,22 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* ld script to make s390 Linux kernel * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) */ #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/ftrace.lds.h> + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ + *(.bss..invalid_pg_dir) + +#define RO_EXCEPTION_TABLE_ALIGN 16 + +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + +#define RUNTIME_DISCARD_EXIT + +#define EMITS_PT_NOTE + #include <asm-generic/vmlinux.lds.h> +#include <asm/vmlinux.lds.h> -#ifndef CONFIG_64BIT -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) -ENTRY(startup) -jiffies = jiffies_64 + 4; -#else OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(startup) +ENTRY(startup_continue) jiffies = jiffies_64; -#endif PHDRS { text PT_LOAD FLAGS(5); /* R_E */ @@ -26,46 +39,81 @@ PHDRS { SECTIONS { - . = 0x00000000; + . = TEXT_OFFSET; .text : { - _text = .; /* Text and read-only data */ + _stext = .; /* Start of text section */ + _text = .; /* Text and read-only data */ HEAD_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT - *(.fixup) + SOFTIRQENTRY_TEXT + FTRACE_HOTPATCH_TRAMPOLINES_TEXT + *(.text..*_indirect_*) *(.gnu.warning) + . = ALIGN(PAGE_SIZE); + _etext = .; /* End of text section */ } :text = 0x0700 - _etext = .; /* End of text section */ + RO_DATA(PAGE_SIZE) - NOTES :text :note - - .dummy : { *(.dummy) } :data + . = ALIGN(PAGE_SIZE); + _sdata = .; /* Start of data section */ - RO_DATA_SECTION(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + JUMP_TABLE_DATA + } :data + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; -#ifdef CONFIG_SHARED_KERNEL - . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ -#endif + . = ALIGN(8); + .skey_region_table : { + __skey_region_start = .; + KEEP(*(.skey_region)) + __skey_region_end = .; + } - . = ALIGN(PAGE_SIZE); - _eshared = .; /* End of shareable data */ - _sdata = .; /* Start of data section */ + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } - EXCEPTION_TABLE(16) :data + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) + .data.rel : { + *(.data.rel*) + } + BOOT_DATA_PRESERVED - RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) + . = ALIGN(8); + .amode31.refs : { + _start_amode31_refs = .; + *(.amode31.refs) + _end_amode31_refs = .; + } + . = ALIGN(PAGE_SIZE); _edata = .; /* End of data section */ /* will be freed after init */ . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - INIT_TEXT_SECTION(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT + . = ALIGN(PAGE_SIZE); + _einittext = .; + } /* * .exit.text is discarded at runtime, not link time, @@ -79,22 +127,162 @@ SECTIONS EXIT_DATA } + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + * Note, that it is a part of __init region. + */ + . = ALIGN(8); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + /* + * And here are the replacement instructions. The linker sticks + * them as binary blobs. The .altinstructions has enough data to + * get the address and the length of them to patch the kernel safely. + * Note, that it is a part of __init region. + */ + .altinstr_replacement : { + *(.altinstr_replacement) + } + +#ifdef CONFIG_STACKPROTECTOR + . = ALIGN(8); + .stack_prot_table : { + __stack_prot_start = .; + KEEP(*(__stack_protector_loc)) + __stack_prot_end = .; + } +#endif + + /* + * Table with the patch locations to undo expolines + */ + . = ALIGN(4); + .nospec_call_table : { + __nospec_call_start = . ; + *(.s390_indirect*) + __nospec_call_end = . ; + } + .nospec_return_table : { + __nospec_return_start = . ; + *(.s390_return*) + __nospec_return_end = . ; + } + + BOOT_DATA + + /* + * .amode31 section for code, data, ex_table that need to stay + * below 2 GB, even when the kernel is relocated above 2 GB. + */ + . = ALIGN(PAGE_SIZE); + _samode31 = .; + .amode31.text : { + _stext_amode31 = .; + *(.amode31.text) + *(.amode31.text.*_indirect_*) + . = ALIGN(PAGE_SIZE); + _etext_amode31 = .; + } + . = ALIGN(16); + .amode31.ex_table : { + _start_amode31_ex_table = .; + KEEP(*(.amode31.ex_table)) + _stop_amode31_ex_table = .; + } + . = ALIGN(PAGE_SIZE); + .amode31.data : { + *(.amode31.data) + } + . = _samode31 + AMODE31_SIZE; + _eamode31 = .; + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(0x100) + . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - BSS_SECTION(0, 2, 0) + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + . = ALIGN(PAGE_SIZE); _end = . ; /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + ELF_DETAILS + + /* + * Make sure that the .got.plt is either completely empty or it + * contains only the three reserved double words. + */ + .got.plt : { + *(.got.plt) + } + ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") + + /* + * Sections that should stay zero sized, which is safer to + * explicitly check instead of blindly discarding. + */ + .plt : { + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) + } + ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") + .rela.dyn : { + *(.rela.*) *(.rela_*) + } + ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") + + /* + * uncompressed image info used by the decompressor + * it should match struct vmlinux_info + */ + .vmlinux.info 0 (INFO) : { + QUAD(startup_continue) /* entry */ + QUAD(__bss_start - _stext) /* image_size */ + QUAD(__bss_stop - __bss_start) /* bss_size */ + QUAD(__boot_data_start) /* bootdata_off */ + QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ + QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */ + QUAD(__boot_data_preserved_end - + __boot_data_preserved_start) /* bootdata_preserved_size */ + QUAD(__got_start) /* got_start */ + QUAD(__got_end) /* got_end */ + QUAD(_eamode31 - _samode31) /* amode31_size */ + QUAD(init_mm) + QUAD(swapper_pg_dir) + QUAD(invalid_pg_dir) + QUAD(__alt_instructions) + QUAD(__alt_instructions_end) +#ifdef CONFIG_STACKPROTECTOR + QUAD(__stack_prot_start) + QUAD(__stack_prot_end) +#endif +#ifdef CONFIG_KASAN + QUAD(kasan_early_shadow_page) + QUAD(kasan_early_shadow_pte) + QUAD(kasan_early_shadow_pmd) + QUAD(kasan_early_shadow_pud) + QUAD(kasan_early_shadow_p4d) +#endif + } :NONE /* Sections to be discarded */ DISCARDS + /DISCARD/ : { + *(.eh_frame) + *(.interp) + } } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 3fb09359eda6..234a0ba30510 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Virtual cpu timer based timer functions. * @@ -6,49 +7,43 @@ */ #include <linux/kernel_stat.h> -#include <linux/notifier.h> -#include <linux/kprobes.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> -#include <linux/cpu.h> -#include <linux/smp.h> - -#include <asm/irq_regs.h> +#include <asm/alternative.h> #include <asm/cputime.h> #include <asm/vtimer.h> -#include <asm/irq.h> +#include <asm/vtime.h> +#include <asm/cpu_mf.h> +#include <asm/smp.h> + #include "entry.h" static void virt_timer_expire(void); -DEFINE_PER_CPU(struct s390_idle_data, s390_idle); - static LIST_HEAD(virt_timer_list); static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; static atomic64_t virt_timer_elapsed; -static inline u64 get_vtimer(void) -{ - u64 timer; - - asm volatile("stpt %0" : "=m" (timer)); - return timer; -} +DEFINE_PER_CPU(u64, mt_cycles[8]); +static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 }; +static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 }; +static DEFINE_PER_CPU(u64, mt_scaling_jiffies); static inline void set_vtimer(u64 expires) { + struct lowcore *lc = get_lowcore(); u64 timer; asm volatile( " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm. afterwards */ - : "=m" (timer) : "m" (expires)); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; + : "=Q" (timer) : "Q" (expires)); + lc->system_timer += lc->last_update_timer - timer; + lc->last_update_timer = expires; } static inline int virt_timer_forward(u64 elapsed) @@ -61,55 +56,141 @@ static inline int virt_timer_forward(u64 elapsed) return elapsed >= atomic64_read(&virt_timer_current); } +static void update_mt_scaling(void) +{ + u64 cycles_new[8], *cycles_old; + u64 delta, fac, mult, div; + int i; + + stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new); + cycles_old = this_cpu_ptr(mt_cycles); + fac = 1; + mult = div = 0; + for (i = 0; i <= smp_cpu_mtid; i++) { + delta = cycles_new[i] - cycles_old[i]; + div += delta; + mult *= i + 1; + mult += delta * fac; + fac *= i + 1; + } + div *= fac; + if (div > 0) { + /* Update scaling factor */ + __this_cpu_write(mt_scaling_mult, mult); + __this_cpu_write(mt_scaling_div, div); + memcpy(cycles_old, cycles_new, + sizeof(u64) * (smp_cpu_mtid + 1)); + } + __this_cpu_write(mt_scaling_jiffies, jiffies_64); +} + +static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new) +{ + u64 delta; + + delta = new - *tsk_vtime; + *tsk_vtime = new; + return delta; +} + + +static inline u64 scale_vtime(u64 vtime) +{ + u64 mult = __this_cpu_read(mt_scaling_mult); + u64 div = __this_cpu_read(mt_scaling_div); + + if (smp_cpu_mtid) + return vtime * mult / div; + return vtime; +} + +static void account_system_index_scaled(struct task_struct *p, u64 cputime, + enum cpu_usage_stat index) +{ + p->stimescaled += cputime_to_nsecs(scale_vtime(cputime)); + account_system_index_time(p, cputime_to_nsecs(cputime), index); +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) +static int do_account_vtime(struct task_struct *tsk) { - struct thread_info *ti = task_thread_info(tsk); - u64 timer, clock, user, system, steal; + u64 timer, clock, user, guest, system, hardirq, softirq; + struct lowcore *lc = get_lowcore(); - timer = S390_lowcore.last_update_timer; - clock = S390_lowcore.last_update_clock; + timer = lc->last_update_timer; + clock = lc->last_update_clock; asm volatile( " stpt %0\n" /* Store current cpu timer value */ - " stck %1" /* Store current tod clock value */ - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock)); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; - - user = S390_lowcore.user_timer - ti->user_timer; - S390_lowcore.steal_timer -= user; - ti->user_timer = S390_lowcore.user_timer; - account_user_time(tsk, user, user); - - system = S390_lowcore.system_timer - ti->system_timer; - S390_lowcore.steal_timer -= system; - ti->system_timer = S390_lowcore.system_timer; - account_system_time(tsk, hardirq_offset, system, system); - - steal = S390_lowcore.steal_timer; - if ((s64) steal > 0) { - S390_lowcore.steal_timer = 0; - account_steal_time(steal); + " stckf %1" /* Store current tod clock value */ + : "=Q" (lc->last_update_timer), + "=Q" (lc->last_update_clock) + : : "cc"); + clock = lc->last_update_clock - clock; + timer -= lc->last_update_timer; + + if (hardirq_count()) + lc->hardirq_timer += timer; + else + lc->system_timer += timer; + + /* Update MT utilization calculation */ + if (smp_cpu_mtid && + time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + update_mt_scaling(); + + /* Calculate cputime delta */ + user = update_tsk_timer(&tsk->thread.user_timer, + READ_ONCE(lc->user_timer)); + guest = update_tsk_timer(&tsk->thread.guest_timer, + READ_ONCE(lc->guest_timer)); + system = update_tsk_timer(&tsk->thread.system_timer, + READ_ONCE(lc->system_timer)); + hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, + READ_ONCE(lc->hardirq_timer)); + softirq = update_tsk_timer(&tsk->thread.softirq_timer, + READ_ONCE(lc->softirq_timer)); + lc->steal_timer += + clock - user - guest - system - hardirq - softirq; + + /* Push account value */ + if (user) { + account_user_time(tsk, cputime_to_nsecs(user)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(user)); } - return virt_timer_forward(user + system); + if (guest) { + account_guest_time(tsk, cputime_to_nsecs(guest)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest)); + } + + if (system) + account_system_index_scaled(tsk, system, CPUTIME_SYSTEM); + if (hardirq) + account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ); + if (softirq) + account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ); + + return virt_timer_forward(user + guest + system + hardirq + softirq); } void vtime_task_switch(struct task_struct *prev) { - struct thread_info *ti; - - do_account_vtime(prev, 0); - ti = task_thread_info(prev); - ti->user_timer = S390_lowcore.user_timer; - ti->system_timer = S390_lowcore.system_timer; - ti = task_thread_info(current); - S390_lowcore.user_timer = ti->user_timer; - S390_lowcore.system_timer = ti->system_timer; + struct lowcore *lc = get_lowcore(); + + do_account_vtime(prev); + prev->thread.user_timer = lc->user_timer; + prev->thread.guest_timer = lc->guest_timer; + prev->thread.system_timer = lc->system_timer; + prev->thread.hardirq_timer = lc->hardirq_timer; + prev->thread.softirq_timer = lc->softirq_timer; + lc->user_timer = current->thread.user_timer; + lc->guest_timer = current->thread.guest_timer; + lc->system_timer = current->thread.system_timer; + lc->hardirq_timer = current->thread.hardirq_timer; + lc->softirq_timer = current->thread.softirq_timer; } /* @@ -117,81 +198,67 @@ void vtime_task_switch(struct task_struct *prev) * accounting system time in order to correctly compute * the stolen time accounting. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + struct lowcore *lc = get_lowcore(); + u64 steal, avg_steal; + + if (do_account_vtime(tsk)) virt_timer_expire(); + + steal = lc->steal_timer; + avg_steal = lc->avg_steal_timer; + if ((s64) steal > 0) { + lc->steal_timer = 0; + account_steal_time(cputime_to_nsecs(steal)); + avg_steal += steal; + } + lc->avg_steal_timer = avg_steal / 2; +} + +static u64 vtime_delta(void) +{ + struct lowcore *lc = get_lowcore(); + u64 timer = lc->last_update_timer; + + lc->last_update_timer = get_cpu_timer(); + return timer - lc->last_update_timer; } /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_account_kernel(struct task_struct *tsk) { - struct thread_info *ti = task_thread_info(tsk); - u64 timer, system; - - WARN_ON_ONCE(!irqs_disabled()); + struct lowcore *lc = get_lowcore(); + u64 delta = vtime_delta(); - timer = S390_lowcore.last_update_timer; - S390_lowcore.last_update_timer = get_vtimer(); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + if (tsk->flags & PF_VCPU) + lc->guest_timer += delta; + else + lc->system_timer += delta; - system = S390_lowcore.system_timer - ti->system_timer; - S390_lowcore.steal_timer -= system; - ti->system_timer = S390_lowcore.system_timer; - account_system_time(tsk, 0, system, system); - - virt_timer_forward(system); + virt_timer_forward(delta); } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); - -void vtime_account_system(struct task_struct *tsk) -__attribute__((alias("vtime_account_irq_enter"))); -EXPORT_SYMBOL_GPL(vtime_account_system); +EXPORT_SYMBOL_GPL(vtime_account_kernel); -void __kprobes vtime_stop_cpu(void) +void vtime_account_softirq(struct task_struct *tsk) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - unsigned long long idle_time; - unsigned long psw_mask; - - trace_hardirqs_on(); - - /* Wait for external, I/O or machine check interrupt. */ - psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - idle->nohz_delay = 0; - - /* Call the assembler magic in entry.S */ - psw_idle(idle, psw_mask); - - /* Account time spent with enabled wait psw loaded as idle time. */ - idle->sequence++; - smp_wmb(); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; - idle->idle_time += idle_time; - idle->idle_count++; - account_idle_time(idle_time); - smp_wmb(); - idle->sequence++; + u64 delta = vtime_delta(); + + get_lowcore()->softirq_timer += delta; + + virt_timer_forward(delta); } -cputime64_t s390_get_idle_time(int cpu) +void vtime_account_hardirq(struct task_struct *tsk) { - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (idle->sequence != sequence)); - return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; + u64 delta = vtime_delta(); + + get_lowcore()->hardirq_timer += delta; + + virt_timer_forward(delta); } /* @@ -295,7 +362,7 @@ static void __add_vtimer(struct vtimer_list *timer, int periodic) } /* - * add_virt_timer - add an oneshot virtual CPU timer + * add_virt_timer - add a oneshot virtual CPU timer */ void add_virt_timer(struct vtimer_list *timer) { @@ -371,31 +438,15 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void __cpuinit init_cpu_vtimer(void) +void vtime_init(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); -} - -static int __cpuinit s390_nohz_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - struct s390_idle_data *idle; - long cpu = (long) hcpu; - - idle = &per_cpu(s390_idle, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DYING: - idle->nohz_delay = 0; - default: - break; + /* Setup initial MT scaling values */ + if (smp_cpu_mtid) { + __this_cpu_write(mt_scaling_jiffies, jiffies); + __this_cpu_write(mt_scaling_mult, 1); + __this_cpu_write(mt_scaling_div, 1); + stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles)); } - return NOTIFY_OK; -} - -void __init vtime_init(void) -{ - /* Enable cpu timer interrupts on the boot cpu. */ - init_cpu_vtimer(); - cpu_notifier(s390_nohz_notify, 0); } diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c new file mode 100644 index 000000000000..949fdbf0e8b6 --- /dev/null +++ b/arch/s390/kernel/wti.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for warning track interruption + * + * Copyright IBM Corp. 2023 + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/kallsyms.h> +#include <linux/smpboot.h> +#include <linux/irq.h> +#include <uapi/linux/sched/types.h> +#include <asm/debug.h> +#include <asm/diag.h> +#include <asm/sclp.h> + +#define WTI_DBF_LEN 64 + +struct wti_debug { + unsigned long missed; + unsigned long addr; + pid_t pid; +}; + +struct wti_state { + /* debug data for s390dbf */ + struct wti_debug dbg; + /* + * Represents the real-time thread responsible to + * acknowledge the warning-track interrupt and trigger + * preliminary and postliminary precautions. + */ + struct task_struct *thread; + /* + * If pending is true, the real-time thread must be scheduled. + * If not, a wake up of that thread will remain a noop. + */ + bool pending; +}; + +static DEFINE_PER_CPU(struct wti_state, wti_state); + +static debug_info_t *wti_dbg; + +/* + * During a warning-track grace period, interrupts are disabled + * to prevent delays of the warning-track acknowledgment. + * + * Once the CPU is physically dispatched again, interrupts are + * re-enabled. + */ + +static void wti_irq_disable(void) +{ + unsigned long flags; + struct ctlreg cr6; + + local_irq_save(flags); + local_ctl_store(6, &cr6); + /* disable all I/O interrupts */ + cr6.val &= ~0xff000000UL; + local_ctl_load(6, &cr6); + local_irq_restore(flags); +} + +static void wti_irq_enable(void) +{ + unsigned long flags; + struct ctlreg cr6; + + local_irq_save(flags); + local_ctl_store(6, &cr6); + /* enable all I/O interrupts */ + cr6.val |= 0xff000000UL; + local_ctl_load(6, &cr6); + local_irq_restore(flags); +} + +static void store_debug_data(struct wti_state *st) +{ + struct pt_regs *regs = get_irq_regs(); + + st->dbg.pid = current->pid; + st->dbg.addr = 0; + if (!user_mode(regs)) + st->dbg.addr = regs->psw.addr; +} + +static void wti_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct wti_state *st = this_cpu_ptr(&wti_state); + + inc_irq_stat(IRQEXT_WTI); + wti_irq_disable(); + store_debug_data(st); + st->pending = true; + wake_up_process(st->thread); +} + +static int wti_pending(unsigned int cpu) +{ + struct wti_state *st = per_cpu_ptr(&wti_state, cpu); + + return st->pending; +} + +static void wti_dbf_grace_period(struct wti_state *st) +{ + struct wti_debug *wdi = &st->dbg; + char buf[WTI_DBF_LEN]; + + if (wdi->addr) + snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr); + else + snprintf(buf, sizeof(buf), "%d <user>", wdi->pid); + debug_text_event(wti_dbg, 2, buf); + wdi->missed++; +} + +static int wti_show(struct seq_file *seq, void *v) +{ + struct wti_state *st; + int cpu; + + cpus_read_lock(); + seq_puts(seq, " "); + for_each_online_cpu(cpu) + seq_printf(seq, "CPU%-8d", cpu); + seq_putc(seq, '\n'); + for_each_online_cpu(cpu) { + st = per_cpu_ptr(&wti_state, cpu); + seq_printf(seq, " %10lu", st->dbg.missed); + } + seq_putc(seq, '\n'); + cpus_read_unlock(); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wti); + +static void wti_thread_fn(unsigned int cpu) +{ + struct wti_state *st = per_cpu_ptr(&wti_state, cpu); + + st->pending = false; + /* + * Yield CPU voluntarily to the hypervisor. Control + * resumes when hypervisor decides to dispatch CPU + * to this LPAR again. + */ + if (diag49c(DIAG49C_SUBC_ACK)) + wti_dbf_grace_period(st); + wti_irq_enable(); +} + +static struct smp_hotplug_thread wti_threads = { + .store = &wti_state.thread, + .thread_should_run = wti_pending, + .thread_fn = wti_thread_fn, + .thread_comm = "cpuwti/%u", + .selfparking = false, +}; + +static int __init wti_init(void) +{ + struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 }; + struct dentry *wti_dir; + struct wti_state *st; + int cpu, rc; + + rc = -EOPNOTSUPP; + if (!sclp.has_wti) + goto out; + rc = smpboot_register_percpu_thread(&wti_threads); + if (WARN_ON(rc)) + goto out; + for_each_online_cpu(cpu) { + st = per_cpu_ptr(&wti_state, cpu); + sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param); + } + rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt); + if (rc) { + pr_warn("Couldn't request external interrupt 0x1007\n"); + goto out_thread; + } + irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK); + rc = diag49c(DIAG49C_SUBC_REG); + if (rc) { + pr_warn("Failed to register warning track interrupt through DIAG 49C\n"); + rc = -EOPNOTSUPP; + goto out_subclass; + } + wti_dir = debugfs_create_dir("wti", arch_debugfs_dir); + debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops); + wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN); + if (!wti_dbg) { + rc = -ENOMEM; + goto out_debug_register; + } + rc = debug_register_view(wti_dbg, &debug_hex_ascii_view); + if (rc) + goto out_debug_register; + goto out; +out_debug_register: + debug_unregister(wti_dbg); +out_subclass: + irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK); + unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt); +out_thread: + smpboot_unregister_percpu_thread(&wti_threads); +out: + return rc; +} +late_initcall(wti_init); |
