summaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/.gitignore1
-rw-r--r--arch/s390/kernel/Makefile98
-rw-r--r--arch/s390/kernel/abs_lowcore.c47
-rw-r--r--arch/s390/kernel/alternative.c90
-rw-r--r--arch/s390/kernel/asm-offsets.c316
-rw-r--r--arch/s390/kernel/audit.c29
-rw-r--r--arch/s390/kernel/audit.h15
-rw-r--r--arch/s390/kernel/base.S206
-rw-r--r--arch/s390/kernel/bitmap.c54
-rw-r--r--arch/s390/kernel/cache.c399
-rw-r--r--arch/s390/kernel/cert_store.c813
-rw-r--r--arch/s390/kernel/compat_audit.c44
-rw-r--r--arch/s390/kernel/compat_exec_domain.c29
-rw-r--r--arch/s390/kernel/compat_linux.c513
-rw-r--r--arch/s390/kernel/compat_linux.h120
-rw-r--r--arch/s390/kernel/compat_ptrace.h63
-rw-r--r--arch/s390/kernel/compat_signal.c452
-rw-r--r--arch/s390/kernel/compat_wrapper.S1414
-rw-r--r--arch/s390/kernel/cpacf.c118
-rw-r--r--arch/s390/kernel/cpcmd.c76
-rw-r--r--arch/s390/kernel/cpufeature.c52
-rw-r--r--arch/s390/kernel/crash_dump.c702
-rw-r--r--arch/s390/kernel/ctlreg.c122
-rw-r--r--arch/s390/kernel/debug.c1567
-rw-r--r--arch/s390/kernel/diag.c81
-rw-r--r--arch/s390/kernel/diag/Makefile1
-rw-r--r--arch/s390/kernel/diag/diag.c324
-rw-r--r--arch/s390/kernel/diag/diag310.c276
-rw-r--r--arch/s390/kernel/diag/diag324.c224
-rw-r--r--arch/s390/kernel/diag/diag_ioctl.h14
-rw-r--r--arch/s390/kernel/diag/diag_misc.c63
-rw-r--r--arch/s390/kernel/dis.c1994
-rw-r--r--arch/s390/kernel/dumpstack.c274
-rw-r--r--arch/s390/kernel/early.c528
-rw-r--r--arch/s390/kernel/early_printk.c44
-rw-r--r--arch/s390/kernel/ebcdic.c43
-rw-r--r--arch/s390/kernel/entry.S1357
-rw-r--r--arch/s390/kernel/entry.h97
-rw-r--r--arch/s390/kernel/entry64.S1011
-rw-r--r--arch/s390/kernel/facility.c22
-rw-r--r--arch/s390/kernel/fpu.c195
-rw-r--r--arch/s390/kernel/ftrace.c425
-rw-r--r--arch/s390/kernel/ftrace.h22
-rw-r--r--arch/s390/kernel/guarded_storage.c129
-rw-r--r--arch/s390/kernel/head.S505
-rw-r--r--arch/s390/kernel/head31.S107
-rw-r--r--arch/s390/kernel/head64.S102
-rw-r--r--arch/s390/kernel/head_kdump.S108
-rw-r--r--arch/s390/kernel/hiperdispatch.c430
-rw-r--r--arch/s390/kernel/idle.c99
-rw-r--r--arch/s390/kernel/ima_arch.c14
-rw-r--r--arch/s390/kernel/ipl.c2020
-rw-r--r--arch/s390/kernel/ipl_vmparm.c38
-rw-r--r--arch/s390/kernel/irq.c500
-rw-r--r--arch/s390/kernel/jump_label.c86
-rw-r--r--arch/s390/kernel/kdebugfs.c14
-rw-r--r--arch/s390/kernel/kexec_elf.c138
-rw-r--r--arch/s390/kernel/kexec_image.c67
-rw-r--r--arch/s390/kernel/kprobes.c566
-rw-r--r--arch/s390/kernel/lgr.c17
-rw-r--r--arch/s390/kernel/machine_kexec.c252
-rw-r--r--arch/s390/kernel/machine_kexec_file.c389
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c56
-rw-r--r--arch/s390/kernel/mcount.S218
-rw-r--r--arch/s390/kernel/mcount64.S65
-rw-r--r--arch/s390/kernel/module.c323
-rw-r--r--arch/s390/kernel/nmi.c594
-rw-r--r--arch/s390/kernel/nospec-branch.c158
-rw-r--r--arch/s390/kernel/nospec-sysfs.c23
-rw-r--r--arch/s390/kernel/numa.c28
-rw-r--r--arch/s390/kernel/os_info.c62
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c1994
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c1065
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2103
-rw-r--r--arch/s390/kernel/perf_event.c247
-rw-r--r--arch/s390/kernel/perf_pai.c1230
-rw-r--r--arch/s390/kernel/perf_regs.c63
-rw-r--r--arch/s390/kernel/pgm_check.S152
-rw-r--r--arch/s390/kernel/process.c284
-rw-r--r--arch/s390/kernel/processor.c371
-rw-r--r--arch/s390/kernel/ptrace.c1311
-rw-r--r--arch/s390/kernel/reipl.S147
-rw-r--r--arch/s390/kernel/reipl64.S155
-rw-r--r--arch/s390/kernel/relocate_kernel.S148
-rw-r--r--arch/s390/kernel/relocate_kernel64.S121
-rw-r--r--arch/s390/kernel/rethook.c34
-rw-r--r--arch/s390/kernel/rethook.h7
-rw-r--r--arch/s390/kernel/runtime_instr.c112
-rw-r--r--arch/s390/kernel/s390_ksyms.c13
-rw-r--r--arch/s390/kernel/sclp.S359
-rw-r--r--arch/s390/kernel/setup.c1234
-rw-r--r--arch/s390/kernel/signal.c450
-rw-r--r--arch/s390/kernel/skey.c48
-rw-r--r--arch/s390/kernel/smp.c1129
-rw-r--r--arch/s390/kernel/stackprotector.c156
-rw-r--r--arch/s390/kernel/stacktrace.c218
-rw-r--r--arch/s390/kernel/sthyi.c556
-rw-r--r--arch/s390/kernel/suspend.c213
-rw-r--r--arch/s390/kernel/swsusp_asm64.S309
-rw-r--r--arch/s390/kernel/sys_s390.c140
-rw-r--r--arch/s390/kernel/syscall.c138
-rw-r--r--arch/s390/kernel/syscalls.S355
-rw-r--r--arch/s390/kernel/syscalls/Makefile32
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl399
-rw-r--r--arch/s390/kernel/sysinfo.c256
-rw-r--r--arch/s390/kernel/text_amode31.S158
-rw-r--r--arch/s390/kernel/time.c1692
-rw-r--r--arch/s390/kernel/topology.c635
-rw-r--r--arch/s390/kernel/trace.c33
-rw-r--r--arch/s390/kernel/traps.c570
-rw-r--r--arch/s390/kernel/unwind_bc.c184
-rw-r--r--arch/s390/kernel/uprobes.c380
-rw-r--r--arch/s390/kernel/uv.c947
-rw-r--r--arch/s390/kernel/vdso.c395
-rw-r--r--arch/s390/kernel/vdso/.gitignore2
-rw-r--r--arch/s390/kernel/vdso/Makefile76
-rwxr-xr-xarch/s390/kernel/vdso/gen_vdso_offsets.sh15
-rw-r--r--arch/s390/kernel/vdso/getcpu.c21
-rw-r--r--arch/s390/kernel/vdso/note.S (renamed from arch/s390/kernel/vdso64/note.S)1
-rw-r--r--arch/s390/kernel/vdso/vdso.h15
-rw-r--r--arch/s390/kernel/vdso/vdso.lds.S (renamed from arch/s390/kernel/vdso64/vdso64.lds.S)67
-rw-r--r--arch/s390/kernel/vdso/vdso_generic.c19
-rw-r--r--arch/s390/kernel/vdso/vdso_user_wrapper.S52
-rw-r--r--arch/s390/kernel/vdso/vdso_wrapper.S (renamed from arch/s390/kernel/vdso64/vdso64_wrapper.S)9
-rw-r--r--arch/s390/kernel/vdso/vgetrandom-chacha.S181
-rw-r--r--arch/s390/kernel/vdso/vgetrandom.c14
-rw-r--r--arch/s390/kernel/vdso32/.gitignore1
-rw-r--r--arch/s390/kernel/vdso32/Makefile58
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S39
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S128
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S78
-rw-r--r--arch/s390/kernel/vdso32/note.S12
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S138
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S14
-rw-r--r--arch/s390/kernel/vdso64/.gitignore1
-rw-r--r--arch/s390/kernel/vdso64/Makefile58
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S44
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S125
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S56
-rw-r--r--arch/s390/kernel/vmcore_info.c24
-rw-r--r--arch/s390/kernel/vmlinux.lds.S240
-rw-r--r--arch/s390/kernel/vtime.c323
-rw-r--r--arch/s390/kernel/wti.c215
143 files changed, 24418 insertions, 19594 deletions
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
index c5f676c3c224..bbb90f92d051 100644
--- a/arch/s390/kernel/.gitignore
+++ b/arch/s390/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 4bb2a4656163..42c83d60d6fa 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -1,17 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux kernel.
#
ifdef CONFIG_FUNCTION_TRACER
-# Don't trace early setup code and tracing code
-CFLAGS_REMOVE_early.o = -pg
-CFLAGS_REMOVE_ftrace.o = -pg
+
+# Do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+
+# Do not trace early setup code
+CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE)
+
endif
+GCOV_PROFILE_early.o := n
+KCOV_INSTRUMENT_early.o := n
+UBSAN_SANITIZE_early.o := n
+KASAN_SANITIZE_ipl.o := n
+KASAN_SANITIZE_machine_kexec.o := n
+
#
# Passing null pointers is ok for smp code, since we access the lowcore here.
#
-CFLAGS_smp.o := -Wno-nonnull
+CFLAGS_smp.o := -Wno-nonnull
#
# Disable tailcall optimizations for stack / callchain walking functions
@@ -20,51 +34,51 @@ CFLAGS_smp.o := -Wno-nonnull
#
CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-#
-# Pass UTS_MACHINE for user_regset definition
-#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+obj-y := head.o traps.o time.o process.o early.o setup.o idle.o vtime.o
+obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
+obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
+obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o
+obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y += diag/
-CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
+always-$(KBUILD_BUILTIN) += vmlinux.lds
-obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o
-obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
-obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y += dumpstack.o
+obj-$(CONFIG_SYSFS) += nospec-sysfs.o
+CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
-obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
-obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
-obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
-
-extra-y += head.o vmlinux.lds
-extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o)
-
-obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SCHED_BOOK) += topology.o
-obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o
+obj-$(CONFIG_SYSFS) += cpacf.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
-compat-obj-$(CONFIG_AUDIT) += compat_audit.o
-obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
- compat_wrapper.o compat_exec_domain.o \
- $(compat-obj-y)
-
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_KPROBES) += mcount.o
+obj-$(CONFIG_RETHOOK) += rethook.o
+obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
+obj-$(CONFIG_CERT_STORE) += cert_store.o
+obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
-ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
-obj-y += runtime_instr.o cache.o
-endif
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai.o
+
+obj-$(CONFIG_TRACEPOINTS) += trace.o
# vdso
-obj-$(CONFIG_64BIT) += vdso64/
-obj-$(CONFIG_32BIT) += vdso32/
-obj-$(CONFIG_COMPAT) += vdso32/
+obj-y += vdso/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..6252b7d115dd
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ unsigned long phys = __pa(lc);
+ int rc, i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+ if (rc) {
+ /*
+ * Do not unmap allocated page tables in case the
+ * allocation was not requested. In such a case the
+ * request is expected coming from an atomic context,
+ * while the unmap attempt might sleep.
+ */
+ if (alloc) {
+ for (--i; i >= 0; i--) {
+ addr -= PAGE_SIZE;
+ vmem_unmap_4k_page(addr);
+ }
+ }
+ return rc;
+ }
+ addr += PAGE_SIZE;
+ phys += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ int i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ vmem_unmap_4k_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
new file mode 100644
index 000000000000..90c0e6408992
--- /dev/null
+++ b/arch/s390/kernel/alternative.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) "alt: " fmt
+#endif
+
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
+#include <asm/alternative.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+
+#ifndef a_debug
+#define a_debug pr_debug
+#endif
+
+#ifndef __kernel_va
+#define __kernel_va(x) (void *)(x)
+#endif
+
+unsigned long __bootdata_preserved(machine_features[1]);
+
+struct alt_debug {
+ unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG];
+ unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG];
+ int spec;
+};
+
+static struct alt_debug __bootdata_preserved(alt_debug);
+
+static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data)
+{
+ char oinsn[33], ninsn[33];
+ unsigned long kptr;
+ unsigned int pos;
+
+ for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++)
+ hex_byte_pack(&oinsn[2 * pos], old[pos]);
+ oinsn[2 * pos] = 0;
+ for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++)
+ hex_byte_pack(&ninsn[2 * pos], new[pos]);
+ ninsn[2 * pos] = 0;
+ kptr = (unsigned long)__kernel_va(old);
+ a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn);
+}
+
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
+{
+ struct alt_debug *d;
+ struct alt_instr *a;
+ bool debug, replace;
+ u8 *old, *new;
+
+ /*
+ * The scan order should be from start to end. A later scanned
+ * alternative code can overwrite previously scanned alternative code.
+ */
+ d = &alt_debug;
+ for (a = start; a < end; a++) {
+ if (!(a->ctx & ctx))
+ continue;
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ debug = __test_facility(a->data, d->facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ replace = test_machine_feature(a->data);
+ debug = __test_machine_feature(a->data, d->mfeatures);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ debug = d->spec;
+ break;
+ default:
+ replace = false;
+ debug = false;
+ }
+ if (!replace)
+ continue;
+ old = (u8 *)&a->instr_offset + a->instr_offset;
+ new = (u8 *)&a->repl_offset + a->repl_offset;
+ if (debug)
+ alternative_dump(old, new, a->instrlen, a->type, a->data);
+ s390_kernel_write(old, new, a->instrlen);
+ }
+}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 2416138ebd3e..e1a5b5b54e4f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -1,170 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Generate definitions needed by assembly language modules.
* This code generates raw asm output which is post-processed to extract
* and format the required data.
*/
-
-#define ASM_OFFSETS_C
+#define COMPILE_OFFSETS
#include <linux/kbuild.h>
-#include <linux/kvm_host.h>
#include <linux/sched.h>
-#include <asm/cputime.h>
-#include <asm/vdso.h>
-#include <asm/pgtable.h>
-
-/*
- * Make sure that the compiler is new enough. We want a compiler that
- * is known to work with the "Q" assembler constraint.
- */
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too old; please use version 3.3.3 or newer
-#endif
+#include <linux/purgatory.h>
+#include <linux/pgtable.h>
+#include <linux/ftrace_regs.h>
+#include <asm/kvm_host_types.h>
+#include <asm/stacktrace.h>
+#include <asm/ptrace.h>
int main(void)
{
- DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
- DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
- DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
- BLANK();
- DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+ /* task struct offsets */
+ OFFSET(__TASK_stack, task_struct, stack);
+ OFFSET(__TASK_thread, task_struct, thread);
+ OFFSET(__TASK_pid, task_struct, pid);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(__TASK_stack_canary, task_struct, stack_canary);
+#endif
BLANK();
- DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
- DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
- DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+ /* thread struct offsets */
+ OFFSET(__THREAD_ksp, thread_struct, ksp);
BLANK();
- DEFINE(__TI_task, offsetof(struct thread_info, task));
- DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
- DEFINE(__TI_flags, offsetof(struct thread_info, flags));
- DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
- DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
- DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
- DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
- DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
- DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
+ /* thread info offsets */
+ OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
- DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
- DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
- DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
- DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
- DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
- DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
- DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+ /* pt_regs offsets */
+ OFFSET(__PT_PSW, pt_regs, psw);
+ OFFSET(__PT_GPRS, pt_regs, gprs);
+ OFFSET(__PT_R0, pt_regs, gprs[0]);
+ OFFSET(__PT_R1, pt_regs, gprs[1]);
+ OFFSET(__PT_R2, pt_regs, gprs[2]);
+ OFFSET(__PT_R3, pt_regs, gprs[3]);
+ OFFSET(__PT_R4, pt_regs, gprs[4]);
+ OFFSET(__PT_R5, pt_regs, gprs[5]);
+ OFFSET(__PT_R6, pt_regs, gprs[6]);
+ OFFSET(__PT_R7, pt_regs, gprs[7]);
+ OFFSET(__PT_R8, pt_regs, gprs[8]);
+ OFFSET(__PT_R9, pt_regs, gprs[9]);
+ OFFSET(__PT_R10, pt_regs, gprs[10]);
+ OFFSET(__PT_R11, pt_regs, gprs[11]);
+ OFFSET(__PT_R12, pt_regs, gprs[12]);
+ OFFSET(__PT_R13, pt_regs, gprs[13]);
+ OFFSET(__PT_R14, pt_regs, gprs[14]);
+ OFFSET(__PT_R15, pt_regs, gprs[15]);
+ OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
+ OFFSET(__PT_FLAGS, pt_regs, flags);
+ OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
- DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
- DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
- DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+ /* stack_frame offsets */
+ OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+ OFFSET(__SF_GPRS, stack_frame, gprs);
+ OFFSET(__SF_EMPTY, stack_frame, empty[0]);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
+ OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
+ OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+ OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+ OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
+ OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq);
+ DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
- /* timeval/timezone offsets for use by vdso */
- DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
- DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
- DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
- DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
- DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
- DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
- DEFINE(__VDSO_NTP_MULT, offsetof(struct vdso_data, ntp_mult));
- DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
- DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
- /* constants used by the vdso */
- DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+ DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user));
+ OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address);
+ DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper));
BLANK();
- /* idle data offsets */
- DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
- DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
- DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
- DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
- /* lowcore offsets */
- DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
- DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
- DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
- DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
- DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
- DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
- DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
- DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
- DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
- DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
- DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
- DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
- DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
- DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
- DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
- DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
- DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
- DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
- DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
- DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
- DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
- DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
- DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
- DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
- DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
- DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
- DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
- DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
- DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
- DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+ /* hardware defined lowcore locations 0x000 - 0x1ff */
+ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
+ OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
+ OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
+ OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
+ OFFSET(__LC_PGM_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code);
+ OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
+ OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
+ OFFSET(__LC_PER_CODE, lowcore, per_code);
+ OFFSET(__LC_PER_ATMID, lowcore, per_atmid);
+ OFFSET(__LC_PER_ADDRESS, lowcore, per_address);
+ OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id);
+ OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id);
+ OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id);
+ OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id);
+ OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code);
+ OFFSET(__LC_MON_CODE, lowcore, monitor_code);
+ OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id);
+ OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr);
+ OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm);
+ OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word);
+ OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code);
+ OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
+ OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
+ OFFSET(__LC_PGM_LAST_BREAK, lowcore, pgm_last_break);
+ OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
+ OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
+ OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
+ OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
+ OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+ OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw);
+ OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw);
+ OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw);
+ OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw);
+ OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw);
+ OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw);
+ OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw);
+ OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
+ OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
+ /* software defined lowcore locations 0x200 - 0xdff*/
+ OFFSET(__LC_SAVE_AREA, lowcore, save_area);
+ OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
+ OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
+ OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
+ OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
+ OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
+ OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
+ OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
+ OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
+ OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
+ OFFSET(__LC_CURRENT, lowcore, current_task);
+ OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
+ OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
+ OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
+ OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
+ OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack);
+ OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
+ OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
+ OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
+ OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags);
+ OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce);
+ OFFSET(__LC_USER_ASCE, lowcore, user_asce);
+ OFFSET(__LC_LPP, lowcore, lpp);
+ OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
+ OFFSET(__LC_LAST_BREAK, lowcore, last_break);
+ /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+ OFFSET(__LC_STACK_CANARY, lowcore, stack_canary);
+ OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
+ OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info);
+ OFFSET(__LC_OS_INFO, lowcore, os_info);
+ /* hardware defined lowcore locations 0x1000 - 0x18ff */
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
+ OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
+ OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
+ OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
+ OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area);
+ OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area);
+ OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area);
+ OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
+ OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
+ OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
+ OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area);
+ OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
+ OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
+ OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
- DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
- DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
- DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
- DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
- DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
- DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
- DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
- DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
- DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
- DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
- DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
- DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
- DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
- DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
- DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
- DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
- DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
- DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
- DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
- DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
- DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
- DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
- DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
- DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
- DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
- DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
- DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
- DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
- DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
- DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
- DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
- BLANK();
- DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
- DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
- DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
- DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
- DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
- DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
- DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
- DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-#ifdef CONFIG_32BIT
- DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
-#else /* CONFIG_32BIT */
- DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
- DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
- DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
- DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
- DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
- DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
- DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
- DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
- DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
- DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
- DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
- DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
-#endif /* CONFIG_32BIT */
+ /* gmap/sie offsets */
+ OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+ OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
+ /* kexec_sha_region */
+ OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start);
+ OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len);
+ DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region));
+ /* sizeof kernel parameter area */
+ DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea));
+ /* kernel parameter area offsets */
+ DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device));
+ DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start));
+ DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size));
+ DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base));
+ DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
+ DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
+ DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+ OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
+ DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
index f4932c22ebe4..7897d9411e13 100644
--- a/arch/s390/kernel/audit.c
+++ b/arch/s390/kernel/audit.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
-#include "audit.h"
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
@@ -31,42 +31,29 @@ static unsigned signal_class[] = {
int audit_classify_arch(int arch)
{
-#ifdef CONFIG_COMPAT
- if (arch == AUDIT_ARCH_S390)
- return 1;
-#endif
return 0;
}
int audit_classify_syscall(int abi, unsigned syscall)
{
-#ifdef CONFIG_COMPAT
- if (abi == AUDIT_ARCH_S390)
- return s390_classify_syscall(syscall);
-#endif
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 0;
+ return AUDITSC_NATIVE;
}
}
static int __init audit_classes_init(void)
{
-#ifdef CONFIG_COMPAT
- audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
- audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
- audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
- audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
- audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
-#endif
audit_register_class(AUDIT_CLASS_WRITE, write_class);
audit_register_class(AUDIT_CLASS_READ, read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
deleted file mode 100644
index 12b56f4b5a73..000000000000
--- a/arch/s390/kernel/audit.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __ARCH_S390_KERNEL_AUDIT_H
-#define __ARCH_S390_KERNEL_AUDIT_H
-
-#include <linux/types.h>
-
-#ifdef CONFIG_COMPAT
-extern int s390_classify_syscall(unsigned);
-extern __u32 s390_dir_class[];
-extern __u32 s390_write_class[];
-extern __u32 s390_read_class[];
-extern __u32 s390_chattr_class[];
-extern __u32 s390_signal_class[];
-#endif /* CONFIG_COMPAT */
-
-#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
deleted file mode 100644
index 797a823a2275..000000000000
--- a/arch/s390/kernel/base.S
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * arch/s390/kernel/base.S
- *
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- * Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/ptrace.h>
-#include <asm/sigp.h>
-
-#ifdef CONFIG_64BIT
-
-ENTRY(s390_base_mcck_handler)
- basr %r13,0
-0: lg %r15,__LC_PANIC_STACK # load panic stack
- aghi %r15,-STACK_FRAME_OVERHEAD
- larl %r1,s390_base_mcck_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 1f
- basr %r14,%r1
-1: la %r1,4095
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
- lpswe __LC_MCK_OLD_PSW
-
- .section .bss
- .align 8
- .globl s390_base_mcck_handler_fn
-s390_base_mcck_handler_fn:
- .quad 0
- .previous
-
-ENTRY(s390_base_ext_handler)
- stmg %r0,%r15,__LC_SAVE_AREA_ASYNC
- basr %r13,0
-0: aghi %r15,-STACK_FRAME_OVERHEAD
- larl %r1,s390_base_ext_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 1f
- basr %r14,%r1
-1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
- ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
- lpswe __LC_EXT_OLD_PSW
-
- .section .bss
- .align 8
- .globl s390_base_ext_handler_fn
-s390_base_ext_handler_fn:
- .quad 0
- .previous
-
-ENTRY(s390_base_pgm_handler)
- stmg %r0,%r15,__LC_SAVE_AREA_SYNC
- basr %r13,0
-0: aghi %r15,-STACK_FRAME_OVERHEAD
- larl %r1,s390_base_pgm_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 1f
- basr %r14,%r1
- lmg %r0,%r15,__LC_SAVE_AREA_SYNC
- lpswe __LC_PGM_OLD_PSW
-1: lpswe disabled_wait_psw-0b(%r13)
-
- .align 8
-disabled_wait_psw:
- .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
-
- .section .bss
- .align 8
- .globl s390_base_pgm_handler_fn
-s390_base_pgm_handler_fn:
- .quad 0
- .previous
-
-#
-# Calls diag 308 subcode 1 and continues execution
-#
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
-ENTRY(diag308_reset)
- larl %r4,.Lctlregs # Save control registers
- stctg %c0,%c15,0(%r4)
- larl %r4,.Lfpctl # Floating point control register
- stfpc 0(%r4)
- larl %r4,.Lcontinue_psw # Save PSW flags
- epsw %r2,%r3
- stm %r2,%r3,0(%r4)
- larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
- lghi %r3,0
- lg %r4,0(%r4) # Save PSW
- sturg %r4,%r3 # Use sturg, because of large pages
- lghi %r1,1
- diag %r1,%r1,0x308
-.Lrestart_part2:
- lhi %r0,0 # Load r0 with zero
- lhi %r1,2 # Use mode 2 = ESAME (dump)
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
- sam64 # Switch to 64 bit addressing mode
- larl %r4,.Lctlregs # Restore control registers
- lctlg %c0,%c15,0(%r4)
- larl %r4,.Lfpctl # Restore floating point ctl register
- lfpc 0(%r4)
- larl %r4,.Lcontinue_psw # Restore PSW flags
- lpswe 0(%r4)
-.Lcontinue:
- br %r14
-.align 16
-.Lrestart_psw:
- .long 0x00080000,0x80000000 + .Lrestart_part2
-
- .section .data..nosave,"aw",@progbits
-.align 8
-.Lcontinue_psw:
- .quad 0,.Lcontinue
- .previous
-
- .section .bss
-.align 8
-.Lctlregs:
- .rept 16
- .quad 0
- .endr
-.Lfpctl:
- .long 0
- .previous
-
-#else /* CONFIG_64BIT */
-
-ENTRY(s390_base_mcck_handler)
- basr %r13,0
-0: l %r15,__LC_PANIC_STACK # load panic stack
- ahi %r15,-STACK_FRAME_OVERHEAD
- l %r1,2f-0b(%r13)
- l %r1,0(%r1)
- ltr %r1,%r1
- jz 1f
- basr %r14,%r1
-1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA
- lpsw __LC_MCK_OLD_PSW
-
-2: .long s390_base_mcck_handler_fn
-
- .section .bss
- .align 4
- .globl s390_base_mcck_handler_fn
-s390_base_mcck_handler_fn:
- .long 0
- .previous
-
-ENTRY(s390_base_ext_handler)
- stm %r0,%r15,__LC_SAVE_AREA_ASYNC
- basr %r13,0
-0: ahi %r15,-STACK_FRAME_OVERHEAD
- l %r1,2f-0b(%r13)
- l %r1,0(%r1)
- ltr %r1,%r1
- jz 1f
- basr %r14,%r1
-1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC
- ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
- lpsw __LC_EXT_OLD_PSW
-
-2: .long s390_base_ext_handler_fn
-
- .section .bss
- .align 4
- .globl s390_base_ext_handler_fn
-s390_base_ext_handler_fn:
- .long 0
- .previous
-
-ENTRY(s390_base_pgm_handler)
- stm %r0,%r15,__LC_SAVE_AREA_SYNC
- basr %r13,0
-0: ahi %r15,-STACK_FRAME_OVERHEAD
- l %r1,2f-0b(%r13)
- l %r1,0(%r1)
- ltr %r1,%r1
- jz 1f
- basr %r14,%r1
- lm %r0,%r15,__LC_SAVE_AREA_SYNC
- lpsw __LC_PGM_OLD_PSW
-
-1: lpsw disabled_wait_psw-0b(%r13)
-
-2: .long s390_base_pgm_handler_fn
-
-disabled_wait_psw:
- .align 8
- .long 0x000a0000,0x00000000 + s390_base_pgm_handler
-
- .section .bss
- .align 4
- .globl s390_base_pgm_handler_fn
-s390_base_pgm_handler_fn:
- .long 0
- .previous
-
-#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c
deleted file mode 100644
index 102da5e23037..000000000000
--- a/arch/s390/kernel/bitmap.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Bitmaps for set_bit, clear_bit, test_and_set_bit, ...
- * See include/asm/{bitops.h|posix_types.h} for details
- *
- * Copyright IBM Corp. 1999, 2009
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
- */
-
-#include <linux/bitops.h>
-#include <linux/module.h>
-
-const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
-EXPORT_SYMBOL(_oi_bitmap);
-
-const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f };
-EXPORT_SYMBOL(_ni_bitmap);
-
-const char _zb_findmap[] = {
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 };
-EXPORT_SYMBOL(_zb_findmap);
-
-const char _sb_findmap[] = {
- 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 };
-EXPORT_SYMBOL(_sb_findmap);
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 64b24650e4f8..4f2669030220 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -1,41 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Extract CPU cache information and expose them via sysfs.
*
* Copyright IBM Corp. 2012
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/notifier.h>
#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cacheinfo.h>
#include <asm/facility.h>
-struct cache {
- unsigned long size;
- unsigned int line_size;
- unsigned int associativity;
- unsigned int nr_sets;
- unsigned int level : 3;
- unsigned int type : 2;
- unsigned int private : 1;
- struct list_head list;
-};
-
-struct cache_dir {
- struct kobject *kobj;
- struct cache_index_dir *index;
-};
-
-struct cache_index_dir {
- struct kobject kobj;
- int cpu;
- struct cache *cache;
- struct cache_index_dir *next;
-};
-
enum {
CACHE_SCOPE_NOTEXISTS,
CACHE_SCOPE_PRIVATE,
@@ -44,10 +18,10 @@ enum {
};
enum {
- CACHE_TYPE_SEPARATE,
- CACHE_TYPE_DATA,
- CACHE_TYPE_INSTRUCTION,
- CACHE_TYPE_UNIFIED,
+ CTYPE_SEPARATE,
+ CTYPE_DATA,
+ CTYPE_INSTRUCTION,
+ CTYPE_UNIFIED,
};
enum {
@@ -70,319 +44,128 @@ struct cache_info {
};
#define CACHE_MAX_LEVEL 8
-
union cache_topology {
struct cache_info ci[CACHE_MAX_LEVEL];
- unsigned long long raw;
+ unsigned long raw;
};
static const char * const cache_type_string[] = {
- "Data",
+ "",
"Instruction",
+ "Data",
+ "",
"Unified",
};
-static struct cache_dir *cache_dir_cpu[NR_CPUS];
-static LIST_HEAD(cache_list);
+static const enum cache_type cache_type_map[] = {
+ [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE,
+ [CTYPE_DATA] = CACHE_TYPE_DATA,
+ [CTYPE_INSTRUCTION] = CACHE_TYPE_INST,
+ [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
void show_cacheinfo(struct seq_file *m)
{
- struct cache *cache;
- int index = 0;
+ struct cpu_cacheinfo *this_cpu_ci;
+ struct cacheinfo *cache;
+ int idx;
- list_for_each_entry(cache, &cache_list, list) {
- seq_printf(m, "cache%-11d: ", index);
+ this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
+ for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+ cache = this_cpu_ci->info_list + idx;
+ seq_printf(m, "cache%-11d: ", idx);
seq_printf(m, "level=%d ", cache->level);
seq_printf(m, "type=%s ", cache_type_string[cache->type]);
- seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared");
- seq_printf(m, "size=%luK ", cache->size >> 10);
- seq_printf(m, "line_size=%u ", cache->line_size);
- seq_printf(m, "associativity=%d", cache->associativity);
+ seq_printf(m, "scope=%s ",
+ cache->disable_sysfs ? "Shared" : "Private");
+ seq_printf(m, "size=%dK ", cache->size >> 10);
+ seq_printf(m, "line_size=%u ", cache->coherency_line_size);
+ seq_printf(m, "associativity=%d", cache->ways_of_associativity);
seq_puts(m, "\n");
- index++;
}
}
-static inline unsigned long ecag(int ai, int li, int ti)
+static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
{
- unsigned long cmd, val;
+ if (level >= CACHE_MAX_LEVEL)
+ return CACHE_TYPE_NOCACHE;
+ ci += level;
+ if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
+ return CACHE_TYPE_NOCACHE;
+ return cache_type_map[ci->type];
+}
- cmd = ai << 4 | li << 1 | ti;
- asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
- : "=d" (val) : "a" (cmd));
- return val;
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+ return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti);
}
-static int __init cache_add(int level, int private, int type)
+static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
+ enum cache_type type, unsigned int level, int cpu)
{
- struct cache *cache;
- int ti;
+ int ti, num_sets;
- cache = kzalloc(sizeof(*cache), GFP_KERNEL);
- if (!cache)
- return -ENOMEM;
- if (type == CACHE_TYPE_INSTRUCTION)
+ if (type == CACHE_TYPE_INST)
ti = CACHE_TI_INSTRUCTION;
else
ti = CACHE_TI_UNIFIED;
- cache->size = ecag(EXTRACT_SIZE, level, ti);
- cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
- cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
- cache->nr_sets = cache->size / cache->associativity;
- cache->nr_sets /= cache->line_size;
- cache->private = private;
- cache->level = level + 1;
- cache->type = type - 1;
- list_add_tail(&cache->list, &cache_list);
- return 0;
-}
-
-static void __init cache_build_info(void)
-{
- struct cache *cache, *next;
+ this_leaf->level = level + 1;
+ this_leaf->type = type;
+ this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+ this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+ this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
+ num_sets = this_leaf->size / this_leaf->coherency_line_size;
+ num_sets /= this_leaf->ways_of_associativity;
+ this_leaf->number_of_sets = num_sets;
+ cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+ if (!private)
+ this_leaf->disable_sysfs = true;
+}
+
+int init_cache_level(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ unsigned int level = 0, leaves = 0;
union cache_topology ct;
- int level, private, rc;
+ enum cache_type ctype;
+ if (!this_cpu_ci)
+ return -EINVAL;
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
- for (level = 0; level < CACHE_MAX_LEVEL; level++) {
- switch (ct.ci[level].scope) {
- case CACHE_SCOPE_NOTEXISTS:
- case CACHE_SCOPE_RESERVED:
- return;
- case CACHE_SCOPE_SHARED:
- private = 0;
- break;
- case CACHE_SCOPE_PRIVATE:
- private = 1;
+ do {
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_NOCACHE)
break;
- }
- if (ct.ci[level].type == CACHE_TYPE_SEPARATE) {
- rc = cache_add(level, private, CACHE_TYPE_DATA);
- rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION);
- } else {
- rc = cache_add(level, private, ct.ci[level].type);
- }
- if (rc)
- goto error;
- }
- return;
-error:
- list_for_each_entry_safe(cache, next, &cache_list, list) {
- list_del(&cache->list);
- kfree(cache);
- }
-}
-
-static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu)
-{
- struct cache_dir *cache_dir;
- struct kobject *kobj = NULL;
- struct device *dev;
-
- dev = get_cpu_device(cpu);
- if (!dev)
- goto out;
- kobj = kobject_create_and_add("cache", &dev->kobj);
- if (!kobj)
- goto out;
- cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
- if (!cache_dir)
- goto out;
- cache_dir->kobj = kobj;
- cache_dir_cpu[cpu] = cache_dir;
- return cache_dir;
-out:
- kobject_put(kobj);
- return NULL;
-}
-
-static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj)
-{
- return container_of(kobj, struct cache_index_dir, kobj);
-}
-
-static void cache_index_release(struct kobject *kobj)
-{
- struct cache_index_dir *index;
-
- index = kobj_to_cache_index_dir(kobj);
- kfree(index);
-}
-
-static ssize_t cache_index_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct kobj_attribute *kobj_attr;
-
- kobj_attr = container_of(attr, struct kobj_attribute, attr);
- return kobj_attr->show(kobj, kobj_attr, buf);
-}
-
-#define DEFINE_CACHE_ATTR(_name, _format, _value) \
-static ssize_t cache_##_name##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *buf) \
-{ \
- struct cache_index_dir *index; \
- \
- index = kobj_to_cache_index_dir(kobj); \
- return sprintf(buf, _format, _value); \
-} \
-static struct kobj_attribute cache_##_name##_attr = \
- __ATTR(_name, 0444, cache_##_name##_show, NULL);
-
-DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10);
-DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size);
-DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets);
-DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity);
-DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]);
-DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level);
-
-static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf)
-{
- struct cache_index_dir *index;
- int len;
-
- index = kobj_to_cache_index_dir(kobj);
- len = type ?
- cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) :
- cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu));
- len += sprintf(&buf[len], "\n");
- return len;
-}
-
-static ssize_t shared_cpu_map_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return shared_cpu_map_func(kobj, 0, buf);
-}
-static struct kobj_attribute cache_shared_cpu_map_attr =
- __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
-
-static ssize_t shared_cpu_list_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return shared_cpu_map_func(kobj, 1, buf);
-}
-static struct kobj_attribute cache_shared_cpu_list_attr =
- __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
-
-static struct attribute *cache_index_default_attrs[] = {
- &cache_type_attr.attr,
- &cache_size_attr.attr,
- &cache_number_of_sets_attr.attr,
- &cache_ways_of_associativity_attr.attr,
- &cache_level_attr.attr,
- &cache_coherency_line_size_attr.attr,
- &cache_shared_cpu_map_attr.attr,
- &cache_shared_cpu_list_attr.attr,
- NULL,
-};
-
-static const struct sysfs_ops cache_index_ops = {
- .show = cache_index_show,
-};
-
-static struct kobj_type cache_index_type = {
- .sysfs_ops = &cache_index_ops,
- .release = cache_index_release,
- .default_attrs = cache_index_default_attrs,
-};
-
-static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir,
- struct cache *cache, int index,
- int cpu)
-{
- struct cache_index_dir *index_dir;
- int rc;
-
- index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
- if (!index_dir)
- return -ENOMEM;
- index_dir->cache = cache;
- index_dir->cpu = cpu;
- rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
- cache_dir->kobj, "index%d", index);
- if (rc)
- goto out;
- index_dir->next = cache_dir->index;
- cache_dir->index = index_dir;
+ /* Separate instruction and data caches */
+ leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+ } while (++level < CACHE_MAX_LEVEL);
+ this_cpu_ci->num_levels = level;
+ this_cpu_ci->num_leaves = leaves;
return 0;
-out:
- kfree(index_dir);
- return rc;
}
-static int __cpuinit cache_add_cpu(int cpu)
+int populate_cache_leaves(unsigned int cpu)
{
- struct cache_dir *cache_dir;
- struct cache *cache;
- int rc, index = 0;
-
- if (list_empty(&cache_list))
- return 0;
- cache_dir = cache_create_cache_dir(cpu);
- if (!cache_dir)
- return -ENOMEM;
- list_for_each_entry(cache, &cache_list, list) {
- if (!cache->private)
- break;
- rc = cache_create_index_dir(cache_dir, cache, index, cpu);
- if (rc)
- return rc;
- index++;
- }
- return 0;
-}
-
-static void __cpuinit cache_remove_cpu(int cpu)
-{
- struct cache_index_dir *index, *next;
- struct cache_dir *cache_dir;
-
- cache_dir = cache_dir_cpu[cpu];
- if (!cache_dir)
- return;
- index = cache_dir->index;
- while (index) {
- next = index->next;
- kobject_put(&index->kobj);
- index = next;
- }
- kobject_put(cache_dir->kobj);
- kfree(cache_dir);
- cache_dir_cpu[cpu] = NULL;
-}
-
-static int __cpuinit cache_hotplug(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
- int rc = 0;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ unsigned int level, idx, pvt;
+ union cache_topology ct;
+ enum cache_type ctype;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- rc = cache_add_cpu(cpu);
- if (rc)
- cache_remove_cpu(cpu);
- break;
- case CPU_DEAD:
- cache_remove_cpu(cpu);
- break;
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
+ idx < this_cpu_ci->num_leaves; idx++, level++) {
+ if (!this_leaf)
+ return -EINVAL;
+ pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_SEPARATE) {
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
+ } else {
+ ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
+ }
}
- return rc ? NOTIFY_BAD : NOTIFY_OK;
-}
-
-static int __init cache_init(void)
-{
- int cpu;
-
- if (!test_facility(34))
- return 0;
- cache_build_info();
- for_each_online_cpu(cpu)
- cache_add_cpu(cpu);
- hotcpu_notifier(cache_hotplug, 0);
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
-device_initcall(cache_init);
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 000000000000..c217a5e64094
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s): Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES 10
+
+#define VCE_FLAGS_VALID_MASK 0x80
+
+#define ISM_LEN_DWORDS 4
+#define VCSSB_LEN_BYTES 128
+#define VCSSB_LEN_NO_CERTS 4
+#define VCB_LEN_NO_CERTS 64
+#define VC_NAME_LEN_BYTES 64
+
+#define CERT_STORE_KEY_TYPE_NAME "cert_store_key"
+#define CERT_STORE_KEYRING_NAME "cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+ debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+ DIAG320_SUBCODES = 0,
+ DIAG320_STORAGE = 1,
+ DIAG320_CERT_BLOCK = 2,
+};
+
+enum diag320_rc {
+ DIAG320_RC_OK = 0x0001,
+ DIAG320_RC_CS_NOMATCH = 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+ u32 vcssb_length;
+ u8 pad_0x04[3];
+ u8 version;
+ u8 pad_0x08[8];
+ u32 cs_token;
+ u8 pad_0x14[12];
+ u16 total_vc_index_count;
+ u16 max_vc_index_count;
+ u8 pad_0x24[28];
+ u32 max_vce_length;
+ u32 max_vcxe_length;
+ u8 pad_0x48[8];
+ u32 max_single_vcb_length;
+ u32 total_vcb_length;
+ u32 max_single_vcxb_length;
+ u32 total_vcxb_length;
+ u8 pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+ u32 vce_length;
+ u8 flags;
+ u8 key_type;
+ u16 vc_index;
+ u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+ u8 vc_format;
+ u8 pad_0x49;
+ u16 key_id_length;
+ u8 pad_0x4c;
+ u8 vc_hash_type;
+ u16 vc_hash_length;
+ u8 pad_0x50[4];
+ u32 vc_length;
+ u8 pad_0x58[8];
+ u16 vc_hash_offset;
+ u16 vc_offset;
+ u8 pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+ u32 vcb_input_length;
+ u8 pad_0x04[4];
+ u16 first_vc_index;
+ u16 last_vc_index;
+ u32 pad_0x0c;
+ u32 cs_token;
+ u8 pad_0x14[12];
+ u32 vcb_output_length;
+ u8 pad_0x24[3];
+ u8 version;
+ u16 stored_vc_count;
+ u16 remaining_vc_count;
+ u8 pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+ struct vcb_header vcb_hdr;
+ u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+ struct vce_header vce_hdr;
+ u8 cert_data_buf[];
+} __packed __aligned(4);
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+ char ascii[VC_NAME_LEN_BYTES + 1];
+
+ /*
+ * First 64 bytes of the key description is key name in EBCDIC CP 500.
+ * Convert it to ASCII for displaying in /proc/keys.
+ */
+ strscpy(ascii, key->description);
+ EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+ seq_puts(m, ascii);
+
+ seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+ if (key_is_positive(key))
+ seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+ .name = CERT_STORE_KEY_TYPE_NAME,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
+ .revoke = user_revoke,
+ .destroy = user_destroy,
+ .describe = cert_store_key_describe,
+ .read = user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+ pr_dbf_msg("VCB Header:");
+ pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+ pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+ pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+ pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+ pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+ pr_dbf_msg("version: %d", b->vcb_hdr.version);
+ pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+ pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+ unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+ char log_string[VC_NAME_LEN_BYTES + 40];
+
+ pr_dbf_msg("VCE Header:");
+ pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+ pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+ pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+ pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+ pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+ pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+ pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+ pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+ pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+ pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+ pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+ /* Certificate name in ASCII. */
+ memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+ EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+ vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+ snprintf(log_string, sizeof(log_string),
+ "index: %d vce_hdr.vc_name (ASCII): %s",
+ e->vce_hdr.vc_index, vc_name);
+ debug_text_event(cert_store_hexdump, 3, log_string);
+
+ /* Certificate data. */
+ debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+ debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+ debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+ debug_event(cert_store_hexdump, 3,
+ (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+ debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+ debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+ pr_dbf_msg("VCSSB:");
+ pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+ pr_dbf_msg("version: %u", s->version);
+ pr_dbf_msg("cs_token: %u", s->cs_token);
+ pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+ pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+ pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+ pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length);
+ pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+ pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+ pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+ pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, };
+
+ asm_inline volatile(
+ " diag %[rp],%[subcode],0x320\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b, 0b)
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "cc", "memory");
+
+ return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X320);
+
+ return __diag320(subcode, addr);
+}
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+ u8 hash[SHA256_DIGEST_SIZE];
+ u16 vc_hash_length;
+ u8 *vce_hash;
+
+ vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+ vc_hash_length = vce->vce_hdr.vc_hash_length;
+ sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+ if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+ return 0;
+
+ pr_dbf_msg("SHA256 hash of received certificate does not match");
+ debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+ debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+ debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+ debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+ return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+ if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+ pr_dbf_msg("Certificate entry is invalid");
+ return -EINVAL;
+ }
+ if (vce->vce_hdr.vc_format != 1) {
+ pr_dbf_msg("Certificate format is not supported");
+ return -EINVAL;
+ }
+ if (vce->vce_hdr.vc_hash_type != 1) {
+ pr_dbf_msg("Hash type is not supported");
+ return -EINVAL;
+ }
+
+ return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+ key_ref_t us_keyring_ref;
+
+ us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+ KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+ if (IS_ERR(us_keyring_ref)) {
+ pr_dbf_msg("Couldn't get user session keyring: %ld",
+ PTR_ERR(us_keyring_ref));
+ return ERR_PTR(-ENOKEY);
+ }
+ key_ref_put(us_keyring_ref);
+ return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+ unsigned long num_keys, key_index;
+ size_t keyring_payload_len;
+ key_serial_t *key_array;
+ struct key *current_key;
+ int rc;
+
+ keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+ num_keys = keyring_payload_len / sizeof(key_serial_t);
+ key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+ if (!key_array)
+ return -ENOMEM;
+
+ rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+ if (rc != keyring_payload_len) {
+ pr_dbf_msg("Couldn't read keyring payload");
+ goto out;
+ }
+
+ for (key_index = 0; key_index < num_keys; key_index++) {
+ current_key = key_lookup(key_array[key_index]);
+ pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+ key_invalidate(current_key);
+ key_put(current_key);
+ rc = key_unlink(keyring, current_key);
+ if (rc) {
+ pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+ break;
+ }
+ }
+out:
+ kfree(key_array);
+ return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+ key_ref_t cs_keyring_ref;
+ struct key *cs_keyring;
+
+ cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+ &key_type_keyring, CERT_STORE_KEYRING_NAME,
+ false);
+ if (!IS_ERR(cs_keyring_ref)) {
+ cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+ key_ref_put(cs_keyring_ref);
+ goto found;
+ }
+ /* Search default locations: thread, process, session keyrings */
+ cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+ if (IS_ERR(cs_keyring))
+ return NULL;
+ key_put(cs_keyring);
+found:
+ return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+ struct key *cs_keyring;
+
+ cs_keyring = find_cs_keyring();
+ if (!cs_keyring)
+ return;
+
+ pr_dbf_msg("Found cert_store keyring. Purging...");
+ /*
+ * Remove cert_store_key_type in case invalidation
+ * of old cert_store keys failed (= severe error).
+ */
+ if (invalidate_keyring_keys(cs_keyring))
+ unregister_key_type(&key_type_cert_store_key);
+
+ keyring_clear(cs_keyring);
+ key_invalidate(cs_keyring);
+ key_put(cs_keyring);
+ key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+ static struct key *cs_keyring;
+
+ /* Cleanup previous cs_keyring and all associated keys if any. */
+ cleanup_cs_keys();
+ cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+ GLOBAL_ROOT_GID, current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+ NULL, get_user_session_keyring());
+ if (IS_ERR(cs_keyring)) {
+ pr_dbf_msg("Can't allocate cert_store keyring");
+ return NULL;
+ }
+
+ pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+ /*
+ * In case a previous clean-up ran into an
+ * error and unregistered key type.
+ */
+ register_key_type(&key_type_cert_store_key);
+
+ return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+ size_t len, name_len;
+ u32 cs_token;
+ char *desc;
+
+ cs_token = vcssb->cs_token;
+ /* Description string contains "%64s:%05u:%010u\0". */
+ name_len = sizeof(vce->vce_hdr.vc_name);
+ len = name_len + 1 + 5 + 1 + 10 + 1;
+ desc = kmalloc(len, GFP_KERNEL);
+ if (!desc)
+ return NULL;
+
+ memcpy(desc, vce->vce_hdr.vc_name, name_len);
+ snprintf(desc + name_len, len - name_len, ":%05u:%010u",
+ vce->vce_hdr.vc_index, cs_token);
+
+ return desc;
+}
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+ struct key *keyring)
+{
+ key_ref_t newkey;
+ char *desc;
+ int rc;
+
+ desc = get_key_description(vcssb, vce);
+ if (!desc)
+ return -ENOMEM;
+
+ newkey = key_create_or_update(
+ make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+ desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+ vce->vce_hdr.vc_length,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA);
+
+ rc = PTR_ERR_OR_ZERO(newkey);
+ if (rc) {
+ pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+ rc = -ENOKEY;
+ goto out;
+ }
+
+ key_ref_put(newkey);
+out:
+ kfree(desc);
+ return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+ int diag320_rc;
+
+ memset(vcssb, 0, sizeof(*vcssb));
+ vcssb->vcssb_length = VCSSB_LEN_BYTES;
+ diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+ pr_dbf_vcssb(vcssb);
+
+ if (diag320_rc != DIAG320_RC_OK) {
+ pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+ return -EIO;
+ }
+ if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+ pr_dbf_msg("No certificates available for current configuration");
+ return -ENOKEY;
+ }
+
+ return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+ return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+ memset(vcb, 0, sizeof(*vcb));
+ vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+ vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+ /* Request single entry. */
+ vcb->vcb_hdr.first_vc_index = index;
+ vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+ struct vce *extracted_vce;
+
+ extracted_vce = (struct vce *)vcb->vcb_buf;
+ memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+ pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+ int rc, diag320_rc;
+
+ fill_vcb_input(vcssb, vcb, index);
+
+ diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+ pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+ pr_dbf_vcb(vcb);
+
+ switch (diag320_rc) {
+ case DIAG320_RC_OK:
+ rc = 0;
+ if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+ pr_dbf_msg("No certificate entry for index %u", index);
+ rc = -ENOKEY;
+ } else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+ /* Retry on insufficient space. */
+ pr_dbf_msg("Couldn't get all requested certificates");
+ rc = -EAGAIN;
+ }
+ break;
+ case DIAG320_RC_CS_NOMATCH:
+ pr_dbf_msg("Certificate Store token mismatch");
+ rc = -EAGAIN;
+ break;
+ default:
+ pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc);
+ rc = -EINVAL;
+ break;
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its' certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+ struct key *keyring)
+{
+ struct vcb *vcb;
+ struct vce *vce;
+ int rc;
+
+ rc = -ENOMEM;
+ vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+ vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+ if (!vcb || !vce)
+ goto out;
+
+ rc = get_sevcb(vcssb, index, vcb);
+ if (rc)
+ goto out;
+
+ extract_vce_from_sevcb(vcb, vce);
+ rc = check_certificate_valid(vce);
+ if (rc)
+ goto out;
+
+ rc = create_key_from_vce(vcssb, vce, keyring);
+ if (rc)
+ goto out;
+
+ pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+ vfree(vce);
+ vfree(vcb);
+ return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+ int rc, index, count, added;
+
+ count = 0;
+ added = 0;
+ /* Certificate Store entries indices start with 1 and have no gaps. */
+ for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+ pr_dbf_msg("Creating key from VCE %u", index);
+ rc = create_key_from_sevcb(vcssb, index, keyring);
+ count++;
+
+ if (rc == -EAGAIN)
+ return rc;
+
+ if (rc)
+ pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+ else
+ added++;
+ }
+
+ if (added == 0) {
+ pr_dbf_msg("Processed %d entries. No keys created", count);
+ return -ENOKEY;
+ }
+
+ pr_info("Added %d of %d keys to cert_store keyring", added, count);
+
+ /*
+ * Do not allow to link more keys to certificate store keyring after all
+ * the VCEs were processed.
+ */
+ rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+ if (rc)
+ pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+ return 0;
+}
+
+/*
+ * Check which DIAG320 subcodes are installed.
+ * Return -ENOENT if subcodes 1 or 2 are not available.
+ */
+static int query_diag320_subcodes(void)
+{
+ unsigned long ism[ISM_LEN_DWORDS];
+ int rc;
+
+ rc = diag320(0, ism);
+ if (rc != DIAG320_RC_OK) {
+ pr_dbf_msg("DIAG320 subcode query returned %04x", rc);
+ return -ENOENT;
+ }
+
+ debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+ debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+ if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) {
+ pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+/*
+ * Check if Certificate Store is supported by the firmware and DIAG320 subcodes
+ * 1 and 2 are installed. Create cert_store keyring and link all certificates
+ * available for the current partition to it as "cert_store_key" type
+ * keys. On refresh or error invalidate cert_store keyring and destroy
+ * all keys of "cert_store_key" type.
+ */
+static int fill_cs_keyring(void)
+{
+ struct key *cs_keyring;
+ struct vcssb *vcssb;
+ int rc;
+
+ rc = -ENOMEM;
+ vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+ if (!vcssb)
+ goto cleanup_keys;
+
+ rc = -ENOENT;
+ if (!sclp.has_diag320) {
+ pr_dbf_msg("Certificate Store is not supported");
+ goto cleanup_keys;
+ }
+
+ rc = query_diag320_subcodes();
+ if (rc)
+ goto cleanup_keys;
+
+ rc = get_vcssb(vcssb);
+ if (rc)
+ goto cleanup_keys;
+
+ rc = -ENOMEM;
+ cs_keyring = create_cs_keyring();
+ if (!cs_keyring)
+ goto cleanup_keys;
+
+ rc = add_certificates_to_keyring(vcssb, cs_keyring);
+ if (rc)
+ goto cleanup_cs_keyring;
+
+ goto out;
+
+cleanup_cs_keyring:
+ key_put(cs_keyring);
+cleanup_keys:
+ cleanup_cs_keys();
+out:
+ kfree(vcssb);
+ return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ if (cs_status_val == -1)
+ return sysfs_emit(buf, "uninitialized\n");
+ else if (cs_status_val == 0)
+ return sysfs_emit(buf, "ok\n");
+
+ return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc, retries;
+
+ pr_dbf_msg("Refresh certificate store information requested");
+ rc = mutex_lock_interruptible(&cs_refresh_lock);
+ if (rc)
+ return rc;
+
+ for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) {
+ /* Request certificates from certificate store. */
+ rc = fill_cs_keyring();
+ if (rc)
+ pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+ if (rc != -EAGAIN)
+ break;
+ }
+ cs_status_val = rc;
+ mutex_unlock(&cs_refresh_lock);
+
+ return rc ?: count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+ &cs_status_attr.attr,
+ &refresh_attr.attr,
+ NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+ int rc = -ENOMEM;
+
+ cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+ if (!cert_store_dbf)
+ goto cleanup_dbf;
+
+ cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+ if (!cert_store_hexdump)
+ goto cleanup_dbf;
+
+ debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+ debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+ /* Create directory /sys/firmware/cert_store. */
+ cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+ if (!cert_store_kobj)
+ goto cleanup_dbf;
+
+ rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+ if (rc)
+ goto cleanup_kobj;
+
+ register_key_type(&key_type_cert_store_key);
+
+ return rc;
+
+cleanup_kobj:
+ kobject_put(cert_store_kobj);
+cleanup_dbf:
+ debug_unregister(cert_store_dbf);
+ debug_unregister(cert_store_hexdump);
+
+ return rc;
+}
+device_initcall(cert_store_init);
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
deleted file mode 100644
index d6487bf879e5..000000000000
--- a/arch/s390/kernel/compat_audit.c
+++ /dev/null
@@ -1,44 +0,0 @@
-#undef __s390x__
-#include <asm/unistd.h>
-#include "audit.h"
-
-unsigned s390_dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-unsigned s390_chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-unsigned s390_write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-unsigned s390_read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-unsigned s390_signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int s390_classify_syscall(unsigned syscall)
-{
- switch(syscall) {
- case __NR_open:
- return 2;
- case __NR_openat:
- return 3;
- case __NR_socketcall:
- return 4;
- case __NR_execve:
- return 5;
- default:
- return 1;
- }
-}
diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c
deleted file mode 100644
index 765fabdada9f..000000000000
--- a/arch/s390/kernel/compat_exec_domain.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 personality.
- *
- * Copyright IBM Corp. 2000
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/personality.h>
-#include <linux/sched.h>
-
-static struct exec_domain s390_exec_domain;
-
-static int __init s390_init (void)
-{
- s390_exec_domain.name = "Linux/s390";
- s390_exec_domain.handler = NULL;
- s390_exec_domain.pers_low = PER_LINUX32;
- s390_exec_domain.pers_high = PER_LINUX32;
- s390_exec_domain.signal_map = default_exec_domain.signal_map;
- s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
- register_exec_domain(&s390_exec_domain);
- return 0;
-}
-
-__initcall(s390_init);
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
deleted file mode 100644
index 8b6e4f5288a2..000000000000
--- a/arch/s390/kernel/compat_linux.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * S390 version
- * Copyright IBM Corp. 2000
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerhard Tonn (ton@de.ibm.com)
- * Thomas Spatzier (tspat@de.ibm.com)
- *
- * Conversion between 31bit and 64bit native syscalls.
- *
- * Heavily inspired by the 32-bit Sparc compat code which is
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/file.h>
-#include <linux/signal.h>
-#include <linux/resource.h>
-#include <linux/times.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/uio.h>
-#include <linux/quota.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/personality.h>
-#include <linux/stat.h>
-#include <linux/filter.h>
-#include <linux/highmem.h>
-#include <linux/highuid.h>
-#include <linux/mman.h>
-#include <linux/ipv6.h>
-#include <linux/in.h>
-#include <linux/icmpv6.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/binfmts.h>
-#include <linux/capability.h>
-#include <linux/compat.h>
-#include <linux/vfs.h>
-#include <linux/ptrace.h>
-#include <linux/fadvise.h>
-#include <linux/ipc.h>
-#include <linux/slab.h>
-
-#include <asm/types.h>
-#include <asm/uaccess.h>
-
-#include <net/scm.h>
-#include <net/sock.h>
-
-#include "compat_linux.h"
-
-u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT |
- PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK |
- PSW32_MASK_PSTATE | PSW32_ASC_HOME;
-
-/* For this source file, we want overflow handling. */
-
-#undef high2lowuid
-#undef high2lowgid
-#undef low2highuid
-#undef low2highgid
-#undef SET_UID16
-#undef SET_GID16
-#undef NEW_TO_OLD_UID
-#undef NEW_TO_OLD_GID
-#undef SET_OLDSTAT_UID
-#undef SET_OLDSTAT_GID
-#undef SET_STAT_UID
-#undef SET_STAT_GID
-
-#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid)
-#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid)
-#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid)
-#define low2highgid(gid) ((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid)
-#define SET_UID16(var, uid) var = high2lowuid(uid)
-#define SET_GID16(var, gid) var = high2lowgid(gid)
-#define NEW_TO_OLD_UID(uid) high2lowuid(uid)
-#define NEW_TO_OLD_GID(gid) high2lowgid(gid)
-#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
-#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
-#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
-#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
-
-asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group)
-{
- return sys_chown(filename, low2highuid(user), low2highgid(group));
-}
-
-asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group)
-{
- return sys_lchown(filename, low2highuid(user), low2highgid(group));
-}
-
-asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group)
-{
- return sys_fchown(fd, low2highuid(user), low2highgid(group));
-}
-
-asmlinkage long sys32_setregid16(u16 rgid, u16 egid)
-{
- return sys_setregid(low2highgid(rgid), low2highgid(egid));
-}
-
-asmlinkage long sys32_setgid16(u16 gid)
-{
- return sys_setgid((gid_t)gid);
-}
-
-asmlinkage long sys32_setreuid16(u16 ruid, u16 euid)
-{
- return sys_setreuid(low2highuid(ruid), low2highuid(euid));
-}
-
-asmlinkage long sys32_setuid16(u16 uid)
-{
- return sys_setuid((uid_t)uid);
-}
-
-asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid)
-{
- return sys_setresuid(low2highuid(ruid), low2highuid(euid),
- low2highuid(suid));
-}
-
-asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp)
-{
- const struct cred *cred = current_cred();
- int retval;
- u16 ruid, euid, suid;
-
- ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
- euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
- suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
-
- if (!(retval = put_user(ruid, ruidp)) &&
- !(retval = put_user(euid, euidp)))
- retval = put_user(suid, suidp);
-
- return retval;
-}
-
-asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid)
-{
- return sys_setresgid(low2highgid(rgid), low2highgid(egid),
- low2highgid(sgid));
-}
-
-asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp)
-{
- const struct cred *cred = current_cred();
- int retval;
- u16 rgid, egid, sgid;
-
- rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
- egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
- sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
-
- if (!(retval = put_user(rgid, rgidp)) &&
- !(retval = put_user(egid, egidp)))
- retval = put_user(sgid, sgidp);
-
- return retval;
-}
-
-asmlinkage long sys32_setfsuid16(u16 uid)
-{
- return sys_setfsuid((uid_t)uid);
-}
-
-asmlinkage long sys32_setfsgid16(u16 gid)
-{
- return sys_setfsgid((gid_t)gid);
-}
-
-static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
-{
- struct user_namespace *user_ns = current_user_ns();
- int i;
- u16 group;
- kgid_t kgid;
-
- for (i = 0; i < group_info->ngroups; i++) {
- kgid = GROUP_AT(group_info, i);
- group = (u16)from_kgid_munged(user_ns, kgid);
- if (put_user(group, grouplist+i))
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
-{
- struct user_namespace *user_ns = current_user_ns();
- int i;
- u16 group;
- kgid_t kgid;
-
- for (i = 0; i < group_info->ngroups; i++) {
- if (get_user(group, grouplist+i))
- return -EFAULT;
-
- kgid = make_kgid(user_ns, (gid_t)group);
- if (!gid_valid(kgid))
- return -EINVAL;
-
- GROUP_AT(group_info, i) = kgid;
- }
-
- return 0;
-}
-
-asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist)
-{
- int i;
-
- if (gidsetsize < 0)
- return -EINVAL;
-
- get_group_info(current->cred->group_info);
- i = current->cred->group_info->ngroups;
- if (gidsetsize) {
- if (i > gidsetsize) {
- i = -EINVAL;
- goto out;
- }
- if (groups16_to_user(grouplist, current->cred->group_info)) {
- i = -EFAULT;
- goto out;
- }
- }
-out:
- put_group_info(current->cred->group_info);
- return i;
-}
-
-asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
-{
- struct group_info *group_info;
- int retval;
-
- if (!capable(CAP_SETGID))
- return -EPERM;
- if ((unsigned)gidsetsize > NGROUPS_MAX)
- return -EINVAL;
-
- group_info = groups_alloc(gidsetsize);
- if (!group_info)
- return -ENOMEM;
- retval = groups16_from_user(group_info, grouplist);
- if (retval) {
- put_group_info(group_info);
- return retval;
- }
-
- retval = set_current_groups(group_info);
- put_group_info(group_info);
-
- return retval;
-}
-
-asmlinkage long sys32_getuid16(void)
-{
- return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
-}
-
-asmlinkage long sys32_geteuid16(void)
-{
- return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
-}
-
-asmlinkage long sys32_getgid16(void)
-{
- return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
-}
-
-asmlinkage long sys32_getegid16(void)
-{
- return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
-}
-
-#ifdef CONFIG_SYSVIPC
-COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
- unsigned long, third, compat_uptr_t, ptr)
-{
- if (call >> 16) /* hack for backward compatibility */
- return -EINVAL;
- return compat_sys_ipc(call, first, second, third, ptr, third);
-}
-#endif
-
-asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low)
-{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_truncate(path, (high << 32) | low);
-}
-
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low)
-{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_ftruncate(fd, (high << 32) | low);
-}
-
-asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
- return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
- return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count)
-{
- return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count);
-}
-
-struct stat64_emu31 {
- unsigned long long st_dev;
- unsigned int __pad1;
-#define STAT64_HAS_BROKEN_ST_INO 1
- u32 __st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- u32 st_uid;
- u32 st_gid;
- unsigned long long st_rdev;
- unsigned int __pad3;
- long st_size;
- u32 st_blksize;
- unsigned char __pad4[4];
- u32 __pad5; /* future possible st_blocks high bits */
- u32 st_blocks; /* Number 512-byte blocks allocated. */
- u32 st_atime;
- u32 __pad6;
- u32 st_mtime;
- u32 __pad7;
- u32 st_ctime;
- u32 __pad8; /* will be high 32 bits of ctime someday */
- unsigned long st_ino;
-};
-
-static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
-{
- struct stat64_emu31 tmp;
-
- memset(&tmp, 0, sizeof(tmp));
-
- tmp.st_dev = huge_encode_dev(stat->dev);
- tmp.st_ino = stat->ino;
- tmp.__st_ino = (u32)stat->ino;
- tmp.st_mode = stat->mode;
- tmp.st_nlink = (unsigned int)stat->nlink;
- tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
- tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
- tmp.st_rdev = huge_encode_dev(stat->rdev);
- tmp.st_size = stat->size;
- tmp.st_blksize = (u32)stat->blksize;
- tmp.st_blocks = (u32)stat->blocks;
- tmp.st_atime = (u32)stat->atime.tv_sec;
- tmp.st_mtime = (u32)stat->mtime.tv_sec;
- tmp.st_ctime = (u32)stat->ctime.tv_sec;
-
- return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
-}
-
-asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf)
-{
- struct kstat stat;
- int ret = vfs_stat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf)
-{
- struct kstat stat;
- int ret = vfs_lstat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf)
-{
- struct kstat stat;
- int ret = vfs_fstat(fd, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag)
-{
- struct kstat stat;
- int error;
-
- error = vfs_fstatat(dfd, filename, &stat, flag);
- if (error)
- return error;
- return cp_stat64(statbuf, &stat);
-}
-
-/*
- * Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct_emu31 {
- compat_ulong_t addr;
- compat_ulong_t len;
- compat_ulong_t prot;
- compat_ulong_t flags;
- compat_ulong_t fd;
- compat_ulong_t offset;
-};
-
-asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
-{
- struct mmap_arg_struct_emu31 a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- if (a.offset & ~PAGE_MASK)
- return -EINVAL;
- return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
- a.offset >> PAGE_SHIFT);
-}
-
-asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg)
-{
- struct mmap_arg_struct_emu31 a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-}
-
-asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
-
- return sys_read(fd, buf, count);
-}
-
-asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
-
- return sys_write(fd, buf, count);
-}
-
-/*
- * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
- * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
- * because the 31 bit values differ from the 64 bit values.
- */
-
-asmlinkage long
-sys32_fadvise64(int fd, loff_t offset, size_t len, int advise)
-{
- if (advise == 4)
- advise = POSIX_FADV_DONTNEED;
- else if (advise == 5)
- advise = POSIX_FADV_NOREUSE;
- return sys_fadvise64(fd, offset, len, advise);
-}
-
-struct fadvise64_64_args {
- int fd;
- long long offset;
- long long len;
- int advice;
-};
-
-asmlinkage long
-sys32_fadvise64_64(struct fadvise64_64_args __user *args)
-{
- struct fadvise64_64_args a;
-
- if ( copy_from_user(&a, args, sizeof(a)) )
- return -EFAULT;
- if (a.advice == 4)
- a.advice = POSIX_FADV_DONTNEED;
- else if (a.advice == 5)
- a.advice = POSIX_FADV_NOREUSE;
- return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
-}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
deleted file mode 100644
index 976518c0592a..000000000000
--- a/arch/s390/kernel/compat_linux.h
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef _ASM_S390X_S390_H
-#define _ASM_S390X_S390_H
-
-#include <linux/compat.h>
-#include <linux/socket.h>
-#include <linux/syscalls.h>
-
-/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
-/* to a 64 bit pointer */
-#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
-#define AA(__x) \
- ((unsigned long)(__x))
-
-/* Now 32bit compatibility types */
-struct ipc_kludge_32 {
- __u32 msgp; /* pointer */
- __s32 msgtyp;
-};
-
-/* asm/sigcontext.h */
-typedef union
-{
- __u64 d;
- __u32 f;
-} freg_t32;
-
-typedef struct
-{
- unsigned int fpc;
- freg_t32 fprs[__NUM_FPRS];
-} _s390_fp_regs32;
-
-typedef struct
-{
- __u32 mask;
- __u32 addr;
-} _psw_t32 __attribute__ ((aligned(8)));
-
-typedef struct
-{
- _psw_t32 psw;
- __u32 gprs[__NUM_GPRS];
- __u32 acrs[__NUM_ACRS];
-} _s390_regs_common32;
-
-typedef struct
-{
- _s390_regs_common32 regs;
- _s390_fp_regs32 fpregs;
-} _sigregs32;
-
-#define _SIGCONTEXT_NSIG32 64
-#define _SIGCONTEXT_NSIG_BPW32 32
-#define __SIGNAL_FRAMESIZE32 96
-#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2)
-
-struct sigcontext32
-{
- __u32 oldmask[_COMPAT_NSIG_WORDS];
- __u32 sregs; /* pointer */
-};
-
-/* asm/signal.h */
-
-/* asm/ucontext.h */
-struct ucontext32 {
- __u32 uc_flags;
- __u32 uc_link; /* pointer */
- compat_stack_t uc_stack;
- _sigregs32 uc_mcontext;
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
-};
-
-struct stat64_emu31;
-struct mmap_arg_struct_emu31;
-struct fadvise64_64_args;
-
-long sys32_chown16(const char __user * filename, u16 user, u16 group);
-long sys32_lchown16(const char __user * filename, u16 user, u16 group);
-long sys32_fchown16(unsigned int fd, u16 user, u16 group);
-long sys32_setregid16(u16 rgid, u16 egid);
-long sys32_setgid16(u16 gid);
-long sys32_setreuid16(u16 ruid, u16 euid);
-long sys32_setuid16(u16 uid);
-long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
-long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long sys32_setfsuid16(u16 uid);
-long sys32_setfsgid16(u16 gid);
-long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_getuid16(void);
-long sys32_geteuid16(void);
-long sys32_getgid16(void);
-long sys32_getegid16(void);
-long sys32_truncate64(const char __user * path, unsigned long high,
- unsigned long low);
-long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
-long sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs);
-long sys32_delete_module(const char __user *name_user, unsigned int flags);
-long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
- u32 poshi, u32 poslo);
-long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo);
-compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
-long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf);
-long sys32_lstat64(const char __user * filename,
- struct stat64_emu31 __user * statbuf);
-long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
-long sys32_fstatat64(unsigned int dfd, const char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag);
-unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_read(unsigned int fd, char __user * buf, size_t count);
-long sys32_write(unsigned int fd, const char __user * buf, size_t count);
-long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
-long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
-#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
deleted file mode 100644
index 12b823833510..000000000000
--- a/arch/s390/kernel/compat_ptrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef _PTRACE32_H
-#define _PTRACE32_H
-
-#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
-#include "compat_linux.h" /* needed for psw_compat_t */
-
-struct compat_per_struct_kernel {
- __u32 cr9; /* PER control bits */
- __u32 cr10; /* PER starting address */
- __u32 cr11; /* PER ending address */
- __u32 bits; /* Obsolete software bits */
- __u32 starting_addr; /* User specified start address */
- __u32 ending_addr; /* User specified end address */
- __u16 perc_atmid; /* PER trap ATMID */
- __u32 address; /* PER trap instruction address */
- __u8 access_id; /* PER trap access identification */
-};
-
-struct compat_user_regs_struct
-{
- psw_compat_t psw;
- u32 gprs[NUM_GPRS];
- u32 acrs[NUM_ACRS];
- u32 orig_gpr2;
- /* nb: there's a 4-byte hole here */
- s390_fp_regs fp_regs;
- /*
- * These per registers are in here so that gdb can modify them
- * itself as there is no "official" ptrace interface for hardware
- * watchpoints. This is the way intel does it.
- */
- struct compat_per_struct_kernel per_info;
- u32 ieee_instruction_pointer; /* obsolete, always 0 */
-};
-
-struct compat_user {
- /* We start with the registers, to mimic the way that "memory"
- is returned from the ptrace(3,...) function. */
- struct compat_user_regs_struct regs;
- /* The rest of this junk is to help gdb figure out what goes where */
- u32 u_tsize; /* Text segment size (pages). */
- u32 u_dsize; /* Data segment size (pages). */
- u32 u_ssize; /* Stack segment size (pages). */
- u32 start_code; /* Starting virtual address of text. */
- u32 start_stack; /* Starting virtual address of stack area.
- This is actually the bottom of the stack,
- the top of the stack is always found in the
- esp register. */
- s32 signal; /* Signal that caused the core dump. */
- u32 u_ar0; /* Used by gdb to help find the values for */
- /* the registers. */
- u32 magic; /* To uniquely identify a core file */
- char u_comm[32]; /* User command that was responsible */
-};
-
-typedef struct
-{
- __u32 len;
- __u32 kernel_addr;
- __u32 process_addr;
-} compat_ptrace_area;
-
-#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
deleted file mode 100644
index c439ac9ced09..000000000000
--- a/arch/s390/kernel/compat_signal.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright IBM Corp. 2000, 2006
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- * Gerhard Tonn (ton@de.ibm.com)
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- */
-
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/tty.h>
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/lowcore.h>
-#include <asm/switch_to.h>
-#include "compat_linux.h"
-#include "compat_ptrace.h"
-#include "entry.h"
-
-typedef struct
-{
- __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
- struct sigcontext32 sc;
- _sigregs32 sregs;
- int signo;
- __u32 gprs_high[NUM_GPRS];
- __u8 retcode[S390_SYSCALL_SIZE];
-} sigframe32;
-
-typedef struct
-{
- __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
- __u8 retcode[S390_SYSCALL_SIZE];
- compat_siginfo_t info;
- struct ucontext32 uc;
- __u32 gprs_high[NUM_GPRS];
-} rt_sigframe32;
-
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
-{
- int err;
-
- /* If you change siginfo_t structure, please be sure
- this code is fixed accordingly.
- It should never copy any pad contained in the structure
- to avoid security leaks, but must copy the generic
- 3 ints plus the relevant union member.
- This routine must convert siginfo from 64bit to 32bit as well
- at the same time. */
- err = __put_user(from->si_signo, &to->si_signo);
- err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user((short)from->si_code, &to->si_code);
- if (from->si_code < 0)
- err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
- else {
- switch (from->si_code >> 16) {
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
- err |= __put_user(from->si_int, &to->si_int);
- /* fallthrough */
- case __SI_KILL >> 16:
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- break;
- case __SI_CHLD >> 16:
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(from->si_utime, &to->si_utime);
- err |= __put_user(from->si_stime, &to->si_stime);
- err |= __put_user(from->si_status, &to->si_status);
- break;
- case __SI_FAULT >> 16:
- err |= __put_user((unsigned long) from->si_addr,
- &to->si_addr);
- break;
- case __SI_POLL >> 16:
- err |= __put_user(from->si_band, &to->si_band);
- err |= __put_user(from->si_fd, &to->si_fd);
- break;
- case __SI_TIMER >> 16:
- err |= __put_user(from->si_tid, &to->si_tid);
- err |= __put_user(from->si_overrun, &to->si_overrun);
- err |= __put_user(from->si_int, &to->si_int);
- break;
- default:
- break;
- }
- }
- return err;
-}
-
-int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
-{
- int err;
- u32 tmp;
-
- err = __get_user(to->si_signo, &from->si_signo);
- err |= __get_user(to->si_errno, &from->si_errno);
- err |= __get_user(to->si_code, &from->si_code);
-
- if (to->si_code < 0)
- err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
- else {
- switch (to->si_code >> 16) {
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
- err |= __get_user(to->si_int, &from->si_int);
- /* fallthrough */
- case __SI_KILL >> 16:
- err |= __get_user(to->si_pid, &from->si_pid);
- err |= __get_user(to->si_uid, &from->si_uid);
- break;
- case __SI_CHLD >> 16:
- err |= __get_user(to->si_pid, &from->si_pid);
- err |= __get_user(to->si_uid, &from->si_uid);
- err |= __get_user(to->si_utime, &from->si_utime);
- err |= __get_user(to->si_stime, &from->si_stime);
- err |= __get_user(to->si_status, &from->si_status);
- break;
- case __SI_FAULT >> 16:
- err |= __get_user(tmp, &from->si_addr);
- to->si_addr = (void __force __user *)
- (u64) (tmp & PSW32_ADDR_INSN);
- break;
- case __SI_POLL >> 16:
- err |= __get_user(to->si_band, &from->si_band);
- err |= __get_user(to->si_fd, &from->si_fd);
- break;
- case __SI_TIMER >> 16:
- err |= __get_user(to->si_tid, &from->si_tid);
- err |= __get_user(to->si_overrun, &from->si_overrun);
- err |= __get_user(to->si_int, &from->si_int);
- break;
- default:
- break;
- }
- }
- return err;
-}
-
-static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
-{
- _s390_regs_common32 regs32;
- int err, i;
-
- regs32.psw.mask = psw32_user_bits |
- ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER);
- regs32.psw.addr = (__u32) regs->psw.addr |
- (__u32)(regs->psw.mask & PSW_MASK_BA);
- for (i = 0; i < NUM_GPRS; i++)
- regs32.gprs[i] = (__u32) regs->gprs[i];
- save_access_regs(current->thread.acrs);
- memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs));
- err = __copy_to_user(&sregs->regs, &regs32, sizeof(regs32));
- if (err)
- return err;
- save_fp_regs(&current->thread.fp_regs);
- /* s390_fp_regs and _s390_fp_regs32 are the same ! */
- return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs,
- sizeof(_s390_fp_regs32));
-}
-
-static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
-{
- _s390_regs_common32 regs32;
- int err, i;
-
- /* Alwys make any pending restarted system call return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
- err = __copy_from_user(&regs32, &sregs->regs, sizeof(regs32));
- if (err)
- return err;
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
- (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
- /* Check for invalid user address space control. */
- if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
- regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
- for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = (__u64) regs32.gprs[i];
- memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs));
- restore_access_regs(current->thread.acrs);
-
- err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
- sizeof(_s390_fp_regs32));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
- if (err)
- return err;
-
- restore_fp_regs(&current->thread.fp_regs);
- clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */
- return 0;
-}
-
-static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
-{
- __u32 gprs_high[NUM_GPRS];
- int i;
-
- for (i = 0; i < NUM_GPRS; i++)
- gprs_high[i] = regs->gprs[i] >> 32;
-
- return __copy_to_user(uregs, &gprs_high, sizeof(gprs_high));
-}
-
-static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
-{
- __u32 gprs_high[NUM_GPRS];
- int err, i;
-
- err = __copy_from_user(&gprs_high, uregs, sizeof(gprs_high));
- if (err)
- return err;
- for (i = 0; i < NUM_GPRS; i++)
- *(__u32 *)&regs->gprs[i] = gprs_high[i];
- return 0;
-}
-
-asmlinkage long sys32_sigreturn(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
- sigset_t set;
-
- if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
- goto badframe;
- set_current_blocked(&set);
- if (restore_sigregs32(regs, &frame->sregs))
- goto badframe;
- if (restore_sigregs_gprs_high(regs, frame->gprs_high))
- goto badframe;
- return regs->gprs[2];
-badframe:
- force_sig(SIGSEGV, current);
- return 0;
-}
-
-asmlinkage long sys32_rt_sigreturn(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
- sigset_t set;
-
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
- goto badframe;
- set_current_blocked(&set);
- if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
- goto badframe;
- if (restore_sigregs_gprs_high(regs, frame->gprs_high))
- goto badframe;
- if (compat_restore_altstack(&frame->uc.uc_stack))
- goto badframe;
- return regs->gprs[2];
-badframe:
- force_sig(SIGSEGV, current);
- return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
-{
- unsigned long sp;
-
- /* Default to using normal stack */
- sp = (unsigned long) A(regs->gprs[15]);
-
- /* Overflow on alternate signal stack gives SIGSEGV. */
- if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
- return (void __user *) -1UL;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (! sas_ss_flags(sp))
- sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- return (void __user *)((sp - frame_size) & -8ul);
-}
-
-static inline int map_signal(int sig)
-{
- if (current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32)
- return current_thread_info()->exec_domain->signal_invmap[sig];
- else
- return sig;
-}
-
-static int setup_frame32(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs)
-{
- sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32));
-
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
- if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
- goto give_sigsegv;
-
- if (save_sigregs32(regs, &frame->sregs))
- goto give_sigsegv;
- if (save_sigregs_gprs_high(regs, frame->gprs_high))
- goto give_sigsegv;
- if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs))
- goto give_sigsegv;
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
- } else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
- (u16 __force __user *)(frame->retcode)))
- goto give_sigsegv;
- }
-
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->gprs[15] = (__force __u64) frame;
- /* Force 31 bit amode and default user address space control. */
- regs->psw.mask = PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__force __u64) ka->sa.sa_handler;
-
- regs->gprs[2] = map_signal(sig);
- regs->gprs[3] = (__force __u64) &frame->sc;
-
- /* We forgot to include these in the sigcontext.
- To avoid breaking binary compatibility, they are passed as args. */
- if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
- sig == SIGTRAP || sig == SIGFPE) {
- /* set extra registers only for synchronous signals */
- regs->gprs[4] = regs->int_code & 127;
- regs->gprs[5] = regs->int_parm_long;
- regs->gprs[6] = task_thread_info(current)->last_break;
- }
-
- /* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
- goto give_sigsegv;
- return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
-}
-
-static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
-{
- int err = 0;
- rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32));
-
- if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
- if (copy_siginfo_to_user32(&frame->info, info))
- goto give_sigsegv;
-
- /* Create the ucontext. */
- err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
- err |= save_sigregs32(regs, &frame->uc.uc_mcontext);
- err |= save_sigregs_gprs_high(regs, frame->gprs_high);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
- } else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
- err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __force __user *)(frame->retcode));
- }
-
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->gprs[15] = (__force __u64) frame;
- /* Force 31 bit amode and default user address space control. */
- regs->psw.mask = PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64) ka->sa.sa_handler;
-
- regs->gprs[2] = map_signal(sig);
- regs->gprs[3] = (__force __u64) &frame->info;
- regs->gprs[4] = (__force __u64) &frame->uc;
- regs->gprs[5] = task_thread_info(current)->last_break;
- return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
-}
-
-/*
- * OK, we're invoking a handler
- */
-
-void handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
-{
- int ret;
-
- /* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame32(sig, ka, info, oldset, regs);
- else
- ret = setup_frame32(sig, ka, oldset, regs);
- if (ret)
- return;
- signal_delivered(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLE_STEP));
-}
-
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
deleted file mode 100644
index 9cb1b975b353..000000000000
--- a/arch/s390/kernel/compat_wrapper.S
+++ /dev/null
@@ -1,1414 +0,0 @@
-/*
-* wrapper for 31 bit compatible system calls.
-*
-* Copyright IBM Corp. 2000, 2006
-* Author(s): Gerhard Tonn (ton@de.ibm.com),
-* Thomas Spatzier (tspat@de.ibm.com)
-*/
-
-#include <linux/linkage.h>
-
-ENTRY(sys32_exit_wrapper)
- lgfr %r2,%r2 # int
- jg sys_exit # branch to sys_exit
-
-ENTRY(sys32_read_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys32_read # branch to sys_read
-
-ENTRY(sys32_write_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- jg sys32_write # branch to system call
-
-ENTRY(sys32_close_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_close # branch to system call
-
-ENTRY(sys32_creat_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_creat # branch to system call
-
-ENTRY(sys32_link_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_link # branch to system call
-
-ENTRY(sys32_unlink_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_unlink # branch to system call
-
-ENTRY(sys32_chdir_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_chdir # branch to system call
-
-ENTRY(sys32_time_wrapper)
- llgtr %r2,%r2 # int *
- jg compat_sys_time # branch to system call
-
-ENTRY(sys32_mknod_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # dev
- jg sys_mknod # branch to system call
-
-ENTRY(sys32_chmod_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # mode_t
- jg sys_chmod # branch to system call
-
-ENTRY(sys32_lchown16_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_lchown16 # branch to system call
-
-#sys32_getpid_wrapper # void
-
-ENTRY(sys32_mount_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # char *
- llgfr %r5,%r5 # unsigned long
- llgtr %r6,%r6 # void *
- jg compat_sys_mount # branch to system call
-
-ENTRY(sys32_oldumount_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_oldumount # branch to system call
-
-ENTRY(sys32_setuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setuid16 # branch to system call
-
-#sys32_getuid16_wrapper # void
-
-ENTRY(sys32_ptrace_wrapper)
- lgfr %r2,%r2 # long
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # long
- llgfr %r5,%r5 # long
- jg compat_sys_ptrace # branch to system call
-
-ENTRY(sys32_alarm_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_alarm # branch to system call
-
-ENTRY(compat_sys_utime_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_utimbuf *
- jg compat_sys_utime # branch to system call
-
-ENTRY(sys32_access_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_access # branch to system call
-
-ENTRY(sys32_nice_wrapper)
- lgfr %r2,%r2 # int
- jg sys_nice # branch to system call
-
-#sys32_sync_wrapper # void
-
-ENTRY(sys32_kill_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_kill # branch to system call
-
-ENTRY(sys32_rename_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_rename # branch to system call
-
-ENTRY(sys32_mkdir_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_mkdir # branch to system call
-
-ENTRY(sys32_rmdir_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_rmdir # branch to system call
-
-ENTRY(sys32_dup_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_dup # branch to system call
-
-ENTRY(sys32_pipe_wrapper)
- llgtr %r2,%r2 # u32 *
- jg sys_pipe # branch to system call
-
-ENTRY(compat_sys_times_wrapper)
- llgtr %r2,%r2 # struct compat_tms *
- jg compat_sys_times # branch to system call
-
-ENTRY(sys32_brk_wrapper)
- llgtr %r2,%r2 # unsigned long
- jg sys_brk # branch to system call
-
-ENTRY(sys32_setgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setgid16 # branch to system call
-
-#sys32_getgid16_wrapper # void
-
-ENTRY(sys32_signal_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __sighandler_t
- jg sys_signal
-
-#sys32_geteuid16_wrapper # void
-
-#sys32_getegid16_wrapper # void
-
-ENTRY(sys32_acct_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_acct # branch to system call
-
-ENTRY(sys32_umount_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_umount # branch to system call
-
-ENTRY(compat_sys_ioctl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_ioctl # branch to system call
-
-ENTRY(compat_sys_fcntl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl # branch to system call
-
-ENTRY(sys32_setpgid_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- jg sys_setpgid # branch to system call
-
-ENTRY(sys32_umask_wrapper)
- lgfr %r2,%r2 # int
- jg sys_umask # branch to system call
-
-ENTRY(sys32_chroot_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_chroot # branch to system call
-
-ENTRY(sys32_ustat_wrapper)
- llgfr %r2,%r2 # dev_t
- llgtr %r3,%r3 # struct ustat *
- jg compat_sys_ustat
-
-ENTRY(sys32_dup2_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_dup2 # branch to system call
-
-#sys32_getppid_wrapper # void
-
-#sys32_getpgrp_wrapper # void
-
-#sys32_setsid_wrapper # void
-
-ENTRY(sys32_setreuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- jg sys32_setreuid16 # branch to system call
-
-ENTRY(sys32_setregid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- jg sys32_setregid16 # branch to system call
-
-ENTRY(sys_sigsuspend_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # old_sigset_t
- jg sys_sigsuspend
-
-ENTRY(compat_sys_sigpending_wrapper)
- llgtr %r2,%r2 # compat_old_sigset_t *
- jg compat_sys_sigpending # branch to system call
-
-ENTRY(sys32_sethostname_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_sethostname # branch to system call
-
-ENTRY(compat_sys_setrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_setrlimit # branch to system call
-
-ENTRY(compat_sys_old_getrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_old_getrlimit # branch to system call
-
-ENTRY(compat_sys_getrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_getrlimit # branch to system call
-
-ENTRY(sys32_mmap2_wrapper)
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg sys32_mmap2 # branch to system call
-
-ENTRY(compat_sys_gettimeofday_wrapper)
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg compat_sys_gettimeofday # branch to system call
-
-ENTRY(compat_sys_settimeofday_wrapper)
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg compat_sys_settimeofday # branch to system call
-
-ENTRY(sys32_getgroups16_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_getgroups16 # branch to system call
-
-ENTRY(sys32_setgroups16_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_setgroups16 # branch to system call
-
-ENTRY(sys32_symlink_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_symlink # branch to system call
-
-ENTRY(sys32_readlink_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_readlink # branch to system call
-
-ENTRY(sys32_uselib_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_uselib # branch to system call
-
-ENTRY(sys32_swapon_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_swapon # branch to system call
-
-ENTRY(sys32_reboot_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # unsigned int
- llgtr %r5,%r5 # void *
- jg sys_reboot # branch to system call
-
-ENTRY(old32_readdir_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_old_readdir # branch to system call
-
-ENTRY(old32_mmap_wrapper)
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg old32_mmap # branch to system call
-
-ENTRY(sys32_munmap_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munmap # branch to system call
-
-ENTRY(sys32_fchmod_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # mode_t
- jg sys_fchmod # branch to system call
-
-ENTRY(sys32_fchown16_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # compat_uid_t
- llgfr %r4,%r4 # compat_uid_t
- jg sys32_fchown16 # branch to system call
-
-ENTRY(sys32_getpriority_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_getpriority # branch to system call
-
-ENTRY(sys32_setpriority_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_setpriority # branch to system call
-
-ENTRY(compat_sys_statfs_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_statfs # branch to system call
-
-ENTRY(compat_sys_fstatfs_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_fstatfs # branch to system call
-
-ENTRY(compat_sys_socketcall_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_socketcall # branch to system call
-
-ENTRY(sys32_syslog_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_syslog # branch to system call
-
-ENTRY(compat_sys_newstat_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newstat # branch to system call
-
-ENTRY(compat_sys_newlstat_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newlstat # branch to system call
-
-ENTRY(compat_sys_newfstat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newfstat # branch to system call
-
-#sys32_vhangup_wrapper # void
-
-ENTRY(sys32_swapoff_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_swapoff # branch to system call
-
-ENTRY(compat_sys_sysinfo_wrapper)
- llgtr %r2,%r2 # struct sysinfo_emu31 *
- jg compat_sys_sysinfo # branch to system call
-
-ENTRY(sys32_fsync_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fsync # branch to system call
-
-#sys32_sigreturn_wrapper # done in sigreturn_glue
-
-#sys32_clone_wrapper # done in clone_glue
-
-ENTRY(sys32_setdomainname_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_setdomainname # branch to system call
-
-ENTRY(sys32_newuname_wrapper)
- llgtr %r2,%r2 # struct new_utsname *
- jg sys_newuname # branch to system call
-
-ENTRY(compat_sys_adjtimex_wrapper)
- llgtr %r2,%r2 # struct compat_timex *
- jg compat_sys_adjtimex # branch to system call
-
-ENTRY(sys32_mprotect_wrapper)
- llgtr %r2,%r2 # unsigned long (actually pointer
- llgfr %r3,%r3 # size_t
- llgfr %r4,%r4 # unsigned long
- jg sys_mprotect # branch to system call
-
-ENTRY(sys_init_module_wrapper)
- llgtr %r2,%r2 # void *
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # char *
- jg sys_init_module # branch to system call
-
-ENTRY(sys_delete_module_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned int
- jg sys_delete_module # branch to system call
-
-ENTRY(sys32_quotactl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # qid_t
- llgtr %r5,%r5 # caddr_t
- jg sys_quotactl # branch to system call
-
-ENTRY(sys32_getpgid_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_getpgid # branch to system call
-
-ENTRY(sys32_fchdir_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fchdir # branch to system call
-
-ENTRY(sys32_bdflush_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # long
- jg sys_bdflush # branch to system call
-
-ENTRY(sys32_sysfs_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys_sysfs # branch to system call
-
-ENTRY(sys32_personality_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_s390_personality # branch to system call
-
-ENTRY(sys32_setfsuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setfsuid16 # branch to system call
-
-ENTRY(sys32_setfsgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setfsgid16 # branch to system call
-
-ENTRY(sys32_llseek_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # unsigned int
- jg sys_llseek # branch to system call
-
-ENTRY(sys32_getdents_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_getdents # branch to system call
-
-ENTRY(compat_sys_select_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_fd_set *
- llgtr %r4,%r4 # compat_fd_set *
- llgtr %r5,%r5 # compat_fd_set *
- llgtr %r6,%r6 # struct compat_timeval *
- jg compat_sys_select # branch to system call
-
-ENTRY(sys32_flock_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_flock # branch to system call
-
-ENTRY(sys32_msync_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_msync # branch to system call
-
-ENTRY(compat_sys_readv_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_readv # branch to system call
-
-ENTRY(compat_sys_writev_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_writev # branch to system call
-
-ENTRY(sys32_getsid_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_getsid # branch to system call
-
-ENTRY(sys32_fdatasync_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fdatasync # branch to system call
-
-ENTRY(sys32_mlock_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_mlock # branch to system call
-
-ENTRY(sys32_munlock_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munlock # branch to system call
-
-ENTRY(sys32_mlockall_wrapper)
- lgfr %r2,%r2 # int
- jg sys_mlockall # branch to system call
-
-#sys32_munlockall_wrapper # void
-
-ENTRY(sys32_sched_setparam_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_setparam # branch to system call
-
-ENTRY(sys32_sched_getparam_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_getparam # branch to system call
-
-ENTRY(sys32_sched_setscheduler_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct sched_param *
- jg sys_sched_setscheduler # branch to system call
-
-ENTRY(sys32_sched_getscheduler_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_sched_getscheduler # branch to system call
-
-#sys32_sched_yield_wrapper # void
-
-ENTRY(sys32_sched_get_priority_max_wrapper)
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_max # branch to system call
-
-ENTRY(sys32_sched_get_priority_min_wrapper)
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_min # branch to system call
-
-ENTRY(compat_sys_nanosleep_wrapper)
- llgtr %r2,%r2 # struct compat_timespec *
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_nanosleep # branch to system call
-
-ENTRY(sys32_mremap_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_mremap # branch to system call
-
-ENTRY(sys32_setresuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_setresuid16 # branch to system call
-
-ENTRY(sys32_getresuid16_wrapper)
- llgtr %r2,%r2 # __kernel_old_uid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_uid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_uid_emu31_t *
- jg sys32_getresuid16 # branch to system call
-
-ENTRY(sys32_poll_wrapper)
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- lgfr %r4,%r4 # int
- jg sys_poll # branch to system call
-
-ENTRY(sys32_setresgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_setresgid16 # branch to system call
-
-ENTRY(sys32_getresgid16_wrapper)
- llgtr %r2,%r2 # __kernel_old_gid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_gid_emu31_t *
- jg sys32_getresgid16 # branch to system call
-
-ENTRY(sys32_prctl_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_prctl # branch to system call
-
-#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue
-
-ENTRY(sys32_pread64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pread64 # branch to system call
-
-ENTRY(sys32_pwrite64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pwrite64 # branch to system call
-
-ENTRY(sys32_chown16_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_chown16 # branch to system call
-
-ENTRY(sys32_getcwd_wrapper)
- llgtr %r2,%r2 # char *
- llgfr %r3,%r3 # unsigned long
- jg sys_getcwd # branch to system call
-
-ENTRY(sys32_capget_wrapper)
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # cap_user_data_t
- jg sys_capget # branch to system call
-
-ENTRY(sys32_capset_wrapper)
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # const cap_user_data_t
- jg sys_capset # branch to system call
-
-#sys32_vfork_wrapper # done in vfork_glue
-
-ENTRY(sys32_truncate64_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_truncate64 # branch to system call
-
-ENTRY(sys32_ftruncate64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_ftruncate64 # branch to system call
-
-ENTRY(sys32_lchown_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_lchown # branch to system call
-
-#sys32_getuid_wrapper # void
-#sys32_getgid_wrapper # void
-#sys32_geteuid_wrapper # void
-#sys32_getegid_wrapper # void
-
-ENTRY(sys32_setreuid_wrapper)
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- jg sys_setreuid # branch to system call
-
-ENTRY(sys32_setregid_wrapper)
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- jg sys_setregid # branch to system call
-
-ENTRY(sys32_getgroups_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_getgroups # branch to system call
-
-ENTRY(sys32_setgroups_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_setgroups # branch to system call
-
-ENTRY(sys32_fchown_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_fchown # branch to system call
-
-ENTRY(sys32_setresuid_wrapper)
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # uid_t
- jg sys_setresuid # branch to system call
-
-ENTRY(sys32_getresuid_wrapper)
- llgtr %r2,%r2 # uid_t *
- llgtr %r3,%r3 # uid_t *
- llgtr %r4,%r4 # uid_t *
- jg sys_getresuid # branch to system call
-
-ENTRY(sys32_setresgid_wrapper)
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- llgfr %r4,%r4 # gid_t
- jg sys_setresgid # branch to system call
-
-ENTRY(sys32_getresgid_wrapper)
- llgtr %r2,%r2 # gid_t *
- llgtr %r3,%r3 # gid_t *
- llgtr %r4,%r4 # gid_t *
- jg sys_getresgid # branch to system call
-
-ENTRY(sys32_chown_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_chown # branch to system call
-
-ENTRY(sys32_setuid_wrapper)
- llgfr %r2,%r2 # uid_t
- jg sys_setuid # branch to system call
-
-ENTRY(sys32_setgid_wrapper)
- llgfr %r2,%r2 # gid_t
- jg sys_setgid # branch to system call
-
-ENTRY(sys32_setfsuid_wrapper)
- llgfr %r2,%r2 # uid_t
- jg sys_setfsuid # branch to system call
-
-ENTRY(sys32_setfsgid_wrapper)
- llgfr %r2,%r2 # gid_t
- jg sys_setfsgid # branch to system call
-
-ENTRY(sys32_pivot_root_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_pivot_root # branch to system call
-
-ENTRY(sys32_mincore_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- llgtr %r4,%r4 # unsigned char *
- jg sys_mincore # branch to system call
-
-ENTRY(sys32_madvise_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_madvise # branch to system call
-
-ENTRY(sys32_getdents64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg sys_getdents64 # branch to system call
-
-ENTRY(compat_sys_fcntl64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl64 # branch to system call
-
-ENTRY(sys32_stat64_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_stat64 # branch to system call
-
-ENTRY(sys32_lstat64_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_lstat64 # branch to system call
-
-ENTRY(sys32_stime_wrapper)
- llgtr %r2,%r2 # long *
- jg compat_sys_stime # branch to system call
-
-ENTRY(sys32_fstat64_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_fstat64 # branch to system call
-
-ENTRY(sys32_setxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_setxattr
-
-ENTRY(sys32_lsetxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_lsetxattr
-
-ENTRY(sys32_fsetxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_fsetxattr
-
-ENTRY(sys32_getxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_getxattr
-
-ENTRY(sys32_lgetxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_lgetxattr
-
-ENTRY(sys32_fgetxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_fgetxattr
-
-ENTRY(sys32_listxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_listxattr
-
-ENTRY(sys32_llistxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_llistxattr
-
-ENTRY(sys32_flistxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_flistxattr
-
-ENTRY(sys32_removexattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_removexattr
-
-ENTRY(sys32_lremovexattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_lremovexattr
-
-ENTRY(sys32_fremovexattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- jg sys_fremovexattr
-
-ENTRY(sys32_sched_setaffinity_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_setaffinity
-
-ENTRY(sys32_sched_getaffinity_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_getaffinity
-
-ENTRY(sys32_exit_group_wrapper)
- lgfr %r2,%r2 # int
- jg sys_exit_group # branch to system call
-
-ENTRY(sys32_set_tid_address_wrapper)
- llgtr %r2,%r2 # int *
- jg sys_set_tid_address # branch to system call
-
-ENTRY(sys_epoll_create_wrapper)
- lgfr %r2,%r2 # int
- jg sys_epoll_create # branch to system call
-
-ENTRY(sys_epoll_ctl_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct epoll_event *
- jg sys_epoll_ctl # branch to system call
-
-ENTRY(sys_epoll_wait_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct epoll_event *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- jg sys_epoll_wait # branch to system call
-
-ENTRY(sys32_fadvise64_wrapper)
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- llgfr %r4,%r5 # size_t (unsigned long)
- lgfr %r5,%r6 # int
- jg sys32_fadvise64
-
-ENTRY(sys32_fadvise64_64_wrapper)
- llgtr %r2,%r2 # struct fadvise64_64_args *
- jg sys32_fadvise64_64
-
-ENTRY(sys32_clock_settime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_settime
-
-ENTRY(sys32_clock_gettime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_gettime
-
-ENTRY(sys32_clock_getres_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_getres
-
-ENTRY(sys32_clock_nanosleep_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_timespec *
- llgtr %r5,%r5 # struct compat_timespec *
- jg compat_sys_clock_nanosleep
-
-ENTRY(sys32_timer_create_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_sigevent *
- llgtr %r4,%r4 # timer_t *
- jg compat_sys_timer_create
-
-ENTRY(sys32_timer_settime_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_itimerspec *
- llgtr %r5,%r5 # struct compat_itimerspec *
- jg compat_sys_timer_settime
-
-ENTRY(sys32_timer_gettime_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_itimerspec *
- jg compat_sys_timer_gettime
-
-ENTRY(sys32_timer_getoverrun_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_getoverrun
-
-ENTRY(sys32_timer_delete_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_delete
-
-ENTRY(sys32_io_setup_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_io_setup
-
-ENTRY(sys32_io_destroy_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- jg sys_io_destroy
-
-ENTRY(sys32_io_getevents_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- lgfr %r4,%r4 # long
- llgtr %r5,%r5 # struct io_event *
- llgtr %r6,%r6 # struct compat_timespec *
- jg compat_sys_io_getevents
-
-ENTRY(sys32_io_submit_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # struct iocb **
- jg compat_sys_io_submit
-
-ENTRY(sys32_io_cancel_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- llgtr %r3,%r3 # struct iocb *
- llgtr %r4,%r4 # struct io_event *
- jg sys_io_cancel
-
-ENTRY(compat_sys_statfs64_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_statfs64
-
-ENTRY(compat_sys_fstatfs64_wrapper)
- llgfr %r2,%r2 # unsigned int fd
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_fstatfs64
-
-ENTRY(compat_sys_mq_open_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # mode_t
- llgtr %r5,%r5 # struct compat_mq_attr *
- jg compat_sys_mq_open
-
-ENTRY(sys32_mq_unlink_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_mq_unlink
-
-ENTRY(compat_sys_mq_timedsend_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedsend
-
-ENTRY(compat_sys_mq_timedreceive_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgtr %r5,%r5 # unsigned int *
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedreceive
-
-ENTRY(compat_sys_mq_notify_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_sigevent *
- jg compat_sys_mq_notify
-
-ENTRY(compat_sys_mq_getsetattr_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_mq_attr *
- llgtr %r4,%r4 # struct compat_mq_attr *
- jg compat_sys_mq_getsetattr
-
-ENTRY(compat_sys_add_key_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # size_t
- llgfr %r6,%r6 # (key_serial_t) u32
- jg sys_add_key
-
-ENTRY(compat_sys_request_key_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # (key_serial_t) u32
- jg sys_request_key
-
-ENTRY(sys32_remap_file_pages_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_remap_file_pages
-
-ENTRY(compat_sys_kexec_load_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # struct kexec_segment *
- llgfr %r5,%r5 # unsigned long
- jg compat_sys_kexec_load
-
-ENTRY(sys_ioprio_set_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_ioprio_set
-
-ENTRY(sys_ioprio_get_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_ioprio_get
-
-ENTRY(sys_inotify_add_watch_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # u32
- jg sys_inotify_add_watch
-
-ENTRY(sys_inotify_rm_watch_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # u32
- jg sys_inotify_rm_watch
-
-ENTRY(sys_mkdirat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_mkdirat
-
-ENTRY(sys_mknodat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgfr %r5,%r5 # unsigned int
- jg sys_mknodat
-
-ENTRY(sys_fchownat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # uid_t
- llgfr %r5,%r5 # gid_t
- lgfr %r6,%r6 # int
- jg sys_fchownat
-
-ENTRY(compat_sys_futimesat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct timeval *
- jg compat_sys_futimesat
-
-ENTRY(sys32_fstatat64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct stat64 *
- lgfr %r5,%r5 # int
- jg sys32_fstatat64
-
-ENTRY(sys_unlinkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_unlinkat
-
-ENTRY(sys_renameat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- jg sys_renameat
-
-ENTRY(sys_linkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- lgfr %r6,%r6 # int
- jg sys_linkat
-
-ENTRY(sys_symlinkat_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # const char *
- jg sys_symlinkat
-
-ENTRY(sys_readlinkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # char *
- lgfr %r5,%r5 # int
- jg sys_readlinkat
-
-ENTRY(sys_fchmodat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # mode_t
- jg sys_fchmodat
-
-ENTRY(sys_faccessat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_faccessat
-
-ENTRY(compat_sys_pselect6_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # fd_set *
- llgtr %r4,%r4 # fd_set *
- llgtr %r5,%r5 # fd_set *
- llgtr %r6,%r6 # struct timespec *
- llgt %r0,164(%r15) # void *
- stg %r0,160(%r15)
- jg compat_sys_pselect6
-
-ENTRY(compat_sys_ppoll_wrapper)
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # struct timespec *
- llgtr %r5,%r5 # const sigset_t *
- llgfr %r6,%r6 # size_t
- jg compat_sys_ppoll
-
-ENTRY(sys_unshare_wrapper)
- llgfr %r2,%r2 # unsigned long
- jg sys_unshare
-
-ENTRY(sys_splice_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # loff_t *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # size_t
- llgf %r0,164(%r15) # unsigned int
- stg %r0,160(%r15)
- jg sys_splice
-
-ENTRY(sys_sync_file_range_wrapper)
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- sllg %r4,%r5,32 # get high word of 64bit loff_t
- or %r4,%r6 # get low word of 64bit loff_t
- llgf %r5,164(%r15) # unsigned int
- jg sys_sync_file_range
-
-ENTRY(sys_tee_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- jg sys_tee
-
-ENTRY(sys_getcpu_wrapper)
- llgtr %r2,%r2 # unsigned *
- llgtr %r3,%r3 # unsigned *
- llgtr %r4,%r4 # struct getcpu_cache *
- jg sys_getcpu
-
-ENTRY(compat_sys_utimes_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_timeval *
- jg compat_sys_utimes
-
-ENTRY(compat_sys_utimensat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct compat_timespec *
- lgfr %r5,%r5 # int
- jg compat_sys_utimensat
-
-ENTRY(sys_eventfd_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_eventfd
-
-ENTRY(sys_fallocate_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- sllg %r4,%r4,32 # get high word of 64bit loff_t
- lr %r4,%r5 # get low word of 64bit loff_t
- sllg %r5,%r6,32 # get high word of 64bit loff_t
- l %r5,164(%r15) # get low word of 64bit loff_t
- jg sys_fallocate
-
-ENTRY(sys_timerfd_create_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_timerfd_create
-
-ENTRY(sys_eventfd2_wrapper)
- llgfr %r2,%r2 # unsigned int
- lgfr %r3,%r3 # int
- jg sys_eventfd2
-
-ENTRY(sys_inotify_init1_wrapper)
- lgfr %r2,%r2 # int
- jg sys_inotify_init1
-
-ENTRY(sys_pipe2_wrapper)
- llgtr %r2,%r2 # u32 *
- lgfr %r3,%r3 # int
- jg sys_pipe2 # branch to system call
-
-ENTRY(sys_dup3_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- lgfr %r4,%r4 # int
- jg sys_dup3 # branch to system call
-
-ENTRY(sys_epoll_create1_wrapper)
- lgfr %r2,%r2 # int
- jg sys_epoll_create1 # branch to system call
-
-ENTRY(sys32_readahead_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # u32
- llgfr %r4,%r4 # u32
- lgfr %r5,%r5 # s32
- jg sys32_readahead # branch to system call
-
-ENTRY(sys_tkill_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # int
- jg sys_tkill # branch to system call
-
-ENTRY(sys_tgkill_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- jg sys_tgkill # branch to system call
-
-ENTRY(compat_sys_keyctl_wrapper)
- llgfr %r2,%r2 # u32
- llgfr %r3,%r3 # u32
- llgfr %r4,%r4 # u32
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg compat_sys_keyctl # branch to system call
-
-ENTRY(sys_perf_event_open_wrapper)
- llgtr %r2,%r2 # const struct perf_event_attr *
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgfr %r6,%r6 # unsigned long
- jg sys_perf_event_open # branch to system call
-
-ENTRY(sys_clone_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # int *
- llgtr %r5,%r5 # int *
- jg sys_clone # branch to system call
-
-ENTRY(sys32_execve_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # compat_uptr_t *
- llgtr %r4,%r4 # compat_uptr_t *
- jg compat_sys_execve # branch to system call
-
-ENTRY(sys_fanotify_init_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_fanotify_init # branch to system call
-
-ENTRY(sys_prlimit64_wrapper)
- lgfr %r2,%r2 # pid_t
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # const struct rlimit64 __user *
- llgtr %r5,%r5 # struct rlimit64 __user *
- jg sys_prlimit64 # branch to system call
-
-ENTRY(sys_name_to_handle_at_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char __user *
- llgtr %r4,%r4 # struct file_handle __user *
- llgtr %r5,%r5 # int __user *
- lgfr %r6,%r6 # int
- jg sys_name_to_handle_at
-
-ENTRY(compat_sys_clock_adjtime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timex __user *
- jg compat_sys_clock_adjtime
-
-ENTRY(sys_syncfs_wrapper)
- lgfr %r2,%r2 # int
- jg sys_syncfs
-
-ENTRY(sys_setns_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_setns
-
-ENTRY(compat_sys_process_vm_readv_wrapper)
- lgfr %r2,%r2 # compat_pid_t
- llgtr %r3,%r3 # struct compat_iovec __user *
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # struct compat_iovec __user *
- llgfr %r6,%r6 # unsigned long
- llgf %r0,164(%r15) # unsigned long
- stg %r0,160(%r15)
- jg compat_sys_process_vm_readv
-
-ENTRY(compat_sys_process_vm_writev_wrapper)
- lgfr %r2,%r2 # compat_pid_t
- llgtr %r3,%r3 # struct compat_iovec __user *
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # struct compat_iovec __user *
- llgfr %r6,%r6 # unsigned long
- llgf %r0,164(%r15) # unsigned long
- stg %r0,160(%r15)
- jg compat_sys_process_vm_writev
-
-ENTRY(sys_s390_runtime_instr_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_s390_runtime_instr
-
-ENTRY(sys_kcmp_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_kcmp
-
-ENTRY(sys_finit_module_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char __user *
- lgfr %r4,%r4 # int
- jg sys_finit_module
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..9d85b4bc7036
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "cpacf: " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction) \
+static ssize_t name##_query_raw_read(struct file *fp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_mask_t mask; \
+ \
+ if (!cpacf_query(CPACF_##instruction, &mask)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
+} \
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ const struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static const struct bin_attribute *const cpacf_attrs[] = {
+ &bin_attr_km_query_raw,
+ &bin_attr_kmc_query_raw,
+ &bin_attr_kimd_query_raw,
+ &bin_attr_klmd_query_raw,
+ &bin_attr_kmac_query_raw,
+ &bin_attr_pckmo_query_raw,
+ &bin_attr_kmf_query_raw,
+ &bin_attr_kmctr_query_raw,
+ &bin_attr_kmo_query_raw,
+ &bin_attr_pcc_query_raw,
+ &bin_attr_prno_query_raw,
+ &bin_attr_kma_query_raw,
+ &bin_attr_kdsa_query_raw,
+ &bin_attr_km_query_auth_info_raw,
+ &bin_attr_kmc_query_auth_info_raw,
+ &bin_attr_kimd_query_auth_info_raw,
+ &bin_attr_klmd_query_auth_info_raw,
+ &bin_attr_kmac_query_auth_info_raw,
+ &bin_attr_pckmo_query_auth_info_raw,
+ &bin_attr_kmf_query_auth_info_raw,
+ &bin_attr_kmctr_query_auth_info_raw,
+ &bin_attr_kmo_query_auth_info_raw,
+ &bin_attr_pcc_query_auth_info_raw,
+ &bin_attr_prno_query_auth_info_raw,
+ &bin_attr_kma_query_auth_info_raw,
+ &bin_attr_kdsa_query_auth_info_raw,
+ NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+ .name = "cpacf",
+ .bin_attrs = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+ struct device *cpu_root;
+ int rc = 0;
+
+ cpu_root = bus_get_dev_root(&cpu_subsys);
+ if (cpu_root) {
+ rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+ put_device(cpu_root);
+ }
+ return rc;
+}
+device_initcall(cpacf_init);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index d7b0c4d27880..ab611764642a 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S390 version
* Copyright IBM Corp. 1999, 2007
@@ -5,68 +6,58 @@
* Christian Borntraeger (cborntra@de.ibm.com),
*/
-#define KMSG_COMPONENT "cpcmd"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpcmd: " fmt
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
-#include <asm/io.h>
+#include <asm/asm.h>
static DEFINE_SPINLOCK(cpcmd_lock);
static char cpcmd_buf[241];
static int diag8_noresponse(int cmdlen)
{
- register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
- register unsigned long reg3 asm ("3") = cmdlen;
-
asm volatile(
-#ifndef CONFIG_64BIT
- " diag %1,%0,0x8\n"
-#else /* CONFIG_64BIT */
- " sam31\n"
- " diag %1,%0,0x8\n"
- " sam64\n"
-#endif /* CONFIG_64BIT */
- : "+d" (reg3) : "d" (reg2) : "cc");
- return reg3;
+ " diag %[rx],%[ry],0x8\n"
+ : [ry] "+&d" (cmdlen)
+ : [rx] "d" (__pa(cpcmd_buf))
+ : "cc");
+ return cmdlen;
}
static int diag8_response(int cmdlen, char *response, int *rlen)
{
- register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
- register unsigned long reg3 asm ("3") = (addr_t) response;
- register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L;
- register unsigned long reg5 asm ("5") = *rlen;
+ union register_pair rx, ry;
+ int cc;
+ rx.even = __pa(cpcmd_buf);
+ rx.odd = __pa(response);
+ ry.even = cmdlen | 0x40000000L;
+ ry.odd = *rlen;
asm volatile(
-#ifndef CONFIG_64BIT
- " diag %2,%0,0x8\n"
- " brc 8,1f\n"
- " ar %1,%4\n"
-#else /* CONFIG_64BIT */
- " sam31\n"
- " diag %2,%0,0x8\n"
- " sam64\n"
- " brc 8,1f\n"
- " agr %1,%4\n"
-#endif /* CONFIG_64BIT */
- "1:\n"
- : "+d" (reg4), "+d" (reg5)
- : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc");
- *rlen = reg5;
- return reg4;
+ " diag %[rx],%[ry],0x8\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [ry] "+d" (ry.pair)
+ : [rx] "d" (rx.pair)
+ : CC_CLOBBER);
+ if (CC_TRANSFORM(cc))
+ *rlen += ry.odd;
+ else
+ *rlen = ry.odd;
+ return ry.even;
}
/*
* __cpcmd has some restrictions over cpcmd
- * - the response buffer must reside below 2GB (if any)
* - __cpcmd is unlocked and therefore not SMP-safe
*/
int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
@@ -80,6 +71,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
memcpy(cpcmd_buf, cmd, cmdlen);
ASCEBC(cpcmd_buf, cmdlen);
+ diag_stat_inc(DIAG_STAT_X008);
if (response) {
memset(response, 0, rlen);
response_len = rlen;
@@ -96,16 +88,14 @@ EXPORT_SYMBOL(__cpcmd);
int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
{
+ unsigned long flags;
char *lowbuf;
int len;
- unsigned long flags;
- if ((virt_to_phys(response) != (unsigned long) response) ||
- (((unsigned long)response + rlen) >> 31)) {
- lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+ if (is_vmalloc_or_module_addr(response)) {
+ lowbuf = kmalloc(rlen, GFP_KERNEL);
if (!lowbuf) {
- pr_warning("The cpcmd kernel function failed to "
- "allocate a response buffer\n");
+ pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
}
spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..c9eef9ed876b
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/export.h>
+#include <linux/bug.h>
+#include <asm/machine.h>
+#include <asm/elf.h>
+
+enum {
+ TYPE_HWCAP,
+ TYPE_FACILITY,
+ TYPE_MACHINE,
+};
+
+struct s390_cpu_feature {
+ unsigned int type : 4;
+ unsigned int num : 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+ [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+ [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+ [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+ [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ struct s390_cpu_feature *feature;
+
+ if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+ return 0;
+ feature = &s390_cpu_features[num];
+ switch (feature->type) {
+ case TYPE_HWCAP:
+ return !!(elf_hwcap & BIT(feature->num));
+ case TYPE_FACILITY:
+ return test_facility(feature->num);
+ case TYPE_MACHINE:
+ return test_machine_feature(feature->num);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(cpu_have_feature);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f703d91bf720..d4839de8ce9d 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S390 kdump implementation
*
@@ -6,212 +7,346 @@
*/
#include <linux/crash_dump.h>
+#include <linux/export.h>
#include <asm/lowcore.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/elf.h>
+#include <linux/uio.h>
+#include <asm/asm-offsets.h>
#include <asm/os_info.h>
#include <asm/elf.h>
#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/maccess.h>
+#include <asm/fpu.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+ .cnt = 1,
+ .max = 1,
+ .total_size = 0,
+ .regions = &oldmem_region,
+ .name = "oldmem",
+};
+
+struct save_area {
+ struct list_head list;
+ u64 psw[2];
+ u64 ctrs[16];
+ u64 gprs[16];
+ u32 acrs[16];
+ u64 fprs[16];
+ u32 fpc;
+ u32 prefix;
+ u32 todpreg;
+ u64 timer;
+ u64 todcmp;
+ u64 vxrs_low[16];
+ __vector128 vxrs_high[16];
+};
+
+static LIST_HEAD(dump_save_areas);
+
/*
- * Copy one page from "oldmem"
- *
- * For the kdump reserved memory this functions performs a swap operation:
- * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
- * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ * Allocate a save area
*/
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
- unsigned long src;
+ struct save_area *sa;
- if (!csize)
- return 0;
+ sa = memblock_alloc_or_panic(sizeof(*sa), 8);
- src = (pfn << PAGE_SHIFT) + offset;
- if (src < OLDMEM_SIZE)
- src += OLDMEM_BASE;
- else if (src > OLDMEM_BASE &&
- src < OLDMEM_BASE + OLDMEM_SIZE)
- src -= OLDMEM_BASE;
- if (userbuf)
- copy_to_user_real((void __force __user *) buf, (void *) src,
- csize);
+ if (is_boot_cpu)
+ list_add(&sa->list, &dump_save_areas);
else
- memcpy_real(buf, (void *) src, csize);
- return csize;
+ list_add_tail(&sa->list, &dump_save_areas);
+ return sa;
}
/*
- * Copy memory from old kernel
+ * Return the address of the save area for the boot CPU
*/
-int copy_from_oldmem(void *dest, void *src, size_t count)
+struct save_area * __init save_area_boot_cpu(void)
{
- unsigned long copied = 0;
- int rc;
-
- if ((unsigned long) src < OLDMEM_SIZE) {
- copied = min(count, OLDMEM_SIZE - (unsigned long) src);
- rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
- if (rc)
- return rc;
- }
- return memcpy_real(dest + copied, src + copied, count - copied);
+ return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
}
/*
- * Alloc memory and panic in case of ENOMEM
+ * Copy CPU registers into the save area
*/
-static void *kzalloc_panic(int len)
+void __init save_area_add_regs(struct save_area *sa, void *regs)
{
- void *rc;
-
- rc = kzalloc(len, GFP_KERNEL);
- if (!rc)
- panic("s390 kdump kzalloc (%d) failed", len);
- return rc;
+ struct lowcore *lc;
+
+ lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA);
+ memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw));
+ memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs));
+ memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs));
+ memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs));
+ memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs));
+ memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc));
+ memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix));
+ memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg));
+ memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer));
+ memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp));
}
/*
- * Get memory layout and create hole for oldmem
+ * Copy vector registers into the save area
*/
-static struct mem_chunk *get_memory_layout(void)
+void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
{
- struct mem_chunk *chunk_array;
+ int i;
- chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk));
- detect_memory_layout(chunk_array, 0);
- create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE);
- return chunk_array;
+ /* Copy lower halves of vector registers 0-15 */
+ for (i = 0; i < 16; i++)
+ sa->vxrs_low[i] = vxrs[i].low;
+ /* Copy vector registers 16-31 */
+ memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-/*
- * Initialize ELF note
- */
-static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
- const char *name)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- Elf64_Nhdr *note;
- u64 len;
-
- note = (Elf64_Nhdr *)buf;
- note->n_namesz = strlen(name) + 1;
- note->n_descsz = d_len;
- note->n_type = type;
- len = sizeof(Elf64_Nhdr);
-
- memcpy(buf + len, name, note->n_namesz);
- len = roundup(len + note->n_namesz, 4);
-
- memcpy(buf + len, desc, note->n_descsz);
- len = roundup(len + note->n_descsz, 4);
+ size_t len, copied, res = 0;
+
+ while (count) {
+ if (!oldmem_data.start && src < sclp.hsa_size) {
+ /* Copy from zfcp/nvme dump HSA area */
+ len = min(count, sclp.hsa_size - src);
+ copied = memcpy_hsa_iter(iter, src, len);
+ } else {
+ /* Check for swapped kdump oldmem areas */
+ if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
+ src -= oldmem_data.start;
+ len = min(count, oldmem_data.size - src);
+ } else if (oldmem_data.start && src < oldmem_data.size) {
+ len = min(count, oldmem_data.size - src);
+ src += oldmem_data.start;
+ } else {
+ len = count;
+ }
+ copied = memcpy_real_iter(iter, src, len);
+ }
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
+ }
+ return res;
+}
- return PTR_ADD(buf, len);
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+{
+ struct iov_iter iter;
+ struct kvec kvec;
+
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+ if (copy_oldmem_iter(&iter, src, count) < count)
+ return -EFAULT;
+ return 0;
}
/*
- * Initialize prstatus note
+ * Copy one page from "oldmem"
*/
-static void *nt_prstatus(void *ptr, struct save_area *sa)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
+ unsigned long offset)
{
- struct elf_prstatus nt_prstatus;
- static int cpu_nr = 1;
-
- memset(&nt_prstatus, 0, sizeof(nt_prstatus));
- memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
- memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
- memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
- nt_prstatus.pr_pid = cpu_nr;
- cpu_nr++;
+ unsigned long src;
- return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus),
- "CORE");
+ src = pfn_to_phys(pfn) + offset;
+ return copy_oldmem_iter(iter, src, csize);
}
/*
- * Initialize fpregset (floating point) note
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
*/
-static void *nt_fpregset(void *ptr, struct save_area *sa)
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
{
- elf_fpregset_t nt_fpregset;
-
- memset(&nt_fpregset, 0, sizeof(nt_fpregset));
- memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
- memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+ unsigned long size_old;
+ int rc;
- return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset),
- "CORE");
+ if (pfn < oldmem_data.size >> PAGE_SHIFT) {
+ size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT));
+ rc = remap_pfn_range(vma, from,
+ pfn + (oldmem_data.start >> PAGE_SHIFT),
+ size_old, prot);
+ if (rc || size == size_old)
+ return rc;
+ size -= size_old;
+ from += size_old;
+ pfn += size_old >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
}
/*
- * Initialize timer note
+ * Remap "oldmem" for zfcp/nvme dump
+ *
+ * We only map available memory above HSA size. Memory below HSA size
+ * is read on demand using the copy_oldmem_page() function.
*/
-static void *nt_s390_timer(void *ptr, struct save_area *sa)
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+ unsigned long from,
+ unsigned long pfn,
+ unsigned long size, pgprot_t prot)
{
- return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
- KEXEC_CORE_NOTE_NAME);
+ unsigned long hsa_end = sclp.hsa_size;
+ unsigned long size_hsa;
+
+ if (pfn < hsa_end >> PAGE_SHIFT) {
+ size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT));
+ if (size == size_hsa)
+ return 0;
+ size -= size_hsa;
+ from += size_hsa;
+ pfn += size_hsa >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
}
/*
- * Initialize TOD clock comparator note
+ * Remap "oldmem" for kdump or zfcp/nvme dump
*/
-static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
{
- return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
- sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
+ if (oldmem_data.start)
+ return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+ prot);
}
/*
- * Initialize TOD programmable register note
+ * Return true only when in a kdump or stand-alone kdump environment.
+ * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump"
+ * environments, where this function returns false; see dump_available().
*/
-static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+bool is_kdump_kernel(void)
{
- return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
- sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
+ return oldmem_data.start;
}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
/*
- * Initialize control register note
+ * Initialize ELF note
*/
-static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
+ const char *name)
{
- return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
- sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+ Elf64_Nhdr *note;
+ u64 len;
+
+ note = (Elf64_Nhdr *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = d_len;
+ note->n_type = type;
+ len = sizeof(Elf64_Nhdr);
+
+ memcpy(buf + len, name, note->n_namesz);
+ len = roundup(len + note->n_namesz, 4);
+
+ memcpy(buf + len, desc, note->n_descsz);
+ len = roundup(len + note->n_descsz, 4);
+
+ return PTR_ADD(buf, len);
}
+#define nt_init(buf, type, desc) \
+ nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type)
+
/*
- * Initialize prefix register note
+ * Calculate the size of ELF note
*/
-static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+static size_t nt_size_name(int d_len, const char *name)
{
- return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
- sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+ size_t size;
+
+ size = sizeof(Elf64_Nhdr);
+ size += roundup(strlen(name) + 1, 4);
+ size += roundup(d_len, 4);
+
+ return size;
}
+#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type)
+
/*
* Fill ELF notes for one CPU with save area registers
*/
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
-{
- ptr = nt_prstatus(ptr, sa);
- ptr = nt_fpregset(ptr, sa);
- ptr = nt_s390_timer(ptr, sa);
- ptr = nt_s390_tod_cmp(ptr, sa);
- ptr = nt_s390_tod_preg(ptr, sa);
- ptr = nt_s390_ctrs(ptr, sa);
- ptr = nt_s390_prefix(ptr, sa);
+static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
+{
+ struct elf_prstatus nt_prstatus;
+ elf_fpregset_t nt_fpregset;
+
+ /* Prepare prstatus note */
+ memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+ memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs));
+ memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+ memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs));
+ nt_prstatus.common.pr_pid = cpu;
+ /* Prepare fpregset (floating point) note */
+ memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+ memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
+ memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
+ /* Create ELF notes for the CPU */
+ ptr = nt_init(ptr, PRSTATUS, nt_prstatus);
+ ptr = nt_init(ptr, PRFPREG, nt_fpregset);
+ ptr = nt_init(ptr, S390_TIMER, sa->timer);
+ ptr = nt_init(ptr, S390_TODCMP, sa->todcmp);
+ ptr = nt_init(ptr, S390_TODPREG, sa->todpreg);
+ ptr = nt_init(ptr, S390_CTRS, sa->ctrs);
+ ptr = nt_init(ptr, S390_PREFIX, sa->prefix);
+ if (cpu_has_vx()) {
+ ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high);
+ ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low);
+ }
return ptr;
}
/*
+ * Calculate size of ELF notes per cpu
+ */
+static size_t get_cpu_elf_notes_size(void)
+{
+ struct save_area *sa = NULL;
+ size_t size;
+
+ size = nt_size(PRSTATUS, struct elf_prstatus);
+ size += nt_size(PRFPREG, elf_fpregset_t);
+ size += nt_size(S390_TIMER, sa->timer);
+ size += nt_size(S390_TODCMP, sa->todcmp);
+ size += nt_size(S390_TODPREG, sa->todpreg);
+ size += nt_size(S390_CTRS, sa->ctrs);
+ size += nt_size(S390_PREFIX, sa->prefix);
+ if (cpu_has_vx()) {
+ size += nt_size(S390_VXRS_HIGH, sa->vxrs_high);
+ size += nt_size(S390_VXRS_LOW, sa->vxrs_low);
+ }
+
+ return size;
+}
+
+/*
* Initialize prpsinfo note (new kernel)
*/
static void *nt_prpsinfo(void *ptr)
@@ -220,9 +355,8 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
- strcpy(prpsinfo.pr_fname, "vmlinux");
- return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo),
- KEXEC_CORE_NOTE_NAME);
+ strscpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, PRPSINFO, prpsinfo);
}
/*
@@ -231,21 +365,26 @@ static void *nt_prpsinfo(void *ptr)
static void *get_vmcoreinfo_old(unsigned long *size)
{
char nt_name[11], *vmcoreinfo;
+ unsigned long addr;
Elf64_Nhdr note;
- void *addr;
- if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr)))
return NULL;
memset(nt_name, 0, sizeof(nt_name));
- if (copy_from_oldmem(&note, addr, sizeof(note)))
+ if (copy_oldmem_kernel(&note, addr, sizeof(note)))
+ return NULL;
+ if (copy_oldmem_kernel(nt_name, addr + sizeof(note),
+ sizeof(nt_name) - 1))
return NULL;
- if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
+ if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0)
return NULL;
- if (strcmp(nt_name, "VMCOREINFO") != 0)
+ vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL);
+ if (!vmcoreinfo)
return NULL;
- vmcoreinfo = kzalloc_panic(note.n_descsz);
- if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz))
+ if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) {
+ kfree(vmcoreinfo);
return NULL;
+ }
*size = note.n_descsz;
return vmcoreinfo;
}
@@ -255,21 +394,58 @@ static void *get_vmcoreinfo_old(unsigned long *size)
*/
static void *nt_vmcoreinfo(void *ptr)
{
+ const char *name = VMCOREINFO_NOTE_NAME;
unsigned long size;
void *vmcoreinfo;
vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
- if (!vmcoreinfo)
- vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (vmcoreinfo)
+ return nt_init_name(ptr, 0, vmcoreinfo, size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
if (!vmcoreinfo)
return ptr;
- return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
+ ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name);
+ kfree(vmcoreinfo);
+ return ptr;
+}
+
+static size_t nt_vmcoreinfo_size(void)
+{
+ const char *name = VMCOREINFO_NOTE_NAME;
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (vmcoreinfo)
+ return nt_size_name(size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
+ return 0;
+
+ kfree(vmcoreinfo);
+ return nt_size_name(size, name);
+}
+
+/*
+ * Initialize final note (needed for /proc/vmcore code)
+ */
+static void *nt_final(void *ptr)
+{
+ Elf64_Nhdr *note;
+
+ note = (Elf64_Nhdr *) ptr;
+ note->n_namesz = 0;
+ note->n_descsz = 0;
+ note->n_type = 0;
+ return PTR_ADD(ptr, sizeof(Elf64_Nhdr));
}
/*
* Initialize ELF header (new kernel)
*/
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
{
memset(ehdr, 0, sizeof(*ehdr));
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -283,7 +459,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- ehdr->e_phnum = mem_chunk_cnt + 1;
+ /* Number of PT_LOAD program headers plus PT_NOTE program header */
+ ehdr->e_phnum = phdr_count + 1;
return ehdr + 1;
}
@@ -292,13 +469,12 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
*/
static int get_cpu_cnt(void)
{
- int i, cpus = 0;
+ struct save_area *sa;
+ int cpus = 0;
- for (i = 0; zfcpdump_save_areas[i]; i++) {
- if (zfcpdump_save_areas[i]->pref_reg == 0)
- continue;
- cpus++;
- }
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ cpus++;
return cpus;
}
@@ -307,60 +483,83 @@ static int get_cpu_cnt(void)
*/
static int get_mem_chunk_cnt(void)
{
- struct mem_chunk *chunk_array, *mem_chunk;
- int i, cnt = 0;
-
- chunk_array = get_memory_layout();
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- mem_chunk = &chunk_array[i];
- if (chunk_array[i].type != CHUNK_READ_WRITE &&
- chunk_array[i].type != CHUNK_READ_ONLY)
- continue;
- if (mem_chunk->size == 0)
- continue;
+ int cnt = 0;
+ u64 idx;
+
+ for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
cnt++;
- }
- kfree(chunk_array);
return cnt;
}
-/*
- * Relocate pointer in order to allow vmcore code access the data
- */
-static inline unsigned long relocate(unsigned long addr)
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
{
- return OLDMEM_BASE + addr;
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
}
/*
* Initialize ELF loads (new kernel)
*/
-static int loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
{
- struct mem_chunk *chunk_array, *mem_chunk;
- int i;
-
- chunk_array = get_memory_layout();
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- mem_chunk = &chunk_array[i];
- if (mem_chunk->size == 0)
- continue;
- if (chunk_array[i].type != CHUNK_READ_WRITE &&
- chunk_array[i].type != CHUNK_READ_ONLY)
- continue;
- else
- phdr->p_filesz = mem_chunk->size;
- phdr->p_type = PT_LOAD;
- phdr->p_offset = mem_chunk->addr;
- phdr->p_vaddr = mem_chunk->addr;
- phdr->p_paddr = mem_chunk->addr;
- phdr->p_memsz = mem_chunk->size;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ unsigned long old_identity_base = 0;
+ phys_addr_t start, end;
+ u64 idx;
+
+ if (os_info_has_vm)
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ for_each_physmem_range(idx, &oldmem_type, &start, &end) {
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
- kfree(chunk_array);
- return i;
+}
+
+static bool os_info_has_vm(void)
+{
+ return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
+/*
+ * Prepare PT_LOAD type program header for kernel image region
+ */
+static void text_init(Elf64_Phdr *phdr)
+{
+ unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS);
+ unsigned long start = os_info_old_value(OS_INFO_IMAGE_START);
+ unsigned long end = os_info_old_value(OS_INFO_IMAGE_END);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = start;
+ phdr->p_filesz = end - start;
+ phdr->p_memsz = end - start;
+ phdr->p_offset = start_phys;
+ phdr->p_paddr = start_phys;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
}
/*
@@ -370,74 +569,139 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
{
struct save_area *sa;
void *ptr_start = ptr;
- int i;
+ int cpu;
ptr = nt_prpsinfo(ptr);
- for (i = 0; zfcpdump_save_areas[i]; i++) {
- sa = zfcpdump_save_areas[i];
- if (sa->pref_reg == 0)
- continue;
- ptr = fill_cpu_elf_notes(ptr, sa);
- }
+ cpu = 1;
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
ptr = nt_vmcoreinfo(ptr);
+ ptr = nt_final(ptr);
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = PT_NOTE;
- phdr->p_offset = relocate(notes_offset);
+ phdr->p_offset = notes_offset;
phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
phdr->p_memsz = phdr->p_filesz;
return ptr;
}
+static size_t get_elfcorehdr_size(int phdr_count)
+{
+ size_t size;
+
+ size = sizeof(Elf64_Ehdr);
+ /* PT_NOTES */
+ size += sizeof(Elf64_Phdr);
+ /* nt_prpsinfo */
+ size += nt_size(PRPSINFO, struct elf_prpsinfo);
+ /* regsets */
+ size += get_cpu_cnt() * get_cpu_elf_notes_size();
+ /* nt_vmcoreinfo */
+ size += nt_vmcoreinfo_size();
+ /* nt_final */
+ size += sizeof(Elf64_Nhdr);
+ /* PT_LOADS */
+ size += phdr_count * sizeof(Elf64_Phdr);
+
+ return size;
+}
+
/*
* Create ELF core header (new kernel)
*/
-static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
- Elf64_Phdr *phdr_notes, *phdr_loads;
- int mem_chunk_cnt;
+ Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+ int mem_chunk_cnt, phdr_text_cnt;
+ size_t alloc_size;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
+ /* If we are not in kdump or zfcp/nvme dump mode return */
+ if (!oldmem_data.start && !is_ipl_type_dump())
+ return 0;
+ /* If we cannot get HSA size for zfcp/nvme dump return error */
+ if (is_ipl_type_dump() && !sclp.hsa_size)
+ return -ENODEV;
+
+ /* For kdump, exclude previous crashkernel memory */
+ if (oldmem_data.start) {
+ oldmem_region.base = oldmem_data.start;
+ oldmem_region.size = oldmem_data.size;
+ oldmem_type.total_size = oldmem_data.size;
+ }
+
mem_chunk_cnt = get_mem_chunk_cnt();
+ phdr_text_cnt = os_info_has_vm() ? 1 : 0;
+
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
+
+ hdr = kzalloc(alloc_size, GFP_KERNEL);
+
+ /*
+ * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ * a dump with this crash kernel will fail. Panic now to allow other
+ * dump mechanisms to take over.
+ */
+ if (!hdr)
+ panic("s390 kdump allocating elfcorehdr failed");
- alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
- mem_chunk_cnt * sizeof(Elf64_Phdr);
- hdr = kzalloc_panic(alloc_size);
/* Init elf header */
- ptr = ehdr_init(hdr, mem_chunk_cnt);
+ phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
/* Init program headers */
- phdr_notes = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_loads = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ if (phdr_text_cnt) {
+ phdr_text = phdr_notes + 1;
+ phdr_loads = phdr_text + 1;
+ } else {
+ phdr_loads = phdr_notes + 1;
+ }
+ ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init kernel text program header */
+ if (phdr_text_cnt)
+ text_init(phdr_text);
/* Init loads */
+ loads_init(phdr_loads, phdr_text_cnt);
+ /* Finalize program headers */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off);
- *elfcorebuf_sz = hdr_off;
- *elfcorebuf = (void *) relocate((unsigned long) hdr);
- BUG_ON(*elfcorebuf_sz > alloc_size);
+ *addr = (unsigned long long) hdr;
+ *size = (unsigned long long) hdr_off;
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
}
/*
- * Create kdump ELF core header in new kernel, if it has not been passed via
- * the "elfcorehdr" kernel parameter
+ * Free ELF core header (new kernel)
*/
-static int setup_kdump_elfcorehdr(void)
+void elfcorehdr_free(unsigned long long addr)
{
- size_t elfcorebuf_sz;
- char *elfcorebuf;
+ kfree((void *)(unsigned long)addr);
+}
- if (!OLDMEM_BASE || is_kdump_kernel())
- return -EINVAL;
- s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz);
- elfcorehdr_addr = (unsigned long long) elfcorebuf;
- elfcorehdr_size = elfcorebuf_sz;
- return 0;
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
}
-subsys_initcall(setup_kdump_elfcorehdr);
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
+}
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
new file mode 100644
index 000000000000..a0501f4c7e7a
--- /dev/null
+++ b/arch/s390/kernel/ctlreg.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/irqflags.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+#include <asm/abs_lowcore.h>
+#include <asm/ctlreg.h>
+
+/*
+ * ctl_lock guards access to global control register contents which
+ * are kept in the control register save area within absolute lowcore
+ * at physical address zero.
+ */
+static DEFINE_SPINLOCK(system_ctl_lock);
+
+void system_ctlreg_lock(void)
+ __acquires(&system_ctl_lock)
+{
+ spin_lock(&system_ctl_lock);
+}
+
+void system_ctlreg_unlock(void)
+ __releases(&system_ctl_lock)
+{
+ spin_unlock(&system_ctl_lock);
+}
+
+static bool system_ctlreg_area_init __ro_after_init;
+
+void __init system_ctlreg_init_save_area(struct lowcore *lc)
+{
+ struct lowcore *abs_lc;
+
+ abs_lc = get_abs_lowcore();
+ __local_ctl_store(0, 15, lc->cregs_save_area);
+ __local_ctl_store(0, 15, abs_lc->cregs_save_area);
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_area_init = true;
+}
+
+struct ctlreg_parms {
+ unsigned long andval;
+ unsigned long orval;
+ unsigned long val;
+ int request;
+ int cr;
+};
+
+static void ctlreg_callback(void *info)
+{
+ struct ctlreg_parms *pp = info;
+ struct ctlreg regs[16];
+
+ __local_ctl_store(0, 15, regs);
+ if (pp->request == CTLREG_LOAD) {
+ regs[pp->cr].val = pp->val;
+ } else {
+ regs[pp->cr].val &= pp->andval;
+ regs[pp->cr].val |= pp->orval;
+ }
+ __local_ctl_load(0, 15, regs);
+}
+
+static void system_ctlreg_update(void *info)
+{
+ unsigned long flags;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * For very early calls do not call on_each_cpu()
+ * since not everything might be setup.
+ */
+ local_irq_save(flags);
+ ctlreg_callback(info);
+ local_irq_restore(flags);
+ } else {
+ on_each_cpu(ctlreg_callback, info, 1);
+ }
+}
+
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request)
+{
+ struct ctlreg_parms pp = { .cr = cr, .request = request, };
+ struct lowcore *abs_lc;
+
+ switch (request) {
+ case CTLREG_SET_BIT:
+ pp.orval = 1UL << data;
+ pp.andval = -1UL;
+ break;
+ case CTLREG_CLEAR_BIT:
+ pp.orval = 0;
+ pp.andval = ~(1UL << data);
+ break;
+ case CTLREG_LOAD:
+ pp.val = data;
+ break;
+ }
+ if (system_ctlreg_area_init) {
+ system_ctlreg_lock();
+ abs_lc = get_abs_lowcore();
+ if (request == CTLREG_LOAD) {
+ abs_lc->cregs_save_area[cr].val = pp.val;
+ } else {
+ abs_lc->cregs_save_area[cr].val &= pp.andval;
+ abs_lc->cregs_save_area[cr].val |= pp.orval;
+ }
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_update(&pp);
+ system_ctlreg_unlock();
+ } else {
+ system_ctlreg_update(&pp);
+ }
+}
+EXPORT_SYMBOL(system_ctlreg_modify);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index f1279dc2e1bc..71cdb6845dd7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1,16 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2012
+ * Copyright IBM Corp. 1999, 2020
*
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
- * Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ * Holger Smolinski (Holger.Smolinski@de.ibm.com)
*
* Bugreports to: <Linux390@de.ibm.com>
*/
-#define KMSG_COMPONENT "s390dbf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "s390dbf: " fmt
#include <linux/stddef.h>
#include <linux/kernel.h>
@@ -19,10 +19,12 @@
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/sysctl.h>
-#include <asm/uaccess.h>
-#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
#include <linux/debugfs.h>
#include <asm/debug.h>
@@ -36,82 +38,67 @@
typedef struct file_private_info {
loff_t offset; /* offset of last read in file */
- int act_area; /* number of last formated area */
- int act_page; /* act page in given area */
- int act_entry; /* last formated entry (offset */
- /* relative to beginning of last */
- /* formated page) */
- size_t act_entry_offset; /* up to this offset we copied */
- /* in last read the last formated */
+ int act_area; /* number of last formatted area */
+ int act_page; /* act page in given area */
+ int act_entry; /* last formatted entry (offset */
+ /* relative to beginning of last */
+ /* formatted page) */
+ size_t act_entry_offset; /* up to this offset we copied */
+ /* in last read the last formatted */
/* entry to userland */
char temp_buf[2048]; /* buffer for output */
- debug_info_t *debug_info_org; /* original debug information */
+ debug_info_t *debug_info_org; /* original debug information */
debug_info_t *debug_info_snap; /* snapshot of debug information */
struct debug_view *view; /* used view of debug info */
} file_private_info_t;
-typedef struct
-{
+typedef struct {
char *string;
- /*
- * This assumes that all args are converted into longs
- * on L/390 this is the case for all types of parameter
- * except of floats, and long long (32 bit)
+ /*
+ * This assumes that all args are converted into longs
+ * on L/390 this is the case for all types of parameter
+ * except of floats, and long long (32 bit)
*
*/
- long args[0];
+ long args[];
} debug_sprintf_entry_t;
-
-/* internal function prototyes */
+/* internal function prototypes */
static int debug_init(void);
static ssize_t debug_output(struct file *file, char __user *user_buf,
- size_t user_len, loff_t * offset);
+ size_t user_len, loff_t *offset);
static ssize_t debug_input(struct file *file, const char __user *user_buf,
- size_t user_len, loff_t * offset);
+ size_t user_len, loff_t *offset);
static int debug_open(struct inode *inode, struct file *file);
static int debug_close(struct inode *inode, struct file *file);
static debug_info_t *debug_info_create(const char *name, int pages_per_area,
- int nr_areas, int buf_size, umode_t mode);
+ int nr_areas, int buf_size, umode_t mode);
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
-static int debug_prolog_level_fn(debug_info_t * id,
- struct debug_view *view, char *out_buf);
-static int debug_input_level_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_buf_size, loff_t * offset);
-static int debug_prolog_pages_fn(debug_info_t * id,
- struct debug_view *view, char *out_buf);
-static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_buf_size, loff_t * offset);
-static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_buf_size, loff_t * offset);
-static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
- char *out_buf, const char *in_buf);
-static int debug_raw_format_fn(debug_info_t * id,
- struct debug_view *view, char *out_buf,
- const char *in_buf);
-static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view,
- int area, debug_entry_t * entry, char *out_buf);
-
-static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
- char *out_buf, debug_sprintf_entry_t *curr_event);
+static int debug_prolog_level_fn(debug_info_t *id,
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_prolog_pages_fn(debug_info_t *id,
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size,
+ const char *in_buf);
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
+static void debug_events_append(debug_info_t *dest, debug_info_t *src);
/* globals */
-struct debug_view debug_raw_view = {
- "raw",
- NULL,
- &debug_raw_header_fn,
- &debug_raw_format_fn,
- NULL,
- NULL
-};
-EXPORT_SYMBOL(debug_raw_view);
-
struct debug_view debug_hex_ascii_view = {
"hex_ascii",
NULL,
@@ -141,19 +128,19 @@ static struct debug_view debug_pages_view = {
};
static struct debug_view debug_flush_view = {
- "flush",
- NULL,
- NULL,
- NULL,
- &debug_input_flush_fn,
- NULL
+ "flush",
+ NULL,
+ NULL,
+ NULL,
+ &debug_input_flush_fn,
+ NULL
};
struct debug_view debug_sprintf_view = {
"sprintf",
NULL,
&debug_dflt_header_fn,
- (debug_format_proc_t*)&debug_sprintf_format_fn,
+ &debug_sprintf_format_fn,
NULL,
NULL
};
@@ -164,20 +151,19 @@ static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
/* static globals */
-static debug_info_t *debug_area_first = NULL;
-static debug_info_t *debug_area_last = NULL;
+static debug_info_t *debug_area_first;
+static debug_info_t *debug_area_last;
static DEFINE_MUTEX(debug_mutex);
static int initialized;
static int debug_critical;
static const struct file_operations debug_file_ops = {
- .owner = THIS_MODULE,
- .read = debug_output,
- .write = debug_input,
- .open = debug_open,
+ .owner = THIS_MODULE,
+ .read = debug_output,
+ .write = debug_input,
+ .open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
@@ -190,29 +176,26 @@ static struct dentry *debug_debugfs_root_entry;
* areas[areanumber][pagenumber][pageoffset]
*/
-static debug_entry_t***
-debug_areas_alloc(int pages_per_area, int nr_areas)
+static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
{
- debug_entry_t*** areas;
- int i,j;
+ debug_entry_t ***areas;
+ int i, j;
- areas = kmalloc(nr_areas *
- sizeof(debug_entry_t**),
- GFP_KERNEL);
+ areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL);
if (!areas)
goto fail_malloc_areas;
for (i = 0; i < nr_areas; i++) {
- areas[i] = kmalloc(pages_per_area *
- sizeof(debug_entry_t*),GFP_KERNEL);
- if (!areas[i]) {
+ /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */
+ areas[i] = kmalloc_array(pages_per_area,
+ sizeof(debug_entry_t *),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!areas[i])
goto fail_malloc_areas2;
- }
- for(j = 0; j < pages_per_area; j++) {
+ for (j = 0; j < pages_per_area; j++) {
areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if(!areas[i][j]) {
- for(j--; j >=0 ; j--) {
+ if (!areas[i][j]) {
+ for (j--; j >= 0 ; j--)
kfree(areas[i][j]);
- }
kfree(areas[i]);
goto fail_malloc_areas2;
}
@@ -221,63 +204,56 @@ debug_areas_alloc(int pages_per_area, int nr_areas)
return areas;
fail_malloc_areas2:
- for(i--; i >= 0; i--){
- for(j=0; j < pages_per_area;j++){
+ for (i--; i >= 0; i--) {
+ for (j = 0; j < pages_per_area; j++)
kfree(areas[i][j]);
- }
kfree(areas[i]);
}
kfree(areas);
fail_malloc_areas:
return NULL;
-
}
-
/*
* debug_info_alloc
* - alloc new debug-info
*/
-
-static debug_info_t*
-debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
- int buf_size, int level, int mode)
+static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, int level,
+ int mode)
{
- debug_info_t* rc;
+ debug_info_t *rc;
/* alloc everything */
-
rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL);
- if(!rc)
+ if (!rc)
goto fail_malloc_rc;
rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
- if(!rc->active_entries)
+ if (!rc->active_entries)
goto fail_malloc_active_entries;
rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
- if(!rc->active_pages)
+ if (!rc->active_pages)
goto fail_malloc_active_pages;
- if((mode == ALL_AREAS) && (pages_per_area != 0)){
+ if ((mode == ALL_AREAS) && (pages_per_area != 0)) {
rc->areas = debug_areas_alloc(pages_per_area, nr_areas);
- if(!rc->areas)
+ if (!rc->areas)
goto fail_malloc_areas;
} else {
rc->areas = NULL;
}
/* initialize members */
-
spin_lock_init(&rc->lock);
rc->pages_per_area = pages_per_area;
- rc->nr_areas = nr_areas;
+ rc->nr_areas = nr_areas;
rc->active_area = 0;
- rc->level = level;
- rc->buf_size = buf_size;
- rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strlcpy(rc->name, name, sizeof(rc->name));
+ rc->level = level;
+ rc->buf_size = buf_size;
+ rc->entry_size = sizeof(debug_entry_t) + buf_size;
+ strscpy(rc->name, name);
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
- memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
- sizeof(struct dentry*));
- atomic_set(&(rc->ref_count), 0);
+ memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
+ refcount_set(&(rc->ref_count), 0);
return rc;
@@ -295,18 +271,15 @@ fail_malloc_rc:
* debug_areas_free
* - free all debug areas
*/
-
-static void
-debug_areas_free(debug_info_t* db_info)
+static void debug_areas_free(debug_info_t *db_info)
{
- int i,j;
+ int i, j;
- if(!db_info->areas)
+ if (!db_info->areas)
return;
for (i = 0; i < db_info->nr_areas; i++) {
- for(j = 0; j < db_info->pages_per_area; j++) {
+ for (j = 0; j < db_info->pages_per_area; j++)
kfree(db_info->areas[i][j]);
- }
kfree(db_info->areas[i]);
}
kfree(db_info->areas);
@@ -317,9 +290,8 @@ debug_areas_free(debug_info_t* db_info)
* debug_info_free
* - free memory debug-info
*/
-
-static void
-debug_info_free(debug_info_t* db_info){
+static void debug_info_free(debug_info_t *db_info)
+{
debug_areas_free(db_info);
kfree(db_info->active_entries);
kfree(db_info->active_pages);
@@ -331,37 +303,18 @@ debug_info_free(debug_info_t* db_info){
* - create new debug-info
*/
-static debug_info_t*
-debug_info_create(const char *name, int pages_per_area, int nr_areas,
- int buf_size, umode_t mode)
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode)
{
- debug_info_t* rc;
+ debug_info_t *rc;
- rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
- DEBUG_DEFAULT_LEVEL, ALL_AREAS);
- if(!rc)
+ rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
+ DEBUG_DEFAULT_LEVEL, ALL_AREAS);
+ if (!rc)
goto out;
rc->mode = mode & ~S_IFMT;
-
- /* create root directory */
- rc->debugfs_root_entry = debugfs_create_dir(rc->name,
- debug_debugfs_root_entry);
-
- /* append new element to linked list */
- if (!debug_area_first) {
- /* first element in list */
- debug_area_first = rc;
- rc->prev = NULL;
- } else {
- /* append element to end of list */
- debug_area_last->next = rc;
- rc->prev = debug_area_last;
- }
- debug_area_last = rc;
- rc->next = NULL;
-
- debug_info_get(rc);
+ refcount_set(&rc->ref_count, 1);
out:
return rc;
}
@@ -370,24 +323,22 @@ out:
* debug_info_copy
* - copy debug-info
*/
-
-static debug_info_t*
-debug_info_copy(debug_info_t* in, int mode)
+static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
{
- int i,j;
- debug_info_t* rc;
- unsigned long flags;
+ unsigned long flags;
+ debug_info_t *rc;
+ int i, j;
/* get a consistent copy of the debug areas */
do {
rc = debug_info_alloc(in->name, in->pages_per_area,
in->nr_areas, in->buf_size, in->level, mode);
spin_lock_irqsave(&in->lock, flags);
- if(!rc)
+ if (!rc)
goto out;
/* has something changed in the meantime ? */
- if((rc->pages_per_area == in->pages_per_area) &&
- (rc->nr_areas == in->nr_areas)) {
+ if ((rc->pages_per_area == in->pages_per_area) &&
+ (rc->nr_areas == in->nr_areas)) {
break;
}
spin_unlock_irqrestore(&in->lock, flags);
@@ -395,155 +346,222 @@ debug_info_copy(debug_info_t* in, int mode)
} while (1);
if (mode == NO_AREAS)
- goto out;
+ goto out;
- for(i = 0; i < in->nr_areas; i++){
- for(j = 0; j < in->pages_per_area; j++) {
- memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE);
- }
- }
+ for (i = 0; i < in->nr_areas; i++) {
+ for (j = 0; j < in->pages_per_area; j++)
+ memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ rc->active_pages[i] = in->active_pages[i];
+ rc->active_entries[i] = in->active_entries[i];
+ }
+ rc->active_area = in->active_area;
out:
- spin_unlock_irqrestore(&in->lock, flags);
- return rc;
+ spin_unlock_irqrestore(&in->lock, flags);
+ return rc;
}
/*
* debug_info_get
* - increments reference count for debug-info
*/
-
-static void
-debug_info_get(debug_info_t * db_info)
+static void debug_info_get(debug_info_t *db_info)
{
if (db_info)
- atomic_inc(&db_info->ref_count);
+ refcount_inc(&db_info->ref_count);
}
/*
* debug_info_put:
* - decreases reference count for debug-info and frees it if necessary
*/
-
-static void
-debug_info_put(debug_info_t *db_info)
+static void debug_info_put(debug_info_t *db_info)
{
- int i;
-
if (!db_info)
return;
- if (atomic_dec_and_test(&db_info->ref_count)) {
- for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
- if (!db_info->views[i])
- continue;
- debugfs_remove(db_info->debugfs_entries[i]);
- }
- debugfs_remove(db_info->debugfs_root_entry);
- if(db_info == debug_area_first)
- debug_area_first = db_info->next;
- if(db_info == debug_area_last)
- debug_area_last = db_info->prev;
- if(db_info->prev) db_info->prev->next = db_info->next;
- if(db_info->next) db_info->next->prev = db_info->prev;
+ if (refcount_dec_and_test(&db_info->ref_count))
debug_info_free(db_info);
- }
}
/*
* debug_format_entry:
- * - format one debug entry and return size of formated data
+ * - format one debug entry and return size of formatted data
*/
-
-static int
-debug_format_entry(file_private_info_t *p_info)
+static int debug_format_entry(file_private_info_t *p_info)
{
- debug_info_t *id_snap = p_info->debug_info_snap;
+ debug_info_t *id_snap = p_info->debug_info_snap;
struct debug_view *view = p_info->view;
debug_entry_t *act_entry;
size_t len = 0;
- if(p_info->act_entry == DEBUG_PROLOG_ENTRY){
+
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
/* print prolog */
- if (view->prolog_proc)
- len += view->prolog_proc(id_snap,view,p_info->temp_buf);
+ if (view->prolog_proc) {
+ len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+ sizeof(p_info->temp_buf));
+ }
goto out;
}
if (!id_snap->areas) /* this is true, if we have a prolog only view */
goto out; /* or if 'pages_per_area' is 0 */
- act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area]
- [p_info->act_page] + p_info->act_entry);
-
- if (act_entry->id.stck == 0LL)
- goto out; /* empty entry */
- if (view->header_proc)
+ act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
+ [p_info->act_page] + p_info->act_entry);
+
+ if (act_entry->clock == 0LL)
+ goto out; /* empty entry */
+ if (view->header_proc) {
len += view->header_proc(id_snap, view, p_info->act_area,
- act_entry, p_info->temp_buf + len);
- if (view->format_proc)
+ act_entry, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len);
+ }
+ if (view->format_proc) {
len += view->format_proc(id_snap, view, p_info->temp_buf + len,
- DEBUG_DATA(act_entry));
+ sizeof(p_info->temp_buf) - len,
+ DEBUG_DATA(act_entry));
+ }
out:
- return len;
+ return len;
}
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists the current position is set to one after the end the return value
+ * indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
*/
-
-static inline int
-debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
id = p_info->debug_info_snap;
- if(p_info->act_entry == DEBUG_PROLOG_ENTRY){
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
- goto out;
+ return true;
}
- if(!id->areas)
- return 1;
+ if (!id->areas)
+ return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
- if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){
+ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
/* next page */
p_info->act_entry = 0;
p_info->act_page += 1;
- if((p_info->act_page % id->pages_per_area) == 0) {
+ if ((p_info->act_page % id->pages_per_area) == 0) {
/* next area */
- p_info->act_area++;
- p_info->act_page=0;
+ p_info->act_area++;
+ p_info->act_page = 0;
}
- if(p_info->act_area >= id->nr_areas)
- return 1;
+ if (p_info->act_area >= id->nr_areas)
+ return false;
}
-out:
- return 0;
+ return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+ debug_info_t *snap_id;
+
+ snap_id = p_info->debug_info_snap;
+ p_info->act_area = snap_id->active_area;
+ p_info->act_page = snap_id->active_pages[snap_id->active_area];
+ p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous entry
+ * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value
+ * indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+ debug_to_act_entry(p_info);
+ if (!id->areas)
+ return false;
+ p_info->act_entry -= id->entry_size;
+ /* switch to prev page, if we reached the beginning of the page */
+ if (p_info->act_entry < 0) {
+ /* end of previous page */
+ p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+ p_info->act_page--;
+ if (p_info->act_page < 0) {
+ /* previous area */
+ p_info->act_area--;
+ p_info->act_page = id->pages_per_area - 1;
+ }
+ if (p_info->act_area < 0)
+ p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+ }
+ /* check full circle */
+ if (id->active_area == p_info->act_area &&
+ id->active_pages[id->active_area] == p_info->act_page &&
+ id->active_entries[id->active_area] == p_info->act_entry)
+ return false;
+ return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info: Private info that is manipulated
+ * @reverse: If true go to the next entry in reverse i.e. previous
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+ if (reverse)
+ return debug_prev_entry(p_info);
+ else
+ return debug_next_entry(p_info);
}
/*
* debug_output:
* - called for user read()
- * - copies formated debug entries to the user buffer
+ * - copies formatted debug entries to the user buffer
*/
-
-static ssize_t
-debug_output(struct file *file, /* file descriptor */
- char __user *user_buf, /* user buffer */
- size_t len, /* length of buffer */
- loff_t *offset) /* offset in the file */
+static ssize_t debug_output(struct file *file, /* file descriptor */
+ char __user *user_buf, /* user buffer */
+ size_t len, /* length of buffer */
+ loff_t *offset) /* offset in the file */
{
size_t count = 0;
size_t entry_offset;
file_private_info_t *p_info;
- p_info = ((file_private_info_t *) file->private_data);
- if (*offset != p_info->offset)
+ p_info = (file_private_info_t *) file->private_data;
+ if (*offset != p_info->offset)
return -EPIPE;
- if(p_info->act_area >= p_info->debug_info_snap->nr_areas)
+ if (p_info->act_area >= p_info->debug_info_snap->nr_areas)
return 0;
entry_offset = p_info->act_entry_offset;
- while(count < len){
- int formatted_line_size;
+ while (count < len) {
int formatted_line_residue;
+ int formatted_line_size;
int user_buf_residue;
size_t copy_size;
@@ -551,21 +569,21 @@ debug_output(struct file *file, /* file descriptor */
formatted_line_residue = formatted_line_size - entry_offset;
user_buf_residue = len-count;
copy_size = min(user_buf_residue, formatted_line_residue);
- if(copy_size){
+ if (copy_size) {
if (copy_to_user(user_buf + count, p_info->temp_buf
- + entry_offset, copy_size))
+ + entry_offset, copy_size))
return -EFAULT;
count += copy_size;
entry_offset += copy_size;
}
- if(copy_size == formatted_line_residue){
+ if (copy_size == formatted_line_residue) {
entry_offset = 0;
- if(debug_next_entry(p_info))
+ if (!debug_next_entry(p_info))
goto out;
}
}
out:
- p_info->offset = *offset + count;
+ p_info->offset = *offset + count;
p_info->act_entry_offset = entry_offset;
*offset = p_info->offset;
return count;
@@ -576,39 +594,72 @@ out:
* - called for user write()
* - calls input function of view
*/
-
-static ssize_t
-debug_input(struct file *file, const char __user *user_buf, size_t length,
- loff_t *offset)
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+ size_t length, loff_t *offset)
{
- int rc = 0;
file_private_info_t *p_info;
+ int rc = 0;
mutex_lock(&debug_mutex);
p_info = ((file_private_info_t *) file->private_data);
- if (p_info->view->input_proc)
+ if (p_info->view->input_proc) {
rc = p_info->view->input_proc(p_info->debug_info_org,
p_info->view, file, user_buf,
length, offset);
- else
+ } else {
rc = -EPERM;
+ }
mutex_unlock(&debug_mutex);
- return rc; /* number of input characters */
+ return rc; /* number of input characters */
+}
+
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+ struct debug_view *view)
+{
+ debug_info_t *debug_info_snapshot;
+ file_private_info_t *p_info;
+
+ /*
+ * Make snapshot of current debug areas to get it consistent.
+ * To copy all the areas is only needed, if we have a view which
+ * formats the debug areas.
+ */
+ if (!view->format_proc && !view->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot)
+ return NULL;
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ return NULL;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = view;
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ debug_info_get(debug_info);
+
+ return p_info;
}
/*
* debug_open:
* - called for user open()
- * - copies formated output to private_data area of the file
+ * - copies formatted output to private_data area of the file
* handle
*/
-
-static int
-debug_open(struct inode *inode, struct file *file)
+static int debug_open(struct inode *inode, struct file *file)
{
- int i, rc = 0;
+ debug_info_t *debug_info;
file_private_info_t *p_info;
- debug_info_t *debug_info, *debug_info_snapshot;
+ int i, rc = 0;
mutex_lock(&debug_mutex);
debug_info = file_inode(file)->i_private;
@@ -616,80 +667,140 @@ debug_open(struct inode *inode, struct file *file)
for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
if (!debug_info->views[i])
continue;
- else if (debug_info->debugfs_entries[i] ==
- file->f_path.dentry) {
- goto found; /* found view ! */
- }
+ else if (debug_info->debugfs_entries[i] == file->f_path.dentry)
+ goto found; /* found view ! */
}
/* no entry found */
rc = -EINVAL;
goto out;
found:
-
- /* Make snapshot of current debug areas to get it consistent. */
- /* To copy all the areas is only needed, if we have a view which */
- /* formats the debug areas. */
-
- if(!debug_info->views[i]->format_proc &&
- !debug_info->views[i]->header_proc){
- debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
- } else {
- debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
- }
-
- if(!debug_info_snapshot){
- rc = -ENOMEM;
- goto out;
- }
- p_info = kmalloc(sizeof(file_private_info_t),
- GFP_KERNEL);
- if(!p_info){
- debug_info_free(debug_info_snapshot);
+ p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
+ if (!p_info) {
rc = -ENOMEM;
goto out;
}
- p_info->offset = 0;
- p_info->debug_info_snap = debug_info_snapshot;
- p_info->debug_info_org = debug_info;
- p_info->view = debug_info->views[i];
- p_info->act_area = 0;
- p_info->act_page = 0;
- p_info->act_entry = DEBUG_PROLOG_ENTRY;
- p_info->act_entry_offset = 0;
file->private_data = p_info;
- debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(p_info);
+}
+
/*
* debug_close:
* - called for user close()
* - deletes private_data area of the file handle
*/
-
-static int
-debug_close(struct inode *inode, struct file *file)
+static int debug_close(struct inode *inode, struct file *file)
{
file_private_info_t *p_info;
+
p_info = (file_private_info_t *) file->private_data;
- if(p_info->debug_info_snap)
- debug_info_free(p_info->debug_info_snap);
- debug_info_put(p_info->debug_info_org);
- kfree(file->private_data);
- return 0; /* success */
+ debug_file_private_free(p_info);
+ file->private_data = NULL;
+ return 0; /* success */
}
-/*
- * debug_register_mode:
- * - Creates and initializes debug area for the caller
- * The mode parameter allows to specify access rights for the s390dbf files
- * - Returns handle for debug area
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id: Debug information to use
+ * @view: View with which to dump the debug information
+ * @buf: Buffer the textual debug data representation is written to
+ * @buf_size: Size of the buffer, including the trailing '\0' byte
+ * @reverse: Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the callers responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer not
+ * including the trailing '\0' byte. If bug_size == 0 the function returns 0.
+ * On failure an error code less than 0 is returned.
*/
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse)
+{
+ file_private_info_t *p_info;
+ size_t size, offset = 0;
+
+ /* Need space for '\0' byte */
+ if (buf_size < 1)
+ return 0;
+ buf_size--;
+
+ p_info = debug_file_private_alloc(id, view);
+ if (!p_info)
+ return -ENOMEM;
+
+ /* There is always at least the DEBUG_PROLOG_ENTRY */
+ do {
+ size = debug_format_entry(p_info);
+ size = min(size, buf_size - offset);
+ memcpy(buf + offset, p_info->temp_buf, size);
+ offset += size;
+ if (offset >= buf_size)
+ break;
+ } while (debug_move_entry(p_info, reverse));
+ debug_file_private_free(p_info);
+ buf[offset] = '\0';
+
+ return offset;
+}
+/* Create debugfs entries and add to internal list. */
+static void _debug_register(debug_info_t *id)
+{
+ /* create root directory */
+ id->debugfs_root_entry = debugfs_create_dir(id->name,
+ debug_debugfs_root_entry);
+
+ /* append new element to linked list */
+ if (!debug_area_first) {
+ /* first element in list */
+ debug_area_first = id;
+ id->prev = NULL;
+ } else {
+ /* append element to end of list */
+ debug_area_last->next = id;
+ id->prev = debug_area_last;
+ }
+ debug_area_last = id;
+ id->next = NULL;
+
+ debug_register_view(id, &debug_level_view);
+ debug_register_view(id, &debug_flush_view);
+ debug_register_view(id, &debug_pages_view);
+}
+
+/**
+ * debug_register_mode() - creates and initializes debug area.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages, which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ * @mode: File mode for debugfs files. E.g. S_IRWXUGO
+ * @uid: User ID for debugfs files. Currently only 0 is supported.
+ * @gid: Group ID for debugfs files. Currently only 0 is supported.
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * Must not be called within an interrupt handler.
+ */
debug_info_t *debug_register_mode(const char *name, int pages_per_area,
int nr_areas, int buf_size, umode_t mode,
uid_t uid, gid_t gid)
@@ -699,34 +810,38 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
/* Since debugfs currently does not support uid/gid other than root, */
/* we do not allow gid/uid != 0 until we get support for that. */
if ((uid != 0) || (gid != 0))
- pr_warning("Root becomes the owner of all s390dbf files "
- "in sysfs\n");
+ pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
BUG_ON(!initialized);
- mutex_lock(&debug_mutex);
-
- /* create new debug_info */
+ /* create new debug_info */
rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
- if(!rc)
- goto out;
- debug_register_view(rc, &debug_level_view);
- debug_register_view(rc, &debug_flush_view);
- debug_register_view(rc, &debug_pages_view);
-out:
- if (!rc){
+ if (rc) {
+ mutex_lock(&debug_mutex);
+ _debug_register(rc);
+ mutex_unlock(&debug_mutex);
+ } else {
pr_err("Registering debug feature %s failed\n", name);
- }
- mutex_unlock(&debug_mutex);
+ }
return rc;
}
EXPORT_SYMBOL(debug_register_mode);
-/*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+/**
+ * debug_register() - creates and initializes debug area with default file mode.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages, which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * The debugfs file mode access permissions are read and write for user.
+ * Must not be called within an interrupt handler.
*/
-
debug_info_t *debug_register(const char *name, int pages_per_area,
int nr_areas, int buf_size)
{
@@ -735,22 +850,99 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
}
EXPORT_SYMBOL(debug_register);
-/*
- * debug_unregister:
- * - give back debug area
+/**
+ * debug_register_static() - registers a static debug area
+ *
+ * @id: Handle for static debug area
+ * @pages_per_area: Number of pages per area
+ * @nr_areas: Number of debug areas
+ *
+ * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO.
+ *
+ * Note: This function is called automatically via an initcall generated by
+ * DEFINE_STATIC_DEBUG_INFO.
*/
+void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas)
+{
+ unsigned long flags;
+ debug_info_t *copy;
+
+ if (!initialized) {
+ pr_err("Tried to register debug feature %s too early\n",
+ id->name);
+ return;
+ }
+
+ copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+ id->level, ALL_AREAS);
+ if (!copy) {
+ pr_err("Registering debug feature %s failed\n", id->name);
+
+ /* Clear pointers to prevent tracing into released initdata. */
+ spin_lock_irqsave(&id->lock, flags);
+ id->areas = NULL;
+ id->active_pages = NULL;
+ id->active_entries = NULL;
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return;
+ }
+
+ /* Replace static trace area with dynamic copy. */
+ spin_lock_irqsave(&id->lock, flags);
+ debug_events_append(copy, id);
+ debug_areas_swap(id, copy);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ /* Clear pointers to initdata and discard copy. */
+ copy->areas = NULL;
+ copy->active_pages = NULL;
+ copy->active_entries = NULL;
+ debug_info_free(copy);
+
+ mutex_lock(&debug_mutex);
+ _debug_register(id);
+ mutex_unlock(&debug_mutex);
+}
-void
-debug_unregister(debug_info_t * id)
+/* Remove debugfs entries and remove from internal list. */
+static void _debug_unregister(debug_info_t *id)
+{
+ int i;
+
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!id->views[i])
+ continue;
+ debugfs_remove(id->debugfs_entries[i]);
+ }
+ debugfs_remove(id->debugfs_root_entry);
+ if (id == debug_area_first)
+ debug_area_first = id->next;
+ if (id == debug_area_last)
+ debug_area_last = id->prev;
+ if (id->prev)
+ id->prev->next = id->next;
+ if (id->next)
+ id->next->prev = id->prev;
+}
+
+/**
+ * debug_unregister() - give back debug area.
+ *
+ * @id: handle for debug log
+ *
+ * Return:
+ * none
+ */
+void debug_unregister(debug_info_t *id)
{
if (!id)
- goto out;
+ return;
mutex_lock(&debug_mutex);
- debug_info_put(id);
+ _debug_unregister(id);
mutex_unlock(&debug_mutex);
-out:
- return;
+ debug_info_put(id);
}
EXPORT_SYMBOL(debug_unregister);
@@ -758,62 +950,59 @@ EXPORT_SYMBOL(debug_unregister);
* debug_set_size:
* - set area size (number of pages) and number of areas
*/
-static int
-debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area)
+static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
{
+ debug_info_t *new_id;
unsigned long flags;
- debug_entry_t *** new_areas;
- int rc=0;
- if(!id || (nr_areas <= 0) || (pages_per_area < 0))
+ if (!id || (nr_areas <= 0) || (pages_per_area < 0))
return -EINVAL;
- if(pages_per_area > 0){
- new_areas = debug_areas_alloc(pages_per_area, nr_areas);
- if(!new_areas) {
- pr_info("Allocating memory for %i pages failed\n",
- pages_per_area);
- rc = -ENOMEM;
- goto out;
- }
- } else {
- new_areas = NULL;
+
+ new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+ id->level, ALL_AREAS);
+ if (!new_id) {
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
+ return -ENOMEM;
}
- spin_lock_irqsave(&id->lock,flags);
- debug_areas_free(id);
- id->areas = new_areas;
- id->nr_areas = nr_areas;
- id->pages_per_area = pages_per_area;
- id->active_area = 0;
- memset(id->active_entries,0,sizeof(int)*id->nr_areas);
- memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
- spin_unlock_irqrestore(&id->lock,flags);
- pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area);
-out:
- return rc;
+
+ spin_lock_irqsave(&id->lock, flags);
+ debug_events_append(new_id, id);
+ debug_areas_swap(new_id, id);
+ debug_info_free(new_id);
+ spin_unlock_irqrestore(&id->lock, flags);
+ pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
+
+ return 0;
}
-/*
- * debug_set_level:
- * - set actual debug level
+/**
+ * debug_set_level() - Sets new actual debug level if new_level is valid.
+ *
+ * @id: handle for debug log
+ * @new_level: new debug level
+ *
+ * Return:
+ * none
*/
-
-void
-debug_set_level(debug_info_t* id, int new_level)
+void debug_set_level(debug_info_t *id, int new_level)
{
unsigned long flags;
- if(!id)
- return;
- spin_lock_irqsave(&id->lock,flags);
- if(new_level == DEBUG_OFF_LEVEL){
- id->level = DEBUG_OFF_LEVEL;
- pr_info("%s: switched off\n",id->name);
- } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
+
+ if (!id)
+ return;
+
+ if (new_level == DEBUG_OFF_LEVEL) {
+ pr_info("%s: switched off\n", id->name);
+ } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
pr_info("%s: level %i is out of range (%i - %i)\n",
- id->name, new_level, 0, DEBUG_MAX_LEVEL);
- } else {
- id->level = new_level;
- }
- spin_unlock_irqrestore(&id->lock,flags);
+ id->name, new_level, 0, DEBUG_MAX_LEVEL);
+ return;
+ }
+
+ spin_lock_irqsave(&id->lock, flags);
+ id->level = new_level;
+ spin_unlock_irqrestore(&id->lock, flags);
}
EXPORT_SYMBOL(debug_set_level);
@@ -821,12 +1010,10 @@ EXPORT_SYMBOL(debug_set_level);
* proceed_active_entry:
* - set active entry to next in the ring buffer
*/
-
-static inline void
-proceed_active_entry(debug_info_t * id)
+static inline void proceed_active_entry(debug_info_t *id)
{
if ((id->active_entries[id->active_area] += id->entry_size)
- > (PAGE_SIZE - id->entry_size)){
+ > (PAGE_SIZE - id->entry_size)) {
id->active_entries[id->active_area] = 0;
id->active_pages[id->active_area] =
(id->active_pages[id->active_area] + 1) %
@@ -838,9 +1025,7 @@ proceed_active_entry(debug_info_t * id)
* proceed_active_area:
* - set active area to next in the ring buffer
*/
-
-static inline void
-proceed_active_area(debug_info_t * id)
+static inline void proceed_active_area(debug_info_t *id)
{
id->active_area++;
id->active_area = id->active_area % id->nr_areas;
@@ -849,13 +1034,47 @@ proceed_active_area(debug_info_t * id)
/*
* get_active_entry:
*/
-
-static inline debug_entry_t*
-get_active_entry(debug_info_t * id)
+static inline debug_entry_t *get_active_entry(debug_info_t *id)
{
return (debug_entry_t *) (((char *) id->areas[id->active_area]
- [id->active_pages[id->active_area]]) +
- id->active_entries[id->active_area]);
+ [id->active_pages[id->active_area]]) +
+ id->active_entries[id->active_area]);
+}
+
+/* Swap debug areas of a and b. */
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b)
+{
+ swap(a->nr_areas, b->nr_areas);
+ swap(a->pages_per_area, b->pages_per_area);
+ swap(a->areas, b->areas);
+ swap(a->active_area, b->active_area);
+ swap(a->active_pages, b->active_pages);
+ swap(a->active_entries, b->active_entries);
+}
+
+/* Append all debug events in active area from source to destination log. */
+static void debug_events_append(debug_info_t *dest, debug_info_t *src)
+{
+ debug_entry_t *from, *to, *last;
+
+ if (!src->areas || !dest->areas)
+ return;
+
+ /* Loop over all entries in src, starting with oldest. */
+ from = get_active_entry(src);
+ last = from;
+ do {
+ if (from->clock != 0LL) {
+ to = get_active_entry(dest);
+ memset(to, 0, dest->entry_size);
+ memcpy(to, from, min(src->entry_size,
+ dest->entry_size));
+ proceed_active_entry(dest);
+ }
+
+ proceed_active_entry(src);
+ from = get_active_entry(src);
+ } while (from != last);
}
/*
@@ -863,22 +1082,27 @@ get_active_entry(debug_info_t * id)
* - set timestamp, caller address, cpu number etc.
*/
-static inline void
-debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
- int exception)
+static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
+ int level, int exception)
{
- active->id.stck = get_tod_clock();
- active->id.fields.cpuid = smp_processor_id();
+ unsigned long timestamp;
+ union tod_clock clk;
+
+ store_tod_clock_ext(&clk);
+ timestamp = clk.us;
+ timestamp -= TOD_UNIX_EPOCH >> 12;
+ active->clock = timestamp;
+ active->cpu = smp_processor_id();
active->caller = __builtin_return_address(0);
- active->id.fields.exception = exception;
- active->id.fields.level = level;
+ active->exception = exception;
+ active->level = level;
proceed_active_entry(id);
- if(exception)
+ if (exception)
proceed_active_area(id);
}
-static int debug_stoppable=1;
-static int debug_active=1;
+static int debug_stoppable = 1;
+static int debug_active = 1;
#define CTL_S390DBF_STOPPABLE 5678
#define CTL_S390DBF_ACTIVE 5679
@@ -888,9 +1112,8 @@ static int debug_active=1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int
-s390dbf_procactive(ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int s390dbf_procactive(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
return proc_dointvec(table, write, buffer, lenp, ppos);
@@ -898,45 +1121,51 @@ s390dbf_procactive(ctl_table *table, int write,
return 0;
}
-
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
{
- .procname = "debug_stoppable",
+ .procname = "debug_stoppable",
.data = &debug_stoppable,
.maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
},
- {
- .procname = "debug_active",
+ {
+ .procname = "debug_active",
.data = &debug_active,
.maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = s390dbf_procactive,
- },
- { }
-};
-
-static struct ctl_table s390dbf_dir_table[] = {
- {
- .procname = "s390dbf",
- .maxlen = 0,
- .mode = S_IRUGO | S_IXUGO,
- .child = s390dbf_table,
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = s390dbf_procactive,
},
- { }
};
static struct ctl_table_header *s390dbf_sysctl_header;
-void
-debug_stop_all(void)
+/**
+ * debug_stop_all() - stops the debug feature if stopping is allowed.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of a kernel oops.
+ */
+void debug_stop_all(void)
{
if (debug_stoppable)
debug_active = 0;
}
EXPORT_SYMBOL(debug_stop_all);
+/**
+ * debug_set_critical() - event/exception functions try lock instead of spin.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of stopping all CPUs but the current one.
+ * Once in this state, functions to write a debug entry for an
+ * event or exception no longer spin on the debug area lock,
+ * but only try to get it and fail if they do not get the lock.
+ */
void debug_set_critical(void)
{
debug_critical = 1;
@@ -946,26 +1175,31 @@ void debug_set_critical(void)
* debug_event_common:
* - write debug entry with given size
*/
-
-debug_entry_t*
-debug_event_common(debug_info_t * id, int level, const void *buf, int len)
+debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf,
+ int len)
{
- unsigned long flags;
debug_entry_t *active;
+ unsigned long flags;
if (!debug_active || !id->areas)
return NULL;
if (debug_critical) {
if (!spin_trylock_irqsave(&id->lock, flags))
return NULL;
- } else
+ } else {
spin_lock_irqsave(&id->lock, flags);
- active = get_active_entry(id);
- memset(DEBUG_DATA(active), 0, id->buf_size);
- memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
- debug_finish_entry(id, active, level, 0);
- spin_unlock_irqrestore(&id->lock, flags);
+ }
+ do {
+ active = get_active_entry(id);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ if (len < id->buf_size)
+ memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+ debug_finish_entry(id, active, level, 0);
+ len -= id->buf_size;
+ buf += id->buf_size;
+ } while (len > 0);
+ spin_unlock_irqrestore(&id->lock, flags);
return active;
}
EXPORT_SYMBOL(debug_event_common);
@@ -974,26 +1208,31 @@ EXPORT_SYMBOL(debug_event_common);
* debug_exception_common:
* - write debug entry with given size and switch to next debug area
*/
-
-debug_entry_t
-*debug_exception_common(debug_info_t * id, int level, const void *buf, int len)
+debug_entry_t *debug_exception_common(debug_info_t *id, int level,
+ const void *buf, int len)
{
- unsigned long flags;
debug_entry_t *active;
+ unsigned long flags;
if (!debug_active || !id->areas)
return NULL;
if (debug_critical) {
if (!spin_trylock_irqsave(&id->lock, flags))
return NULL;
- } else
+ } else {
spin_lock_irqsave(&id->lock, flags);
- active = get_active_entry(id);
- memset(DEBUG_DATA(active), 0, id->buf_size);
- memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
- debug_finish_entry(id, active, level, 1);
- spin_unlock_irqrestore(&id->lock, flags);
+ }
+ do {
+ active = get_active_entry(id);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ if (len < id->buf_size)
+ memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+ debug_finish_entry(id, active, level, len <= id->buf_size);
+ len -= id->buf_size;
+ buf += id->buf_size;
+ } while (len > 0);
+ spin_unlock_irqrestore(&id->lock, flags);
return active;
}
EXPORT_SYMBOL(debug_exception_common);
@@ -1001,108 +1240,106 @@ EXPORT_SYMBOL(debug_exception_common);
/*
* counts arguments in format string for sprintf view
*/
-
-static inline int
-debug_count_numargs(char *string)
+static inline int debug_count_numargs(char *string)
{
- int numargs=0;
+ int numargs = 0;
- while(*string) {
- if(*string++=='%')
+ while (*string) {
+ if (*string++ == '%')
numargs++;
}
- return(numargs);
+ return numargs;
}
/*
* debug_sprintf_event:
*/
-
-debug_entry_t*
-debug_sprintf_event(debug_info_t* id, int level,char *string,...)
+debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
{
- va_list ap;
- int numargs,idx;
- unsigned long flags;
debug_sprintf_entry_t *curr_event;
debug_entry_t *active;
+ unsigned long flags;
+ int numargs, idx;
+ va_list ap;
- if((!id) || (level > id->level))
- return NULL;
if (!debug_active || !id->areas)
return NULL;
- numargs=debug_count_numargs(string);
+ numargs = debug_count_numargs(string);
if (debug_critical) {
if (!spin_trylock_irqsave(&id->lock, flags))
return NULL;
- } else
+ } else {
spin_lock_irqsave(&id->lock, flags);
+ }
active = get_active_entry(id);
- curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);
- va_start(ap,string);
- curr_event->string=string;
- for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
- curr_event->args[idx]=va_arg(ap,long);
+ curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active);
+ va_start(ap, string);
+ curr_event->string = string;
+ for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+ curr_event->args[idx] = va_arg(ap, long);
va_end(ap);
debug_finish_entry(id, active, level, 0);
spin_unlock_irqrestore(&id->lock, flags);
return active;
}
-EXPORT_SYMBOL(debug_sprintf_event);
+EXPORT_SYMBOL(__debug_sprintf_event);
/*
* debug_sprintf_exception:
*/
-
-debug_entry_t*
-debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
+debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
{
- va_list ap;
- int numargs,idx;
- unsigned long flags;
debug_sprintf_entry_t *curr_event;
debug_entry_t *active;
+ unsigned long flags;
+ int numargs, idx;
+ va_list ap;
- if((!id) || (level > id->level))
- return NULL;
if (!debug_active || !id->areas)
return NULL;
- numargs=debug_count_numargs(string);
+ numargs = debug_count_numargs(string);
if (debug_critical) {
if (!spin_trylock_irqsave(&id->lock, flags))
return NULL;
- } else
+ } else {
spin_lock_irqsave(&id->lock, flags);
+ }
active = get_active_entry(id);
- curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);
- va_start(ap,string);
- curr_event->string=string;
- for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
- curr_event->args[idx]=va_arg(ap,long);
+ curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active);
+ va_start(ap, string);
+ curr_event->string = string;
+ for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+ curr_event->args[idx] = va_arg(ap, long);
va_end(ap);
debug_finish_entry(id, active, level, 1);
spin_unlock_irqrestore(&id->lock, flags);
return active;
}
-EXPORT_SYMBOL(debug_sprintf_exception);
+EXPORT_SYMBOL(__debug_sprintf_exception);
-/*
- * debug_register_view:
+/**
+ * debug_register_view() - registers new debug view and creates debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
-
-int
-debug_register_view(debug_info_t * id, struct debug_view *view)
+int debug_register_view(debug_info_t *id, struct debug_view *view)
{
- int rc = 0;
- int i;
unsigned long flags;
- umode_t mode;
struct dentry *pde;
+ umode_t mode;
+ int rc = 0;
+ int i;
if (!id)
goto out;
@@ -1112,40 +1349,41 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
if (!view->input_proc)
mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
- id , &debug_file_ops);
- if (!pde){
- pr_err("Registering view %s/%s failed due to out of "
- "memory\n", id->name,view->name);
- rc = -1;
- goto out;
- }
+ id, &debug_file_ops);
spin_lock_irqsave(&id->lock, flags);
for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
if (!id->views[i])
break;
}
if (i == DEBUG_MAX_VIEWS) {
- pr_err("Registering view %s/%s would exceed the maximum "
- "number of views %i\n", id->name, view->name, i);
rc = -1;
} else {
id->views[i] = view;
id->debugfs_entries[i] = pde;
}
spin_unlock_irqrestore(&id->lock, flags);
- if (rc)
+ if (rc) {
+ pr_err("Registering view %s/%s would exceed the maximum "
+ "number of views %i\n", id->name, view->name, i);
debugfs_remove(pde);
+ }
out:
return rc;
}
EXPORT_SYMBOL(debug_register_view);
-/*
- * debug_unregister_view:
+/**
+ * debug_unregister_view() - unregisters debug view and removes debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
-
-int
-debug_unregister_view(debug_info_t * id, struct debug_view *view)
+int debug_unregister_view(debug_info_t *id, struct debug_view *view)
{
struct dentry *dentry = NULL;
unsigned long flags;
@@ -1158,9 +1396,9 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view)
if (id->views[i] == view)
break;
}
- if (i == DEBUG_MAX_VIEWS)
+ if (i == DEBUG_MAX_VIEWS) {
rc = -1;
- else {
+ } else {
dentry = id->debugfs_entries[i];
id->views[i] = NULL;
id->debugfs_entries[i] = NULL;
@@ -1172,36 +1410,28 @@ out:
}
EXPORT_SYMBOL(debug_unregister_view);
-static inline char *
-debug_get_user_string(const char __user *user_buf, size_t user_len)
+static inline char *debug_get_user_string(const char __user *user_buf,
+ size_t user_len)
{
- char* buffer;
-
- buffer = kmalloc(user_len + 1, GFP_KERNEL);
- if (!buffer)
- return ERR_PTR(-ENOMEM);
- if (copy_from_user(buffer, user_buf, user_len) != 0) {
- kfree(buffer);
- return ERR_PTR(-EFAULT);
- }
+ char *buffer;
+
+ buffer = memdup_user_nul(user_buf, user_len);
+ if (IS_ERR(buffer))
+ return buffer;
/* got the string, now strip linefeed. */
if (buffer[user_len - 1] == '\n')
buffer[user_len - 1] = 0;
- else
- buffer[user_len] = 0;
- return buffer;
+ return buffer;
}
-static inline int
-debug_get_uint(char *buf)
+static inline int debug_get_uint(char *buf)
{
int rc;
buf = skip_spaces(buf);
rc = simple_strtoul(buf, &buf, 10);
- if(*buf){
+ if (*buf)
rc = -EINVAL;
- }
return rc;
}
@@ -1214,43 +1444,41 @@ debug_get_uint(char *buf)
* prints out actual debug level
*/
-static int
-debug_prolog_pages_fn(debug_info_t * id,
- struct debug_view *view, char *out_buf)
+static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
{
- return sprintf(out_buf, "%i\n", id->pages_per_area);
+ return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
}
/*
* reads new size (number of pages per debug area)
*/
-static int
-debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_len, loff_t * offset)
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
{
+ int rc, new_pages;
char *str;
- int rc,new_pages;
if (user_len > 0x10000)
- user_len = 0x10000;
- if (*offset != 0){
+ user_len = 0x10000;
+ if (*offset != 0) {
rc = -EPIPE;
goto out;
}
- str = debug_get_user_string(user_buf,user_len);
- if(IS_ERR(str)){
+ str = debug_get_user_string(user_buf, user_len);
+ if (IS_ERR(str)) {
rc = PTR_ERR(str);
goto out;
}
new_pages = debug_get_uint(str);
- if(new_pages < 0){
+ if (new_pages < 0) {
rc = -EINVAL;
goto free_str;
}
- rc = debug_set_size(id,id->nr_areas, new_pages);
- if(rc != 0){
+ rc = debug_set_size(id, id->nr_areas, new_pages);
+ if (rc != 0) {
rc = -EINVAL;
goto free_str;
}
@@ -1265,54 +1493,48 @@ out:
/*
* prints out actual debug level
*/
-
-static int
-debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf)
+static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
{
int rc = 0;
- if(id->level == DEBUG_OFF_LEVEL) {
- rc = sprintf(out_buf,"-\n");
- }
- else {
- rc = sprintf(out_buf, "%i\n", id->level);
- }
+ if (id->level == DEBUG_OFF_LEVEL)
+ rc = scnprintf(out_buf, out_buf_size, "-\n");
+ else
+ rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
return rc;
}
/*
* reads new debug level
*/
-
-static int
-debug_input_level_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_len, loff_t * offset)
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
{
+ int rc, new_level;
char *str;
- int rc,new_level;
if (user_len > 0x10000)
- user_len = 0x10000;
- if (*offset != 0){
+ user_len = 0x10000;
+ if (*offset != 0) {
rc = -EPIPE;
goto out;
}
- str = debug_get_user_string(user_buf,user_len);
- if(IS_ERR(str)){
+ str = debug_get_user_string(user_buf, user_len);
+ if (IS_ERR(str)) {
rc = PTR_ERR(str);
goto out;
}
- if(str[0] == '-'){
+ if (str[0] == '-') {
debug_set_level(id, DEBUG_OFF_LEVEL);
rc = user_len;
goto free_str;
} else {
new_level = debug_get_uint(str);
}
- if(new_level < 0) {
- pr_warning("%s is not a valid level for a debug "
- "feature\n", str);
+ if (new_level < 0) {
+ pr_warn("%s is not a valid level for a debug feature\n", str);
rc = -EINVAL;
} else {
debug_set_level(id, new_level);
@@ -1325,208 +1547,173 @@ out:
return rc; /* number of input characters */
}
-
/*
* flushes debug areas
*/
-
-static void debug_flush(debug_info_t* id, int area)
+static void debug_flush(debug_info_t *id, int area)
{
- unsigned long flags;
- int i,j;
-
- if(!id || !id->areas)
- return;
- spin_lock_irqsave(&id->lock,flags);
- if(area == DEBUG_FLUSH_ALL){
- id->active_area = 0;
- memset(id->active_entries, 0, id->nr_areas * sizeof(int));
- for (i = 0; i < id->nr_areas; i++) {
+ unsigned long flags;
+ int i, j;
+
+ if (!id || !id->areas)
+ return;
+ spin_lock_irqsave(&id->lock, flags);
+ if (area == DEBUG_FLUSH_ALL) {
+ id->active_area = 0;
+ memset(id->active_entries, 0, id->nr_areas * sizeof(int));
+ for (i = 0; i < id->nr_areas; i++) {
id->active_pages[i] = 0;
- for(j = 0; j < id->pages_per_area; j++) {
- memset(id->areas[i][j], 0, PAGE_SIZE);
- }
+ for (j = 0; j < id->pages_per_area; j++)
+ memset(id->areas[i][j], 0, PAGE_SIZE);
}
- } else if(area >= 0 && area < id->nr_areas) {
- id->active_entries[area] = 0;
+ } else if (area >= 0 && area < id->nr_areas) {
+ id->active_entries[area] = 0;
id->active_pages[area] = 0;
- for(i = 0; i < id->pages_per_area; i++) {
- memset(id->areas[area][i],0,PAGE_SIZE);
- }
- }
- spin_unlock_irqrestore(&id->lock,flags);
+ for (i = 0; i < id->pages_per_area; i++)
+ memset(id->areas[area][i], 0, PAGE_SIZE);
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
}
/*
- * view function: flushes debug areas
+ * view function: flushes debug areas
*/
-
-static int
-debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
- struct file *file, const char __user *user_buf,
- size_t user_len, loff_t * offset)
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
{
- char input_buf[1];
- int rc = user_len;
+ char input_buf[1];
+ int rc = user_len;
if (user_len > 0x10000)
- user_len = 0x10000;
- if (*offset != 0){
+ user_len = 0x10000;
+ if (*offset != 0) {
rc = -EPIPE;
- goto out;
+ goto out;
+ }
+ if (copy_from_user(input_buf, user_buf, 1)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ if (input_buf[0] == '-') {
+ debug_flush(id, DEBUG_FLUSH_ALL);
+ goto out;
+ }
+ if (isdigit(input_buf[0])) {
+ int area = ((int) input_buf[0] - (int) '0');
+
+ debug_flush(id, area);
+ goto out;
}
- if (copy_from_user(input_buf, user_buf, 1)){
- rc = -EFAULT;
- goto out;
- }
- if(input_buf[0] == '-') {
- debug_flush(id, DEBUG_FLUSH_ALL);
- goto out;
- }
- if (isdigit(input_buf[0])) {
- int area = ((int) input_buf[0] - (int) '0');
- debug_flush(id, area);
- goto out;
- }
pr_info("Flushing debug data failed because %c is not a valid "
"area\n", input_buf[0]);
out:
- *offset += user_len;
- return rc; /* number of input characters */
-}
-
-/*
- * prints debug header in raw format
- */
-
-static int
-debug_raw_header_fn(debug_info_t * id, struct debug_view *view,
- int area, debug_entry_t * entry, char *out_buf)
-{
- int rc;
-
- rc = sizeof(debug_entry_t);
- memcpy(out_buf,entry,sizeof(debug_entry_t));
- return rc;
-}
-
-/*
- * prints debug data in raw format
- */
-
-static int
-debug_raw_format_fn(debug_info_t * id, struct debug_view *view,
- char *out_buf, const char *in_buf)
-{
- int rc;
-
- rc = id->buf_size;
- memcpy(out_buf, in_buf, id->buf_size);
- return rc;
+ *offset += user_len;
+ return rc; /* number of input characters */
}
/*
* prints debug data in hex/ascii format
*/
-
-static int
-debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
- char *out_buf, const char *in_buf)
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *in_buf)
{
int i, rc = 0;
for (i = 0; i < id->buf_size; i++) {
- rc += sprintf(out_buf + rc, "%02x ",
- ((unsigned char *) in_buf)[i]);
- }
- rc += sprintf(out_buf + rc, "| ");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc,
+ "%02x ", ((unsigned char *)in_buf)[i]);
+ }
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
+
if (isascii(c) && isprint(c))
- rc += sprintf(out_buf + rc, "%c", c);
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
else
- rc += sprintf(out_buf + rc, ".");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
}
- rc += sprintf(out_buf + rc, "\n");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
return rc;
}
/*
* prints header for debug entry
*/
-
-int
-debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
- int area, debug_entry_t * entry, char *out_buf)
+int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
{
- struct timespec time_spec;
- char *except_str;
+ unsigned long sec, usec;
unsigned long caller;
- int rc = 0;
unsigned int level;
+ char *except_str;
+ int rc = 0;
- level = entry->id.fields.level;
- stck_to_timespec(entry->id.stck, &time_spec);
+ level = entry->level;
+ sec = entry->clock;
+ usec = do_div(sec, USEC_PER_SEC);
- if (entry->id.fields.exception)
+ if (entry->exception)
except_str = "*";
else
except_str = "-";
- caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
- rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ",
- area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level,
- except_str, entry->id.fields.cpuid, (void *) caller);
+ caller = (unsigned long) entry->caller;
+ rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ",
+ area, sec, usec, level, except_str,
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
/*
- * prints debug data sprintf-formated:
- * debug_sprinf_event/exception calls must be used together with this view
+ * prints debug data sprintf-formatted:
+ * debug_sprintf_event/exception calls must be used together with this view
*/
#define DEBUG_SPRINTF_MAX_ARGS 10
-static int
-debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
- char *out_buf, debug_sprintf_entry_t *curr_event)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *inbuf)
{
- int num_longs, num_used_args = 0,i, rc = 0;
+ debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
+ int num_longs, num_used_args = 0, i, rc = 0;
int index[DEBUG_SPRINTF_MAX_ARGS];
/* count of longs fit into one entry */
- num_longs = id->buf_size / sizeof(long);
+ num_longs = id->buf_size / sizeof(long);
- if(num_longs < 1)
+ if (num_longs < 1)
goto out; /* bufsize of entry too small */
- if(num_longs == 1) {
+ if (num_longs == 1) {
/* no args, we use only the string */
- strcpy(out_buf, curr_event->string);
- rc = strlen(curr_event->string);
+ rc = strscpy(out_buf, curr_event->string, out_buf_size);
+ if (rc == -E2BIG)
+ rc = out_buf_size;
goto out;
}
/* number of arguments used for sprintf (without the format string) */
- num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
+ num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
- memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int));
+ memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int));
- for(i = 0; i < num_used_args; i++)
+ for (i = 0; i < num_used_args; i++)
index[i] = i;
- rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
- curr_event->args[index[1]], curr_event->args[index[2]],
- curr_event->args[index[3]], curr_event->args[index[4]],
- curr_event->args[index[5]], curr_event->args[index[6]],
- curr_event->args[index[7]], curr_event->args[index[8]],
- curr_event->args[index[9]]);
-
+ rc = scnprintf(out_buf, out_buf_size,
+ curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
out:
-
return rc;
}
+EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:
@@ -1534,7 +1721,7 @@ out:
*/
static int __init debug_init(void)
{
- s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+ s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table);
mutex_lock(&debug_mutex);
debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
initialized = 1;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
deleted file mode 100644
index 8237fc07ac79..000000000000
--- a/arch/s390/kernel/diag.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Implementation of s390 diagnose codes
- *
- * Copyright IBM Corp. 2007
- * Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/module.h>
-#include <asm/diag.h>
-
-/*
- * Diagnose 14: Input spool file manipulation
- */
-int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
-{
- register unsigned long _ry1 asm("2") = ry1;
- register unsigned long _ry2 asm("3") = subcode;
- int rc = 0;
-
- asm volatile(
-#ifdef CONFIG_64BIT
- " sam31\n"
- " diag %2,2,0x14\n"
- " sam64\n"
-#else
- " diag %2,2,0x14\n"
-#endif
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc), "+d" (_ry2)
- : "d" (rx), "d" (_ry1)
- : "cc");
-
- return rc;
-}
-EXPORT_SYMBOL(diag14);
-
-/*
- * Diagnose 210: Get information about a virtual device
- */
-int diag210(struct diag210 *addr)
-{
- /*
- * diag 210 needs its data below the 2GB border, so we
- * use a static data area to be sure
- */
- static struct diag210 diag210_tmp;
- static DEFINE_SPINLOCK(diag210_lock);
- unsigned long flags;
- int ccode;
-
- spin_lock_irqsave(&diag210_lock, flags);
- diag210_tmp = *addr;
-
-#ifdef CONFIG_64BIT
- asm volatile(
- " lhi %0,-1\n"
- " sam31\n"
- " diag %1,0,0x210\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1: sam64\n"
- EX_TABLE(0b, 1b)
- : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-#else
- asm volatile(
- " lhi %0,-1\n"
- " diag %1,0,0x210\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1:\n"
- EX_TABLE(0b, 1b)
- : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-#endif
-
- *addr = diag210_tmp;
- spin_unlock_irqrestore(&diag210_lock, flags);
-
- return ccode;
-}
-EXPORT_SYMBOL(diag210);
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y := diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c
new file mode 100644
index 000000000000..56b862ba9be8
--- /dev/null
+++ b/arch/s390/kernel/diag/diag.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implementation of s390 diagnose codes
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-extable.h>
+#include <asm/diag.h>
+#include <asm/trace/diag.h>
+#include <asm/sections.h>
+#include <asm/asm.h>
+#include "../entry.h"
+
+struct diag_stat {
+ unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+ int code;
+ char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+ [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+ [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+ [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+ [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+ [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+ [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
+ [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+ [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+ [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+ [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+ [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+ [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+ [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+ [DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" },
+ [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+ [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+ [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+ [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+ [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
+ [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+ [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
+ [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
+ [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+struct diag_ops __amode31_ref diag_amode31_ops = {
+ .diag210 = _diag210_amode31,
+ .diag26c = _diag26c_amode31,
+ .diag14 = _diag14_amode31,
+ .diag0c = _diag0c_amode31,
+ .diag8c = _diag8c_amode31,
+ .diag308_reset = _diag308_reset_amode31
+};
+
+static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
+struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
+
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+ struct diag_stat *stat;
+ unsigned long n = (unsigned long) v - 1;
+ int cpu, prec, tmp;
+
+ cpus_read_lock();
+ if (n == 0) {
+ seq_puts(m, " ");
+
+ for_each_online_cpu(cpu) {
+ prec = 10;
+ for (tmp = 10; cpu >= tmp; tmp *= 10)
+ prec--;
+ seq_printf(m, "%*s%d", prec, "CPU", cpu);
+ }
+ seq_putc(m, '\n');
+ } else if (n <= NR_DIAG_STAT) {
+ seq_printf(m, "diag %03x:", diag_map[n-1].code);
+ for_each_online_cpu(cpu) {
+ stat = &per_cpu(diag_stat, cpu);
+ seq_printf(m, " %10u", stat->counter[n-1]);
+ }
+ seq_printf(m, " %s\n", diag_map[n-1].name);
+ }
+ cpus_read_unlock();
+ return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+ .start = show_diag_stat_start,
+ .next = show_diag_stat_next,
+ .stop = show_diag_stat_stop,
+ .show = show_diag_stat,
+};
+
+DEFINE_SEQ_ATTRIBUTE(show_diag_stat);
+
+static int __init show_diag_stat_init(void)
+{
+ debugfs_create_file("diag_stat", 0400, NULL, NULL,
+ &show_diag_stat_fops);
+ return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
+
+/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data)
+{
+ diag_stat_inc(DIAG_STAT_X00C);
+ diag_amode31_ops.diag0c(virt_to_phys(data));
+}
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ *
+ * The subcode parameter determines the type of the first parameter rx.
+ * Currently used are the following 3 subcommands:
+ * 0x0: Read the Next Spool File Buffer (Data Record)
+ * 0x28: Position a Spool File to the Designated Record
+ * 0xfff: Retrieve Next File Descriptor
+ *
+ * For subcommands 0x0 and 0xfff, the value of the first parameter is
+ * a virtual address of a memory buffer and needs virtual to physical
+ * address translation. For other subcommands the rx parameter is not
+ * a virtual address.
+ */
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+ diag_stat_inc(DIAG_STAT_X014);
+ switch (subcode) {
+ case 0x0:
+ case 0xfff:
+ rx = virt_to_phys((void *)rx);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ return diag_amode31_ops.diag14(rx, ry1, subcode);
+}
+EXPORT_SYMBOL(diag14);
+
+#define DIAG204_BUSY_RC 8
+
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = *subcode, .odd = size };
+
+ asm_inline volatile(
+ " diag %[addr],%[rp],0x204\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b,0b)
+ : [rp] "+&d" (rp.pair) : [addr] "d" (addr) : "memory");
+ *subcode = rp.even;
+ return rp.odd;
+}
+
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of area in pages which @area points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+ if (addr) {
+ if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+ return -EINVAL;
+ }
+ if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+ addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
+ diag_stat_inc(DIAG_STAT_X204);
+ size = __diag204(&subcode, size, addr);
+ if (subcode == DIAG204_BUSY_RC)
+ return -EBUSY;
+ else if (subcode)
+ return -EOPNOTSUPP;
+ return size;
+}
+EXPORT_SYMBOL(diag204);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+int diag210(struct diag210 *addr)
+{
+ static DEFINE_SPINLOCK(diag210_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag210_lock, flags);
+ *__diag210_tmp_amode31 = *addr;
+
+ diag_stat_inc(DIAG_STAT_X210);
+ ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31);
+
+ *addr = *__diag210_tmp_amode31;
+ spin_unlock_irqrestore(&diag210_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag210);
+
+/*
+ * Diagnose 8C: Access 3270 Display Device Information
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+ static DEFINE_SPINLOCK(diag8c_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag8c_lock, flags);
+
+ diag_stat_inc(DIAG_STAT_X08C);
+ ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+ *addr = *__diag8c_tmp_amode31;
+ spin_unlock_irqrestore(&diag8c_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
+int diag224(void *ptr)
+{
+ unsigned long addr = __pa(ptr);
+ int rc = -EOPNOTSUPP;
+
+ diag_stat_inc(DIAG_STAT_X224);
+ asm_inline volatile("\n"
+ " diag %[type],%[addr],0x224\n"
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : [rc] "+d" (rc)
+ , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+ : [type] "d" (0), [addr] "d" (addr));
+ return rc;
+}
+EXPORT_SYMBOL(diag224);
+
+/*
+ * Diagnose 26C: Access Certain System Information
+ */
+int diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+ diag_stat_inc(DIAG_STAT_X26C);
+ return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
+}
+EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+ int cc;
+
+ diag_stat_inc(DIAG_STAT_X49C);
+ asm volatile(
+ " diag %[subcode],0,0x49c\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [subcode] "d" (subcode)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..f411562aa7f6
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+ DIAG310_SUBC_0 = 0,
+ DIAG310_SUBC_1 = 1,
+ DIAG310_SUBC_4 = 4,
+ DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+ DIAG310_RET_SUCCESS = 0x0001,
+ DIAG310_RET_BUSY = 0x0101,
+ DIAG310_RET_OPNOTSUPP = 0x0102,
+ DIAG310_RET_SC4_INVAL = 0x0401,
+ DIAG310_RET_SC4_NODATA = 0x0402,
+ DIAG310_RET_SC5_INVAL = 0x0501,
+ DIAG310_RET_SC5_NODATA = 0x0502,
+ DIAG310_RET_SC5_ESIZE = 0x0503
+};
+
+union diag310_response {
+ u64 response;
+ struct {
+ u64 result : 32;
+ u64 : 16;
+ u64 rc : 16;
+ };
+};
+
+union diag310_req_subcode {
+ u64 subcode;
+ struct {
+ u64 : 48;
+ u64 st : 8;
+ u64 sc : 8;
+ };
+};
+
+union diag310_req_size {
+ u64 size;
+ struct {
+ u64 page_count : 32;
+ u64 : 32;
+ };
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+ diag_stat_inc(DIAG_STAT_X310);
+ asm volatile("diag %[rp],%[subcode],0x310"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+ switch (result) {
+ case DIAG310_RET_BUSY:
+ return -EBUSY;
+ case DIAG310_RET_OPNOTSUPP:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *mask = res.response;
+ return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *stride = res.result;
+ return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+ union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+ union diag310_response res;
+
+ res.response = diag310(req.subcode, 0, NULL);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ *pages = res.result;
+ return 0;
+ case DIAG310_RET_SC4_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC4_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+ union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+ union diag310_req_size req_size = { .page_count = pages };
+ union diag310_response res;
+
+ res.response = diag310(req_sc.subcode, req_size.size, buf);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ return 0;
+ case DIAG310_RET_SC5_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC5_ESIZE:
+ return -EOVERFLOW;
+ case DIAG310_RET_SC5_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_check_features(void)
+{
+ static int features_available;
+ unsigned long mask;
+ int rc;
+
+ if (READ_ONCE(features_available))
+ return 0;
+ if (!sclp.has_diag310)
+ return -EOPNOTSUPP;
+ rc = diag310_get_subcode_mask(&mask);
+ if (rc)
+ return rc;
+ if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+ return -EOPNOTSUPP;
+ WRITE_ONCE(features_available, 1);
+ return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+ static unsigned long memtop_stride;
+ unsigned long stride;
+ int rc;
+
+ stride = READ_ONCE(memtop_stride);
+ if (!stride) {
+ rc = diag310_get_memtop_stride(&stride);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_stride, stride);
+ }
+ *res = stride;
+ return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+ static unsigned long memtop_pages[DIAG310_LEVELMAX];
+ unsigned long pages;
+ int rc;
+
+ if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+ return -EINVAL;
+ pages = READ_ONCE(memtop_pages[level - 1]);
+ if (!pages) {
+ rc = diag310_get_memtop_size(&pages, level);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_pages[level - 1], pages);
+ }
+ *res = pages;
+ return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long stride;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ rc = memtop_get_stride_len(&stride);
+ if (rc)
+ return rc;
+ if (put_user(stride, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long pages, level;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, argp))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ if (put_user(pages * PAGE_SIZE, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+ struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+ unsigned long level, pages, data_size;
+ u64 address;
+ void *buf;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, &udata->nesting_lvl))
+ return -EFAULT;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ data_size = pages * PAGE_SIZE;
+ buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ if (!buf)
+ return -ENOMEM;
+ rc = diag310_store_topology_map(buf, pages, level);
+ if (rc)
+ goto out;
+ if (copy_to_user((void __user *)address, buf, data_size))
+ rc = -EFAULT;
+out:
+ vfree(buf);
+ return rc;
+}
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..fe325c2a2d0d
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324. diag 0x324 stores the power readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+ DIAG324_SUBC_0 = 0,
+ DIAG324_SUBC_1 = 1,
+ DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+ DIAG324_RET_SUCCESS = 0x0001,
+ DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
+ DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
+ DIAG324_RET_READING_UNAVAILABLE = 0x0105,
+};
+
+union diag324_response {
+ u64 response;
+ struct {
+ u64 installed : 32;
+ u64 : 16;
+ u64 rc : 16;
+ } sc0;
+ struct {
+ u64 format : 16;
+ u64 : 16;
+ u64 pib_len : 16;
+ u64 rc : 16;
+ } sc1;
+ struct {
+ u64 : 48;
+ u64 rc : 16;
+ } sc2;
+};
+
+union diag324_request {
+ u64 request;
+ struct {
+ u64 : 32;
+ u64 allocated : 16;
+ u64 : 12;
+ u64 sc : 4;
+ } sc2;
+};
+
+struct pib {
+ u32 : 8;
+ u32 num : 8;
+ u32 len : 16;
+ u32 : 24;
+ u32 hlen : 8;
+ u64 : 64;
+ u64 intv;
+ u8 r[];
+} __packed;
+
+struct pibdata {
+ struct pib *pib;
+ ktime_t expire;
+ u64 sequence;
+ size_t len;
+ int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr };
+
+ diag_stat_inc(DIAG_STAT_X324);
+ asm volatile("diag %[rp],%[subcode],0x324"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+ struct pibdata *data = &pibdata;
+ ktime_t timedout;
+
+ mutex_lock(&pibmutex);
+ timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);
+ if (ktime_before(ktime_get(), timedout)) {
+ mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ goto out;
+ }
+ vfree(data->pib);
+ data->pib = NULL;
+out:
+ mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+ union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+ union diag324_response res;
+ int rc;
+
+ memset(data->pib, 0, data->len);
+ res.response = diag324(req.request, data->pib);
+ switch (res.sc2.rc) {
+ case DIAG324_RET_SUCCESS:
+ rc = 0;
+ break;
+ case DIAG324_RET_SUBC_NOTAVAIL:
+ rc = -ENOENT;
+ break;
+ case DIAG324_RET_INSUFFICIENT_SIZE:
+ rc = -EMSGSIZE;
+ break;
+ case DIAG324_RET_READING_UNAVAILABLE:
+ rc = -EBUSY;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+ struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+ struct pibdata *data = &pibdata;
+ static bool first = true;
+ u64 address;
+ int rc;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ mutex_lock(&pibmutex);
+ rc = -ENOMEM;
+ if (!data->pib)
+ data->pib = vmalloc(data->len);
+ if (!data->pib)
+ goto out;
+ if (first || ktime_after(ktime_get(), data->expire)) {
+ pib_update(data);
+ data->sequence++;
+ data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+ mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ first = false;
+ }
+ rc = data->rc;
+ if (rc != 0 && rc != -EBUSY)
+ goto out;
+ rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+ rc |= put_user(data->sequence, &udata->sequence);
+ if (rc)
+ rc = -EFAULT;
+out:
+ mutex_unlock(&pibmutex);
+ return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+ struct pibdata *data = &pibdata;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (put_user(data->len, (size_t __user *)arg))
+ return -EFAULT;
+ return 0;
+}
+
+static int __init diag324_init(void)
+{
+ union diag324_response res;
+ unsigned long installed;
+
+ if (!sclp.has_diag324)
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_0, NULL);
+ if (res.sc0.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ installed = res.response;
+ if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_1, NULL);
+ if (res.sc1.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ pibdata.len = res.sc1.pib_len;
+ return 0;
+}
+device_initcall(diag324_init);
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long rc;
+
+ switch (cmd) {
+ case DIAG324_GET_PIBLEN:
+ rc = diag324_piblen(arg);
+ break;
+ case DIAG324_GET_PIBBUF:
+ rc = diag324_pibbuf(arg);
+ break;
+ case DIAG310_GET_STRIDE:
+ rc = diag310_memtop_stride(arg);
+ break;
+ case DIAG310_GET_MEMTOPLEN:
+ rc = diag310_memtop_len(arg);
+ break;
+ case DIAG310_GET_MEMTOPBUF:
+ rc = diag310_memtop_buf(arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+ .name = "diag",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &fops,
+ .mode = 0444,
+};
+
+static int diag_init(void)
+{
+ return misc_register(&diagdev);
+}
+
+device_initcall(diag_init);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index be87d3e05a5b..1cec93895b3a 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Disassemble s390 instructions.
*
@@ -16,69 +17,95 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/reboot.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
#include <linux/atomic.h>
-#include <asm/mathemu.h>
+#include <linux/io.h>
+#include <asm/dis.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
#include <asm/irq.h>
-#ifndef CONFIG_64BIT
-#define ONELONG "%08lx: "
-#else /* CONFIG_64BIT */
-#define ONELONG "%016lx: "
-#endif /* CONFIG_64BIT */
-
+/* Type of operand */
#define OPERAND_GPR 0x1 /* Operand printed as %rx */
#define OPERAND_FPR 0x2 /* Operand printed as %fx */
#define OPERAND_AR 0x4 /* Operand printed as %ax */
#define OPERAND_CR 0x8 /* Operand printed as %cx */
-#define OPERAND_DISP 0x10 /* Operand printed as displacement */
-#define OPERAND_BASE 0x20 /* Operand printed as base register */
-#define OPERAND_INDEX 0x40 /* Operand printed as index register */
-#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
-#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
-#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
+#define OPERAND_VR 0x10 /* Operand printed as %vx */
+#define OPERAND_DISP 0x20 /* Operand printed as displacement */
+#define OPERAND_BASE 0x40 /* Operand printed as base register */
+#define OPERAND_INDEX 0x80 /* Operand printed as index register */
+#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */
+#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */
+
+struct s390_operand {
+ unsigned char bits; /* The number of bits in the operand. */
+ unsigned char shift; /* The number of bits to shift. */
+ unsigned short flags; /* One bit syntax flags. */
+};
+
+struct s390_insn {
+ union {
+ const char name[5];
+ struct {
+ unsigned char zero;
+ unsigned int offset;
+ } __packed;
+ };
+ unsigned char opfrag;
+ unsigned char format;
+};
+
+struct s390_opcode_offset {
+ unsigned char opcode;
+ unsigned char mask;
+ unsigned char byte;
+ unsigned short offset;
+ unsigned short count;
+} __packed;
enum {
- UNUSED, /* Indicates the end of the operand list */
- R_8, /* GPR starting at position 8 */
- R_12, /* GPR starting at position 12 */
- R_16, /* GPR starting at position 16 */
- R_20, /* GPR starting at position 20 */
- R_24, /* GPR starting at position 24 */
- R_28, /* GPR starting at position 28 */
- R_32, /* GPR starting at position 32 */
- F_8, /* FPR starting at position 8 */
- F_12, /* FPR starting at position 12 */
- F_16, /* FPR starting at position 16 */
- F_20, /* FPR starting at position 16 */
- F_24, /* FPR starting at position 24 */
- F_28, /* FPR starting at position 28 */
- F_32, /* FPR starting at position 32 */
+ UNUSED,
A_8, /* Access reg. starting at position 8 */
A_12, /* Access reg. starting at position 12 */
A_24, /* Access reg. starting at position 24 */
A_28, /* Access reg. starting at position 28 */
- C_8, /* Control reg. starting at position 8 */
- C_12, /* Control reg. starting at position 12 */
B_16, /* Base register starting at position 16 */
B_32, /* Base register starting at position 32 */
- X_12, /* Index register starting at position 12 */
+ C_8, /* Control reg. starting at position 8 */
+ C_12, /* Control reg. starting at position 12 */
+ D20_20, /* 20 bit displacement starting at 20 */
D_20, /* Displacement starting at position 20 */
D_36, /* Displacement starting at position 36 */
- D20_20, /* 20 bit displacement starting at 20 */
+ F_8, /* FPR starting at position 8 */
+ F_12, /* FPR starting at position 12 */
+ F_16, /* FPR starting at position 16 */
+ F_24, /* FPR starting at position 24 */
+ F_28, /* FPR starting at position 28 */
+ F_32, /* FPR starting at position 32 */
+ I8_8, /* 8 bit signed value starting at 8 */
+ I8_32, /* 8 bit signed value starting at 32 */
+ I16_16, /* 16 bit signed value starting at 16 */
+ I16_32, /* 16 bit signed value starting at 32 */
+ I32_16, /* 32 bit signed value starting at 16 */
+ J12_12, /* 12 bit PC relative offset at 12 */
+ J16_16, /* 16 bit PC relative offset at 16 */
+ J16_32, /* 16 bit PC relative offset at 32 */
+ J24_24, /* 24 bit PC relative offset at 24 */
+ J32_16, /* 32 bit PC relative offset at 16 */
L4_8, /* 4 bit length starting at position 8 */
L4_12, /* 4 bit length starting at position 12 */
L8_8, /* 8 bit length starting at position 8 */
+ R_8, /* GPR starting at position 8 */
+ R_12, /* GPR starting at position 12 */
+ R_16, /* GPR starting at position 16 */
+ R_24, /* GPR starting at position 24 */
+ R_28, /* GPR starting at position 28 */
U4_8, /* 4 bit unsigned value starting at 8 */
U4_12, /* 4 bit unsigned value starting at 12 */
U4_16, /* 4 bit unsigned value starting at 16 */
@@ -90,1500 +117,248 @@ enum {
U8_8, /* 8 bit unsigned value starting at 8 */
U8_16, /* 8 bit unsigned value starting at 16 */
U8_24, /* 8 bit unsigned value starting at 24 */
+ U8_28, /* 8 bit unsigned value starting at 28 */
U8_32, /* 8 bit unsigned value starting at 32 */
- I8_8, /* 8 bit signed value starting at 8 */
- I8_32, /* 8 bit signed value starting at 32 */
- J12_12, /* PC relative offset at 12 */
- I16_16, /* 16 bit signed value starting at 16 */
- I16_32, /* 32 bit signed value starting at 16 */
- U16_16, /* 16 bit unsigned value starting at 16 */
- U16_32, /* 32 bit unsigned value starting at 16 */
- J16_16, /* PC relative jump offset at 16 */
- J16_32, /* PC relative offset at 16 */
- I24_24, /* 24 bit signed value starting at 24 */
- J32_16, /* PC relative long offset at 16 */
- I32_16, /* 32 bit signed value starting at 16 */
- U32_16, /* 32 bit unsigned value starting at 16 */
- M_16, /* 4 bit optional mask starting at 16 */
- M_20, /* 4 bit optional mask starting at 20 */
- RO_28, /* optional GPR starting at position 28 */
-};
-
-/*
- * Enumeration of the different instruction formats.
- * For details consult the principles of operation.
- */
-enum {
- INSTR_INVALID,
- INSTR_E,
- INSTR_IE_UU,
- INSTR_MII_UPI,
- INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU,
- INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0,
- INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP,
- INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU,
- INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP,
- INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0,
- INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF,
- INSTR_RRE_RR, INSTR_RRE_RR_OPT,
- INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR,
- INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR,
- INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR,
- INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF,
- INSTR_RRF_UUFR, INSTR_RRF_UURF,
- INSTR_RRR_F0FF, INSTR_RRS_RRRDU,
- INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR,
- INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD,
- INSTR_RSI_RRP,
- INSTR_RSL_LRDFU, INSTR_RSL_R0RD,
- INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD,
- INSTR_RSY_RDRM,
- INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
- INSTR_RS_RURD,
- INSTR_RXE_FRRD, INSTR_RXE_RRRD,
- INSTR_RXF_FRRDF,
- INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD,
- INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD,
- INSTR_SIL_RDI, INSTR_SIL_RDU,
- INSTR_SIY_IRD, INSTR_SIY_URD,
- INSTR_SI_URD,
- INSTR_SMI_U0RDP,
- INSTR_SSE_RDRD,
- INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2,
- INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD,
- INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
- INSTR_S_00, INSTR_S_RD,
-};
-
-struct operand {
- int bits; /* The number of bits in the operand. */
- int shift; /* The number of bits to shift. */
- int flags; /* One bit syntax flags. */
-};
-
-struct insn {
- const char name[5];
- unsigned char opfrag;
- unsigned char format;
+ U12_16, /* 12 bit unsigned value starting at 16 */
+ U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_20, /* 16 bit unsigned value starting at 20 */
+ U16_32, /* 16 bit unsigned value starting at 32 */
+ U32_16, /* 32 bit unsigned value starting at 16 */
+ VX_12, /* Vector index register starting at position 12 */
+ V_8, /* Vector reg. starting at position 8 */
+ V_12, /* Vector reg. starting at position 12 */
+ V_16, /* Vector reg. starting at position 16 */
+ V_32, /* Vector reg. starting at position 32 */
+ X_12, /* Index register starting at position 12 */
};
-static const struct operand operands[] =
-{
- [UNUSED] = { 0, 0, 0 },
- [R_8] = { 4, 8, OPERAND_GPR },
- [R_12] = { 4, 12, OPERAND_GPR },
- [R_16] = { 4, 16, OPERAND_GPR },
- [R_20] = { 4, 20, OPERAND_GPR },
- [R_24] = { 4, 24, OPERAND_GPR },
- [R_28] = { 4, 28, OPERAND_GPR },
- [R_32] = { 4, 32, OPERAND_GPR },
- [F_8] = { 4, 8, OPERAND_FPR },
- [F_12] = { 4, 12, OPERAND_FPR },
- [F_16] = { 4, 16, OPERAND_FPR },
- [F_20] = { 4, 16, OPERAND_FPR },
- [F_24] = { 4, 24, OPERAND_FPR },
- [F_28] = { 4, 28, OPERAND_FPR },
- [F_32] = { 4, 32, OPERAND_FPR },
+static const struct s390_operand operands[] = {
+ [UNUSED] = { 0, 0, 0 },
[A_8] = { 4, 8, OPERAND_AR },
[A_12] = { 4, 12, OPERAND_AR },
[A_24] = { 4, 24, OPERAND_AR },
[A_28] = { 4, 28, OPERAND_AR },
- [C_8] = { 4, 8, OPERAND_CR },
- [C_12] = { 4, 12, OPERAND_CR },
[B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR },
[B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR },
- [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR },
+ [C_8] = { 4, 8, OPERAND_CR },
+ [C_12] = { 4, 12, OPERAND_CR },
+ [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
[D_20] = { 12, 20, OPERAND_DISP },
[D_36] = { 12, 36, OPERAND_DISP },
- [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
+ [F_8] = { 4, 8, OPERAND_FPR },
+ [F_12] = { 4, 12, OPERAND_FPR },
+ [F_16] = { 4, 16, OPERAND_FPR },
+ [F_24] = { 4, 24, OPERAND_FPR },
+ [F_28] = { 4, 28, OPERAND_FPR },
+ [F_32] = { 4, 32, OPERAND_FPR },
+ [I8_8] = { 8, 8, OPERAND_SIGNED },
+ [I8_32] = { 8, 32, OPERAND_SIGNED },
+ [I16_16] = { 16, 16, OPERAND_SIGNED },
+ [I16_32] = { 16, 32, OPERAND_SIGNED },
+ [I32_16] = { 32, 16, OPERAND_SIGNED },
+ [J12_12] = { 12, 12, OPERAND_PCREL },
+ [J16_16] = { 16, 16, OPERAND_PCREL },
+ [J16_32] = { 16, 32, OPERAND_PCREL },
+ [J24_24] = { 24, 24, OPERAND_PCREL },
+ [J32_16] = { 32, 16, OPERAND_PCREL },
[L4_8] = { 4, 8, OPERAND_LENGTH },
- [L4_12] = { 4, 12, OPERAND_LENGTH },
+ [L4_12] = { 4, 12, OPERAND_LENGTH },
[L8_8] = { 8, 8, OPERAND_LENGTH },
+ [R_8] = { 4, 8, OPERAND_GPR },
+ [R_12] = { 4, 12, OPERAND_GPR },
+ [R_16] = { 4, 16, OPERAND_GPR },
+ [R_24] = { 4, 24, OPERAND_GPR },
+ [R_28] = { 4, 28, OPERAND_GPR },
[U4_8] = { 4, 8, 0 },
- [U4_12] = { 4, 12, 0 },
- [U4_16] = { 4, 16, 0 },
- [U4_20] = { 4, 20, 0 },
- [U4_24] = { 4, 24, 0 },
- [U4_28] = { 4, 28, 0 },
- [U4_32] = { 4, 32, 0 },
- [U4_36] = { 4, 36, 0 },
+ [U4_12] = { 4, 12, 0 },
+ [U4_16] = { 4, 16, 0 },
+ [U4_20] = { 4, 20, 0 },
+ [U4_24] = { 4, 24, 0 },
+ [U4_28] = { 4, 28, 0 },
+ [U4_32] = { 4, 32, 0 },
+ [U4_36] = { 4, 36, 0 },
[U8_8] = { 8, 8, 0 },
- [U8_16] = { 8, 16, 0 },
- [U8_24] = { 8, 24, 0 },
- [U8_32] = { 8, 32, 0 },
- [J12_12] = { 12, 12, OPERAND_PCREL },
- [I16_16] = { 16, 16, OPERAND_SIGNED },
+ [U8_16] = { 8, 16, 0 },
+ [U8_24] = { 8, 24, 0 },
+ [U8_28] = { 8, 28, 0 },
+ [U8_32] = { 8, 32, 0 },
+ [U12_16] = { 12, 16, 0 },
[U16_16] = { 16, 16, 0 },
+ [U16_20] = { 16, 20, 0 },
[U16_32] = { 16, 32, 0 },
- [J16_16] = { 16, 16, OPERAND_PCREL },
- [J16_32] = { 16, 32, OPERAND_PCREL },
- [I16_32] = { 16, 32, OPERAND_SIGNED },
- [I24_24] = { 24, 24, OPERAND_SIGNED },
- [J32_16] = { 32, 16, OPERAND_PCREL },
- [I32_16] = { 32, 16, OPERAND_SIGNED },
[U32_16] = { 32, 16, 0 },
- [M_16] = { 4, 16, 0 },
- [M_20] = { 4, 20, 0 },
- [RO_28] = { 4, 28, OPERAND_GPR }
-};
-
-static const unsigned char formats[][7] = {
- [INSTR_E] = { 0xff, 0,0,0,0,0,0 },
- [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 },
- [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 },
- [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 },
- [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 },
- [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 },
- [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
- [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
- [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
- [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
- [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 },
- [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 },
- [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 },
- [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 },
- [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 },
- [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
- [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
- [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
- [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 },
- [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 },
- [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 },
- [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 },
- [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 },
- [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 },
- [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 },
- [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 },
- [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 },
- [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 },
- [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 },
- [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 },
- [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 },
- [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 },
- [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 },
- [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
- [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 },
- [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
- [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 },
- [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 },
- [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
- [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
- [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 },
- [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 },
- [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 },
- [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 },
- [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 },
- [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },
- [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 },
- [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 },
- [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 },
- [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 },
- [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 },
- [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 },
- [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 },
- [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 },
- [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 },
- [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 },
- [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 },
- [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 },
- [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 },
- [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
- [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
- [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
- [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
- [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 },
- [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 },
- [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
- [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
- [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 },
- [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
- [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
- [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 },
- [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
- [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 },
- [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
- [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
- [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
- [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
- [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
- [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
- [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
- [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 },
- [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
- [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
- [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 },
- [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 },
- [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 },
- [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 },
- [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 },
- [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 },
- [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 },
- [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 },
- [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 },
- [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 },
- [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 },
- [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 },
- [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 },
- [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 },
- [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 },
- [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
- [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 },
- [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 },
-};
-
-enum {
- LONG_INSN_ALGHSIK,
- LONG_INSN_ALHHHR,
- LONG_INSN_ALHHLR,
- LONG_INSN_ALHSIK,
- LONG_INSN_ALSIHN,
- LONG_INSN_CDFBRA,
- LONG_INSN_CDGBRA,
- LONG_INSN_CDGTRA,
- LONG_INSN_CDLFBR,
- LONG_INSN_CDLFTR,
- LONG_INSN_CDLGBR,
- LONG_INSN_CDLGTR,
- LONG_INSN_CEFBRA,
- LONG_INSN_CEGBRA,
- LONG_INSN_CELFBR,
- LONG_INSN_CELGBR,
- LONG_INSN_CFDBRA,
- LONG_INSN_CFEBRA,
- LONG_INSN_CFXBRA,
- LONG_INSN_CGDBRA,
- LONG_INSN_CGDTRA,
- LONG_INSN_CGEBRA,
- LONG_INSN_CGXBRA,
- LONG_INSN_CGXTRA,
- LONG_INSN_CLFDBR,
- LONG_INSN_CLFDTR,
- LONG_INSN_CLFEBR,
- LONG_INSN_CLFHSI,
- LONG_INSN_CLFXBR,
- LONG_INSN_CLFXTR,
- LONG_INSN_CLGDBR,
- LONG_INSN_CLGDTR,
- LONG_INSN_CLGEBR,
- LONG_INSN_CLGFRL,
- LONG_INSN_CLGHRL,
- LONG_INSN_CLGHSI,
- LONG_INSN_CLGXBR,
- LONG_INSN_CLGXTR,
- LONG_INSN_CLHHSI,
- LONG_INSN_CXFBRA,
- LONG_INSN_CXGBRA,
- LONG_INSN_CXGTRA,
- LONG_INSN_CXLFBR,
- LONG_INSN_CXLFTR,
- LONG_INSN_CXLGBR,
- LONG_INSN_CXLGTR,
- LONG_INSN_FIDBRA,
- LONG_INSN_FIEBRA,
- LONG_INSN_FIXBRA,
- LONG_INSN_LDXBRA,
- LONG_INSN_LEDBRA,
- LONG_INSN_LEXBRA,
- LONG_INSN_LLGFAT,
- LONG_INSN_LLGFRL,
- LONG_INSN_LLGHRL,
- LONG_INSN_LLGTAT,
- LONG_INSN_POPCNT,
- LONG_INSN_RIEMIT,
- LONG_INSN_RINEXT,
- LONG_INSN_RISBGN,
- LONG_INSN_RISBHG,
- LONG_INSN_RISBLG,
- LONG_INSN_SLHHHR,
- LONG_INSN_SLHHLR,
- LONG_INSN_TABORT,
- LONG_INSN_TBEGIN,
- LONG_INSN_TBEGINC,
- LONG_INSN_PCISTG,
- LONG_INSN_MPCIFC,
- LONG_INSN_STPCIFC,
- LONG_INSN_PCISTB,
-};
-
-static char *long_insn_name[] = {
- [LONG_INSN_ALGHSIK] = "alghsik",
- [LONG_INSN_ALHHHR] = "alhhhr",
- [LONG_INSN_ALHHLR] = "alhhlr",
- [LONG_INSN_ALHSIK] = "alhsik",
- [LONG_INSN_ALSIHN] = "alsihn",
- [LONG_INSN_CDFBRA] = "cdfbra",
- [LONG_INSN_CDGBRA] = "cdgbra",
- [LONG_INSN_CDGTRA] = "cdgtra",
- [LONG_INSN_CDLFBR] = "cdlfbr",
- [LONG_INSN_CDLFTR] = "cdlftr",
- [LONG_INSN_CDLGBR] = "cdlgbr",
- [LONG_INSN_CDLGTR] = "cdlgtr",
- [LONG_INSN_CEFBRA] = "cefbra",
- [LONG_INSN_CEGBRA] = "cegbra",
- [LONG_INSN_CELFBR] = "celfbr",
- [LONG_INSN_CELGBR] = "celgbr",
- [LONG_INSN_CFDBRA] = "cfdbra",
- [LONG_INSN_CFEBRA] = "cfebra",
- [LONG_INSN_CFXBRA] = "cfxbra",
- [LONG_INSN_CGDBRA] = "cgdbra",
- [LONG_INSN_CGDTRA] = "cgdtra",
- [LONG_INSN_CGEBRA] = "cgebra",
- [LONG_INSN_CGXBRA] = "cgxbra",
- [LONG_INSN_CGXTRA] = "cgxtra",
- [LONG_INSN_CLFDBR] = "clfdbr",
- [LONG_INSN_CLFDTR] = "clfdtr",
- [LONG_INSN_CLFEBR] = "clfebr",
- [LONG_INSN_CLFHSI] = "clfhsi",
- [LONG_INSN_CLFXBR] = "clfxbr",
- [LONG_INSN_CLFXTR] = "clfxtr",
- [LONG_INSN_CLGDBR] = "clgdbr",
- [LONG_INSN_CLGDTR] = "clgdtr",
- [LONG_INSN_CLGEBR] = "clgebr",
- [LONG_INSN_CLGFRL] = "clgfrl",
- [LONG_INSN_CLGHRL] = "clghrl",
- [LONG_INSN_CLGHSI] = "clghsi",
- [LONG_INSN_CLGXBR] = "clgxbr",
- [LONG_INSN_CLGXTR] = "clgxtr",
- [LONG_INSN_CLHHSI] = "clhhsi",
- [LONG_INSN_CXFBRA] = "cxfbra",
- [LONG_INSN_CXGBRA] = "cxgbra",
- [LONG_INSN_CXGTRA] = "cxgtra",
- [LONG_INSN_CXLFBR] = "cxlfbr",
- [LONG_INSN_CXLFTR] = "cxlftr",
- [LONG_INSN_CXLGBR] = "cxlgbr",
- [LONG_INSN_CXLGTR] = "cxlgtr",
- [LONG_INSN_FIDBRA] = "fidbra",
- [LONG_INSN_FIEBRA] = "fiebra",
- [LONG_INSN_FIXBRA] = "fixbra",
- [LONG_INSN_LDXBRA] = "ldxbra",
- [LONG_INSN_LEDBRA] = "ledbra",
- [LONG_INSN_LEXBRA] = "lexbra",
- [LONG_INSN_LLGFAT] = "llgfat",
- [LONG_INSN_LLGFRL] = "llgfrl",
- [LONG_INSN_LLGHRL] = "llghrl",
- [LONG_INSN_LLGTAT] = "llgtat",
- [LONG_INSN_POPCNT] = "popcnt",
- [LONG_INSN_RIEMIT] = "riemit",
- [LONG_INSN_RINEXT] = "rinext",
- [LONG_INSN_RISBGN] = "risbgn",
- [LONG_INSN_RISBHG] = "risbhg",
- [LONG_INSN_RISBLG] = "risblg",
- [LONG_INSN_SLHHHR] = "slhhhr",
- [LONG_INSN_SLHHLR] = "slhhlr",
- [LONG_INSN_TABORT] = "tabort",
- [LONG_INSN_TBEGIN] = "tbegin",
- [LONG_INSN_TBEGINC] = "tbeginc",
- [LONG_INSN_PCISTG] = "pcistg",
- [LONG_INSN_MPCIFC] = "mpcifc",
- [LONG_INSN_STPCIFC] = "stpcifc",
- [LONG_INSN_PCISTB] = "pcistb",
-};
-
-static struct insn opcode[] = {
-#ifdef CONFIG_64BIT
- { "bprp", 0xc5, INSTR_MII_UPI },
- { "bpp", 0xc7, INSTR_SMI_U0RDP },
- { "trtr", 0xd0, INSTR_SS_L0RDRD },
- { "lmd", 0xef, INSTR_SS_RRRDRD3 },
-#endif
- { "spm", 0x04, INSTR_RR_R0 },
- { "balr", 0x05, INSTR_RR_RR },
- { "bctr", 0x06, INSTR_RR_RR },
- { "bcr", 0x07, INSTR_RR_UR },
- { "svc", 0x0a, INSTR_RR_U0 },
- { "bsm", 0x0b, INSTR_RR_RR },
- { "bassm", 0x0c, INSTR_RR_RR },
- { "basr", 0x0d, INSTR_RR_RR },
- { "mvcl", 0x0e, INSTR_RR_RR },
- { "clcl", 0x0f, INSTR_RR_RR },
- { "lpr", 0x10, INSTR_RR_RR },
- { "lnr", 0x11, INSTR_RR_RR },
- { "ltr", 0x12, INSTR_RR_RR },
- { "lcr", 0x13, INSTR_RR_RR },
- { "nr", 0x14, INSTR_RR_RR },
- { "clr", 0x15, INSTR_RR_RR },
- { "or", 0x16, INSTR_RR_RR },
- { "xr", 0x17, INSTR_RR_RR },
- { "lr", 0x18, INSTR_RR_RR },
- { "cr", 0x19, INSTR_RR_RR },
- { "ar", 0x1a, INSTR_RR_RR },
- { "sr", 0x1b, INSTR_RR_RR },
- { "mr", 0x1c, INSTR_RR_RR },
- { "dr", 0x1d, INSTR_RR_RR },
- { "alr", 0x1e, INSTR_RR_RR },
- { "slr", 0x1f, INSTR_RR_RR },
- { "lpdr", 0x20, INSTR_RR_FF },
- { "lndr", 0x21, INSTR_RR_FF },
- { "ltdr", 0x22, INSTR_RR_FF },
- { "lcdr", 0x23, INSTR_RR_FF },
- { "hdr", 0x24, INSTR_RR_FF },
- { "ldxr", 0x25, INSTR_RR_FF },
- { "mxr", 0x26, INSTR_RR_FF },
- { "mxdr", 0x27, INSTR_RR_FF },
- { "ldr", 0x28, INSTR_RR_FF },
- { "cdr", 0x29, INSTR_RR_FF },
- { "adr", 0x2a, INSTR_RR_FF },
- { "sdr", 0x2b, INSTR_RR_FF },
- { "mdr", 0x2c, INSTR_RR_FF },
- { "ddr", 0x2d, INSTR_RR_FF },
- { "awr", 0x2e, INSTR_RR_FF },
- { "swr", 0x2f, INSTR_RR_FF },
- { "lper", 0x30, INSTR_RR_FF },
- { "lner", 0x31, INSTR_RR_FF },
- { "lter", 0x32, INSTR_RR_FF },
- { "lcer", 0x33, INSTR_RR_FF },
- { "her", 0x34, INSTR_RR_FF },
- { "ledr", 0x35, INSTR_RR_FF },
- { "axr", 0x36, INSTR_RR_FF },
- { "sxr", 0x37, INSTR_RR_FF },
- { "ler", 0x38, INSTR_RR_FF },
- { "cer", 0x39, INSTR_RR_FF },
- { "aer", 0x3a, INSTR_RR_FF },
- { "ser", 0x3b, INSTR_RR_FF },
- { "mder", 0x3c, INSTR_RR_FF },
- { "der", 0x3d, INSTR_RR_FF },
- { "aur", 0x3e, INSTR_RR_FF },
- { "sur", 0x3f, INSTR_RR_FF },
- { "sth", 0x40, INSTR_RX_RRRD },
- { "la", 0x41, INSTR_RX_RRRD },
- { "stc", 0x42, INSTR_RX_RRRD },
- { "ic", 0x43, INSTR_RX_RRRD },
- { "ex", 0x44, INSTR_RX_RRRD },
- { "bal", 0x45, INSTR_RX_RRRD },
- { "bct", 0x46, INSTR_RX_RRRD },
- { "bc", 0x47, INSTR_RX_URRD },
- { "lh", 0x48, INSTR_RX_RRRD },
- { "ch", 0x49, INSTR_RX_RRRD },
- { "ah", 0x4a, INSTR_RX_RRRD },
- { "sh", 0x4b, INSTR_RX_RRRD },
- { "mh", 0x4c, INSTR_RX_RRRD },
- { "bas", 0x4d, INSTR_RX_RRRD },
- { "cvd", 0x4e, INSTR_RX_RRRD },
- { "cvb", 0x4f, INSTR_RX_RRRD },
- { "st", 0x50, INSTR_RX_RRRD },
- { "lae", 0x51, INSTR_RX_RRRD },
- { "n", 0x54, INSTR_RX_RRRD },
- { "cl", 0x55, INSTR_RX_RRRD },
- { "o", 0x56, INSTR_RX_RRRD },
- { "x", 0x57, INSTR_RX_RRRD },
- { "l", 0x58, INSTR_RX_RRRD },
- { "c", 0x59, INSTR_RX_RRRD },
- { "a", 0x5a, INSTR_RX_RRRD },
- { "s", 0x5b, INSTR_RX_RRRD },
- { "m", 0x5c, INSTR_RX_RRRD },
- { "d", 0x5d, INSTR_RX_RRRD },
- { "al", 0x5e, INSTR_RX_RRRD },
- { "sl", 0x5f, INSTR_RX_RRRD },
- { "std", 0x60, INSTR_RX_FRRD },
- { "mxd", 0x67, INSTR_RX_FRRD },
- { "ld", 0x68, INSTR_RX_FRRD },
- { "cd", 0x69, INSTR_RX_FRRD },
- { "ad", 0x6a, INSTR_RX_FRRD },
- { "sd", 0x6b, INSTR_RX_FRRD },
- { "md", 0x6c, INSTR_RX_FRRD },
- { "dd", 0x6d, INSTR_RX_FRRD },
- { "aw", 0x6e, INSTR_RX_FRRD },
- { "sw", 0x6f, INSTR_RX_FRRD },
- { "ste", 0x70, INSTR_RX_FRRD },
- { "ms", 0x71, INSTR_RX_RRRD },
- { "le", 0x78, INSTR_RX_FRRD },
- { "ce", 0x79, INSTR_RX_FRRD },
- { "ae", 0x7a, INSTR_RX_FRRD },
- { "se", 0x7b, INSTR_RX_FRRD },
- { "mde", 0x7c, INSTR_RX_FRRD },
- { "de", 0x7d, INSTR_RX_FRRD },
- { "au", 0x7e, INSTR_RX_FRRD },
- { "su", 0x7f, INSTR_RX_FRRD },
- { "ssm", 0x80, INSTR_S_RD },
- { "lpsw", 0x82, INSTR_S_RD },
- { "diag", 0x83, INSTR_RS_RRRD },
- { "brxh", 0x84, INSTR_RSI_RRP },
- { "brxle", 0x85, INSTR_RSI_RRP },
- { "bxh", 0x86, INSTR_RS_RRRD },
- { "bxle", 0x87, INSTR_RS_RRRD },
- { "srl", 0x88, INSTR_RS_R0RD },
- { "sll", 0x89, INSTR_RS_R0RD },
- { "sra", 0x8a, INSTR_RS_R0RD },
- { "sla", 0x8b, INSTR_RS_R0RD },
- { "srdl", 0x8c, INSTR_RS_R0RD },
- { "sldl", 0x8d, INSTR_RS_R0RD },
- { "srda", 0x8e, INSTR_RS_R0RD },
- { "slda", 0x8f, INSTR_RS_R0RD },
- { "stm", 0x90, INSTR_RS_RRRD },
- { "tm", 0x91, INSTR_SI_URD },
- { "mvi", 0x92, INSTR_SI_URD },
- { "ts", 0x93, INSTR_S_RD },
- { "ni", 0x94, INSTR_SI_URD },
- { "cli", 0x95, INSTR_SI_URD },
- { "oi", 0x96, INSTR_SI_URD },
- { "xi", 0x97, INSTR_SI_URD },
- { "lm", 0x98, INSTR_RS_RRRD },
- { "trace", 0x99, INSTR_RS_RRRD },
- { "lam", 0x9a, INSTR_RS_AARD },
- { "stam", 0x9b, INSTR_RS_AARD },
- { "mvcle", 0xa8, INSTR_RS_RRRD },
- { "clcle", 0xa9, INSTR_RS_RRRD },
- { "stnsm", 0xac, INSTR_SI_URD },
- { "stosm", 0xad, INSTR_SI_URD },
- { "sigp", 0xae, INSTR_RS_RRRD },
- { "mc", 0xaf, INSTR_SI_URD },
- { "lra", 0xb1, INSTR_RX_RRRD },
- { "stctl", 0xb6, INSTR_RS_CCRD },
- { "lctl", 0xb7, INSTR_RS_CCRD },
- { "cs", 0xba, INSTR_RS_RRRD },
- { "cds", 0xbb, INSTR_RS_RRRD },
- { "clm", 0xbd, INSTR_RS_RURD },
- { "stcm", 0xbe, INSTR_RS_RURD },
- { "icm", 0xbf, INSTR_RS_RURD },
- { "mvn", 0xd1, INSTR_SS_L0RDRD },
- { "mvc", 0xd2, INSTR_SS_L0RDRD },
- { "mvz", 0xd3, INSTR_SS_L0RDRD },
- { "nc", 0xd4, INSTR_SS_L0RDRD },
- { "clc", 0xd5, INSTR_SS_L0RDRD },
- { "oc", 0xd6, INSTR_SS_L0RDRD },
- { "xc", 0xd7, INSTR_SS_L0RDRD },
- { "mvck", 0xd9, INSTR_SS_RRRDRD },
- { "mvcp", 0xda, INSTR_SS_RRRDRD },
- { "mvcs", 0xdb, INSTR_SS_RRRDRD },
- { "tr", 0xdc, INSTR_SS_L0RDRD },
- { "trt", 0xdd, INSTR_SS_L0RDRD },
- { "ed", 0xde, INSTR_SS_L0RDRD },
- { "edmk", 0xdf, INSTR_SS_L0RDRD },
- { "pku", 0xe1, INSTR_SS_L0RDRD },
- { "unpku", 0xe2, INSTR_SS_L0RDRD },
- { "mvcin", 0xe8, INSTR_SS_L0RDRD },
- { "pka", 0xe9, INSTR_SS_L0RDRD },
- { "unpka", 0xea, INSTR_SS_L0RDRD },
- { "plo", 0xee, INSTR_SS_RRRDRD2 },
- { "srp", 0xf0, INSTR_SS_LIRDRD },
- { "mvo", 0xf1, INSTR_SS_LLRDRD },
- { "pack", 0xf2, INSTR_SS_LLRDRD },
- { "unpk", 0xf3, INSTR_SS_LLRDRD },
- { "zap", 0xf8, INSTR_SS_LLRDRD },
- { "cp", 0xf9, INSTR_SS_LLRDRD },
- { "ap", 0xfa, INSTR_SS_LLRDRD },
- { "sp", 0xfb, INSTR_SS_LLRDRD },
- { "mp", 0xfc, INSTR_SS_LLRDRD },
- { "dp", 0xfd, INSTR_SS_LLRDRD },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_01[] = {
-#ifdef CONFIG_64BIT
- { "ptff", 0x04, INSTR_E },
- { "pfpo", 0x0a, INSTR_E },
- { "sam64", 0x0e, INSTR_E },
-#endif
- { "pr", 0x01, INSTR_E },
- { "upt", 0x02, INSTR_E },
- { "sckpf", 0x07, INSTR_E },
- { "tam", 0x0b, INSTR_E },
- { "sam24", 0x0c, INSTR_E },
- { "sam31", 0x0d, INSTR_E },
- { "trap2", 0xff, INSTR_E },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_a5[] = {
-#ifdef CONFIG_64BIT
- { "iihh", 0x00, INSTR_RI_RU },
- { "iihl", 0x01, INSTR_RI_RU },
- { "iilh", 0x02, INSTR_RI_RU },
- { "iill", 0x03, INSTR_RI_RU },
- { "nihh", 0x04, INSTR_RI_RU },
- { "nihl", 0x05, INSTR_RI_RU },
- { "nilh", 0x06, INSTR_RI_RU },
- { "nill", 0x07, INSTR_RI_RU },
- { "oihh", 0x08, INSTR_RI_RU },
- { "oihl", 0x09, INSTR_RI_RU },
- { "oilh", 0x0a, INSTR_RI_RU },
- { "oill", 0x0b, INSTR_RI_RU },
- { "llihh", 0x0c, INSTR_RI_RU },
- { "llihl", 0x0d, INSTR_RI_RU },
- { "llilh", 0x0e, INSTR_RI_RU },
- { "llill", 0x0f, INSTR_RI_RU },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_a7[] = {
-#ifdef CONFIG_64BIT
- { "tmhh", 0x02, INSTR_RI_RU },
- { "tmhl", 0x03, INSTR_RI_RU },
- { "brctg", 0x07, INSTR_RI_RP },
- { "lghi", 0x09, INSTR_RI_RI },
- { "aghi", 0x0b, INSTR_RI_RI },
- { "mghi", 0x0d, INSTR_RI_RI },
- { "cghi", 0x0f, INSTR_RI_RI },
-#endif
- { "tmlh", 0x00, INSTR_RI_RU },
- { "tmll", 0x01, INSTR_RI_RU },
- { "brc", 0x04, INSTR_RI_UP },
- { "bras", 0x05, INSTR_RI_RP },
- { "brct", 0x06, INSTR_RI_RP },
- { "lhi", 0x08, INSTR_RI_RI },
- { "ahi", 0x0a, INSTR_RI_RI },
- { "mhi", 0x0c, INSTR_RI_RI },
- { "chi", 0x0e, INSTR_RI_RI },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_aa[] = {
-#ifdef CONFIG_64BIT
- { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
- { "rion", 0x01, INSTR_RI_RI },
- { "tric", 0x02, INSTR_RI_RI },
- { "rioff", 0x03, INSTR_RI_RI },
- { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_b2[] = {
-#ifdef CONFIG_64BIT
- { "stckf", 0x7c, INSTR_S_RD },
- { "lpp", 0x80, INSTR_S_RD },
- { "lcctl", 0x84, INSTR_S_RD },
- { "lpctl", 0x85, INSTR_S_RD },
- { "qsi", 0x86, INSTR_S_RD },
- { "lsctl", 0x87, INSTR_S_RD },
- { "qctri", 0x8e, INSTR_S_RD },
- { "stfle", 0xb0, INSTR_S_RD },
- { "lpswe", 0xb2, INSTR_S_RD },
- { "srnmb", 0xb8, INSTR_S_RD },
- { "srnmt", 0xb9, INSTR_S_RD },
- { "lfas", 0xbd, INSTR_S_RD },
- { "scctr", 0xe0, INSTR_RRE_RR },
- { "spctr", 0xe1, INSTR_RRE_RR },
- { "ecctr", 0xe4, INSTR_RRE_RR },
- { "epctr", 0xe5, INSTR_RRE_RR },
- { "ppa", 0xe8, INSTR_RRF_U0RR },
- { "etnd", 0xec, INSTR_RRE_R0 },
- { "ecpga", 0xed, INSTR_RRE_RR },
- { "tend", 0xf8, INSTR_S_00 },
- { "niai", 0xfa, INSTR_IE_UU },
- { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },
-#endif
- { "stidp", 0x02, INSTR_S_RD },
- { "sck", 0x04, INSTR_S_RD },
- { "stck", 0x05, INSTR_S_RD },
- { "sckc", 0x06, INSTR_S_RD },
- { "stckc", 0x07, INSTR_S_RD },
- { "spt", 0x08, INSTR_S_RD },
- { "stpt", 0x09, INSTR_S_RD },
- { "spka", 0x0a, INSTR_S_RD },
- { "ipk", 0x0b, INSTR_S_00 },
- { "ptlb", 0x0d, INSTR_S_00 },
- { "spx", 0x10, INSTR_S_RD },
- { "stpx", 0x11, INSTR_S_RD },
- { "stap", 0x12, INSTR_S_RD },
- { "sie", 0x14, INSTR_S_RD },
- { "pc", 0x18, INSTR_S_RD },
- { "sac", 0x19, INSTR_S_RD },
- { "cfc", 0x1a, INSTR_S_RD },
- { "servc", 0x20, INSTR_RRE_RR },
- { "ipte", 0x21, INSTR_RRE_RR },
- { "ipm", 0x22, INSTR_RRE_R0 },
- { "ivsk", 0x23, INSTR_RRE_RR },
- { "iac", 0x24, INSTR_RRE_R0 },
- { "ssar", 0x25, INSTR_RRE_R0 },
- { "epar", 0x26, INSTR_RRE_R0 },
- { "esar", 0x27, INSTR_RRE_R0 },
- { "pt", 0x28, INSTR_RRE_RR },
- { "iske", 0x29, INSTR_RRE_RR },
- { "rrbe", 0x2a, INSTR_RRE_RR },
- { "sske", 0x2b, INSTR_RRF_M0RR },
- { "tb", 0x2c, INSTR_RRE_0R },
- { "dxr", 0x2d, INSTR_RRE_FF },
- { "pgin", 0x2e, INSTR_RRE_RR },
- { "pgout", 0x2f, INSTR_RRE_RR },
- { "csch", 0x30, INSTR_S_00 },
- { "hsch", 0x31, INSTR_S_00 },
- { "msch", 0x32, INSTR_S_RD },
- { "ssch", 0x33, INSTR_S_RD },
- { "stsch", 0x34, INSTR_S_RD },
- { "tsch", 0x35, INSTR_S_RD },
- { "tpi", 0x36, INSTR_S_RD },
- { "sal", 0x37, INSTR_S_00 },
- { "rsch", 0x38, INSTR_S_00 },
- { "stcrw", 0x39, INSTR_S_RD },
- { "stcps", 0x3a, INSTR_S_RD },
- { "rchp", 0x3b, INSTR_S_00 },
- { "schm", 0x3c, INSTR_S_00 },
- { "bakr", 0x40, INSTR_RRE_RR },
- { "cksm", 0x41, INSTR_RRE_RR },
- { "sqdr", 0x44, INSTR_RRE_FF },
- { "sqer", 0x45, INSTR_RRE_FF },
- { "stura", 0x46, INSTR_RRE_RR },
- { "msta", 0x47, INSTR_RRE_R0 },
- { "palb", 0x48, INSTR_RRE_00 },
- { "ereg", 0x49, INSTR_RRE_RR },
- { "esta", 0x4a, INSTR_RRE_RR },
- { "lura", 0x4b, INSTR_RRE_RR },
- { "tar", 0x4c, INSTR_RRE_AR },
- { "cpya", 0x4d, INSTR_RRE_AA },
- { "sar", 0x4e, INSTR_RRE_AR },
- { "ear", 0x4f, INSTR_RRE_RA },
- { "csp", 0x50, INSTR_RRE_RR },
- { "msr", 0x52, INSTR_RRE_RR },
- { "mvpg", 0x54, INSTR_RRE_RR },
- { "mvst", 0x55, INSTR_RRE_RR },
- { "cuse", 0x57, INSTR_RRE_RR },
- { "bsg", 0x58, INSTR_RRE_RR },
- { "bsa", 0x5a, INSTR_RRE_RR },
- { "clst", 0x5d, INSTR_RRE_RR },
- { "srst", 0x5e, INSTR_RRE_RR },
- { "cmpsc", 0x63, INSTR_RRE_RR },
- { "siga", 0x74, INSTR_S_RD },
- { "xsch", 0x76, INSTR_S_00 },
- { "rp", 0x77, INSTR_S_RD },
- { "stcke", 0x78, INSTR_S_RD },
- { "sacf", 0x79, INSTR_S_RD },
- { "stsi", 0x7d, INSTR_S_RD },
- { "srnm", 0x99, INSTR_S_RD },
- { "stfpc", 0x9c, INSTR_S_RD },
- { "lfpc", 0x9d, INSTR_S_RD },
- { "tre", 0xa5, INSTR_RRE_RR },
- { "cuutf", 0xa6, INSTR_RRF_M0RR },
- { "cutfu", 0xa7, INSTR_RRF_M0RR },
- { "stfl", 0xb1, INSTR_S_RD },
- { "trap4", 0xff, INSTR_S_RD },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_b3[] = {
-#ifdef CONFIG_64BIT
- { "maylr", 0x38, INSTR_RRF_F0FF },
- { "mylr", 0x39, INSTR_RRF_F0FF },
- { "mayr", 0x3a, INSTR_RRF_F0FF },
- { "myr", 0x3b, INSTR_RRF_F0FF },
- { "mayhr", 0x3c, INSTR_RRF_F0FF },
- { "myhr", 0x3d, INSTR_RRF_F0FF },
- { "lpdfr", 0x70, INSTR_RRE_FF },
- { "lndfr", 0x71, INSTR_RRE_FF },
- { "cpsdr", 0x72, INSTR_RRF_F0FF2 },
- { "lcdfr", 0x73, INSTR_RRE_FF },
- { "sfasr", 0x85, INSTR_RRE_R0 },
- { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR },
- { "ldgr", 0xc1, INSTR_RRE_FR },
- { "cegr", 0xc4, INSTR_RRE_FR },
- { "cdgr", 0xc5, INSTR_RRE_FR },
- { "cxgr", 0xc6, INSTR_RRE_FR },
- { "cger", 0xc8, INSTR_RRF_U0RF },
- { "cgdr", 0xc9, INSTR_RRF_U0RF },
- { "cgxr", 0xca, INSTR_RRF_U0RF },
- { "lgdr", 0xcd, INSTR_RRE_RF },
- { "mdtra", 0xd0, INSTR_RRF_FUFF2 },
- { "ddtra", 0xd1, INSTR_RRF_FUFF2 },
- { "adtra", 0xd2, INSTR_RRF_FUFF2 },
- { "sdtra", 0xd3, INSTR_RRF_FUFF2 },
- { "ldetr", 0xd4, INSTR_RRF_0UFF },
- { "ledtr", 0xd5, INSTR_RRF_UUFF },
- { "ltdtr", 0xd6, INSTR_RRE_FF },
- { "fidtr", 0xd7, INSTR_RRF_UUFF },
- { "mxtra", 0xd8, INSTR_RRF_FUFF2 },
- { "dxtra", 0xd9, INSTR_RRF_FUFF2 },
- { "axtra", 0xda, INSTR_RRF_FUFF2 },
- { "sxtra", 0xdb, INSTR_RRF_FUFF2 },
- { "lxdtr", 0xdc, INSTR_RRF_0UFF },
- { "ldxtr", 0xdd, INSTR_RRF_UUFF },
- { "ltxtr", 0xde, INSTR_RRE_FF },
- { "fixtr", 0xdf, INSTR_RRF_UUFF },
- { "kdtr", 0xe0, INSTR_RRE_FF },
- { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF },
- { "cudtr", 0xe2, INSTR_RRE_RF },
- { "csdtr", 0xe3, INSTR_RRE_RF },
- { "cdtr", 0xe4, INSTR_RRE_FF },
- { "eedtr", 0xe5, INSTR_RRE_RF },
- { "esdtr", 0xe7, INSTR_RRE_RF },
- { "kxtr", 0xe8, INSTR_RRE_FF },
- { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR },
- { "cuxtr", 0xea, INSTR_RRE_RF },
- { "csxtr", 0xeb, INSTR_RRE_RF },
- { "cxtr", 0xec, INSTR_RRE_FF },
- { "eextr", 0xed, INSTR_RRE_RF },
- { "esxtr", 0xef, INSTR_RRE_RF },
- { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR },
- { "cdutr", 0xf2, INSTR_RRE_FR },
- { "cdstr", 0xf3, INSTR_RRE_FR },
- { "cedtr", 0xf4, INSTR_RRE_FF },
- { "qadtr", 0xf5, INSTR_RRF_FUFF },
- { "iedtr", 0xf6, INSTR_RRF_F0FR },
- { "rrdtr", 0xf7, INSTR_RRF_FFRU },
- { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF },
- { "cxutr", 0xfa, INSTR_RRE_FR },
- { "cxstr", 0xfb, INSTR_RRE_FR },
- { "cextr", 0xfc, INSTR_RRE_FF },
- { "qaxtr", 0xfd, INSTR_RRF_FUFF },
- { "iextr", 0xfe, INSTR_RRF_F0FR },
- { "rrxtr", 0xff, INSTR_RRF_FFRU },
-#endif
- { "lpebr", 0x00, INSTR_RRE_FF },
- { "lnebr", 0x01, INSTR_RRE_FF },
- { "ltebr", 0x02, INSTR_RRE_FF },
- { "lcebr", 0x03, INSTR_RRE_FF },
- { "ldebr", 0x04, INSTR_RRE_FF },
- { "lxdbr", 0x05, INSTR_RRE_FF },
- { "lxebr", 0x06, INSTR_RRE_FF },
- { "mxdbr", 0x07, INSTR_RRE_FF },
- { "kebr", 0x08, INSTR_RRE_FF },
- { "cebr", 0x09, INSTR_RRE_FF },
- { "aebr", 0x0a, INSTR_RRE_FF },
- { "sebr", 0x0b, INSTR_RRE_FF },
- { "mdebr", 0x0c, INSTR_RRE_FF },
- { "debr", 0x0d, INSTR_RRE_FF },
- { "maebr", 0x0e, INSTR_RRF_F0FF },
- { "msebr", 0x0f, INSTR_RRF_F0FF },
- { "lpdbr", 0x10, INSTR_RRE_FF },
- { "lndbr", 0x11, INSTR_RRE_FF },
- { "ltdbr", 0x12, INSTR_RRE_FF },
- { "lcdbr", 0x13, INSTR_RRE_FF },
- { "sqebr", 0x14, INSTR_RRE_FF },
- { "sqdbr", 0x15, INSTR_RRE_FF },
- { "sqxbr", 0x16, INSTR_RRE_FF },
- { "meebr", 0x17, INSTR_RRE_FF },
- { "kdbr", 0x18, INSTR_RRE_FF },
- { "cdbr", 0x19, INSTR_RRE_FF },
- { "adbr", 0x1a, INSTR_RRE_FF },
- { "sdbr", 0x1b, INSTR_RRE_FF },
- { "mdbr", 0x1c, INSTR_RRE_FF },
- { "ddbr", 0x1d, INSTR_RRE_FF },
- { "madbr", 0x1e, INSTR_RRF_F0FF },
- { "msdbr", 0x1f, INSTR_RRF_F0FF },
- { "lder", 0x24, INSTR_RRE_FF },
- { "lxdr", 0x25, INSTR_RRE_FF },
- { "lxer", 0x26, INSTR_RRE_FF },
- { "maer", 0x2e, INSTR_RRF_F0FF },
- { "mser", 0x2f, INSTR_RRF_F0FF },
- { "sqxr", 0x36, INSTR_RRE_FF },
- { "meer", 0x37, INSTR_RRE_FF },
- { "madr", 0x3e, INSTR_RRF_F0FF },
- { "msdr", 0x3f, INSTR_RRF_F0FF },
- { "lpxbr", 0x40, INSTR_RRE_FF },
- { "lnxbr", 0x41, INSTR_RRE_FF },
- { "ltxbr", 0x42, INSTR_RRE_FF },
- { "lcxbr", 0x43, INSTR_RRE_FF },
- { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF },
- { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF },
- { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF },
- { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF },
- { "kxbr", 0x48, INSTR_RRE_FF },
- { "cxbr", 0x49, INSTR_RRE_FF },
- { "axbr", 0x4a, INSTR_RRE_FF },
- { "sxbr", 0x4b, INSTR_RRE_FF },
- { "mxbr", 0x4c, INSTR_RRE_FF },
- { "dxbr", 0x4d, INSTR_RRE_FF },
- { "tbedr", 0x50, INSTR_RRF_U0FF },
- { "tbdr", 0x51, INSTR_RRF_U0FF },
- { "diebr", 0x53, INSTR_RRF_FUFF },
- { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF },
- { "thder", 0x58, INSTR_RRE_FF },
- { "thdr", 0x59, INSTR_RRE_FF },
- { "didbr", 0x5b, INSTR_RRF_FUFF },
- { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF },
- { "lpxr", 0x60, INSTR_RRE_FF },
- { "lnxr", 0x61, INSTR_RRE_FF },
- { "ltxr", 0x62, INSTR_RRE_FF },
- { "lcxr", 0x63, INSTR_RRE_FF },
- { "lxr", 0x65, INSTR_RRE_FF },
- { "lexr", 0x66, INSTR_RRE_FF },
- { "fixr", 0x67, INSTR_RRE_FF },
- { "cxr", 0x69, INSTR_RRE_FF },
- { "lzer", 0x74, INSTR_RRE_F0 },
- { "lzdr", 0x75, INSTR_RRE_F0 },
- { "lzxr", 0x76, INSTR_RRE_F0 },
- { "fier", 0x77, INSTR_RRE_FF },
- { "fidr", 0x7f, INSTR_RRE_FF },
- { "sfpc", 0x84, INSTR_RRE_RR_OPT },
- { "efpc", 0x8c, INSTR_RRE_RR_OPT },
- { "cefbr", 0x94, INSTR_RRE_RF },
- { "cdfbr", 0x95, INSTR_RRE_RF },
- { "cxfbr", 0x96, INSTR_RRE_RF },
- { "cfebr", 0x98, INSTR_RRF_U0RF },
- { "cfdbr", 0x99, INSTR_RRF_U0RF },
- { "cfxbr", 0x9a, INSTR_RRF_U0RF },
- { "cefr", 0xb4, INSTR_RRE_FR },
- { "cdfr", 0xb5, INSTR_RRE_FR },
- { "cxfr", 0xb6, INSTR_RRE_FR },
- { "cfer", 0xb8, INSTR_RRF_U0RF },
- { "cfdr", 0xb9, INSTR_RRF_U0RF },
- { "cfxr", 0xba, INSTR_RRF_U0RF },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_b9[] = {
-#ifdef CONFIG_64BIT
- { "lpgr", 0x00, INSTR_RRE_RR },
- { "lngr", 0x01, INSTR_RRE_RR },
- { "ltgr", 0x02, INSTR_RRE_RR },
- { "lcgr", 0x03, INSTR_RRE_RR },
- { "lgr", 0x04, INSTR_RRE_RR },
- { "lurag", 0x05, INSTR_RRE_RR },
- { "lgbr", 0x06, INSTR_RRE_RR },
- { "lghr", 0x07, INSTR_RRE_RR },
- { "agr", 0x08, INSTR_RRE_RR },
- { "sgr", 0x09, INSTR_RRE_RR },
- { "algr", 0x0a, INSTR_RRE_RR },
- { "slgr", 0x0b, INSTR_RRE_RR },
- { "msgr", 0x0c, INSTR_RRE_RR },
- { "dsgr", 0x0d, INSTR_RRE_RR },
- { "eregg", 0x0e, INSTR_RRE_RR },
- { "lrvgr", 0x0f, INSTR_RRE_RR },
- { "lpgfr", 0x10, INSTR_RRE_RR },
- { "lngfr", 0x11, INSTR_RRE_RR },
- { "ltgfr", 0x12, INSTR_RRE_RR },
- { "lcgfr", 0x13, INSTR_RRE_RR },
- { "lgfr", 0x14, INSTR_RRE_RR },
- { "llgfr", 0x16, INSTR_RRE_RR },
- { "llgtr", 0x17, INSTR_RRE_RR },
- { "agfr", 0x18, INSTR_RRE_RR },
- { "sgfr", 0x19, INSTR_RRE_RR },
- { "algfr", 0x1a, INSTR_RRE_RR },
- { "slgfr", 0x1b, INSTR_RRE_RR },
- { "msgfr", 0x1c, INSTR_RRE_RR },
- { "dsgfr", 0x1d, INSTR_RRE_RR },
- { "cgr", 0x20, INSTR_RRE_RR },
- { "clgr", 0x21, INSTR_RRE_RR },
- { "sturg", 0x25, INSTR_RRE_RR },
- { "lbr", 0x26, INSTR_RRE_RR },
- { "lhr", 0x27, INSTR_RRE_RR },
- { "cgfr", 0x30, INSTR_RRE_RR },
- { "clgfr", 0x31, INSTR_RRE_RR },
- { "cfdtr", 0x41, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF },
- { "bctgr", 0x46, INSTR_RRE_RR },
- { "cfxtr", 0x49, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR },
- { "cdftr", 0x51, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR },
- { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR },
- { "cxftr", 0x59, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF },
- { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR },
- { "cgrt", 0x60, INSTR_RRF_U0RR },
- { "clgrt", 0x61, INSTR_RRF_U0RR },
- { "crt", 0x72, INSTR_RRF_U0RR },
- { "clrt", 0x73, INSTR_RRF_U0RR },
- { "ngr", 0x80, INSTR_RRE_RR },
- { "ogr", 0x81, INSTR_RRE_RR },
- { "xgr", 0x82, INSTR_RRE_RR },
- { "flogr", 0x83, INSTR_RRE_RR },
- { "llgcr", 0x84, INSTR_RRE_RR },
- { "llghr", 0x85, INSTR_RRE_RR },
- { "mlgr", 0x86, INSTR_RRE_RR },
- { "dlgr", 0x87, INSTR_RRE_RR },
- { "alcgr", 0x88, INSTR_RRE_RR },
- { "slbgr", 0x89, INSTR_RRE_RR },
- { "cspg", 0x8a, INSTR_RRE_RR },
- { "idte", 0x8e, INSTR_RRF_R0RR },
- { "crdte", 0x8f, INSTR_RRF_RMRR },
- { "llcr", 0x94, INSTR_RRE_RR },
- { "llhr", 0x95, INSTR_RRE_RR },
- { "esea", 0x9d, INSTR_RRE_R0 },
- { "ptf", 0xa2, INSTR_RRE_R0 },
- { "lptea", 0xaa, INSTR_RRF_RURR },
- { "rrbm", 0xae, INSTR_RRE_RR },
- { "pfmf", 0xaf, INSTR_RRE_RR },
- { "cu14", 0xb0, INSTR_RRF_M0RR },
- { "cu24", 0xb1, INSTR_RRF_M0RR },
- { "cu41", 0xb2, INSTR_RRE_RR },
- { "cu42", 0xb3, INSTR_RRE_RR },
- { "trtre", 0xbd, INSTR_RRF_M0RR },
- { "srstu", 0xbe, INSTR_RRE_RR },
- { "trte", 0xbf, INSTR_RRF_M0RR },
- { "ahhhr", 0xc8, INSTR_RRF_R0RR2 },
- { "shhhr", 0xc9, INSTR_RRF_R0RR2 },
- { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 },
- { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 },
- { "chhr", 0xcd, INSTR_RRE_RR },
- { "clhhr", 0xcf, INSTR_RRE_RR },
- { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR },
- { "pcilg", 0xd2, INSTR_RRE_RR },
- { "rpcit", 0xd3, INSTR_RRE_RR },
- { "ahhlr", 0xd8, INSTR_RRF_R0RR2 },
- { "shhlr", 0xd9, INSTR_RRF_R0RR2 },
- { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 },
- { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 },
- { "chlr", 0xdd, INSTR_RRE_RR },
- { "clhlr", 0xdf, INSTR_RRE_RR },
- { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR },
- { "locgr", 0xe2, INSTR_RRF_M0RR },
- { "ngrk", 0xe4, INSTR_RRF_R0RR2 },
- { "ogrk", 0xe6, INSTR_RRF_R0RR2 },
- { "xgrk", 0xe7, INSTR_RRF_R0RR2 },
- { "agrk", 0xe8, INSTR_RRF_R0RR2 },
- { "sgrk", 0xe9, INSTR_RRF_R0RR2 },
- { "algrk", 0xea, INSTR_RRF_R0RR2 },
- { "slgrk", 0xeb, INSTR_RRF_R0RR2 },
- { "locr", 0xf2, INSTR_RRF_M0RR },
- { "nrk", 0xf4, INSTR_RRF_R0RR2 },
- { "ork", 0xf6, INSTR_RRF_R0RR2 },
- { "xrk", 0xf7, INSTR_RRF_R0RR2 },
- { "ark", 0xf8, INSTR_RRF_R0RR2 },
- { "srk", 0xf9, INSTR_RRF_R0RR2 },
- { "alrk", 0xfa, INSTR_RRF_R0RR2 },
- { "slrk", 0xfb, INSTR_RRF_R0RR2 },
-#endif
- { "kmac", 0x1e, INSTR_RRE_RR },
- { "lrvr", 0x1f, INSTR_RRE_RR },
- { "km", 0x2e, INSTR_RRE_RR },
- { "kmc", 0x2f, INSTR_RRE_RR },
- { "kimd", 0x3e, INSTR_RRE_RR },
- { "klmd", 0x3f, INSTR_RRE_RR },
- { "epsw", 0x8d, INSTR_RRE_RR },
- { "trtt", 0x90, INSTR_RRF_M0RR },
- { "trto", 0x91, INSTR_RRF_M0RR },
- { "trot", 0x92, INSTR_RRF_M0RR },
- { "troo", 0x93, INSTR_RRF_M0RR },
- { "mlr", 0x96, INSTR_RRE_RR },
- { "dlr", 0x97, INSTR_RRE_RR },
- { "alcr", 0x98, INSTR_RRE_RR },
- { "slbr", 0x99, INSTR_RRE_RR },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_c0[] = {
-#ifdef CONFIG_64BIT
- { "lgfi", 0x01, INSTR_RIL_RI },
- { "xihf", 0x06, INSTR_RIL_RU },
- { "xilf", 0x07, INSTR_RIL_RU },
- { "iihf", 0x08, INSTR_RIL_RU },
- { "iilf", 0x09, INSTR_RIL_RU },
- { "nihf", 0x0a, INSTR_RIL_RU },
- { "nilf", 0x0b, INSTR_RIL_RU },
- { "oihf", 0x0c, INSTR_RIL_RU },
- { "oilf", 0x0d, INSTR_RIL_RU },
- { "llihf", 0x0e, INSTR_RIL_RU },
- { "llilf", 0x0f, INSTR_RIL_RU },
-#endif
- { "larl", 0x00, INSTR_RIL_RP },
- { "brcl", 0x04, INSTR_RIL_UP },
- { "brasl", 0x05, INSTR_RIL_RP },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_c2[] = {
-#ifdef CONFIG_64BIT
- { "msgfi", 0x00, INSTR_RIL_RI },
- { "msfi", 0x01, INSTR_RIL_RI },
- { "slgfi", 0x04, INSTR_RIL_RU },
- { "slfi", 0x05, INSTR_RIL_RU },
- { "agfi", 0x08, INSTR_RIL_RI },
- { "afi", 0x09, INSTR_RIL_RI },
- { "algfi", 0x0a, INSTR_RIL_RU },
- { "alfi", 0x0b, INSTR_RIL_RU },
- { "cgfi", 0x0c, INSTR_RIL_RI },
- { "cfi", 0x0d, INSTR_RIL_RI },
- { "clgfi", 0x0e, INSTR_RIL_RU },
- { "clfi", 0x0f, INSTR_RIL_RU },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_c4[] = {
-#ifdef CONFIG_64BIT
- { "llhrl", 0x02, INSTR_RIL_RP },
- { "lghrl", 0x04, INSTR_RIL_RP },
- { "lhrl", 0x05, INSTR_RIL_RP },
- { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP },
- { "sthrl", 0x07, INSTR_RIL_RP },
- { "lgrl", 0x08, INSTR_RIL_RP },
- { "stgrl", 0x0b, INSTR_RIL_RP },
- { "lgfrl", 0x0c, INSTR_RIL_RP },
- { "lrl", 0x0d, INSTR_RIL_RP },
- { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP },
- { "strl", 0x0f, INSTR_RIL_RP },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_c6[] = {
-#ifdef CONFIG_64BIT
- { "exrl", 0x00, INSTR_RIL_RP },
- { "pfdrl", 0x02, INSTR_RIL_UP },
- { "cghrl", 0x04, INSTR_RIL_RP },
- { "chrl", 0x05, INSTR_RIL_RP },
- { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP },
- { "clhrl", 0x07, INSTR_RIL_RP },
- { "cgrl", 0x08, INSTR_RIL_RP },
- { "clgrl", 0x0a, INSTR_RIL_RP },
- { "cgfrl", 0x0c, INSTR_RIL_RP },
- { "crl", 0x0d, INSTR_RIL_RP },
- { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP },
- { "clrl", 0x0f, INSTR_RIL_RP },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_c8[] = {
-#ifdef CONFIG_64BIT
- { "mvcos", 0x00, INSTR_SSF_RRDRD },
- { "ectg", 0x01, INSTR_SSF_RRDRD },
- { "csst", 0x02, INSTR_SSF_RRDRD },
- { "lpd", 0x04, INSTR_SSF_RRDRD2 },
- { "lpdg", 0x05, INSTR_SSF_RRDRD2 },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_cc[] = {
-#ifdef CONFIG_64BIT
- { "brcth", 0x06, INSTR_RIL_RP },
- { "aih", 0x08, INSTR_RIL_RI },
- { "alsih", 0x0a, INSTR_RIL_RI },
- { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI },
- { "cih", 0x0d, INSTR_RIL_RI },
- { "clih", 0x0f, INSTR_RIL_RI },
-#endif
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_e3[] = {
-#ifdef CONFIG_64BIT
- { "ltg", 0x02, INSTR_RXY_RRRD },
- { "lrag", 0x03, INSTR_RXY_RRRD },
- { "lg", 0x04, INSTR_RXY_RRRD },
- { "cvby", 0x06, INSTR_RXY_RRRD },
- { "ag", 0x08, INSTR_RXY_RRRD },
- { "sg", 0x09, INSTR_RXY_RRRD },
- { "alg", 0x0a, INSTR_RXY_RRRD },
- { "slg", 0x0b, INSTR_RXY_RRRD },
- { "msg", 0x0c, INSTR_RXY_RRRD },
- { "dsg", 0x0d, INSTR_RXY_RRRD },
- { "cvbg", 0x0e, INSTR_RXY_RRRD },
- { "lrvg", 0x0f, INSTR_RXY_RRRD },
- { "lt", 0x12, INSTR_RXY_RRRD },
- { "lray", 0x13, INSTR_RXY_RRRD },
- { "lgf", 0x14, INSTR_RXY_RRRD },
- { "lgh", 0x15, INSTR_RXY_RRRD },
- { "llgf", 0x16, INSTR_RXY_RRRD },
- { "llgt", 0x17, INSTR_RXY_RRRD },
- { "agf", 0x18, INSTR_RXY_RRRD },
- { "sgf", 0x19, INSTR_RXY_RRRD },
- { "algf", 0x1a, INSTR_RXY_RRRD },
- { "slgf", 0x1b, INSTR_RXY_RRRD },
- { "msgf", 0x1c, INSTR_RXY_RRRD },
- { "dsgf", 0x1d, INSTR_RXY_RRRD },
- { "cg", 0x20, INSTR_RXY_RRRD },
- { "clg", 0x21, INSTR_RXY_RRRD },
- { "stg", 0x24, INSTR_RXY_RRRD },
- { "ntstg", 0x25, INSTR_RXY_RRRD },
- { "cvdy", 0x26, INSTR_RXY_RRRD },
- { "cvdg", 0x2e, INSTR_RXY_RRRD },
- { "strvg", 0x2f, INSTR_RXY_RRRD },
- { "cgf", 0x30, INSTR_RXY_RRRD },
- { "clgf", 0x31, INSTR_RXY_RRRD },
- { "ltgf", 0x32, INSTR_RXY_RRRD },
- { "cgh", 0x34, INSTR_RXY_RRRD },
- { "pfd", 0x36, INSTR_RXY_URRD },
- { "strvh", 0x3f, INSTR_RXY_RRRD },
- { "bctg", 0x46, INSTR_RXY_RRRD },
- { "sty", 0x50, INSTR_RXY_RRRD },
- { "msy", 0x51, INSTR_RXY_RRRD },
- { "ny", 0x54, INSTR_RXY_RRRD },
- { "cly", 0x55, INSTR_RXY_RRRD },
- { "oy", 0x56, INSTR_RXY_RRRD },
- { "xy", 0x57, INSTR_RXY_RRRD },
- { "ly", 0x58, INSTR_RXY_RRRD },
- { "cy", 0x59, INSTR_RXY_RRRD },
- { "ay", 0x5a, INSTR_RXY_RRRD },
- { "sy", 0x5b, INSTR_RXY_RRRD },
- { "mfy", 0x5c, INSTR_RXY_RRRD },
- { "aly", 0x5e, INSTR_RXY_RRRD },
- { "sly", 0x5f, INSTR_RXY_RRRD },
- { "sthy", 0x70, INSTR_RXY_RRRD },
- { "lay", 0x71, INSTR_RXY_RRRD },
- { "stcy", 0x72, INSTR_RXY_RRRD },
- { "icy", 0x73, INSTR_RXY_RRRD },
- { "laey", 0x75, INSTR_RXY_RRRD },
- { "lb", 0x76, INSTR_RXY_RRRD },
- { "lgb", 0x77, INSTR_RXY_RRRD },
- { "lhy", 0x78, INSTR_RXY_RRRD },
- { "chy", 0x79, INSTR_RXY_RRRD },
- { "ahy", 0x7a, INSTR_RXY_RRRD },
- { "shy", 0x7b, INSTR_RXY_RRRD },
- { "mhy", 0x7c, INSTR_RXY_RRRD },
- { "ng", 0x80, INSTR_RXY_RRRD },
- { "og", 0x81, INSTR_RXY_RRRD },
- { "xg", 0x82, INSTR_RXY_RRRD },
- { "lgat", 0x85, INSTR_RXY_RRRD },
- { "mlg", 0x86, INSTR_RXY_RRRD },
- { "dlg", 0x87, INSTR_RXY_RRRD },
- { "alcg", 0x88, INSTR_RXY_RRRD },
- { "slbg", 0x89, INSTR_RXY_RRRD },
- { "stpq", 0x8e, INSTR_RXY_RRRD },
- { "lpq", 0x8f, INSTR_RXY_RRRD },
- { "llgc", 0x90, INSTR_RXY_RRRD },
- { "llgh", 0x91, INSTR_RXY_RRRD },
- { "llc", 0x94, INSTR_RXY_RRRD },
- { "llh", 0x95, INSTR_RXY_RRRD },
- { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD },
- { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD },
- { "lat", 0x9f, INSTR_RXY_RRRD },
- { "lbh", 0xc0, INSTR_RXY_RRRD },
- { "llch", 0xc2, INSTR_RXY_RRRD },
- { "stch", 0xc3, INSTR_RXY_RRRD },
- { "lhh", 0xc4, INSTR_RXY_RRRD },
- { "llhh", 0xc6, INSTR_RXY_RRRD },
- { "sthh", 0xc7, INSTR_RXY_RRRD },
- { "lfhat", 0xc8, INSTR_RXY_RRRD },
- { "lfh", 0xca, INSTR_RXY_RRRD },
- { "stfh", 0xcb, INSTR_RXY_RRRD },
- { "chf", 0xcd, INSTR_RXY_RRRD },
- { "clhf", 0xcf, INSTR_RXY_RRRD },
- { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD },
- { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD },
-#endif
- { "lrv", 0x1e, INSTR_RXY_RRRD },
- { "lrvh", 0x1f, INSTR_RXY_RRRD },
- { "strv", 0x3e, INSTR_RXY_RRRD },
- { "ml", 0x96, INSTR_RXY_RRRD },
- { "dl", 0x97, INSTR_RXY_RRRD },
- { "alc", 0x98, INSTR_RXY_RRRD },
- { "slb", 0x99, INSTR_RXY_RRRD },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_e5[] = {
-#ifdef CONFIG_64BIT
- { "strag", 0x02, INSTR_SSE_RDRD },
- { "mvhhi", 0x44, INSTR_SIL_RDI },
- { "mvghi", 0x48, INSTR_SIL_RDI },
- { "mvhi", 0x4c, INSTR_SIL_RDI },
- { "chhsi", 0x54, INSTR_SIL_RDI },
- { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU },
- { "cghsi", 0x58, INSTR_SIL_RDI },
- { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU },
- { "chsi", 0x5c, INSTR_SIL_RDI },
- { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU },
- { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
- { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
-#endif
- { "lasp", 0x00, INSTR_SSE_RDRD },
- { "tprot", 0x01, INSTR_SSE_RDRD },
- { "mvcsk", 0x0e, INSTR_SSE_RDRD },
- { "mvcdk", 0x0f, INSTR_SSE_RDRD },
- { "", 0, INSTR_INVALID }
-};
-
-static struct insn opcode_eb[] = {
-#ifdef CONFIG_64BIT
- { "lmg", 0x04, INSTR_RSY_RRRD },
- { "srag", 0x0a, INSTR_RSY_RRRD },
- { "slag", 0x0b, INSTR_RSY_RRRD },
- { "srlg", 0x0c, INSTR_RSY_RRRD },
- { "sllg", 0x0d, INSTR_RSY_RRRD },
- { "tracg", 0x0f, INSTR_RSY_RRRD },
- { "csy", 0x14, INSTR_RSY_RRRD },
- { "rllg", 0x1c, INSTR_RSY_RRRD },
- { "clmh", 0x20, INSTR_RSY_RURD },
- { "clmy", 0x21, INSTR_RSY_RURD },
- { "clt", 0x23, INSTR_RSY_RURD },
- { "stmg", 0x24, INSTR_RSY_RRRD },
- { "stctg", 0x25, INSTR_RSY_CCRD },
- { "stmh", 0x26, INSTR_RSY_RRRD },
- { "clgt", 0x2b, INSTR_RSY_RURD },
- { "stcmh", 0x2c, INSTR_RSY_RURD },
- { "stcmy", 0x2d, INSTR_RSY_RURD },
- { "lctlg", 0x2f, INSTR_RSY_CCRD },
- { "csg", 0x30, INSTR_RSY_RRRD },
- { "cdsy", 0x31, INSTR_RSY_RRRD },
- { "cdsg", 0x3e, INSTR_RSY_RRRD },
- { "bxhg", 0x44, INSTR_RSY_RRRD },
- { "bxleg", 0x45, INSTR_RSY_RRRD },
- { "ecag", 0x4c, INSTR_RSY_RRRD },
- { "tmy", 0x51, INSTR_SIY_URD },
- { "mviy", 0x52, INSTR_SIY_URD },
- { "niy", 0x54, INSTR_SIY_URD },
- { "cliy", 0x55, INSTR_SIY_URD },
- { "oiy", 0x56, INSTR_SIY_URD },
- { "xiy", 0x57, INSTR_SIY_URD },
- { "asi", 0x6a, INSTR_SIY_IRD },
- { "alsi", 0x6e, INSTR_SIY_IRD },
- { "agsi", 0x7a, INSTR_SIY_IRD },
- { "algsi", 0x7e, INSTR_SIY_IRD },
- { "icmh", 0x80, INSTR_RSY_RURD },
- { "icmy", 0x81, INSTR_RSY_RURD },
- { "clclu", 0x8f, INSTR_RSY_RRRD },
- { "stmy", 0x90, INSTR_RSY_RRRD },
- { "lmh", 0x96, INSTR_RSY_RRRD },
- { "lmy", 0x98, INSTR_RSY_RRRD },
- { "lamy", 0x9a, INSTR_RSY_AARD },
- { "stamy", 0x9b, INSTR_RSY_AARD },
- { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD },
- { "sic", 0xd1, INSTR_RSY_RRRD },
- { "srak", 0xdc, INSTR_RSY_RRRD },
- { "slak", 0xdd, INSTR_RSY_RRRD },
- { "srlk", 0xde, INSTR_RSY_RRRD },
- { "sllk", 0xdf, INSTR_RSY_RRRD },
- { "locg", 0xe2, INSTR_RSY_RDRM },
- { "stocg", 0xe3, INSTR_RSY_RDRM },
- { "lang", 0xe4, INSTR_RSY_RRRD },
- { "laog", 0xe6, INSTR_RSY_RRRD },
- { "laxg", 0xe7, INSTR_RSY_RRRD },
- { "laag", 0xe8, INSTR_RSY_RRRD },
- { "laalg", 0xea, INSTR_RSY_RRRD },
- { "loc", 0xf2, INSTR_RSY_RDRM },
- { "stoc", 0xf3, INSTR_RSY_RDRM },
- { "lan", 0xf4, INSTR_RSY_RRRD },
- { "lao", 0xf6, INSTR_RSY_RRRD },
- { "lax", 0xf7, INSTR_RSY_RRRD },
- { "laa", 0xf8, INSTR_RSY_RRRD },
- { "laal", 0xfa, INSTR_RSY_RRRD },
- { "lric", 0x60, INSTR_RSY_RDRM },
- { "stric", 0x61, INSTR_RSY_RDRM },
- { "mric", 0x62, INSTR_RSY_RDRM },
-#endif
- { "rll", 0x1d, INSTR_RSY_RRRD },
- { "mvclu", 0x8e, INSTR_RSY_RRRD },
- { "tp", 0xc0, INSTR_RSL_R0RD },
- { "", 0, INSTR_INVALID }
+ [VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
+ [V_8] = { 4, 8, OPERAND_VR },
+ [V_12] = { 4, 12, OPERAND_VR },
+ [V_16] = { 4, 16, OPERAND_VR },
+ [V_32] = { 4, 32, OPERAND_VR },
+ [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR },
};
-static struct insn opcode_ec[] = {
-#ifdef CONFIG_64BIT
- { "brxhg", 0x44, INSTR_RIE_RRP },
- { "brxlg", 0x45, INSTR_RIE_RRP },
- { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU },
- { "rnsbg", 0x54, INSTR_RIE_RRUUU },
- { "risbg", 0x55, INSTR_RIE_RRUUU },
- { "rosbg", 0x56, INSTR_RIE_RRUUU },
- { "rxsbg", 0x57, INSTR_RIE_RRUUU },
- { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU },
- { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU },
- { "cgrj", 0x64, INSTR_RIE_RRPU },
- { "clgrj", 0x65, INSTR_RIE_RRPU },
- { "cgit", 0x70, INSTR_RIE_R0IU },
- { "clgit", 0x71, INSTR_RIE_R0UU },
- { "cit", 0x72, INSTR_RIE_R0IU },
- { "clfit", 0x73, INSTR_RIE_R0UU },
- { "crj", 0x76, INSTR_RIE_RRPU },
- { "clrj", 0x77, INSTR_RIE_RRPU },
- { "cgij", 0x7c, INSTR_RIE_RUPI },
- { "clgij", 0x7d, INSTR_RIE_RUPU },
- { "cij", 0x7e, INSTR_RIE_RUPI },
- { "clij", 0x7f, INSTR_RIE_RUPU },
- { "ahik", 0xd8, INSTR_RIE_RRI0 },
- { "aghik", 0xd9, INSTR_RIE_RRI0 },
- { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 },
- { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 },
- { "cgrb", 0xe4, INSTR_RRS_RRRDU },
- { "clgrb", 0xe5, INSTR_RRS_RRRDU },
- { "crb", 0xf6, INSTR_RRS_RRRDU },
- { "clrb", 0xf7, INSTR_RRS_RRRDU },
- { "cgib", 0xfc, INSTR_RIS_RURDI },
- { "clgib", 0xfd, INSTR_RIS_RURDU },
- { "cib", 0xfe, INSTR_RIS_RURDI },
- { "clib", 0xff, INSTR_RIS_RURDU },
-#endif
- { "", 0, INSTR_INVALID }
+static const unsigned char formats[][6] = {
+ [INSTR_E] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_IE_UU] = { U4_24, U4_28, 0, 0, 0, 0 },
+ [INSTR_MII_UPP] = { U4_8, J12_12, J24_24 },
+ [INSTR_RIE_R0IU] = { R_8, I16_16, U4_32, 0, 0, 0 },
+ [INSTR_RIE_R0UU] = { R_8, U16_16, U4_32, 0, 0, 0 },
+ [INSTR_RIE_RRI0] = { R_8, R_12, I16_16, 0, 0, 0 },
+ [INSTR_RIE_RRP] = { R_8, R_12, J16_16, 0, 0, 0 },
+ [INSTR_RIE_RRPU] = { R_8, R_12, U4_32, J16_16, 0, 0 },
+ [INSTR_RIE_RRUUU] = { R_8, R_12, U8_16, U8_24, U8_32, 0 },
+ [INSTR_RIE_RUI0] = { R_8, I16_16, U4_12, 0, 0, 0 },
+ [INSTR_RIE_RUPI] = { R_8, I8_32, U4_12, J16_16, 0, 0 },
+ [INSTR_RIE_RUPU] = { R_8, U8_32, U4_12, J16_16, 0, 0 },
+ [INSTR_RIL_RI] = { R_8, I32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_RP] = { R_8, J32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_RU] = { R_8, U32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_UP] = { U4_8, J32_16, 0, 0, 0, 0 },
+ [INSTR_RIS_RURDI] = { R_8, I8_32, U4_12, D_20, B_16, 0 },
+ [INSTR_RIS_RURDU] = { R_8, U8_32, U4_12, D_20, B_16, 0 },
+ [INSTR_RI_RI] = { R_8, I16_16, 0, 0, 0, 0 },
+ [INSTR_RI_RP] = { R_8, J16_16, 0, 0, 0, 0 },
+ [INSTR_RI_RU] = { R_8, U16_16, 0, 0, 0, 0 },
+ [INSTR_RI_UP] = { U4_8, J16_16, 0, 0, 0, 0 },
+ [INSTR_RRE_00] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_AA] = { A_24, A_28, 0, 0, 0, 0 },
+ [INSTR_RRE_AR] = { A_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRE_F0] = { F_24, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_FF] = { F_24, F_28, 0, 0, 0, 0 },
+ [INSTR_RRE_FR] = { F_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRE_R0] = { R_24, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_RA] = { R_24, A_28, 0, 0, 0, 0 },
+ [INSTR_RRE_RF] = { R_24, F_28, 0, 0, 0, 0 },
+ [INSTR_RRE_RR] = { R_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRF_0UFF] = { F_24, F_28, U4_20, 0, 0, 0 },
+ [INSTR_RRF_0URF] = { R_24, F_28, U4_20, 0, 0, 0 },
+ [INSTR_RRF_F0FF] = { F_16, F_24, F_28, 0, 0, 0 },
+ [INSTR_RRF_F0FF2] = { F_24, F_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_F0FR] = { F_24, F_16, R_28, 0, 0, 0 },
+ [INSTR_RRF_FFRU] = { F_24, F_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_FUFF] = { F_24, F_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRF_FUFF2] = { F_24, F_28, F_16, U4_20, 0, 0 },
+ [INSTR_RRF_R0RR] = { R_24, R_16, R_28, 0, 0, 0 },
+ [INSTR_RRF_R0RR2] = { R_24, R_28, R_16, 0, 0, 0 },
+ [INSTR_RRF_RURR] = { R_24, R_28, R_16, U4_20, 0, 0 },
+ [INSTR_RRF_RURR2] = { R_24, R_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
+ [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 },
+ [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRS_RRRDU] = { R_8, R_12, U4_32, D_20, B_16 },
+ [INSTR_RR_FF] = { F_8, F_12, 0, 0, 0, 0 },
+ [INSTR_RR_R0] = { R_8, 0, 0, 0, 0, 0 },
+ [INSTR_RR_RR] = { R_8, R_12, 0, 0, 0, 0 },
+ [INSTR_RR_U0] = { U8_8, 0, 0, 0, 0, 0 },
+ [INSTR_RR_UR] = { U4_8, R_12, 0, 0, 0, 0 },
+ [INSTR_RSI_RRP] = { R_8, R_12, J16_16, 0, 0, 0 },
+ [INSTR_RSL_LRDFU] = { F_32, D_20, L8_8, B_16, U4_36, 0 },
+ [INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
+ [INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
+ [INSTR_RS_AARD] = { A_8, A_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_CCRD] = { C_8, C_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_R0RD] = { R_8, D_20, B_16, 0, 0, 0 },
+ [INSTR_RS_RRRD] = { R_8, R_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_RURD] = { R_8, U4_12, D_20, B_16, 0, 0 },
+ [INSTR_RXE_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RXE_RRRDU] = { R_8, D_20, X_12, B_16, U4_32, 0 },
+ [INSTR_RXF_FRRDF] = { F_32, F_8, D_20, X_12, B_16, 0 },
+ [INSTR_RXY_FRRD] = { F_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RXY_RRRD] = { R_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RXY_URRD] = { U4_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_RRRD] = { R_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_URRD] = { U4_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 },
+ [INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 },
+ [INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 },
+ [INSTR_SIY_RD] = { D20_20, B_16, 0, 0, 0, 0 },
+ [INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 },
+ [INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 },
+ [INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 },
+ [INSTR_SMI_U0RDP] = { U4_8, J16_32, D_20, B_16, 0, 0 },
+ [INSTR_SSE_RDRD] = { D_20, B_16, D_36, B_32, 0, 0 },
+ [INSTR_SSF_RRDRD] = { D_20, B_16, D_36, B_32, R_8, 0 },
+ [INSTR_SSF_RRDRD2] = { R_8, D_20, B_16, D_36, B_32, 0 },
+ [INSTR_SS_L0RDRD] = { D_20, L8_8, B_16, D_36, B_32, 0 },
+ [INSTR_SS_L2RDRD] = { D_20, B_16, D_36, L8_8, B_32, 0 },
+ [INSTR_SS_LIRDRD] = { D_20, L4_8, B_16, D_36, B_32, U4_12 },
+ [INSTR_SS_LLRDRD] = { D_20, L4_8, B_16, D_36, L4_12, B_32 },
+ [INSTR_SS_RRRDRD] = { D_20, R_8, B_16, D_36, B_32, R_12 },
+ [INSTR_SS_RRRDRD2] = { R_8, D_20, B_16, R_12, D_36, B_32 },
+ [INSTR_SS_RRRDRD3] = { R_8, R_12, D_20, B_16, D_36, B_32 },
+ [INSTR_S_00] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_S_RD] = { D_20, B_16, 0, 0, 0, 0 },
+ [INSTR_VRI_V0IU] = { V_8, I16_16, U4_32, 0, 0, 0 },
+ [INSTR_VRI_V0U] = { V_8, U16_16, 0, 0, 0, 0 },
+ [INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
+ [INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
+ [INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
+ [INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
+ [INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
+ [INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
+ [INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
+ [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
+ [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 },
+ [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 },
+ [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 },
+ [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
+ [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
+ [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
+ [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
+ [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 },
+ [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 },
+ [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 },
+ [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 },
+ [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 },
+ [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 },
+ [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 },
+ [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 },
+ [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 },
+ [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 },
+ [INSTR_VRR_VVV0V] = { V_8, V_12, V_16, V_32, 0, 0 },
+ [INSTR_VRR_VVVU0UV] = { V_8, V_12, V_16, V_32, U4_28, U4_20 },
+ [INSTR_VRR_VVVU0V] = { V_8, V_12, V_16, V_32, U4_20, 0 },
+ [INSTR_VRR_VVVUU0V] = { V_8, V_12, V_16, V_32, U4_20, U4_24 },
+ [INSTR_VRS_RRDV] = { V_32, R_12, D_20, B_16, 0, 0 },
+ [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
+ [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
+ [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
+ [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
+ [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },
};
-static struct insn opcode_ed[] = {
-#ifdef CONFIG_64BIT
- { "mayl", 0x38, INSTR_RXF_FRRDF },
- { "myl", 0x39, INSTR_RXF_FRRDF },
- { "may", 0x3a, INSTR_RXF_FRRDF },
- { "my", 0x3b, INSTR_RXF_FRRDF },
- { "mayh", 0x3c, INSTR_RXF_FRRDF },
- { "myh", 0x3d, INSTR_RXF_FRRDF },
- { "sldt", 0x40, INSTR_RXF_FRRDF },
- { "srdt", 0x41, INSTR_RXF_FRRDF },
- { "slxt", 0x48, INSTR_RXF_FRRDF },
- { "srxt", 0x49, INSTR_RXF_FRRDF },
- { "tdcet", 0x50, INSTR_RXE_FRRD },
- { "tdget", 0x51, INSTR_RXE_FRRD },
- { "tdcdt", 0x54, INSTR_RXE_FRRD },
- { "tdgdt", 0x55, INSTR_RXE_FRRD },
- { "tdcxt", 0x58, INSTR_RXE_FRRD },
- { "tdgxt", 0x59, INSTR_RXE_FRRD },
- { "ley", 0x64, INSTR_RXY_FRRD },
- { "ldy", 0x65, INSTR_RXY_FRRD },
- { "stey", 0x66, INSTR_RXY_FRRD },
- { "stdy", 0x67, INSTR_RXY_FRRD },
- { "czdt", 0xa8, INSTR_RSL_LRDFU },
- { "czxt", 0xa9, INSTR_RSL_LRDFU },
- { "cdzt", 0xaa, INSTR_RSL_LRDFU },
- { "cxzt", 0xab, INSTR_RSL_LRDFU },
-#endif
- { "ldeb", 0x04, INSTR_RXE_FRRD },
- { "lxdb", 0x05, INSTR_RXE_FRRD },
- { "lxeb", 0x06, INSTR_RXE_FRRD },
- { "mxdb", 0x07, INSTR_RXE_FRRD },
- { "keb", 0x08, INSTR_RXE_FRRD },
- { "ceb", 0x09, INSTR_RXE_FRRD },
- { "aeb", 0x0a, INSTR_RXE_FRRD },
- { "seb", 0x0b, INSTR_RXE_FRRD },
- { "mdeb", 0x0c, INSTR_RXE_FRRD },
- { "deb", 0x0d, INSTR_RXE_FRRD },
- { "maeb", 0x0e, INSTR_RXF_FRRDF },
- { "mseb", 0x0f, INSTR_RXF_FRRDF },
- { "tceb", 0x10, INSTR_RXE_FRRD },
- { "tcdb", 0x11, INSTR_RXE_FRRD },
- { "tcxb", 0x12, INSTR_RXE_FRRD },
- { "sqeb", 0x14, INSTR_RXE_FRRD },
- { "sqdb", 0x15, INSTR_RXE_FRRD },
- { "meeb", 0x17, INSTR_RXE_FRRD },
- { "kdb", 0x18, INSTR_RXE_FRRD },
- { "cdb", 0x19, INSTR_RXE_FRRD },
- { "adb", 0x1a, INSTR_RXE_FRRD },
- { "sdb", 0x1b, INSTR_RXE_FRRD },
- { "mdb", 0x1c, INSTR_RXE_FRRD },
- { "ddb", 0x1d, INSTR_RXE_FRRD },
- { "madb", 0x1e, INSTR_RXF_FRRDF },
- { "msdb", 0x1f, INSTR_RXF_FRRDF },
- { "lde", 0x24, INSTR_RXE_FRRD },
- { "lxd", 0x25, INSTR_RXE_FRRD },
- { "lxe", 0x26, INSTR_RXE_FRRD },
- { "mae", 0x2e, INSTR_RXF_FRRDF },
- { "mse", 0x2f, INSTR_RXF_FRRDF },
- { "sqe", 0x34, INSTR_RXE_FRRD },
- { "sqd", 0x35, INSTR_RXE_FRRD },
- { "mee", 0x37, INSTR_RXE_FRRD },
- { "mad", 0x3e, INSTR_RXF_FRRDF },
- { "msd", 0x3f, INSTR_RXF_FRRDF },
- { "", 0, INSTR_INVALID }
-};
+static char long_insn_name[][7] = LONG_INSN_INITIALIZER;
+static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER;
+static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER;
/* Extracts an operand value from an instruction. */
static unsigned int extract_operand(unsigned char *code,
- const struct operand *operand)
+ const struct s390_operand *operand)
{
+ unsigned char *cp;
unsigned int val;
int bits;
/* Extract fragments of the operand byte for byte. */
- code += operand->shift / 8;
+ cp = code + operand->shift / 8;
bits = (operand->shift & 7) + operand->bits;
val = 0;
do {
val <<= 8;
- val |= (unsigned int) *code++;
+ val |= (unsigned int) *cp++;
bits -= 8;
} while (bits > 0);
val >>= -bits;
@@ -1593,6 +368,18 @@ static unsigned int extract_operand(unsigned char *code,
if (operand->bits == 20 && operand->shift == 20)
val = (val & 0xff) << 12 | (val & 0xfff00) >> 8;
+ /* Check for register extensions bits for vector registers. */
+ if (operand->flags & OPERAND_VR) {
+ if (operand->shift == 8)
+ val |= (code[4] & 8) << 1;
+ else if (operand->shift == 12)
+ val |= (code[4] & 4) << 2;
+ else if (operand->shift == 16)
+ val |= (code[4] & 2) << 3;
+ else if (operand->shift == 32)
+ val |= (code[4] & 1) << 4;
+ }
+
/* Sign extend value if the operand is signed or pc relative. */
if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) &&
(val & (1U << (operand->bits - 1))))
@@ -1608,123 +395,40 @@ static unsigned int extract_operand(unsigned char *code,
return val;
}
-static inline int insn_length(unsigned char code)
+struct s390_insn *find_insn(unsigned char *code)
{
- return ((((int) code + 64) >> 7) + 1) << 1;
-}
-
-static struct insn *find_insn(unsigned char *code)
-{
- unsigned char opfrag = code[1];
- unsigned char opmask;
- struct insn *table;
+ struct s390_opcode_offset *entry;
+ struct s390_insn *insn;
+ unsigned char opfrag;
+ int i;
- switch (code[0]) {
- case 0x01:
- table = opcode_01;
- break;
- case 0xa5:
- table = opcode_a5;
- break;
- case 0xa7:
- table = opcode_a7;
- break;
- case 0xaa:
- table = opcode_aa;
- break;
- case 0xb2:
- table = opcode_b2;
- break;
- case 0xb3:
- table = opcode_b3;
- break;
- case 0xb9:
- table = opcode_b9;
- break;
- case 0xc0:
- table = opcode_c0;
- break;
- case 0xc2:
- table = opcode_c2;
- break;
- case 0xc4:
- table = opcode_c4;
- break;
- case 0xc6:
- table = opcode_c6;
- break;
- case 0xc8:
- table = opcode_c8;
- break;
- case 0xcc:
- table = opcode_cc;
- break;
- case 0xe3:
- table = opcode_e3;
- opfrag = code[5];
- break;
- case 0xe5:
- table = opcode_e5;
- break;
- case 0xeb:
- table = opcode_eb;
- opfrag = code[5];
- break;
- case 0xec:
- table = opcode_ec;
- opfrag = code[5];
- break;
- case 0xed:
- table = opcode_ed;
- opfrag = code[5];
- break;
- default:
- table = opcode;
- opfrag = code[0];
- break;
- }
- while (table->format != INSTR_INVALID) {
- opmask = formats[table->format][0];
- if (table->opfrag == (opfrag & opmask))
- return table;
- table++;
+ /* Search the opcode offset table to find an entry which
+ * matches the beginning of the opcode. If there is no match
+ * the last entry will be used, which is the default entry for
+ * unknown instructions as well as 1-byte opcode instructions.
+ */
+ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) {
+ entry = &opcode_offset[i];
+ if (entry->opcode == code[0])
+ break;
}
- return NULL;
-}
-/**
- * insn_to_mnemonic - decode an s390 instruction
- * @instruction: instruction to decode
- * @buf: buffer to fill with mnemonic
- * @len: length of buffer
- *
- * Decode the instruction at @instruction and store the corresponding
- * mnemonic into @buf of length @len.
- * @buf is left unchanged if the instruction could not be decoded.
- * Returns:
- * %0 on success, %-ENOENT if the instruction was not found.
- */
-int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len)
-{
- struct insn *insn;
+ opfrag = *(code + entry->byte) & entry->mask;
- insn = find_insn(instruction);
- if (!insn)
- return -ENOENT;
- if (insn->name[0] == '\0')
- snprintf(buf, len, "%s",
- long_insn_name[(int) insn->name[1]]);
- else
- snprintf(buf, len, "%.5s", insn->name);
- return 0;
+ insn = &opcode[entry->offset];
+ for (i = 0; i < entry->count; i++) {
+ if (insn->opfrag == opfrag)
+ return insn;
+ insn++;
+ }
+ return NULL;
}
-EXPORT_SYMBOL_GPL(insn_to_mnemonic);
static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
{
- struct insn *insn;
+ struct s390_insn *insn;
const unsigned char *ops;
- const struct operand *operand;
+ const struct s390_operand *operand;
unsigned int value;
char separator;
char *ptr;
@@ -1733,14 +437,14 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr = buffer;
insn = find_insn(code);
if (insn) {
- if (insn->name[0] == '\0')
- ptr += sprintf(ptr, "%s\t",
- long_insn_name[(int) insn->name[1]]);
+ if (insn->zero == 0)
+ ptr += sprintf(ptr, "%.7s\t",
+ long_insn_name[insn->offset]);
else
ptr += sprintf(ptr, "%.5s\t", insn->name);
/* Extract the operands. */
separator = 0;
- for (ops = formats[insn->format] + 1, i = 0;
+ for (ops = formats[insn->format], i = 0;
*ops != 0 && i < 6; ops++, i++) {
operand = operands + *ops;
value = extract_operand(code, operand);
@@ -1754,18 +458,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
if (separator)
ptr += sprintf(ptr, "%c", separator);
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%r%u", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%f%u", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%a%u", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
- else if (operand->flags & OPERAND_PCREL)
- ptr += sprintf(ptr, "%lx", (signed int) value
- + addr);
- else if (operand->flags & OPERAND_SIGNED)
- ptr += sprintf(ptr, "%i", value);
+ ptr += sprintf(ptr, "%%c%u", value);
+ else if (operand->flags & OPERAND_VR)
+ ptr += sprintf(ptr, "%%v%u", value);
+ else if (operand->flags & OPERAND_PCREL) {
+ void *pcrel = (void *)((int)value + addr);
+
+ ptr += sprintf(ptr, "%px", pcrel);
+ } else if (operand->flags & OPERAND_SIGNED)
+ ptr += sprintf(ptr, "%i", (int)value);
else
ptr += sprintf(ptr, "%u", value);
if (operand->flags & OPERAND_DISP)
@@ -1781,33 +488,42 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
return (int) (ptr - buffer);
}
+static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
+{
+ if (user_mode(regs)) {
+ if (copy_from_user(dst, (char __user *)src, len))
+ return -EFAULT;
+ } else {
+ if (copy_from_kernel_nofault(dst, src, len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
void show_code(struct pt_regs *regs)
{
char *mode = user_mode(regs) ? "User" : "Krnl";
+ unsigned long addr, pswaddr;
unsigned char code[64];
- char buffer[64], *ptr;
- mm_segment_t old_fs;
- unsigned long addr;
+ char buffer[128], *ptr;
int start, end, opsize, hops, i;
+ pswaddr = regs->psw.addr;
+ if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED))
+ pswaddr = __forward_psw(regs->psw, regs->int_code >> 16);
/* Get a snapshot of the 64 bytes surrounding the fault address. */
- old_fs = get_fs();
- set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
- for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
- addr = regs->psw.addr - 34 + start;
- if (__copy_from_user(code + start - 2,
- (char __user *) addr, 2))
+ for (start = 32; start && pswaddr >= 34 - start; start -= 2) {
+ addr = pswaddr - 34 + start;
+ if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
break;
}
for (end = 32; end < 64; end += 2) {
- addr = regs->psw.addr + end - 32;
- if (__copy_from_user(code + end,
- (char __user *) addr, 2))
+ addr = pswaddr + end - 32;
+ if (copy_from_regs(regs, code + end, (void *)addr, 2))
break;
}
- set_fs(old_fs);
- /* Code snapshot useable ? */
- if ((regs->psw.addr & 1) || start >= end) {
+ /* Code snapshot usable ? */
+ if ((pswaddr & 1) || start >= end) {
printk("%s Code: Bad PSW.\n", mode);
return;
}
@@ -1830,13 +546,13 @@ void show_code(struct pt_regs *regs)
while (start < end && hops < 8) {
opsize = insn_length(code[start]);
if (start + opsize == 32)
- *ptr++ = '#';
+ *ptr++ = '*';
else if (start == 32)
*ptr++ = '>';
else
*ptr++ = ' ';
- addr = regs->psw.addr + start - 32;
- ptr += sprintf(ptr, ONELONG, addr);
+ addr = pswaddr + start - 32;
+ ptr += sprintf(ptr, "%px: ", (void *)addr);
if (start + opsize >= end)
break;
for (i = 0; i < opsize; i++)
@@ -1846,17 +562,17 @@ void show_code(struct pt_regs *regs)
*ptr++ = '\t';
ptr += print_insn(ptr, code + start, addr);
start += opsize;
- printk(buffer);
+ pr_cont("%s", buffer);
ptr = buffer;
ptr += sprintf(ptr, "\n ");
hops++;
}
- printk("\n");
+ pr_cont("\n");
}
void print_fn_code(unsigned char *code, unsigned long len)
{
- char buffer[64], *ptr;
+ char buffer[128], *ptr;
int opsize, i;
while (len) {
@@ -1864,7 +580,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
opsize = insn_length(*code);
if (opsize > len)
break;
- ptr += sprintf(ptr, "%p: ", code);
+ ptr += sprintf(ptr, "%px: ", code);
for (i = 0; i < opsize; i++)
ptr += sprintf(ptr, "%02x", code[i]);
*ptr++ = '\t';
@@ -1873,7 +589,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
ptr += print_insn(ptr, code, (unsigned long) code);
*ptr++ = '\n';
*ptr++ = 0;
- printk(buffer);
+ printk("%s", buffer);
code += opsize;
len -= opsize;
}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 87acc38f73c6..f9d52e05e01e 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Stack dumping functions
*
@@ -11,159 +12,171 @@
#include <linux/export.h>
#include <linux/kdebug.h>
#include <linux/ptrace.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
+#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/debug.h>
+#include <asm/dis.h>
#include <asm/ipl.h>
+#include <asm/unwind.h>
-#ifndef CONFIG_64BIT
-#define LONG "%08lx "
-#define FOURLONG "%08lx %08lx %08lx %08lx\n"
-static int kstack_depth_to_print = 12;
-#else /* CONFIG_64BIT */
-#define LONG "%016lx "
-#define FOURLONG "%016lx %016lx %016lx %016lx\n"
-static int kstack_depth_to_print = 20;
-#endif /* CONFIG_64BIT */
-
-/*
- * For show_trace we have tree different stack to consider:
- * - the panic stack which is used if the kernel stack has overflown
- * - the asynchronous interrupt stack (cpu related)
- * - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
- * touch all of them. The order is: panic stack, async stack, sync stack.
- */
-static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+const char *stack_type_name(enum stack_type type)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
+ switch (type) {
+ case STACK_TYPE_TASK:
+ return "task";
+ case STACK_TYPE_IRQ:
+ return "irq";
+ case STACK_TYPE_NODAT:
+ return "nodat";
+ case STACK_TYPE_RESTART:
+ return "restart";
+ default:
+ return "unknown";
}
}
+EXPORT_SYMBOL_GPL(stack_type_name);
+
+static inline bool in_stack(unsigned long sp, struct stack_info *info,
+ enum stack_type type, unsigned long stack)
+{
+ if (sp < stack || sp >= stack + THREAD_SIZE)
+ return false;
+ info->type = type;
+ info->begin = stack;
+ info->end = stack + THREAD_SIZE;
+ return true;
+}
-static void show_trace(struct task_struct *task, unsigned long *stack)
+static bool in_task_stack(unsigned long sp, struct task_struct *task,
+ struct stack_info *info)
{
- const unsigned long frame_size =
- STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- register unsigned long __r15 asm ("15");
- unsigned long sp;
+ unsigned long stack = (unsigned long)task_stack_page(task);
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
-#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp,
- S390_lowcore.panic_stack + frame_size - 4096,
- S390_lowcore.panic_stack + frame_size);
-#endif
- sp = __show_trace(sp,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size);
- if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
- else
- __show_trace(sp, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
- if (!task)
- task = current;
- debug_show_held_locks(task);
+ return in_stack(sp, info, STACK_TYPE_TASK, stack);
+}
+
+static bool in_irq_stack(unsigned long sp, struct stack_info *info)
+{
+ unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
+
+ return in_stack(sp, info, STACK_TYPE_IRQ, stack);
+}
+
+static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
+{
+ unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
+
+ return in_stack(sp, info, STACK_TYPE_NODAT, stack);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
+static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
{
- register unsigned long *__r15 asm ("15");
- unsigned long *stack;
- int i;
+ unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
+ return in_stack(sp, info, STACK_TYPE_MCCK, stack);
+}
+
+static bool in_restart_stack(unsigned long sp, struct stack_info *info)
+{
+ unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
+
+ return in_stack(sp, info, STACK_TYPE_RESTART, stack);
+}
+
+int get_stack_info(unsigned long sp, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
+{
if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
- break;
- if ((i * sizeof(long) % 32) == 0)
- printk("%s ", i == 0 ? "" : "\n");
- printk(LONG, *stack++);
- }
- printk("\n");
- show_trace(task, sp);
+ goto unknown;
+
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto unknown;
+
+ /* Check per-task stack */
+ if (in_task_stack(sp, task, info))
+ goto recursion_check;
+
+ if (task != current)
+ goto unknown;
+
+ /* Check per-cpu stacks */
+ if (!in_irq_stack(sp, info) &&
+ !in_nodat_stack(sp, info) &&
+ !in_restart_stack(sp, info) &&
+ !in_mcck_stack(sp, info))
+ goto unknown;
+
+recursion_check:
+ /*
+ * Make sure we don't iterate through any given stack more than once.
+ * If it comes up a second time then there's something wrong going on:
+ * just break out and report an unknown stack type.
+ */
+ if (*visit_mask & (1UL << info->type))
+ goto unknown;
+ *visit_mask |= 1UL << info->type;
+ return 0;
+unknown:
+ info->type = STACK_TYPE_UNKNOWN;
+ return -EINVAL;
}
-static void show_last_breaking_event(struct pt_regs *regs)
+void show_stack(struct task_struct *task, unsigned long *stack,
+ const char *loglvl)
{
-#ifdef CONFIG_64BIT
- printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
-#endif
+ struct unwind_state state;
+
+ printk("%sCall Trace:\n", loglvl);
+ unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
+ printk(state.reliable ? "%s [<%016lx>] %pSR \n" :
+ "%s([<%016lx>] %pSR)\n",
+ loglvl, state.ip, (void *) state.ip);
+ debug_show_held_locks(task ? : current);
}
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
+static void show_last_breaking_event(struct pt_regs *regs)
{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] ", regs->last_break);
+ if (user_mode(regs)) {
+ print_vma_addr(KERN_CONT, regs->last_break);
+ pr_cont("\n");
+ } else {
+ pr_cont("%pSR\n", (void *)regs->last_break);
+ }
}
void show_registers(struct pt_regs *regs)
{
+ struct psw_bits *psw = &psw_bits(regs->psw);
+ unsigned long pswaddr;
char *mode;
+ pswaddr = regs->psw.addr;
+ if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED))
+ pswaddr = __forward_psw(regs->psw, regs->int_code >> 16);
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr);
- print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN);
+ printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)pswaddr);
+ if (!user_mode(regs))
+ pr_cont(" (%pSR)", (void *)pswaddr);
+ pr_cont("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
-#ifdef CONFIG_64BIT
- printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
-#endif
- printk("\n%s GPRS: " FOURLONG, mode,
+ "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
+ psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm);
+ pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+ printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
- printk(" " FOURLONG,
+ printk(" %016lx %016lx %016lx %016lx\n",
regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
- printk(" " FOURLONG,
+ printk(" %016lx %016lx %016lx %016lx\n",
regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
- printk(" " FOURLONG,
+ printk(" %016lx %016lx %016lx %016lx\n",
regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
show_code(regs);
}
@@ -174,13 +187,13 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT);
show_last_breaking_event(regs);
}
static DEFINE_SPINLOCK(die_lock);
-void die(struct pt_regs *regs, const char *str)
+void __noreturn die(struct pt_regs *regs, const char *str)
{
static int die_counter;
@@ -190,17 +203,12 @@ void die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
- printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
+ printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff,
+ regs->int_code >> 17, ++die_counter);
+ pr_cont("SMP ");
+ if (debug_pagealloc_enabled())
+ pr_cont("DEBUG_PAGEALLOC");
+ pr_cont("\n");
notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
print_modules();
show_regs(regs);
@@ -212,5 +220,5 @@ void die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception: panic_on_oops");
oops_exit();
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index dc8770d7173c..b27239c03d79 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -1,24 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2009
* Author(s): Hongjie Yang <hongjie@us.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#define KMSG_COMPONENT "setup"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "setup: " fmt
+#include <linux/sched/debug.h>
+#include <linux/cpufeature.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/ftrace.h>
#include <linux/lockdep.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/asm-extable.h>
+#include <linux/memblock.h>
+#include <linux/kasan.h>
+#include <asm/access-regs.h>
+#include <asm/asm-offsets.h>
+#include <asm/machine.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
@@ -28,177 +36,37 @@
#include <asm/cpcmd.h>
#include <asm/sclp.h>
#include <asm/facility.h>
+#include <asm/boot_data.h>
#include "entry.h"
-/*
- * Create a Kernel NSS if the SAVESYS= parameter is defined
- */
-#define DEFSYS_CMD_SIZE 128
-#define SAVESYS_CMD_SIZE 32
-
-char kernel_nss_name[NSS_NAME_SIZE + 1];
-
-static void __init setup_boot_command_line(void);
-
-/*
- * Get the TOD clock running.
- */
-static void __init reset_tod_clock(void)
-{
- u64 time;
-
- if (store_tod_clock(&time) == 0)
- return;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
- disabled_wait(0);
-
- sched_clock_base_cc = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = sched_clock_base_cc;
-}
-
-#ifdef CONFIG_SHARED_KERNEL
-int __init savesys_ipl_nss(char *cmd, const int cmdlen);
-
-asm(
- " .section .init.text,\"ax\",@progbits\n"
- " .align 4\n"
- " .type savesys_ipl_nss, @function\n"
- "savesys_ipl_nss:\n"
-#ifdef CONFIG_64BIT
- " stmg 6,15,48(15)\n"
- " lgr 14,3\n"
- " sam31\n"
- " diag 2,14,0x8\n"
- " sam64\n"
- " lgr 2,14\n"
- " lmg 6,15,48(15)\n"
-#else
- " stm 6,15,24(15)\n"
- " lr 14,3\n"
- " diag 2,14,0x8\n"
- " lr 2,14\n"
- " lm 6,15,24(15)\n"
+#define __decompressor_handled_param(func, param) \
+static int __init ignore_decompressor_param_##func(char *s) \
+{ \
+ return 0; \
+} \
+early_param(#param, ignore_decompressor_param_##func)
+
+#define decompressor_handled_param(param) __decompressor_handled_param(param, param)
+
+decompressor_handled_param(mem);
+decompressor_handled_param(vmalloc);
+decompressor_handled_param(dfltcc);
+decompressor_handled_param(facilities);
+decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
+__decompressor_handled_param(debug_alternative, debug-alternative);
+#if IS_ENABLED(CONFIG_KVM)
+decompressor_handled_param(prot_virt);
#endif
- " br 14\n"
- " .size savesys_ipl_nss, .-savesys_ipl_nss\n"
- " .previous\n");
-static __initdata char upper_command_line[COMMAND_LINE_SIZE];
-
-static noinline __init void create_kernel_nss(void)
+static void __init kasan_early_init(void)
{
- unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
-#ifdef CONFIG_BLK_DEV_INITRD
- unsigned int sinitrd_pfn, einitrd_pfn;
+#ifdef CONFIG_KASAN
+ init_task.kasan_depth = 0;
+ kasan_init_generic();
#endif
- int response;
- int hlen;
- size_t len;
- char *savesys_ptr;
- char defsys_cmd[DEFSYS_CMD_SIZE];
- char savesys_cmd[SAVESYS_CMD_SIZE];
-
- /* Do nothing if we are not running under VM */
- if (!MACHINE_IS_VM)
- return;
-
- /* Convert COMMAND_LINE to upper case */
- for (i = 0; i < strlen(boot_command_line); i++)
- upper_command_line[i] = toupper(boot_command_line[i]);
-
- savesys_ptr = strstr(upper_command_line, "SAVESYS=");
-
- if (!savesys_ptr)
- return;
-
- savesys_ptr += 8; /* Point to the beginning of the NSS name */
- for (i = 0; i < NSS_NAME_SIZE; i++) {
- if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0')
- break;
- kernel_nss_name[i] = savesys_ptr[i];
- }
-
- stext_pfn = PFN_DOWN(__pa(&_stext));
- eshared_pfn = PFN_DOWN(__pa(&_eshared));
- end_pfn = PFN_UP(__pa(&_end));
- min_size = end_pfn << 2;
-
- hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE,
- "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
- kernel_nss_name, stext_pfn - 1, stext_pfn,
- eshared_pfn - 1, eshared_pfn, end_pfn);
-
-#ifdef CONFIG_BLK_DEV_INITRD
- if (INITRD_START && INITRD_SIZE) {
- sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
- einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
- min_size = einitrd_pfn << 2;
- hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
- " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
- }
-#endif
-
- snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
- " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
- defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
- snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
- kernel_nss_name, kernel_nss_name);
- savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
-
- __cpcmd(defsys_cmd, NULL, 0, &response);
-
- if (response != 0) {
- pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
- response);
- kernel_nss_name[0] = '\0';
- return;
- }
-
- len = strlen(savesys_cmd);
- ASCEBC(savesys_cmd, len);
- response = savesys_ipl_nss(savesys_cmd, len);
-
- /* On success: response is equal to the command size,
- * max SAVESYS_CMD_SIZE
- * On error: response contains the numeric portion of cp error message.
- * for SAVESYS it will be >= 263
- * for missing privilege class, it will be 1
- */
- if (response > SAVESYS_CMD_SIZE || response == 1) {
- pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
- response);
- kernel_nss_name[0] = '\0';
- return;
- }
-
- /* re-initialize cputime accounting. */
- sched_clock_base_cc = get_tod_clock();
- S390_lowcore.last_update_clock = sched_clock_base_cc;
- S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
- S390_lowcore.user_timer = 0;
- S390_lowcore.system_timer = 0;
- asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer));
-
- /* re-setup boot command line with new ipl vm parms */
- ipl_update_parameters();
- setup_boot_command_line();
-
- ipl_flags = IPL_NSS_VALID;
-}
-
-#else /* CONFIG_SHARED_KERNEL */
-
-static inline void create_kernel_nss(void) { }
-
-#endif /* CONFIG_SHARED_KERNEL */
-
-/*
- * Clear bss memory
- */
-static noinline __init void clear_bss_section(void)
-{
- memset(__bss_start, 0, __bss_stop - __bss_start);
}
/*
@@ -206,284 +74,170 @@ static noinline __init void clear_bss_section(void)
*/
static noinline __init void init_kernel_storage_key(void)
{
+#if PAGE_DEFAULT_KEY
unsigned long end_pfn, init_pfn;
- end_pfn = PFN_UP(__pa(&_end));
+ end_pfn = PFN_UP(__pa(_end));
for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
page_set_storage_key(init_pfn << PAGE_SHIFT,
PAGE_DEFAULT_KEY, 0);
+#endif
}
static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-static noinline __init void detect_machine_type(void)
+/* Remove leading, trailing and double whitespace. */
+static inline void strim_all(char *str)
+{
+ char *s;
+
+ s = strim(str);
+ if (s != str)
+ memmove(str, s, strlen(s));
+ while (*str) {
+ if (!isspace(*str++))
+ continue;
+ if (isspace(*str)) {
+ s = skip_spaces(str);
+ memmove(str, s, strlen(s) + 1);
+ }
+ }
+}
+
+char arch_hw_string[128];
+
+static noinline __init void setup_arch_string(void)
{
- struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+ struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+ struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
+ char mstr[80], hvstr[17];
- /* Check current-configuration-level */
- if (stsi(NULL, 0, 0, 0) <= 2) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ if (stsi(mach, 1, 1, 1))
return;
+ EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
+ EBCASC(mach->type, sizeof(mach->type));
+ EBCASC(mach->model, sizeof(mach->model));
+ EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
+ scnprintf(mstr, sizeof(mstr), "%-16.16s %-4.4s %-16.16s %-16.16s",
+ mach->manufacturer, mach->type,
+ mach->model, mach->model_capacity);
+ strim_all(mstr);
+ if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
+ EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
+ scnprintf(hvstr, sizeof(hvstr), "%-16.16s", vm->vm[0].cpi);
+ strim_all(hvstr);
+ } else {
+ scnprintf(hvstr, sizeof(hvstr), "%s",
+ machine_is_lpar() ? "LPAR" :
+ machine_is_vm() ? "z/VM" :
+ machine_is_kvm() ? "KVM" : "unknown");
}
- /* Get virtual-machine cpu information. */
- if (stsi(vmms, 3, 2, 2) || !vmms->count)
- return;
-
- /* Running under KVM? If not we assume z/VM */
- if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
- else
- S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+ scnprintf(arch_hw_string, sizeof(arch_hw_string), "HW: %s (%s)", mstr, hvstr);
+ dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}
static __init void setup_topology(void)
{
-#ifdef CONFIG_64BIT
int max_mnest;
- if (!test_facility(11))
+ if (!cpu_has_topology())
return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
}
topology_max_mnest = max_mnest;
-#endif
}
-static void early_pgm_check_handler(void)
+void __init __do_early_pgm_check(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
- unsigned long addr;
+ struct lowcore *lc = get_lowcore();
+ unsigned long ip;
- addr = S390_lowcore.program_old_psw.addr;
- fixup = search_exception_tables(addr & PSW_ADDR_INSN);
- if (!fixup)
- disabled_wait(0);
- S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
+ regs->last_break = lc->pgm_last_break;
+ ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+ /* Monitor Event? Might be a warning */
+ if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+ if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+ return;
+ }
+ if (fixup_exception(regs))
+ return;
+ /*
+ * Unhandled exception - system cannot continue but try to get some
+ * helpful messages to the console. Use early_printk() to print
+ * some basic information in case it is too early for printk().
+ */
+ register_early_console();
+ early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+ regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+ show_regs(regs);
+ disabled_wait();
}
static noinline __init void setup_lowcore_early(void)
{
+ struct lowcore *lc = get_lowcore();
psw_t psw;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
- S390_lowcore.external_new_psw = psw;
- psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
- S390_lowcore.program_new_psw = psw;
- s390_base_pgm_handler_fn = early_pgm_check_handler;
-}
-
-static noinline __init void setup_facility_list(void)
-{
- stfle(S390_lowcore.stfle_fac_list,
- ARRAY_SIZE(S390_lowcore.stfle_fac_list));
-}
-
-static __init void detect_mvpg(void)
-{
-#ifndef CONFIG_64BIT
- int rc;
-
- asm volatile(
- " la 0,0\n"
- " mvpg %2,%2\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
-#endif
-}
-
-static __init void detect_ieee(void)
-{
-#ifndef CONFIG_64BIT
- int rc, tmp;
-
- asm volatile(
- " efpc %1,0\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
-#endif
+ psw.addr = (unsigned long)early_pgm_check_handler;
+ psw.mask = PSW_KERNEL_BITS;
+ lc->program_new_psw = psw;
+ lc->preempt_count = INIT_PREEMPT_COUNT;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
-static __init void detect_csp(void)
+static inline void save_vector_registers(void)
{
-#ifndef CONFIG_64BIT
- int rc;
-
- asm volatile(
- " la 0,0\n"
- " la 1,0\n"
- " la 2,4\n"
- " csp 0,2\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
+#ifdef CONFIG_CRASH_DUMP
+ if (cpu_has_vx())
+ save_vx_regs(boot_cpu_vector_save_area);
#endif
}
-static __init void detect_diag9c(void)
+static inline void setup_low_address_protection(void)
{
- unsigned int cpu_address;
- int rc;
-
- cpu_address = stap();
- asm volatile(
- " diag %2,0,0x9c\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+ system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
}
-static __init void detect_diag44(void)
+static inline void setup_access_registers(void)
{
-#ifdef CONFIG_64BIT
- int rc;
+ unsigned int acrs[NUM_ACRS] = { 0 };
- asm volatile(
- " diag 0,0,0x44\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-#endif
-}
-
-static __init void detect_machine_facilities(void)
-{
-#ifdef CONFIG_64BIT
- if (test_facility(8)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
- __ctl_set_bit(0, 23);
- }
- if (test_facility(78))
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
- if (test_facility(3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(27))
- S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
- if (test_facility(40))
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
- if (test_facility(50) && test_facility(73))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
- if (test_facility(66))
- S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
-#endif
-}
-
-static __init void rescue_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
- unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
- /*
- * Just like in case of IPL from VM reader we make sure there is a
- * gap of 4MB between end of kernel and start of initrd.
- * That way we can also be sure that saving an NSS will succeed,
- * which however only requires different segments.
- */
- if (!INITRD_START || !INITRD_SIZE)
- return;
- if (INITRD_START >= min_initrd_addr)
- return;
- memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
- INITRD_START = min_initrd_addr;
-#endif
-}
-
-/* Set up boot command line */
-static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
-{
- char *parm, *delim;
- size_t rc, len;
-
- len = strlen(boot_command_line);
-
- delim = boot_command_line + len; /* '\0' character position */
- parm = boot_command_line + len + 1; /* append right after '\0' */
-
- rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
- if (rc) {
- if (*parm == '=')
- memmove(boot_command_line, parm + 1, rc);
- else
- *delim = ' '; /* replace '\0' with space */
- }
-}
-
-static inline int has_ebcdic_char(const char *str)
-{
- int i;
-
- for (i = 0; str[i]; i++)
- if (str[i] & 0x80)
- return 1;
- return 0;
+ restore_access_regs(acrs);
}
+char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
- COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;
- /* convert arch command line to ascii if necessary */
- if (has_ebcdic_char(COMMAND_LINE))
- EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
/* copy arch command line */
- strlcpy(boot_command_line, strstrip(COMMAND_LINE),
- ARCH_COMMAND_LINE_SIZE);
-
- /* append IPL PARM data to the boot command line */
- if (MACHINE_IS_VM)
- append_to_cmdline(append_ipl_vmparm);
+ strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+}
- append_to_cmdline(append_ipl_scpdata);
+static void __init sort_amode31_extable(void)
+{
+ sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table);
}
-/*
- * Save ipl parameters, clear bss memory, initialize storage keys
- * and create a kernel NSS at startup if the SAVESYS= parm is defined
- */
void __init startup_init(void)
{
- reset_tod_clock();
- ipl_save_parameters();
- rescue_initrd();
- clear_bss_section();
+ kasan_early_init();
+ time_early_init();
init_kernel_storage_key();
- lockdep_init();
lockdep_off();
+ sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
- detect_machine_type();
- ipl_update_parameters();
+ setup_arch_string();
setup_boot_command_line();
- create_kernel_nss();
- detect_mvpg();
- detect_ieee();
- detect_csp();
- detect_diag9c();
- detect_diag44();
- detect_machine_facilities();
+ save_vector_registers();
setup_topology();
- sclp_facilities_detect();
-#ifdef CONFIG_DYNAMIC_FTRACE
- S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
-#endif
+ sclp_early_detect();
+ setup_low_address_protection();
+ setup_access_registers();
lockdep_on();
}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
new file mode 100644
index 000000000000..cefe020a3be3
--- /dev/null
+++ b/arch/s390/kernel/early_printk.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2017
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+
+static void sclp_early_write(struct console *con, const char *s, unsigned int len)
+{
+ __sclp_early_printk(s, len);
+}
+
+static struct console sclp_early_console = {
+ .name = "earlysclp",
+ .write = sclp_early_write,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+ .index = -1,
+};
+
+void __init register_early_console(void)
+{
+ if (early_console)
+ return;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+}
+
+static int __init setup_early_printk(char *buf)
+{
+ if (early_console)
+ return 0;
+ /* Accept only "earlyprintk" and "earlyprintk=sclp" */
+ if (buf && !str_has_prefix(buf, "sclp"))
+ return 0;
+ register_early_console();
+ return 0;
+}
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index b971c6be6298..0e51fa537262 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * ECBDIC -> ASCII, ASCII -> ECBDIC,
+ * EBCDIC -> ASCII, ASCII -> EBCDIC,
* upper to lower case (EBCDIC) conversion tables.
*
* S390 version
@@ -8,8 +9,8 @@
* Martin Peschke <peschke@fh-brandenburg.de>
*/
-#include <linux/module.h>
-#include <asm/types.h>
+#include <linux/types.h>
+#include <linux/export.h>
#include <asm/ebcdic.h>
/*
@@ -110,15 +111,15 @@ __u8 _ebcasc[256] =
0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
/* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
- /* 0x40 SP RSP ä ---- */
+ /* 0x40 SP RSP ä ---- */
0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
/* 0x48 . < ( + | */
0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
/* 0x50 & ---- */
0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
- /* 0x58 ß ! $ * ) ; */
+ /* 0x58 ß ! $ * ) ; */
0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
- /* 0x60 - / ---- Ä ---- ---- ---- */
+ /* 0x60 - / ---- Ä ---- ---- ---- */
0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
/* 0x68 ---- , % _ > ? */
0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
@@ -130,7 +131,7 @@ __u8 _ebcasc[256] =
0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x88 h i ---- ---- ---- */
0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
- /* 0x90 ° j k l m n o p */
+ /* 0x90 ° j k l m n o p */
0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
/* 0x98 q r ---- ---- */
0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
@@ -138,25 +139,25 @@ __u8 _ebcasc[256] =
0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
/* 0xA8 y z ---- ---- ---- ---- */
0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
- /* 0xB0 ^ ---- § ---- */
+ /* 0xB0 ^ ---- § ---- */
0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
/* 0xB8 ---- [ ] ---- ---- ---- ---- */
0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
/* 0xC0 { A B C D E F G */
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- /* 0xC8 H I ---- ö ---- */
+ /* 0xC8 H I ---- ö ---- */
0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
/* 0xD0 } J K L M N O P */
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
- /* 0xD8 Q R ---- ü */
+ /* 0xD8 Q R ---- ü */
0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
/* 0xE0 \ S T U V W X */
0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
/* 0xF0 0 1 2 3 4 5 6 7 */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
};
@@ -259,15 +260,15 @@ __u8 _ebcasc_500[256] =
0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
/* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
- /* 0x40 SP RSP ä ---- */
+ /* 0x40 SP RSP ä ---- */
0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
/* 0x48 [ . < ( + ! */
0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
/* 0x50 & ---- */
0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
- /* 0x58 ß ] $ * ) ; ^ */
+ /* 0x58 ß ] $ * ) ; ^ */
0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
- /* 0x60 - / ---- Ä ---- ---- ---- */
+ /* 0x60 - / ---- Ä ---- ---- ---- */
0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
/* 0x68 ---- , % _ > ? */
0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
@@ -279,7 +280,7 @@ __u8 _ebcasc_500[256] =
0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x88 h i ---- ---- ---- */
0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
- /* 0x90 ° j k l m n o p */
+ /* 0x90 ° j k l m n o p */
0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
/* 0x98 q r ---- ---- */
0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
@@ -287,25 +288,25 @@ __u8 _ebcasc_500[256] =
0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
/* 0xA8 y z ---- ---- ---- ---- */
0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
- /* 0xB0 ---- § ---- */
+ /* 0xB0 ---- § ---- */
0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
/* 0xB8 ---- | ---- ---- ---- ---- */
0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
/* 0xC0 { A B C D E F G */
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- /* 0xC8 H I ---- ö ---- */
+ /* 0xC8 H I ---- ö ---- */
0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
/* 0xD0 } J K L M N O P */
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
- /* 0xD8 Q R ---- ü */
+ /* 0xD8 Q R ---- ü */
0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
/* 0xE0 \ S T U V W X */
0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
/* 0xF0 0 1 2 3 4 5 6 7 */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
};
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index be7a408be7a1..b7f1553d9ee5 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* S390 low-level entry points.
*
@@ -5,12 +6,16 @@
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*/
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/asm-extable.h>
+#include <asm/alternative.h>
+#include <asm/processor.h>
#include <asm/cache.h>
+#include <asm/dwarf.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
@@ -18,916 +23,592 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
+#include <asm/fpu-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
+#include <asm/nospec-insn.h>
+#include <asm/lowcore.h>
+#include <asm/machine.h>
+
+_LPP_OFFSET = __LC_LPP
+
+ .macro STBEAR address
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
+ .endm
-__PT_R0 = __PT_GPRS
-__PT_R1 = __PT_GPRS + 4
-__PT_R2 = __PT_GPRS + 8
-__PT_R3 = __PT_GPRS + 12
-__PT_R4 = __PT_GPRS + 16
-__PT_R5 = __PT_GPRS + 20
-__PT_R6 = __PT_GPRS + 24
-__PT_R7 = __PT_GPRS + 28
-__PT_R8 = __PT_GPRS + 32
-__PT_R9 = __PT_GPRS + 36
-__PT_R10 = __PT_GPRS + 40
-__PT_R11 = __PT_GPRS + 44
-__PT_R12 = __PT_GPRS + 48
-__PT_R13 = __PT_GPRS + 524
-__PT_R14 = __PT_GPRS + 56
-__PT_R15 = __PT_GPRS + 60
-
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
-_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT)
-
-STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
-STACK_SIZE = 1 << STACK_SHIFT
-STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-
-#define BASED(name) name-system_call(%r13)
-
- .macro TRACE_IRQS_ON
-#ifdef CONFIG_TRACE_IRQFLAGS
- basr %r2,%r0
- l %r1,BASED(.Lhardirqs_on)
- basr %r14,%r1 # call trace_hardirqs_on_caller
-#endif
+ .macro LBEAR address
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro TRACE_IRQS_OFF
-#ifdef CONFIG_TRACE_IRQFLAGS
- basr %r2,%r0
- l %r1,BASED(.Lhardirqs_off)
- basr %r14,%r1 # call trace_hardirqs_off_caller
-#endif
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
- .macro LOCKDEP_SYS_EXIT
-#ifdef CONFIG_LOCKDEP
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jz .+10
- l %r1,BASED(.Llockdep_sys_exit)
- basr %r14,%r1 # call lockdep_sys_exit
-#endif
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_STACK stacksize,savearea
-#ifdef CONFIG_CHECK_STACK
- tml %r15,\stacksize - CONFIG_STACK_GUARD
- la %r14,\savearea
- jz stack_overflow
-#endif
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
+ lgr %r14,%r15
+ nill %r14,0x10000 - THREAD_SIZE
+ oill %r14,STACK_INIT_OFFSET
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
+ je \oklabel
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
+ je \oklabel
+ clg %r14,__LC_MCCK_STACK(\lowcore)
+ je \oklabel
+ clg %r14,__LC_NODAT_STACK(\lowcore)
+ je \oklabel
+ clg %r14,__LC_RESTART_STACK(\lowcore)
+ je \oklabel
+ la %r14,\savearea(\lowcore)
+ j stack_invalid
.endm
- .macro SWITCH_ASYNC savearea,stack,shift
- tmh %r8,0x0001 # interrupting from user ?
- jnz 1f
- lr %r14,%r9
- sl %r14,BASED(.Lcritical_start)
- cl %r14,BASED(.Lcritical_length)
- jhe 0f
- la %r11,\savearea # inside critical section, do cleanup
- bras %r14,cleanup_critical
- tmh %r8,0x0001 # retest problem state after cleanup
- jnz 1f
-0: l %r14,\stack # are we already on the target stack?
- slr %r14,%r15
- sra %r14,\shift
- jnz 1f
- CHECK_STACK 1<<\shift,\savearea
- ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: l %r15,\stack # load target stack
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ /*
+ * The TSTMSK macro generates a test-under-mask instruction by
+ * calculating the memory offset for the specified mask value.
+ * Mask value can be any constant. The macro shifts the mask
+ * value to calculate the memory offset for the test-under-mask
+ * instruction.
+ */
+ .macro TSTMSK addr, mask, size=8, bytepos=0
+ .if (\bytepos < \size) && (\mask >> 8)
+ .if (\mask & 0xff)
+ .error "Mask exceeds byte boundary"
+ .endif
+ TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+ .exitm
+ .endif
+ .ifeq \mask
+ .error "Mask must not be zero"
+ .endif
+ off = \size - \bytepos - 1
+ tm off+\addr, \mask
+ .endm
+
+ .macro BPOFF
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
+ .endm
+
+ .macro BPON
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
- .macro ADD64 high,low,timer
- al \high,\timer
- al \low,4+\timer
- brc 12,.+8
- ahi \high,1
+ .macro BPENTER tif_ptr,tif_mask
+ ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
- .macro SUB64 high,low,timer
- sl \high,\timer
- sl \low,4+\timer
- brc 3,.+8
- ahi \high,-1
+ .macro BPEXIT tif_ptr,tif_mask
+ TSTMSK \tif_ptr,\tif_mask
+ ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
- .macro UPDATE_VTIME high,low,enter_timer
- lm \high,\low,__LC_EXIT_TIMER
- SUB64 \high,\low,\enter_timer
- ADD64 \high,\low,__LC_USER_TIMER
- stm \high,\low,__LC_USER_TIMER
- lm \high,\low,__LC_LAST_UPDATE_TIMER
- SUB64 \high,\low,__LC_EXIT_TIMER
- ADD64 \high,\low,__LC_SYSTEM_TIMER
- stm \high,\low,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
+#if IS_ENABLED(CONFIG_KVM)
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
+ larl %r9,sie_exit # skip forward to sie_exit
.endm
+#endif
- .macro REENABLE_IRQS
- st %r8,__LC_RETURN_PSW
- ni __LC_RETURN_PSW,0xbf
- ssm __LC_RETURN_PSW
+ .macro STACKLEAK_ERASE
+#ifdef CONFIG_KSTACK_ERASE
+ brasl %r14,stackleak_erase_on_task_stack
+#endif
.endm
+ GEN_BR_THUNK %r14
+
.section .kprobes.text, "ax"
+.Ldummy:
+ /*
+ * The following nop exists only in order to avoid that the next
+ * symbol starts at the beginning of the kprobes text section.
+ * In that case there would be several symbols at the same address.
+ * E.g. objdump would take an arbitrary symbol when disassembling
+ * the code.
+ * With the added nop in between this cannot happen.
+ */
+ nop 0
/*
- * Scheduler resume function, called by switch_to
- * gpr2 = (task_struct *) prev
- * gpr3 = (task_struct *) next
+ * Scheduler resume function, called by __switch_to
+ * gpr2 = (task_struct *)prev
+ * gpr3 = (task_struct *)next
* Returns:
* gpr2 = prev
*/
-ENTRY(__switch_to)
- stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
- st %r15,__THREAD_ksp(%r2) # store kernel stack of prev
- l %r4,__THREAD_info(%r2) # get thread_info of prev
- l %r5,__THREAD_info(%r3) # get thread_info of next
- lr %r15,%r5
- ahi %r15,STACK_INIT # end of kernel stack of next
- st %r3,__LC_CURRENT # store task struct of next
- st %r5,__LC_THREAD_INFO # store thread info of next
- st %r15,__LC_KERNEL_STACK # store end of kernel stack
- lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
- mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
- l %r15,__THREAD_ksp(%r3) # load kernel stack of next
- tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending?
- jz 0f
- ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next
-0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- br %r14
+SYM_FUNC_START(__switch_to_asm)
+ stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
+ lghi %r4,__TASK_stack
+ lghi %r1,__TASK_thread
+ llill %r5,STACK_INIT_OFFSET
+ stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
+ lg %r15,0(%r4,%r3) # start of kernel stack of next
+ agr %r15,%r5 # end of kernel stack of next
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
+ lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
+ lay %r4,__TASK_pid(%r3)
+ mvc __LC_CURRENT_PID(4,%r13),0(%r4) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
+#ifdef CONFIG_STACKPROTECTOR
+ lg %r3,__TASK_stack_canary(%r3)
+ stg %r3,__LC_STACK_CANARY(%r13)
+#endif
+ lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ BR_EX %r14
+SYM_FUNC_END(__switch_to_asm)
-__critical_start:
+#if IS_ENABLED(CONFIG_KVM)
/*
- * SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
+ * %r5 guest asce
*/
+SYM_FUNC_START(__sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
+ stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
+ stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
+ stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
+ xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
+ lmg %r0,%r13,0(%r4) # load guest gprs 0-13
+ mvi __TI_sie(%r14),1
+ stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts
+ lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),3 # last exit...
+ jnz .Lsie_skip
+ lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr
+ BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+.Lsie_entry:
+ sie 0(%r14)
+# Let the next instruction be NOP to avoid triggering a machine check
+# and handling it in a guest as result of the instruction execution.
+ nopr 7
+.Lsie_leave:
+ BPOFF
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+.Lsie_skip:
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
+SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
+ stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts
+ lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ xgr %r0,%r0 # clear guest registers to
+ xgr %r1,%r1 # prevent speculative use
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
+ BR_EX %r14
+SYM_FUNC_END(__sie64a)
+EXPORT_SYMBOL(__sie64a)
+EXPORT_SYMBOL(sie_exit)
+#endif
-ENTRY(system_call)
- stpt __LC_SYNC_ENTER_TIMER
-sysc_stm:
- stm %r8,%r15,__LC_SAVE_AREA_SYNC
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
-sysc_per:
- l %r15,__LC_KERNEL_STACK
- la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-sysc_vtime:
- UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
- stm %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
-sysc_do_svc:
- oi __TI_flags+3(%r12),_TIF_SYSCALL
- l %r10,__TI_sysc_table(%r12) # 31 bit system call table
- lh %r8,__PT_INT_CODE+2(%r11)
- sla %r8,2 # shift and test for svc0
- jnz sysc_nr_ok
- # svc 0: system call number in %r1
- cl %r1,BASED(.Lnr_syscalls)
- jnl sysc_nr_ok
- sth %r1,__PT_INT_CODE+2(%r11)
- lr %r8,%r1
- sla %r8,2
-sysc_nr_ok:
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- st %r2,__PT_ORIG_GPR2(%r11)
- st %r7,STACK_FRAME_OVERHEAD(%r15)
- l %r9,0(%r8,%r10) # get system call addr.
- tm __TI_flags+2(%r12),_TIF_TRACE >> 8
- jnz sysc_tracesys
- basr %r14,%r9 # call sys_xxxx
- st %r2,__PT_R2(%r11) # store return value
-
-sysc_return:
- LOCKDEP_SYS_EXIT
-sysc_tif:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno sysc_restore
- tm __TI_flags+3(%r12),_TIF_WORK_SVC
- jnz sysc_work # check for work
- ni __TI_flags+3(%r12),255-_TIF_SYSCALL
-sysc_restore:
- mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
- stpt __LC_EXIT_TIMER
- lm %r0,%r15,__PT_R0(%r11)
- lpsw __LC_RETURN_PSW
-sysc_done:
-
-#
-# One of the work bits is on. Find out which one.
-#
-sysc_work:
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
- jo sysc_mcck_pending
- tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
- jo sysc_reschedule
- tm __TI_flags+3(%r12),_TIF_PER_TRAP
- jo sysc_singlestep
- tm __TI_flags+3(%r12),_TIF_SIGPENDING
- jo sysc_sigpending
- tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
- jo sysc_notify_resume
- j sysc_return # beware of critical section cleanup
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-sysc_reschedule:
- l %r1,BASED(.Lschedule)
- la %r14,BASED(sysc_return)
- br %r1 # call schedule
-
-#
-# _TIF_MCCK_PENDING is set, call handler
-#
-sysc_mcck_pending:
- l %r1,BASED(.Lhandle_mcck)
- la %r14,BASED(sysc_return)
- br %r1 # TIF bit will be cleared by handler
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-sysc_sigpending:
- lr %r2,%r11 # pass pointer to pt_regs
- l %r1,BASED(.Ldo_signal)
- basr %r14,%r1 # call do_signal
- tm __TI_flags+3(%r12),_TIF_SYSCALL
- jno sysc_return
- lm %r2,%r7,__PT_R2(%r11) # load svc arguments
- xr %r8,%r8 # svc 0 returns -ENOSYS
- clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
- jnl sysc_nr_ok # invalid svc number -> do svc 0
- lh %r8,__PT_INT_CODE+2(%r11) # load new svc number
- sla %r8,2
- j sysc_nr_ok # restart svc
-
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-sysc_notify_resume:
- lr %r2,%r11 # pass pointer to pt_regs
- l %r1,BASED(.Ldo_notify_resume)
- la %r14,BASED(sysc_return)
- br %r1 # call do_notify_resume
-
-#
-# _TIF_PER_TRAP is set, call do_per_trap
-#
-sysc_singlestep:
- ni __TI_flags+3(%r12),255-_TIF_PER_TRAP
- lr %r2,%r11 # pass pointer to pt_regs
- l %r1,BASED(.Ldo_per_trap)
- la %r14,BASED(sysc_return)
- br %r1 # call do_per_trap
+/*
+ * SVC interrupt handler routine. System calls are synchronous events and
+ * are entered with interrupts disabled.
+ */
-#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
-#
-sysc_tracesys:
- l %r1,BASED(.Ltrace_enter)
- lr %r2,%r11 # pass pointer to pt_regs
- la %r3,0
- xr %r0,%r0
- icm %r0,3,__PT_INT_CODE+2(%r11)
- st %r0,__PT_R2(%r11)
- basr %r14,%r1 # call do_syscall_trace_enter
- cl %r2,BASED(.Lnr_syscalls)
- jnl sysc_tracenogo
- lr %r8,%r2
- sll %r8,2
- l %r9,0(%r8,%r10)
-sysc_tracego:
- lm %r3,%r7,__PT_R3(%r11)
- st %r7,STACK_FRAME_OVERHEAD(%r15)
- l %r2,__PT_ORIG_GPR2(%r11)
- basr %r14,%r9 # call sys_xxx
- st %r2,__PT_R2(%r11) # store return value
-sysc_tracenogo:
- tm __TI_flags+2(%r12),_TIF_TRACE >> 8
- jz sysc_return
- l %r1,BASED(.Ltrace_exit)
- lr %r2,%r11 # pass pointer to pt_regs
- la %r14,BASED(sysc_return)
- br %r1 # call do_syscall_trace_exit
+SYM_CODE_START(system_call)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ BPOFF
+ lghi %r14,0
+.Lsysc_per:
+ STBEAR __LC_LAST_BREAK(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ # clear user controlled register to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r8,%r8
+ xgr %r9,%r9
+ xgr %r10,%r10
+ xgr %r11,%r11
+ la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
+ MBEAR %r2,%r13
+ lgr %r3,%r14
+ brasl %r14,__do_syscall
+ STACKLEAK_ERASE
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ BPON
+ LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(system_call)
#
# a new process exits the kernel with ret_from_fork
#
-ENTRY(ret_from_fork)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
- l %r1,BASED(.Lschedule_tail)
- basr %r14,%r1 # call schedule_tail
- TRACE_IRQS_ON
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
- jne sysc_tracenogo
- # it's a kernel thread
- lm %r9,%r10,__PT_R9(%r11) # load gprs
-ENTRY(kernel_thread_starter)
- la %r2,0(%r10)
- basr %r14,%r9
- j sysc_tracenogo
+SYM_CODE_START(ret_from_fork)
+ lgr %r3,%r11
+ brasl %r14,__ret_from_fork
+ STACKLEAK_ERASE
+ GET_LC %r13
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ BPON
+ LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(ret_from_fork)
/*
* Program check handler routine
*/
-ENTRY(pgm_check_handler)
- stpt __LC_SYNC_ENTER_TIMER
- stm %r8,%r15,__LC_SAVE_AREA_SYNC
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
- lm %r8,%r9,__LC_PGM_OLD_PSW
- tmh %r8,0x0001 # test problem state bit
- jnz 1f # -> fault in user space
- tmh %r8,0x4000 # PER bit set in old PSW ?
- jnz 0f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
- jnz pgm_svcper # -> single stepped svc
-0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
- ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
- l %r15,__LC_KERNEL_STACK
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
- stm %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
- stm %r8,%r9,__PT_PSW(%r11)
- mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
- mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
- tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 0f
- l %r1,__TI_task(%r12)
- tmh %r8,0x0001 # kernel per event ?
- jz pgm_kprobe
- oi __TI_flags+3(%r12),_TIF_PER_TRAP
- mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
-0: REENABLE_IRQS
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- l %r1,BASED(.Ljump_table)
- la %r10,0x7f
- n %r10,__PT_INT_CODE(%r11)
- je sysc_return
- sll %r10,2
- l %r1,0(%r10,%r1) # load address of handler routine
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # branch to interrupt-handler
- j sysc_return
-
-#
-# PER event in supervisor state, must be kprobes
-#
-pgm_kprobe:
- REENABLE_IRQS
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- l %r1,BASED(.Ldo_per_trap)
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # call do_per_trap
- j sysc_return
+SYM_CODE_START(pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ BPOFF
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
+ xgr %r10,%r10
+ tmhh %r8,0x0001 # coming from user space?
+ jo 3f # -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+ lg %r11,__LC_CURRENT(%r13)
+ tm __TI_sie(%r11),0xff
+ jz 1f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
+ lghi %r10,_PIF_GUEST_FAULT
+#endif
+1: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 2f # -> enabled, can't be a double fault
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
+ jnz .Lpgm_svcper # -> single stepped svc
+2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ # CHECK_VMAP_STACK branches to stack_invalid or 4f
+ CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
+4: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stg %r10,__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r12,%r12
+ lgr %r2,%r11
+ brasl %r14,__do_pgm_check
+ tmhh %r8,0x0001 # returning to user space?
+ jno .Lpgm_exit_kernel
+ STACKLEAK_ERASE
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
+.Lpgm_exit_kernel:
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
#
# single stepped system call
#
-pgm_svcper:
- oi __TI_flags+3(%r12),_TIF_PER_TRAP
- mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
- mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
- lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs
+.Lpgm_svcper:
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
+ larl %r14,.Lsysc_per
+ stg %r14,__LC_RETURN_PSW+8(%r13)
+ lghi %r14,1
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
+SYM_CODE_END(pgm_check_handler)
/*
- * IO interrupt handler routine
+ * Interrupt handler macro used for external and IO interrupts.
*/
-
-ENTRY(io_int_handler)
- stck __LC_INT_CLOCK
- stpt __LC_ASYNC_ENTER_TIMER
- stm %r8,%r15,__LC_SAVE_AREA_ASYNC
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
- lm %r8,%r9,__LC_IO_OLD_PSW
- tmh %r8,0x0001 # interrupting from user ?
- jz io_skip
- UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
-io_skip:
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- stm %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
- stm %r8,%r9,__PT_PSW(%r11)
- mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
- TRACE_IRQS_OFF
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-io_loop:
- l %r1,BASED(.Ldo_IRQ)
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # call do_IRQ
- tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
- jz io_return
- tpi 0
- jz io_return
- mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
- j io_loop
-io_return:
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON
-io_tif:
- tm __TI_flags+3(%r12),_TIF_WORK_INT
- jnz io_work # there is work to do (signals etc.)
-io_restore:
- mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
- stpt __LC_EXIT_TIMER
- lm %r0,%r15,__PT_R0(%r11)
- lpsw __LC_RETURN_PSW
-io_done:
-
-#
-# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
-# 2) if we return to kernel code and preemptive scheduling is enabled check
-# the preemption counter and if it is zero call preempt_schedule_irq
-# Before any work can be done, a switch to the kernel stack is required.
-#
-io_work:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jo io_work_user # yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
- # check for preemptive scheduling
- icm %r0,15,__TI_precount(%r12)
- jnz io_restore # preemption disabled
- tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
- jno io_restore
- # switch to kernel stack
- l %r1,__PT_R15(%r11)
- ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lr %r15,%r1
- # TRACE_IRQS_ON already done at io_return, call
- # TRACE_IRQS_OFF to keep things symmetrical
- TRACE_IRQS_OFF
- l %r1,BASED(.Lpreempt_irq)
- basr %r14,%r1 # call preempt_schedule_irq
- j io_return
-#else
- j io_restore
+.macro INT_HANDLER name,lc_old_psw,handler
+SYM_CODE_START(\name)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
+ BPOFF
+ lmg %r8,%r9,\lc_old_psw(%r13)
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz 1f
+#if IS_ENABLED(CONFIG_KVM)
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
+ jz 0f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: lg %r15,__LC_KERNEL_STACK(%r13)
+2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ MBEAR %r11,%r13
+ stmg %r8,%r9,__PT_PSW(%r11)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,\handler
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
+ tmhh %r8,0x0001 # returning to user ?
+ jno 2f
+ STACKLEAK_ERASE
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
+2: LBEAR __PT_LAST_BREAK(%r11)
+ lmg %r0,%r15,__PT_R0(%r11)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(\name)
+.endm
+
+ .section .irqentry.text, "ax"
+
+INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
+INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
-#
-# Need to do work before returning to userspace, switch to kernel stack
-#
-io_work_user:
- l %r1,__LC_KERNEL_STACK
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lr %r15,%r1
-
-#
-# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
-#
-io_work_tif:
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
- jo io_mcck_pending
- tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
- jo io_reschedule
- tm __TI_flags+3(%r12),_TIF_SIGPENDING
- jo io_sigpending
- tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
- jo io_notify_resume
- j io_return # beware of critical section cleanup
-
-#
-# _TIF_MCCK_PENDING is set, call handler
-#
-io_mcck_pending:
- # TRACE_IRQS_ON already done at io_return
- l %r1,BASED(.Lhandle_mcck)
- basr %r14,%r1 # TIF bit will be cleared by handler
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-io_reschedule:
- # TRACE_IRQS_ON already done at io_return
- l %r1,BASED(.Lschedule)
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- basr %r14,%r1 # call scheduler
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-io_sigpending:
- # TRACE_IRQS_ON already done at io_return
- l %r1,BASED(.Ldo_signal)
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # call do_signal
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-io_notify_resume:
- # TRACE_IRQS_ON already done at io_return
- l %r1,BASED(.Ldo_notify_resume)
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # call do_notify_resume
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-/*
- * External interrupt handler routine
- */
-
-ENTRY(ext_int_handler)
- stck __LC_INT_CLOCK
- stpt __LC_ASYNC_ENTER_TIMER
- stm %r8,%r15,__LC_SAVE_AREA_ASYNC
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
- lm %r8,%r9,__LC_EXT_OLD_PSW
- tmh %r8,0x0001 # interrupting from user ?
- jz ext_skip
- UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
-ext_skip:
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- stm %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
- stm %r8,%r9,__PT_PSW(%r11)
- mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
- mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
- TRACE_IRQS_OFF
- lr %r2,%r11 # pass pointer to pt_regs
- l %r1,BASED(.Ldo_extint)
- basr %r14,%r1 # call do_extint
- j io_return
-
-/*
- * Load idle PSW. The second "half" of this function is in cleanup_idle.
- */
-ENTRY(psw_idle)
- st %r3,__SF_EMPTY(%r15)
- basr %r1,0
- la %r1,psw_idle_lpsw+4-.(%r1)
- st %r1,__SF_EMPTY+4(%r15)
- oi __SF_EMPTY+4(%r15),0x80
- stck __CLOCK_IDLE_ENTER(%r2)
- stpt __TIMER_IDLE_ENTER(%r2)
-psw_idle_lpsw:
- lpsw __SF_EMPTY(%r15)
- br %r14
-psw_idle_end:
-
-__critical_end:
+ .section .kprobes.text, "ax"
/*
* Machine check handler routines
*/
-
-ENTRY(mcck_int_handler)
- stck __LC_MCCK_CLOCK
- spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
- lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
- l %r12,__LC_THREAD_INFO
- l %r13,__LC_SVC_NEW_PSW+4
- lm %r8,%r9,__LC_MCK_OLD_PSW
- tm __LC_MCCK_CODE,0x80 # system damage?
- jo mcck_panic # yes -> rest of mcck code invalid
- la %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
+SYM_CODE_START(mcck_int_handler)
+ BPOFF
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
+ jo .Lmcck_panic # yes -> rest of mcck code invalid
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
+ jno .Lmcck_panic # control registers invalid -> panic
+ ptlb
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYNC_ENTER_TIMER
- clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
- jl 0f
- la %r14,__LC_ASYNC_ENTER_TIMER
-0: clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
- jno mcck_panic # no -> skip cleanup critical
- tm %r8,0x0001 # interrupting from user ?
- jz mcck_skip
- UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
-mcck_skip:
- SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
- stm %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
- stm %r8,%r9,__PT_PSW(%r11)
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- l %r1,BASED(.Ldo_machine_check)
- lr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # call s390_do_machine_check
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno mcck_return
- l %r1,__LC_KERNEL_STACK # switch to kernel stack
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
+ jno .Lmcck_panic
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz .Lmcck_user
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
+ jno .Lmcck_panic
+#if IS_ENABLED(CONFIG_KVM)
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
+ jz .Lmcck_user
+ # Need to compare the address instead of __TI_SIE flag.
+ # Otherwise there would be a race between setting the flag
+ # and entering SIE (or leaving and clearing the flag). This
+ # would cause machine checks targeted at the guest to be
+ # handled by the host.
+ larl %r14,.Lsie_entry
+ clgrjl %r9,%r14, 4f
+ larl %r14,.Lsie_leave
+ clgrjhe %r9,%r14, 4f
+ lg %r10,__LC_PCPU(%r13)
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
+4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
+#endif
+.Lmcck_user:
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- lr %r15,%r1
- ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
- jno mcck_return
- TRACE_IRQS_OFF
- l %r1,BASED(.Lhandle_mcck)
- basr %r14,%r1 # call s390_handle_mcck
- TRACE_IRQS_ON
-mcck_return:
- mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
+ stmg %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,s390_do_machine_check
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
- lm %r0,%r15,__PT_R0(%r11)
- stpt __LC_EXIT_TIMER
- lpsw __LC_RETURN_MCCK_PSW
-0: lm %r0,%r15,__PT_R0(%r11)
- lpsw __LC_RETURN_MCCK_PSW
-
-mcck_panic:
- l %r14,__LC_PANIC_STACK
- slr %r14,%r15
- sra %r14,PAGE_SHIFT
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
+ LBEAR 0(%r12)
+ lmg %r11,%r15,__PT_R11(%r11)
+ LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
+
+.Lmcck_panic:
+ /*
+ * Iterate over all possible CPU addresses in the range 0..0xffff
+ * and stop each CPU using signal processor. Use compare and swap
+ * to allow just one CPU-stopper and prevent concurrent CPUs from
+ * stopping each other while leaving the others running.
+ */
+ lhi %r5,0
+ lhi %r6,1
+ larl %r7,stop_lock
+ cs %r5,%r6,0(%r7) # single CPU-stopper only
+ jnz 4f
+ larl %r7,this_cpu
+ stap 0(%r7) # this CPU address
+ lh %r4,0(%r7)
+ nilh %r4,0
+ lhi %r0,1
+ sll %r0,16 # CPU counter
+ lhi %r3,0 # next CPU address
+0: cr %r3,%r4
+ je 2f
+1: sigp %r1,%r3,SIGP_STOP # stop next CPU
+ brc SIGP_CC_BUSY,1b
+2: ahi %r3,1
+ brct %r0,0b
+3: sigp %r1,%r4,SIGP_STOP # stop this CPU
+ brc SIGP_CC_BUSY,3b
+4: j 4b
+SYM_CODE_END(mcck_int_handler)
+
+SYM_CODE_START(restart_int_handler)
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
+ stg %r15,__LC_SAVE_AREA_RESTART
+ TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
- l %r15,__LC_PANIC_STACK
- j mcck_skip
-0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j mcck_skip
-
-#
-# PSW restart interrupt handler
-#
-ENTRY(restart_int_handler)
- st %r15,__LC_SAVE_AREA_RESTART
- l %r15,__LC_RESTART_STACK
- ahi %r15,-__PT_SIZE # create pt_regs on stack
- xc 0(__PT_SIZE,%r15),0(%r15)
- stm %r0,%r14,__PT_R0(%r15)
- mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART
- mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw
- ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ lctlg %c0,%c15,__LC_CREGS_SAVE_AREA
+0: larl %r15,daton_psw
+ lpswe 0(%r15) # turn dat on, keep irqs off
+.Ldaton:
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
+ xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- l %r1,__LC_RESTART_FN # load fn, parm & source cpu
- l %r2,__LC_RESTART_DATA
- l %r3,__LC_RESTART_SOURCE
- ltr %r3,%r3 # test source cpu address
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
+ ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
brc 10,0b # wait for status stored
1: basr %r14,%r1 # call function
stap __SF_EMPTY(%r15) # store cpu address
- lh %r3,__SF_EMPTY(%r15)
+ llgh %r3,__SF_EMPTY(%r15)
2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
brc 2,2b
3: j 3b
+SYM_CODE_END(restart_int_handler)
+
+ __INIT
+SYM_CODE_START(early_pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+ __FINIT
.section .kprobes.text, "ax"
-#ifdef CONFIG_CHECK_STACK
/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
+ * The synchronous or the asynchronous stack pointer is invalid. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-stack_overflow:
- l %r15,__LC_PANIC_STACK # change to panic stack
+SYM_CODE_START(stack_invalid)
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
- stm %r0,%r7,__PT_R0(%r11)
- stm %r8,%r9,__PT_PSW(%r11)
- mvc __PT_R8(32,%r11),0(%r14)
- l %r1,BASED(1f)
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- lr %r2,%r11 # pass pointer to pt_regs
- br %r1 # branch to kernel_stack_overflow
-1: .long kernel_stack_overflow
-#endif
-
-cleanup_table:
- .long system_call + 0x80000000
- .long sysc_do_svc + 0x80000000
- .long sysc_tif + 0x80000000
- .long sysc_restore + 0x80000000
- .long sysc_done + 0x80000000
- .long io_tif + 0x80000000
- .long io_restore + 0x80000000
- .long io_done + 0x80000000
- .long psw_idle + 0x80000000
- .long psw_idle_end + 0x80000000
-
-cleanup_critical:
- cl %r9,BASED(cleanup_table) # system_call
- jl 0f
- cl %r9,BASED(cleanup_table+4) # sysc_do_svc
- jl cleanup_system_call
- cl %r9,BASED(cleanup_table+8) # sysc_tif
- jl 0f
- cl %r9,BASED(cleanup_table+12) # sysc_restore
- jl cleanup_sysc_tif
- cl %r9,BASED(cleanup_table+16) # sysc_done
- jl cleanup_sysc_restore
- cl %r9,BASED(cleanup_table+20) # io_tif
- jl 0f
- cl %r9,BASED(cleanup_table+24) # io_restore
- jl cleanup_io_tif
- cl %r9,BASED(cleanup_table+28) # io_done
- jl cleanup_io_restore
- cl %r9,BASED(cleanup_table+32) # psw_idle
- jl 0f
- cl %r9,BASED(cleanup_table+36) # psw_idle_end
- jl cleanup_idle
-0: br %r14
-
-cleanup_system_call:
- # check if stpt has been executed
- cl %r9,BASED(cleanup_system_call_insn)
- jh 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
- chi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: # check if stm has been executed
- cl %r9,BASED(cleanup_system_call_insn+4)
- jh 0f
- mvc __LC_SAVE_AREA_SYNC(32),0(%r11)
-0: # set up saved registers r12, and r13
- st %r12,16(%r11) # r12 thread-info pointer
- st %r13,20(%r11) # r13 literal-pool pointer
- # check if the user time calculation has been done
- cl %r9,BASED(cleanup_system_call_insn+8)
- jh 0f
- l %r10,__LC_EXIT_TIMER
- l %r15,__LC_EXIT_TIMER+4
- SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER
- ADD64 %r10,%r15,__LC_USER_TIMER
- st %r10,__LC_USER_TIMER
- st %r15,__LC_USER_TIMER+4
-0: # check if the system time calculation has been done
- cl %r9,BASED(cleanup_system_call_insn+12)
- jh 0f
- l %r10,__LC_LAST_UPDATE_TIMER
- l %r15,__LC_LAST_UPDATE_TIMER+4
- SUB64 %r10,%r15,__LC_EXIT_TIMER
- ADD64 %r10,%r15,__LC_SYSTEM_TIMER
- st %r10,__LC_SYSTEM_TIMER
- st %r15,__LC_SYSTEM_TIMER+4
-0: # update accounting time stamp
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
- # set up saved register 11
- l %r15,__LC_KERNEL_STACK
- la %r9,STACK_FRAME_OVERHEAD(%r15)
- st %r9,12(%r11) # r11 pt_regs pointer
- # fill pt_regs
- mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC
- stm %r0,%r7,__PT_R0(%r9)
- mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
- # setup saved register 15
- st %r15,28(%r11) # r15 stack pointer
- # set new psw address and exit
- l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000
- br %r14
-cleanup_system_call_insn:
- .long system_call + 0x80000000
- .long sysc_stm + 0x80000000
- .long sysc_vtime + 0x80000000 + 36
- .long sysc_vtime + 0x80000000 + 76
-
-cleanup_sysc_tif:
- l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000
- br %r14
-
-cleanup_sysc_restore:
- cl %r9,BASED(cleanup_sysc_restore_insn)
- jhe 0f
- l %r9,12(%r11) # get saved pointer to pt_regs
- mvc __LC_RETURN_PSW(8),__PT_PSW(%r9)
- mvc 0(32,%r11),__PT_R8(%r9)
- lm %r0,%r7,__PT_R0(%r9)
-0: lm %r8,%r9,__LC_RETURN_PSW
- br %r14
-cleanup_sysc_restore_insn:
- .long sysc_done - 4 + 0x80000000
-
-cleanup_io_tif:
- l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000
- br %r14
-
-cleanup_io_restore:
- cl %r9,BASED(cleanup_io_restore_insn)
- jhe 0f
- l %r9,12(%r11) # get saved r11 pointer to pt_regs
- mvc __LC_RETURN_PSW(8),__PT_PSW(%r9)
- mvc 0(32,%r11),__PT_R8(%r9)
- lm %r0,%r7,__PT_R0(%r9)
-0: lm %r8,%r9,__LC_RETURN_PSW
- br %r14
-cleanup_io_restore_insn:
- .long io_done - 4 + 0x80000000
-
-cleanup_idle:
- # copy interrupt clock & cpu timer
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
- chi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0: # check if stck has been executed
- cl %r9,BASED(cleanup_idle_insn)
- jhe 1f
- mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
- mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3)
-1: # account system time going idle
- lm %r9,%r10,__LC_STEAL_TIMER
- ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2)
- SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK
- stm %r9,%r10,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
- lm %r9,%r10,__LC_SYSTEM_TIMER
- ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER
- SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2)
- stm %r9,%r10,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
- # prepare return psw
- n %r8,BASED(cleanup_idle_wait) # clear wait state bit
- l %r9,24(%r11) # return from psw_idle
- br %r14
-cleanup_idle_insn:
- .long psw_idle_lpsw + 0x80000000
-cleanup_idle_wait:
- .long 0xfffdffff
-
-/*
- * Integer constants
- */
- .align 4
-.Lnr_syscalls:
- .long NR_syscalls
-.Lvtimer_max:
- .quad 0x7fffffffffffffff
-
-/*
- * Symbol constants
- */
-.Ldo_machine_check: .long s390_do_machine_check
-.Lhandle_mcck: .long s390_handle_mcck
-.Ldo_IRQ: .long do_IRQ
-.Ldo_extint: .long do_extint
-.Ldo_signal: .long do_signal
-.Ldo_notify_resume: .long do_notify_resume
-.Ldo_per_trap: .long do_per_trap
-.Ljump_table: .long pgm_check_table
-.Lschedule: .long schedule
-#ifdef CONFIG_PREEMPT
-.Lpreempt_irq: .long preempt_schedule_irq
-#endif
-.Ltrace_enter: .long do_syscall_trace_enter
-.Ltrace_exit: .long do_syscall_trace_exit
-.Lschedule_tail: .long schedule_tail
-.Lsysc_per: .long sysc_per + 0x80000000
-#ifdef CONFIG_TRACE_IRQFLAGS
-.Lhardirqs_on: .long trace_hardirqs_on_caller
-.Lhardirqs_off: .long trace_hardirqs_off_caller
-#endif
-#ifdef CONFIG_LOCKDEP
-.Llockdep_sys_exit: .long lockdep_sys_exit
-#endif
-.Lcritical_start: .long __critical_start + 0x80000000
-.Lcritical_length: .long __critical_end - __critical_start
-
- .section .rodata, "a"
-#define SYSCALL(esa,esame,emu) .long esa
- .globl sys_call_table
-sys_call_table:
-#include "syscalls.S"
-#undef SYSCALL
+ stmg %r0,%r7,__PT_R0(%r11)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ GET_LC %r2
+ mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ jg kernel_stack_invalid
+SYM_CODE_END(stack_invalid)
+
+ .section .data, "aw"
+ .balign 4
+SYM_DATA_LOCAL(stop_lock, .long 0)
+SYM_DATA_LOCAL(this_cpu, .short 0)
+ .balign 8
+SYM_DATA_START_LOCAL(daton_psw)
+ .quad PSW_KERNEL_BITS
+ .quad .Ldaton
+SYM_DATA_END(daton_psw)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 3ddbc26d246e..dd55cc6bbc28 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -1,13 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ENTRY_H
#define _ENTRY_H
+#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/signal.h>
+#include <asm/extable.h>
#include <asm/ptrace.h>
-#include <asm/cputime.h>
+#include <asm/idle.h>
extern void *restart_stack;
-extern unsigned long suspend_zero_pages;
void system_call(void);
void pgm_check_handler(void);
@@ -15,68 +17,59 @@ void ext_int_handler(void);
void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
-void restart_call_handler(void);
-void psw_idle(struct s390_idle_data *, unsigned long);
+void early_pgm_check_handler(void);
-asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
-asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+struct task_struct *__switch_to_asm(struct task_struct *prev, struct task_struct *next);
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
+void __do_pgm_check(struct pt_regs *regs);
+void __do_syscall(struct pt_regs *regs, int per_trap);
+void __do_early_pgm_check(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
-void do_asce_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
+void do_secure_storage_violation(struct pt_regs *regs);
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
+void kernel_stack_invalid(struct pt_regs *regs);
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs);
-void addressing_exception(struct pt_regs *regs);
-void data_exception(struct pt_regs *regs);
-void default_trap_handler(struct pt_regs *regs);
-void divide_exception(struct pt_regs *regs);
-void execute_exception(struct pt_regs *regs);
-void hfp_divide_exception(struct pt_regs *regs);
-void hfp_overflow_exception(struct pt_regs *regs);
-void hfp_significance_exception(struct pt_regs *regs);
-void hfp_sqrt_exception(struct pt_regs *regs);
-void hfp_underflow_exception(struct pt_regs *regs);
-void illegal_op(struct pt_regs *regs);
-void operand_exception(struct pt_regs *regs);
-void overflow_exception(struct pt_regs *regs);
-void privileged_op(struct pt_regs *regs);
-void space_switch_exception(struct pt_regs *regs);
-void special_op_exception(struct pt_regs *regs);
-void specification_exception(struct pt_regs *regs);
-void transaction_exception(struct pt_regs *regs);
-void translation_exception(struct pt_regs *regs);
-
-void do_per_trap(struct pt_regs *regs);
-void syscall_trace(struct pt_regs *regs, int entryexit);
-void kernel_stack_overflow(struct pt_regs * regs);
-void do_signal(struct pt_regs *regs);
-void handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
-void do_notify_resume(struct pt_regs *regs);
-
-struct ext_code;
-void do_extint(struct pt_regs *regs);
-void do_restart(void);
+void do_io_irq(struct pt_regs *regs);
+void do_ext_irq(struct pt_regs *regs);
+void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
-
-void __init time_init(void);
+int setup_profiling_timer(unsigned int multiplier);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
struct old_sigaction;
-long sys_mmap2(struct s390_mmap_arg_struct __user *arg);
-long sys_s390_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr);
-long sys_s390_personality(unsigned int personality);
-long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
- size_t len, int advice);
-long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
- u32 len_low);
-long sys_sigreturn(void);
long sys_rt_sigreturn(void);
-long sys32_sigreturn(void);
-long sys32_rt_sigreturn(void);
+long sys_sigreturn(void);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
+unsigned long stack_alloc(void);
+void stack_free(unsigned long stack);
+
+extern char kprobes_insn_page[];
+
+extern char _samode31[], _eamode31[];
+extern char _stext_amode31[], _etext_amode31[];
+extern struct exception_table_entry _start_amode31_ex_table[];
+extern struct exception_table_entry _stop_amode31_ex_table[];
+
+#define __amode31_data __section(".amode31.data")
+#define __amode31_ref __section(".amode31.refs")
+extern long _start_amode31_refs[], _end_amode31_refs[];
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
deleted file mode 100644
index 1c039d0c24c7..000000000000
--- a/arch/s390/kernel/entry64.S
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * S390 low-level entry points.
- *
- * Copyright IBM Corp. 1999, 2012
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Hartmut Penner (hp@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/page.h>
-#include <asm/sigp.h>
-
-__PT_R0 = __PT_GPRS
-__PT_R1 = __PT_GPRS + 8
-__PT_R2 = __PT_GPRS + 16
-__PT_R3 = __PT_GPRS + 24
-__PT_R4 = __PT_GPRS + 32
-__PT_R5 = __PT_GPRS + 40
-__PT_R6 = __PT_GPRS + 48
-__PT_R7 = __PT_GPRS + 56
-__PT_R8 = __PT_GPRS + 64
-__PT_R9 = __PT_GPRS + 72
-__PT_R10 = __PT_GPRS + 80
-__PT_R11 = __PT_GPRS + 88
-__PT_R12 = __PT_GPRS + 96
-__PT_R13 = __PT_GPRS + 104
-__PT_R14 = __PT_GPRS + 112
-__PT_R15 = __PT_GPRS + 120
-
-STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
-STACK_SIZE = 1 << STACK_SHIFT
-STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
-_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT)
-
-#define BASED(name) name-system_call(%r13)
-
- .macro TRACE_IRQS_ON
-#ifdef CONFIG_TRACE_IRQFLAGS
- basr %r2,%r0
- brasl %r14,trace_hardirqs_on_caller
-#endif
- .endm
-
- .macro TRACE_IRQS_OFF
-#ifdef CONFIG_TRACE_IRQFLAGS
- basr %r2,%r0
- brasl %r14,trace_hardirqs_off_caller
-#endif
- .endm
-
- .macro LOCKDEP_SYS_EXIT
-#ifdef CONFIG_LOCKDEP
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jz .+10
- brasl %r14,lockdep_sys_exit
-#endif
- .endm
-
- .macro LPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
- tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
- jz .+8
- .insn s,0xb2800000,\newpp
-#endif
- .endm
-
- .macro HANDLE_SIE_INTERCEPT scratch,reason
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
- tmhh %r8,0x0001 # interrupting from user ?
- jnz .+62
- lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_critical)
- clg \scratch,BASED(.Lsie_critical_length)
- .if \reason==1
- # Some program interrupts are suppressing (e.g. protection).
- # We must also check the instruction after SIE in that case.
- # do_protection_exception will rewind to rewind_pad
- jh .+42
- .else
- jhe .+42
- .endif
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- larl %r9,sie_exit # skip forward to sie_exit
- mvi __SF_EMPTY+31(%r15),\reason # set exit reason
-#endif
- .endm
-
- .macro CHECK_STACK stacksize,savearea
-#ifdef CONFIG_CHECK_STACK
- tml %r15,\stacksize - CONFIG_STACK_GUARD
- lghi %r14,\savearea
- jz stack_overflow
-#endif
- .endm
-
- .macro SWITCH_ASYNC savearea,stack,shift
- tmhh %r8,0x0001 # interrupting from user ?
- jnz 1f
- lgr %r14,%r9
- slg %r14,BASED(.Lcritical_start)
- clg %r14,BASED(.Lcritical_length)
- jhe 0f
- lghi %r11,\savearea # inside critical section, do cleanup
- brasl %r14,cleanup_critical
- tmhh %r8,0x0001 # retest problem state after cleanup
- jnz 1f
-0: lg %r14,\stack # are we already on the target stack?
- slgr %r14,%r15
- srag %r14,%r14,\shift
- jnz 1f
- CHECK_STACK 1<<\shift,\savearea
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: lg %r15,\stack # load target stack
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
- .endm
-
- .macro UPDATE_VTIME scratch,enter_timer
- lg \scratch,__LC_EXIT_TIMER
- slg \scratch,\enter_timer
- alg \scratch,__LC_USER_TIMER
- stg \scratch,__LC_USER_TIMER
- lg \scratch,__LC_LAST_UPDATE_TIMER
- slg \scratch,__LC_EXIT_TIMER
- alg \scratch,__LC_SYSTEM_TIMER
- stg \scratch,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
- .endm
-
- .macro LAST_BREAK scratch
- srag \scratch,%r10,23
- jz .+10
- stg %r10,__TI_last_break(%r12)
- .endm
-
- .macro REENABLE_IRQS
- stg %r8,__LC_RETURN_PSW
- ni __LC_RETURN_PSW,0xbf
- ssm __LC_RETURN_PSW
- .endm
-
- .macro STCK savearea
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
- .insn s,0xb27c0000,\savearea # store clock fast
-#else
- .insn s,0xb2050000,\savearea # store clock
-#endif
- .endm
-
- .section .kprobes.text, "ax"
-
-/*
- * Scheduler resume function, called by switch_to
- * gpr2 = (task_struct *) prev
- * gpr3 = (task_struct *) next
- * Returns:
- * gpr2 = prev
- */
-ENTRY(__switch_to)
- stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
- stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev
- lg %r4,__THREAD_info(%r2) # get thread_info of prev
- lg %r5,__THREAD_info(%r3) # get thread_info of next
- lgr %r15,%r5
- aghi %r15,STACK_INIT # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r5,__LC_THREAD_INFO # store thread info of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
- lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
- mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
- lg %r15,__THREAD_ksp(%r3) # load kernel stack of next
- tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending?
- jz 0f
- ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next
-0: lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- br %r14
-
-__critical_start:
-/*
- * SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
- */
-
-ENTRY(system_call)
- stpt __LC_SYNC_ENTER_TIMER
-sysc_stmg:
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
-sysc_per:
- lg %r15,__LC_KERNEL_STACK
- la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-sysc_vtime:
- UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
- LAST_BREAK %r13
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
-sysc_do_svc:
- oi __TI_flags+7(%r12),_TIF_SYSCALL
- lg %r10,__TI_sysc_table(%r12) # address of system call table
- llgh %r8,__PT_INT_CODE+2(%r11)
- slag %r8,%r8,2 # shift and test for svc 0
- jnz sysc_nr_ok
- # svc 0: system call number in %r1
- llgfr %r1,%r1 # clear high word in r1
- cghi %r1,NR_syscalls
- jnl sysc_nr_ok
- sth %r1,__PT_INT_CODE+2(%r11)
- slag %r8,%r1,2
-sysc_nr_ok:
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stg %r2,__PT_ORIG_GPR2(%r11)
- stg %r7,STACK_FRAME_OVERHEAD(%r15)
- lgf %r9,0(%r8,%r10) # get system call add.
- tm __TI_flags+6(%r12),_TIF_TRACE >> 8
- jnz sysc_tracesys
- basr %r14,%r9 # call sys_xxxx
- stg %r2,__PT_R2(%r11) # store return value
-
-sysc_return:
- LOCKDEP_SYS_EXIT
-sysc_tif:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno sysc_restore
- tm __TI_flags+7(%r12),_TIF_WORK_SVC
- jnz sysc_work # check for work
- ni __TI_flags+7(%r12),255-_TIF_SYSCALL
-sysc_restore:
- lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
- stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
- lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
-sysc_done:
-
-#
-# One of the work bits is on. Find out which one.
-#
-sysc_work:
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
- jo sysc_mcck_pending
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
- jo sysc_reschedule
- tm __TI_flags+7(%r12),_TIF_PER_TRAP
- jo sysc_singlestep
- tm __TI_flags+7(%r12),_TIF_SIGPENDING
- jo sysc_sigpending
- tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
- jo sysc_notify_resume
- j sysc_return # beware of critical section cleanup
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-sysc_reschedule:
- larl %r14,sysc_return
- jg schedule
-
-#
-# _TIF_MCCK_PENDING is set, call handler
-#
-sysc_mcck_pending:
- larl %r14,sysc_return
- jg s390_handle_mcck # TIF bit will be cleared by handler
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-sysc_sigpending:
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_signal
- tm __TI_flags+7(%r12),_TIF_SYSCALL
- jno sysc_return
- lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
- lghi %r8,0 # svc 0 returns -ENOSYS
- llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
- cghi %r1,NR_syscalls
- jnl sysc_nr_ok # invalid svc number -> do svc 0
- slag %r8,%r1,2
- j sysc_nr_ok # restart svc
-
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-sysc_notify_resume:
- lgr %r2,%r11 # pass pointer to pt_regs
- larl %r14,sysc_return
- jg do_notify_resume
-
-#
-# _TIF_PER_TRAP is set, call do_per_trap
-#
-sysc_singlestep:
- ni __TI_flags+7(%r12),255-_TIF_PER_TRAP
- lgr %r2,%r11 # pass pointer to pt_regs
- larl %r14,sysc_return
- jg do_per_trap
-
-#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
-#
-sysc_tracesys:
- lgr %r2,%r11 # pass pointer to pt_regs
- la %r3,0
- llgh %r0,__PT_INT_CODE+2(%r11)
- stg %r0,__PT_R2(%r11)
- brasl %r14,do_syscall_trace_enter
- lghi %r0,NR_syscalls
- clgr %r0,%r2
- jnh sysc_tracenogo
- sllg %r8,%r2,2
- lgf %r9,0(%r8,%r10)
-sysc_tracego:
- lmg %r3,%r7,__PT_R3(%r11)
- stg %r7,STACK_FRAME_OVERHEAD(%r15)
- lg %r2,__PT_ORIG_GPR2(%r11)
- basr %r14,%r9 # call sys_xxx
- stg %r2,__PT_R2(%r11) # store return value
-sysc_tracenogo:
- tm __TI_flags+6(%r12),_TIF_TRACE >> 8
- jz sysc_return
- lgr %r2,%r11 # pass pointer to pt_regs
- larl %r14,sysc_return
- jg do_syscall_trace_exit
-
-#
-# a new process exits the kernel with ret_from_fork
-#
-ENTRY(ret_from_fork)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- lg %r12,__LC_THREAD_INFO
- brasl %r14,schedule_tail
- TRACE_IRQS_ON
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
- jne sysc_tracenogo
- # it's a kernel thread
- lmg %r9,%r10,__PT_R9(%r11) # load gprs
-ENTRY(kernel_thread_starter)
- la %r2,0(%r10)
- basr %r14,%r9
- j sysc_tracenogo
-
-/*
- * Program check handler routine
- */
-
-ENTRY(pgm_check_handler)
- stpt __LC_SYNC_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
- lmg %r8,%r9,__LC_PGM_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,1
- tmhh %r8,0x0001 # test problem state bit
- jnz 1f # -> fault in user space
- tmhh %r8,0x4000 # PER bit set in old PSW ?
- jnz 0f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
- jnz pgm_svcper # -> single stepped svc
-0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
- LAST_BREAK %r14
- lg %r15,__LC_KERNEL_STACK
- lg %r14,__TI_task(%r12)
- lghi %r13,__LC_PGM_TDB
- tm __LC_PGM_ILC+2,0x02 # check for transaction abort
- jz 2f
- mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- stmg %r8,%r9,__PT_PSW(%r11)
- mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
- mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
- stg %r10,__PT_ARGS(%r11)
- tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 0f
- tmhh %r8,0x0001 # kernel per event ?
- jz pgm_kprobe
- oi __TI_flags+7(%r12),_TIF_PER_TRAP
- mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
-0: REENABLE_IRQS
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- larl %r1,pgm_check_table
- llgh %r10,__PT_INT_CODE+2(%r11)
- nill %r10,0x007f
- sll %r10,2
- je sysc_return
- lgf %r1,0(%r10,%r1) # load address of handler routine
- lgr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # branch to interrupt-handler
- j sysc_return
-
-#
-# PER event in supervisor state, must be kprobes
-#
-pgm_kprobe:
- REENABLE_IRQS
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_per_trap
- j sysc_return
-
-#
-# single stepped system call
-#
-pgm_svcper:
- oi __TI_flags+7(%r12),_TIF_PER_TRAP
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
- larl %r14,sysc_per
- stg %r14,__LC_RETURN_PSW+8
- lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs
-
-/*
- * IO interrupt handler routine
- */
-ENTRY(io_int_handler)
- STCK __LC_INT_CLOCK
- stpt __LC_ASYNC_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
- lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,2
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user?
- jz io_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-io_skip:
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- stmg %r8,%r9,__PT_PSW(%r11)
- mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
- TRACE_IRQS_OFF
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-io_loop:
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_IRQ
- tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
- jz io_return
- tpi 0
- jz io_return
- mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
- j io_loop
-io_return:
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON
-io_tif:
- tm __TI_flags+7(%r12),_TIF_WORK_INT
- jnz io_work # there is work to do (signals etc.)
-io_restore:
- lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
- stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
- lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
-io_done:
-
-#
-# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
-# 2) if we return to kernel code and kvm is enabled check if we need to
-# modify the psw to leave SIE
-# 3) if we return to kernel code and preemptive scheduling is enabled check
-# the preemption counter and if it is zero call preempt_schedule_irq
-# Before any work can be done, a switch to the kernel stack is required.
-#
-io_work:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jo io_work_user # yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
- # check for preemptive scheduling
- icm %r0,15,__TI_precount(%r12)
- jnz io_restore # preemption is disabled
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
- jno io_restore
- # switch to kernel stack
- lg %r1,__PT_R15(%r11)
- aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lgr %r15,%r1
- # TRACE_IRQS_ON already done at io_return, call
- # TRACE_IRQS_OFF to keep things symmetrical
- TRACE_IRQS_OFF
- brasl %r14,preempt_schedule_irq
- j io_return
-#else
- j io_restore
-#endif
-
-#
-# Need to do work before returning to userspace, switch to kernel stack
-#
-io_work_user:
- lg %r1,__LC_KERNEL_STACK
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lgr %r15,%r1
-
-#
-# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
-#
-io_work_tif:
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
- jo io_mcck_pending
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
- jo io_reschedule
- tm __TI_flags+7(%r12),_TIF_SIGPENDING
- jo io_sigpending
- tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
- jo io_notify_resume
- j io_return # beware of critical section cleanup
-
-#
-# _TIF_MCCK_PENDING is set, call handler
-#
-io_mcck_pending:
- # TRACE_IRQS_ON already done at io_return
- brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-io_reschedule:
- # TRACE_IRQS_ON already done at io_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- brasl %r14,schedule # call scheduler
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_SIGPENDING or is set, call do_signal
-#
-io_sigpending:
- # TRACE_IRQS_ON already done at io_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_signal
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-#
-# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
-#
-io_notify_resume:
- # TRACE_IRQS_ON already done at io_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_notify_resume
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
- j io_return
-
-/*
- * External interrupt handler routine
- */
-ENTRY(ext_int_handler)
- STCK __LC_INT_CLOCK
- stpt __LC_ASYNC_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
- lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,3
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user ?
- jz ext_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-ext_skip:
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- stmg %r8,%r9,__PT_PSW(%r11)
- lghi %r1,__LC_EXT_PARAMS2
- mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
- mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
- mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
- TRACE_IRQS_OFF
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,do_extint
- j io_return
-
-/*
- * Load idle PSW. The second "half" of this function is in cleanup_idle.
- */
-ENTRY(psw_idle)
- stg %r3,__SF_EMPTY(%r15)
- larl %r1,psw_idle_lpsw+4
- stg %r1,__SF_EMPTY+8(%r15)
- STCK __CLOCK_IDLE_ENTER(%r2)
- stpt __TIMER_IDLE_ENTER(%r2)
-psw_idle_lpsw:
- lpswe __SF_EMPTY(%r15)
- br %r14
-psw_idle_end:
-
-__critical_end:
-
-/*
- * Machine check handler routines
- */
-ENTRY(mcck_int_handler)
- STCK __LC_MCCK_CLOCK
- la %r1,4095 # revalidate r1
- spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
- lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,4
- tm __LC_MCCK_CODE,0x80 # system damage?
- jo mcck_panic # yes -> rest of mcck code invalid
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
- jo 3f
- la %r14,__LC_SYNC_ENTER_TIMER
- clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
- jl 0f
- la %r14,__LC_ASYNC_ENTER_TIMER
-0: clc 0(8,%r14),__LC_EXIT_TIMER
- jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
- jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
-2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
- jno mcck_panic # no -> skip cleanup critical
- SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
- tm %r8,0x0001 # interrupting from user ?
- jz mcck_skip
- UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
- LAST_BREAK %r14
-mcck_skip:
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),0(%r14)
- stmg %r8,%r9,__PT_PSW(%r11)
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lgr %r2,%r11 # pass pointer to pt_regs
- brasl %r14,s390_do_machine_check
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno mcck_return
- lg %r1,__LC_KERNEL_STACK # switch to kernel stack
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lgr %r15,%r1
- ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
- jno mcck_return
- TRACE_IRQS_OFF
- brasl %r14,s390_handle_mcck
- TRACE_IRQS_ON
-mcck_return:
- lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
- jno 0f
- stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
-0: lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_MCCK_PSW
-
-mcck_panic:
- lg %r14,__LC_PANIC_STACK
- slgr %r14,%r15
- srag %r14,%r14,PAGE_SHIFT
- jz 0f
- lg %r15,__LC_PANIC_STACK
-0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j mcck_skip
-
-#
-# PSW restart interrupt handler
-#
-ENTRY(restart_int_handler)
- stg %r15,__LC_SAVE_AREA_RESTART
- lg %r15,__LC_RESTART_STACK
- aghi %r15,-__PT_SIZE # create pt_regs on stack
- xc 0(__PT_SIZE,%r15),0(%r15)
- stmg %r0,%r14,__PT_R0(%r15)
- mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
- aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
- xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lg %r3,__LC_RESTART_SOURCE
- ltgr %r3,%r3 # test source cpu address
- jm 1f # negative -> skip source stop
-0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
- brc 10,0b # wait for status stored
-1: basr %r14,%r1 # call function
- stap __SF_EMPTY(%r15) # store cpu address
- llgh %r3,__SF_EMPTY(%r15)
-2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
- brc 2,2b
-3: j 3b
-
- .section .kprobes.text, "ax"
-
-#ifdef CONFIG_CHECK_STACK
-/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
- * No need to properly save the registers, we are going to panic anyway.
- * Setup a pt_regs so that show_trace can provide a good call trace.
- */
-stack_overflow:
- lg %r15,__LC_PANIC_STACK # change to panic stack
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- stmg %r8,%r9,__PT_PSW(%r11)
- mvc __PT_R8(64,%r11),0(%r14)
- stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lgr %r2,%r11 # pass pointer to pt_regs
- jg kernel_stack_overflow
-#endif
-
- .align 8
-cleanup_table:
- .quad system_call
- .quad sysc_do_svc
- .quad sysc_tif
- .quad sysc_restore
- .quad sysc_done
- .quad io_tif
- .quad io_restore
- .quad io_done
- .quad psw_idle
- .quad psw_idle_end
-
-cleanup_critical:
- clg %r9,BASED(cleanup_table) # system_call
- jl 0f
- clg %r9,BASED(cleanup_table+8) # sysc_do_svc
- jl cleanup_system_call
- clg %r9,BASED(cleanup_table+16) # sysc_tif
- jl 0f
- clg %r9,BASED(cleanup_table+24) # sysc_restore
- jl cleanup_sysc_tif
- clg %r9,BASED(cleanup_table+32) # sysc_done
- jl cleanup_sysc_restore
- clg %r9,BASED(cleanup_table+40) # io_tif
- jl 0f
- clg %r9,BASED(cleanup_table+48) # io_restore
- jl cleanup_io_tif
- clg %r9,BASED(cleanup_table+56) # io_done
- jl cleanup_io_restore
- clg %r9,BASED(cleanup_table+64) # psw_idle
- jl 0f
- clg %r9,BASED(cleanup_table+72) # psw_idle_end
- jl cleanup_idle
-0: br %r14
-
-
-cleanup_system_call:
- # check if stpt has been executed
- clg %r9,BASED(cleanup_system_call_insn)
- jh 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: # check if stmg has been executed
- clg %r9,BASED(cleanup_system_call_insn+8)
- jh 0f
- mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
-0: # check if base register setup + TIF bit load has been done
- clg %r9,BASED(cleanup_system_call_insn+16)
- jhe 0f
- # set up saved registers r10 and r12
- stg %r10,16(%r11) # r10 last break
- stg %r12,32(%r11) # r12 thread-info pointer
-0: # check if the user time update has been done
- clg %r9,BASED(cleanup_system_call_insn+24)
- jh 0f
- lg %r15,__LC_EXIT_TIMER
- slg %r15,__LC_SYNC_ENTER_TIMER
- alg %r15,__LC_USER_TIMER
- stg %r15,__LC_USER_TIMER
-0: # check if the system time update has been done
- clg %r9,BASED(cleanup_system_call_insn+32)
- jh 0f
- lg %r15,__LC_LAST_UPDATE_TIMER
- slg %r15,__LC_EXIT_TIMER
- alg %r15,__LC_SYSTEM_TIMER
- stg %r15,__LC_SYSTEM_TIMER
-0: # update accounting time stamp
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
- # do LAST_BREAK
- lg %r9,16(%r11)
- srag %r9,%r9,23
- jz 0f
- mvc __TI_last_break(8,%r12),16(%r11)
-0: # set up saved register r11
- lg %r15,__LC_KERNEL_STACK
- la %r9,STACK_FRAME_OVERHEAD(%r15)
- stg %r9,24(%r11) # r11 pt_regs pointer
- # fill pt_regs
- mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
- stmg %r0,%r7,__PT_R0(%r9)
- mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
- # setup saved register r15
- stg %r15,56(%r11) # r15 stack pointer
- # set new psw address and exit
- larl %r9,sysc_do_svc
- br %r14
-cleanup_system_call_insn:
- .quad system_call
- .quad sysc_stmg
- .quad sysc_per
- .quad sysc_vtime+18
- .quad sysc_vtime+42
-
-cleanup_sysc_tif:
- larl %r9,sysc_tif
- br %r14
-
-cleanup_sysc_restore:
- clg %r9,BASED(cleanup_sysc_restore_insn)
- je 0f
- lg %r9,24(%r11) # get saved pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-0: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
-cleanup_sysc_restore_insn:
- .quad sysc_done - 4
-
-cleanup_io_tif:
- larl %r9,io_tif
- br %r14
-
-cleanup_io_restore:
- clg %r9,BASED(cleanup_io_restore_insn)
- je 0f
- lg %r9,24(%r11) # get saved r11 pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-0: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
-cleanup_io_restore_insn:
- .quad io_done - 4
-
-cleanup_idle:
- # copy interrupt clock & cpu timer
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0: # check if stck & stpt have been executed
- clg %r9,BASED(cleanup_idle_insn)
- jhe 1f
- mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
- mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1: # account system time going idle
- lg %r9,__LC_STEAL_TIMER
- alg %r9,__CLOCK_IDLE_ENTER(%r2)
- slg %r9,__LC_LAST_UPDATE_CLOCK
- stg %r9,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
- lg %r9,__LC_SYSTEM_TIMER
- alg %r9,__LC_LAST_UPDATE_TIMER
- slg %r9,__TIMER_IDLE_ENTER(%r2)
- stg %r9,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
- # prepare return psw
- nihh %r8,0xfffd # clear wait state bit
- lg %r9,48(%r11) # return from psw_idle
- br %r14
-cleanup_idle_insn:
- .quad psw_idle_lpsw
-
-/*
- * Integer constants
- */
- .align 8
-.Lcritical_start:
- .quad __critical_start
-.Lcritical_length:
- .quad __critical_end - __critical_start
-
-
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
- stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- stg %r2,__SF_EMPTY(%r15) # save control block pointer
- stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz sie_gmap
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-sie_gmap:
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
- tm __SIE_PROG20+3(%r14),1 # last exit...
- jnz sie_done
- LPP __SF_EMPTY(%r15) # set guest id
- sie 0(%r14)
-sie_done:
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions beween sie64a and sie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-rewind_pad:
- nop 0
- .globl sie_exit
-sie_exit:
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
- br %r14
-sie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
- j sie_exit
-
- .align 8
-.Lsie_critical:
- .quad sie_gmap
-.Lsie_critical_length:
- .quad sie_done - sie_gmap
-
- EX_TABLE(rewind_pad,sie_fault)
- EX_TABLE(sie_exit,sie_fault)
-#endif
-
- .section .rodata, "a"
-#define SYSCALL(esa,esame,emu) .long esame
- .globl sys_call_table
-sys_call_table:
-#include "syscalls.S"
-#undef SYSCALL
-
-#ifdef CONFIG_COMPAT
-
-#define SYSCALL(esa,esame,emu) .long emu
- .globl sys_call_table_emu
-sys_call_table_emu:
-#include "syscalls.S"
-#undef SYSCALL
-#endif
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000000..d028b0be5c1d
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/export.h>
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+ static unsigned int size;
+ unsigned int r;
+ u64 dummy;
+
+ r = READ_ONCE(size);
+ if (!r) {
+ r = __stfle_asm(&dummy, 1) + 1;
+ WRITE_ONCE(size, r);
+ }
+ return r;
+}
+EXPORT_SYMBOL(stfle_size);
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
new file mode 100644
index 000000000000..03a8973aec3c
--- /dev/null
+++ b/arch/s390/kernel/fpu.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * In-kernel vector facility support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
+ /*
+ * Limit the save to the FPU/vector registers already
+ * in use by the previous context.
+ */
+ flags &= state->hdr.mask;
+ if (flags & KERNEL_FPC)
+ fpu_stfpc(&state->hdr.fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vstm(0, 15, vxrs);
+ vxrs += fpu_vstm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vstm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vstm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vstm(0, 7, vxrs);
+ else
+ vxrs += fpu_vstm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vstm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vstm(16, 23, vxrs);
+ else
+ vxrs += fpu_vstm(24, 31, vxrs);
+ }
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(struct kernel_fpu *state, int flags)
+{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
+ /*
+ * Limit the restore to the FPU/vector registers of the
+ * previous context that have been overwritten by the
+ * current context.
+ */
+ flags &= state->hdr.mask;
+ if (flags & KERNEL_FPC)
+ fpu_lfpc(&state->hdr.fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ load_fp_regs_vx(vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vlm(0, 15, vxrs);
+ vxrs += fpu_vlm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vlm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vlm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vlm(0, 7, vxrs);
+ else
+ vxrs += fpu_vlm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vlm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vlm(16, 23, vxrs);
+ else
+ vxrs += fpu_vlm(24, 31, vxrs);
+ }
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
+
+void load_fpu_state(struct fpu *state, int flags)
+{
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
+
+ if (flags & KERNEL_FPC)
+ fpu_lfpc_safe(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_V0V7)
+ load_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vlm(0, 15, &vxrs[0]);
+ fpu_vlm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vlm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vlm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vlm(0, 7, &vxrs[0]);
+ else
+ fpu_vlm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vlm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vlm(16, 23, &vxrs[16]);
+ else
+ fpu_vlm(24, 31, &vxrs[24]);
+ }
+}
+
+void save_fpu_state(struct fpu *state, int flags)
+{
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
+
+ if (flags & KERNEL_FPC)
+ fpu_stfpc(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vstm(0, 15, &vxrs[0]);
+ fpu_vstm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vstm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vstm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vstm(0, 7, &vxrs[0]);
+ else
+ fpu_vstm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vstm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vstm(16, 23, &vxrs[16]);
+ else
+ fpu_vstm(24, 31, &vxrs[24]);
+ }
+}
+EXPORT_SYMBOL(save_fpu_state);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e3043aef87a9..e94bb98f5231 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Dynamic function tracer architecture backend.
*
- * Copyright IBM Corp. 2009
+ * Copyright IBM Corp. 2009,2014
*
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <linux/hardirq.h>
@@ -12,182 +12,325 @@
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/kmsan-checks.h>
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
+#include <asm/text-patching.h>
+#include <asm/cacheflush.h>
+#include <asm/ftrace.lds.h>
+#include <asm/nospec-branch.h>
+#include <asm/set_memory.h>
+#include "entry.h"
+#include "ftrace.h"
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-void ftrace_disable_code(void);
-void ftrace_enable_insn(void);
-
-#ifdef CONFIG_64BIT
-/*
- * The 64-bit mcount code looks like this:
- * stg %r14,8(%r15) # offset 0
- * > larl %r1,<&counter> # offset 6
- * > brasl %r14,_mcount # offset 12
- * lg %r14,8(%r15) # offset 18
- * Total length is 24 bytes. The middle two instructions of the mcount
- * block get overwritten by ftrace_make_nop / ftrace_make_call.
- * The 64-bit enabled ftrace code block looks like this:
- * stg %r14,8(%r15) # offset 0
- * > lg %r1,__LC_FTRACE_FUNC # offset 6
- * > lgr %r0,%r0 # offset 12
- * > basr %r14,%r1 # offset 16
- * lg %r14,8(%15) # offset 18
- * The return points of the mcount/ftrace function have the same offset 18.
- * The 64-bit disable ftrace code block looks like this:
- * stg %r14,8(%r15) # offset 0
- * > jg .+18 # offset 6
- * > lgr %r0,%r0 # offset 12
- * > basr %r14,%r1 # offset 16
- * lg %r14,8(%15) # offset 18
- * The jg instruction branches to offset 24 to skip as many instructions
- * as possible.
- */
-asm(
- " .align 4\n"
- "ftrace_disable_code:\n"
- " jg 0f\n"
- " lgr %r0,%r0\n"
- " basr %r14,%r1\n"
- "0:\n"
- " .align 4\n"
- "ftrace_enable_insn:\n"
- " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n");
-
-#define FTRACE_INSN_SIZE 6
-
-#else /* CONFIG_64BIT */
/*
- * The 31-bit mcount code looks like this:
- * st %r14,4(%r15) # offset 0
- * > bras %r1,0f # offset 4
- * > .long _mcount # offset 8
- * > .long <&counter> # offset 12
- * > 0: l %r14,0(%r1) # offset 16
- * > l %r1,4(%r1) # offset 20
- * basr %r14,%r14 # offset 24
- * l %r14,4(%r15) # offset 26
- * Total length is 30 bytes. The twenty bytes starting from offset 4
- * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call.
- * The 31-bit enabled ftrace code block looks like this:
- * st %r14,4(%r15) # offset 0
- * > l %r14,__LC_FTRACE_FUNC # offset 4
- * > j 0f # offset 8
- * > .fill 12,1,0x07 # offset 12
- * 0: basr %r14,%r14 # offset 24
- * l %r14,4(%r14) # offset 26
- * The return points of the mcount/ftrace function have the same offset 26.
- * The 31-bit disabled ftrace code block looks like this:
- * st %r14,4(%r15) # offset 0
- * > j .+26 # offset 4
- * > j 0f # offset 8
- * > .fill 12,1,0x07 # offset 12
- * 0: basr %r14,%r14 # offset 24
- * l %r14,4(%r14) # offset 26
- * The j instruction branches to offset 30 to skip as many instructions
- * as possible.
+ * To generate function prologue either gcc's hotpatch feature (since gcc 4.8)
+ * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags
+ * (since gcc 9 / clang 10) is used.
+ * In both cases the original and also the disabled function prologue contains
+ * only a single six byte instruction and looks like this:
+ * > brcl 0,0 # offset 0
+ * To enable ftrace the code gets patched like above and afterwards looks
+ * like this:
+ * > brasl %r0,ftrace_caller # offset 0
+ *
+ * The instruction will be patched by ftrace_make_call / ftrace_make_nop.
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
*/
-asm(
- " .align 4\n"
- "ftrace_disable_code:\n"
- " j 1f\n"
- " j 0f\n"
- " .fill 12,1,0x07\n"
- "0: basr %r14,%r14\n"
- "1:\n"
- " .align 4\n"
- "ftrace_enable_insn:\n"
- " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n");
-#define FTRACE_INSN_SIZE 4
+void *ftrace_func __read_mostly = ftrace_stub;
+struct ftrace_insn {
+ u16 opc;
+ s32 disp;
+} __packed;
+
+static const char *ftrace_shared_hotpatch_trampoline(const char **end)
+{
+ const char *tstart, *tend;
+
+ tstart = ftrace_shared_hotpatch_trampoline_br;
+ tend = ftrace_shared_hotpatch_trampoline_br_end;
+#ifdef CONFIG_EXPOLINE
+ if (!nospec_disable) {
+ tstart = ftrace_shared_hotpatch_trampoline_exrl;
+ tend = ftrace_shared_hotpatch_trampoline_exrl_end;
+ }
+#endif /* CONFIG_EXPOLINE */
+ if (end)
+ *end = tend;
+ return tstart;
+}
+
+bool ftrace_need_init_nop(void)
+{
+ return !cpu_has_seq_insn();
+}
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
+ __ftrace_hotpatch_trampolines_start;
+ static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
+ static struct ftrace_hotpatch_trampoline *trampoline;
+ struct ftrace_hotpatch_trampoline **next_trampoline;
+ struct ftrace_hotpatch_trampoline *trampolines_end;
+ struct ftrace_hotpatch_trampoline tmp;
+ struct ftrace_insn *insn;
+ struct ftrace_insn old;
+ const char *shared;
+ s32 disp;
+
+ BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) !=
+ SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE);
-#endif /* CONFIG_64BIT */
+ next_trampoline = &next_vmlinux_trampoline;
+ trampolines_end = __ftrace_hotpatch_trampolines_end;
+ shared = ftrace_shared_hotpatch_trampoline(NULL);
+#ifdef CONFIG_MODULES
+ if (mod) {
+ next_trampoline = &mod->arch.next_trampoline;
+ trampolines_end = mod->arch.trampolines_end;
+ }
+#endif
+ if (WARN_ON_ONCE(*next_trampoline >= trampolines_end))
+ return -ENOMEM;
+ trampoline = (*next_trampoline)++;
+
+ if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+ return -EFAULT;
+ /* Check for the compiler-generated fentry nop (brcl 0, .). */
+ if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
+ return -EINVAL;
+
+ /* Generate the trampoline. */
+ tmp.brasl_opc = 0xc015; /* brasl %r1, shared */
+ tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2;
+ tmp.interceptor = FTRACE_ADDR;
+ tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn);
+ s390_kernel_write(trampoline, &tmp, sizeof(tmp));
+
+ /* Generate a jump to the trampoline. */
+ disp = ((char *)trampoline - (char *)rec->ip) / 2;
+ insn = (struct ftrace_insn *)rec->ip;
+ s390_kernel_write(&insn->disp, &disp, sizeof(disp));
+
+ return 0;
+}
+
+static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec)
+{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ struct ftrace_insn insn;
+ s64 disp;
+ u16 opc;
+
+ if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn)))
+ return ERR_PTR(-EFAULT);
+ disp = (s64)insn.disp * 2;
+ trampoline = (void *)(rec->ip + disp);
+ if (get_kernel_nofault(opc, &trampoline->brasl_opc))
+ return ERR_PTR(-EFAULT);
+ if (opc != 0xc015)
+ return ERR_PTR(-EINVAL);
+ return trampoline;
+}
+
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+ /* brasl r0,target or brcl 0,0 */
+ return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+ .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+ unsigned long target)
+{
+ struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+ struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+ struct ftrace_insn old;
+
+ if (!IS_ALIGNED(ip, 8))
+ return -EINVAL;
+ if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+ return -EFAULT;
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *)ip, &new, sizeof(new));
+ return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long addr)
+{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ u64 old;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ if (get_kernel_nofault(old, &trampoline->interceptor))
+ return -EFAULT;
+ if (old != old_addr)
+ return -EINVAL;
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
+ return 0;
+}
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+ else
+ return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
+static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
+{
+ u16 old;
+ u8 op;
+
+ if (get_kernel_nofault(old, addr))
+ return -EFAULT;
+ if (old != expected)
+ return -EINVAL;
+ /* set mask field to all ones or zeroes */
+ op = enable ? 0xf4 : 0x04;
+ s390_kernel_write((char *)addr + 1, &op, sizeof(op));
+ return 0;
+}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
- MCOUNT_INSN_SIZE))
- return -EPERM;
- return 0;
+ /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, addr, 0);
+ else
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+}
+
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ struct ftrace_hotpatch_trampoline *trampoline;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
+ /* Expect brcl 0x0,... */
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
- FTRACE_INSN_SIZE))
- return -EPERM;
- return 0;
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, 0, addr);
+ else
+ return ftrace_make_trampoline_call(rec, addr);
}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
+ ftrace_func = func;
return 0;
}
-int __init ftrace_dyn_arch_init(void *data)
+void arch_ftrace_update_code(int command)
{
- *(unsigned long *) data = 0;
- return 0;
+ ftrace_modify_all_code(command);
}
-#endif /* CONFIG_DYNAMIC_FTRACE */
+void ftrace_arch_code_modify_post_process(void)
+{
+ /*
+ * Flush any pre-fetched instructions on all
+ * CPUs to make the new code visible.
+ */
+ text_poke_sync_lock();
+}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
- unsigned long ip)
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- struct ftrace_graph_ent trace;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
+ unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15];
+ if (unlikely(ftrace_graph_is_dead()))
+ return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+ if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs))
+ *parent = (unsigned long)&return_to_handler;
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct kprobe_ctlblk *kcb;
+ struct pt_regs *regs;
+ struct kprobe *p;
+ int bit;
+
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ kmsan_unpoison_memory(fregs, ftrace_regs_size());
+ regs = ftrace_get_regs(fregs);
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (!regs || unlikely(!p) || kprobe_disabled(p))
goto out;
- ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
- goto out;
- trace.func = ip;
- /* Only trace if the calling function expects to. */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
+
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
goto out;
}
- parent = (unsigned long) return_to_handler;
-out:
- return parent;
-}
-#ifdef CONFIG_DYNAMIC_FTRACE
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative and save to prepare_ftrace_return. To disable
- * the call to prepare_ftrace_return we patch the bras offset to point
- * directly after the instructions. To enable the call we calculate
- * the original offset to prepare_ftrace_return and put it back.
- */
-int ftrace_enable_ftrace_graph_caller(void)
-{
- unsigned short offset;
+ __this_cpu_write(current_kprobe, p);
- offset = ((void *) prepare_ftrace_return -
- (void *) ftrace_graph_caller) / 2;
- return probe_kernel_write(ftrace_graph_caller + 2,
- &offset, sizeof(offset));
-}
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-int ftrace_disable_ftrace_graph_caller(void)
-{
- static unsigned short offset = 0x0002;
+ instruction_pointer_set(regs, ip);
+
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
- return probe_kernel_write(ftrace_graph_caller + 2,
- &offset, sizeof(offset));
+ instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
+
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ }
+ __this_cpu_write(current_kprobe, NULL);
+out:
+ ftrace_test_recursion_unlock(bit);
}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ return 0;
+}
+#endif
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
new file mode 100644
index 000000000000..23337065f402
--- /dev/null
+++ b/arch/s390/kernel/ftrace.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FTRACE_H
+#define _FTRACE_H
+
+#include <asm/types.h>
+
+struct ftrace_hotpatch_trampoline {
+ u16 brasl_opc;
+ s32 brasl_disp;
+ s16: 16;
+ u64 rest_of_intercepted_function;
+ u64 interceptor;
+} __packed;
+
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[];
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[];
+extern const char ftrace_shared_hotpatch_trampoline_br[];
+extern const char ftrace_shared_hotpatch_trampoline_br_end[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
+
+#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..cf26d7a37425
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void guarded_storage_release(struct task_struct *tsk)
+{
+ kfree(tsk->thread.gs_cb);
+ kfree(tsk->thread.gs_bc_cb);
+}
+
+static int gs_enable(void)
+{
+ struct gs_cb *gs_cb;
+
+ if (!current->thread.gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ gs_cb->gsd = 25;
+ preempt_disable();
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_disable(void)
+{
+ if (current->thread.gs_cb) {
+ preempt_disable();
+ kfree(current->thread.gs_cb);
+ current->thread.gs_cb = NULL;
+ local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ if (!gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ current->thread.gs_bc_cb = gs_cb;
+ }
+ if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+ return -EFAULT;
+ return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ current->thread.gs_bc_cb = NULL;
+ kfree(gs_cb);
+ return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+ struct gs_cb *gs_cb;
+
+ preempt_disable();
+ clear_thread_flag(TIF_GUARDED_STORAGE);
+ gs_cb = current->thread.gs_bc_cb;
+ if (gs_cb) {
+ kfree(current->thread.gs_cb);
+ current->thread.gs_bc_cb = NULL;
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ }
+ preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+ struct task_struct *sibling;
+
+ read_lock(&tasklist_lock);
+ for_each_thread(current, sibling) {
+ if (!sibling->thread.gs_bc_cb)
+ continue;
+ if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+ kick_process(sibling);
+ }
+ read_unlock(&tasklist_lock);
+ return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+ struct gs_cb __user *, gs_cb)
+{
+ if (!cpu_has_gs())
+ return -EOPNOTSUPP;
+ switch (command) {
+ case GS_ENABLE:
+ return gs_enable();
+ case GS_DISABLE:
+ return gs_disable();
+ case GS_SET_BC_CB:
+ return gs_set_bc_cb(gs_cb);
+ case GS_CLEAR_BC_CB:
+ return gs_clear_bc_cb();
+ case GS_BROADCAST:
+ return gs_broadcast();
+ default:
+ return -EINVAL;
+ }
+}
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index fd8db63dfc94..7edb9ded199c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -1,501 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright IBM Corp. 1999, 2010
*
- * Author(s): Hartmut Penner <hp@de.ibm.com>
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Rob van der Heij <rvdhei@iae.nl>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- * There are 5 different IPL methods
- * 1) load the image directly into ram at address 0 and do an PSW restart
- * 2) linload will load the image from address 0x10000 to memory 0x10000
- * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated)
- * 3) generate the tape ipl header, store the generated image on a tape
- * and ipl from it
- * In case of SL tape you need to IPL 5 times to get past VOL1 etc
- * 4) generate the vm reader ipl header, move the generated image to the
- * VM reader (use option NOH!) and do a ipl from reader (VM only)
- * 5) direct call of start by the SALIPL loader
- * We use the cpuid to distinguish between VM and native ipl
- * params for kernel are pushed to 0x10400 (see setup.h)
+ * Author(s): Hartmut Penner <hp@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Rob van der Heij <rvdhei@iae.nl>
*
*/
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
-
-#ifdef CONFIG_64BIT
-#define ARCH_OFFSET 4
-#else
-#define ARCH_OFFSET 0
-#endif
+#include <asm/ptrace.h>
__HEAD
-
-#define IPL_BS 0x730
- .org 0
- .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
- .long 0x02000018,0x60000050 # by ipl to addresses 0-23.
- .long 0x02000068,0x60000050 # (a PSW and two CCWs).
- .fill 80-24,1,0x40 # bytes 24-79 are discarded !!
- .long 0x020000f0,0x60000050 # The next 160 byte are loaded
- .long 0x02000140,0x60000050 # to addresses 0x18-0xb7
- .long 0x02000190,0x60000050 # They form the continuation
- .long 0x020001e0,0x60000050 # of the CCW program started
- .long 0x02000230,0x60000050 # by ipl and load the range
- .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image
- .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730
- .long 0x02000320,0x60000050 # in memory. At the end of
- .long 0x02000370,0x60000050 # the channel program the PSW
- .long 0x020003c0,0x60000050 # at location 0 is loaded.
- .long 0x02000410,0x60000050 # Initial processing starts
- .long 0x02000460,0x60000050 # at 0x200 = iplstart.
- .long 0x020004b0,0x60000050
- .long 0x02000500,0x60000050
- .long 0x02000550,0x60000050
- .long 0x020005a0,0x60000050
- .long 0x020005f0,0x60000050
- .long 0x02000640,0x60000050
- .long 0x02000690,0x60000050
- .long 0x020006e0,0x20000050
-
- .org 0x200
-#
-# subroutine to set architecture mode
-#
-.Lsetmode:
-#ifdef CONFIG_64BIT
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
-#else
- mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
-#endif
- br %r14
-
-#
-# subroutine to wait for end I/O
-#
-.Lirqwait:
-#ifdef CONFIG_64BIT
- mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
-.Lioint:
- br %r14
- .align 8
-.Lnewpsw:
- .quad 0x0000000080000000,.Lioint
-#else
- mvc 0x78(8),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
-.Lioint:
- br %r14
- .align 8
-.Lnewpsw:
- .long 0x00080000,0x80000000+.Lioint
-#endif
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
-
-#
-# subroutine for loading cards from the reader
-#
-.Lloader:
- la %r4,0(%r14)
- la %r3,.Lorb # r2 = address of orb into r2
- la %r5,.Lirb # r4 = address of irb
- la %r6,.Lccws
- la %r7,20
-.Linit:
- st %r2,4(%r6) # initialize CCW data addresses
- la %r2,0x50(%r2)
- la %r6,8(%r6)
- bct 7,.Linit
-
- lctl %c6,%c6,.Lcr6 # set IO subclass mask
- slr %r2,%r2
-.Lldlp:
- ssch 0(%r3) # load chunk of 1600 bytes
- bnz .Llderr
-.Lwait4irq:
- bas %r14,.Lirqwait
- c %r1,0xb8 # compare subchannel number
- bne .Lwait4irq
- tsch 0(%r5)
-
- slr %r0,%r0
- ic %r0,8(%r5) # get device status
- chi %r0,8 # channel end ?
- be .Lcont
- chi %r0,12 # channel end + device end ?
- be .Lcont
-
- l %r0,4(%r5)
- s %r0,8(%r3) # r0/8 = number of ccws executed
- mhi %r0,10 # *10 = number of bytes in ccws
- lh %r3,10(%r5) # get residual count
- sr %r0,%r3 # #ccws*80-residual=#bytes read
- ar %r2,%r0
-
- br %r4 # r2 contains the total size
-
-.Lcont:
- ahi %r2,0x640 # add 0x640 to total size
- la %r6,.Lccws
- la %r7,20
-.Lincr:
- l %r0,4(%r6) # update CCW data addresses
- ahi %r0,0x640
- st %r0,4(%r6)
- ahi %r6,8
- bct 7,.Lincr
-
- b .Lldlp
-.Llderr:
- lpsw .Lcrash
-
- .align 8
-.Lorb: .long 0x00000000,0x0080ff00,.Lccws
-.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lcr6: .long 0xff000000
-.Lloadp:.long 0,0
- .align 8
-.Lcrash:.long 0x000a0000,0x00000000
-
- .align 8
-.Lccws: .rept 19
- .long 0x02600050,0x00000000
- .endr
- .long 0x02200050,0x00000000
-
-iplstart:
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
- lh %r1,0xb8 # test if subchannel number
- bct %r1,.Lnoload # is valid
- l %r1,0xb8 # load ipl subchannel number
- la %r2,IPL_BS # load start address
- bas %r14,.Lloader # load rest of ipl image
- l %r12,.Lparm # pointer to parameter area
- st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
-
-#
-# load parameter file from ipl device
-#
-.Lagain1:
- l %r2,.Linitrd # ramdisk loc. is temp
- bas %r14,.Lloader # load parameter file
- ltr %r2,%r2 # got anything ?
- bz .Lnopf
- chi %r2,895
- bnh .Lnotrunc
- la %r2,895
-.Lnotrunc:
- l %r4,.Linitrd
- clc 0(3,%r4),.L_hdr # if it is HDRx
- bz .Lagain1 # skip dataset header
- clc 0(3,%r4),.L_eof # if it is EOFx
- bz .Lagain1 # skip dateset trailer
- la %r5,0(%r4,%r2)
- lr %r3,%r2
- la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
- mvc 0(256,%r3),0(%r4)
- mvc 256(256,%r3),256(%r4)
- mvc 512(256,%r3),512(%r4)
- mvc 768(122,%r3),768(%r4)
- slr %r0,%r0
- b .Lcntlp
-.Ldelspc:
- ic %r0,0(%r2,%r3)
- chi %r0,0x20 # is it a space ?
- be .Lcntlp
- ahi %r2,1
- b .Leolp
-.Lcntlp:
- brct %r2,.Ldelspc
-.Leolp:
- slr %r0,%r0
- stc %r0,0(%r2,%r3) # terminate buffer
-.Lnopf:
-
-#
-# load ramdisk from ipl device
-#
-.Lagain2:
- l %r2,.Linitrd # addr of ramdisk
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
- bas %r14,.Lloader # load ramdisk
- st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
- ltr %r2,%r2
- bnz .Lrdcont
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
-.Lrdcont:
- l %r2,.Linitrd
-
- clc 0(3,%r2),.L_hdr # skip HDRx and EOFx
- bz .Lagain2
- clc 0(3,%r2),.L_eof
- bz .Lagain2
-
-#
-# reset files in VM reader
-#
- stidp .Lcpuid # store cpuid
- tm .Lcpuid,0xff # running VM ?
- bno .Lnoreset
- la %r2,.Lreset
- lhi %r3,26
- diag %r2,%r3,8
- la %r5,.Lirb
- stsch 0(%r5) # check if irq is pending
- tm 30(%r5),0x0f # by verifying if any of the
- bnz .Lwaitforirq # activity or status control
- tm 31(%r5),0xff # bits is set in the schib
- bz .Lnoreset
-.Lwaitforirq:
- bas %r14,.Lirqwait # wait for IO interrupt
- c %r1,0xb8 # compare subchannel number
- bne .Lwaitforirq
- la %r5,.Lirb
- tsch 0(%r5)
-.Lnoreset:
- b .Lnoload
-
-#
-# everything loaded, go for it
-#
-.Lnoload:
- l %r1,.Lstartup
- br %r1
-
-.Linitrd:.long _end # default address of initrd
-.Lparm: .long PARMAREA
-.Lstartup: .long startup
-.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
- .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
- .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
-.L_eof: .long 0xc5d6c600 /* C'EOF' */
-.L_hdr: .long 0xc8c4d900 /* C'HDR' */
- .align 8
-.Lcpuid:.fill 8,1,0
-
+SYM_CODE_START(startup_continue)
#
-# SALIPL loader support. Based on a patch by Rob van der Heij.
-# This entry point is called directly from the SALIPL loader and
-# doesn't need a builtin ipl record.
-#
- .org 0x800
-ENTRY(start)
- stm %r0,%r15,0x07b0 # store registers
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
- basr %r12,%r0
-.base:
- l %r11,.parm
- l %r8,.cmd # pointer to command buffer
-
- ltr %r9,%r9 # do we have SALIPL parameters?
- bp .sk8x8
-
- mvc 0(64,%r8),0x00b0 # copy saved registers
- xc 64(240-64,%r8),0(%r8) # remainder of buffer
- tr 0(64,%r8),.lowcase
- b .gotr
-.sk8x8:
- mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
-.gotr:
- slr %r0,%r0
- st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
- st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
- j startup # continue with startup
-.cmd: .long COMMAND_LINE # address of command line buffer
-.parm: .long PARMAREA
-.lowcase:
- .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
- .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
- .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
- .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
- .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
- .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
- .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
- .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
- .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
- .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
- .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
- .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
- .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
- .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
- .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
- .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
-
- .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
- .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
- .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
- .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
- .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
- .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
- .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
- .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
- .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg
- .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi
- .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop
- .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr
- .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx
- .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz
- .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
- .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
-
+# Setup stack
#
-# startup-code at 0x10000, running in absolute addressing mode
-# this is called either by the ipl loader or directly by PSW restart
-# or linload or SALIPL
+ GET_LC %r2
+ larl %r14,init_task
+ stg %r14,__LC_CURRENT(%r2)
+ larl %r15,init_thread_union+STACK_INIT_OFFSET
+ stg %r15,__LC_KERNEL_STACK(%r2)
+ brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
+ brasl %r14,startup_init # s390 specific early init
+ brasl %r14,start_kernel # common init code
#
- .org 0x10000
-ENTRY(startup)
- j .Lep_startup_normal
- .org 0x10008
+# We returned from start_kernel ?!? PANIK
#
-# This is a list of s390 kernel entry points. At address 0x1000f the number of
-# valid entry points is stored.
-#
-# IMPORTANT: Do not change this table, it is s390 kernel ABI!
-#
- .ascii "S390EP"
- .byte 0x00,0x01
-#
-# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
-#
- .org 0x10010
-ENTRY(startup_kdump)
- j .Lep_startup_kdump
-.Lep_startup_normal:
-#ifdef CONFIG_64BIT
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
-#else
- mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
-#endif
- basr %r13,0 # get base
-.LPG0:
- xc 0x200(256),0x200 # partially clear lowcore
- xc 0x300(256),0x300
- xc 0xe00(256),0xe00
- stck __LC_LAST_UPDATE_CLOCK
- spt 6f-.LPG0(%r13)
- mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
- xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
-#ifndef CONFIG_MARCH_G5
- # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
- .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list
- tm __LC_STFL_FAC_LIST,0x01 # stfle available ?
- jz 0f
- la %r0,1
- .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended
- # verify if all required facilities are supported by the machine
-0: la %r1,__LC_STFL_FAC_LIST
- la %r2,3f+8-.LPG0(%r13)
- l %r3,0(%r2)
-1: l %r0,0(%r1)
- n %r0,4(%r2)
- cl %r0,4(%r2)
- jne 2f
- la %r1,4(%r1)
- la %r2,4(%r2)
- ahi %r3,-1
- jnz 1b
- j 4f
-2: l %r15,.Lstack-.LPG0(%r13)
- ahi %r15,-96
- la %r2,.Lals_string-.LPG0(%r13)
- l %r3,.Lsclp_print-.LPG0(%r13)
- basr %r14,%r3
- lpsw 3f-.LPG0(%r13) # machine type not good enough, crash
-.Lals_string:
- .asciz "The Linux kernel requires more recent processor hardware"
-.Lsclp_print:
- .long _sclp_print_early
-.Lstack:
- .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
- .align 16
-3: .long 0x000a0000,0x8badcccc
-
-# List of facilities that are required. If not all facilities are present
-# the kernel will crash. Format is number of facility words with bits set,
-# followed by the facility words.
-
-#if defined(CONFIG_64BIT)
-#if defined(CONFIG_MARCH_ZEC12)
- .long 3, 0xc100efe3, 0xf46ce000, 0x00400000
-#elif defined(CONFIG_MARCH_Z196)
- .long 2, 0xc100efe3, 0xf46c0000
-#elif defined(CONFIG_MARCH_Z10)
- .long 2, 0xc100efe3, 0xf0680000
-#elif defined(CONFIG_MARCH_Z9_109)
- .long 1, 0xc100efc3
-#elif defined(CONFIG_MARCH_Z990)
- .long 1, 0xc0002000
-#elif defined(CONFIG_MARCH_Z900)
- .long 1, 0xc0000000
-#endif
-#else
-#if defined(CONFIG_MARCH_ZEC12)
- .long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z196)
- .long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z10)
- .long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z9_109)
- .long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z990)
- .long 1, 0x80002000
-#elif defined(CONFIG_MARCH_Z900)
- .long 1, 0x80000000
-#endif
-#endif
-4:
-#endif
-
-#ifdef CONFIG_64BIT
- /* Continue with 64bit startup code in head64.S */
- sam64 # switch to 64 bit mode
- jg startup_continue
-#else
- /* Continue with 31bit startup code in head31.S */
- l %r13,5f-.LPG0(%r13)
- b 0(%r13)
- .align 8
-5: .long startup_continue
-#endif
-
- .align 8
-6: .long 0x7fffffff,0xffffffff
-
-#include "head_kdump.S"
-
-#
-# params at 10400 (setup.h)
-#
- .org PARMAREA
- .long 0,0 # IPL_DEVICE
- .long 0,0 # INITRD_START
- .long 0,0 # INITRD_SIZE
- .long 0,0 # OLDMEM_BASE
- .long 0,0 # OLDMEM_SIZE
-
- .org COMMAND_LINE
- .byte "root=/dev/ram0 ro"
- .byte 0
+ basr %r13,0
+ lpswe dw_psw-.(%r13) # load disabled wait psw
+SYM_CODE_END(startup_continue)
- .org 0x11000
+ .balign 16
+SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000)
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
deleted file mode 100644
index 9a99856df1c9..000000000000
--- a/arch/s390/kernel/head31.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright IBM Corp. 2005, 2010
- *
- * Author(s): Hartmut Penner <hp@de.ibm.com>
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Rob van der Heij <rvdhei@iae.nl>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-
-__HEAD
-ENTRY(startup_continue)
- basr %r13,0 # get base
-.LPG1:
-
- l %r1,.Lbase_cc-.LPG1(%r13)
- mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
- lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
- l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
- # move IPL device to lowcore
-#
-# Setup stack
-#
- l %r15,.Linittu-.LPG1(%r13)
- st %r15,__LC_THREAD_INFO # cache thread info in lowcore
- mvc __LC_CURRENT(4),__TI_task(%r15)
- ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
- st %r15,__LC_KERNEL_STACK # set end of kernel stack
- ahi %r15,-96
-#
-# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
-# and create a kernel NSS if the SAVESYS= parm is defined
-#
- l %r14,.Lstartup_init-.LPG1(%r13)
- basr %r14,%r14
- lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space,
- # virtual and never return ...
- .align 8
-.Lentry:.long 0x00080000,0x80000000 + _stext
-.Lctl: .long 0x04b50000 # cr0: various things
- .long 0 # cr1: primary space segment table
- .long .Lduct # cr2: dispatchable unit control table
- .long 0 # cr3: instruction authorization
- .long 0 # cr4: instruction authorization
- .long .Lduct # cr5: primary-aste origin
- .long 0 # cr6: I/O interrupts
- .long 0 # cr7: secondary space segment table
- .long 0 # cr8: access registers translation
- .long 0 # cr9: tracing off
- .long 0 # cr10: tracing off
- .long 0 # cr11: tracing off
- .long 0 # cr12: tracing off
- .long 0 # cr13: home space segment table
- .long 0xc0000000 # cr14: machine check handling off
- .long 0 # cr15: linkage stack operations
-.Lmchunk:.long memory_chunk
-.Lbss_bgn: .long __bss_start
-.Lbss_end: .long _end
-.Lparmaddr: .long PARMAREA
-.Linittu: .long init_thread_union
-.Lstartup_init:
- .long startup_init
- .align 64
-.Lduct: .long 0,0,0,0,.Lduald,0,0,0
- .long 0,0,0,0,0,0,0,0
- .align 128
-.Lduald:.rept 8
- .long 0x80000000,0,0,0 # invalid access-list entries
- .endr
-.Lbase_cc:
- .long sched_clock_base_cc
-
-ENTRY(_ehead)
-
- .org 0x100000 - 0x11000 # head.o ends at 0x11000
-#
-# startup-code, running in absolute addressing mode
-#
-ENTRY(_stext)
- basr %r13,0 # get base
-.LPG3:
-# check control registers
- stctl %c0,%c15,0(%r15)
- oi 2(%r15),0x60 # enable sigp emergency & external call
- oi 0(%r15),0x10 # switch on low address protection
- lctl %c0,%c15,0(%r15)
-
-#
- lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess
- l %r14,.Lstart-.LPG3(%r13)
- basr %r14,%r14 # call start_kernel
-#
-# We returned from start_kernel ?!? PANIK
-#
- basr %r13,0
- lpsw .Ldw-.(%r13) # load disabled wait psw
-#
- .align 8
-.Ldw: .long 0x000a0000,0x00000000
-.Lstart:.long start_kernel
-.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
deleted file mode 100644
index b9e25ae2579c..000000000000
--- a/arch/s390/kernel/head64.S
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright IBM Corp. 1999, 2010
- *
- * Author(s): Hartmut Penner <hp@de.ibm.com>
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Rob van der Heij <rvdhei@iae.nl>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-
-__HEAD
-ENTRY(startup_continue)
- larl %r1,sched_clock_base_cc
- mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
- larl %r13,.LPG1 # get base
- lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
- lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
- # move IPL device to lowcore
- lghi %r0,__LC_PASTE
- stg %r0,__LC_VDSO_PER_CPU
-#
-# Setup stack
-#
- larl %r15,init_thread_union
- stg %r15,__LC_THREAD_INFO # cache thread info in lowcore
- lg %r14,__TI_task(%r15) # cache current in lowcore
- stg %r14,__LC_CURRENT
- aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
- stg %r15,__LC_KERNEL_STACK # set end of kernel stack
- aghi %r15,-160
-#
-# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
-# and create a kernel NSS if the SAVESYS= parm is defined
-#
- brasl %r14,startup_init
- lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space,
- # virtual and never return ...
- .align 16
-.LPG1:
-.Lentry:.quad 0x0000000180000000,_stext
-.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
- .quad 0 # cr1: primary space segment table
- .quad .Lduct # cr2: dispatchable unit control table
- .quad 0 # cr3: instruction authorization
- .quad 0 # cr4: instruction authorization
- .quad .Lduct # cr5: primary-aste origin
- .quad 0 # cr6: I/O interrupts
- .quad 0 # cr7: secondary space segment table
- .quad 0 # cr8: access registers translation
- .quad 0 # cr9: tracing off
- .quad 0 # cr10: tracing off
- .quad 0 # cr11: tracing off
- .quad 0 # cr12: tracing off
- .quad 0 # cr13: home space segment table
- .quad 0xc0000000 # cr14: machine check handling off
- .quad 0 # cr15: linkage stack operations
-.Lpcmsk:.quad 0x0000000180000000
-.L4malign:.quad 0xffffffffffc00000
-.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
-.Lnop: .long 0x07000700
-.Lparmaddr:
- .quad PARMAREA
- .align 64
-.Lduct: .long 0,0,0,0,.Lduald,0,0,0
- .long 0,0,0,0,0,0,0,0
- .align 128
-.Lduald:.rept 8
- .long 0x80000000,0,0,0 # invalid access-list entries
- .endr
-
-ENTRY(_ehead)
-
- .org 0x100000 - 0x11000 # head.o ends at 0x11000
-#
-# startup-code, running in absolute addressing mode
-#
-ENTRY(_stext)
- basr %r13,0 # get base
-.LPG3:
-# check control registers
- stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x60 # enable sigp emergency & external call
- oi 4(%r15),0x10 # switch on low address proctection
- lctlg %c0,%c15,0(%r15)
-
- lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess
- brasl %r14,start_kernel # go to C code
-#
-# We returned from start_kernel ?!? PANIK
-#
- basr %r13,0
- lpswe .Ldw-.(%r13) # load disabled wait psw
-
- .align 8
-.Ldw: .quad 0x0002000180000000,0x0000000000000000
-.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
deleted file mode 100644
index 085a95eb315f..000000000000
--- a/arch/s390/kernel/head_kdump.S
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * S390 kdump lowlevel functions (new kernel)
- *
- * Copyright IBM Corp. 2011
- * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <asm/sigp.h>
-
-#define DATAMOVER_ADDR 0x4000
-#define COPY_PAGE_ADDR 0x6000
-
-#ifdef CONFIG_CRASH_DUMP
-
-#
-# kdump entry (new kernel - not yet relocated)
-#
-# Note: This code has to be position independent
-#
-
-.align 2
-.Lep_startup_kdump:
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode
- sam64 # Switch to 64 bit addressing
- basr %r13,0
-.Lbase:
- larl %r2,.Lbase_addr # Check, if we have been
- lg %r2,0(%r2) # already relocated:
- clgr %r2,%r13 #
- jne .Lrelocate # No : Start data mover
- lghi %r2,0 # Yes: Start kdump kernel
- brasl %r14,startup_kdump_relocated
-
-.Lrelocate:
- larl %r4,startup
- lg %r2,0x418(%r4) # Get kdump base
- lg %r3,0x420(%r4) # Get kdump size
-
- larl %r10,.Lcopy_start # Source of data mover
- lghi %r8,DATAMOVER_ADDR # Target of data mover
- mvc 0(256,%r8),0(%r10) # Copy data mover code
-
- agr %r8,%r2 # Copy data mover to
- mvc 0(256,%r8),0(%r10) # reserved mem
-
- lghi %r14,DATAMOVER_ADDR # Jump to copied data mover
- basr %r14,%r14
-.Lbase_addr:
- .quad .Lbase
-
-#
-# kdump data mover code (runs at address DATAMOVER_ADDR)
-#
-# r2: kdump base address
-# r3: kdump size
-#
-.Lcopy_start:
- basr %r13,0 # Base
-0:
- lgr %r11,%r2 # Save kdump base address
- lgr %r12,%r2
- agr %r12,%r3 # Compute kdump end address
-
- lghi %r5,0
- lghi %r10,COPY_PAGE_ADDR # Load copy page address
-1:
- mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp
- mvc 0(256,%r5),0(%r11) # Copy new kernel to old
- mvc 0(256,%r11),0(%r10) # Copy tmp to new
- aghi %r11,256
- aghi %r5,256
- clgr %r11,%r12
- jl 1b
-
- lg %r14,.Lstartup_kdump-0b(%r13)
- basr %r14,%r14 # Start relocated kernel
-.Lstartup_kdump:
- .long 0x00000000,0x00000000 + startup_kdump_relocated
-.Lcopy_end:
-
-#
-# Startup of kdump (relocated new kernel)
-#
-.align 2
-startup_kdump_relocated:
- basr %r13,0
-0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel...
-.align 8
-.Lrestart_psw:
- .quad 0x0000000080000000,0x0000000000000000 + startup
-#else
-.align 2
-.Lep_startup_kdump:
-#ifdef CONFIG_64BIT
- larl %r13,startup_kdump_crash
- lpswe 0(%r13)
-.align 8
-startup_kdump_crash:
- .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash
-#else
- basr %r13,0
-0: lpsw startup_kdump_crash-0b(%r13)
-.align 8
-startup_kdump_crash:
- .long 0x000a0000,0x00000000 + startup_kdump_crash
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..217206522266
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "hd: " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There is two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's reoccuring work function reads CPU capacities to
+ * determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ * updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread
+ * performance. Comparing the throughput of;
+ * - single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yield better
+ * performance. This performance difference is roughly ~30% (can change
+ * between machine generations)
+ *
+ * Hiperdispatch tries to hint scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs are less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR (4)
+#define HD_DELAY_INTERVAL (HZ / 4)
+#define HD_STEAL_THRESHOLD 10
+#define HD_STEAL_AVG_WEIGHT 16
+
+static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores; /* Current CORE count with high capacity */
+static int hd_entitled_cores; /* Total vertical high and medium CORE count */
+static int hd_online_cores; /* Current online CORE count */
+
+static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+ if (!cpu_has_topology())
+ enable = 0;
+ if (hd_enabled == enable)
+ return 0;
+ hd_enabled = enable;
+ return 1;
+}
+
+void hd_reset_state(void)
+{
+ cpumask_clear(&hd_vl_coremask);
+ cpumask_clear(&hd_vmvl_cpumask);
+ hd_entitled_cores = 0;
+ hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+ const struct cpumask *siblings;
+ int polarization;
+
+ hd_online_cores++;
+ polarization = smp_cpu_get_polarization(cpu);
+ siblings = topology_sibling_cpumask(cpu);
+ switch (polarization) {
+ case POLARIZATION_VH:
+ hd_entitled_cores++;
+ break;
+ case POLARIZATION_VM:
+ hd_entitled_cores++;
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ case POLARIZATION_VL:
+ cpumask_set_cpu(cpu, &hd_vl_coremask);
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ }
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+ static ktime_t prev;
+ ktime_t now;
+
+ /*
+ * Check if hiperdispatch is active, if not set the prev to 0.
+ * This way it is possible to differentiate the first update iteration after
+ * enabling hiperdispatch.
+ */
+ if (hd_entitled_cores == 0 || hd_enabled == 0) {
+ prev = ktime_set(0, 0);
+ return;
+ }
+ now = ktime_get();
+ if (ktime_after(prev, 0)) {
+ if (hd_high_capacity_cores == hd_online_cores)
+ hd_high_time += ktime_ms_delta(now, prev);
+ else
+ hd_low_time += ktime_ms_delta(now, prev);
+ }
+ prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+ int cpu, upscaling_cores;
+ unsigned long capacity;
+
+ upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+ capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+ hd_high_capacity_cores = hd_entitled_cores;
+ for_each_cpu(cpu, &hd_vl_coremask) {
+ smp_set_core_capacity(cpu, capacity);
+ if (capacity != CPU_CAPACITY_HIGH)
+ continue;
+ hd_high_capacity_cores++;
+ upscaling_cores--;
+ if (upscaling_cores == 0)
+ capacity = CPU_CAPACITY_LOW;
+ }
+}
+
+void hd_disable_hiperdispatch(void)
+{
+ cancel_delayed_work_sync(&hd_capacity_work);
+ hd_high_capacity_cores = hd_online_cores;
+ hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ mutex_unlock(&hd_counter_mutex);
+ if (hd_enabled == 0)
+ return 0;
+ if (hd_entitled_cores == 0)
+ return 0;
+ if (hd_online_cores <= hd_entitled_cores)
+ return 0;
+ mod_delayed_work(system_dfl_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+ hd_update_capacities();
+ return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+ static unsigned long steal;
+
+ steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+ return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+ unsigned long time_delta, steal_delta, steal, percentage;
+ static ktime_t prev;
+ int cpus, cpu;
+ ktime_t now;
+
+ cpus = 0;
+ steal = 0;
+ percentage = 0;
+ for_each_cpu(cpu, &hd_vmvl_cpumask) {
+ steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ cpus++;
+ }
+ /*
+ * If there is no vertical medium and low CPUs steal time
+ * is 0 as vertical high CPUs shouldn't experience steal time.
+ */
+ if (cpus == 0)
+ return percentage;
+ now = ktime_get();
+ time_delta = ktime_to_ns(ktime_sub(now, prev));
+ if (steal > hd_previous_steal && hd_previous_steal != 0) {
+ steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+ percentage = steal_delta / cpus;
+ }
+ hd_previous_steal = steal;
+ prev = now;
+ return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+ unsigned long steal_percentage, new_cores;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ /*
+ * If online cores are less or equal to entitled cores hiperdispatch
+ * does not need to make any adjustments, call a topology update to
+ * disable hiperdispatch.
+ * Normally this check is handled on topology update, but during cpu
+ * unhotplug, topology and cpu mask updates are done in reverse
+ * order, causing hd_enable_hiperdispatch() to get stale data.
+ */
+ if (hd_online_cores <= hd_entitled_cores) {
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return;
+ }
+ steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+ if (steal_percentage < hd_steal_threshold)
+ new_cores = hd_online_cores;
+ else
+ new_cores = hd_entitled_cores;
+ if (hd_high_capacity_cores != new_cores) {
+ trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+ hd_high_capacity_cores = new_cores;
+ atomic64_inc(&hd_adjustments);
+ topology_schedule_update();
+ }
+ trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+ mutex_unlock(&smp_cpu_state_mutex);
+ schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int hiperdispatch;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &hiperdispatch,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ hiperdispatch = hd_enabled;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (hd_set_hiperdispatch_mode(hiperdispatch))
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return 0;
+}
+
+static const struct ctl_table hiperdispatch_ctl_table[] = {
+ {
+ .procname = "hiperdispatch",
+ .mode = 0644,
+ .proc_handler = hiperdispatch_ctl_handler,
+ },
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val > 100)
+ return -ERANGE;
+ hd_steal_threshold = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (!val)
+ return -ERANGE;
+ hd_delay_factor = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+ &dev_attr_hd_steal_threshold.attr,
+ &dev_attr_hd_delay_factor.attr,
+ NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+ .name = "hiperdispatch",
+ .attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_high_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_low_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+ *val = atomic64_read(&hd_adjustments);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+ debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+ debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+ debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+ struct device *dev;
+
+ dev = bus_get_dev_root(&cpu_subsys);
+ if (!dev)
+ return;
+ if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+ pr_warn("Unable to create hiperdispatch attribute group\n");
+ put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+ if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+ hd_set_hiperdispatch_mode(1);
+ topology_schedule_update();
+ }
+ if (!register_sysctl("s390", hiperdispatch_ctl_table))
+ pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+ hd_create_debugfs_counters();
+ hd_create_attributes();
+ return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 000000000000..39cb8d0ae348
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <trace/events/power.h>
+#include <asm/cpu_mf.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void account_idle_time_irq(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
+ unsigned long idle_time;
+ u64 cycles_new[8];
+ int i;
+
+ if (smp_cpu_mtid) {
+ stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
+ for (i = 0; i < smp_cpu_mtid; i++)
+ this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
+ }
+
+ idle_time = lc->int_clock - idle->clock_idle_enter;
+
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
+
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+ WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ account_idle_time(cputime_to_nsecs(idle_time));
+}
+
+void noinstr arch_cpu_idle(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long psw_mask;
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+ set_cpu_flag(CIF_ENABLED_WAIT);
+ if (smp_cpu_mtid)
+ stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
+ idle->clock_idle_enter = get_tod_clock_fast();
+ idle->timer_idle_enter = get_cpu_timer();
+ bpon();
+ __load_psw_mask(psw_mask);
+}
+
+static ssize_t show_idle_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+void arch_cpu_idle_enter(void)
+{
+}
+
+void arch_cpu_idle_exit(void)
+{
+}
+
+void __noreturn arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c
new file mode 100644
index 000000000000..f3c3e6e1c5d3
--- /dev/null
+++ b/arch/s390/kernel/ima_arch.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ima.h>
+#include <asm/boot_data.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return ipl_secure_flag;
+}
+
+const char * const *arch_get_ima_policy(void)
+{
+ return NULL;
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index feb719d3c851..961a3d60a4dd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1,45 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* ipl/reipl/dump support for Linux on s390.
*
* Copyright IBM Corp. 2005, 2012
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
* Volker Sameske <sameske@de.ibm.com>
*/
#include <linux/types.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/init.h>
#include <linux/device.h>
#include <linux/delay.h>
+#include <linux/kstrtox.h>
+#include <linux/panic_notifier.h>
#include <linux/reboot.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-extable.h>
+#include <asm/machine.h>
+#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
#include <asm/setup.h>
#include <asm/cpcmd.h>
-#include <asm/cio.h>
#include <asm/ebcdic.h>
-#include <asm/reset.h>
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
#define IPL_UNKNOWN_STR "unknown"
#define IPL_CCW_STR "ccw"
+#define IPL_ECKD_STR "eckd"
+#define IPL_ECKD_DUMP_STR "eckd_dump"
#define IPL_FCP_STR "fcp"
#define IPL_FCP_DUMP_STR "fcp_dump"
+#define IPL_NVME_STR "nvme"
+#define IPL_NVME_DUMP_STR "nvme_dump"
#define IPL_NSS_STR "nss"
#define DUMP_CCW_STR "ccw"
+#define DUMP_ECKD_STR "eckd"
#define DUMP_FCP_STR "fcp"
+#define DUMP_NVME_STR "nvme"
#define DUMP_NONE_STR "none"
/*
@@ -84,12 +98,20 @@ static char *ipl_type_str(enum ipl_type type)
switch (type) {
case IPL_TYPE_CCW:
return IPL_CCW_STR;
+ case IPL_TYPE_ECKD:
+ return IPL_ECKD_STR;
+ case IPL_TYPE_ECKD_DUMP:
+ return IPL_ECKD_DUMP_STR;
case IPL_TYPE_FCP:
return IPL_FCP_STR;
case IPL_TYPE_FCP_DUMP:
return IPL_FCP_DUMP_STR;
case IPL_TYPE_NSS:
return IPL_NSS_STR;
+ case IPL_TYPE_NVME:
+ return IPL_NVME_STR;
+ case IPL_TYPE_NVME_DUMP:
+ return IPL_NVME_DUMP_STR;
case IPL_TYPE_UNKNOWN:
default:
return IPL_UNKNOWN_STR;
@@ -100,6 +122,8 @@ enum dump_type {
DUMP_TYPE_NONE = 1,
DUMP_TYPE_CCW = 2,
DUMP_TYPE_FCP = 4,
+ DUMP_TYPE_NVME = 8,
+ DUMP_TYPE_ECKD = 16,
};
static char *dump_type_str(enum dump_type type)
@@ -109,97 +133,120 @@ static char *dump_type_str(enum dump_type type)
return DUMP_NONE_STR;
case DUMP_TYPE_CCW:
return DUMP_CCW_STR;
+ case DUMP_TYPE_ECKD:
+ return DUMP_ECKD_STR;
case DUMP_TYPE_FCP:
return DUMP_FCP_STR;
+ case DUMP_TYPE_NVME:
+ return DUMP_NVME_STR;
default:
return NULL;
}
}
-/*
- * Must be in data section since the bss section
- * is not cleared when these are accessed.
- */
-static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
-u32 ipl_flags __attribute__((__section__(".data"))) = 0;
-
-enum ipl_method {
- REIPL_METHOD_CCW_CIO,
- REIPL_METHOD_CCW_DIAG,
- REIPL_METHOD_CCW_VM,
- REIPL_METHOD_FCP_RO_DIAG,
- REIPL_METHOD_FCP_RW_DIAG,
- REIPL_METHOD_FCP_RO_VM,
- REIPL_METHOD_FCP_DUMP,
- REIPL_METHOD_NSS,
- REIPL_METHOD_NSS_DIAG,
- REIPL_METHOD_DEFAULT,
-};
-
-enum dump_method {
- DUMP_METHOD_NONE,
- DUMP_METHOD_CCW_CIO,
- DUMP_METHOD_CCW_DIAG,
- DUMP_METHOD_CCW_VM,
- DUMP_METHOD_FCP_DIAG,
-};
+int __bootdata_preserved(ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_secure_flag);
-static int diag308_set_works = 0;
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
-static struct ipl_parameter_block ipl_block;
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
static int reipl_capabilities = IPL_TYPE_UNKNOWN;
static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
-static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_nvme;
static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_eckd;
static struct ipl_parameter_block *reipl_block_nss;
static struct ipl_parameter_block *reipl_block_actual;
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
-static enum dump_method dump_method = DUMP_METHOD_NONE;
static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_nvme;
static struct ipl_parameter_block *dump_block_ccw;
+static struct ipl_parameter_block *dump_block_eckd;
static struct sclp_ipl_info sclp_ipl_info;
-int diag308(unsigned long subcode, void *addr)
+static bool reipl_nvme_clear;
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+static bool reipl_eckd_clear;
+
+static unsigned long os_info_flags;
+
+static inline int __diag308(unsigned long subcode, unsigned long addr)
{
- register unsigned long _addr asm("0") = (unsigned long) addr;
- register unsigned long _rc asm("1") = 0;
+ union register_pair r1;
- asm volatile(
- " diag %0,%2,0x308\n"
- "0:\n"
+ r1.even = addr;
+ r1.odd = 0;
+ asm_inline volatile(
+ " diag %[r1],%[subcode],0x308\n"
+ "0: nopr %%r7\n"
EX_TABLE(0b,0b)
- : "+d" (_addr), "+d" (_rc)
- : "d" (subcode) : "cc", "memory");
- return _rc;
+ : [r1] "+&d" (r1.pair)
+ : [subcode] "d" (subcode)
+ : "cc", "memory");
+ return r1.odd;
+}
+
+int diag308(unsigned long subcode, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X308);
+ return __diag308(subcode, addr ? virt_to_phys(addr) : 0);
}
EXPORT_SYMBOL_GPL(diag308);
/* SYSFS */
-#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...) \
static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
- return sprintf(page, _format, _value); \
-} \
-static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL);
+ return sysfs_emit(page, _format, ##args); \
+}
-#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \
-static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
- char *page) \
+ const char *buf, size_t len) \
{ \
- return sprintf(page, _fmt_out, \
- (unsigned long long) _value); \
-} \
+ unsigned long long ssid, devno; \
+ \
+ if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \
+ return -EINVAL; \
+ \
+ if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \
+ return -EINVAL; \
+ \
+ _ipl_blk.ssid = ssid; \
+ _ipl_blk.devno = devno; \
+ return len; \
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
+ _ipl_blk.ssid, _ipl_blk.devno); \
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, 0644, \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store) \
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL)
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
const char *buf, size_t len) \
@@ -211,37 +258,83 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ __ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
- sys_##_prefix##_##_name##_store);
+ sys_##_prefix##_##_name##_store)
#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
-static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *page) \
-{ \
- return sprintf(page, _fmt_out, _value); \
-} \
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \
static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
const char *buf, size_t len) \
{ \
- strncpy(_value, buf, sizeof(_value) - 1); \
+ if (len >= sizeof(_value)) \
+ return -E2BIG; \
+ len = strscpy(_value, buf); \
+ if ((ssize_t)len < 0) \
+ return len; \
strim(_value); \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ __ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
- sys_##_prefix##_##_name##_store);
-
-static void make_attrs_ro(struct attribute **attrs)
-{
- while (*attrs) {
- (*attrs)->mode = S_IRUGO;
- attrs++;
- }
-}
+ sys_##_prefix##_##_name##_store)
+
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t size = _ipl_block.scp_data_len; \
+ void *scp_data = _ipl_block.scp_data; \
+ \
+ return memory_read_from_buffer(buf, count, &off, \
+ scp_data, size); \
+}
+
+#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t scpdata_len = count; \
+ size_t padding; \
+ \
+ if (off) \
+ return -EINVAL; \
+ \
+ memcpy(_ipl_block.scp_data, buf, count); \
+ if (scpdata_len % 8) { \
+ padding = 8 - (scpdata_len % 8); \
+ memset(_ipl_block.scp_data + scpdata_len, \
+ 0, padding); \
+ scpdata_len += padding; \
+ } \
+ \
+ _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
+ _ipl_block.len = _ipl_bp0_len + scpdata_len; \
+ _ipl_block.scp_data_len = scpdata_len; \
+ \
+ return count; \
+}
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \
+ NULL, _size)
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \
+ sys_##_prefix##_scp_data_store, _size)
/*
* ipl section
@@ -249,21 +342,29 @@ static void make_attrs_ro(struct attribute **attrs)
static __init enum ipl_type get_ipl_type(void)
{
- struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
- if (ipl_flags & IPL_NSS_VALID)
- return IPL_TYPE_NSS;
- if (!(ipl_flags & IPL_DEVNO_VALID))
+ if (!ipl_block_valid)
return IPL_TYPE_UNKNOWN;
- if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+
+ switch (ipl_block.pb0_hdr.pbt) {
+ case IPL_PBT_CCW:
return IPL_TYPE_CCW;
- if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
- return IPL_TYPE_UNKNOWN;
- if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
- return IPL_TYPE_UNKNOWN;
- if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
- return IPL_TYPE_FCP_DUMP;
- return IPL_TYPE_FCP;
+ case IPL_PBT_FCP:
+ if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+ return IPL_TYPE_FCP_DUMP;
+ else
+ return IPL_TYPE_FCP;
+ case IPL_PBT_NVME:
+ if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+ return IPL_TYPE_NVME_DUMP;
+ else
+ return IPL_TYPE_NVME;
+ case IPL_PBT_ECKD:
+ if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+ return IPL_TYPE_ECKD_DUMP;
+ else
+ return IPL_TYPE_ECKD;
+ }
+ return IPL_TYPE_UNKNOWN;
}
struct ipl_info ipl_info;
@@ -272,204 +373,200 @@ EXPORT_SYMBOL_GPL(ipl_info);
static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type));
}
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
-/* VM IPL PARM routines */
-static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
- const struct ipl_parameter_block *ipb)
+static ssize_t ipl_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
{
- int i;
- size_t len;
- char has_lowercase = 0;
-
- len = 0;
- if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
- (ipb->ipl_info.ccw.vm_parm_len > 0)) {
-
- len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
- memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
- /* If at least one character is lowercase, we assume mixed
- * case; otherwise we convert everything to lowercase.
- */
- for (i = 0; i < len; i++)
- if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
- (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
- (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
- has_lowercase = 1;
- break;
- }
- if (!has_lowercase)
- EBC_TOLOWER(dest, len);
- EBCASC(dest, len);
- }
- dest[len] = 0;
-
- return len;
+ return sysfs_emit(page, "%i\n", !!ipl_secure_flag);
}
-size_t append_ipl_vmparm(char *dest, size_t size)
-{
- size_t rc;
+static struct kobj_attribute sys_ipl_secure_attr =
+ __ATTR(secure, 0444, ipl_secure_show, NULL);
- rc = 0;
- if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
- rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
- else
- dest[0] = 0;
- return rc;
+static ssize_t ipl_has_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%i\n", !!sclp.has_sipl);
}
+static struct kobj_attribute sys_ipl_has_secure_attr =
+ __ATTR(has_secure, 0444, ipl_has_secure_show, NULL);
+
static ssize_t ipl_vm_parm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
char parm[DIAG308_VMPARM_SIZE + 1] = {};
- append_ipl_vmparm(parm, sizeof(parm));
- return sprintf(page, "%s\n", parm);
+ if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
+ ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
+ return sysfs_emit(page, "%s\n", parm);
}
-static size_t scpdata_length(const char* buf, size_t count)
-{
- while (count) {
- if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
- break;
- count--;
- }
- return count;
-}
-
-static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
- const struct ipl_parameter_block *ipb)
-{
- size_t count;
- size_t i;
- int has_lowercase;
-
- count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
- ipb->ipl_info.fcp.scp_data_len));
- if (!count)
- goto out;
-
- has_lowercase = 0;
- for (i = 0; i < count; i++) {
- if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
- count = 0;
- goto out;
- }
- if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
- has_lowercase = 1;
- }
-
- if (has_lowercase)
- memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
- else
- for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
-out:
- dest[count] = '\0';
- return count;
-}
-
-size_t append_ipl_scpdata(char *dest, size_t len)
-{
- size_t rc;
-
- rc = 0;
- if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
- rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
- else
- dest[0] = 0;
- return rc;
-}
-
-
static struct kobj_attribute sys_ipl_vm_parm_attr =
- __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+ __ATTR(parm, 0444, ipl_vm_parm_show, NULL);
static ssize_t sys_ipl_device_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.0.%04x\n", ipl_devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+ ipl_block.eckd.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+ return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
}
static struct kobj_attribute sys_ipl_device_attr =
- __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+ __ATTR(device, 0444, sys_ipl_device_show, NULL);
-static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
+static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
- return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
- IPL_PARMBLOCK_SIZE);
+ return memory_read_from_buffer(buf, count, &off, &ipl_block,
+ ipl_block.hdr.len);
}
+static const struct bin_attribute sys_ipl_parameter_attr =
+ __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
+ PAGE_SIZE);
-static struct bin_attribute ipl_parameter_attr = {
- .attr = {
- .name = "binary_parameter",
- .mode = S_IRUGO,
- },
- .size = PAGE_SIZE,
- .read = &ipl_parameter_read,
-};
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
-static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
- void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_fcp_scp_data_attr,
+ NULL,
+};
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
-static struct bin_attribute ipl_scp_data_attr = {
- .attr = {
- .name = "scp_data",
- .mode = S_IRUGO,
- },
- .size = PAGE_SIZE,
- .read = ipl_scp_data_read,
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_nvme_scp_data_attr,
+ NULL,
};
-/* FCP ipl device attributes */
-
-DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.lun);
-DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
-DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
-static struct attribute *ipl_fcp_attrs[] = {
- &sys_ipl_type_attr.attr,
- &sys_ipl_device_attr.attr,
- &sys_ipl_fcp_wwpn_attr.attr,
- &sys_ipl_fcp_lun_attr.attr,
- &sys_ipl_fcp_bootprog_attr.attr,
- &sys_ipl_fcp_br_lba_attr.attr,
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_eckd_scp_data_attr,
NULL,
};
-static struct attribute_group ipl_fcp_attr_group = {
- .attrs = ipl_fcp_attrs,
-};
+/* FCP ipl device attributes */
-/* CCW ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
+ (unsigned long long)ipl_block.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
+ (unsigned long long)ipl_block.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.fcp.br_lba);
+
+/* NVMe ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.fid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.nsid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.nvme.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.nvme.br_lba);
+
+/* ECKD ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.eckd.bootprog);
+
+#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb) \
+static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+{ \
+ struct ipl_pb0_eckd *ipb = &(_ipb); \
+ \
+ if (!ipb->br_chr.cyl && \
+ !ipb->br_chr.head && \
+ !ipb->br_chr.record) \
+ return sysfs_emit(buf, "auto\n"); \
+ \
+ return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \
+ ipb->br_chr.cyl, \
+ ipb->br_chr.head, \
+ ipb->br_chr.record); \
+}
+
+#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \
+static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct ipl_pb0_eckd *ipb = &(_ipb); \
+ unsigned long args[3] = { 0 }; \
+ char *p, *p1, *tmp = NULL; \
+ int i, rc; \
+ \
+ if (!strncmp(buf, "auto", 4)) \
+ goto out; \
+ \
+ tmp = kstrdup(buf, GFP_KERNEL); \
+ p = tmp; \
+ for (i = 0; i < 3; i++) { \
+ p1 = strsep(&p, ", "); \
+ if (!p1) { \
+ rc = -EINVAL; \
+ goto err; \
+ } \
+ rc = kstrtoul(p1, 0, args + i); \
+ if (rc) \
+ goto err; \
+ } \
+ \
+ rc = -EINVAL; \
+ if (i != 3) \
+ goto err; \
+ \
+ if ((args[0] || args[1]) && !args[2]) \
+ goto err; \
+ \
+ if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255) \
+ goto err; \
+ \
+out: \
+ ipb->br_chr.cyl = args[0]; \
+ ipb->br_chr.head = args[1]; \
+ ipb->br_chr.record = args[2]; \
+ rc = len; \
+err: \
+ kfree(tmp); \
+ return rc; \
+}
+
+IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd);
+static struct kobj_attribute sys_ipl_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL);
+
+IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd);
+
+static struct kobj_attribute sys_reipl_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store);
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -477,97 +574,97 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
char loadparm[LOADPARM_LEN + 1] = {};
if (!sclp_ipl_info.is_valid)
- return sprintf(page, "#unknown#\n");
+ return sysfs_emit(page, "#unknown#\n");
memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
strim(loadparm);
- return sprintf(page, "%s\n", loadparm);
+ return sysfs_emit(page, "%s\n", loadparm);
}
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
-static struct attribute *ipl_ccw_attrs_vm[] = {
- &sys_ipl_type_attr.attr,
+static struct attribute *ipl_fcp_attrs[] = {
&sys_ipl_device_attr.attr,
+ &sys_ipl_fcp_wwpn_attr.attr,
+ &sys_ipl_fcp_lun_attr.attr,
+ &sys_ipl_fcp_bootprog_attr.attr,
+ &sys_ipl_fcp_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
- &sys_ipl_vm_parm_attr.attr,
NULL,
};
-static struct attribute *ipl_ccw_attrs_lpar[] = {
- &sys_ipl_type_attr.attr,
- &sys_ipl_device_attr.attr,
+static const struct attribute_group ipl_fcp_attr_group = {
+ .attrs = ipl_fcp_attrs,
+ .bin_attrs = ipl_fcp_bin_attrs,
+};
+
+static struct attribute *ipl_nvme_attrs[] = {
+ &sys_ipl_nvme_fid_attr.attr,
+ &sys_ipl_nvme_nsid_attr.attr,
+ &sys_ipl_nvme_bootprog_attr.attr,
+ &sys_ipl_nvme_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
NULL,
};
-static struct attribute_group ipl_ccw_attr_group_vm = {
- .attrs = ipl_ccw_attrs_vm,
+static const struct attribute_group ipl_nvme_attr_group = {
+ .attrs = ipl_nvme_attrs,
+ .bin_attrs = ipl_nvme_bin_attrs,
};
-static struct attribute_group ipl_ccw_attr_group_lpar = {
- .attrs = ipl_ccw_attrs_lpar
+static struct attribute *ipl_eckd_attrs[] = {
+ &sys_ipl_eckd_bootprog_attr.attr,
+ &sys_ipl_eckd_br_chr_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_device_attr.attr,
+ NULL,
};
-/* NSS ipl device attributes */
+static const struct attribute_group ipl_eckd_attr_group = {
+ .attrs = ipl_eckd_attrs,
+ .bin_attrs = ipl_eckd_bin_attrs,
+};
-DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
+/* CCW ipl device attributes */
-static struct attribute *ipl_nss_attrs[] = {
- &sys_ipl_type_attr.attr,
- &sys_ipl_nss_name_attr.attr,
+static struct attribute *ipl_ccw_attrs_vm[] = {
+ &sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_vm_parm_attr.attr,
NULL,
};
-static struct attribute_group ipl_nss_attr_group = {
- .attrs = ipl_nss_attrs,
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group ipl_ccw_attr_group_vm = {
+ .attrs = ipl_ccw_attrs_vm,
};
-/* UNKNOWN ipl device attributes */
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
+ .attrs = ipl_ccw_attrs_lpar
+};
-static struct attribute *ipl_unknown_attrs[] = {
+static struct attribute *ipl_common_attrs[] = {
&sys_ipl_type_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
-static struct attribute_group ipl_unknown_attr_group = {
- .attrs = ipl_unknown_attrs,
+static const struct attribute_group ipl_common_attr_group = {
+ .attrs = ipl_common_attrs,
};
static struct kset *ipl_kset;
-static int __init ipl_register_fcp_files(void)
-{
- int rc;
-
- rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
- if (rc)
- goto out;
- rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
- if (rc)
- goto out_ipl_parm;
- rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr);
- if (!rc)
- goto out;
-
- sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
-
-out_ipl_parm:
- sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
-out:
- return rc;
-}
-
static void __ipl_run(void *unused)
{
- diag308(DIAG308_IPL, NULL);
- if (MACHINE_IS_VM)
- __cpcmd("IPL", NULL, 0, NULL);
- else if (ipl_info.type == IPL_TYPE_CCW)
- reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
}
static void ipl_run(struct shutdown_trigger *trigger)
@@ -584,25 +681,31 @@ static int __init ipl_init(void)
rc = -ENOMEM;
goto out;
}
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+ if (rc)
+ goto out;
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_vm);
else
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_lpar);
break;
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
+ break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- rc = ipl_register_fcp_files();
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
break;
- case IPL_TYPE_NSS:
- rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group);
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
break;
default:
- rc = sysfs_create_group(&ipl_kset->kobj,
- &ipl_unknown_attr_group);
break;
}
out:
@@ -628,8 +731,8 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
{
char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
- reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
- return sprintf(page, "%s\n", vmparm);
+ ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+ return sysfs_emit(page, "%s\n", vmparm);
}
static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
@@ -651,14 +754,14 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
return -EINVAL;
- memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
- ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ccw.vm_parm_len = ip_len;
if (ip_len > 0) {
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
- ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ memcpy(ipb->ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ccw.vm_parm, ip_len);
} else {
- ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP;
}
return len;
@@ -693,100 +796,39 @@ static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
}
static struct kobj_attribute sys_reipl_nss_vmparm_attr =
- __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
- reipl_nss_vmparm_store);
+ __ATTR(parm, 0644, reipl_nss_vmparm_show,
+ reipl_nss_vmparm_store);
static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
- __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
- reipl_ccw_vmparm_store);
+ __ATTR(parm, 0644, reipl_ccw_vmparm_show,
+ reipl_ccw_vmparm_store);
/* FCP reipl device attributes */
-static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
- void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t padding;
- size_t scpdata_len;
-
- if (off < 0)
- return -EINVAL;
-
- if (off >= DIAG308_SCPDATA_SIZE)
- return -ENOSPC;
-
- if (count > DIAG308_SCPDATA_SIZE - off)
- count = DIAG308_SCPDATA_SIZE - off;
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
+ reipl_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
- scpdata_len = off + count;
-
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
-
- reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
-
- return count;
-}
-
-static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
- .attr = {
- .name = "scp_data",
- .mode = S_IRUGO | S_IWUSR,
- },
- .size = PAGE_SIZE,
- .read = reipl_fcp_scpdata_read,
- .write = reipl_fcp_scpdata_write,
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
+ &sys_reipl_fcp_scp_data_attr,
+ NULL,
};
DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.wwpn);
+ reipl_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.lun);
+ reipl_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.bootprog);
+ reipl_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.br_lba);
+ reipl_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_fcp->ipl_info.fcp.devno);
-
-static struct attribute *reipl_fcp_attrs[] = {
- &sys_reipl_fcp_device_attr.attr,
- &sys_reipl_fcp_wwpn_attr.attr,
- &sys_reipl_fcp_lun_attr.attr,
- &sys_reipl_fcp_bootprog_attr.attr,
- &sys_reipl_fcp_br_lba_attr.attr,
- NULL,
-};
-
-static struct attribute_group reipl_fcp_attr_group = {
- .attrs = reipl_fcp_attrs,
-};
-
-/* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_ccw->ipl_info.ccw.devno);
+ reipl_block_fcp->fcp.devno);
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
{
- memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN);
+ memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
loadparm[LOADPARM_LEN] = 0;
strim(loadparm);
@@ -798,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
char buf[LOADPARM_LEN + 1];
reipl_get_ascii_loadparm(buf, ipb);
- return sprintf(page, "%s\n", buf);
+ return sysfs_emit(page, "%s\n", buf);
}
static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
@@ -821,55 +863,155 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return -EINVAL;
}
/* initialize loadparm with blanks */
- memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN);
+ memset(ipb->common.loadparm, ' ', LOADPARM_LEN);
/* copy and convert to ebcdic */
- memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len);
- ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN);
+ memcpy(ipb->common.loadparm, buf, lp_len);
+ ASCEBC(ipb->common.loadparm, LOADPARM_LEN);
+ ipb->common.flags |= IPL_PB0_FLAG_LOADPARM;
return len;
}
-/* NSS wrapper */
-static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+#define DEFINE_GENERIC_LOADPARM(name) \
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *page) \
+{ \
+ return reipl_generic_loadparm_show(reipl_block_##name, page); \
+} \
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \
+} \
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \
+ __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \
+ reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
+
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
{
- return reipl_generic_loadparm_show(reipl_block_nss, page);
+ return sysfs_emit(page, "%u\n", reipl_fcp_clear);
}
-static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
{
- return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+ if (kstrtobool(buf, &reipl_fcp_clear) < 0)
+ return -EINVAL;
+ return len;
}
-/* CCW wrapper */
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+static struct attribute *reipl_fcp_attrs[] = {
+ &sys_reipl_fcp_device_attr.attr,
+ &sys_reipl_fcp_wwpn_attr.attr,
+ &sys_reipl_fcp_lun_attr.attr,
+ &sys_reipl_fcp_bootprog_attr.attr,
+ &sys_reipl_fcp_br_lba_attr.attr,
+ &sys_reipl_fcp_loadparm_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group reipl_fcp_attr_group = {
+ .attrs = reipl_fcp_attrs,
+ .bin_attrs = reipl_fcp_bin_attrs,
+};
+
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+ __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
+/* NVME reipl device attributes */
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
+ reipl_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
+ &sys_reipl_nvme_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.br_lba);
+
+static struct attribute *reipl_nvme_attrs[] = {
+ &sys_reipl_nvme_fid_attr.attr,
+ &sys_reipl_nvme_nsid_attr.attr,
+ &sys_reipl_nvme_bootprog_attr.attr,
+ &sys_reipl_nvme_br_lba_attr.attr,
+ &sys_reipl_nvme_loadparm_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group reipl_nvme_attr_group = {
+ .attrs = reipl_nvme_attrs,
+ .bin_attrs = reipl_nvme_bin_attrs
+};
+
+static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%u\n", reipl_nvme_clear);
+}
+
+static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (kstrtobool(buf, &reipl_nvme_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nvme_clear_attr =
+ __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store);
+
+/* CCW reipl device attributes */
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
+
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
{
- return reipl_generic_loadparm_show(reipl_block_ccw, page);
+ return sysfs_emit(page, "%u\n", reipl_ccw_clear);
}
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
{
- return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+ if (kstrtobool(buf, &reipl_ccw_clear) < 0)
+ return -EINVAL;
+ return len;
}
-static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
- reipl_ccw_loadparm_store);
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+ __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
static struct attribute *reipl_ccw_attrs_vm[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
&sys_reipl_ccw_vmparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
static struct attribute *reipl_ccw_attrs_lpar[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
@@ -883,12 +1025,58 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
.attrs = reipl_ccw_attrs_lpar,
};
+/* ECKD reipl device attributes */
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
+ reipl_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
+ &sys_reipl_eckd_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+ reipl_block_eckd->eckd.bootprog);
+
+static struct attribute *reipl_eckd_attrs[] = {
+ &sys_reipl_eckd_device_attr.attr,
+ &sys_reipl_eckd_bootprog_attr.attr,
+ &sys_reipl_eckd_br_chr_attr.attr,
+ &sys_reipl_eckd_loadparm_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group reipl_eckd_attr_group = {
+ .attrs = reipl_eckd_attrs,
+ .bin_attrs = reipl_eckd_bin_attrs
+};
+
+static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%u\n", reipl_eckd_clear);
+}
+
+static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (kstrtobool(buf, &reipl_eckd_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_eckd_clear_attr =
+ __ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store);
/* NSS reipl device attributes */
static void reipl_get_ascii_nss_name(char *dst,
struct ipl_parameter_block *ipb)
{
- memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE);
EBCASC(dst, NSS_NAME_SIZE);
dst[NSS_NAME_SIZE] = 0;
}
@@ -899,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj,
char nss_name[NSS_NAME_SIZE + 1] = {};
reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
- return sprintf(page, "%s\n", nss_name);
+ return sysfs_emit(page, "%s\n", nss_name);
}
static ssize_t reipl_nss_name_store(struct kobject *kobj,
@@ -916,28 +1104,22 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj,
if (nss_len > NSS_NAME_SIZE)
return -EINVAL;
- memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE);
if (nss_len > 0) {
- reipl_block_nss->ipl_info.ccw.vm_flags |=
- DIAG308_VM_FLAGS_NSS_VALID;
- memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
- ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
- EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS;
+ memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len);
} else {
- reipl_block_nss->ipl_info.ccw.vm_flags &=
- ~DIAG308_VM_FLAGS_NSS_VALID;
+ reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS;
}
return len;
}
static struct kobj_attribute sys_reipl_nss_name_attr =
- __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
- reipl_nss_name_store);
-
-static struct kobj_attribute sys_reipl_nss_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
- reipl_nss_loadparm_store);
+ __ATTR(name, 0644, reipl_nss_name_show,
+ reipl_nss_name_store);
static struct attribute *reipl_nss_attrs[] = {
&sys_reipl_nss_name_attr.attr,
@@ -951,11 +1133,10 @@ static struct attribute_group reipl_nss_attr_group = {
.attrs = reipl_nss_attrs,
};
-static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+void set_os_info_reipl_block(void)
{
- reipl_block_actual = reipl_block;
- os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
- reipl_block->hdr.len);
+ os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block_actual->hdr.len);
}
/* reipl type */
@@ -967,38 +1148,22 @@ static int reipl_set_type(enum ipl_type type)
switch(type) {
case IPL_TYPE_CCW:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_CCW_DIAG;
- else if (MACHINE_IS_VM)
- reipl_method = REIPL_METHOD_CCW_VM;
- else
- reipl_method = REIPL_METHOD_CCW_CIO;
- set_reipl_block_actual(reipl_block_ccw);
+ reipl_block_actual = reipl_block_ccw;
+ break;
+ case IPL_TYPE_ECKD:
+ reipl_block_actual = reipl_block_eckd;
break;
case IPL_TYPE_FCP:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_FCP_RW_DIAG;
- else if (MACHINE_IS_VM)
- reipl_method = REIPL_METHOD_FCP_RO_VM;
- else
- reipl_method = REIPL_METHOD_FCP_RO_DIAG;
- set_reipl_block_actual(reipl_block_fcp);
+ reipl_block_actual = reipl_block_fcp;
break;
- case IPL_TYPE_FCP_DUMP:
- reipl_method = REIPL_METHOD_FCP_DUMP;
+ case IPL_TYPE_NVME:
+ reipl_block_actual = reipl_block_nvme;
break;
case IPL_TYPE_NSS:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_NSS_DIAG;
- else
- reipl_method = REIPL_METHOD_NSS;
- set_reipl_block_actual(reipl_block_nss);
- break;
- case IPL_TYPE_UNKNOWN:
- reipl_method = REIPL_METHOD_DEFAULT;
+ reipl_block_actual = reipl_block_nss;
break;
default:
- BUG();
+ break;
}
reipl_type = type;
return 0;
@@ -1007,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type)
static ssize_t reipl_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type));
}
static ssize_t reipl_type_store(struct kobject *kobj,
@@ -1018,8 +1183,12 @@ static ssize_t reipl_type_store(struct kobject *kobj,
if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_CCW);
+ else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_ECKD);
else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_FCP);
+ else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_NVME);
else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_NSS);
return (rc != 0) ? rc : len;
@@ -1030,81 +1199,53 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
static struct kset *reipl_fcp_kset;
-
-static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
- const enum ipl_method m)
-{
- char loadparm[LOADPARM_LEN + 1] = {};
- char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
- char nss_name[NSS_NAME_SIZE + 1] = {};
- size_t pos = 0;
-
- reipl_get_ascii_loadparm(loadparm, ipb);
- reipl_get_ascii_nss_name(nss_name, ipb);
- reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
-
- switch (m) {
- case REIPL_METHOD_CCW_VM:
- pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
- break;
- case REIPL_METHOD_NSS:
- pos = sprintf(dst, "IPL %s", nss_name);
- break;
- default:
- break;
- }
- if (strlen(loadparm) > 0)
- pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
- if (strlen(vmparm) > 0)
- sprintf(dst + pos, " PARM %s", vmparm);
-}
+static struct kset *reipl_nvme_kset;
+static struct kset *reipl_eckd_kset;
static void __reipl_run(void *unused)
{
- struct ccw_dev_id devid;
- static char buf[128];
-
- switch (reipl_method) {
- case REIPL_METHOD_CCW_CIO:
- devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
- reipl_ccw_dev(&devid);
- break;
- case REIPL_METHOD_CCW_VM:
- get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
- __cpcmd(buf, NULL, 0, NULL);
- break;
- case REIPL_METHOD_CCW_DIAG:
+ switch (reipl_type) {
+ case IPL_TYPE_CCW:
diag308(DIAG308_SET, reipl_block_ccw);
- diag308(DIAG308_IPL, NULL);
+ if (reipl_ccw_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
break;
- case REIPL_METHOD_FCP_RW_DIAG:
- diag308(DIAG308_SET, reipl_block_fcp);
- diag308(DIAG308_IPL, NULL);
+ case IPL_TYPE_ECKD:
+ diag308(DIAG308_SET, reipl_block_eckd);
+ if (reipl_eckd_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
- case REIPL_METHOD_FCP_RO_DIAG:
- diag308(DIAG308_IPL, NULL);
+ case IPL_TYPE_FCP:
+ diag308(DIAG308_SET, reipl_block_fcp);
+ if (reipl_fcp_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
- case REIPL_METHOD_FCP_RO_VM:
- __cpcmd("IPL", NULL, 0, NULL);
+ case IPL_TYPE_NVME:
+ diag308(DIAG308_SET, reipl_block_nvme);
+ if (reipl_nvme_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
- case REIPL_METHOD_NSS_DIAG:
+ case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
- diag308(DIAG308_IPL, NULL);
- break;
- case REIPL_METHOD_NSS:
- get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
- __cpcmd(buf, NULL, 0, NULL);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_DEFAULT:
- if (MACHINE_IS_VM)
- __cpcmd("IPL", NULL, 0, NULL);
- diag308(DIAG308_IPL, NULL);
+ case IPL_TYPE_UNKNOWN:
+ diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_FCP_DUMP:
+ case IPL_TYPE_FCP_DUMP:
+ case IPL_TYPE_NVME_DUMP:
+ case IPL_TYPE_ECKD_DUMP:
break;
}
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
static void reipl_run(struct shutdown_trigger *trigger)
@@ -1114,10 +1255,10 @@ static void reipl_run(struct shutdown_trigger *trigger)
static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
{
- ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.len = IPL_BP_CCW_LEN;
ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
- ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ ipb->pb0_hdr.len = IPL_BP0_CCW_LEN;
+ ipb->pb0_hdr.pbt = IPL_PBT_CCW;
}
static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
@@ -1125,22 +1266,20 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
/* LOADPARM */
/* check if read scp info worked and set loadparm */
if (sclp_ipl_info.is_valid)
- memcpy(ipb->ipl_info.ccw.load_parm,
- &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
else
/* read scp info failed: set empty loadparm (EBCDIC blanks) */
- memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN);
- ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+ memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN);
+ ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
- if (MACHINE_IS_VM && diag308_set_works &&
- (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
-
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- ipb->ipl_info.ccw.vm_parm_len =
- ipl_block.ipl_info.ccw.vm_parm_len;
- memcpy(ipb->ipl_info.ccw.vm_parm,
- ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ if (machine_is_vm() && ipl_block_valid &&
+ (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
+
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len;
+ memcpy(ipb->ccw.vm_parm,
+ ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE);
}
}
@@ -1148,33 +1287,18 @@ static int __init reipl_nss_init(void)
{
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_nss)
return -ENOMEM;
- if (!diag308_set_works)
- sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
-
rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
if (rc)
return rc;
reipl_block_ccw_init(reipl_block_nss);
- if (ipl_info.type == IPL_TYPE_NSS) {
- memset(reipl_block_nss->ipl_info.ccw.nss_name,
- ' ', NSS_NAME_SIZE);
- memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
- kernel_nss_name, strlen(kernel_nss_name));
- ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
- reipl_block_nss->ipl_info.ccw.vm_flags |=
- DIAG308_VM_FLAGS_NSS_VALID;
-
- reipl_block_ccw_fill_parms(reipl_block_nss);
- }
-
reipl_capabilities |= IPL_TYPE_NSS;
return 0;
}
@@ -1187,23 +1311,16 @@ static int __init reipl_ccw_init(void)
if (!reipl_block_ccw)
return -ENOMEM;
- if (MACHINE_IS_VM) {
- if (!diag308_set_works)
- sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
- rc = sysfs_create_group(&reipl_kset->kobj,
- &reipl_ccw_attr_group_vm);
- } else {
- if(!diag308_set_works)
- sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
- rc = sysfs_create_group(&reipl_kset->kobj,
- &reipl_ccw_attr_group_lpar);
- }
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ machine_is_vm() ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
if (rc)
return rc;
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
- reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+ reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid;
+ reipl_block_ccw->ccw.devno = ipl_block.ccw.devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -1215,14 +1332,6 @@ static int __init reipl_fcp_init(void)
{
int rc;
- if (!diag308_set_works) {
- if (ipl_info.type == IPL_TYPE_FCP) {
- make_attrs_ro(reipl_fcp_attrs);
- sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
- } else
- return 0;
- }
-
reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_fcp)
return -ENOMEM;
@@ -1236,32 +1345,151 @@ static int __init reipl_fcp_init(void)
}
rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
- if (rc) {
- kset_unregister(reipl_fcp_kset);
- free_page((unsigned long) reipl_block_fcp);
- return rc;
- }
+ if (rc)
+ goto out1;
- rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
- &sys_reipl_fcp_scp_data_attr);
- if (rc) {
- sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
- kset_unregister(reipl_fcp_kset);
- free_page((unsigned long) reipl_block_fcp);
- return rc;
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+ &sys_reipl_fcp_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_fcp_clear = true;
}
- if (ipl_info.type == IPL_TYPE_FCP)
- memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
- else {
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ if (ipl_info.type == IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the SCSI IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN;
reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+ reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ reipl_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL;
}
reipl_capabilities |= IPL_TYPE_FCP;
return 0;
+
+out2:
+ sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+}
+
+static int __init reipl_nvme_init(void)
+{
+ int rc;
+
+ reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nvme)
+ return -ENOMEM;
+
+ /* sysfs: create kset for mixing attr group and bin attrs */
+ reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_nvme_kset) {
+ free_page((unsigned long) reipl_block_nvme);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_nvme_kset->kobj,
+ &sys_reipl_nvme_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_nvme_clear = true;
+ }
+
+ if (ipl_info.type == IPL_TYPE_NVME) {
+ memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block));
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ reipl_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_NVME;
+ return 0;
+
+out2:
+ sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+out1:
+ kset_unregister(reipl_nvme_kset);
+ free_page((unsigned long) reipl_block_nvme);
+ return rc;
+}
+
+static int __init reipl_eckd_init(void)
+{
+ int rc;
+
+ if (!sclp.has_sipl_eckd)
+ return 0;
+
+ reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_eckd)
+ return -ENOMEM;
+
+ /* sysfs: create kset for mixing attr group and bin attrs */
+ reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_eckd_kset) {
+ free_page((unsigned long)reipl_block_eckd);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_eckd_kset->kobj,
+ &sys_reipl_eckd_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_eckd_clear = true;
+ }
+
+ if (ipl_info.type == IPL_TYPE_ECKD) {
+ memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block));
+ } else {
+ reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+ reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+ reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+ reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_ECKD;
+ return 0;
+
+out2:
+ sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+out1:
+ kset_unregister(reipl_eckd_kset);
+ free_page((unsigned long)reipl_block_eckd);
+ return rc;
}
static int __init reipl_type_init(void)
@@ -1276,12 +1504,18 @@ static int __init reipl_type_init(void)
/*
* If we have an OS info reipl block, this will be used
*/
- if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
memcpy(reipl_block_fcp, reipl_block, size);
reipl_type = IPL_TYPE_FCP;
- } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) {
+ memcpy(reipl_block_nvme, reipl_block, size);
+ reipl_type = IPL_TYPE_NVME;
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) {
+ memcpy(reipl_block_eckd, reipl_block, size);
+ reipl_type = IPL_TYPE_ECKD;
}
out:
return reipl_set_type(reipl_type);
@@ -1302,9 +1536,15 @@ static int __init reipl_init(void)
rc = reipl_ccw_init();
if (rc)
return rc;
+ rc = reipl_eckd_init();
+ if (rc)
+ return rc;
rc = reipl_fcp_init();
if (rc)
return rc;
+ rc = reipl_nvme_init();
+ if (rc)
+ return rc;
rc = reipl_nss_init();
if (rc)
return rc;
@@ -1324,15 +1564,20 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.wwpn);
+ dump_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.lun);
+ dump_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.bootprog);
+ dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.br_lba);
+ dump_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_fcp->ipl_info.fcp.devno);
+ dump_block_fcp->fcp.devno);
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
+ dump_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
static struct attribute *dump_fcp_attrs[] = {
&sys_dump_fcp_device_attr.attr,
@@ -1343,15 +1588,87 @@ static struct attribute *dump_fcp_attrs[] = {
NULL,
};
-static struct attribute_group dump_fcp_attr_group = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
+ &sys_dump_fcp_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
+ .bin_attrs = dump_fcp_bin_attrs,
};
-/* CCW dump device attributes */
+/* NVME dump device attributes */
+DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.br_lba);
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
+ dump_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static struct attribute *dump_nvme_attrs[] = {
+ &sys_dump_nvme_fid_attr.attr,
+ &sys_dump_nvme_nsid_attr.attr,
+ &sys_dump_nvme_bootprog_attr.attr,
+ &sys_dump_nvme_br_lba_attr.attr,
+ NULL,
+};
+
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
+ &sys_dump_nvme_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_nvme_attr_group = {
+ .name = IPL_NVME_STR,
+ .attrs = dump_nvme_attrs,
+ .bin_attrs = dump_nvme_bin_attrs,
+};
+
+/* ECKD dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+ dump_block_eckd->eckd.bootprog);
+
+IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
+
+static struct kobj_attribute sys_dump_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store);
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_eckd, dump_block_eckd->hdr,
+ dump_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static struct attribute *dump_eckd_attrs[] = {
+ &sys_dump_eckd_device_attr.attr,
+ &sys_dump_eckd_bootprog_attr.attr,
+ &sys_dump_eckd_br_chr_attr.attr,
+ NULL,
+};
+
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
+ &sys_dump_eckd_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_eckd_attr_group = {
+ .name = IPL_ECKD_STR,
+ .attrs = dump_eckd_attrs,
+ .bin_attrs = dump_eckd_bin_attrs,
+};
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_ccw->ipl_info.ccw.devno);
+/* CCW dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1369,21 +1686,6 @@ static int dump_set_type(enum dump_type type)
{
if (!(dump_capabilities & type))
return -EINVAL;
- switch (type) {
- case DUMP_TYPE_CCW:
- if (diag308_set_works)
- dump_method = DUMP_METHOD_CCW_DIAG;
- else if (MACHINE_IS_VM)
- dump_method = DUMP_METHOD_CCW_VM;
- else
- dump_method = DUMP_METHOD_CCW_CIO;
- break;
- case DUMP_TYPE_FCP:
- dump_method = DUMP_METHOD_FCP_DIAG;
- break;
- default:
- dump_method = DUMP_METHOD_NONE;
- }
dump_type = type;
return 0;
}
@@ -1391,7 +1693,7 @@ static int dump_set_type(enum dump_type type)
static ssize_t dump_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", dump_type_str(dump_type));
+ return sysfs_emit(page, "%s\n", dump_type_str(dump_type));
}
static ssize_t dump_type_store(struct kobject *kobj,
@@ -1404,49 +1706,63 @@ static ssize_t dump_type_store(struct kobject *kobj,
rc = dump_set_type(DUMP_TYPE_NONE);
else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_CCW);
+ else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_ECKD);
else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_FCP);
+ else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_NVME);
return (rc != 0) ? rc : len;
}
static struct kobj_attribute dump_type_attr =
__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+static ssize_t dump_area_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%lu\n", sclp.hsa_size);
+}
+
+static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size);
+
+static struct attribute *dump_attrs[] = {
+ &dump_type_attr.attr,
+ &dump_area_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_attr_group = {
+ .attrs = dump_attrs,
+};
+
static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
{
diag308(DIAG308_SET, dump_block);
while (1) {
- if (diag308(DIAG308_DUMP, NULL) != 0x302)
+ if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
break;
- udelay_simple(USEC_PER_SEC);
+ udelay(USEC_PER_SEC);
}
}
static void __dump_run(void *unused)
{
- struct ccw_dev_id devid;
- static char buf[100];
-
- switch (dump_method) {
- case DUMP_METHOD_CCW_CIO:
- devid.devno = dump_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
- reipl_ccw_dev(&devid);
- break;
- case DUMP_METHOD_CCW_VM:
- sprintf(buf, "STORE STATUS");
- __cpcmd(buf, NULL, 0, NULL);
- sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
- __cpcmd(buf, NULL, 0, NULL);
- break;
- case DUMP_METHOD_CCW_DIAG:
+ switch (dump_type) {
+ case DUMP_TYPE_CCW:
diag308_dump(dump_block_ccw);
break;
- case DUMP_METHOD_FCP_DIAG:
+ case DUMP_TYPE_ECKD:
+ diag308_dump(dump_block_eckd);
+ break;
+ case DUMP_TYPE_FCP:
diag308_dump(dump_block_fcp);
break;
+ case DUMP_TYPE_NVME:
+ diag308_dump(dump_block_nvme);
+ break;
default:
break;
}
@@ -1454,7 +1770,7 @@ static void __dump_run(void *unused)
static void dump_run(struct shutdown_trigger *trigger)
{
- if (dump_method == DUMP_METHOD_NONE)
+ if (dump_type == DUMP_TYPE_NONE)
return;
smp_send_stop();
smp_call_ipl_cpu(__dump_run, NULL);
@@ -1472,10 +1788,10 @@ static int __init dump_ccw_init(void)
free_page((unsigned long)dump_block_ccw);
return rc;
}
- dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ dump_block_ccw->hdr.len = IPL_BP_CCW_LEN;
dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN;
+ dump_block_ccw->ccw.pbt = IPL_PBT_CCW;
dump_capabilities |= DUMP_TYPE_CCW;
return 0;
}
@@ -1486,8 +1802,6 @@ static int __init dump_fcp_init(void)
if (!sclp_ipl_info.has_dump)
return 0; /* LDIPL DUMP is not installed */
- if (!diag308_set_works)
- return 0;
dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!dump_block_fcp)
return -ENOMEM;
@@ -1496,15 +1810,61 @@ static int __init dump_fcp_init(void)
free_page((unsigned long)dump_block_fcp);
return rc;
}
- dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ dump_block_fcp->hdr.len = IPL_BP_FCP_LEN;
dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+ dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ dump_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_FCP;
return 0;
}
+static int __init dump_nvme_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_nvme)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_nvme);
+ return rc;
+ }
+ dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_NVME;
+ return 0;
+}
+
+static int __init dump_eckd_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_eckd)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_eckd);
+ return rc;
+ }
+ dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+ dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+ dump_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+ dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_ECKD;
+ return 0;
+}
+
static int __init dump_init(void)
{
int rc;
@@ -1512,7 +1872,7 @@ static int __init dump_init(void)
dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
if (!dump_kset)
return -ENOMEM;
- rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group);
if (rc) {
kset_unregister(dump_kset);
return rc;
@@ -1520,9 +1880,15 @@ static int __init dump_init(void)
rc = dump_ccw_init();
if (rc)
return rc;
+ rc = dump_eckd_init();
+ if (rc)
+ return rc;
rc = dump_fcp_init();
if (rc)
return rc;
+ rc = dump_nvme_init();
+ if (rc)
+ return rc;
dump_set_type(DUMP_TYPE_NONE);
return 0;
}
@@ -1535,38 +1901,47 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
- unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
unsigned int csum;
- csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- mem_assign_absolute(S390_lowcore.ipib, ipib);
- mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+ /*
+ * Set REIPL_CLEAR flag in os_info flags entry indicating
+ * 'clear' sysfs attribute has been set on the panicked system
+ * for specified reipl type.
+ * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+ */
+ if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+ (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+ (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+ (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+ reipl_type == IPL_TYPE_NSS ||
+ reipl_type == IPL_TYPE_UNKNOWN)
+ os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+ os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+ csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ abs_lc = get_abs_lowcore();
+ abs_lc->ipib = __pa(reipl_block_actual);
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc);
dump_run(trigger);
}
-static int __init dump_reipl_init(void)
-{
- if (!diag308_set_works)
- return -EOPNOTSUPP;
- else
- return 0;
-}
-
static struct shutdown_action __refdata dump_reipl_action = {
.name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
.fn = dump_reipl_run,
- .init = dump_reipl_init,
};
/*
* vmcmd shutdown action: Trigger vm command on shutdown.
*/
-static char vmcmd_on_reboot[128];
-static char vmcmd_on_panic[128];
-static char vmcmd_on_halt[128];
-static char vmcmd_on_poff[128];
-static char vmcmd_on_restart[128];
+#define VMCMD_MAX_SIZE 240
+
+static char vmcmd_on_reboot[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_panic[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_halt[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_poff[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_restart[VMCMD_MAX_SIZE + 1];
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
@@ -1613,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
static int vmcmd_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
if (!vmcmd_kset)
@@ -1632,7 +2007,7 @@ static void stop_run(struct shutdown_trigger *trigger)
{
if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
strcmp(trigger->name, ON_RESTART_STR) == 0)
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
smp_stop_cpu();
}
@@ -1678,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
static ssize_t on_reboot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name);
}
static ssize_t on_reboot_store(struct kobject *kobj,
@@ -1687,9 +2062,7 @@ static ssize_t on_reboot_store(struct kobject *kobj,
{
return set_trigger(buf, &on_reboot_trigger, len);
}
-
-static struct kobj_attribute on_reboot_attr =
- __ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store);
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
static void do_machine_restart(char *__unused)
{
@@ -1706,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
static ssize_t on_panic_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_panic_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_panic_trigger.action->name);
}
static ssize_t on_panic_store(struct kobject *kobj,
@@ -1715,9 +2088,7 @@ static ssize_t on_panic_store(struct kobject *kobj,
{
return set_trigger(buf, &on_panic_trigger, len);
}
-
-static struct kobj_attribute on_panic_attr =
- __ATTR(on_panic, 0644, on_panic_show, on_panic_store);
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
static void do_panic(void)
{
@@ -1734,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
static ssize_t on_restart_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_restart_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_restart_trigger.action->name);
}
static ssize_t on_restart_store(struct kobject *kobj,
@@ -1743,13 +2114,10 @@ static ssize_t on_restart_store(struct kobject *kobj,
{
return set_trigger(buf, &on_restart_trigger, len);
}
-
-static struct kobj_attribute on_restart_attr =
- __ATTR(on_restart, 0644, on_restart_show, on_restart_store);
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
static void __do_restart(void *ignore)
{
- __arch_local_irq_stosm(0x04); /* enable DAT */
smp_send_stop();
#ifdef CONFIG_CRASH_DUMP
crash_kexec(NULL);
@@ -1758,12 +2126,12 @@ static void __do_restart(void *ignore)
stop_run(&on_restart_trigger);
}
-void do_restart(void)
+void do_restart(void *arg)
{
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, NULL);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
@@ -1773,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
static ssize_t on_halt_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_halt_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_halt_trigger.action->name);
}
static ssize_t on_halt_store(struct kobject *kobj,
@@ -1782,10 +2150,7 @@ static ssize_t on_halt_store(struct kobject *kobj,
{
return set_trigger(buf, &on_halt_trigger, len);
}
-
-static struct kobj_attribute on_halt_attr =
- __ATTR(on_halt, 0644, on_halt_show, on_halt_store);
-
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
static void do_machine_halt(void)
{
@@ -1802,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
static ssize_t on_poff_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_poff_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_poff_trigger.action->name);
}
static ssize_t on_poff_store(struct kobject *kobj,
@@ -1811,10 +2176,7 @@ static ssize_t on_poff_store(struct kobject *kobj,
{
return set_trigger(buf, &on_poff_trigger, len);
}
-
-static struct kobj_attribute on_poff_attr =
- __ATTR(on_poff, 0644, on_poff_show, on_poff_store);
-
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
static void do_machine_power_off(void)
{
@@ -1824,26 +2186,27 @@ static void do_machine_power_off(void)
}
void (*_machine_power_off)(void) = do_machine_power_off;
+static struct attribute *shutdown_action_attrs[] = {
+ &on_restart_attr.attr,
+ &on_reboot_attr.attr,
+ &on_panic_attr.attr,
+ &on_halt_attr.attr,
+ &on_poff_attr.attr,
+ NULL,
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+ .attrs = shutdown_action_attrs,
+};
+
static void __init shutdown_triggers_init(void)
{
shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
firmware_kobj);
if (!shutdown_actions_kset)
goto fail;
- if (sysfs_create_file(&shutdown_actions_kset->kobj,
- &on_reboot_attr.attr))
- goto fail;
- if (sysfs_create_file(&shutdown_actions_kset->kobj,
- &on_panic_attr.attr))
- goto fail;
- if (sysfs_create_file(&shutdown_actions_kset->kobj,
- &on_halt_attr.attr))
- goto fail;
- if (sysfs_create_file(&shutdown_actions_kset->kobj,
- &on_poff_attr.attr))
- goto fail;
- if (sysfs_create_file(&shutdown_actions_kset->kobj,
- &on_restart_attr.attr))
+ if (sysfs_create_group(&shutdown_actions_kset->kobj,
+ &shutdown_action_attr_group))
goto fail;
return;
fail:
@@ -1864,7 +2227,21 @@ static void __init shutdown_actions_init(void)
static int __init s390_ipl_init(void)
{
- sclp_get_ipl_info(&sclp_ipl_info);
+ char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
+ sclp_early_get_ipl_info(&sclp_ipl_info);
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * returned by read SCP info is invalid (contains EBCDIC blanks)
+ * when the system has been booted via diag308. In that case we use
+ * the value from diag308, if available.
+ *
+ * There are also systems where diag308 store does not work in
+ * case the system is booted from HMC. Fortunately in this case
+ * READ SCP info provides the correct value.
+ */
+ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
+ memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN);
shutdown_actions_init();
shutdown_triggers_init();
return 0;
@@ -1872,26 +2249,28 @@ static int __init s390_ipl_init(void)
__initcall(s390_ipl_init);
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
{
int sx, dx;
- dx = 0;
- for (sx = 0; src[sx] != 0; sx++) {
+ if (!n)
+ return;
+ for (sx = 0, dx = 0; src[sx]; sx++) {
if (src[sx] == '"')
continue;
- dst[dx++] = src[sx];
- if (dx >= n)
+ dst[dx] = src[sx];
+ if (dx + 1 == n)
break;
+ dx++;
}
+ dst[dx] = '\0';
}
static int __init vmcmd_on_reboot_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, 127);
- vmcmd_on_reboot[127] = 0;
+ strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -1899,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup);
static int __init vmcmd_on_panic_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, 127);
- vmcmd_on_panic[127] = 0;
+ strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -1910,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup);
static int __init vmcmd_on_halt_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, 127);
- vmcmd_on_halt[127] = 0;
+ strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -1921,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup);
static int __init vmcmd_on_poff_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, 127);
- vmcmd_on_poff[127] = 0;
+ strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
on_poff_trigger.action = &vmcmd_action;
return 1;
}
@@ -1944,24 +2320,32 @@ static struct notifier_block on_panic_nb = {
void __init setup_ipl(void)
{
+ BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE);
+
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- ipl_info.data.ccw.dev_id.devno = ipl_devno;
- ipl_info.data.ccw.dev_id.ssid = 0;
+ ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
+ ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
+ break;
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid;
+ ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- ipl_info.data.fcp.dev_id.devno =
- IPL_PARMBLOCK_START->ipl_info.fcp.devno;
ipl_info.data.fcp.dev_id.ssid = 0;
- ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
- ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+ ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno;
+ ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
+ ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
- case IPL_TYPE_NSS:
- strncpy(ipl_info.data.nss.name, kernel_nss_name,
- sizeof(ipl_info.data.nss.name));
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ ipl_info.data.nvme.fid = ipl_block.nvme.fid;
+ ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
break;
+ case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
/* We have no info to copy */
break;
@@ -1969,101 +2353,147 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void __init ipl_update_parameters(void)
+void s390_reset_system(void)
{
- int rc;
+ /* Disable prefixing */
+ set_prefix(0);
- rc = diag308(DIAG308_STORE, &ipl_block);
- if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG))
- diag308_set_works = 1;
+ /* Disable lowcore protection */
+ local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
+ diag_amode31_ops.diag308_reset();
}
-void __init ipl_save_parameters(void)
+#ifdef CONFIG_KEXEC_FILE
+
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+ unsigned char flags, unsigned short cert)
{
- struct cio_iplinfo iplinfo;
- void *src, *dst;
+ struct ipl_report_component *comp;
- if (cio_get_iplinfo(&iplinfo))
- return;
+ comp = vzalloc(sizeof(*comp));
+ if (!comp)
+ return -ENOMEM;
+ list_add_tail(&comp->list, &report->components);
- ipl_devno = iplinfo.devno;
- ipl_flags |= IPL_DEVNO_VALID;
- if (!iplinfo.is_qdio)
- return;
- ipl_flags |= IPL_PARMBLOCK_VALID;
- src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr;
- dst = (void *)IPL_PARMBLOCK_ORIGIN;
- memmove(dst, src, PAGE_SIZE);
- S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
-}
+ comp->entry.addr = kbuf->mem;
+ comp->entry.len = kbuf->memsz;
+ comp->entry.flags = flags;
+ comp->entry.certificate_index = cert;
-static LIST_HEAD(rcall);
-static DEFINE_MUTEX(rcall_mutex);
+ report->size += sizeof(comp->entry);
-void register_reset_call(struct reset_call *reset)
-{
- mutex_lock(&rcall_mutex);
- list_add(&reset->list, &rcall);
- mutex_unlock(&rcall_mutex);
+ return 0;
}
-EXPORT_SYMBOL_GPL(register_reset_call);
-void unregister_reset_call(struct reset_call *reset)
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+ unsigned long addr, unsigned long len)
{
- mutex_lock(&rcall_mutex);
- list_del(&reset->list);
- mutex_unlock(&rcall_mutex);
+ struct ipl_report_certificate *cert;
+
+ cert = vzalloc(sizeof(*cert));
+ if (!cert)
+ return -ENOMEM;
+ list_add_tail(&cert->list, &report->certificates);
+
+ cert->entry.addr = addr;
+ cert->entry.len = len;
+ cert->key = key;
+
+ report->size += sizeof(cert->entry);
+ report->size += cert->entry.len;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(unregister_reset_call);
-static void do_reset_calls(void)
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib)
{
- struct reset_call *reset;
+ struct ipl_report *report;
-#ifdef CONFIG_64BIT
- if (diag308_set_works) {
- diag308_reset();
- return;
- }
-#endif
- list_for_each_entry(reset, &rcall, list)
- reset->fn();
-}
+ report = vzalloc(sizeof(*report));
+ if (!report)
+ return ERR_PTR(-ENOMEM);
-u32 dump_prefix_page;
+ report->ipib = ipib;
+ INIT_LIST_HEAD(&report->components);
+ INIT_LIST_HEAD(&report->certificates);
-void s390_reset_system(void (*func)(void *), void *data)
-{
- struct _lowcore *lc;
+ report->size = ALIGN(ipib->hdr.len, 8);
+ report->size += sizeof(struct ipl_rl_hdr);
+ report->size += sizeof(struct ipl_rb_components);
+ report->size += sizeof(struct ipl_rb_certificates);
- lc = (struct _lowcore *)(unsigned long) store_prefix();
+ return report;
+}
- /* Stack for interrupt/machine check handler */
- lc->panic_stack = S390_lowcore.panic_stack;
+void *ipl_report_finish(struct ipl_report *report)
+{
+ struct ipl_report_certificate *cert;
+ struct ipl_report_component *comp;
+ struct ipl_rb_certificates *certs;
+ struct ipl_parameter_block *ipib;
+ struct ipl_rb_components *comps;
+ struct ipl_rl_hdr *rl_hdr;
+ void *buf, *ptr;
- /* Save prefix page address for dump case */
- dump_prefix_page = (u32)(unsigned long) lc;
+ buf = vzalloc(report->size);
+ if (!buf)
+ goto out;
+ ptr = buf;
+
+ memcpy(ptr, report->ipib, report->ipib->hdr.len);
+ ipib = ptr;
+ if (ipl_secure_flag)
+ ipib->hdr.flags |= IPL_PL_FLAG_SIPL;
+ ipib->hdr.flags |= IPL_PL_FLAG_IPLSR;
+ ptr += report->ipib->hdr.len;
+ ptr = PTR_ALIGN(ptr, 8);
+
+ rl_hdr = ptr;
+ ptr += sizeof(*rl_hdr);
+
+ comps = ptr;
+ comps->rbt = IPL_RBT_COMPONENTS;
+ ptr += sizeof(*comps);
+ list_for_each_entry(comp, &report->components, list) {
+ memcpy(ptr, &comp->entry, sizeof(comp->entry));
+ ptr += sizeof(comp->entry);
+ }
+ comps->len = ptr - (void *)comps;
+
+ certs = ptr;
+ certs->rbt = IPL_RBT_CERTIFICATES;
+ ptr += sizeof(*certs);
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, &cert->entry, sizeof(cert->entry));
+ ptr += sizeof(cert->entry);
+ }
+ certs->len = ptr - (void *)certs;
+ rl_hdr->len = ptr - (void *)rl_hdr;
- /* Disable prefixing */
- set_prefix(0);
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, cert->key, cert->entry.len);
+ ptr += cert->entry.len;
+ }
- /* Disable lowcore protection */
- __ctl_clear_bit(0,28);
+ BUG_ON(ptr > buf + report->size);
+out:
+ return buf;
+}
+
+int ipl_report_free(struct ipl_report *report)
+{
+ struct ipl_report_component *comp, *ncomp;
+ struct ipl_report_certificate *cert, *ncert;
- /* Set new machine check handler */
- S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+ list_for_each_entry_safe(comp, ncomp, &report->components, list)
+ vfree(comp);
- /* Set new program check handler */
- S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
- S390_lowcore.program_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ list_for_each_entry_safe(cert, ncert, &report->certificates, list)
+ vfree(cert);
- /* Store status at absolute zero */
- store_status();
+ vfree(report);
- do_reset_calls();
- if (func)
- func(data);
+ return 0;
}
+
+#endif
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
new file mode 100644
index 000000000000..b5245fadcfb0
--- /dev/null
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/minmax.h>
+#include <linux/string.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+
+/* VM IPL PARM routines */
+size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ int i;
+ size_t len;
+ char has_lowercase = 0;
+
+ len = 0;
+ if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) &&
+ (ipb->ccw.vm_parm_len > 0)) {
+
+ len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len);
+ memcpy(dest, ipb->ccw.vm_parm, len);
+ /* If at least one character is lowercase, we assume mixed
+ * case; otherwise we convert everything to lowercase.
+ */
+ for (i = 0; i < len; i++)
+ if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+ (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+ (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+ has_lowercase = 1;
+ break;
+ }
+ if (!has_lowercase)
+ EBC_TOLOWER(dest, len);
+ EBCASC(dest, len);
+ }
+ dest[len] = 0;
+
+ return len;
+}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 54b0995514e8..bdf9c7cb5685 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2004, 2011
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
@@ -8,26 +9,37 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/entry-common.h>
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/stacktrace.h>
+#include <asm/softirq_stack.h>
+#include <asm/vtime.h>
+#include <asm/asm.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
struct irq_class {
+ int irq;
char *name;
char *desc;
};
@@ -42,9 +54,10 @@ struct irq_class {
* Since the external and I/O interrupt fields are already sums we would end
* up with having a sum which accounts each interrupt twice.
*/
-static const struct irq_class irqclass_main_desc[NR_IRQS] = {
- [EXTERNAL_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"}
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ {.irq = EXT_INTERRUPT, .name = "EXT"},
+ {.irq = IO_INTERRUPT, .name = "I/O"},
+ {.irq = THIN_INTERRUPT, .name = "AIO"},
};
/*
@@ -52,151 +65,248 @@ static const struct irq_class irqclass_main_desc[NR_IRQS] = {
* /proc/interrupts.
* In addition this list contains non external / I/O events like NMIs.
*/
-static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
- [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"},
- [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"},
- [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"},
- [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"},
- [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"},
- [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
- [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"},
- [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"},
- [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"},
- [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"},
- [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
- [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
- [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
- [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
- [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
- [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"},
- [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"},
- [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"},
- [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"},
- [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"},
- [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"},
- [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"},
- [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"},
- [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"},
- [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
- [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
- [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
- [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
- [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
- [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
- [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
+static const struct irq_class irqclass_sub_desc[] = {
+ {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+ {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+ {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+ {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+ {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+ {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+ {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+ {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+ {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+ {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
+ {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
+ {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
+ {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
+ {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
+ {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
+ {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"},
+ {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"},
+ {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"},
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"},
+ {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"},
+ {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
+ {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
+static void do_IRQ(struct pt_regs *regs, int irq)
+{
+ if (tod_after_eq(get_lowcore()->int_clock,
+ get_lowcore()->clock_comparator))
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+}
+
+static int on_async_stack(void)
+{
+ unsigned long frame = current_frame_address();
+
+ return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+}
+
+static void do_irq_async(struct pt_regs *regs, int irq)
+{
+ if (on_async_stack()) {
+ do_IRQ(regs, irq);
+ } else {
+ call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
+ struct pt_regs *, regs, int, irq);
+ }
+}
+
+static int irq_pending(struct pt_regs *regs)
+{
+ int cc;
+
+ asm volatile(
+ " tpi 0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
+}
+
+void noinstr do_io_irq(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ bool from_idle;
+
+ irq_enter_rcu();
+
+ if (user_mode(regs)) {
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ }
+
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle)
+ account_idle_time_irq();
+
+ set_cpu_flag(CIF_NOHZ_DELAY);
+ do {
+ regs->tpi_info = get_lowcore()->tpi_info;
+ if (get_lowcore()->tpi_info.adapter_IO)
+ do_irq_async(regs, THIN_INTERRUPT);
+ else
+ do_irq_async(regs, IO_INTERRUPT);
+ } while (machine_is_lpar() && irq_pending(regs));
+
+ irq_exit_rcu();
+
+ set_irq_regs(old_regs);
+ irqentry_exit(regs, state);
+
+ if (from_idle)
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+}
+
+void noinstr do_ext_irq(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ bool from_idle;
+
+ irq_enter_rcu();
+
+ if (user_mode(regs)) {
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ }
+
+ regs->int_code = get_lowcore()->ext_int_code_addr;
+ regs->int_parm = get_lowcore()->ext_params;
+ regs->int_parm_long = get_lowcore()->ext_params2;
+
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle)
+ account_idle_time_irq();
+
+ do_irq_async(regs, EXT_INTERRUPT);
+
+ irq_exit_rcu();
+ set_irq_regs(old_regs);
+ irqentry_exit(regs, state);
+
+ if (from_idle)
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+}
+
+static void show_msi_interrupt(struct seq_file *p, int irq)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+ int cpu;
+
+ rcu_read_lock();
+ desc = irq_to_desc(irq);
+ if (!desc)
+ goto out;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ seq_printf(p, "%3d: ", irq);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));
+
+ if (desc->irq_data.chip)
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+ if (desc->action)
+ seq_printf(p, " %s", desc->action->name);
+
+ seq_putc(p, '\n');
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+out:
+ rcu_read_unlock();
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
int show_interrupts(struct seq_file *p, void *v)
{
- int irq = *(loff_t *) v;
- int cpu;
+ int index = *(loff_t *) v;
+ int cpu, irq;
- get_online_cpus();
- if (irq == 0) {
+ cpus_read_lock();
+ if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
- seq_printf(p, "CPU%d ", cpu);
+ seq_printf(p, "CPU%-8d", cpu);
seq_putc(p, '\n');
}
- if (irq < NR_IRQS) {
- seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
+ if (index < NR_IRQS_BASE) {
+ seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+ irq = irqclass_main_desc[index].irq;
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
- goto skip_arch_irqs;
+ goto out;
+ }
+ if (index < irq_get_nr_irqs()) {
+ show_msi_interrupt(p, index);
+ goto out;
}
- for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
- seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
+ for (index = 0; index < NR_ARCH_IRQS; index++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+ irq = irqclass_sub_desc[index].irq;
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]);
- if (irqclass_sub_desc[irq].desc)
- seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
+ if (irqclass_sub_desc[index].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[index].desc);
seq_putc(p, '\n');
}
-skip_arch_irqs:
- put_online_cpus();
+out:
+ cpus_read_unlock();
return 0;
}
-/*
- * Switch to the asynchronous interrupt stack for softirq execution.
- */
-asmlinkage void do_softirq(void)
-{
- unsigned long flags, old, new;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
- /* Check against async. stack address range. */
- new = S390_lowcore.async_stack;
- if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
- /* Need to switch to the async. stack. */
- new -= STACK_FRAME_OVERHEAD;
- ((struct stack_frame *) new)->back_chain = old;
-
- asm volatile(" la 15,0(%0)\n"
- " basr 14,%2\n"
- " la 15,0(%1)\n"
- : : "a" (new), "a" (old),
- "a" (__do_softirq)
- : "0", "1", "2", "3", "4", "5", "14",
- "cc", "memory" );
- } else {
- /* We are already on the async stack. */
- __do_softirq();
- }
- }
-
- local_irq_restore(flags);
-}
-
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
+unsigned int arch_dynirq_lower_bound(unsigned int from)
{
- if (proc_mkdir("irq", NULL))
- create_prof_cpu_mask();
+ return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
}
-#endif
/*
* ext_int_hash[index] is the list head for all external interrupts that hash
* to this index.
*/
-static struct list_head ext_int_hash[256];
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
struct ext_int_info {
ext_int_handler_t handler;
- u16 code;
- struct list_head entry;
+ struct hlist_node entry;
struct rcu_head rcu;
+ u16 code;
};
/* ext_int_hash_lock protects the handler lists for external interrupts */
-DEFINE_SPINLOCK(ext_int_hash_lock);
-
-static void __init init_external_interrupts(void)
-{
- int idx;
-
- for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
- INIT_LIST_HEAD(&ext_int_hash[idx]);
-}
+static DEFINE_SPINLOCK(ext_int_hash_lock);
static inline int ext_hash(u16 code)
{
- return (code + (code >> 9)) & 0xff;
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
}
-int register_external_interrupt(u16 code, ext_int_handler_t handler)
+int register_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
@@ -210,172 +320,92 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler)
index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_add_rcu(&p->entry, &ext_int_hash[index]);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(register_external_interrupt);
+EXPORT_SYMBOL(register_external_irq);
-int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
int index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
if (p->code == code && p->handler == handler) {
- list_del_rcu(&p->entry);
+ hlist_del_rcu(&p->entry);
kfree_rcu(p, rcu);
}
}
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(unregister_external_interrupt);
+EXPORT_SYMBOL(unregister_external_irq);
-void __irq_entry do_extint(struct pt_regs *regs)
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
{
+ struct pt_regs *regs = get_irq_regs();
struct ext_code ext_code;
- struct pt_regs *old_regs;
struct ext_int_info *p;
int index;
- old_regs = set_irq_regs(regs);
- irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
- /* Serve timer interrupts first. */
- clock_comparator_work();
- }
- kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
- ext_code = *(struct ext_code *) &regs->int_code;
- if (ext_code.code != 0x1004)
- __get_cpu_var(s390_idle).nohz_delay = 1;
+ ext_code.int_code = regs->int_code;
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
+ set_cpu_flag(CIF_NOHZ_DELAY);
index = ext_hash(ext_code.code);
rcu_read_lock();
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
- if (likely(p->code == ext_code.code))
- p->handler(ext_code, regs->int_parm,
- regs->int_parm_long);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
rcu_read_unlock();
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-void __init init_IRQ(void)
-{
- init_external_interrupts();
+ return IRQ_HANDLED;
}
-static DEFINE_SPINLOCK(sc_irq_lock);
-static int sc_irq_refcount;
-
-void service_subclass_irq_register(void)
+static void __init init_ext_interrupts(void)
{
- spin_lock(&sc_irq_lock);
- if (!sc_irq_refcount)
- ctl_set_bit(0, 9);
- sc_irq_refcount++;
- spin_unlock(&sc_irq_lock);
-}
-EXPORT_SYMBOL(service_subclass_irq_register);
-
-void service_subclass_irq_unregister(void)
-{
- spin_lock(&sc_irq_lock);
- sc_irq_refcount--;
- if (!sc_irq_refcount)
- ctl_clear_bit(0, 9);
- spin_unlock(&sc_irq_lock);
-}
-EXPORT_SYMBOL(service_subclass_irq_unregister);
-
-static DEFINE_SPINLOCK(ma_subclass_lock);
-static int ma_subclass_refcount;
-
-void measurement_alert_subclass_register(void)
-{
- spin_lock(&ma_subclass_lock);
- if (!ma_subclass_refcount)
- ctl_set_bit(0, 5);
- ma_subclass_refcount++;
- spin_unlock(&ma_subclass_lock);
-}
-EXPORT_SYMBOL(measurement_alert_subclass_register);
-
-void measurement_alert_subclass_unregister(void)
-{
- spin_lock(&ma_subclass_lock);
- ma_subclass_refcount--;
- if (!ma_subclass_refcount)
- ctl_clear_bit(0, 5);
- spin_unlock(&ma_subclass_lock);
-}
-EXPORT_SYMBOL(measurement_alert_subclass_unregister);
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- /*
- * Not needed, the handler is protected by a lock and IRQs that occur
- * after the handler is deleted are just NOPs.
- */
-}
-EXPORT_SYMBOL_GPL(synchronize_irq);
-#endif
-
-#ifndef CONFIG_PCI
-
-/* Only PCI devices have dynamically-defined IRQ handlers */
-
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(free_irq);
+ int idx;
-void enable_irq(unsigned int irq)
-{
- WARN_ON(1);
-}
-EXPORT_SYMBOL_GPL(enable_irq);
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
-void disable_irq(unsigned int irq)
-{
- WARN_ON(1);
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL))
+ panic("Failed to register EXT interrupt\n");
}
-EXPORT_SYMBOL_GPL(disable_irq);
-
-#endif /* !CONFIG_PCI */
-void disable_irq_nosync(unsigned int irq)
+void __init init_IRQ(void)
{
- disable_irq(irq);
+ BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
}
-EXPORT_SYMBOL_GPL(disable_irq_nosync);
-unsigned long probe_irq_on(void)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_on);
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
-int probe_irq_off(unsigned long val)
+void irq_subclass_register(enum irq_subclass subclass)
{
- return 0;
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ system_ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL_GPL(probe_irq_off);
+EXPORT_SYMBOL(irq_subclass_register);
-unsigned int probe_irq_mask(unsigned long val)
+void irq_subclass_unregister(enum irq_subclass subclass)
{
- return val;
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ system_ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL_GPL(probe_irq_mask);
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index b987ab2c1541..e808bb8bc0da 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -1,70 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Jump label s390 support
*
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#include <linux/module.h>
#include <linux/uaccess.h>
-#include <linux/stop_machine.h>
#include <linux/jump_label.h>
+#include <linux/module.h>
+#include <asm/text-patching.h>
#include <asm/ipl.h>
-#ifdef HAVE_JUMP_LABEL
-
struct insn {
u16 opcode;
s32 offset;
} __packed;
-struct insn_args {
- struct jump_entry *entry;
- enum jump_label_type type;
-};
+static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
+{
+ /* brcl 0,offset */
+ insn->opcode = 0xc004;
+ insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
+}
-static void __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
+static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
{
- struct insn insn;
- int rc;
+ /* brcl 15,offset */
+ insn->opcode = 0xc0f4;
+ insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
+}
- if (type == JUMP_LABEL_ENABLE) {
- /* brcl 15,offset */
- insn.opcode = 0xc0f4;
- insn.offset = (entry->target - entry->code) >> 1;
- } else {
- /* brcl 0,0 */
- insn.opcode = 0xc004;
- insn.offset = 0;
- }
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+ struct insn *new)
+{
+ unsigned char *ipc = (unsigned char *)jump_entry_code(entry);
+ unsigned char *ipe = (unsigned char *)expected;
+ unsigned char *ipn = (unsigned char *)new;
- rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE);
- WARN_ON_ONCE(rc < 0);
+ pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc);
+ pr_emerg("Found: %6ph\n", ipc);
+ pr_emerg("Expected: %6ph\n", ipe);
+ pr_emerg("New: %6ph\n", ipn);
+ panic("Corrupted kernel text");
}
-static int __sm_arch_jump_label_transform(void *data)
+static void jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
{
- struct insn_args *args = data;
+ void *code = (void *)jump_entry_code(entry);
+ struct insn old, new;
- __jump_label_transform(args->entry, args->type);
- return 0;
+ if (type == JUMP_LABEL_JMP) {
+ jump_label_make_nop(entry, &old);
+ jump_label_make_branch(entry, &new);
+ } else {
+ jump_label_make_branch(entry, &old);
+ jump_label_make_nop(entry, &new);
+ }
+ if (memcmp(code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
+ s390_kernel_write(code, &new, sizeof(new));
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct insn_args args;
-
- args.entry = entry;
- args.type = type;
-
- stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+ jump_label_transform(entry, type);
+ text_poke_sync();
}
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
{
- __jump_label_transform(entry, type);
+ jump_label_transform(entry, type);
+ return true;
}
-#endif
+void arch_jump_label_transform_apply(void)
+{
+ text_poke_sync();
+}
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 000000000000..33130c7daf55
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+ arch_debugfs_dir = debugfs_create_dir("s390", NULL);
+ return 0;
+}
+postcore_initcall(arch_kdebugfs_init);
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
new file mode 100644
index 000000000000..143e34a4eca5
--- /dev/null
+++ b/arch/s390/kernel/kexec_elf.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ELF loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_kernel_elf(struct kimage *image,
+ struct s390_load_data *data)
+{
+ struct kexec_buf buf = {};
+ const Elf_Ehdr *ehdr;
+ const Elf_Phdr *phdr;
+ Elf_Addr entry;
+ void *kernel;
+ int i, ret;
+
+ kernel = image->kernel_buf;
+ ehdr = (Elf_Ehdr *)kernel;
+ buf.image = image;
+ if (image->type == KEXEC_TYPE_CRASH)
+ entry = STARTUP_KDUMP_OFFSET;
+ else
+ entry = ehdr->e_entry;
+
+ phdr = (void *)ehdr + ehdr->e_phoff;
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ buf.buffer = kernel + phdr->p_offset;
+ buf.bufsz = phdr->p_filesz;
+
+ buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+ buf.memsz = phdr->p_memsz;
+ data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
+
+ if (entry - phdr->p_paddr < phdr->p_memsz) {
+ data->kernel_buf = buf.buffer;
+ data->kernel_mem = buf.mem;
+ data->parm = buf.buffer + PARMAREA;
+ }
+
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
+ ret = kexec_add_buffer(&buf);
+ if (ret)
+ return ret;
+ }
+
+ return data->memsz ? 0 : -EINVAL;
+}
+
+static void *s390_elf_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ const Elf_Ehdr *ehdr;
+ const Elf_Phdr *phdr;
+ size_t size;
+ int i;
+
+ /* image->fobs->probe already checked for valid ELF magic number. */
+ ehdr = (Elf_Ehdr *)kernel;
+
+ if (ehdr->e_type != ET_EXEC ||
+ ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+ !elf_check_arch(ehdr))
+ return ERR_PTR(-EINVAL);
+
+ if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr))
+ return ERR_PTR(-EINVAL);
+
+ size = ehdr->e_ehsize + ehdr->e_phoff;
+ size += ehdr->e_phentsize * ehdr->e_phnum;
+ if (size > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ phdr = (void *)ehdr + ehdr->e_phoff;
+ size = ALIGN(size, phdr->p_align);
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type == PT_INTERP)
+ return ERR_PTR(-EINVAL);
+
+ if (phdr->p_offset > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ size += ALIGN(phdr->p_filesz, phdr->p_align);
+ }
+
+ if (size > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ return kexec_file_add_components(image, kexec_file_add_kernel_elf);
+}
+
+static int s390_elf_probe(const char *buf, unsigned long len)
+{
+ const Elf_Ehdr *ehdr;
+
+ if (len < sizeof(Elf_Ehdr))
+ return -ENOEXEC;
+
+ ehdr = (Elf_Ehdr *)buf;
+
+ /* Only check the ELF magic number here and do proper validity check
+ * in the loader. Any check here that fails would send the erroneous
+ * ELF file to the image loader that does not care what it gets.
+ * (Most likely) causing behavior not intended by the user.
+ */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ return -ENOEXEC;
+
+ return 0;
+}
+
+const struct kexec_file_ops s390_kexec_elf_ops = {
+ .probe = s390_elf_probe,
+ .load = s390_elf_load,
+#ifdef CONFIG_KEXEC_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_SIG */
+};
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
new file mode 100644
index 000000000000..9a439175723c
--- /dev/null
+++ b/arch/s390/kernel/kexec_image.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Image loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_kernel_image(struct kimage *image,
+ struct s390_load_data *data)
+{
+ struct kexec_buf buf = {};
+
+ buf.image = image;
+
+ buf.buffer = image->kernel_buf;
+ buf.bufsz = image->kernel_buf_len;
+
+ buf.mem = 0;
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+ buf.memsz = buf.bufsz;
+
+ data->kernel_buf = image->kernel_buf;
+ data->kernel_mem = buf.mem;
+ data->parm = image->kernel_buf + PARMAREA;
+ data->memsz += buf.memsz;
+
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
+ return kexec_add_buffer(&buf);
+}
+
+static void *s390_image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ return kexec_file_add_components(image, kexec_file_add_kernel_image);
+}
+
+static int s390_image_probe(const char *buf, unsigned long len)
+{
+ /* Can't reliably tell if an image is valid. Therefore give the
+ * user whatever he wants.
+ */
+ return 0;
+}
+
+const struct kexec_file_ops s390_kexec_image_ops = {
+ .probe = s390_image_probe,
+ .load = s390_image_load,
+#ifdef CONFIG_KEXEC_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_SIG */
+};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3388b2b2a07d..c450120b4474 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -1,244 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corp. 2002, 2006
*
* s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
*/
+#define pr_fmt(fmt) "kprobes: " fmt
+
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/sections.h>
+#include <linux/extable.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/execmem.h>
+#include <asm/text-patching.h>
+#include <asm/set_memory.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
+#include "entry.h"
DEFINE_PER_CPU(struct kprobe *, current_kprobe);
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
-static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
+void *alloc_insn_page(void)
{
- switch (insn[0] >> 8) {
- case 0x0c: /* bassm */
- case 0x0b: /* bsm */
- case 0x83: /* diag */
- case 0x44: /* ex */
- case 0xac: /* stnsm */
- case 0xad: /* stosm */
- return -EINVAL;
- }
- switch (insn[0]) {
- case 0x0101: /* pr */
- case 0xb25a: /* bsa */
- case 0xb240: /* bakr */
- case 0xb258: /* bsg */
- case 0xb218: /* pc */
- case 0xb228: /* pt */
- case 0xb98d: /* epsw */
- return -EINVAL;
+ void *page;
+
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!page)
+ return NULL;
+ set_memory_rox((unsigned long)page, 1);
+ return page;
+}
+
+static void copy_instruction(struct kprobe *p)
+{
+ kprobe_opcode_t insn[MAX_INSN_SIZE];
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+ unsigned int len;
+
+ len = insn_length(*p->addr >> 8);
+ memcpy(&insn, p->addr, len);
+ p->opcode = insn[0];
+ if (probe_is_insn_relative_long(&insn[0])) {
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch
+ * the RI2 displacement field. The insn slot for the to be
+ * patched instruction is within the same 4GB area like the
+ * original instruction. Therefore the new displacement will
+ * always fit.
+ */
+ disp = *(s32 *)&insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&insn[1] = new_disp;
}
- return 0;
+ s390_kernel_write(p->ainsn.insn, &insn, len);
}
+NOKPROBE_SYMBOL(copy_instruction);
-static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
+/* Check if paddr is at an instruction boundary */
+static bool can_probe(unsigned long paddr)
{
- /* default fixup method */
- int fixup = FIXUP_PSW_NORMAL;
-
- switch (insn[0] >> 8) {
- case 0x05: /* balr */
- case 0x0d: /* basr */
- fixup = FIXUP_RETURN_REGISTER;
- /* if r2 = 0, no branch will be taken */
- if ((insn[0] & 0x0f) == 0)
- fixup |= FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x06: /* bctr */
- case 0x07: /* bcr */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x45: /* bal */
- case 0x4d: /* bas */
- fixup = FIXUP_RETURN_REGISTER;
- break;
- case 0x47: /* bc */
- case 0x46: /* bct */
- case 0x86: /* bxh */
- case 0x87: /* bxle */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x82: /* lpsw */
- fixup = FIXUP_NOT_REQUIRED;
- break;
- case 0xb2: /* lpswe */
- if ((insn[0] & 0xff) == 0xb2)
- fixup = FIXUP_NOT_REQUIRED;
- break;
- case 0xa7: /* bras */
- if ((insn[0] & 0x0f) == 0x05)
- fixup |= FIXUP_RETURN_REGISTER;
- break;
- case 0xc0:
- if ((insn[0] & 0x0f) == 0x00 || /* larl */
- (insn[0] & 0x0f) == 0x05) /* brasl */
- fixup |= FIXUP_RETURN_REGISTER;
- break;
- case 0xeb:
- if ((insn[2] & 0xff) == 0x44 || /* bxhg */
- (insn[2] & 0xff) == 0x45) /* bxleg */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0xe3: /* bctg */
- if ((insn[2] & 0xff) == 0x46)
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
+ unsigned long addr, offset = 0;
+ kprobe_opcode_t insn;
+ struct kprobe *kp;
+
+ if (paddr & 0x01)
+ return false;
+
+ if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
+ return false;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr) {
+ if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(insn)))
+ return false;
+
+ if (insn >> 8 == 0) {
+ if (insn != BREAKPOINT_INSTRUCTION) {
+ /*
+ * Note that QEMU inserts opcode 0x0000 to implement
+ * software breakpoints for guests. Since the size of
+ * the original instruction is unknown, stop following
+ * instructions and prevent setting a kprobe.
+ */
+ return false;
+ }
+ /*
+ * Check if the instruction has been modified by another
+ * kprobe, in which case the original instruction is
+ * decoded.
+ */
+ kp = get_kprobe((void *)addr);
+ if (!kp) {
+ /* not a kprobe */
+ return false;
+ }
+ insn = kp->opcode;
+ }
+ addr += insn_length(insn >> 8);
}
- return fixup;
+ return addr == paddr;
}
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
{
- if ((unsigned long) p->addr & 0x01)
+ if (!can_probe((unsigned long)p->addr))
return -EINVAL;
-
/* Make sure the probe isn't going on a difficult instruction */
- if (is_prohibited_opcode(p->addr))
+ if (probe_is_prohibited_opcode(p->addr))
return -EINVAL;
-
- p->opcode = *p->addr;
- memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
-
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+ copy_instruction(p);
return 0;
}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
-struct ins_replace_args {
- kprobe_opcode_t *ptr;
- kprobe_opcode_t opcode;
+struct swap_insn_args {
+ struct kprobe *p;
+ unsigned int arm_kprobe : 1;
};
-static int __kprobes swap_instruction(void *aref)
+static int swap_instruction(void *data)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long status = kcb->kprobe_status;
- struct ins_replace_args *args = aref;
+ struct swap_insn_args *args = data;
+ struct kprobe *p = args->p;
+ u16 opc;
- kcb->kprobe_status = KPROBE_SWAP_INST;
- probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode));
- kcb->kprobe_status = status;
+ opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ s390_kernel_write(p->addr, &opc, sizeof(opc));
return 0;
}
+NOKPROBE_SYMBOL(swap_instruction);
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
{
- struct ins_replace_args args;
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- args.ptr = p->addr;
- args.opcode = BREAKPOINT_INSTRUCTION;
- stop_machine(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
{
- struct ins_replace_args args;
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- args.ptr = p->addr;
- args.opcode = p->opcode;
- stop_machine(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
{
+ if (!p->ainsn.insn)
+ return;
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
-static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
- struct pt_regs *regs,
- unsigned long ip)
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
- struct per_regs per_kprobe;
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } per_kprobe;
/* Set up the PER control registers %cr9-%cr11 */
- per_kprobe.control = PER_EVENT_IFETCH;
- per_kprobe.start = ip;
- per_kprobe.end = ip;
+ per_kprobe.control.val = PER_EVENT_IFETCH;
+ per_kprobe.start.val = ip;
+ per_kprobe.end.val = ip;
/* Save control regs and psw mask */
- __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_store(9, 11, kcb->kprobe_saved_ctl);
kcb->kprobe_saved_imask = regs->psw.mask &
(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
/* Set PER control regs, turns on single step for the given address */
- __ctl_load(per_kprobe, 9, 11);
+ __local_ctl_load(9, 11, per_kprobe.regs);
regs->psw.mask |= PSW_MASK_PER;
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
- regs->psw.addr = ip | PSW_ADDR_AMODE;
+ regs->psw.addr = ip;
}
+NOKPROBE_SYMBOL(enable_singlestep);
-static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
- struct pt_regs *regs,
- unsigned long ip)
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
/* Restore control regs and psw mask, set new psw address */
- __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_load(9, 11, kcb->kprobe_saved_ctl);
regs->psw.mask &= ~PSW_MASK_PER;
regs->psw.mask |= kcb->kprobe_saved_imask;
- regs->psw.addr = ip | PSW_ADDR_AMODE;
+ regs->psw.addr = ip;
}
+NOKPROBE_SYMBOL(disable_singlestep);
/*
* Activate a kprobe by storing its pointer to current_kprobe. The
* previous kprobe is stored in kcb->prev_kprobe. A stack of up to
* two kprobes can be active, see KPROBE_REENTER.
*/
-static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
- kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe);
+ kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
kcb->prev_kprobe.status = kcb->kprobe_status;
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
}
+NOKPROBE_SYMBOL(push_kprobe);
/*
* Deactivate a kprobe by backing up to the previous state. If the
* current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
* for any other state prev_kprobe.kp will be NULL.
*/
-static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->prev_kprobe.kp = NULL;
}
+NOKPROBE_SYMBOL(pop_kprobe);
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
-{
- ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
-
- /* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
-}
-
-static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
- struct kprobe *p)
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
@@ -253,13 +269,14 @@ static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
* is a BUG. The code path resides in the .kprobes.text
* section and is executed with interrupts disabled.
*/
- printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+ pr_err("Failed to recover from reentered kprobes.\n");
dump_kprobe(p);
BUG();
}
}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+static int kprobe_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
struct kprobe *p;
@@ -271,7 +288,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
- p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+ p = get_kprobe((void *)(regs->psw.addr - 2));
if (p) {
if (kprobe_running()) {
@@ -292,38 +309,20 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* If we have no pre-handler or it returned 0, we
* continue with single stepping. If we have a
* pre-handler and it returned non-zero, it prepped
- * for calling the break_handler below on re-entry
- * for jprobe processing, so get out doing nothing
- * more here.
+ * for changing execution path, so get out doing
+ * nothing more here.
*/
push_kprobe(kcb, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (p->pre_handler && p->pre_handler(p, regs))
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
return 1;
+ }
kcb->kprobe_status = KPROBE_HIT_SS;
}
enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
return 1;
- } else if (kprobe_running()) {
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- /*
- * Continuation after the jprobe completed and
- * caused the jprobe_return trap. The jprobe
- * break_handler "returns" to the original
- * function that still has the kprobe breakpoint
- * installed. We continue with single stepping.
- */
- kcb->kprobe_status = KPROBE_HIT_SS;
- enable_singlestep(kcb, regs,
- (unsigned long) p->ainsn.insn);
- return 1;
- } /* else:
- * No kprobe at this address and the current kprobe
- * has no break handler (no jprobe!). The kernel just
- * exploded, let the standard trap handler pick up the
- * pieces.
- */
} /* else:
* No kprobe at this address and no active kprobe. The trap has
* not been caused by a kprobe breakpoint. The race of breakpoint
@@ -333,111 +332,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_enable_no_resched();
return 0;
}
-
-/*
- * Function return probe trampoline:
- * - init_kprobes() establishes a probepoint here
- * - When the probed function returns, this probe
- * causes the handlers to fire
- */
-static void __used kretprobe_trampoline_holder(void)
-{
- asm volatile(".global kretprobe_trampoline\n"
- "kretprobe_trampoline: bcr 0,0\n");
-}
-
-/*
- * Called when the probe at kretprobe trampoline is hit
- */
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
- struct pt_regs *regs)
-{
- struct kretprobe_instance *ri;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address;
- unsigned long trampoline_address;
- kprobe_opcode_t *correct_ret_addr;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more than one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- ri = NULL;
- orig_ret_address = 0;
- correct_ret_addr = NULL;
- trampoline_address = (unsigned long) &kretprobe_trampoline;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long) ri->ret_addr;
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
- correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long) ri->ret_addr;
-
- if (ri->rp && ri->rp->handler) {
- ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
- }
-
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
-
- pop_kprobe(get_kprobe_ctlblk());
- kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
-}
+NOKPROBE_SYMBOL(kprobe_handler);
/*
* Called after single-stepping. p->addr is the address of the
@@ -447,17 +342,17 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
- int fixup = get_fixup_type(p->ainsn.insn);
+ unsigned long ip = regs->psw.addr;
+ int fixup = probe_get_fixup_type(p->ainsn.insn);
if (fixup & FIXUP_PSW_NORMAL)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
- int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2;
+ int ilen = insn_length(p->ainsn.insn[0] >> 8);
if (ip - (unsigned long) p->ainsn.insn == ilen)
ip = (unsigned long) p->addr + ilen;
}
@@ -470,8 +365,9 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
disable_singlestep(kcb, regs, ip);
}
+NOKPROBE_SYMBOL(resume_execution);
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+static int post_kprobe_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
struct kprobe *p = kprobe_running();
@@ -479,12 +375,11 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
if (!p)
return 0;
+ resume_execution(p, regs);
if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
-
- resume_execution(p, regs);
pop_kprobe(kcb);
preempt_enable_no_resched();
@@ -498,17 +393,14 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
return 1;
}
+NOKPROBE_SYMBOL(post_kprobe_handler);
-static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
struct kprobe *p = kprobe_running();
- const struct exception_table_entry *entry;
switch(kcb->kprobe_status) {
- case KPROBE_SWAP_INST:
- /* We are here because the instruction replacement failed */
- return 0;
case KPROBE_HIT_SS:
case KPROBE_REENTER:
/*
@@ -525,32 +417,11 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(p);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (p->fault_handler && p->fault_handler(p, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
- if (entry) {
- regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+ if (fixup_exception(regs))
return 1;
- }
-
/*
* fixup_exception() could not handle it,
* Let do_page_fault() fix it.
@@ -561,8 +432,9 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
}
return 0;
}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
int ret;
@@ -573,12 +445,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
return ret;
}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
/*
* Wrapper routine to for handling exceptions.
*/
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *) data;
struct pt_regs *regs = args->regs;
@@ -610,62 +483,21 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
return ret;
}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long stack;
-
- memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
-
- /* setup return addr to the jprobe handler routine */
- regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
- regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-
- /* r15 is the stack pointer */
- stack = (unsigned long) regs->gprs[15];
-
- memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
- return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
- asm volatile(".word 0x0002");
-}
-
-static void __used __kprobes jprobe_return_end(void)
-{
- asm volatile("bcr 0,0");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __init arch_init_kprobes(void)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long stack;
-
- stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
-
- /* Put the regs back */
- memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
- /* put the stack back */
- memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
- preempt_enable_no_resched();
- return 1;
+ return 0;
}
-static struct kprobe trampoline = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
+int __init arch_populate_kprobe_blacklist(void)
{
- return register_kprobe(&trampoline);
+ return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
}
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
{
- return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
+ return 0;
}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6ea6d69339b5..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Linux Guest Relocation (LGR) detection
*
@@ -5,7 +6,8 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <asm/facility.h>
@@ -86,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
if (stsi(si, 2, 2, 2))
return;
cpascii(lgr_info->name, si->name, sizeof(si->name));
- memcpy(&lgr_info->lpar_number, &si->lpar_number,
- sizeof(lgr_info->lpar_number));
+ lgr_info->lpar_number = si->lpar_number;
}
/*
@@ -152,21 +153,20 @@ static void lgr_timer_set(void);
/*
* LGR timer callback
*/
-static void lgr_timer_fn(unsigned long ignored)
+static void lgr_timer_fn(struct timer_list *unused)
{
lgr_info_log();
lgr_timer_set();
}
-static struct timer_list lgr_timer =
- TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0);
+static struct timer_list lgr_timer;
/*
* Setup next LGR timer
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+ mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
}
/*
@@ -180,7 +180,8 @@ static int __init lgr_init(void)
debug_register_view(lgr_dbf, &debug_hex_ascii_view);
lgr_info_get(&lgr_info_last);
debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+ timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE);
lgr_timer_set();
return 0;
}
-module_init(lgr_init);
+device_initcall(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ac2178161ec3..baeb3dcfc1c8 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2005, 2011
*
* Author(s): Rolf Adelsberger,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
* Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
@@ -13,20 +13,28 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
-#include <linux/suspend.h>
+#include <linux/cpufeature.h>
+#include <asm/guarded_storage.h>
+#include <asm/machine.h>
+#include <asm/pfault.h>
#include <asm/cio.h>
+#include <asm/fpu.h>
#include <asm/setup.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/smp.h>
-#include <asm/reset.h>
#include <asm/ipl.h>
#include <asm/diag.h>
#include <asm/elf.h>
#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/set_memory.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/sclp.h>
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);
extern const unsigned char relocate_kernel[];
extern const unsigned long long relocate_kernel_len;
@@ -34,146 +42,145 @@ extern const unsigned long long relocate_kernel_len;
#ifdef CONFIG_CRASH_DUMP
/*
- * Create ELF notes for one CPU
+ * Reset the system, copy boot CPU registers to absolute zero,
+ * and jump to the kdump image
*/
-static void add_elf_notes(int cpu)
+static void __do_machine_kdump(void *data)
{
- struct save_area *sa = (void *) 4608 + store_prefix();
- void *ptr;
+ struct kimage *image = data;
+ purgatory_t purgatory;
+ unsigned long prefix;
+
+ purgatory = (purgatory_t)image->start;
+
+ /* store_status() saved the prefix register to lowcore */
+ prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
+
+ /* Now do the reset */
+ s390_reset_system();
+
+ /*
+ * Copy dump CPU store status info to absolute zero.
+ * This need to be done *after* s390_reset_system set the
+ * prefix register of this CPU to zero
+ */
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
+ phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
+
+ call_nodat(1, int, purgatory, int, 1);
- memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
- ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
- ptr = fill_cpu_elf_notes(ptr, sa);
- memset(ptr, 0, sizeof(struct elf_note));
+ /* Die if kdump returns */
+ disabled_wait();
}
/*
- * Initialize CPU ELF notes
+ * Start kdump: create a LGR log entry, store status of all CPUs and
+ * branch to __do_machine_kdump.
*/
-void setup_regs(void)
+static noinline void __machine_kdump(void *image)
{
- unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
- int cpu, this_cpu;
+ struct mcesa *mcesa;
+ union ctlreg2 cr2_old, cr2_new;
+ int this_cpu, cpu;
+ lgr_info_log();
+ /* Get status of the other CPUs */
this_cpu = smp_find_processor_id(stap());
- add_elf_notes(this_cpu);
for_each_online_cpu(cpu) {
if (cpu == this_cpu)
continue;
if (smp_store_status(cpu))
continue;
- add_elf_notes(cpu);
}
- /* Copy dump CPU store status info to absolute zero */
- memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
-}
-
-/*
- * PM notifier callback for kdump
- */
-static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- if (crashk_res.start)
- crash_map_reserved_pages();
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- if (crashk_res.start)
- crash_unmap_reserved_pages();
- break;
- default:
- return NOTIFY_DONE;
+ /* Store status of the boot CPU */
+ mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
+ if (cpu_has_vx())
+ save_vx_regs((__vector128 *) mcesa->vector_save_area);
+ if (cpu_has_gs()) {
+ local_ctl_store(2, &cr2_old.reg);
+ cr2_new = cr2_old;
+ cr2_new.gse = 1;
+ local_ctl_load(2, &cr2_new.reg);
+ save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+ local_ctl_load(2, &cr2_old.reg);
}
- return NOTIFY_OK;
+ /*
+ * To create a good backchain for this CPU in the dump store_status
+ * is passed the address of a function. The address is saved into
+ * the PSW save area of the boot CPU and the function is invoked as
+ * a tail call of store_status. The backchain in the dump will look
+ * like this:
+ * restart_int_handler -> __machine_kexec -> __do_machine_kdump
+ * The call to store_status() will not return.
+ */
+ store_status(__do_machine_kdump, image);
}
-static int __init machine_kdump_pm_init(void)
-{
- pm_notifier(machine_kdump_pm_cb, 0);
- return 0;
-}
-arch_initcall(machine_kdump_pm_init);
-#endif
+#endif /* CONFIG_CRASH_DUMP */
/*
- * Start kdump: We expect here that a store status has been done on our CPU
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
*/
-static void __do_machine_kdump(void *image)
+static bool kdump_csum_valid(struct kimage *image)
{
#ifdef CONFIG_CRASH_DUMP
- int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+ purgatory_t purgatory = (purgatory_t)image->start;
+ int rc;
- setup_regs();
- __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
- start_kdump(1);
+ rc = call_nodat(1, int, purgatory, int, 0);
+ return rc == 0;
+#else
+ return false;
#endif
}
-/*
- * Check if kdump checksums are valid: We call purgatory with parameter "0"
- */
-static int kdump_csum_valid(struct kimage *image)
-{
#ifdef CONFIG_CRASH_DUMP
- int (*start_kdump)(int) = (void *)image->start;
- int rc;
- __arch_local_irq_stnsm(0xfb); /* disable DAT */
- rc = start_kdump(0);
- __arch_local_irq_stosm(0x04); /* enable DAT */
- return rc ? 0 : -EINVAL;
-#else
- return -EINVAL;
-#endif
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr, size;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE)
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+ size = begin - crashk_res.start;
+ if (size)
+ os_info_crashkernel_add(crashk_res.start, size);
+ else
+ os_info_crashkernel_add(0, 0);
}
-/*
- * Map or unmap crashkernel memory
- */
-static void crash_map_pages(int enable)
+static void crash_protect_pages(int protect)
{
- unsigned long size = resource_size(&crashk_res);
-
- BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
- size % KEXEC_CRASH_MEM_ALIGN);
- if (enable)
- vmem_add_mapping(crashk_res.start, size);
- else {
- vmem_remove_mapping(crashk_res.start, size);
- if (size)
- os_info_crashkernel_add(crashk_res.start, size);
- else
- os_info_crashkernel_add(0, 0);
- }
+ unsigned long size;
+
+ if (!crashk_res.end)
+ return;
+ size = resource_size(&crashk_res);
+ if (protect)
+ set_memory_ro(crashk_res.start, size >> PAGE_SHIFT);
+ else
+ set_memory_rw(crashk_res.start, size >> PAGE_SHIFT);
}
-/*
- * Map crashkernel memory
- */
-void crash_map_reserved_pages(void)
+void arch_kexec_protect_crashkres(void)
{
- crash_map_pages(1);
+ crash_protect_pages(1);
}
-/*
- * Unmap crashkernel memory
- */
-void crash_unmap_reserved_pages(void)
+void arch_kexec_unprotect_crashkres(void)
{
- crash_map_pages(0);
+ crash_protect_pages(0);
}
+#endif
+
/*
* Give back memory to hypervisor before new kdump is loaded
*/
static int machine_kexec_prepare_kdump(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
diag10_range(PFN_DOWN(crashk_res.start),
PFN_DOWN(crashk_res.end - crashk_res.start + 1));
return 0;
@@ -186,10 +193,6 @@ int machine_kexec_prepare(struct kimage *image)
{
void *reboot_code_buffer;
- /* Can't replace kernel image since it is read-only. */
- if (ipl_flags & IPL_NSS_VALID)
- return -EOPNOTSUPP;
-
if (image->type == KEXEC_TYPE_CRASH)
return machine_kexec_prepare_kdump();
@@ -198,7 +201,7 @@ int machine_kexec_prepare(struct kimage *image)
return -EINVAL;
/* Get the destination where the assembler code should be copied to.*/
- reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+ reboot_code_buffer = page_to_virt(image->control_code_page);
/* Then copy it */
memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
@@ -209,19 +212,13 @@ void machine_kexec_cleanup(struct kimage *image)
{
}
-void arch_crash_save_vmcoreinfo(void)
-{
- VMCOREINFO_SYMBOL(lowcore_ptr);
- VMCOREINFO_SYMBOL(high_memory);
- VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-}
-
void machine_shutdown(void)
{
}
void machine_crash_shutdown(struct pt_regs *regs)
{
+ set_os_info_reipl_block();
}
/*
@@ -229,13 +226,23 @@ void machine_crash_shutdown(struct pt_regs *regs)
*/
static void __do_machine_kexec(void *data)
{
- relocate_kernel_t data_mover;
+ unsigned long data_mover, entry, diag308_subcode;
struct kimage *image = data;
- data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
+ data_mover = page_to_phys(image->control_code_page);
+ entry = virt_to_phys(&image->head);
+ diag308_subcode = DIAG308_CLEAR_RESET;
+ if (sclp.has_iplcc)
+ diag308_subcode |= DIAG308_FLAG_EI;
+ s390_reset_system();
- /* Call the moving routine */
- (*data_mover)(&image->head, image->start);
+ call_nodat(3, void, (relocate_kernel_t)data_mover,
+ unsigned long, entry,
+ unsigned long, image->start,
+ unsigned long, diag308_subcode);
+
+ /* Die if kexec returns */
+ disabled_wait();
}
/*
@@ -243,19 +250,14 @@ static void __do_machine_kexec(void *data)
*/
static void __machine_kexec(void *data)
{
- struct kimage *image = data;
-
- __arch_local_irq_stosm(0x04); /* enable DAT */
pfault_fini();
tracing_off();
debug_locks_off();
- if (image->type == KEXEC_TYPE_CRASH) {
- lgr_info_log();
- s390_reset_system(__do_machine_kdump, data);
- } else {
- s390_reset_system(__do_machine_kexec, data);
- }
- disabled_wait((unsigned long) __builtin_return_address(0));
+#ifdef CONFIG_CRASH_DUMP
+ if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH)
+ __machine_kdump(data);
+#endif
+ __do_machine_kexec(data);
}
/*
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
new file mode 100644
index 000000000000..a36d7311c668
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 code for kexec_file_load system call
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#define pr_fmt(fmt) "kexec: " fmt
+
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kexec.h>
+#include <linux/module_signature.h>
+#include <linux/verification.h>
+#include <linux/vmalloc.h>
+#include <asm/boot_data.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &s390_kexec_elf_ops,
+ &s390_kexec_image_ops,
+ NULL,
+};
+
+#ifdef CONFIG_KEXEC_SIG
+int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+ struct module_signature *ms;
+ unsigned long sig_len;
+ int ret;
+
+ /* Skip signature verification when not secure IPLed. */
+ if (!ipl_secure_flag)
+ return 0;
+
+ if (marker_len > kernel_len)
+ return -EKEYREJECTED;
+
+ if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING,
+ marker_len))
+ return -EKEYREJECTED;
+ kernel_len -= marker_len;
+
+ ms = (void *)kernel + kernel_len - sizeof(*ms);
+ kernel_len -= sizeof(*ms);
+
+ sig_len = be32_to_cpu(ms->sig_len);
+ if (sig_len >= kernel_len)
+ return -EKEYREJECTED;
+ kernel_len -= sig_len;
+
+ if (ms->id_type != PKEY_ID_PKCS7)
+ return -EKEYREJECTED;
+
+ if (ms->algo != 0 ||
+ ms->hash != 0 ||
+ ms->signer_len != 0 ||
+ ms->key_id_len != 0 ||
+ ms->__pad[0] != 0 ||
+ ms->__pad[1] != 0 ||
+ ms->__pad[2] != 0) {
+ return -EBADMSG;
+ }
+
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_SIG */
+
+static int kexec_file_update_purgatory(struct kimage *image,
+ struct s390_load_data *data)
+{
+ u64 entry, type;
+ int ret;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ entry = STARTUP_KDUMP_OFFSET;
+ type = KEXEC_TYPE_CRASH;
+ } else {
+ entry = STARTUP_NORMAL_OFFSET;
+ type = KEXEC_TYPE_DEFAULT;
+ }
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry,
+ sizeof(entry), false);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type,
+ sizeof(type), false);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH) {
+ u64 crash_size;
+
+ ret = kexec_purgatory_get_set_symbol(image, "crash_start",
+ &crashk_res.start,
+ sizeof(crashk_res.start),
+ false);
+ if (ret)
+ return ret;
+
+ crash_size = crashk_res.end - crashk_res.start + 1;
+ ret = kexec_purgatory_get_set_symbol(image, "crash_size",
+ &crash_size,
+ sizeof(crash_size),
+ false);
+ }
+#endif
+ return ret;
+}
+
+static int kexec_file_add_purgatory(struct kimage *image,
+ struct s390_load_data *data)
+{
+ struct kexec_buf buf = {};
+ int ret;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+
+ ret = kexec_load_purgatory(image, &buf);
+ if (ret)
+ return ret;
+ data->memsz += buf.memsz;
+
+ return kexec_file_update_purgatory(image, data);
+}
+
+static int kexec_file_add_initrd(struct kimage *image,
+ struct s390_load_data *data)
+{
+ struct kexec_buf buf = {};
+ int ret;
+
+ buf.image = image;
+
+ buf.buffer = image->initrd_buf;
+ buf.bufsz = image->initrd_buf_len;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+ buf.memsz = buf.bufsz;
+
+ data->parm->initrd_start = data->memsz;
+ data->parm->initrd_size = buf.memsz;
+ data->memsz += buf.memsz;
+
+ ret = kexec_add_buffer(&buf);
+ if (ret)
+ return ret;
+
+ return ipl_report_add_component(data->report, &buf, 0, 0);
+}
+
+static int kexec_file_add_ipl_report(struct kimage *image,
+ struct s390_load_data *data)
+{
+ __u32 *lc_ipl_parmblock_ptr;
+ unsigned int len, ncerts;
+ struct kexec_buf buf = {};
+ unsigned long addr;
+ void *ptr, *end;
+ int ret;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+
+ ptr = __va(ipl_cert_list_addr);
+ end = ptr + ipl_cert_list_size;
+ ncerts = 0;
+ while (ptr < end) {
+ ncerts++;
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ptr += len;
+ }
+
+ addr = data->memsz + data->report->size;
+ addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
+ ptr = __va(ipl_cert_list_addr);
+ while (ptr < end) {
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ipl_report_add_certificate(data->report, ptr, addr, len);
+ addr += len;
+ ptr += len;
+ }
+
+ ret = -ENOMEM;
+ buf.buffer = ipl_report_finish(data->report);
+ if (!buf.buffer)
+ goto out;
+ buf.bufsz = data->report->size;
+ buf.memsz = buf.bufsz;
+ image->arch.ipl_buf = buf.buffer;
+
+ data->memsz += buf.memsz;
+
+ lc_ipl_parmblock_ptr =
+ data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
+ *lc_ipl_parmblock_ptr = (__u32)buf.mem;
+
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+
+ ret = kexec_add_buffer(&buf);
+out:
+ return ret;
+}
+
+void *kexec_file_add_components(struct kimage *image,
+ int (*add_kernel)(struct kimage *image,
+ struct s390_load_data *data))
+{
+ unsigned long max_command_line_size = LEGACY_COMMAND_LINE_SIZE;
+ struct s390_load_data data = {0};
+ unsigned long minsize;
+ int ret;
+
+ data.report = ipl_report_init(&ipl_block);
+ if (IS_ERR(data.report))
+ return data.report;
+
+ ret = add_kernel(image, &data);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ minsize = PARMAREA + offsetof(struct parmarea, command_line);
+ if (image->kernel_buf_len < minsize)
+ goto out;
+
+ if (data.parm->max_command_line_size)
+ max_command_line_size = data.parm->max_command_line_size;
+
+ if (minsize + max_command_line_size < minsize)
+ goto out;
+
+ if (image->kernel_buf_len < minsize + max_command_line_size)
+ goto out;
+
+ if (image->cmdline_buf_len >= max_command_line_size)
+ goto out;
+
+ memcpy(data.parm->command_line, image->cmdline_buf,
+ image->cmdline_buf_len);
+
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH) {
+ data.parm->oldmem_base = crashk_res.start;
+ data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
+ }
+#endif
+
+ if (image->initrd_buf) {
+ ret = kexec_file_add_initrd(image, &data);
+ if (ret)
+ goto out;
+ }
+
+ ret = kexec_file_add_purgatory(image, &data);
+ if (ret)
+ goto out;
+
+ if (data.kernel_mem == 0) {
+ unsigned long restart_psw = 0x0008000080000000UL;
+ restart_psw += image->start;
+ memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw));
+ image->start = 0;
+ }
+
+ ret = kexec_file_add_ipl_report(image, &data);
+out:
+ ipl_report_free(data.report);
+ return ERR_PTR(ret);
+}
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
+ Elf_Rela *relas;
+ int i, r_type;
+ int ret;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ if (sym->st_shndx == SHN_UNDEF) {
+ pr_err("Undefined symbol: %s\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_COMMON) {
+ pr_err("symbol '%s' in common section\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx >= pi->ehdr->e_shnum &&
+ sym->st_shndx != SHN_ABS) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ }
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ val = sym->st_value;
+ if (sym->st_shndx != SHN_ABS)
+ val += pi->sechdrs[sym->st_shndx].sh_addr;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+
+ if (r_type == R_390_PLT32DBL)
+ r_type = R_390_PC32DBL;
+
+ ret = arch_kexec_do_relocs(r_type, loc, val, addr);
+ if (ret) {
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ vfree(image->arch.ipl_buf);
+ image->arch.ipl_buf = NULL;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
new file mode 100644
index 000000000000..b7182cec48dc
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/elf.h>
+#include <asm/kexec.h>
+
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr)
+{
+ switch (r_type) {
+ case R_390_NONE:
+ break;
+ case R_390_8: /* Direct 8 bit. */
+ *(u8 *)loc = val;
+ break;
+ case R_390_12: /* Direct 12 bit. */
+ *(u16 *)loc &= 0xf000;
+ *(u16 *)loc |= val & 0xfff;
+ break;
+ case R_390_16: /* Direct 16 bit. */
+ *(u16 *)loc = val;
+ break;
+ case R_390_20: /* Direct 20 bit. */
+ *(u32 *)loc &= 0xf00000ff;
+ *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
+ *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
+ break;
+ case R_390_32: /* Direct 32 bit. */
+ *(u32 *)loc = val;
+ break;
+ case R_390_64: /* Direct 64 bit. */
+ case R_390_GLOB_DAT:
+ case R_390_JMP_SLOT:
+ *(u64 *)loc = val;
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ *(u16 *)loc = (val - addr);
+ break;
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ *(u16 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ *(u32 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32: /* PC relative 32 bit. */
+ *(u32 *)loc = (val - addr);
+ break;
+ case R_390_PC64: /* PC relative 64 bit. */
+ *(u64 *)loc = (val - addr);
+ break;
+ case R_390_RELATIVE:
+ *(unsigned long *) loc = val;
+ break;
+ default:
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 08dcf21cb8df..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -1,73 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright IBM Corp. 2008, 2009
*
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
+#include <asm/ptrace.h>
+#include <asm/march.h>
+
+#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS (STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS)
+
+/* packed stack: allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE 24
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+ GEN_BR_THUNK %r1
+ GEN_BR_THUNK %r14
.section .kprobes.text, "ax"
-ENTRY(ftrace_stub)
- br %r14
+SYM_FUNC_START(ftrace_stub)
+ BR_EX %r14
+SYM_FUNC_END(ftrace_stub)
+
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ lgr %r1, %r0
+ BR_EX %r1
+SYM_CODE_END(ftrace_stub_direct_tramp)
+
+ .macro ftrace_regs_entry, allregs=0
+ stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
+
+ .if \allregs == 1
+ # save psw mask
+ # don't put any instructions clobbering CC before this point
+ epsw %r1,%r14
+ risbg %r14,%r1,0,31,32
+ .endif
+
+ lgr %r1,%r15
+ # allocate stack frame for ftrace_caller to contain traced function
+ aghi %r15,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(__SF_GPRS+8*8)(%r15)
+ stg %r15,(__SF_GPRS+9*8)(%r15)
+ # allocate ftrace_regs and stack frame for ftrace_trace_function
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+
+ .if \allregs == 1
+ stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+ mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+ .else
+ xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
+ .endif
+
+ lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
+ aghi %r1,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ .endm
-ENTRY(_mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
- br %r14
+SYM_CODE_START(ftrace_regs_caller)
+ ftrace_regs_entry 1
+ j ftrace_common
+SYM_CODE_END(ftrace_regs_caller)
-ENTRY(ftrace_caller)
+SYM_CODE_START(ftrace_caller)
+ ftrace_regs_entry 0
+ j ftrace_common
+SYM_CODE_END(ftrace_caller)
+
+SYM_CODE_START(ftrace_common)
+#ifdef MARCH_HAS_Z196_FEATURES
+ aghik %r2,%r0,-MCOUNT_INSN_SIZE
+ lgrl %r4,function_trace_op
+ lgrl %r1,ftrace_func
+#else
+ lgr %r2,%r0
+ aghi %r2,-MCOUNT_INSN_SIZE
+ larl %r4,function_trace_op
+ lg %r4,0(%r4)
+ larl %r1,ftrace_func
+ lg %r1,0(%r1)
#endif
- stm %r2,%r5,16(%r15)
- bras %r1,2f
-0: .long ftrace_trace_function
-1: .long function_trace_stop
-2: l %r2,1b-0b(%r1)
- icm %r2,0xf,0(%r2)
- jnz 3f
- st %r14,56(%r15)
- lr %r0,%r15
- ahi %r15,-96
- l %r3,100(%r15)
- la %r2,0(%r14)
- st %r0,__SF_BACKCHAIN(%r15)
- la %r3,0(%r3)
- ahi %r2,-MCOUNT_INSN_SIZE
- l %r14,0b-0b(%r1)
- l %r14,0(%r14)
- basr %r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- l %r2,100(%r15)
- l %r3,152(%r15)
-ENTRY(ftrace_graph_caller)
-# The bras instruction gets runtime patched to call prepare_ftrace_return.
-# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
-# bras %r14,prepare_ftrace_return
- bras %r14,0f
-0: st %r2,100(%r15)
+ lgr %r3,%r14
+ la %r5,STACK_FREGS(%r15)
+ BASR_EX %r14,%r1
+ lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+#ifdef MARCH_HAS_Z196_FEATURES
+ ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+ locgrz %r1,%r0
+#else
+ lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+ ltgr %r1,%r1
+ jnz 0f
+ lgr %r1,%r0
#endif
- ahi %r15,96
- l %r14,56(%r15)
-3: lm %r2,%r5,16(%r15)
- br %r14
+0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ BR_EX %r1
+SYM_CODE_END(ftrace_common)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(return_to_handler)
- stm %r2,%r5,16(%r15)
- st %r14,56(%r15)
- lr %r0,%r15
- ahi %r15,-96
- st %r0,__SF_BACKCHAIN(%r15)
- bras %r1,0f
- .long ftrace_return_to_handler
-0: l %r2,0b-0b(%r1)
- basr %r14,%r2
- lr %r14,%r2
- ahi %r15,96
- lm %r2,%r5,16(%r15)
- br %r14
+SYM_FUNC_START(return_to_handler)
+ stmg %r2,%r5,32(%r15)
+ lgr %r1,%r15
+ # allocate ftrace_regs and stack frame for ftrace_return_to_handler
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
+ brasl %r14,ftrace_return_to_handler
+ aghi %r15,STACK_FRAME_SIZE_FREGS
+ lgr %r14,%r2
+ lmg %r2,%r5,32(%r15)
+ BR_EX %r14
+SYM_FUNC_END(return_to_handler)
#endif
+#endif /* CONFIG_FUNCTION_TRACER */
+
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br)
+ lmg %r0,%r1,2(%r1)
+ br %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br)
+
+#ifdef CONFIG_EXPOLINE
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl)
+ lmg %r0,%r1,2(%r1)
+ exrl %r0,0f
+ j .
+0: br %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
+#endif /* CONFIG_EXPOLINE */
+
+#ifdef CONFIG_RETHOOK
+
+SYM_CODE_START(arch_rethook_trampoline)
+ stg %r14,(__SF_GPRS+8*8)(%r15)
+ lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15)
+ stmg %r0,%r14,STACK_PTREGS_GPRS(%r15)
+
+ # store original stack pointer in backchain and pt_regs
+ lay %r7,STACK_FRAME_SIZE_PTREGS(%r15)
+ stg %r7,__SF_BACKCHAIN(%r15)
+ stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15)
+
+ # store full psw
+ epsw %r2,%r3
+ risbg %r3,%r2,0,31,32
+ stg %r3,STACK_PTREGS_PSW(%r15)
+ larl %r1,arch_rethook_trampoline
+ stg %r1,STACK_PTREGS_PSW+8(%r15)
+
+ lay %r2,STACK_PTREGS(%r15)
+ brasl %r14,arch_rethook_trampoline_callback
+
+ mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
+ lmg %r0,%r15,STACK_PTREGS_GPRS(%r15)
+ lpswe __SF_EMPTY(%r15)
+SYM_CODE_END(arch_rethook_trampoline)
+
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
deleted file mode 100644
index 1c52eae3396a..000000000000
--- a/arch/s390/kernel/mcount64.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-
- .section .kprobes.text, "ax"
-
-ENTRY(ftrace_stub)
- br %r14
-
-ENTRY(_mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
- br %r14
-
-ENTRY(ftrace_caller)
-#endif
- larl %r1,function_trace_stop
- icm %r1,0xf,0(%r1)
- bnzr %r14
- stmg %r2,%r5,32(%r15)
- stg %r14,112(%r15)
- lgr %r1,%r15
- aghi %r15,-160
- stg %r1,__SF_BACKCHAIN(%r15)
- lgr %r2,%r14
- lg %r3,168(%r15)
- aghi %r2,-MCOUNT_INSN_SIZE
- larl %r14,ftrace_trace_function
- lg %r14,0(%r14)
- basr %r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- lg %r2,168(%r15)
- lg %r3,272(%r15)
-ENTRY(ftrace_graph_caller)
-# The bras instruction gets runtime patched to call prepare_ftrace_return.
-# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
-# bras %r14,prepare_ftrace_return
- bras %r14,0f
-0: stg %r2,168(%r15)
-#endif
- aghi %r15,160
- lmg %r2,%r5,32(%r15)
- lg %r14,112(%r15)
- br %r14
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-ENTRY(return_to_handler)
- stmg %r2,%r5,32(%r15)
- lgr %r1,%r15
- aghi %r15,-160
- stg %r1,__SF_BACKCHAIN(%r15)
- brasl %r14,ftrace_return_to_handler
- aghi %r15,160
- lgr %r14,%r2
- lmg %r2,%r5,32(%r15)
- br %r14
-
-#endif
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 7845e15a17df..9d1f8a50f5a4 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Kernel module help for s390.
*
@@ -8,29 +9,27 @@
*
* based on i386 version
* Copyright (C) 2001 Rusty Russell.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/bug.h>
+#include <linux/memory.h>
+#include <linux/execmem.h>
+#include <asm/arch-stackprotector.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/facility.h>
+#include <asm/ftrace.lds.h>
+#include <asm/set_memory.h>
+#include <asm/setup.h>
+#include <asm/asm-offsets.h>
#if 0
#define DEBUGP printk
@@ -38,31 +37,23 @@
#define DEBUGP(fmt , ...)
#endif
-#ifndef CONFIG_64BIT
-#define PLT_ENTRY_SIZE 12
-#else /* CONFIG_64BIT */
-#define PLT_ENTRY_SIZE 20
-#endif /* CONFIG_64BIT */
+#define PLT_ENTRY_SIZE 22
-#ifdef CONFIG_64BIT
-void *module_alloc(unsigned long size)
+#ifdef CONFIG_FUNCTION_TRACER
+void module_arch_cleanup(struct module *mod)
{
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, -1,
- __builtin_return_address(0));
+ execmem_free(mod->arch.trampolines_start);
}
#endif
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
{
- if (mod) {
- vfree(mod->arch.syminfo);
- mod->arch.syminfo = NULL;
- }
- vfree(module_region);
+ if (is_livepatch_module(mod) &&
+ mod->state == MODULE_STATE_LIVE)
+ return;
+
+ vfree(mod->arch.syminfo);
+ mod->arch.syminfo = NULL;
}
static void check_rela(Elf_Rela *rela, struct module *me)
@@ -122,6 +113,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
Elf_Rela *rela;
char *strings;
int nrela, i, j;
+ struct module_memory *mod_mem;
/* Find symbol table and string table. */
symtab = NULL;
@@ -138,8 +130,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
/* Allocate one syminfo structure per symbol. */
me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
- me->arch.syminfo = vmalloc(me->arch.nsyms *
- sizeof(struct mod_arch_syminfo));
+ me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo),
+ me->arch.nsyms));
if (!me->arch.syminfo)
return -ENOMEM;
symbols = (void *) hdr + symtab->sh_offset;
@@ -169,19 +161,26 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
/* Increase core size by size of got & plt and set start
offsets for got and plt. */
- me->core_size = ALIGN(me->core_size, 4);
- me->arch.got_offset = me->core_size;
- me->core_size += me->arch.got_size;
- me->arch.plt_offset = me->core_size;
- me->core_size += me->arch.plt_size;
+ mod_mem = &me->mem[MOD_TEXT];
+ mod_mem->size = ALIGN(mod_mem->size, 4);
+ me->arch.got_offset = mod_mem->size;
+ mod_mem->size += me->arch.got_size;
+ me->arch.plt_offset = mod_mem->size;
+ if (me->arch.plt_size) {
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ mod_mem->size += me->arch.plt_size;
+ }
return 0;
}
static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
- int sign, int bits, int shift)
+ int sign, int bits, int shift,
+ void *(*write)(void *dest, const void *src, size_t len))
{
unsigned long umax;
long min, max;
+ void *dest = (void *)loc;
if (val & ((1UL << shift) - 1))
return -ENOEXEC;
@@ -198,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
return -ENOEXEC;
}
- if (bits == 8)
- *(unsigned char *) loc = val;
- else if (bits == 12)
- *(unsigned short *) loc = (val & 0xfff) |
+ if (bits == 8) {
+ unsigned char tmp = val;
+ write(dest, &tmp, 1);
+ } else if (bits == 12) {
+ unsigned short tmp = (val & 0xfff) |
(*(unsigned short *) loc & 0xf000);
- else if (bits == 16)
- *(unsigned short *) loc = val;
- else if (bits == 20)
- *(unsigned int *) loc = (val & 0xfff) << 16 |
- (val & 0xff000) >> 4 |
- (*(unsigned int *) loc & 0xf00000ff);
- else if (bits == 32)
- *(unsigned int *) loc = val;
- else if (bits == 64)
- *(unsigned long *) loc = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 16) {
+ unsigned short tmp = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 20) {
+ unsigned int tmp = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+ write(dest, &tmp, 4);
+ } else if (bits == 32) {
+ unsigned int tmp = val;
+ write(dest, &tmp, 4);
+ } else if (bits == 64) {
+ unsigned long tmp = val;
+ write(dest, &tmp, 8);
+ }
return 0;
}
static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- const char *strtab, struct module *me)
+ const char *strtab, struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
@@ -245,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- rc = apply_rela_bits(loc, val, 0, 8, 0);
+ rc = apply_rela_bits(loc, val, 0, 8, 0, write);
else if (r_type == R_390_12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -264,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- rc = apply_rela_bits(loc, val, 1, 16, 0);
+ rc = apply_rela_bits(loc, val, 1, 16, 0, write);
else if (r_type == R_390_PC16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PC32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PC32)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_PC64)
- rc = apply_rela_bits(loc, val, 1, 64, 0);
+ rc = apply_rela_bits(loc, val, 1, 64, 0, write);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -287,33 +293,34 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
if (info->got_initialized == 0) {
- Elf_Addr *gotent;
+ Elf_Addr *gotent = me->mem[MOD_TEXT].base +
+ me->arch.got_offset +
+ info->got_offset;
- gotent = me->module_core + me->arch.got_offset +
- info->got_offset;
- *gotent = val;
+ write(gotent, &val, sizeof(*gotent));
info->got_initialized = 1;
}
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->module_core - loc;
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ val += (Elf_Addr)me->mem[MOD_TEXT].base +
+ me->arch.got_offset - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
@@ -324,20 +331,28 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int *ip;
- ip = me->module_core + me->arch.plt_offset +
- info->plt_offset;
-#ifndef CONFIG_64BIT
- ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */
- ip[1] = 0x100607f1;
- ip[2] = val;
-#else /* CONFIG_64BIT */
- ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
- ip[1] = 0x100a0004;
- ip[2] = 0x07f10000;
- ip[3] = (unsigned int) (val >> 32);
- ip[4] = (unsigned int) val;
-#endif /* CONFIG_64BIT */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
+ } else {
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
+ }
+ *(long *)&insn[14] = val;
+
+ write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
@@ -350,44 +365,44 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val - loc + 0xffffUL < 0x1ffffeUL) ||
(r_type == R_390_PLT32DBL &&
val - loc + 0xffffffffULL < 0x1fffffffeULL)))
- val = (Elf_Addr) me->module_core +
+ val = (Elf_Addr) me->mem[MOD_TEXT].base +
me->arch.plt_offset +
info->plt_offset;
val += rela->r_addend - loc;
}
if (r_type == R_390_PLT16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PLTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_PLT32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
case R_390_GOTOFF64: /* 64 bit offset to GOT. */
val = val + rela->r_addend -
- ((Elf_Addr) me->module_core + me->arch.got_offset);
+ ((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
- val = (Elf_Addr) me->module_core + me->arch.got_offset +
+ val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_GOTPCDBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -411,9 +426,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
return 0;
}
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
unsigned int symindex, unsigned int relsec,
- struct module *me)
+ struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -429,18 +445,103 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, strtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me, write);
if (rc)
return rc;
}
return 0;
}
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ bool early = me->state == MODULE_STATE_UNFORMED;
+ void *(*write)(void *, const void *, size_t) = memcpy;
+
+ if (!early)
+ write = s390_kernel_write;
+
+ return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+ write);
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
+ const Elf_Shdr *s)
+{
+ char *start, *end;
+ int numpages;
+ size_t size;
+
+ size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
+ numpages = DIV_ROUND_UP(size, PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
+ if (!start)
+ return -ENOMEM;
+ set_memory_rox((unsigned long)start, numpages);
+ end = start + size;
+
+ me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start;
+ me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end;
+ me->arch.next_trampoline = me->arch.trampolines_start;
+
+ return 0;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- vfree(me->arch.syminfo);
- me->arch.syminfo = NULL;
- return 0;
+ const Elf_Shdr *s;
+ char *secstrings, *secname;
+ void *aseg;
+ int rc = 0;
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_disable && me->arch.plt_size) {
+ unsigned int *ij;
+
+ ij = me->mem[MOD_TEXT].base + me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ ij[0] = 0xc6000000; /* exrl %r0,.+10 */
+ ij[1] = 0x0005a7f4; /* j . */
+ ij[2] = 0x000007f1; /* br %r1 */
+ }
+
+ secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ aseg = (void *) s->sh_addr;
+ secname = secstrings + s->sh_name;
+
+ if (!strcmp(".altinstructions", secname))
+ /* patch .altinstructions */
+ apply_alternatives(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (str_has_prefix(secname, ".s390_indirect")))
+ nospec_revert(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (str_has_prefix(secname, ".s390_return")))
+ nospec_revert(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_STACKPROTECTOR) &&
+ (str_has_prefix(secname, "__stack_protector_loc"))) {
+ rc = stack_protector_apply(aseg, aseg + s->sh_size);
+ if (rc)
+ break;
+ }
+
+#ifdef CONFIG_FUNCTION_TRACER
+ if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) {
+ rc = module_alloc_ftrace_hotpatch_trampolines(me, s);
+ if (rc)
+ break;
+ }
+#endif /* CONFIG_FUNCTION_TRACER */
+ }
+
+ return rc;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 504175ebf8b0..a55abbf65333 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Machine check handler
*
@@ -5,59 +6,235 @@
* Author(s): Ingo Adlung <adlung@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
* Cornelia Huck <cornelia.huck@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#include <linux/kernel_stat.h>
+#include <linux/utsname.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/entry-common.h>
#include <linux/hardirq.h>
+#include <linux/log2.h>
+#include <linux/kprobes.h>
+#include <linux/kmemleak.h>
#include <linux/time.h>
#include <linux/module.h>
+#include <linux/sched/signal.h>
+#include <linux/kvm_host.h>
#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+#include <asm/fpu.h>
#include <asm/smp.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
+#include <asm/asm-offsets.h>
+#include <asm/pai.h>
+#include <asm/vtime.h>
struct mcck_struct {
- int kill_task;
- int channel_report;
- int warning;
- unsigned long long mcck_code;
+ unsigned int kill_task : 1;
+ unsigned int channel_report : 1;
+ unsigned int warning : 1;
+ unsigned int stp_queue : 1;
+ unsigned long mcck_code;
};
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
-static void s390_handle_damage(char *msg)
+static inline int nmi_needs_mcesa(void)
{
- smp_send_stop();
- disabled_wait((unsigned long) __builtin_return_address(0));
- while (1);
+ return cpu_has_vx() || cpu_has_gs();
}
/*
- * Main machine check handler function. Will be called with interrupts enabled
- * or disabled and machine checks enabled or disabled.
+ * The initial machine check extended save area for the boot CPU.
+ * It will be replaced on the boot CPU reinit with an allocated
+ * structure. The structure is required for machine check happening
+ * early in the boot process.
+ */
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
+
+void __init nmi_alloc_mcesa_early(u64 *mcesad)
+{
+ if (!nmi_needs_mcesa())
+ return;
+ *mcesad = __pa(&boot_mcesa);
+ if (cpu_has_gs())
+ *mcesad |= ilog2(MCESA_MAX_SIZE);
+}
+
+int nmi_alloc_mcesa(u64 *mcesad)
+{
+ unsigned long size;
+ void *origin;
+
+ *mcesad = 0;
+ if (!nmi_needs_mcesa())
+ return 0;
+ size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+ origin = kmalloc(size, GFP_KERNEL);
+ if (!origin)
+ return -ENOMEM;
+ /* The pointer is stored with mcesa_bits ORed in */
+ kmemleak_not_leak(origin);
+ *mcesad = __pa(origin);
+ if (cpu_has_gs())
+ *mcesad |= ilog2(MCESA_MAX_SIZE);
+ return 0;
+}
+
+void nmi_free_mcesa(u64 *mcesad)
+{
+ if (!nmi_needs_mcesa())
+ return;
+ kfree(__va(*mcesad & MCESA_ORIGIN_MASK));
+}
+
+static __always_inline char *nmi_puts(char *dest, const char *src)
+{
+ while (*src)
+ *dest++ = *src++;
+ *dest = 0;
+ return dest;
+}
+
+static __always_inline char *u64_to_hex(char *dest, u64 val)
+{
+ int i, num;
+
+ for (i = 1; i <= 16; i++) {
+ num = (val >> (64 - 4 * i)) & 0xf;
+ if (num >= 10)
+ *dest++ = 'A' + num - 10;
+ else
+ *dest++ = '0' + num;
+ }
+ *dest = 0;
+ return dest;
+}
+
+static notrace void nmi_print_info(void)
+{
+ struct lowcore *lc = get_lowcore();
+ char message[100];
+ char *ptr;
+ int i;
+
+ ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
+ ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, init_utsname()->release);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, arch_hw_string);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "PSW: ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
+ ptr = nmi_puts(ptr, " ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
+ ptr = nmi_puts(ptr, " PFX: ");
+ ptr = u64_to_hex(ptr, (u64)get_lowcore());
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "LBA: ");
+ ptr = u64_to_hex(ptr, lc->last_break_save_area);
+ ptr = nmi_puts(ptr, " EDC: ");
+ ptr = u64_to_hex(ptr, lc->external_damage_code);
+ ptr = nmi_puts(ptr, " FSA: ");
+ ptr = u64_to_hex(ptr, lc->failing_storage_address);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "CRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "GPRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "System stopped\n");
+ sclp_emergency_printk(message);
+}
+
+static notrace void __noreturn s390_handle_damage(void)
+{
+ struct lowcore *lc = get_lowcore();
+ union ctlreg0 cr0, cr0_new;
+ psw_t psw_save;
+
+ smp_emergency_stop();
+ diag_amode31_ops.diag308_reset();
+
+ /*
+ * Disable low address protection and make machine check new PSW a
+ * disabled wait PSW. Any additional machine check cannot be handled.
+ */
+ local_ctl_store(0, &cr0.reg);
+ cr0_new = cr0;
+ cr0_new.lap = 0;
+ local_ctl_load(0, &cr0_new.reg);
+ psw_save = lc->mcck_new_psw;
+ psw_bits(lc->mcck_new_psw).io = 0;
+ psw_bits(lc->mcck_new_psw).ext = 0;
+ psw_bits(lc->mcck_new_psw).wait = 1;
+ nmi_print_info();
+
+ /*
+ * Restore machine check new PSW and control register 0 to original
+ * values. This makes possible system dump analysis easier.
+ */
+ lc->mcck_new_psw = psw_save;
+ local_ctl_load(0, &cr0.reg);
+ disabled_wait();
+}
+NOKPROBE_SYMBOL(s390_handle_damage);
+
+/*
+ * Main machine check handler function. Will be called with interrupts disabled
+ * and machine checks enabled.
*/
void s390_handle_mcck(void)
{
- unsigned long flags;
struct mcck_struct mcck;
+ unsigned long mflags;
/*
* Disable machine checks and get the current state of accumulated
* machine checks. Afterwards delete the old state and enable machine
* checks again.
*/
- local_irq_save(flags);
- local_mcck_disable();
- mcck = __get_cpu_var(cpu_mcck);
- memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
- clear_thread_flag(TIF_MCCK_PENDING);
- local_mcck_enable();
- local_irq_restore(flags);
+ local_mcck_save(mflags);
+ mcck = *this_cpu_ptr(&cpu_mcck);
+ memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
+ local_mcck_restore(mflags);
if (mcck.channel_report)
crw_handle_channel_report();
@@ -74,171 +251,112 @@ void s390_handle_mcck(void)
static int mchchk_wng_posted = 0;
/* Use single cpu clear, as we cannot handle smp here. */
- __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT);
if (xchg(&mchchk_wng_posted, 1) == 0)
kill_cad_pid(SIGPWR, 1);
}
+ if (mcck.stp_queue)
+ stp_queue_work();
if (mcck.kill_task) {
- local_irq_enable();
printk(KERN_EMERG "mcck: Terminating task because of machine "
- "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ "malfunction (code 0x%016lx).\n", mcck.mcck_code);
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
current->comm, current->pid);
- do_exit(SIGSEGV);
+ if (is_global_init(current))
+ panic("mcck: Attempting to kill init!\n");
+ do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID);
}
}
-EXPORT_SYMBOL_GPL(s390_handle_mcck);
-/*
- * returns 0 if all registers could be validated
- * returns 1 otherwise
+/**
+ * nmi_registers_valid - verify if registers are valid
+ * @mci: machine check interruption code
+ *
+ * Inspect a machine check interruption code and verify if all required
+ * registers are valid. For some registers the corresponding validity bit is
+ * ignored and the registers are set to the expected value.
+ * Returns true if all registers are valid, otherwise false.
*/
-static int notrace s390_revalidate_registers(struct mci *mci)
+static bool notrace nmi_registers_valid(union mci mci)
{
- int kill_task;
- u64 zero;
- void *fpt_save_area, *fpt_creg_save_area;
-
- kill_task = 0;
- zero = 0;
+ union ctlreg2 cr2;
- if (!mci->gr) {
- /*
- * General purpose registers couldn't be restored and have
- * unknown contents. Process needs to be terminated.
- */
- kill_task = 1;
- }
- if (!mci->fp) {
- /*
- * Floating point registers can't be restored and
- * therefore the process needs to be terminated.
- */
- kill_task = 1;
- }
-#ifndef CONFIG_64BIT
- asm volatile(
- " ld 0,0(%0)\n"
- " ld 2,8(%0)\n"
- " ld 4,16(%0)\n"
- " ld 6,24(%0)"
- : : "a" (&S390_lowcore.floating_pt_save_area));
-#endif
-
- if (MACHINE_HAS_IEEE) {
-#ifdef CONFIG_64BIT
- fpt_save_area = &S390_lowcore.floating_pt_save_area;
- fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
-#else
- fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
- fpt_creg_save_area = fpt_save_area + 128;
-#endif
- if (!mci->fc) {
- /*
- * Floating point control register can't be restored.
- * Task will be terminated.
- */
- asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
- kill_task = 1;
-
- } else
- asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
-
- asm volatile(
- " ld 0,0(%0)\n"
- " ld 1,8(%0)\n"
- " ld 2,16(%0)\n"
- " ld 3,24(%0)\n"
- " ld 4,32(%0)\n"
- " ld 5,40(%0)\n"
- " ld 6,48(%0)\n"
- " ld 7,56(%0)\n"
- " ld 8,64(%0)\n"
- " ld 9,72(%0)\n"
- " ld 10,80(%0)\n"
- " ld 11,88(%0)\n"
- " ld 12,96(%0)\n"
- " ld 13,104(%0)\n"
- " ld 14,112(%0)\n"
- " ld 15,120(%0)\n"
- : : "a" (fpt_save_area));
- }
- /* Revalidate access registers */
- asm volatile(
- " lam 0,15,0(%0)"
- : : "a" (&S390_lowcore.access_regs_save_area));
- if (!mci->ar) {
- /*
- * Access registers have unknown contents.
- * Terminating task.
- */
- kill_task = 1;
- }
- /* Revalidate control registers */
- if (!mci->cr) {
- /*
- * Control registers have unknown contents.
- * Can't recover and therefore stopping machine.
- */
- s390_handle_damage("invalid control registers.");
- } else {
-#ifdef CONFIG_64BIT
- asm volatile(
- " lctlg 0,15,0(%0)"
- : : "a" (&S390_lowcore.cregs_save_area));
-#else
- asm volatile(
- " lctl 0,15,0(%0)"
- : : "a" (&S390_lowcore.cregs_save_area));
-#endif
- }
/*
- * We don't even try to revalidate the TOD register, since we simply
- * can't write something sensible into that register.
+ * The getcpu vdso syscall reads the CPU number from the programmable
+ * field of the TOD clock. Disregard the TOD programmable register
+ * validity bit and load the CPU number into the TOD programmable field
+ * unconditionally.
*/
-#ifdef CONFIG_64BIT
+ set_tod_programmable_field(raw_smp_processor_id());
/*
- * See if we can revalidate the TOD programmable register with its
- * old contents (should be zero) otherwise set it to zero.
+ * Set the clock comparator register to the next expected value.
*/
- if (!mci->pr)
- asm volatile(
- " sr 0,0\n"
- " sckpf"
- : : : "0", "cc");
- else
- asm volatile(
- " l 0,0(%0)\n"
- " sckpf"
- : : "a" (&S390_lowcore.tod_progreg_save_area)
- : "0", "cc");
-#endif
- /* Revalidate clock comparator register */
- if (S390_lowcore.clock_comparator == -1)
- set_clock_comparator(S390_lowcore.mcck_clock);
- else
- set_clock_comparator(S390_lowcore.clock_comparator);
- /* Check if old PSW is valid */
- if (!mci->wp)
- /*
- * Can't tell if we come from user or kernel mode
- * -> stopping machine.
- */
- s390_handle_damage("old psw invalid.");
+ set_clock_comparator(get_lowcore()->clock_comparator);
+ if (!mci.gr || !mci.fp || !mci.fc)
+ return false;
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
+ if (!mci.ar)
+ return false;
+ /*
+ * Two cases for guarded storage registers:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of guarded storage
+ * control can not be recreated, the process must be terminated.
+ * For SIE the guest values of guarded storage can not be recreated.
+ * This is either due to a bug or due to GS being disabled in the
+ * guest. The guest will be notified by KVM code and the guests machine
+ * check handling must take care of this. The host values are saved by
+ * KVM and are not affected.
+ */
+ cr2.reg = get_lowcore()->cregs_save_area[2];
+ if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
+ if (!mci.ms || !mci.pm || !mci.ia)
+ return false;
+ return true;
+}
+NOKPROBE_SYMBOL(nmi_registers_valid);
- if (!mci->ms || !mci->pm || !mci->ia)
- kill_task = 1;
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+ struct mcck_volatile_info *mcck_backup;
+ struct sie_page *sie_page;
+
+ /* r14 contains the sie block, which was set in sie64a */
+ struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]);
- return kill_task;
+ if (sie_block == NULL)
+ /* Something's seriously wrong, stop system. */
+ s390_handle_damage();
+
+ sie_page = container_of(sie_block, struct sie_page, sie_block);
+ mcck_backup = &sie_page->mcck_info;
+ mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
+ ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+ mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+ mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
}
+NOKPROBE_SYMBOL(s390_backup_mcck_info);
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
-#define ED_ETR_SYNC 12 /* External damage ETR sync check */
-#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
+
+#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
/*
* machine check handler.
@@ -248,44 +366,41 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
static unsigned long long last_ipd;
+ struct lowcore *lc = get_lowcore();
struct mcck_struct *mcck;
unsigned long long tmp;
- struct mci *mci;
- int umode;
+ irqentry_state_t irq_state;
+ union mci mci;
+ unsigned long mcck_dam_code;
+ int mcck_pending = 0;
+
+ irq_state = irqentry_nmi_enter(regs);
- nmi_enter();
+ if (user_mode(regs))
+ update_timer_mcck();
inc_irq_stat(NMI_NMI);
- mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
- mcck = &__get_cpu_var(cpu_mcck);
- umode = user_mode(regs);
+ mci.val = lc->mcck_interruption_code;
+ mcck = this_cpu_ptr(&cpu_mcck);
- if (mci->sd) {
- /* System damage -> stopping machine */
- s390_handle_damage("received system damage machine check.");
- }
- if (mci->pd) {
- if (mci->b) {
+ /*
+ * Reinject the instruction processing damages' machine checks
+ * including Delayed Access Exception into the guest
+ * instead of damaging the host if they happen in the guest.
+ */
+ if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
-#ifdef CONFIG_64BIT
z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
1ULL<<16);
-#else
- z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
- 1ULL<<29);
- o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
- 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
- 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
-#endif
- t_mcic = *(u64 *)mci;
+ t_mcic = mci.val;
if (((t_mcic & z_mcic) != 0) ||
((t_mcic & o_mcic) != o_mcic)) {
- s390_handle_damage("processing backup machine "
- "check with damage.");
+ s390_handle_damage();
}
/*
@@ -300,76 +415,95 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
ipd_count = 1;
last_ipd = tmp;
if (ipd_count == MAX_IPD_COUNT)
- s390_handle_damage("too many ipd retries.");
+ s390_handle_damage();
spin_unlock(&ipd_lock);
} else {
/* Processing damage -> stopping machine */
- s390_handle_damage("received instruction processing "
- "damage machine check.");
+ s390_handle_damage();
}
}
- if (s390_revalidate_registers(mci)) {
- if (umode) {
- /*
- * Couldn't restore all register contents while in
- * user mode -> mark task for termination.
- */
- mcck->kill_task = 1;
- mcck->mcck_code = *(unsigned long long *) mci;
- set_thread_flag(TIF_MCCK_PENDING);
- } else {
- /*
- * Couldn't restore all register contents while in
- * kernel mode -> stopping machine.
- */
- s390_handle_damage("unable to revalidate registers.");
- }
+ if (!nmi_registers_valid(mci)) {
+ if (!user_mode(regs))
+ s390_handle_damage();
+ /*
+ * Couldn't restore all register contents for the
+ * user space process -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = mci.val;
+ mcck_pending = 1;
}
- if (mci->cd) {
+
+ /*
+ * Backup the machine check's info if it happens when the guest
+ * is running.
+ */
+ if (test_cpu_flag(CIF_MCCK_GUEST))
+ s390_backup_mcck_info(regs);
+
+ if (mci.cd) {
/* Timing facility damage */
- s390_handle_damage("TOD clock damaged");
+ s390_handle_damage();
}
- if (mci->ed && mci->ec) {
+ if (mci.ed && mci.ec) {
/* External damage */
- if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
- etr_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
- etr_switch_to_local();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
- stp_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
- stp_island_check();
+ if (lc->external_damage_code & (1U << ED_STP_SYNC))
+ mcck->stp_queue |= stp_sync_check();
+ if (lc->external_damage_code & (1U << ED_STP_ISLAND))
+ mcck->stp_queue |= stp_island_check();
+ mcck_pending = 1;
}
- if (mci->se)
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
/* Storage error uncorrected */
- s390_handle_damage("received storage error uncorrected "
- "machine check.");
- if (mci->ke)
+ if (mci.se)
+ s390_handle_damage();
/* Storage key-error uncorrected */
- s390_handle_damage("received storage key-error uncorrected "
- "machine check.");
- if (mci->ds && mci->fa)
+ if (mci.ke)
+ s390_handle_damage();
/* Storage degradation */
- s390_handle_damage("received storage degradation machine "
- "check.");
- if (mci->cp) {
+ if (mci.ds && mci.fa)
+ s390_handle_damage();
+ }
+ if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
- set_thread_flag(TIF_MCCK_PENDING);
+ mcck_pending = 1;
}
- if (mci->w) {
+ if (mci.w) {
/* Warning pending */
mcck->warning = 1;
- set_thread_flag(TIF_MCCK_PENDING);
+ mcck_pending = 1;
}
- nmi_exit();
+
+ /*
+ * If there are only Channel Report Pending and External Damage
+ * machine checks, they will not be reinjected into the guest
+ * because they refer to host conditions only.
+ */
+ mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+ if (test_cpu_flag(CIF_MCCK_GUEST) &&
+ (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+ /* Set exit reason code for host's later handling */
+ *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ }
+ clear_cpu_flag(CIF_MCCK_GUEST);
+
+ if (mcck_pending)
+ schedule_mcck_handler();
+
+ irqentry_nmi_exit(regs, irq_state);
}
+NOKPROBE_SYMBOL(s390_do_machine_check);
static int __init machine_check_init(void)
{
- ctl_set_bit(14, 25); /* enable external damage MCH */
- ctl_set_bit(14, 27); /* enable system recovery MCH */
- ctl_set_bit(14, 24); /* enable warning MCH */
+ system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT);
return 0;
}
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
new file mode 100644
index 000000000000..e11ec15960a1
--- /dev/null
+++ b/arch/s390/kernel/nospec-branch.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/nospec-branch.h>
+
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
+static int __init nobp_setup_early(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ if (enabled && test_facility(82)) {
+ /*
+ * The user explicitly requested nobp=1, enable it and
+ * disable the expoline support.
+ */
+ nobp = 1;
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_disable = 1;
+ } else {
+ nobp = 0;
+ }
+ return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+ nobp = 0;
+ return 0;
+}
+early_param("nospec", nospec_setup_early);
+
+static int __init nospec_report(void)
+{
+ if (test_facility(156))
+ pr_info("Spectre V2 mitigation: etokens\n");
+ if (nospec_uses_trampoline())
+ pr_info("Spectre V2 mitigation: execute trampolines\n");
+ if (nobp_enabled())
+ pr_info("Spectre V2 mitigation: limited branch prediction\n");
+ return 0;
+}
+arch_initcall(nospec_report);
+
+#ifdef CONFIG_EXPOLINE
+
+int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+
+static int __init nospectre_v2_setup_early(char *str)
+{
+ nospec_disable = 1;
+ return 0;
+}
+early_param("nospectre_v2", nospectre_v2_setup_early);
+
+void __init nospec_auto_detect(void)
+{
+ if (test_facility(156) || cpu_mitigations_off()) {
+ /*
+ * The machine supports etokens.
+ * Disable expolines and disable nobp.
+ */
+ if (__is_defined(CC_USING_EXPOLINE))
+ nospec_disable = 1;
+ nobp = 0;
+ } else if (__is_defined(CC_USING_EXPOLINE)) {
+ /*
+ * The kernel has been compiled with expolines.
+ * Keep expolines enabled and disable nobp.
+ */
+ nospec_disable = 0;
+ nobp = 0;
+ }
+ /*
+ * If the kernel has not been compiled with expolines the
+ * nobp setting decides what is done, this depends on the
+ * CONFIG_KERNEL_NP option and the nobp/nospec parameters.
+ */
+}
+
+static int __init spectre_v2_setup_early(char *str)
+{
+ if (str && !strncmp(str, "on", 2)) {
+ nospec_disable = 0;
+ nobp = 0;
+ }
+ if (str && !strncmp(str, "off", 3))
+ nospec_disable = 1;
+ if (str && !strncmp(str, "auto", 4))
+ nospec_auto_detect();
+ return 0;
+}
+early_param("spectre_v2", spectre_v2_setup_early);
+
+static void __init_or_module __nospec_revert(s32 *start, s32 *end)
+{
+ enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type;
+ static const u8 branch[] = { 0x47, 0x00, 0x07, 0x00 };
+ u8 *instr, *thunk, *br;
+ u8 insnbuf[6];
+ s32 *epo;
+
+ /* Second part of the instruction replace is always a nop */
+ memcpy(insnbuf + 2, branch, sizeof(branch));
+ for (epo = start; epo < end; epo++) {
+ instr = (u8 *) epo + *epo;
+ if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
+ type = BRCL_EXPOLINE; /* brcl instruction */
+ else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05)
+ type = BRASL_EXPOLINE; /* brasl instruction */
+ else
+ continue;
+ thunk = instr + (long)(*(int *)(instr + 2)) * 2;
+ if (thunk[0] == 0xc6 && thunk[1] == 0x00)
+ /* exrl %r0,<target-br> */
+ br = thunk + (long)(*(int *)(thunk + 2)) * 2;
+ else
+ continue;
+ if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+ continue;
+ switch (type) {
+ case BRCL_EXPOLINE:
+ /* brcl to thunk, replace with br + nop */
+ insnbuf[0] = br[0];
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ case BRASL_EXPOLINE:
+ /* brasl to thunk, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ }
+
+ s390_kernel_write(instr, insnbuf, 6);
+ }
+}
+
+void __init_or_module nospec_revert(s32 *start, s32 *end)
+{
+ if (nospec_disable)
+ __nospec_revert(start, end);
+}
+
+extern s32 __nospec_call_start[], __nospec_call_end[];
+extern s32 __nospec_return_start[], __nospec_return_end[];
+void __init nospec_init_branches(void)
+{
+ nospec_revert(__nospec_call_start, __nospec_call_end);
+ nospec_revert(__nospec_return_start, __nospec_return_end);
+}
+
+#endif /* CONFIG_EXPOLINE */
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 000000000000..5970dd3ee7c5
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (test_facility(156))
+ return sysfs_emit(buf, "Mitigation: etokens\n");
+ if (nospec_uses_trampoline())
+ return sysfs_emit(buf, "Mitigation: execute trampolines\n");
+ if (nobp_enabled())
+ return sysfs_emit(buf, "Mitigation: limited branch prediction\n");
+ return sysfs_emit(buf, "Vulnerable\n");
+}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
new file mode 100644
index 000000000000..2fc40f97c0ad
--- /dev/null
+++ b/arch/s390/kernel/numa.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <asm/numa.h>
+
+void __init numa_setup(void)
+{
+ int nid;
+
+ nodes_clear(node_possible_map);
+ node_set(0, node_possible_map);
+ node_set_online(0);
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
+ NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ NODE_DATA(0)->node_id = 0;
+}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index d112fc66f993..94fa44776d0c 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* OS info memory interface
*
@@ -5,15 +6,19 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "os_info"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "os_info: " fmt
#include <linux/crash_dump.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
+#include <asm/asm-offsets.h>
+#include <asm/sections.h>
+#include <asm/ipl.h>
/*
* OS info structure has to be page aligned
@@ -26,7 +31,7 @@ static struct os_info os_info __page_aligned_data;
u32 os_info_csum(struct os_info *os_info)
{
int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
- return csum_partial(&os_info->version_major, size, 0);
+ return (__force u32)cksm(&os_info->version_major, size, 0);
}
/*
@@ -40,28 +45,51 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
}
/*
- * Add OS info entry and update checksum
+ * Add OS info data entry and update checksum
*/
-void os_info_entry_add(int nr, void *ptr, u64 size)
+void os_info_entry_add_data(int nr, void *ptr, u64 size)
{
- os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
- os_info.entry[nr].csum = csum_partial(ptr, size, 0);
+ os_info.entry[nr].csum = (__force u32)cksm(ptr, size, 0);
os_info.csum = os_info_csum(&os_info);
}
/*
- * Initialize OS info struture and set lowcore pointer
+ * Add OS info value entry and update checksum
+ */
+void os_info_entry_add_val(int nr, u64 value)
+{
+ os_info.entry[nr].val = value;
+ os_info.entry[nr].size = 0;
+ os_info.entry[nr].csum = 0;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info structure and set lowcore pointer
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE);
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
+ os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base);
+ os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset());
+ os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys);
+ os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap);
+ os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START);
+ os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END);
+ os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext);
+ os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end);
+ os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext));
os_info.csum = os_info_csum(&os_info);
- mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
+ abs_lc = get_abs_lowcore();
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc);
}
#ifdef CONFIG_CRASH_DUMP
@@ -89,11 +117,11 @@ static void os_info_old_alloc(int nr, int align)
goto fail;
}
buf_align = PTR_ALIGN(buf, align);
- if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+ if (copy_oldmem_kernel(buf_align, addr, size)) {
msg = "copy failed";
goto fail_free;
}
- csum = csum_partial(buf_align, size, 0);
+ csum = (__force u32)cksm(buf_align, size, 0);
if (csum != os_info_old->entry[nr].csum) {
msg = "checksum failed";
goto fail_free;
@@ -120,16 +148,16 @@ static void os_info_old_init(void)
if (os_info_init)
return;
- if (!OLDMEM_BASE)
+ if (!oldmem_data.start && !is_ipl_type_dump())
goto fail;
- if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
goto fail;
if (addr == 0 || addr % PAGE_SIZE)
goto fail;
os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
if (!os_info_old)
goto fail;
- if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+ if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old)))
goto fail_free;
if (os_info_old->magic != OS_INFO_MAGIC)
goto fail_free;
@@ -152,7 +180,7 @@ fail:
}
/*
- * Return pointer to os infor entry and its size
+ * Return pointer to os info entry and its size
*/
void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 390d9ae57bb2..408ab93112bf 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,40 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
+ * Copyright IBM Corp. 2012, 2023
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
*/
-#define KMSG_COMPONENT "cpum_cf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpum_cf: " fmt
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
-#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
-#include <linux/export.h>
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
+#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
+
#include <asm/cpu_mf.h>
+#include <asm/hwctrset.h>
+#include <asm/debug.h>
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
-/* CPU-measurement counter facility supports these CPU counter sets:
- * For CPU counter sets:
- * Basic counter set: 0-31
- * Problem-state counter set: 32-63
- * Crypto-activity counter set: 64-127
- * Extented counter set: 128-159
- */
enum cpumf_ctr_set {
- /* CPU counter sets */
- CPUMF_CTR_SET_BASIC = 0,
- CPUMF_CTR_SET_USER = 1,
- CPUMF_CTR_SET_CRYPTO = 2,
- CPUMF_CTR_SET_EXT = 3,
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
/* Maximum number of counter sets */
CPUMF_CTR_SET_MAX,
@@ -42,261 +37,681 @@ enum cpumf_ctr_set {
#define CPUMF_LCCTL_ENABLE_SHIFT 16
#define CPUMF_LCCTL_ACTCTL_SHIFT 0
-static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
- [CPUMF_CTR_SET_BASIC] = 0x02,
- [CPUMF_CTR_SET_USER] = 0x04,
- [CPUMF_CTR_SET_CRYPTO] = 0x08,
- [CPUMF_CTR_SET_EXT] = 0x01,
-};
-static void ctr_set_enable(u64 *state, int ctr_set)
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
{
- *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+ *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
}
-static void ctr_set_disable(u64 *state, int ctr_set)
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
{
- *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
+ *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
}
-static void ctr_set_start(u64 *state, int ctr_set)
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
{
- *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+ *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
}
-static void ctr_set_stop(u64 *state, int ctr_set)
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
{
- *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
+ *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
}
-/* Local CPUMF event structure */
-struct cpu_hw_events {
- struct cpumf_ctr_info info;
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ return stcctm(BASIC, range, dest);
+ case CPUMF_CTR_SET_USER:
+ return stcctm(PROBLEM_STATE, range, dest);
+ case CPUMF_CTR_SET_CRYPTO:
+ return stcctm(CRYPTO_ACTIVITY, range, dest);
+ case CPUMF_CTR_SET_EXT:
+ return stcctm(EXTENDED, range, dest);
+ case CPUMF_CTR_SET_MT_DIAG:
+ return stcctm(MT_DIAG_CLEARING, range, dest);
+ case CPUMF_CTR_SET_MAX:
+ return 3;
+ }
+ return 3;
+}
+
+struct cpu_cf_events {
+ refcount_t refcnt; /* Reference count */
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
- u64 state, tx_state;
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
};
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
- .ctr_set = {
- [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
- },
- .state = 0,
- .flags = 0,
+
+static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
+static debug_info_t *cf_dbg;
+
+/*
+ * The CPU Measurement query counter information instruction contains
+ * information which varies per machine generation, but is constant and
+ * does not change when running on a particular machine, such as counter
+ * first and second version number. This is needed to determine the size
+ * of counter sets. Extract this information at device driver initialization.
+ */
+static struct cpumf_ctr_info cpumf_ctr_info;
+
+struct cpu_cf_ptr {
+ struct cpu_cf_events *cpucf;
};
-static int get_counter_set(u64 event)
+static struct cpu_cf_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ * CPU hot plug remove can not happen. Event removal requires a close()
+ * first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
+ * All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
{
- int set = -1;
+ struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
- if (event < 32)
- set = CPUMF_CTR_SET_BASIC;
- else if (event < 64)
- set = CPUMF_CTR_SET_USER;
- else if (event < 128)
- set = CPUMF_CTR_SET_CRYPTO;
- else if (event < 256)
- set = CPUMF_CTR_SET_EXT;
+ if (p) {
+ struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
- return set;
+ return q->cpucf;
+ }
+ return NULL;
}
-static int validate_event(const struct hw_perf_event *hwc)
+static struct cpu_cf_events *this_cpu_cfhw(void)
{
- switch (hwc->config_base) {
- case CPUMF_CTR_SET_BASIC:
- case CPUMF_CTR_SET_USER:
- case CPUMF_CTR_SET_CRYPTO:
- case CPUMF_CTR_SET_EXT:
- /* check for reserved counters */
- if ((hwc->config >= 6 && hwc->config <= 31) ||
- (hwc->config >= 38 && hwc->config <= 63) ||
- (hwc->config >= 80 && hwc->config <= 127))
- return -EOPNOTSUPP;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
+ return get_cpu_cfhw(smp_processor_id());
}
-static int validate_ctr_version(const struct hw_perf_event *hwc)
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
{
- struct cpu_hw_events *cpuhw;
- int err = 0;
+ lcctl(0);
+}
- cpuhw = &get_cpu_var(cpu_hw_events);
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+ if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+ return;
+ free_percpu(cpu_cf_root.cfptr);
+ cpu_cf_root.cfptr = NULL;
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
+ __func__, refcount_read(&cpu_cf_root.refcnt),
+ !cpu_cf_root.cfptr);
+}
- /* check required version for counter sets */
- switch (hwc->config_base) {
- case CPUMF_CTR_SET_BASIC:
- case CPUMF_CTR_SET_USER:
- if (cpuhw->info.cfvn < 1)
- err = -EOPNOTSUPP;
- break;
- case CPUMF_CTR_SET_CRYPTO:
- case CPUMF_CTR_SET_EXT:
- if (cpuhw->info.csvn < 1)
- err = -EOPNOTSUPP;
- if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
- (cpuhw->info.csvn == 2 && hwc->config > 175) ||
- (cpuhw->info.csvn > 2 && hwc->config > 255))
- err = -EOPNOTSUPP;
- break;
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+ int rc = 0;
+
+ if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+ return rc;
+
+ /* The memory is already zeroed. */
+ cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+ if (cpu_cf_root.cfptr) {
+ refcount_set(&cpu_cf_root.refcnt, 1);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ } else {
+ rc = -ENOMEM;
}
- put_cpu_var(cpu_hw_events);
- return err;
+ return rc;
}
-static int validate_ctr_auth(const struct hw_perf_event *hwc)
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
{
- struct cpu_hw_events *cpuhw;
- u64 ctrs_state;
- int err = 0;
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+
+ mutex_lock(&pmc_reserve_mutex);
+ /*
+ * When invoked via CPU hotplug handler, there might be no events
+ * installed or that particular CPU might not have an
+ * event installed. This anchor pointer can be NULL!
+ */
+ if (!cpu_cf_root.cfptr)
+ goto out;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+ /*
+ * Might be zero when called from CPU hotplug handler and no event
+ * installed on that CPU, but on different CPUs.
+ */
+ if (!cpuhw)
+ goto out;
- cpuhw = &get_cpu_var(cpu_hw_events);
+ if (refcount_dec_and_test(&cpuhw->refcnt)) {
+ kfree(cpuhw);
+ p->cpucf = NULL;
+ }
+ cpum_cf_free_root();
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+}
- /* check authorization for cpu counter sets */
- ctrs_state = cpumf_state_ctl[hwc->config_base];
- if (!(ctrs_state & cpuhw->info.auth_ctl))
- err = -EPERM;
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+ int rc;
- put_cpu_var(cpu_hw_events);
- return err;
+ mutex_lock(&pmc_reserve_mutex);
+ rc = cpum_cf_alloc_root();
+ if (rc)
+ goto unlock;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+
+ if (!cpuhw) {
+ cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+ if (cpuhw) {
+ p->cpucf = cpuhw;
+ refcount_set(&cpuhw->refcnt, 1);
+ } else {
+ rc = -ENOMEM;
+ }
+ } else {
+ refcount_inc(&cpuhw->refcnt);
+ }
+ if (rc) {
+ /*
+ * Error in allocation of event, decrement anchor. Since
+ * cpu_cf_event in not created, its destroy() function is not
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ cpum_cf_free_root();
+ }
+unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return rc;
}
/*
- * Change the CPUMF state to active.
- * Enable and activate the CPU-counter sets according
- * to the per-cpu control state.
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is non-zero install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
*/
-static void cpumf_pmu_enable(struct pmu *pmu)
+static int cpum_cf_alloc(int cpu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- int err;
+ cpumask_var_t mask;
+ int rc;
- if (cpuhw->flags & PMU_F_ENABLED)
- return;
+ if (cpu == -1) {
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(cpu) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (rc) {
+ for_each_cpu(cpu, mask)
+ cpum_cf_free_cpu(cpu);
+ break;
+ }
+ cpumask_set_cpu(cpu, mask);
+ }
+ free_cpumask_var(mask);
+ } else {
+ rc = cpum_cf_alloc_cpu(cpu);
+ }
+ return rc;
+}
- err = lcctl(cpuhw->state);
- if (err) {
- pr_err("Enabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
+static void cpum_cf_free(int cpu)
+{
+ if (cpu == -1) {
+ for_each_online_cpu(cpu)
+ cpum_cf_free_cpu(cpu);
+ } else {
+ cpum_cf_free_cpu(cpu);
}
+}
+
+#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
+ /* interval in seconds */
+
+/* Counter sets are stored as data stream in a page sized memory buffer and
+ * exported to user space via raw data attached to the event sample data.
+ * Each counter set starts with an eight byte header consisting of:
+ * - a two byte eye catcher (0xfeef)
+ * - a one byte counter set number
+ * - a two byte counter set size (indicates the number of counters in this set)
+ * - a three byte reserved value (must be zero) to make the header the same
+ * size as a counter value.
+ * All counter values are eight byte in size.
+ *
+ * All counter sets are followed by a 64 byte trailer.
+ * The trailer consists of a:
+ * - flag field indicating valid fields when corresponding bit set
+ * - the counter facility first and second version number
+ * - the CPU speed if nonzero
+ * - the time stamp the counter sets have been collected
+ * - the time of day (TOD) base value
+ * - the machine type.
+ *
+ * The counter sets are saved when the process is prepared to be executed on a
+ * CPU and saved again when the process is going to be removed from a CPU.
+ * The difference of both counter sets are calculated and stored in the event
+ * sample data area.
+ */
+struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int set:16; /* 16-31 Counter set identifier */
+ unsigned int ctr:16; /* 32-47 Number of stored counters */
+ unsigned int res1:16; /* 48-63 Reserved */
+};
+
+struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
+ /* 0 - 7 */
+ union {
+ struct {
+ unsigned int clock_base:1; /* TOD clock base set */
+ unsigned int speed:1; /* CPU speed set */
+ /* Measurement alerts */
+ unsigned int mtda:1; /* Loss of MT ctr. data alert */
+ unsigned int caca:1; /* Counter auth. change alert */
+ unsigned int lcda:1; /* Loss of counter data alert */
+ };
+ unsigned long flags; /* 0-63 All indicators */
+ };
+ /* 8 - 15 */
+ unsigned int cfvn:16; /* 64-79 Ctr First Version */
+ unsigned int csvn:16; /* 80-95 Ctr Second Version */
+ unsigned int cpu_speed:32; /* 96-127 CPU speed */
+ /* 16 - 23 */
+ unsigned long timestamp; /* 128-191 Timestamp (TOD) */
+ /* 24 - 55 */
+ union {
+ struct {
+ unsigned long progusage1;
+ unsigned long progusage2;
+ unsigned long progusage3;
+ unsigned long tod_base;
+ };
+ unsigned long progusage[4];
+ };
+ /* 56 - 63 */
+ unsigned int mach_type:16; /* Machine type */
+ unsigned int res1:16; /* Reserved */
+ unsigned int res2:32; /* Reserved */
+};
- cpuhw->flags |= PMU_F_ENABLED;
+/* Create the trailer data at the end of a page. */
+static void cfdiag_trailer(struct cf_trailer_entry *te)
+{
+ struct cpuid cpuid;
+
+ te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */
+ te->csvn = cpumf_ctr_info.csvn;
+
+ get_cpu_id(&cpuid); /* Machine type */
+ te->mach_type = cpuid.machine;
+ te->cpu_speed = cfdiag_cpu_speed;
+ if (te->cpu_speed)
+ te->speed = 1;
+ te->clock_base = 1; /* Save clock base */
+ te->tod_base = tod_clock_base.tod;
+ te->timestamp = get_tod_clock_fast();
}
/*
- * Change the CPUMF state to inactive.
- * Disable and enable (inactive) the CPU-counter sets according
- * to the per-cpu control state.
+ * The number of counters per counter set varies between machine generations,
+ * but is constant when running on a particular machine generation.
+ * Determine each counter set size at device driver initialization and
+ * retrieve it later.
*/
-static void cpumf_pmu_disable(struct pmu *pmu)
+static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
+static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- int err;
- u64 inactive;
+ size_t ctrset_size = 0;
- if (!(cpuhw->flags & PMU_F_ENABLED))
- return;
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (cpumf_ctr_info.cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (cpumf_ctr_info.cfvn == 1)
+ ctrset_size = 6;
+ else if (cpumf_ctr_info.cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
+ ctrset_size = 16;
+ else if (cpumf_ctr_info.csvn >= 6)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (cpumf_ctr_info.csvn == 1)
+ ctrset_size = 32;
+ else if (cpumf_ctr_info.csvn == 2)
+ ctrset_size = 48;
+ else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
+ ctrset_size = 128;
+ else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (cpumf_ctr_info.csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+ cpumf_ctr_setsizes[ctrset] = ctrset_size;
+}
- inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
- err = lcctl(inactive);
- if (err) {
- pr_err("Disabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
+{
+ return cpumf_ctr_setsizes[ctrset];
+}
+
+/* Read a counter set. The counter set number determines the counter set and
+ * the CPUM-CF first and second version number determine the number of
+ * available counters in each counter set.
+ * Each counter set starts with header containing the counter set number and
+ * the number of eight byte counters.
+ *
+ * The functions returns the number of bytes occupied by this counter set
+ * including the header.
+ * If there is no counter in the counter set, this counter set is useless and
+ * zero is returned on this case.
+ *
+ * Note that the counter sets may not be enabled or active and the stcctm
+ * instruction might return error 3. Depending on error_ok value this is ok,
+ * for example when called from cpumf_pmu_start() call back function.
+ */
+static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
+ size_t room, bool error_ok)
+{
+ size_t ctrset_size, need = 0;
+ int rc = 3; /* Assume write failure */
+
+ ctrdata->def = CF_DIAG_CTRSET_DEF;
+ ctrdata->set = ctrset;
+ ctrdata->res1 = 0;
+ ctrset_size = cpum_cf_read_setsize(ctrset);
+
+ if (ctrset_size) { /* Save data */
+ need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
+ if (need <= room) {
+ rc = ctr_stcctm(ctrset, ctrset_size,
+ (u64 *)(ctrdata + 1));
+ }
+ if (rc != 3 || error_ok)
+ ctrdata->ctr = ctrset_size;
+ else
+ need = 0;
}
- cpuhw->flags &= ~PMU_F_ENABLED;
+ return need;
}
+static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
+ [CPUMF_CTR_SET_BASIC] = 0x02,
+ [CPUMF_CTR_SET_USER] = 0x04,
+ [CPUMF_CTR_SET_CRYPTO] = 0x08,
+ [CPUMF_CTR_SET_EXT] = 0x01,
+ [CPUMF_CTR_SET_MT_DIAG] = 0x20,
+};
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
+/* Read out all counter sets and save them in the provided data buffer.
+ * The last 64 byte host an artificial trailer entry.
+ */
+static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
+ bool error_ok)
+{
+ struct cf_trailer_entry *trailer;
+ size_t offset = 0, done;
+ int i;
-/* CPU-measurement alerts for the counter facility */
-static void cpumf_measurement_alert(struct ext_code ext_code,
- unsigned int alert, unsigned long unused)
+ memset(data, 0, sz);
+ sz -= sizeof(*trailer); /* Always room for trailer */
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ struct cf_ctrset_entry *ctrdata = data + offset;
+
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue; /* Counter set not authorized */
+
+ done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
+ offset += done;
+ }
+ trailer = data + offset;
+ cfdiag_trailer(trailer);
+ return offset + sizeof(*trailer);
+}
+
+/* Calculate the difference for each counter in a counter set. */
+static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
{
- struct cpu_hw_events *cpuhw;
+ for (; --counters >= 0; ++pstart, ++pstop)
+ if (*pstop >= *pstart)
+ *pstop -= *pstart;
+ else
+ *pstop = *pstart - *pstop + 1;
+}
- if (!(alert & CPU_MF_INT_CF_MASK))
- return;
+/* Scan the counter sets and calculate the difference of each counter
+ * in each set. The result is the increment of each counter during the
+ * period the counter set has been activated.
+ *
+ * Return true on success.
+ */
+static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
+{
+ struct cf_trailer_entry *trailer_start, *trailer_stop;
+ struct cf_ctrset_entry *ctrstart, *ctrstop;
+ size_t offset = 0;
+ int i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
+ ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
+ if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
+ pr_err_once("cpum_cf_diag counter set compare error "
+ "in set %i\n", ctrstart->set);
+ return 0;
+ }
+ if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
+ cfdiag_diffctrset((u64 *)(ctrstart + 1),
+ (u64 *)(ctrstop + 1), ctrstart->ctr);
+ offset += ctrstart->ctr * sizeof(u64) +
+ sizeof(*ctrstart);
+ }
+ }
- inc_irq_stat(IRQEXT_CMC);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ /* Save time_stamp from start of event in stop's trailer */
+ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
+ trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
+ trailer_stop->progusage[0] = trailer_start->timestamp;
- /* Measurement alerts are shared and might happen when the PMU
- * is not reserved. Ignore these alerts in this case. */
- if (!(cpuhw->flags & PMU_F_RESERVED))
- return;
+ return 1;
+}
- /* counter authorization change alert */
- if (alert & CPU_MF_INT_CF_CACA)
- qctri(&cpuhw->info);
+static enum cpumf_ctr_set get_counter_set(u64 event)
+{
+ int set = CPUMF_CTR_SET_MAX;
- /* loss of counter data alert */
- if (alert & CPU_MF_INT_CF_LCDA)
- pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+ if (event < 32)
+ set = CPUMF_CTR_SET_BASIC;
+ else if (event < 64)
+ set = CPUMF_CTR_SET_USER;
+ else if (event < 128)
+ set = CPUMF_CTR_SET_CRYPTO;
+ else if (event < 288)
+ set = CPUMF_CTR_SET_EXT;
+ else if (event >= 448 && event < 496)
+ set = CPUMF_CTR_SET_MT_DIAG;
+
+ return set;
}
-#define PMC_INIT 0
-#define PMC_RELEASE 1
-static void setup_pmc_cpu(void *flags)
+static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ u16 mtdiag_ctl;
+ int err = 0;
- switch (*((int *) flags)) {
- case PMC_INIT:
- memset(&cpuhw->info, 0, sizeof(cpuhw->info));
- qctri(&cpuhw->info);
- cpuhw->flags |= PMU_F_RESERVED;
+ /* check required version for counter sets */
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ if (cpumf_ctr_info.cfvn < 1)
+ err = -EOPNOTSUPP;
break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
+ case CPUMF_CTR_SET_CRYPTO:
+ if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
+ config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (cpumf_ctr_info.csvn < 1)
+ err = -EOPNOTSUPP;
+ if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
+ (cpumf_ctr_info.csvn == 2 && config > 175) ||
+ (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+ config > 255) ||
+ (cpumf_ctr_info.csvn >= 6 && config > 287))
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (cpumf_ctr_info.csvn <= 3)
+ err = -EOPNOTSUPP;
+ /*
+ * MT-diagnostic counters are read-only. The counter set
+ * is automatically enabled and activated on all CPUs with
+ * multithreading (SMT). Deactivation of multithreading
+ * also disables the counter set. State changes are ignored
+ * by lcctl(). Because Linux controls SMT enablement through
+ * a kernel parameter only, the counter set is either disabled
+ * or enabled and active.
+ *
+ * Thus, the counters can only be used if SMT is on and the
+ * counter set is enabled and active.
+ */
+ mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
+ if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
+ (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
+ (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
+ err = -EOPNOTSUPP;
break;
+ case CPUMF_CTR_SET_MAX:
+ err = -EOPNOTSUPP;
}
- /* Disable CPU counter sets */
- lcctl(0);
+ return err;
}
-/* Initialize the CPU-measurement facility */
-static int reserve_pmc_hardware(void)
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
{
- int flags = PMC_INIT;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ int err;
- on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_register();
+ if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
+ return;
- return 0;
+ err = lcctl(cpuhw->state | cpuhw->dev_state);
+ if (err)
+ pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
+ else
+ cpuhw->flags |= PMU_F_ENABLED;
}
-/* Release the CPU-measurement facility */
-static void release_pmc_hardware(void)
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
{
- int flags = PMC_RELEASE;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ u64 inactive;
+ int err;
- on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_unregister();
+ if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ inactive |= cpuhw->dev_state;
+ err = lcctl(inactive);
+ if (err)
+ pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
+ else
+ cpuhw->flags &= ~PMU_F_ENABLED;
}
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- release_pmc_hardware();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ cpum_cf_free(event->cpu);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -320,14 +735,20 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static int __hw_perf_event_init(struct perf_event *event)
+static int is_userspace_event(u64 ev)
+{
+ return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
+}
+
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- int err;
+ enum cpumf_ctr_set set;
u64 ev;
- switch (attr->type) {
+ switch (type) {
case PERF_TYPE_RAW:
/* Raw events are used to access counters directly,
* hence do not permit excludes */
@@ -339,21 +760,26 @@ static int __hw_perf_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
ev = attr->config;
- /* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_user[ev];
-
- /* No support for kernel space counters only */
+ /*
+ * Count user space (problem-state) only
+ * Handle events 32 and 33 as 0:u and 1:u
+ */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+ }
} else if (!attr->exclude_kernel && attr->exclude_user) {
+ /* No support for kernel space counters only */
return -EOPNOTSUPP;
-
- /* Count user and kernel space */
} else {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_basic[ev];
+ /* Count user and kernel space, incl. events 32 + 33 */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
}
break;
@@ -364,67 +790,79 @@ static int __hw_perf_event_init(struct perf_event *event)
if (ev == -1)
return -ENOENT;
- if (ev >= PERF_CPUM_CF_MAX_CTR)
- return -EINVAL;
-
- /* Use the hardware perf event structure to store the counter number
- * in 'config' member and the counter set to which the counter belongs
- * in the 'config_base'. The counter set (config_base) is then used
- * to enable/disable the counters.
- */
- hwc->config = ev;
- hwc->config_base = get_counter_set(ev);
+ if (ev > PERF_CPUM_CF_MAX_CTR)
+ return -ENOENT;
- /* Validate the counter that is assigned to this event.
- * Because the counter facility can use numerous counters at the
- * same time without constraints, it is not necessary to explicity
- * validate event groups (event->group_leader != event).
- */
- err = validate_event(hwc);
- if (err)
- return err;
+ /* Obtain the counter set to which the specified counter belongs */
+ set = get_counter_set(ev);
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ case CPUMF_CTR_SET_MT_DIAG:
+ /*
+ * Use the hardware perf event structure to store the
+ * counter number in the 'config' member and the counter
+ * set number in the 'config_base' as bit mask.
+ * It is later used to enable/disable the counter(s).
+ */
+ hwc->config = ev;
+ hwc->config_base = cpumf_ctr_ctl[set];
+ break;
+ case CPUMF_CTR_SET_MAX:
+ /* The counter could not be associated to a counter set */
+ return -EINVAL;
+ }
/* Initialize for using the CPU-measurement counter facility */
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
- }
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
- /* Finally, validate version and authorization of the counter set */
- err = validate_ctr_auth(hwc);
- if (!err)
- err = validate_ctr_version(hwc);
+ /*
+ * Finally, validate version and authorization of the counter set.
+ * If the particular CPU counter set is not authorized,
+ * return with -ENOENT in order to fall back to other
+ * PMUs that might suffice the event request.
+ */
+ if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
+ return -ENOENT;
+ return validate_ctr_version(hwc->config, set);
+}
- return err;
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
+ * attribute::type values:
+ * - PERF_TYPE_HARDWARE:
+ * - pmu->type:
+ * Handle both type of invocations identical. They address the same hardware.
+ * The result is different when event modifiers exclude_kernel and/or
+ * exclude_user are also set.
+ */
+static int cpumf_pmu_event_type(struct perf_event *event)
+{
+ u64 ev = event->attr.config;
+
+ if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
+ return PERF_TYPE_HARDWARE;
+ return PERF_TYPE_RAW;
}
static int cpumf_pmu_event_init(struct perf_event *event)
{
- int err;
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- case PERF_TYPE_RAW:
- /* The CPU measurement counter facility does not have overflow
- * interrupts to do sampling. Sampling must be provided by
- * external means, for example, by timers.
- */
- if (is_sampling_event(event))
- return -ENOENT;
- err = __hw_perf_event_init(event);
- break;
- default:
- return -ENOENT;
- }
+ unsigned int type = event->attr.type;
+ int err = -ENOENT;
- if (unlikely(err) && event->destroy)
- event->destroy(event);
+ if (is_sampling_event(event)) /* No sampling support */
+ return err;
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ err = __hw_perf_event_init(event, type);
+ else if (event->pmu->type == type)
+ /* Registered as unknown PMU */
+ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
return err;
}
@@ -434,8 +872,8 @@ static int hw_perf_event_reset(struct perf_event *event)
u64 prev, new;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err) {
if (err != 3)
@@ -447,28 +885,26 @@ static int hw_perf_event_reset(struct perf_event *event)
*/
new = 0;
}
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
return err;
}
-static int hw_perf_event_update(struct perf_event *event)
+static void hw_perf_event_update(struct perf_event *event)
{
u64 prev, new, delta;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err)
- goto out;
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ return;
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
delta = (prev <= new) ? new - prev
: (-1ULL - prev) + new + 1; /* overflow */
local64_add(delta, &event->count);
-out:
- return err;
}
static void cpumf_pmu_read(struct perf_event *event)
@@ -481,18 +917,13 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
+ int i;
- if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
- return;
-
- if (WARN_ON_ONCE(hwc->config == -1))
+ if (!(hwc->state & PERF_HES_STOPPED))
return;
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
hwc->state = 0;
/* (Re-)enable and activate the counter set */
@@ -504,45 +935,93 @@ static void cpumf_pmu_start(struct perf_event *event, int flags)
* needs to be synchronized. At this point, the counter set can be in
* the inactive or disabled state.
*/
- hw_perf_event_reset(event);
+ if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+ cpuhw->usedss = cfdiag_getctr(cpuhw->start,
+ sizeof(cpuhw->start),
+ hwc->config_base, true);
+ } else {
+ hw_perf_event_reset(event);
+ }
- /* increment refcount for this counter set */
- atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+ /* Increment refcount for counter sets */
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+ if ((hwc->config_base & cpumf_ctr_ctl[i]))
+ atomic_inc(&cpuhw->ctr_set[i]);
+}
+
+/* Create perf event sample with the counter sets as raw data. The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int cfdiag_push_sample(struct perf_event *event,
+ struct cpu_cf_events *cpuhw)
+{
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ int overflow;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = event->cpu;
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = cpuhw->usedss;
+ raw.frag.data = cpuhw->stop;
+ perf_sample_save_raw_data(&data, event, &raw);
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+
+ perf_event_update_userpage(event);
+ return overflow;
}
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
+ int i;
if (!(hwc->state & PERF_HES_STOPPED)) {
/* Decrement reference count for this counter set and if this
* is the last used counter in the set, clear activation
* control and set the counter set state to inactive.
*/
- if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
- ctr_set_stop(&cpuhw->state, hwc->config_base);
- event->hw.state |= PERF_HES_STOPPED;
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (!(hwc->config_base & cpumf_ctr_ctl[i]))
+ continue;
+ if (!atomic_dec_return(&cpuhw->ctr_set[i]))
+ ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
+ }
+ hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- hw_perf_event_update(event);
- event->hw.state |= PERF_HES_UPTODATE;
+ if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+ local64_inc(&event->count);
+ cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
+ sizeof(cpuhw->stop),
+ event->hw.config_base,
+ false);
+ if (cfdiag_diffctr(cpuhw, event->hw.config_base))
+ cfdiag_push_sample(event, cpuhw);
+ } else {
+ hw_perf_event_update(event);
+ }
+ hwc->state |= PERF_HES_UPTODATE;
}
}
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
-
- /* Check authorization for the counter set to which this
- * counter belongs.
- * For group events transaction, the authorization check is
- * done in cpumf_pmu_commit_txn().
- */
- if (!(cpuhw->flags & PERF_EVENT_TXN))
- if (validate_ctr_auth(&event->hw))
- return -EPERM;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -550,14 +1029,13 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
if (flags & PERF_EF_START)
cpumf_pmu_start(event, PERF_EF_RELOAD);
- perf_event_update_userpage(event);
-
return 0;
}
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -567,127 +1045,903 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
*
* When a new perf event has been added but not yet started, this can
* clear enable control and resets all counters in a set. Therefore,
- * cpumf_pmu_start() always has to reenable a counter set.
+ * cpumf_pmu_start() always has to re-enable a counter set.
*/
- if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
- ctr_set_disable(&cpuhw->state, event->hw.config_base);
-
- perf_event_update_userpage(event);
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+ if (!atomic_read(&cpuhw->ctr_set[i]))
+ ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
}
+/* Performance monitoring unit for s390x */
+static struct pmu cpumf_pmu = {
+ .task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .event_init = cpumf_pmu_event_init,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cpumf_pmu_read,
+};
+
+static struct cfset_session { /* CPUs and counter set bit mask */
+ struct list_head head; /* Head of list of active processes */
+} cfset_session = {
+ .head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+
/*
- * Start group events scheduling transaction.
- * Set flags to perform a single test at commit time.
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ * and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ * pmu_delete(). The event itself is removed when the file descriptor is
+ * closed.
*/
-static void cpumf_pmu_start_txn(struct pmu *pmu)
+static int cfset_online_cpu(unsigned int cpu);
+
+static int cpum_cf_online_cpu(unsigned int cpu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ int rc = 0;
- perf_pmu_disable(pmu);
- cpuhw->flags |= PERF_EVENT_TXN;
- cpuhw->tx_state = cpuhw->state;
+ /*
+ * Ignore notification for perf_event_open().
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (!rc)
+ cfset_online_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return rc;
}
-/*
- * Stop and cancel a group events scheduling tranctions.
- * Assumes cpumf_pmu_del() is called for each successful added
- * cpumf_pmu_add() during the transaction.
+static int cfset_offline_cpu(unsigned int cpu);
+
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+ /*
+ * During task exit processing of grouped perf events triggered by CPU
+ * hotplug processing, pmu_disable() is called as part of perf context
+ * removal process. Therefore do not trigger event removal now for
+ * perf_event_open() created events. Perf common code triggers event
+ * destruction when the event file descriptor is closed.
+ *
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ cfset_offline_cpu(cpu);
+ cpum_cf_free_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+ return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_cf_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+
+ /*
+ * Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case.
+ */
+ cpuhw = this_cpu_cfhw();
+ if (!cpuhw)
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpumf_ctr_info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
+static int cfset_init(void);
+static int __init cpumf_pmu_init(void)
+{
+ int rc;
+
+ /* Extract counter measurement facility information */
+ if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
+ return -ENODEV;
+
+ /* Determine and store counter set sizes for later reference */
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ cpum_cf_make_setsize(rc);
+
+ /*
+ * Clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters
+ */
+ system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ cf_dbg = debug_register("cpum_cf", 2, 1, 128);
+ if (!cf_dbg) {
+ pr_err("Registration of s390dbf(cpum_cf) failed\n");
+ rc = -ENOMEM;
+ goto out1;
+ }
+ debug_register_view(cf_dbg, &debug_sprintf_view);
+
+ cpumf_pmu.attr_groups = cpumf_cf_event_group();
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
+ if (rc) {
+ pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ goto out2;
+ } else if (stccm_avail()) { /* Setup counter set device */
+ cfset_init();
+ }
+
+ rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ cpum_cf_online_cpu, cpum_cf_offline_cpu);
+ return rc;
+
+out2:
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
+out1:
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
+ return rc;
+}
+
+/* Support for the CPU Measurement Facility counter set extraction using
+ * device /dev/hwctr. This allows user space programs to extract complete
+ * counter set via normal file operations.
*/
-static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+
+struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
+ unsigned int sets; /* Counter set bit mask */
+ atomic_t cpus_ack; /* # CPUs successfully executed func */
+};
+
+struct cfset_request { /* CPUs and counter set bit mask */
+ unsigned long ctrset; /* Bit mask of counter set to read */
+ cpumask_t mask; /* CPU mask to read from */
+ struct list_head node; /* Chain to cfset_session.head */
+};
+
+static void cfset_session_init(void)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ INIT_LIST_HEAD(&cfset_session.head);
+}
- WARN_ON(cpuhw->tx_state != cpuhw->state);
+/* Remove current request from global bookkeeping. Maintain a counter set bit
+ * mask on a per CPU basis.
+ * Done in process context under mutex protection.
+ */
+static void cfset_session_del(struct cfset_request *p)
+{
+ list_del(&p->node);
+}
- cpuhw->flags &= ~PERF_EVENT_TXN;
- perf_pmu_enable(pmu);
+/* Add current request to global bookkeeping. Maintain a counter set bit mask
+ * on a per CPU basis.
+ * Done in process context under mutex protection.
+ */
+static void cfset_session_add(struct cfset_request *p)
+{
+ list_add(&p->node, &cfset_session.head);
+}
+
+/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
+ * path is currently used.
+ * The cpu_cf_events::dev_state is used to denote counter sets in use by this
+ * interface. It is always or'ed in. If this interface is not active, its
+ * value is zero and no additional counter sets will be included.
+ *
+ * The cpu_cf_events::state is used by the perf_event_open SVC and remains
+ * unchanged.
+ *
+ * perf_pmu_enable() and perf_pmu_enable() and its call backs
+ * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
+ * performance measurement subsystem to enable per process
+ * CPU Measurement counter facility.
+ * The XXX_enable() and XXX_disable functions are used to turn off
+ * x86 performance monitoring interrupt (PMI) during scheduling.
+ * s390 uses these calls to temporarily stop and resume the active CPU
+ * counters sets during scheduling.
+ *
+ * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
+ * device access. The perf_event_open() SVC interface makes a lot of effort
+ * to only run the counters while the calling process is actively scheduled
+ * to run.
+ * When /dev/hwctr interface is also used at the same time, the counter sets
+ * will keep running, even when the process is scheduled off a CPU.
+ * However this is not a problem and does not lead to wrong counter values
+ * for the perf_event_open() SVC. The current counter value will be recorded
+ * during schedule-in. At schedule-out time the current counter value is
+ * extracted again and the delta is calculated and added to the event.
+ */
+/* Stop all counter sets via ioctl interface */
+static void cfset_ioctl_off(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ struct cfset_call_on_cpu_parm *p = parm;
+ int rc;
+
+ /* Check if any counter set used by /dev/hwctr */
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ if ((p->sets & cpumf_ctr_ctl[rc])) {
+ if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
+ ctr_set_disable(&cpuhw->dev_state,
+ cpumf_ctr_ctl[rc]);
+ ctr_set_stop(&cpuhw->dev_state,
+ cpumf_ctr_ctl[rc]);
+ }
+ }
+ /* Keep perf_event_open counter sets */
+ rc = lcctl(cpuhw->dev_state | cpuhw->state);
+ if (rc)
+ pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->state, S390_HWCTR_DEVICE, rc);
+ if (!cpuhw->dev_state)
+ cpuhw->flags &= ~PMU_F_IN_USE;
+}
+
+/* Start counter sets on particular CPU */
+static void cfset_ioctl_on(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ struct cfset_call_on_cpu_parm *p = parm;
+ int rc;
+
+ cpuhw->flags |= PMU_F_IN_USE;
+ ctr_set_enable(&cpuhw->dev_state, p->sets);
+ ctr_set_start(&cpuhw->dev_state, p->sets);
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ if ((p->sets & cpumf_ctr_ctl[rc]))
+ atomic_inc(&cpuhw->ctr_set[rc]);
+ rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */
+ if (!rc)
+ atomic_inc(&p->cpus_ack);
+ else
+ pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
+}
+
+static void cfset_release_cpu(void *p)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ int rc;
+
+ cpuhw->dev_state = 0;
+ rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
+ if (rc)
+ pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->state, S390_HWCTR_DEVICE, rc);
+}
+
+/* This modifies the process CPU mask to adopt it to the currently online
+ * CPUs. Offline CPUs can not be addresses. This call terminates the access
+ * and is usually followed by close() or a new iotcl(..., START, ...) which
+ * creates a new request structure.
+ */
+static void cfset_all_stop(struct cfset_request *req)
+{
+ struct cfset_call_on_cpu_parm p = {
+ .sets = req->ctrset,
+ };
+
+ cpumask_and(&req->mask, &req->mask, cpu_online_mask);
+ on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
+}
+
+/* Release function is also called when application gets terminated without
+ * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ */
+static int cfset_release(struct inode *inode, struct file *file)
+{
+ mutex_lock(&cfset_ctrset_mutex);
+ /* Open followed by close/exit has no private_data */
+ if (file->private_data) {
+ cfset_all_stop(file->private_data);
+ cfset_session_del(file->private_data);
+ kfree(file->private_data);
+ file->private_data = NULL;
+ }
+ if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
+ on_each_cpu(cfset_release_cpu, NULL, 1);
+ cpum_cf_free(-1);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
}
/*
- * Commit the group events scheduling transaction. On success, the
- * transaction is closed. On error, the transaction is kept open
- * until cpumf_pmu_cancel_txn() is called.
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
*/
-static int cpumf_pmu_commit_txn(struct pmu *pmu)
+static int cfset_open(struct inode *inode, struct file *file)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- u64 state;
+ int rc = 0;
- /* check if the updated state can be scheduled */
- state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
- state >>= CPUMF_LCCTL_ENABLE_SHIFT;
- if ((state & cpuhw->info.auth_ctl) != state)
+ if (!perfmon_capable())
return -EPERM;
+ file->private_data = NULL;
+
+ mutex_lock(&cfset_ctrset_mutex);
+ if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
+ rc = cpum_cf_alloc(-1);
+ if (!rc) {
+ cfset_session_init();
+ refcount_set(&cfset_opencnt, 1);
+ }
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
- cpuhw->flags &= ~PERF_EVENT_TXN;
- perf_pmu_enable(pmu);
+ /* nonseekable_open() never fails */
+ return rc ?: nonseekable_open(inode, file);
+}
+
+static int cfset_all_start(struct cfset_request *req)
+{
+ struct cfset_call_on_cpu_parm p = {
+ .sets = req->ctrset,
+ .cpus_ack = ATOMIC_INIT(0),
+ };
+ cpumask_var_t mask;
+ int rc = 0;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_and(mask, &req->mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
+ if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+ on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
+ rc = -EIO;
+ }
+ free_cpumask_var(mask);
+ return rc;
+}
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU will be onlined during the START, READ, STOP cycles.
+ * To find out the size of the counter sets, any one CPU will do. They
+ * all have the same counter sets.
+ */
+static size_t cfset_needspace(unsigned int sets)
+{
+ size_t bytes = 0;
+ int i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (!(sets & cpumf_ctr_ctl[i]))
+ continue;
+ bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
+ sizeof(((struct s390_ctrset_setdata *)0)->set) +
+ sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+ }
+ bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+ (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+ sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+ return bytes;
+}
+
+static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
+{
+ struct s390_ctrset_read __user *ctrset_read;
+ unsigned int cpu, cpus, rc = 0;
+ void __user *uptr;
+
+ ctrset_read = (struct s390_ctrset_read __user *)arg;
+ uptr = ctrset_read->data;
+ for_each_cpu(cpu, mask) {
+ struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
+ struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+ ctrset_cpudata = uptr;
+ rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
+ rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
+ rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
+ cpuhw->used);
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
+ }
+ uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
+ cond_resched();
+ }
+ cpus = cpumask_weight(mask);
+ if (put_user(cpus, &ctrset_read->no_cpus))
+ rc = -EFAULT;
+out:
+ return rc;
+}
+
+static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+ int ctrset_size, size_t room)
+{
+ size_t need = 0;
+ int rc = -1;
+
+ need = sizeof(*p) + sizeof(u64) * ctrset_size;
+ if (need <= room) {
+ p->set = cpumf_ctr_ctl[ctrset];
+ p->no_cnts = ctrset_size;
+ rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+ if (rc == 3) /* Nothing stored */
+ need = 0;
+ }
+ return need;
+}
+
+/* Read all counter sets. */
+static void cfset_cpu_read(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+ struct cfset_call_on_cpu_parm *p = parm;
+ int set, set_size;
+ size_t space;
+
+ /* No data saved yet */
+ cpuhw->used = 0;
+ cpuhw->sets = 0;
+ memset(cpuhw->data, 0, sizeof(cpuhw->data));
+
+ /* Scan the counter sets */
+ for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+ struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
+ cpuhw->used;
+
+ if (!(p->sets & cpumf_ctr_ctl[set]))
+ continue; /* Counter set not in list */
+ set_size = cpum_cf_read_setsize(set);
+ space = sizeof(cpuhw->data) - cpuhw->used;
+ space = cfset_cpuset_read(sp, set, set_size, space);
+ if (space) {
+ cpuhw->used += space;
+ cpuhw->sets += 1;
+ }
+ }
+}
+
+static int cfset_all_read(unsigned long arg, struct cfset_request *req)
+{
+ struct cfset_call_on_cpu_parm p;
+ cpumask_var_t mask;
+ int rc;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ p.sets = req->ctrset;
+ cpumask_and(mask, &req->mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
+ rc = cfset_all_copy(arg, mask);
+ free_cpumask_var(mask);
+ return rc;
+}
+
+static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
+{
+ int ret = -ENODATA;
+
+ if (req && req->ctrset)
+ ret = cfset_all_read(arg, req);
+ return ret;
+}
+
+static long cfset_ioctl_stop(struct file *file)
+{
+ struct cfset_request *req = file->private_data;
+ int ret = -ENXIO;
+
+ if (req) {
+ cfset_all_stop(req);
+ cfset_session_del(req);
+ kfree(req);
+ file->private_data = NULL;
+ ret = 0;
+ }
+ return ret;
+}
+
+static long cfset_ioctl_start(unsigned long arg, struct file *file)
+{
+ struct s390_ctrset_start __user *ustart;
+ struct s390_ctrset_start start;
+ struct cfset_request *preq;
+ void __user *umask;
+ unsigned int len;
+ int ret = 0;
+ size_t need;
+
+ if (file->private_data)
+ return -EBUSY;
+ ustart = (struct s390_ctrset_start __user *)arg;
+ if (copy_from_user(&start, ustart, sizeof(start)))
+ return -EFAULT;
+ if (start.version != S390_HWCTR_START_VERSION)
+ return -EINVAL;
+ if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+ return -EINVAL; /* Invalid counter set */
+ if (!start.counter_sets)
+ return -EINVAL; /* No counter set at all? */
+
+ preq = kzalloc(sizeof(*preq), GFP_KERNEL);
+ if (!preq)
+ return -ENOMEM;
+ cpumask_clear(&preq->mask);
+ len = min_t(u64, start.cpumask_len, cpumask_size());
+ umask = (void __user *)start.cpumask;
+ if (copy_from_user(&preq->mask, umask, len)) {
+ kfree(preq);
+ return -EFAULT;
+ }
+ if (cpumask_empty(&preq->mask)) {
+ kfree(preq);
+ return -EINVAL;
+ }
+ need = cfset_needspace(start.counter_sets);
+ if (put_user(need, &ustart->data_bytes)) {
+ kfree(preq);
+ return -EFAULT;
+ }
+ preq->ctrset = start.counter_sets;
+ ret = cfset_all_start(preq);
+ if (!ret) {
+ cfset_session_add(preq);
+ file->private_data = preq;
+ } else {
+ kfree(preq);
+ }
+ return ret;
+}
+
+/* Entry point to the /dev/hwctr device interface.
+ * The ioctl system call supports three subcommands:
+ * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
+ * counter set keeps running until explicitly stopped. Returns the number
+ * of bytes needed to store the counter values. If another S390_HWCTR_START
+ * ioctl subcommand is called without a previous S390_HWCTR_STOP stop
+ * command on the same file descriptor, -EBUSY is returned.
+ * S390_HWCTR_READ: Read the counter set values from specified CPU list given
+ * with the S390_HWCTR_START command.
+ * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
+ * previous S390_HWCTR_START subcommand.
+ */
+static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ cpus_read_lock();
+ mutex_lock(&cfset_ctrset_mutex);
+ switch (cmd) {
+ case S390_HWCTR_START:
+ ret = cfset_ioctl_start(arg, file);
+ break;
+ case S390_HWCTR_STOP:
+ ret = cfset_ioctl_stop(file);
+ break;
+ case S390_HWCTR_READ:
+ ret = cfset_ioctl_read(arg, file->private_data);
+ break;
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+
+static const struct file_operations cfset_fops = {
+ .owner = THIS_MODULE,
+ .open = cfset_open,
+ .release = cfset_release,
+ .unlocked_ioctl = cfset_ioctl,
+};
+
+static struct miscdevice cfset_dev = {
+ .name = S390_HWCTR_DEVICE,
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &cfset_fops,
+ .mode = 0666,
+};
+
+/* Hotplug add of a CPU. Scan through all active processes and add
+ * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
+ */
+static int cfset_online_cpu(unsigned int cpu)
+{
+ struct cfset_call_on_cpu_parm p;
+ struct cfset_request *rp;
+
+ if (!list_empty(&cfset_session.head)) {
+ list_for_each_entry(rp, &cfset_session.head, node) {
+ p.sets = rp->ctrset;
+ cfset_ioctl_on(&p);
+ cpumask_set_cpu(cpu, &rp->mask);
+ }
+ }
return 0;
}
-/* Performance monitoring unit for s390x */
-static struct pmu cpumf_pmu = {
+/* Hotplug remove of a CPU. Scan through all active processes and clear
+ * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
+ */
+static int cfset_offline_cpu(unsigned int cpu)
+{
+ struct cfset_call_on_cpu_parm p;
+ struct cfset_request *rp;
+
+ if (!list_empty(&cfset_session.head)) {
+ list_for_each_entry(rp, &cfset_session.head, node) {
+ p.sets = rp->ctrset;
+ cfset_ioctl_off(&p);
+ cpumask_clear_cpu(cpu, &rp->mask);
+ }
+ }
+ return 0;
+}
+
+static void cfdiag_read(struct perf_event *event)
+{
+}
+
+static int get_authctrsets(void)
+{
+ unsigned long auth = 0;
+ enum cpumf_ctr_set i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
+ auth |= cpumf_ctr_ctl[i];
+ }
+ return auth;
+}
+
+/* Setup the event. Test for authorized counter sets and only include counter
+ * sets which are authorized at the time of the setup. Including unauthorized
+ * counter sets result in specification exception (and panic).
+ */
+static int cfdiag_event_init2(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ int err = 0;
+
+ /* Set sample_period to indicate sampling */
+ event->hw.config = attr->config;
+ event->hw.sample_period = attr->sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ local64_set(&event->count, 0);
+ event->hw.last_period = event->hw.sample_period;
+
+ /* Add all authorized counter sets to config_base. The
+ * the hardware init function is either called per-cpu or just once
+ * for all CPUS (event->cpu == -1). This depends on the whether
+ * counting is started for all CPUs or on a per workload base where
+ * the perf event moves from one CPU to another CPU.
+ * Checking the authorization on any CPU is fine as the hardware
+ * applies the same authorization settings to all CPUs.
+ */
+ event->hw.config_base = get_authctrsets();
+
+ /* No authorized counter sets, nothing to count/sample */
+ if (!event->hw.config_base)
+ err = -EINVAL;
+
+ return err;
+}
+
+static int cfdiag_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ int err = -ENOENT;
+
+ if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
+ event->attr.type != event->pmu->type)
+ goto out;
+
+ /* Raw events are used to access counters directly,
+ * hence do not permit excludes.
+ * This event is useless without PERF_SAMPLE_RAW to return counter set
+ * values as raw data.
+ */
+ if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
+ !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Initialize for using the CPU-measurement counter facility */
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
+ event->destroy = hw_perf_event_destroy;
+
+ err = cfdiag_event_init2(event);
+out:
+ return err;
+}
+
+/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
+ * to collect the complete counter sets for a scheduled process. Target
+ * are complete counter sets attached as raw data to the artificial event.
+ * This results in complete counter sets available when a process is
+ * scheduled. Contains the delta of every counter while the process was
+ * running.
+ */
+CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
+
+static struct attribute *cfdiag_events_attr[] = {
+ CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cfdiag_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cfdiag_events_group = {
+ .name = "events",
+ .attrs = cfdiag_events_attr,
+};
+static struct attribute_group cfdiag_format_group = {
+ .name = "format",
+ .attrs = cfdiag_format_attr,
+};
+static const struct attribute_group *cfdiag_attr_groups[] = {
+ &cfdiag_events_group,
+ &cfdiag_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for event CF_DIAG. Since this event
+ * is also started and stopped via the perf_event_open() system call, use
+ * the same event enable/disable call back functions. They do not
+ * have a pointer to the perf_event structure as first parameter.
+ *
+ * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
+ * Reuse them and distinguish the event (always first parameter) via
+ * 'config' member.
+ */
+static struct pmu cf_diag = {
+ .task_ctx_nr = perf_sw_context,
+ .event_init = cfdiag_event_init,
.pmu_enable = cpumf_pmu_enable,
.pmu_disable = cpumf_pmu_disable,
- .event_init = cpumf_pmu_event_init,
.add = cpumf_pmu_add,
.del = cpumf_pmu_del,
.start = cpumf_pmu_start,
.stop = cpumf_pmu_stop,
- .read = cpumf_pmu_read,
- .start_txn = cpumf_pmu_start_txn,
- .commit_txn = cpumf_pmu_commit_txn,
- .cancel_txn = cpumf_pmu_cancel_txn,
+ .read = cfdiag_read,
+
+ .attr_groups = cfdiag_attr_groups
};
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+/* Calculate memory needed to store all counter sets together with header and
+ * trailer data. This is independent of the counter set authorization which
+ * can vary depending on the configuration.
+ */
+static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
{
- unsigned int cpu = (long) hcpu;
- int flags;
+ size_t max_size = sizeof(struct cf_trailer_entry);
+ enum cpumf_ctr_set i;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- flags = PMC_INIT;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
- break;
- case CPU_DOWN_PREPARE:
- flags = PMC_RELEASE;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
- break;
- default:
- break;
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ size_t size = cpum_cf_read_setsize(i);
+
+ if (size)
+ max_size += size * sizeof(u64) +
+ sizeof(struct cf_ctrset_entry);
+ }
+ return max_size;
+}
+
+/* Get the CPU speed, try sampling facility first and CPU attributes second. */
+static void cfdiag_get_cpu_speed(void)
+{
+ unsigned long mhz;
+
+ if (cpum_sf_avail()) { /* Sampling facility first */
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si)) {
+ cfdiag_cpu_speed = si.cpu_speed;
+ return;
+ }
}
- return NOTIFY_OK;
+ /* Fallback: CPU speed extract static part. Used in case
+ * CPU Measurement Sampling Facility is turned off.
+ */
+ mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+ if (mhz != -1UL)
+ cfdiag_cpu_speed = mhz & 0xffffffff;
}
-static int __init cpumf_pmu_init(void)
+static int cfset_init(void)
{
+ size_t need;
int rc;
- if (!cpum_cf_avail())
- return -ENODEV;
-
- /* clear bit 15 of cr0 to unauthorize problem-state to
- * extract measurement counters */
- ctl_clear_bit(0, 48);
+ cfdiag_get_cpu_speed();
+ /* Make sure the counter set data fits into predefined buffer. */
+ need = cfdiag_maxsize(&cpumf_ctr_info);
+ if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
+ pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
+ need);
+ return -ENOMEM;
+ }
- /* register handler for measurement-alert interruptions */
- rc = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ rc = misc_register(&cfset_dev);
if (rc) {
- pr_err("Registering for CPU-measurement alerts "
- "failed with rc=%i\n", rc);
+ pr_err("Registration of /dev/%s failed rc=%i\n",
+ cfset_dev.name, rc);
goto out;
}
- rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
if (rc) {
- pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
- unregister_external_interrupt(0x1407, cpumf_measurement_alert);
- goto out;
+ misc_deregister(&cfset_dev);
+ pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
+ rc);
}
- perf_cpu_notifier(cpumf_pmu_notifier);
out:
return rc;
}
-early_initcall(cpumf_pmu_init);
+
+device_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000000..7ace1f9e4ccf
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,1065 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_mf.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7);
+CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb);
+CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc);
+CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd);
+CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce);
+CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
+static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z16, SORTL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION),
+ CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z17, SORTL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumcf_pmu_events_group = {
+ .name = "events",
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumcf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumcf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumcf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+ &cpumcf_pmu_events_group,
+ &cpumcf_pmu_format_group,
+ NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+ struct attribute **b,
+ struct attribute **c)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ for (i = 0; c[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ for (i = 0; c[i]; i++)
+ new[j++] = c[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+ struct attribute **combined, **model, **cfvn, **csvn;
+ struct attribute *none[] = { NULL };
+ struct cpumf_ctr_info ci;
+ struct cpuid cpu_id;
+
+ /* Determine generic counters set(s) */
+ qctri(&ci);
+ switch (ci.cfvn) {
+ case 1:
+ cfvn = cpumcf_fvn1_pmu_event_attr;
+ break;
+ case 3:
+ cfvn = cpumcf_fvn3_pmu_event_attr;
+ break;
+ default:
+ cfvn = none;
+ }
+
+ /* Determine version specific crypto set */
+ csvn = none;
+ if (ci.csvn >= 1 && ci.csvn <= 5)
+ csvn = cpumcf_svn_12345_pmu_event_attr;
+ else if (ci.csvn >= 6)
+ csvn = cpumcf_svn_678_pmu_event_attr;
+
+ /* Determine model-specific counter set(s) */
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ model = cpumcf_z10_pmu_event_attr;
+ break;
+ case 0x2817:
+ case 0x2818:
+ model = cpumcf_z196_pmu_event_attr;
+ break;
+ case 0x2827:
+ case 0x2828:
+ model = cpumcf_zec12_pmu_event_attr;
+ break;
+ case 0x2964:
+ case 0x2965:
+ model = cpumcf_z13_pmu_event_attr;
+ break;
+ case 0x3906:
+ case 0x3907:
+ model = cpumcf_z14_pmu_event_attr;
+ break;
+ case 0x8561:
+ case 0x8562:
+ model = cpumcf_z15_pmu_event_attr;
+ break;
+ case 0x3931:
+ case 0x3932:
+ model = cpumcf_z16_pmu_event_attr;
+ break;
+ case 0x9175:
+ case 0x9176:
+ model = cpumcf_z17_pmu_event_attr;
+ break;
+ default:
+ model = none;
+ break;
+ }
+
+ combined = merge_attr(cfvn, csvn, model);
+ if (combined)
+ cpumcf_pmu_events_group.attrs = combined;
+ return cpumcf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000000..459af23a47a5
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,2103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013, 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define pr_fmt(fmt) "cpum_sf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/pid.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+#include <linux/io.h>
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+ return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account. Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size Buffer characteristics
+ * ---------------------------------------------------
+ * 64KB == 16 pages (4KB per page)
+ * 1 page for SDB-tables
+ * 15 pages for SDBs
+ *
+ * 32MB == 8192 pages (4KB per page)
+ * 16 pages for SDB-tables
+ * 8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+struct aux_buffer {
+ struct sf_buffer sfb;
+ unsigned long head; /* index of SDB of buffer head */
+ unsigned long alert_mark; /* index of SDB of alert request position */
+ unsigned long empty_mark; /* mark of SDB not marked full */
+ unsigned long *sdb_index; /* SDB address for fast lookup */
+ unsigned long *sdbt_index; /* SDBT address for fast lookup */
+};
+
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+ struct perf_output_handle handle; /* AUX buffer output handle */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *)v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return ret;
+}
+
+/*
+ * Return true if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return phys_to_virt(*s & ~0x1UL);
+}
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static void sf_disable(void)
+{
+ struct hws_lsctl_request_block sreq;
+
+ memset(&sreq, 0, sizeof(sreq));
+ lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+ return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+ unsigned long *sdbt, *curr, *head;
+
+ sdbt = sfb->sdbt;
+ if (!sdbt)
+ return;
+ sfb->sdbt = NULL;
+ /* Free the SDBT after all SDBs are processed... */
+ head = sdbt;
+ curr = sdbt;
+ do {
+ if (is_link_entry(curr)) {
+ /* Process table-link entries */
+ curr = get_next_sdbt(curr);
+ free_page((unsigned long)sdbt);
+ sdbt = curr;
+ } else {
+ /* Process SDB pointer */
+ free_page((unsigned long)phys_to_virt(*curr));
+ curr++;
+ }
+ } while (curr != head);
+ memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+ struct hws_trailer_entry *te;
+ unsigned long sdb;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+ te = trailer_entry_ptr(sdb);
+ te->header.a = 1;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = virt_to_phys((void *)sdb);
+
+ return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+ unsigned long num_sdb, gfp_t gfp_flags)
+{
+ int i, rc;
+ unsigned long *new, *tail, *tail_prev = NULL;
+
+ if (!sfb->sdbt || !sfb->tail)
+ return -EINVAL;
+
+ if (!is_link_entry(sfb->tail))
+ return -EINVAL;
+
+ /* Append to the existing sampling buffer, overwriting the table-link
+ * register.
+ * The tail variables always points to the "tail" (last and table-link)
+ * entry in an SDB-table.
+ */
+ tail = sfb->tail;
+
+ /* Do a sanity check whether the table-link entry points to
+ * the sampling buffer origin.
+ */
+ if (sfb->sdbt != get_next_sdbt(tail)) {
+ debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+ __func__, (unsigned long)sfb->sdbt,
+ (unsigned long)tail);
+ return -EINVAL;
+ }
+
+ /* Allocate remaining SDBs */
+ rc = 0;
+ for (i = 0; i < num_sdb; i++) {
+ /* Allocate a new SDB-table if it is full. */
+ if (require_table_link(tail)) {
+ new = (unsigned long *)get_zeroed_page(gfp_flags);
+ if (!new) {
+ rc = -ENOMEM;
+ break;
+ }
+ sfb->num_sdbt++;
+ /* Link current page to tail of chain */
+ *tail = virt_to_phys((void *)new) + 1;
+ tail_prev = tail;
+ tail = new;
+ }
+
+ /* Allocate a new sample-data-block.
+ * If there is not enough memory, stop the realloc process
+ * and simply use what was allocated. If this is a temporary
+ * issue, a new realloc call (if required) might succeed.
+ */
+ rc = alloc_sample_data_block(tail, gfp_flags);
+ if (rc) {
+ /* Undo last SDBT. An SDBT with no SDB at its first
+ * entry but with an SDBT entry instead can not be
+ * handled by the interrupt handler code.
+ * Avoid this situation.
+ */
+ if (tail_prev) {
+ sfb->num_sdbt--;
+ free_page((unsigned long)new);
+ tail = tail_prev;
+ }
+ break;
+ }
+ sfb->num_sdb++;
+ tail++;
+ tail_prev = new = NULL; /* Allocated at least one SBD */
+ }
+
+ /* Link sampling buffer to its origin */
+ *tail = virt_to_phys(sfb->sdbt) + 1;
+ sfb->tail = tail;
+
+ return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB). For each allocation,
+ * a 4K page is used. The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+ int rc;
+
+ if (sfb->sdbt)
+ return -EINVAL;
+
+ /* Allocate the sample-data-block-table origin */
+ sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ return -ENOMEM;
+ sfb->num_sdb = 0;
+ sfb->num_sdbt = 1;
+
+ /* Link the table origin to point to itself to prepare for
+ * realloc_sampling_buffer() invocation.
+ */
+ sfb->tail = sfb->sdbt;
+ *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
+
+ /* Allocate requested number of sample-data-blocks */
+ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+ if (rc)
+ free_sampling_buffer(sfb);
+ return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+ struct hws_qsi_info_block si;
+
+ CPUM_SF_MIN_SDB = min;
+ CPUM_SF_MAX_SDB = max;
+
+ memset(&si, 0, sizeof(si));
+ qsi(&si);
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+ return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+ : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ if (!sfb->sdbt)
+ return SFB_ALLOC_REG(hwc);
+ if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+ return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+ return 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ /* Limit the number of SDBs to not exceed the maximum */
+ num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+ if (num)
+ SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ SFB_ALLOC_REG(hwc) = 0;
+ sfb_account_allocs(num, hwc);
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+ if (sf_buffer_available(cpuhw))
+ free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+ unsigned long n_sdb, freq;
+
+ /* Calculate sampling buffers using 4K pages
+ *
+ * 1. The sampling size is 32 bytes for basic sampling. This size
+ * is the same for all machine types. Diagnostic
+ * sampling uses auxlilary data buffer setup which provides the
+ * memory for SDBs using linux common code auxiliary trace
+ * setup.
+ *
+ * 2. Function alloc_sampling_buffer() sets the Alert Request
+ * Control indicator to trigger a measurement-alert to harvest
+ * sample-data-blocks (SDB). This is done per SDB. This
+ * measurement alert interrupt fires quick enough to handle
+ * one SDB, on very high frequency and work loads there might
+ * be 2 to 3 SBDs available for sample processing.
+ * Currently there is no need for setup alert request on every
+ * n-th page. This is counterproductive as one IRQ triggers
+ * a very high number of samples to be processed at one IRQ.
+ *
+ * 3. Use the sampling frequency as input.
+ * Compute the number of SDBs and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Depending on frequency add some more
+ * SDBs to handle a higher sampling rate.
+ * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+ * (one SDB) for every 10000 HZ frequency increment.
+ *
+ * 4. Compute the number of sample-data-block-tables (SDBT) and
+ * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+ * to 511 SDBs).
+ */
+ freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+ n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
+
+ /* If there is already a sampling buffer allocated, it is very likely
+ * that the sampling facility is enabled too. If the event to be
+ * initialized requires a greater sampling buffer, the allocation must
+ * be postponed. Changing the sampling buffer requires the sampling
+ * facility to be in the disabled state. So, account the number of
+ * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+ * before the event is started.
+ */
+ sfb_init_allocs(n_sdb, hwc);
+ if (sf_buffer_available(cpuhw))
+ return 0;
+
+ return alloc_sampling_buffer(&cpuhw->sfb,
+ sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+ unsigned long min)
+{
+ return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+ * Vary the extents between 1% to 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+ if (ratio <= 25)
+ return min_percent(1, base, 1);
+ if (ratio <= 50)
+ return min_percent(1, base, 1);
+ if (ratio <= 75)
+ return min_percent(2, base, 2);
+ if (ratio <= 100)
+ return min_percent(3, base, 3);
+ if (ratio <= 250)
+ return min_percent(4, base, 4);
+
+ return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+ /* The sample_overflow contains the average number of sample data
+ * that has been lost because sample-data-blocks were full.
+ *
+ * Calculate the total number of sample data entries that has been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+}
+
+/* Number of perf events counting hardware events */
+static refcount_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+static void setup_pmc_cpu(void *flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ sf_disable();
+ switch (*((int *)flags)) {
+ case PMC_INIT:
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ qsi(&cpuhw->qsi);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ deallocate_buffers(cpuhw);
+ break;
+ }
+}
+
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+}
+
+static void reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Release PMC if this is the last perf event */
+ if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static u32 cpumsf_pid_type(struct perf_event *event,
+ u32 pid, enum pid_type type)
+{
+ struct task_struct *tsk;
+
+ /* Idle process */
+ if (!pid)
+ goto out;
+
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ pid = -1;
+ if (tsk) {
+ /*
+ * Only top level events contain the pid namespace in which
+ * they are created.
+ */
+ if (event->parent)
+ event = event->parent;
+ pid = __task_pid_nr_ns(tsk, type, event->ns);
+ /*
+ * See also 1d953111b648
+ * "perf/core: Don't report zero PIDs for exiting tasks".
+ */
+ if (!pid && !pid_alive(tsk))
+ pid = -1;
+ }
+out:
+ return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ u32 pid;
+ struct perf_event_header header;
+ struct perf_output_handle handle;
+
+ /*
+ * Obtain the PID from the basic-sampling data entry and
+ * correct the data->tid_entry.pid value.
+ */
+ pid = data->tid_entry.pid;
+
+ /* Protect callchain buffers, tasks */
+ rcu_read_lock();
+
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);
+ if (perf_output_begin(&handle, data, event, header.size))
+ goto out;
+
+ /* Update the process ID (see also kernel/events/core.c) */
+ data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
+ data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+ perf_output_sample(&handle, &header, data, event);
+ perf_output_end(&handle);
+out:
+ rcu_read_unlock();
+}
+
+static unsigned long getrate(bool freq, unsigned long sample,
+ struct hws_qsi_info_block *si)
+{
+ unsigned long rate;
+
+ if (freq) {
+ rate = freq_to_sample_rate(si, sample);
+ rate = hw_limit_rate(si, rate);
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(si, sample);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(si, rate) >
+ sysctl_perf_event_sample_rate) {
+ rate = 0;
+ }
+ }
+ return rate;
+}
+
+/* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ *
+ * Since the CPU Measurement sampling facility can not handle frequency
+ * calculate the sampling interval when frequency is specified using
+ * this formula:
+ * interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off freq bit to avoid calling function
+ * perf_adjust_period(). This causes frequency adjustment in the common
+ * code part which causes tremendous variations in the counter values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+ struct hws_qsi_info_block *si)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+
+ if (attr->freq) {
+ if (!attr->sample_freq)
+ return -EINVAL;
+ rate = getrate(attr->freq, attr->sample_freq, si);
+ attr->freq = 0; /* Don't call perf_adjust_period() */
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+ } else {
+ rate = getrate(attr->freq, attr->sample_period, si);
+ if (!rate)
+ return -EINVAL;
+ }
+ attr->sample_period = rate;
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+ return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ int cpu, err = 0;
+
+ /* Reserve CPU-measurement sampling facility */
+ mutex_lock(&pmc_reserve_mutex);
+ if (!refcount_inc_not_zero(&num_events)) {
+ reserve_pmc_hardware();
+ refcount_set(&num_events, 1);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+ * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1) {
+ qsi(&si);
+ } else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
+ pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ err = __hw_perf_event_init_rate(event, &si);
+ if (err)
+ goto out;
+
+ /* Use AUX buffer. No need to allocate it by ourself */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
+ goto out;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+
+ /* If PID/TID sampling is active, replace the default overflow
+ * handler to extract and resolve the PIDs from the basic-sampling
+ * data entries.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_TID)
+ if (is_default_overflow_handler(event))
+ event->overflow_handler = cpumsf_output_event_pid;
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+ return err;
+}
+
+static bool is_callchain_event(struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_STACK_USER);
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ /* No support for callchain, stacks and registers */
+ if (has_branch_stack(event) || is_callchain_event(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+ * counter facility. However, the counter facility
+ * is more precise and, hence, restrict this PMU to
+ * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ /*
+ * Event must be
+ * - added/started on this CPU (PMU_F_IN_USE set)
+ * - and CPU must be available (PMU_F_RESERVED set)
+ * - and not already enabled (PMU_F_ENABLED not set)
+ * - and not in error condition (PMU_F_ERR_MASK not set)
+ */
+ if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED))
+ return;
+
+ /* Check whether to extent the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ hwc = &cpuhw->event->hw;
+ if (!(SAMPL_DIAG_MODE(hwc))) {
+ /*
+ * Account number of overflow-designated buffer extents
+ */
+ sfb_account_overflows(cpuhw, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+ /* Rate may be adjusted with ioctl() */
+ cpuhw->lsctl.interval = SAMPL_RATE(hwc);
+
+ /* (Re)enable the PMU and sampling facility */
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ pr_err("Loading sampling controls failed: op 1 err %i\n", err);
+ return;
+ }
+
+ /* Load current program parameter */
+ lpp(&get_lowcore()->lpp);
+ cpuhw->flags |= PMU_F_ENABLED;
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op 2 err %i\n", err);
+ return;
+ }
+
+ /*
+ * Save state of TEAR and DEAR register contents.
+ * TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ qsi(&si);
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_event_exclude() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ if (event->attr.exclude_guest && sde_regs->in_guest)
+ return 1;
+ if (event->attr.exclude_host && !sde_regs->in_guest)
+ return 1;
+ return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sample: Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample. The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event,
+ struct hws_basic_entry *basic)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ /* Setup pt_regs to look like an CPU-measurement external interrupt
+ * using the Program Request Alert code. The regs.int_parm_long
+ * field which is unused contains additional sample-data-entry related
+ * indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ psw_bits(regs.psw).ia = basic->ia;
+ psw_bits(regs.psw).dat = basic->T;
+ psw_bits(regs.psw).wait = basic->W;
+ psw_bits(regs.psw).pstate = basic->P;
+ psw_bits(regs.psw).as = basic->AS;
+
+ /*
+ * Use the hardware provided configuration level to decide if the
+ * sample belongs to a guest or host. If that is not available,
+ * fall back to the following heuristics:
+ * A non-zero guest program parameter always indicates a guest
+ * sample. Some early samples or samples from guests without
+ * lpp usage would be misaccounted to the host. We use the asn
+ * value as an addon heuristic to detect most of these guest samples.
+ * If the value differs from 0xffff (the host value), we assume to
+ * be a KVM guest.
+ */
+ switch (basic->CL) {
+ case 1: /* logical partition */
+ sde_regs->in_guest = 0;
+ break;
+ case 2: /* virtual machine */
+ sde_regs->in_guest = 1;
+ break;
+ default: /* old machine, use heuristics */
+ if (basic->gpp || basic->prim_asn != 0xffff)
+ sde_regs->in_guest = 1;
+ break;
+ }
+
+ /*
+ * Store the PID value from the sample-data-entry to be
+ * processed and resolved by cpumsf_output_event_pid().
+ */
+ data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
+ overflow = 0;
+ if (perf_event_exclude(event, &regs, sde_regs))
+ goto out;
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ struct hws_trailer_entry *te;
+ struct hws_basic_entry *sample;
+
+ te = trailer_entry_ptr((unsigned long)sdbt);
+ sample = (struct hws_basic_entry *)sdbt;
+ while ((unsigned long *)sample < (unsigned long *)te) {
+ /* Check for an empty sample */
+ if (!sample->def || sample->LS)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check whether sample is valid */
+ if (sample->def == 0x0001) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ /* Check whether sample is consistent */
+ if (sample->I == 0 && sample->W == 0) {
+ /* Deliver sample data to perf */
+ *overflow = perf_push_sample(event,
+ sample);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->header.f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample */
+ sample->def = 0;
+ sample++;
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ unsigned long long event_overflow, sampl_overflow, num_sdb;
+ struct hw_perf_event *hwc = &event->hw;
+ union hws_trailer_header prev, new;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt, sdb;
+ int done;
+
+ /*
+ * AUX buffer is used when in diagnostic sampling mode.
+ * No perf events/samples are created.
+ */
+ if (SAMPL_DIAG_MODE(hwc))
+ return;
+
+ sdbt = (unsigned long *)TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ sdb = (unsigned long)phys_to_virt(*sdbt);
+ te = trailer_entry_ptr(sdb);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->header.f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->header.overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->header.overflow;
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+ do {
+ new.val = prev.val;
+ new.f = 0;
+ new.a = 1;
+ new.overflow = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long)sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+
+ /* Perf_event_overflow() and perf_event_account_interrupt() limit
+ * the interrupt rate to an upper limit. Roughly 1000 samples per
+ * task tick.
+ * Hitting this limit results in a large number
+ * of throttled REF_REPORT_THROTTLE entries and the samples
+ * are dropped.
+ * Slightly increase the interval to avoid hitting this limit.
+ */
+ if (event_overflow)
+ SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+}
+
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+ unsigned long i)
+{
+ return i % aux->sfb.num_sdb;
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+ return end >= start ? end - start + 1 : 0;
+}
+
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->alert_mark);
+}
+
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->empty_mark);
+}
+
+/*
+ * Get trailer entry by index of SDB.
+ */
+static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
+ unsigned long index)
+{
+ unsigned long sdb;
+
+ index = aux_sdb_index(aux, index);
+ sdb = aux->sdb_index[index];
+ return trailer_entry_ptr(sdb);
+}
+
+/*
+ * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu
+ * disabled. Collect the full SDBs in AUX buffer which have not reached
+ * the point of alert indicator. And ignore the SDBs which are not
+ * full.
+ *
+ * 1. Scan SDBs to see how much data is there and consume them.
+ * 2. Remove alert indicator in the buffer.
+ */
+static void aux_output_end(struct perf_output_handle *handle)
+{
+ unsigned long i, range_scan, idx;
+ struct aux_buffer *aux;
+ struct hws_trailer_entry *te;
+
+ aux = perf_get_aux(handle);
+ if (!aux)
+ return;
+
+ range_scan = aux_sdb_num_alert(aux);
+ for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ if (!te->header.f)
+ break;
+ }
+ /* i is num of SDBs which are full */
+ perf_aux_output_end(handle, i << PAGE_SHIFT);
+
+ /* Remove alert indicators in the buffer */
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+ te->header.a = 0;
+}
+
+/*
+ * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event
+ * is first added to the CPU or rescheduled again to the CPU. It is called
+ * with pmu disabled.
+ *
+ * 1. Reset the trailer of SDBs to get ready for new data.
+ * 2. Tell the hardware where to put the data by reset the SDBs buffer
+ * head(tear/dear).
+ */
+static int aux_output_begin(struct perf_output_handle *handle,
+ struct aux_buffer *aux,
+ struct cpu_hw_sf *cpuhw)
+{
+ unsigned long range, i, range_scan, idx, head, base, offset;
+ struct hws_trailer_entry *te;
+
+ if (handle->head & ~PAGE_MASK)
+ return -EINVAL;
+
+ aux->head = handle->head >> PAGE_SHIFT;
+ range = (handle->size + 1) >> PAGE_SHIFT;
+ if (range <= 1)
+ return -ENOMEM;
+
+ /*
+ * SDBs between aux->head and aux->empty_mark are already ready
+ * for new data. range_scan is num of SDBs not within them.
+ */
+ if (range > aux_sdb_num_empty(aux)) {
+ range_scan = range - aux_sdb_num_empty(aux);
+ idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ te->header.f = 0;
+ te->header.a = 0;
+ te->header.overflow = 0;
+ }
+ /* Save the position of empty SDBs */
+ aux->empty_mark = aux->head + range - 1;
+ }
+
+ /* Set alert indicator */
+ aux->alert_mark = aux->head + range/2 - 1;
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+ te->header.a = 1;
+
+ /* Reset hardware buffer head */
+ head = aux_sdb_index(aux, aux->head);
+ base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
+ offset = head % CPUM_SF_SDB_PER_TABLE;
+ cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+ cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
+
+ return 0;
+}
+
+/*
+ * Set alert indicator on SDB at index @alert_index while sampler is running.
+ *
+ * Return true if successfully.
+ * Return false if full indicator is already set by hardware sampler.
+ */
+static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+ unsigned long long *overflow)
+{
+ union hws_trailer_header prev, new;
+ struct hws_trailer_entry *te;
+
+ te = aux_sdb_trailer(aux, alert_index);
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+ do {
+ new.val = prev.val;
+ *overflow = prev.overflow;
+ if (prev.f) {
+ /*
+ * SDB is already set by hardware.
+ * Abort and try to set somewhere
+ * behind.
+ */
+ return false;
+ }
+ new.a = 1;
+ new.overflow = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
+ return true;
+}
+
+/*
+ * aux_reset_buffer() - Scan and setup SDBs for new samples
+ * @aux: The AUX buffer to set
+ * @range: The range of SDBs to scan started from aux->head
+ * @overflow: Set to overflow count
+ *
+ * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is
+ * marked as empty, check if it is already set full by the hardware sampler.
+ * If yes, that means new data is already there before we can set an alert
+ * indicator. Caller should try to set alert indicator to some position behind.
+ *
+ * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used
+ * previously and have already been consumed by user space. Reset these SDBs
+ * (clear full indicator and alert indicator) for new data.
+ * If aux->alert_mark fall in this area, just set it. Overflow count is
+ * recorded while scanning.
+ *
+ * SDBs between aux->head and aux->empty_mark are already reset at last time.
+ * and ready for new samples. So scanning on this area could be skipped.
+ *
+ * Return true if alert indicator is set successfully and false if not.
+ */
+static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+ unsigned long long *overflow)
+{
+ union hws_trailer_header prev, new;
+ unsigned long i, range_scan, idx;
+ unsigned long long orig_overflow;
+ struct hws_trailer_entry *te;
+
+ if (range <= aux_sdb_num_empty(aux))
+ /*
+ * No need to scan. All SDBs in range are marked as empty.
+ * Just set alert indicator. Should check race with hardware
+ * sampler.
+ */
+ return aux_set_alert(aux, aux->alert_mark, overflow);
+
+ if (aux->alert_mark <= aux->empty_mark)
+ /*
+ * Set alert indicator on empty SDB. Should check race
+ * with hardware sampler.
+ */
+ if (!aux_set_alert(aux, aux->alert_mark, overflow))
+ return false;
+
+ /*
+ * Scan the SDBs to clear full and alert indicator used previously.
+ * Start scanning from one SDB behind empty_mark. If the new alert
+ * indicator fall into this range, set it.
+ */
+ range_scan = range - aux_sdb_num_empty(aux);
+ idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+ do {
+ new.val = prev.val;
+ orig_overflow = prev.overflow;
+ new.f = 0;
+ new.overflow = 0;
+ if (idx == aux->alert_mark)
+ new.a = 1;
+ else
+ new.a = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
+ *overflow += orig_overflow;
+ }
+
+ /* Update empty_mark to new position */
+ aux->empty_mark = aux->head + range - 1;
+
+ return true;
+}
+
+/*
+ * Measurement alert handler for diagnostic mode sampling.
+ */
+static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
+{
+ struct aux_buffer *aux;
+ int done = 0;
+ unsigned long range = 0, size;
+ unsigned long long overflow = 0;
+ struct perf_output_handle *handle = &cpuhw->handle;
+ unsigned long num_sdb;
+
+ aux = perf_get_aux(handle);
+ if (!aux)
+ return;
+
+ /* Inform user space new data arrived */
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+ debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
+ size >> PAGE_SHIFT);
+ perf_aux_output_end(handle, size);
+
+ num_sdb = aux->sfb.num_sdb;
+ while (!done) {
+ /* Get an output handle */
+ aux = perf_aux_output_begin(handle, cpuhw->event);
+ if (handle->size == 0) {
+ pr_err("The AUX buffer with %lu pages for the "
+ "diagnostic-sampling mode is full\n",
+ num_sdb);
+ break;
+ }
+ if (!aux)
+ return;
+
+ /* Update head and alert_mark to new position */
+ aux->head = handle->head >> PAGE_SHIFT;
+ range = (handle->size + 1) >> PAGE_SHIFT;
+ if (range == 1)
+ aux->alert_mark = aux->head;
+ else
+ aux->alert_mark = aux->head + range/2 - 1;
+
+ if (aux_reset_buffer(aux, range, &overflow)) {
+ if (!overflow) {
+ done = 1;
+ break;
+ }
+ size = range << PAGE_SHIFT;
+ perf_aux_output_end(&cpuhw->handle, size);
+ pr_err("Sample data caused the AUX buffer with %lu "
+ "pages to overflow\n", aux->sfb.num_sdb);
+ } else {
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+ perf_aux_output_end(&cpuhw->handle, size);
+ }
+ }
+}
+
+/*
+ * Callback when freeing AUX buffers.
+ */
+static void aux_buffer_free(void *data)
+{
+ struct aux_buffer *aux = data;
+ unsigned long i, num_sdbt;
+
+ if (!aux)
+ return;
+
+ /* Free SDBT. SDB is freed by the caller */
+ num_sdbt = aux->sfb.num_sdbt;
+ for (i = 0; i < num_sdbt; i++)
+ free_page(aux->sdbt_index[i]);
+
+ kfree(aux->sdbt_index);
+ kfree(aux->sdb_index);
+ kfree(aux);
+}
+
+static void aux_sdb_init(unsigned long sdb)
+{
+ struct hws_trailer_entry *te;
+
+ te = trailer_entry_ptr(sdb);
+
+ /* Save clock base */
+ te->clock_base = 1;
+ te->progusage2 = tod_clock_base.tod;
+}
+
+/*
+ * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
+ * @event: Event the buffer is setup for, event->cpu == -1 means current
+ * @pages: Array of pointers to buffer pages passed from perf core
+ * @nr_pages: Total pages
+ * @snapshot: Flag for snapshot mode
+ *
+ * This is the callback when setup an event using AUX buffer. Perf tool can
+ * trigger this by an additional mmap() call on the event. Unlike the buffer
+ * for basic samples, AUX buffer belongs to the event. It is scheduled with
+ * the task among online cpus when it is a per-thread event.
+ *
+ * Return the private AUX buffer structure if success or NULL if fails.
+ */
+static void *aux_buffer_setup(struct perf_event *event, void **pages,
+ int nr_pages, bool snapshot)
+{
+ struct sf_buffer *sfb;
+ struct aux_buffer *aux;
+ unsigned long *new, *tail;
+ int i, n_sdbt;
+
+ if (!nr_pages || !pages)
+ return NULL;
+
+ if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+ pr_err("AUX buffer size (%i pages) is larger than the "
+ "maximum sampling buffer limit\n",
+ nr_pages);
+ return NULL;
+ } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+ pr_err("AUX buffer size (%i pages) is less than the "
+ "minimum sampling buffer limit\n",
+ nr_pages);
+ return NULL;
+ }
+
+ /* Allocate aux_buffer struct for the event */
+ aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+ if (!aux)
+ goto no_aux;
+ sfb = &aux->sfb;
+
+ /* Allocate sdbt_index for fast reference */
+ n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
+ aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
+ if (!aux->sdbt_index)
+ goto no_sdbt_index;
+
+ /* Allocate sdb_index for fast reference */
+ aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
+ if (!aux->sdb_index)
+ goto no_sdb_index;
+
+ /* Allocate the first SDBT */
+ sfb->num_sdbt = 0;
+ sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ goto no_sdbt;
+ aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
+ tail = sfb->tail = sfb->sdbt;
+
+ /*
+ * Link the provided pages of AUX buffer to SDBT.
+ * Allocate SDBT if needed.
+ */
+ for (i = 0; i < nr_pages; i++, tail++) {
+ if (require_table_link(tail)) {
+ new = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!new)
+ goto no_sdbt;
+ aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
+ /* Link current page to tail of chain */
+ *tail = virt_to_phys(new) + 1;
+ tail = new;
+ }
+ /* Tail is the entry in a SDBT */
+ *tail = virt_to_phys(pages[i]);
+ aux->sdb_index[i] = (unsigned long)pages[i];
+ aux_sdb_init((unsigned long)pages[i]);
+ }
+ sfb->num_sdb = nr_pages;
+
+ /* Link the last entry in the SDBT to the first SDBT */
+ *tail = virt_to_phys(sfb->sdbt) + 1;
+ sfb->tail = tail;
+
+ /*
+ * Initial all SDBs are zeroed. Mark it as empty.
+ * So there is no need to clear the full indicator
+ * when this event is first added.
+ */
+ aux->empty_mark = sfb->num_sdb - 1;
+
+ return aux;
+
+no_sdbt:
+ /* SDBs (AUX buffer pages) are freed by caller */
+ for (i = 0; i < sfb->num_sdbt; i++)
+ free_page(aux->sdbt_index[i]);
+ kfree(aux->sdb_index);
+no_sdb_index:
+ kfree(aux->sdbt_index);
+no_sdbt_index:
+ kfree(aux);
+no_aux:
+ return NULL;
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Check if the new sampling period/frequency is appropriate.
+ *
+ * Return non-zero on error and zero on passed checks.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+ struct hws_qsi_info_block si;
+ unsigned long rate;
+ bool do_freq;
+
+ memset(&si, 0, sizeof(si));
+ if (event->cpu == -1) {
+ qsi(&si);
+ } else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+ si = cpuhw->qsi;
+ }
+
+ do_freq = !!SAMPL_FREQ_MODE(&event->hw);
+ rate = getrate(do_freq, value, &si);
+ if (!rate)
+ return -EINVAL;
+
+ event->attr.sample_period = rate;
+ SAMPL_RATE(&event->hw) = rate;
+ hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+ return 0;
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (!(event->hw.state & PERF_HES_STOPPED))
+ return;
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ /* CPU hotplug off removes SDBs. No samples to extract. */
+ if (cpuhw->flags & PMU_F_RESERVED)
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct aux_buffer *aux;
+ int err = 0;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw))
+ return -EINVAL;
+
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ if (!SAMPL_DIAG_MODE(&event->hw)) {
+ cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
+ cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt;
+ TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt;
+ }
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (SAMPL_DIAG_MODE(&event->hw)) {
+ aux = perf_aux_output_begin(&cpuhw->handle, event);
+ if (!aux) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
+ if (err)
+ goto out;
+ cpuhw->lsctl.ed = 1;
+ }
+ cpuhw->lsctl.es = 1;
+
+ /* Set in_use flag and store event */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ if (SAMPL_DIAG_MODE(&event->hw))
+ aux_output_end(&cpuhw->handle);
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+/* Attribute list for CPU_SF.
+ *
+ * The availablitiy depends on the CPU_MF sampling facility authorization
+ * for basic + diagnositic samples. This is determined at initialization
+ * time by the sampling facility device driver.
+ * If the authorization for basic samples is turned off, it should be
+ * also turned off for diagnostic sampling.
+ *
+ * During initialization of the device driver, check the authorization
+ * level for diagnostic sampling and installs the attribute
+ * file for diagnostic sampling if necessary.
+ *
+ * For now install a placeholder to reference all possible attributes:
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
+ * Add another entry for the final NULL pointer.
+ */
+enum {
+ SF_CYCLES_BASIC_ATTR_IDX = 0,
+ SF_CYCLES_BASIC_DIAG_ATTR_IDX,
+ SF_CYCLES_ATTR_MAX
+};
+
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
+ [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .attr_groups = cpumsf_pmu_attr_groups,
+
+ .setup_aux = aux_buffer_setup,
+ .free_aux = aux_buffer_free,
+
+ .check_period = cpumsf_pmu_check_period,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE) {
+ if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
+ hw_collect_aux(cpuhw);
+ else
+ hw_perf_event_update(cpuhw->event, 0);
+ }
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
+ alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=%#x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+
+static int cpusf_pmu_setup(unsigned int cpu, int flags)
+{
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!refcount_read(&num_events))
+ return 0;
+
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int s390_pmu_sf_online_cpu(unsigned int cpu)
+{
+ return cpusf_pmu_setup(cpu, PMC_INIT);
+}
+
+static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+{
+ return cpusf_pmu_setup(cpu, PMC_RELEASE);
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min %lu max %lu (diag %lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static const struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+enum {
+ RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */
+ RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */
+ RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */
+};
+
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason %#x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ qsi(&si);
+ if (!si.as && !si.ad)
+ return -ENODEV;
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad) {
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+ /* Sampling of diagnostic data authorized,
+ * install event into attribute list of PMU device.
+ */
+ cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+ }
+
+ sfdbg = debug_register("cpum_sf", 2, 1, 80);
+ if (!sfdbg) {
+ pr_err("Registering for s390dbf failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ debug_unregister(sfdbg);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ debug_unregister(sfdbg);
+ goto out;
+ }
+
+ cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
+ s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
+out:
+ return err;
+}
+
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index a6fc037671b1..606750bae508 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,44 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance event support for s390x
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
-#define KMSG_COMPONENT "perf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "perf: " fmt
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <linux/percpu.h>
-#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/sysfs.h>
+#include <asm/stacktrace.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
-
-const char *perf_pmu_name(void)
-{
- if (cpum_cf_avail() || cpum_sf_avail())
- return "CPU-measurement facilities (CPUMF)";
- return "pmu";
-}
-EXPORT_SYMBOL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
- int num = 0;
-
- if (cpum_cf_avail())
- num += PERF_CPUM_CF_MAX_CTR;
-
- return num;
-}
-EXPORT_SYMBOL(perf_num_counters);
+#include <asm/sysinfo.h>
+#include <asm/unwind.h>
static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
{
@@ -47,17 +30,18 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
if (!stack)
return NULL;
- return (struct kvm_s390_sie_block *) stack->empty1[0];
+ return (struct kvm_s390_sie_block *)stack->sie_control_block;
}
static bool is_in_guest(struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
-
if (user_mode(regs))
return false;
-
- return ip == (unsigned long) &sie_exit;
+#if IS_ENABLED(CONFIG_KVM)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
}
static unsigned long guest_is_user_mode(struct pt_regs *regs)
@@ -67,10 +51,10 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
static unsigned long instruction_pointer_guest(struct pt_regs *regs)
{
- return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+ return sie_block(regs)->gpsw.addr;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
return is_in_guest(regs) ? instruction_pointer_guest(regs)
: instruction_pointer(regs);
@@ -82,8 +66,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
: PERF_RECORD_MISC_GUEST_KERNEL;
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+ struct perf_sf_sde_regs *sde_regs;
+ unsigned long flags;
+
+ sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+ if (sde_regs->in_guest)
+ flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ return flags;
+}
+
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
+ /* Check if the cpum_sf PMU has created the pt_regs structure.
+ * In this case, perf misc flags can be easily extracted. Otherwise,
+ * do regular checks on the pt_regs content.
+ */
+ if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+ if (!regs->gprs[15])
+ return perf_misc_flags_sf(regs);
+
if (is_in_guest(regs))
return perf_misc_guest_flags(regs);
@@ -91,86 +98,132 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
: PERF_RECORD_MISC_KERNEL;
}
-void perf_event_print_debug(void)
+static void print_debug_cf(void)
{
struct cpumf_ctr_info cf_info;
- unsigned long flags;
- int cpu;
+ int cpu = smp_processor_id();
- if (!cpum_cf_avail())
- return;
-
- local_irq_save(flags);
-
- cpu = smp_processor_id();
memset(&cf_info, 0, sizeof(cf_info));
- if (!qctri(&cf_info)) {
+ if (!qctri(&cf_info))
pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
cpu, cf_info.cfvn, cf_info.csvn,
cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
- print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET,
- &cf_info, sizeof(cf_info));
- }
+}
- local_irq_restore(flags);
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
}
-/* See also arch/s390/kernel/traps.c */
-static unsigned long __store_trace(struct perf_callchain_entry *entry,
- unsigned long sp,
- unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry,
- sf->gprs[8] & PSW_ADDR_INSN);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
- }
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
+ local_irq_restore(flags);
}
-void perf_callchain_kernel(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
{
- unsigned long head;
- struct stack_frame *head_sf;
+ struct cpumf_ctr_info ci;
- if (user_mode(regs))
+ memset(&ci, 0, sizeof(ci));
+ if (qctri(&ci))
return;
- head = regs->gprs[15];
- head_sf = (struct stack_frame *) head;
+ seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+ "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
- if (!head_sf || !head_sf->back_chain)
+static void sl_print_sampling(struct seq_file *m)
+{
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ if (!si.as && !si.ad)
return;
- head = head_sf->back_chain;
- head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
+ seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+ " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+ if (si.as)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+ " sample_size=%u\n", si.bsdes);
+ if (si.ad)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+ " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ unwind_for_each_frame(&state, current, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || perf_callchain_store(entry, addr))
+ return;
+ }
+}
+
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
+}
+
+/* Perf definitions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
- __store_trace(entry, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id);
}
diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c
new file mode 100644
index 000000000000..810f5b6c5e01
--- /dev/null
+++ b/arch/s390/kernel/perf_pai.c
@@ -0,0 +1,1230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ * Copyright IBM Corp. 2026
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define pr_fmt(fmt) "pai: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+#include <asm/ctlreg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *paidbg;
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+enum {
+ PAI_PMU_CRYPTO, /* Index of PMU pai_crypto */
+ PAI_PMU_EXT, /* Index of PMU pai_ext */
+ PAI_PMU_MAX /* # of PAI PMUs */
+};
+
+enum {
+ PAIE1_CB_SZ = 0x200, /* Size of PAIE1 control block */
+ PAIE1_CTRBLOCK_SZ = 0x400 /* Size of PAIE1 counter blocks */
+};
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[PAIE1_CTRBLOCK_SZ - 3 * sizeof(u64)];
+} __packed;
+
+struct pai_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Page to store no-zero counters */
+ unsigned int active_events; /* # of PAI crypto users */
+ refcount_t refcnt; /* Reference count mapped buffers */
+ struct perf_event *event; /* Perf event for sampling */
+ struct list_head syswide_list; /* List system-wide sampling events */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+ bool fullpage; /* True: counter area is a full page */
+};
+
+struct pai_mapptr {
+ struct pai_map *mapptr;
+};
+
+static struct pai_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct pai_mapptr __percpu *mapptr;
+} pai_root[PAI_PMU_MAX];
+
+/* This table defines the different parameters of the PAI PMUs. During
+ * initialization the machine dependent values are extracted and saved.
+ * However most of the values are static and do not change.
+ * There is one table entry per PAI PMU.
+ */
+struct pai_pmu { /* Define PAI PMU characteristics */
+ const char *pmuname; /* Name of PMU */
+ const int facility_nr; /* Facility number to check for support */
+ unsigned int num_avail; /* # Counters defined by hardware */
+ unsigned int num_named; /* # Counters known by name */
+ unsigned long base; /* Counter set base number */
+ unsigned long kernel_offset; /* Offset to kernel part in counter page */
+ unsigned long area_size; /* Size of counter area */
+ const char * const *names; /* List of counter names */
+ struct pmu *pmu; /* Ptr to supporting PMU */
+ int (*init)(struct pai_pmu *p); /* PMU support init function */
+ void (*exit)(struct pai_pmu *p); /* PMU support exit function */
+ struct attribute_group *event_group; /* Ptr to attribute of events */
+};
+
+static struct pai_pmu pai_pmu[]; /* Forward declaration */
+
+/* Free per CPU data when the last event is removed. */
+static void pai_root_free(int idx)
+{
+ if (refcount_dec_and_test(&pai_root[idx].refcnt)) {
+ free_percpu(pai_root[idx].mapptr);
+ pai_root[idx].mapptr = NULL;
+ }
+ debug_sprintf_event(paidbg, 5, "%s root[%d].refcount %d\n", __func__,
+ idx, refcount_read(&pai_root[idx].refcnt));
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int pai_root_alloc(int idx)
+{
+ if (!refcount_inc_not_zero(&pai_root[idx].refcnt)) {
+ /* The memory is already zeroed. */
+ pai_root[idx].mapptr = alloc_percpu(struct pai_mapptr);
+ if (!pai_root[idx].mapptr)
+ return -ENOMEM;
+ refcount_set(&pai_root[idx].refcnt, 1);
+ }
+ return 0;
+}
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void pai_free(struct pai_mapptr *mp)
+{
+ if (mp->mapptr->fullpage)
+ free_page((unsigned long)mp->mapptr->area);
+ else
+ kfree(mp->mapptr->area);
+ kfree(mp->mapptr->paiext_cb);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void pai_event_destroy_cpu(struct perf_event *event, int cpu)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+ struct pai_map *cpump = mp->mapptr;
+
+ mutex_lock(&pai_reserve_mutex);
+ debug_sprintf_event(paidbg, 5, "%s event %#llx idx %d cpu %d users %d "
+ "refcnt %u\n", __func__, event->attr.config, idx,
+ event->cpu, cpump->active_events,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt))
+ pai_free(mp);
+ pai_root_free(idx);
+ mutex_unlock(&pai_reserve_mutex);
+}
+
+static void pai_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ pai_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ pai_event_destroy_cpu(event, event->cpu);
+ }
+}
+
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ static_branch_dec(&pai_key);
+ pai_event_destroy(event);
+}
+
+static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset)
+{
+ if (offset)
+ nr += offset / sizeof(*page);
+ return page[nr];
+}
+
+/* Read the counter values. Return value from location in CMP. For base
+ * event xxx_ALL sum up all events. Returns counter value.
+ */
+static u64 pai_getdata(struct perf_event *event, bool kernel)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct pai_map *cpump = mp->mapptr;
+ unsigned int i;
+ u64 sum = 0;
+
+ if (event->attr.config != pp->base) {
+ return pai_getctr(cpump->area,
+ event->attr.config - pp->base,
+ kernel ? pp->kernel_offset : 0);
+ }
+
+ for (i = 1; i <= pp->num_avail; i++) {
+ u64 val = pai_getctr(cpump->area, i,
+ kernel ? pp->kernel_offset : 0);
+
+ if (!val)
+ continue;
+ sum += val;
+ }
+ return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+ u64 sum = 0;
+
+ if (!event->attr.exclude_kernel)
+ sum += pai_getdata(event, true);
+ if (!event->attr.exclude_user)
+ sum += pai_getdata(event, false);
+
+ return sum;
+}
+
+/* Check concurrent access of counting and sampling for crypto events.
+ * This function is called in process context and it is save to block.
+ * When the event initialization functions fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int pai_alloc_cpu(struct perf_event *event, int cpu)
+{
+ int rc, idx = PAI_PMU_IDX(event);
+ struct pai_map *cpump = NULL;
+ bool need_paiext_cb = false;
+ struct pai_mapptr *mp;
+
+ mutex_lock(&pai_reserve_mutex);
+ /* Allocate root node */
+ rc = pai_root_alloc(idx);
+ if (rc)
+ goto unlock;
+
+ /* Allocate node for this event */
+ mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paicrypt_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto undo;
+ /* Allocate memory for counter page and counter extraction.
+ * Only the first counting event has to allocate a page.
+ */
+ mp->mapptr = cpump;
+ if (idx == PAI_PMU_CRYPTO) {
+ cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ /* free_page() can handle 0x0 address */
+ cpump->fullpage = true;
+ } else { /* PAI_PMU_EXT */
+ /*
+ * Allocate memory for counter area and counter extraction.
+ * These are
+ * - a 512 byte block and requires 512 byte boundary
+ * alignment.
+ * - a 1KB byte block and requires 1KB boundary
+ * alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * needs adjustment.
+ */
+ cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ need_paiext_cb = true;
+ }
+ cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->area || !cpump->save ||
+ (need_paiext_cb && !cpump->paiext_cb)) {
+ pai_free(mp);
+ goto undo;
+ }
+ INIT_LIST_HEAD(&cpump->syswide_list);
+ refcount_set(&cpump->refcnt, 1);
+ rc = 0;
+ } else {
+ refcount_inc(&cpump->refcnt);
+ }
+
+undo:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event in not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ pai_root_free(idx);
+ }
+unlock:
+ mutex_unlock(&pai_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+static int pai_alloc(struct perf_event *event)
+{
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ rc = pai_alloc_cpu(event, cpu);
+ if (rc) {
+ for_each_cpu(cpu, maskptr)
+ pai_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error all cpumask are freed and all events have been destroyed.
+ * Save of which CPUs data structures have been allocated for.
+ * Release them in pai_event_destroy call back function
+ * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
+/* Validate event number and return error if event is not supported.
+ * On successful return, PAI_PMU_IDX(event) is set to the index of
+ * the supporting paing_support[] array element.
+ */
+static int pai_event_valid(struct perf_event *event, int idx)
+{
+ struct perf_event_attr *a = &event->attr;
+ struct pai_pmu *pp = &pai_pmu[idx];
+
+ /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* Allow only CRYPTO_ALL/NNPA_ALL for sampling */
+ if (a->sample_period && a->config != pp->base)
+ return -EINVAL;
+ /* PAI crypto event must be in valid range, try others if not */
+ if (a->config < pp->base || a->config > pp->base + pp->num_avail)
+ return -ENOENT;
+ if (idx == PAI_PMU_EXT && a->exclude_user)
+ return -EINVAL;
+ PAI_PMU_IDX(event) = idx;
+ return 0;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int pai_event_init(struct perf_event *event, int idx)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PAI event must be valid and in supported range */
+ rc = pai_event_valid(event, idx);
+ if (rc)
+ goto out;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (event->cpu >= 0)
+ rc = pai_alloc_cpu(event, event->cpu);
+ else
+ rc = pai_alloc(event);
+ if (rc) {
+ free_page(PAI_SAVE_AREA(event));
+ goto out;
+ }
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paicrypt_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which contain the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+out:
+ return rc;
+}
+
+static int paicrypt_event_init(struct perf_event *event)
+{
+ int rc = pai_event_init(event, PAI_PMU_CRYPTO);
+
+ if (!rc) {
+ event->destroy = paicrypt_event_destroy;
+ static_branch_inc(&pai_key);
+ }
+ return rc;
+}
+
+static void pai_read(struct perf_event *event,
+ u64 (*fct)(struct perf_event *event))
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = fct(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1;
+ local64_add(delta, &event->count);
+}
+
+static void paicrypt_read(struct perf_event *event)
+{
+ pai_read(event, paicrypt_getall);
+}
+
+static void pai_start(struct perf_event *event, int flags,
+ u64 (*fct)(struct perf_event *event))
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ u64 sum;
+
+ if (!event->attr.sample_period) { /* Counting */
+ sum = fct(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ } else { /* Sampling */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
+ }
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+ pai_start(event, flags, paicrypt_getall);
+}
+
+static int pai_add(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+ unsigned long ccd;
+
+ if (++cpump->active_events == 1) {
+ if (!pcb) { /* PAI crypto */
+ ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET;
+ WRITE_ONCE(get_lowcore()->ccd, ccd);
+ local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+ } else { /* PAI extension 1 */
+ ccd = virt_to_phys(pcb);
+ WRITE_ONCE(get_lowcore()->aicd, ccd);
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
+ }
+ }
+ if (flags & PERF_EF_START)
+ pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD);
+ event->hw.state = 0;
+ return 0;
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+ return pai_add(event, flags);
+}
+
+static void pai_have_sample(struct perf_event *, struct pai_map *);
+static void pai_stop(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ pai_pmu[idx].pmu->read(event);
+ } else { /* Sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ perf_sched_cb_dec(event->pmu);
+ list_del(PAI_SWLIST(event));
+ } else {
+ pai_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
+ }
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+ pai_stop(event, flags);
+}
+
+static void pai_del(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE);
+ if (--cpump->active_events == 0) {
+ if (!pcb) { /* PAI crypto */
+ local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+ WRITE_ONCE(get_lowcore()->ccd, 0);
+ } else { /* PAI extension 1 */
+ /* Disable CPU instruction lookup for PAIE1 control block */
+ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
+ pcb->acc = 0;
+ WRITE_ONCE(get_lowcore()->aicd, 0);
+ }
+ }
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+ pai_del(event, flags);
+}
+
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with positive counter difference of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
+ struct pai_pmu *pp, unsigned long *page_old,
+ bool exclude_user, bool exclude_kernel)
+{
+ int i, outidx = 0;
+
+ for (i = 1; i <= pp->num_avail; i++) {
+ u64 val = 0, val_old = 0;
+
+ if (!exclude_kernel) {
+ val += pai_getctr(page, i, pp->kernel_offset);
+ val_old += pai_getctr(page_old, i, pp->kernel_offset);
+ }
+ if (!exclude_user) {
+ val += pai_getctr(page, i, 0);
+ val_old += pai_getctr(page_old, i, 0);
+ }
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(*userdata);
+}
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The function paicrypt_sched_task() and pai_push_sample() are not
+ * invoked after function paicrypt_del() has been called because of function
+ * perf_sched_cb_dec(). Both functions are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paicrypt_sched_task() as call back
+ * to run at context switch time.
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed an
+ * sched_task() callback. That callback is not active after paicrypt_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
+ struct perf_event *event)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ int overflow;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+ data.cpu_entry.cpu = smp_processor_id();
+ data.cpu_entry.reserved = 0;
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ perf_sample_save_raw_data(&data, event, &raw);
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Save crypto counter lowcore page after reading event data. */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+ return overflow;
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_sample(struct perf_event *event, struct pai_map *cpump)
+{
+ struct pai_pmu *pp;
+ size_t rawsize;
+
+ if (!event) /* No event active */
+ return;
+ pp = &pai_pmu[PAI_PMU_IDX(event)];
+ rawsize = pai_copy(cpump->save, cpump->area, pp,
+ (unsigned long *)PAI_SAVE_AREA(event),
+ event->attr.exclude_user,
+ event->attr.exclude_kernel);
+ if (rawsize) /* No incremented counters */
+ pai_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_samples(int idx)
+{
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ pai_have_sample(event, cpump);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, save old values.
+ */
+ if (!sched_in)
+ pai_have_samples(PAI_PMU_CRYPTO);
+}
+
+/* ============================= paiext ====================================*/
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+ pai_event_destroy(event);
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+ int rc = pai_event_init(event, PAI_PMU_EXT);
+
+ if (!rc) {
+ event->attr.exclude_kernel = true; /* No kernel space part */
+ event->destroy = paiext_event_destroy;
+ /* Offset of NNPA in paiext_cb */
+ event->hw.config_base = offsetof(struct paiext_cb, acc);
+ }
+ return rc;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+ return pai_getdata(event, false);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+ pai_read(event, paiext_getall);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+ pai_start(event, flags, paiext_getall);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+ return pai_add(event, flags);
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ pai_stop(event, flags);
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+ pai_del(event, flags);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, save old values.
+ */
+ if (!sched_in)
+ pai_have_samples(PAI_PMU_EXT);
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+ .name = "events",
+ .attrs = NULL /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+ .name = "format",
+ .attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+ &paicrypt_events_group,
+ &paicrypt_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+ .task_ctx_nr = perf_hw_context,
+ .event_init = paicrypt_event_init,
+ .add = paicrypt_add,
+ .del = paicrypt_del,
+ .start = paicrypt_start,
+ .stop = paicrypt_stop,
+ .read = paicrypt_read,
+ .sched_task = paicrypt_sched_task,
+ .attr_groups = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+ [0] = "CRYPTO_ALL",
+ [1] = "KM_DEA",
+ [2] = "KM_TDEA_128",
+ [3] = "KM_TDEA_192",
+ [4] = "KM_ENCRYPTED_DEA",
+ [5] = "KM_ENCRYPTED_TDEA_128",
+ [6] = "KM_ENCRYPTED_TDEA_192",
+ [7] = "KM_AES_128",
+ [8] = "KM_AES_192",
+ [9] = "KM_AES_256",
+ [10] = "KM_ENCRYPTED_AES_128",
+ [11] = "KM_ENCRYPTED_AES_192",
+ [12] = "KM_ENCRYPTED_AES_256",
+ [13] = "KM_XTS_AES_128",
+ [14] = "KM_XTS_AES_256",
+ [15] = "KM_XTS_ENCRYPTED_AES_128",
+ [16] = "KM_XTS_ENCRYPTED_AES_256",
+ [17] = "KMC_DEA",
+ [18] = "KMC_TDEA_128",
+ [19] = "KMC_TDEA_192",
+ [20] = "KMC_ENCRYPTED_DEA",
+ [21] = "KMC_ENCRYPTED_TDEA_128",
+ [22] = "KMC_ENCRYPTED_TDEA_192",
+ [23] = "KMC_AES_128",
+ [24] = "KMC_AES_192",
+ [25] = "KMC_AES_256",
+ [26] = "KMC_ENCRYPTED_AES_128",
+ [27] = "KMC_ENCRYPTED_AES_192",
+ [28] = "KMC_ENCRYPTED_AES_256",
+ [29] = "KMC_PRNG",
+ [30] = "KMA_GCM_AES_128",
+ [31] = "KMA_GCM_AES_192",
+ [32] = "KMA_GCM_AES_256",
+ [33] = "KMA_GCM_ENCRYPTED_AES_128",
+ [34] = "KMA_GCM_ENCRYPTED_AES_192",
+ [35] = "KMA_GCM_ENCRYPTED_AES_256",
+ [36] = "KMF_DEA",
+ [37] = "KMF_TDEA_128",
+ [38] = "KMF_TDEA_192",
+ [39] = "KMF_ENCRYPTED_DEA",
+ [40] = "KMF_ENCRYPTED_TDEA_128",
+ [41] = "KMF_ENCRYPTED_TDEA_192",
+ [42] = "KMF_AES_128",
+ [43] = "KMF_AES_192",
+ [44] = "KMF_AES_256",
+ [45] = "KMF_ENCRYPTED_AES_128",
+ [46] = "KMF_ENCRYPTED_AES_192",
+ [47] = "KMF_ENCRYPTED_AES_256",
+ [48] = "KMCTR_DEA",
+ [49] = "KMCTR_TDEA_128",
+ [50] = "KMCTR_TDEA_192",
+ [51] = "KMCTR_ENCRYPTED_DEA",
+ [52] = "KMCTR_ENCRYPTED_TDEA_128",
+ [53] = "KMCTR_ENCRYPTED_TDEA_192",
+ [54] = "KMCTR_AES_128",
+ [55] = "KMCTR_AES_192",
+ [56] = "KMCTR_AES_256",
+ [57] = "KMCTR_ENCRYPTED_AES_128",
+ [58] = "KMCTR_ENCRYPTED_AES_192",
+ [59] = "KMCTR_ENCRYPTED_AES_256",
+ [60] = "KMO_DEA",
+ [61] = "KMO_TDEA_128",
+ [62] = "KMO_TDEA_192",
+ [63] = "KMO_ENCRYPTED_DEA",
+ [64] = "KMO_ENCRYPTED_TDEA_128",
+ [65] = "KMO_ENCRYPTED_TDEA_192",
+ [66] = "KMO_AES_128",
+ [67] = "KMO_AES_192",
+ [68] = "KMO_AES_256",
+ [69] = "KMO_ENCRYPTED_AES_128",
+ [70] = "KMO_ENCRYPTED_AES_192",
+ [71] = "KMO_ENCRYPTED_AES_256",
+ [72] = "KIMD_SHA_1",
+ [73] = "KIMD_SHA_256",
+ [74] = "KIMD_SHA_512",
+ [75] = "KIMD_SHA3_224",
+ [76] = "KIMD_SHA3_256",
+ [77] = "KIMD_SHA3_384",
+ [78] = "KIMD_SHA3_512",
+ [79] = "KIMD_SHAKE_128",
+ [80] = "KIMD_SHAKE_256",
+ [81] = "KIMD_GHASH",
+ [82] = "KLMD_SHA_1",
+ [83] = "KLMD_SHA_256",
+ [84] = "KLMD_SHA_512",
+ [85] = "KLMD_SHA3_224",
+ [86] = "KLMD_SHA3_256",
+ [87] = "KLMD_SHA3_384",
+ [88] = "KLMD_SHA3_512",
+ [89] = "KLMD_SHAKE_128",
+ [90] = "KLMD_SHAKE_256",
+ [91] = "KMAC_DEA",
+ [92] = "KMAC_TDEA_128",
+ [93] = "KMAC_TDEA_192",
+ [94] = "KMAC_ENCRYPTED_DEA",
+ [95] = "KMAC_ENCRYPTED_TDEA_128",
+ [96] = "KMAC_ENCRYPTED_TDEA_192",
+ [97] = "KMAC_AES_128",
+ [98] = "KMAC_AES_192",
+ [99] = "KMAC_AES_256",
+ [100] = "KMAC_ENCRYPTED_AES_128",
+ [101] = "KMAC_ENCRYPTED_AES_192",
+ [102] = "KMAC_ENCRYPTED_AES_256",
+ [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+ [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+ [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+ [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+ [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+ [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+ [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+ [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+ [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+ [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+ [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+ [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256",
+ [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+ [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+ [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+ [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+ [119] = "PCC_SCALAR_MULTIPLY_P256",
+ [120] = "PCC_SCALAR_MULTIPLY_P384",
+ [121] = "PCC_SCALAR_MULTIPLY_P521",
+ [122] = "PCC_SCALAR_MULTIPLY_ED25519",
+ [123] = "PCC_SCALAR_MULTIPLY_ED448",
+ [124] = "PCC_SCALAR_MULTIPLY_X25519",
+ [125] = "PCC_SCALAR_MULTIPLY_X448",
+ [126] = "PRNO_SHA_512_DRNG",
+ [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+ [128] = "PRNO_TRNG",
+ [129] = "KDSA_ECDSA_VERIFY_P256",
+ [130] = "KDSA_ECDSA_VERIFY_P384",
+ [131] = "KDSA_ECDSA_VERIFY_P521",
+ [132] = "KDSA_ECDSA_SIGN_P256",
+ [133] = "KDSA_ECDSA_SIGN_P384",
+ [134] = "KDSA_ECDSA_SIGN_P521",
+ [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+ [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+ [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+ [138] = "KDSA_EDDSA_VERIFY_ED25519",
+ [139] = "KDSA_EDDSA_VERIFY_ED448",
+ [140] = "KDSA_EDDSA_SIGN_ED25519",
+ [141] = "KDSA_EDDSA_SIGN_ED448",
+ [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+ [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+ [144] = "PCKMO_ENCRYPT_DEA_KEY",
+ [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+ [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+ [147] = "PCKMO_ENCRYPT_AES_128_KEY",
+ [148] = "PCKMO_ENCRYPT_AES_192_KEY",
+ [149] = "PCKMO_ENCRYPT_AES_256_KEY",
+ [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+ [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+ [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+ [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+ [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+ [155] = "IBM_RESERVED_155",
+ [156] = "IBM_RESERVED_156",
+ [157] = "KM_FULL_XTS_AES_128",
+ [158] = "KM_FULL_XTS_AES_256",
+ [159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+ [160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+ [161] = "KMAC_HMAC_SHA_224",
+ [162] = "KMAC_HMAC_SHA_256",
+ [163] = "KMAC_HMAC_SHA_384",
+ [164] = "KMAC_HMAC_SHA_512",
+ [165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+ [166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+ [167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+ [168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+ [169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+ [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+ [171] = "PCKMO_ENCRYPT_AES_XTS_128",
+ [172] = "PCKMO_ENCRYPT_AES_XTS_256",
+};
+
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+ .task_ctx_nr = perf_hw_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+ [28] = "NNPA_TRANSFORM",
+ [29] = "NNPA_GELU",
+ [30] = "NNPA_MOMENTS",
+ [31] = "NNPA_LAYERNORM",
+ [32] = "NNPA_MATMUL_OP_BCAST1",
+ [33] = "NNPA_SQRT",
+ [34] = "NNPA_INVSQRT",
+ [35] = "NNPA_NORM",
+ [36] = "NNPA_REDUCE",
+};
+
+static void __init attr_event_free(struct attribute **attrs)
+{
+ struct perf_pmu_events_attr *pa;
+ unsigned int i;
+
+ for (i = 0; attrs[i]; i++) {
+ struct device_attribute *dap;
+
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static struct attribute * __init attr_event_init_one(int num,
+ unsigned long base,
+ const char *name)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return NULL;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = base + num;
+ pa->attr.attr.name = name;
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ return &pa->attr.attr;
+}
+
+static struct attribute ** __init attr_event_init(struct pai_pmu *p)
+{
+ unsigned int min_attr = min_t(unsigned int, p->num_named, p->num_avail);
+ struct attribute **attrs;
+ unsigned int i;
+
+ attrs = kmalloc_array(min_attr + 1, sizeof(*attrs), GFP_KERNEL | __GFP_ZERO);
+ if (!attrs)
+ goto out;
+ for (i = 0; i < min_attr; i++) {
+ attrs[i] = attr_event_init_one(i, p->base, p->names[i]);
+ if (!attrs[i]) {
+ attr_event_free(attrs);
+ attrs = NULL;
+ goto out;
+ }
+ }
+ attrs[i] = NULL;
+out:
+ return attrs;
+}
+
+static void __init pai_pmu_exit(struct pai_pmu *p)
+{
+ attr_event_free(p->event_group->attrs);
+ p->event_group->attrs = NULL;
+}
+
+/* Add a PMU. Install its events and register the PMU device driver
+ * call back functions.
+ */
+static int __init pai_pmu_init(struct pai_pmu *p)
+{
+ int rc = -ENOMEM;
+
+
+ /* Export known PAI events */
+ p->event_group->attrs = attr_event_init(p);
+ if (!p->event_group->attrs) {
+ pr_err("Creation of PMU %s /sysfs failed\n", p->pmuname);
+ goto out;
+ }
+
+ rc = perf_pmu_register(p->pmu, p->pmuname, -1);
+ if (rc) {
+ pai_pmu_exit(p);
+ pr_err("Registering PMU %s failed with rc=%i\n", p->pmuname,
+ rc);
+ }
+out:
+ return rc;
+}
+
+/* PAI PMU characteristics table */
+static struct pai_pmu pai_pmu[] __refdata = {
+ [PAI_PMU_CRYPTO] = {
+ .pmuname = "pai_crypto",
+ .facility_nr = 196,
+ .num_named = ARRAY_SIZE(paicrypt_ctrnames),
+ .names = paicrypt_ctrnames,
+ .base = PAI_CRYPTO_BASE,
+ .kernel_offset = PAI_CRYPTO_KERNEL_OFFSET,
+ .area_size = PAGE_SIZE,
+ .init = pai_pmu_init,
+ .exit = pai_pmu_exit,
+ .pmu = &paicrypt,
+ .event_group = &paicrypt_events_group
+ },
+ [PAI_PMU_EXT] = {
+ .pmuname = "pai_ext",
+ .facility_nr = 197,
+ .num_named = ARRAY_SIZE(paiext_ctrnames),
+ .names = paiext_ctrnames,
+ .base = PAI_NNPA_BASE,
+ .kernel_offset = 0,
+ .area_size = PAIE1_CTRBLOCK_SZ,
+ .init = pai_pmu_init,
+ .exit = pai_pmu_exit,
+ .pmu = &paiext,
+ .event_group = &paiext_events_group
+ }
+};
+
+/*
+ * Check if the PMU (via facility) is supported by machine. Try all of the
+ * supported PAI PMUs.
+ * Return number of successfully installed PMUs.
+ */
+static int __init paipmu_setup(void)
+{
+ struct qpaci_info_block ib;
+ int install_ok = 0, rc;
+ struct pai_pmu *p;
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(pai_pmu); ++i) {
+ p = &pai_pmu[i];
+
+ if (!test_facility(p->facility_nr))
+ continue;
+
+ qpaci(&ib);
+ switch (i) {
+ case PAI_PMU_CRYPTO:
+ p->num_avail = ib.num_cc;
+ if (p->num_avail >= PAI_CRYPTO_MAXCTR) {
+ pr_err("Too many PMU %s counters %d\n",
+ p->pmuname, p->num_avail);
+ continue;
+ }
+ break;
+ case PAI_PMU_EXT:
+ p->num_avail = ib.num_nnpa;
+ break;
+ }
+ p->num_avail += 1; /* Add xxx_ALL event */
+ if (p->init) {
+ rc = p->init(p);
+ if (!rc)
+ ++install_ok;
+ }
+ }
+ return install_ok;
+}
+
+static int __init pai_init(void)
+{
+ /* Setup s390dbf facility */
+ paidbg = debug_register("pai", 32, 256, 128);
+ if (!paidbg) {
+ pr_err("Registration of s390dbf pai failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(paidbg, &debug_sprintf_view);
+
+ if (!paipmu_setup()) {
+ /* No PMU registration, no need for debug buffer */
+ debug_unregister_view(paidbg, &debug_sprintf_view);
+ debug_unregister(paidbg);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+device_initcall(pai_init);
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
new file mode 100644
index 000000000000..7b305f1456f8
--- /dev/null
+++ b/arch/s390/kernel/perf_regs.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <linux/perf_regs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/ptrace.h>
+#include <asm/fpu.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ freg_t fp;
+
+ if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
+ return regs->gprs[idx];
+
+ if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) {
+ if (!user_mode(regs))
+ return 0;
+
+ idx -= PERF_REG_S390_FP0;
+ fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
+ return fp.ui;
+ }
+
+ if (idx == PERF_REG_S390_MASK)
+ return regs->psw.mask;
+ if (idx == PERF_REG_S390_PC)
+ return regs->psw.addr;
+
+ WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX);
+ return 0;
+}
+
+#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs)
+{
+ /*
+ * Use the regs from the first interruption and let
+ * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()).
+ *
+ * Also save FPU registers for user-space tasks only.
+ */
+ regs_user->regs = task_pt_regs(current);
+ if (user_mode(regs_user->regs))
+ save_user_fpu_regs();
+ regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
deleted file mode 100644
index 14bdecb61923..000000000000
--- a/arch/s390/kernel/pgm_check.S
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Program check table.
- *
- * Copyright IBM Corp. 2012
- */
-
-#include <linux/linkage.h>
-
-#ifdef CONFIG_32BIT
-#define PGM_CHECK_64BIT(handler) .long default_trap_handler
-#else
-#define PGM_CHECK_64BIT(handler) .long handler
-#endif
-
-#define PGM_CHECK(handler) .long handler
-#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
-
-/*
- * The program check table contains exactly 128 (0x00-0x7f) entries. Each
- * line defines the 31 and/or 64 bit function to be called corresponding
- * to the program check interruption code.
- */
-.section .rodata, "a"
-ENTRY(pgm_check_table)
-PGM_CHECK_DEFAULT /* 00 */
-PGM_CHECK(illegal_op) /* 01 */
-PGM_CHECK(privileged_op) /* 02 */
-PGM_CHECK(execute_exception) /* 03 */
-PGM_CHECK(do_protection_exception) /* 04 */
-PGM_CHECK(addressing_exception) /* 05 */
-PGM_CHECK(specification_exception) /* 06 */
-PGM_CHECK(data_exception) /* 07 */
-PGM_CHECK(overflow_exception) /* 08 */
-PGM_CHECK(divide_exception) /* 09 */
-PGM_CHECK(overflow_exception) /* 0a */
-PGM_CHECK(divide_exception) /* 0b */
-PGM_CHECK(hfp_overflow_exception) /* 0c */
-PGM_CHECK(hfp_underflow_exception) /* 0d */
-PGM_CHECK(hfp_significance_exception) /* 0e */
-PGM_CHECK(hfp_divide_exception) /* 0f */
-PGM_CHECK(do_dat_exception) /* 10 */
-PGM_CHECK(do_dat_exception) /* 11 */
-PGM_CHECK(translation_exception) /* 12 */
-PGM_CHECK(special_op_exception) /* 13 */
-PGM_CHECK_DEFAULT /* 14 */
-PGM_CHECK(operand_exception) /* 15 */
-PGM_CHECK_DEFAULT /* 16 */
-PGM_CHECK_DEFAULT /* 17 */
-PGM_CHECK_64BIT(transaction_exception) /* 18 */
-PGM_CHECK_DEFAULT /* 19 */
-PGM_CHECK_DEFAULT /* 1a */
-PGM_CHECK_DEFAULT /* 1b */
-PGM_CHECK(space_switch_exception) /* 1c */
-PGM_CHECK(hfp_sqrt_exception) /* 1d */
-PGM_CHECK_DEFAULT /* 1e */
-PGM_CHECK_DEFAULT /* 1f */
-PGM_CHECK_DEFAULT /* 20 */
-PGM_CHECK_DEFAULT /* 21 */
-PGM_CHECK_DEFAULT /* 22 */
-PGM_CHECK_DEFAULT /* 23 */
-PGM_CHECK_DEFAULT /* 24 */
-PGM_CHECK_DEFAULT /* 25 */
-PGM_CHECK_DEFAULT /* 26 */
-PGM_CHECK_DEFAULT /* 27 */
-PGM_CHECK_DEFAULT /* 28 */
-PGM_CHECK_DEFAULT /* 29 */
-PGM_CHECK_DEFAULT /* 2a */
-PGM_CHECK_DEFAULT /* 2b */
-PGM_CHECK_DEFAULT /* 2c */
-PGM_CHECK_DEFAULT /* 2d */
-PGM_CHECK_DEFAULT /* 2e */
-PGM_CHECK_DEFAULT /* 2f */
-PGM_CHECK_DEFAULT /* 30 */
-PGM_CHECK_DEFAULT /* 31 */
-PGM_CHECK_DEFAULT /* 32 */
-PGM_CHECK_DEFAULT /* 33 */
-PGM_CHECK_DEFAULT /* 34 */
-PGM_CHECK_DEFAULT /* 35 */
-PGM_CHECK_DEFAULT /* 36 */
-PGM_CHECK_DEFAULT /* 37 */
-PGM_CHECK_64BIT(do_asce_exception) /* 38 */
-PGM_CHECK_64BIT(do_dat_exception) /* 39 */
-PGM_CHECK_64BIT(do_dat_exception) /* 3a */
-PGM_CHECK_64BIT(do_dat_exception) /* 3b */
-PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
-PGM_CHECK_DEFAULT /* 3f */
-PGM_CHECK_DEFAULT /* 40 */
-PGM_CHECK_DEFAULT /* 41 */
-PGM_CHECK_DEFAULT /* 42 */
-PGM_CHECK_DEFAULT /* 43 */
-PGM_CHECK_DEFAULT /* 44 */
-PGM_CHECK_DEFAULT /* 45 */
-PGM_CHECK_DEFAULT /* 46 */
-PGM_CHECK_DEFAULT /* 47 */
-PGM_CHECK_DEFAULT /* 48 */
-PGM_CHECK_DEFAULT /* 49 */
-PGM_CHECK_DEFAULT /* 4a */
-PGM_CHECK_DEFAULT /* 4b */
-PGM_CHECK_DEFAULT /* 4c */
-PGM_CHECK_DEFAULT /* 4d */
-PGM_CHECK_DEFAULT /* 4e */
-PGM_CHECK_DEFAULT /* 4f */
-PGM_CHECK_DEFAULT /* 50 */
-PGM_CHECK_DEFAULT /* 51 */
-PGM_CHECK_DEFAULT /* 52 */
-PGM_CHECK_DEFAULT /* 53 */
-PGM_CHECK_DEFAULT /* 54 */
-PGM_CHECK_DEFAULT /* 55 */
-PGM_CHECK_DEFAULT /* 56 */
-PGM_CHECK_DEFAULT /* 57 */
-PGM_CHECK_DEFAULT /* 58 */
-PGM_CHECK_DEFAULT /* 59 */
-PGM_CHECK_DEFAULT /* 5a */
-PGM_CHECK_DEFAULT /* 5b */
-PGM_CHECK_DEFAULT /* 5c */
-PGM_CHECK_DEFAULT /* 5d */
-PGM_CHECK_DEFAULT /* 5e */
-PGM_CHECK_DEFAULT /* 5f */
-PGM_CHECK_DEFAULT /* 60 */
-PGM_CHECK_DEFAULT /* 61 */
-PGM_CHECK_DEFAULT /* 62 */
-PGM_CHECK_DEFAULT /* 63 */
-PGM_CHECK_DEFAULT /* 64 */
-PGM_CHECK_DEFAULT /* 65 */
-PGM_CHECK_DEFAULT /* 66 */
-PGM_CHECK_DEFAULT /* 67 */
-PGM_CHECK_DEFAULT /* 68 */
-PGM_CHECK_DEFAULT /* 69 */
-PGM_CHECK_DEFAULT /* 6a */
-PGM_CHECK_DEFAULT /* 6b */
-PGM_CHECK_DEFAULT /* 6c */
-PGM_CHECK_DEFAULT /* 6d */
-PGM_CHECK_DEFAULT /* 6e */
-PGM_CHECK_DEFAULT /* 6f */
-PGM_CHECK_DEFAULT /* 70 */
-PGM_CHECK_DEFAULT /* 71 */
-PGM_CHECK_DEFAULT /* 72 */
-PGM_CHECK_DEFAULT /* 73 */
-PGM_CHECK_DEFAULT /* 74 */
-PGM_CHECK_DEFAULT /* 75 */
-PGM_CHECK_DEFAULT /* 76 */
-PGM_CHECK_DEFAULT /* 77 */
-PGM_CHECK_DEFAULT /* 78 */
-PGM_CHECK_DEFAULT /* 79 */
-PGM_CHECK_DEFAULT /* 7a */
-PGM_CHECK_DEFAULT /* 7b */
-PGM_CHECK_DEFAULT /* 7c */
-PGM_CHECK_DEFAULT /* 7d */
-PGM_CHECK_DEFAULT /* 7e */
-PGM_CHECK_DEFAULT /* 7f */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 2bc3eddae34a..0df95dcb2101 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This file handles the architecture dependent parts of process handling.
*
@@ -7,9 +8,13 @@
* Denis Joseph Barrow,
*/
+#include <linux/elf-randomize.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
@@ -19,93 +24,90 @@
#include <linux/tick.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
-#include <linux/compat.h>
#include <linux/kprobes.h>
#include <linux/random.h>
-#include <linux/module.h>
-#include <asm/io.h>
+#include <linux/init_task.h>
+#include <linux/entry-common.h>
+#include <linux/io.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
+#include <asm/switch_to.h>
+#include <asm/cpu_mf.h>
#include <asm/processor.h>
+#include <asm/ptrace.h>
#include <asm/vtimer.h>
#include <asm/exec.h>
+#include <asm/fpu.h>
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
-#include <asm/switch_to.h>
+#include <asm/stacktrace.h>
#include <asm/runtime_instr.h>
+#include <asm/unwind.h>
#include "entry.h"
-asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+void ret_from_fork(void) asm("ret_from_fork");
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
{
- struct stack_frame *sf, *low, *high;
+ void (*func)(void *arg);
- if (!tsk || !task_stack_page(tsk))
- return 0;
- low = task_stack_page(tsk);
- high = (struct stack_frame *) task_pt_regs(tsk);
- sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
- if (sf <= low || sf > high)
- return 0;
- sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
- if (sf <= low || sf > high)
- return 0;
- return sf->gprs[8];
-}
+ schedule_tail(prev);
-void arch_cpu_idle(void)
-{
- local_mcck_disable();
- if (test_thread_flag(TIF_MCCK_PENDING)) {
- local_mcck_enable();
- local_irq_enable();
- return;
+ if (!user_mode(regs)) {
+ /* Kernel thread */
+ func = (void *)regs->gprs[9];
+ func((void *)regs->gprs[10]);
}
- /* Halt the cpu and keep track of cpu time accounting. */
- vtime_stop_cpu();
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ syscall_exit_to_user_mode(regs);
}
-void arch_cpu_idle_exit(void)
+void flush_thread(void)
{
- if (test_thread_flag(TIF_MCCK_PENDING))
- s390_handle_mcck();
}
-void arch_cpu_idle_dead(void)
+void arch_setup_new_exec(void)
{
- cpu_die();
+ if (get_lowcore()->current_pid != current->pid) {
+ get_lowcore()->current_pid = current->pid;
+ if (test_facility(40))
+ lpp(&get_lowcore()->lpp);
+ }
}
-extern void __kprobes kernel_thread_starter(void);
-
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
+void arch_release_task_struct(struct task_struct *tsk)
{
- exit_thread_runtime_instr();
+ runtime_instr_release(tsk);
+ guarded_storage_release(tsk);
}
-void flush_thread(void)
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
-}
+ save_user_fpu_regs();
-void release_thread(struct task_struct *dead_task)
-{
+ *dst = *src;
+ dst->thread.kfpu_flags = 0;
+
+ /*
+ * Don't transfer over the runtime instrumentation or the guarded
+ * storage control block pointers. These fields are cleared here instead
+ * of in copy_thread() to avoid premature freeing of associated memory
+ * on fork() failure. Wait to clear the RI flag because ->stack still
+ * refers to the source thread.
+ */
+ dst->thread.ri_cb = NULL;
+ dst->thread.gs_cb = NULL;
+ dst->thread.gs_bc_cb = NULL;
+
+ return 0;
}
-int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long arg, struct task_struct *p)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- struct thread_info *ti;
+ u64 clone_flags = args->flags;
+ unsigned long new_stackp = args->stack;
+ unsigned long tls = args->tls;
struct fake_frame
{
struct stack_frame sf;
@@ -117,158 +119,130 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
/* start new process with ar4 pointing to the correct address space */
- p->thread.mm_segment = get_fs();
/* Don't copy debug registers */
memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
- clear_tsk_thread_flag(p, TIF_PER_TRAP);
+ p->thread.per_flags = 0;
/* Initialize per thread user and system timer values */
- ti = task_thread_info(p);
- ti->user_timer = 0;
- ti->system_timer = 0;
+ p->thread.user_timer = 0;
+ p->thread.guest_timer = 0;
+ p->thread.system_timer = 0;
+ p->thread.hardirq_timer = 0;
+ p->thread.softirq_timer = 0;
+ p->thread.last_break = 1;
frame->sf.back_chain = 0;
+ frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs;
+ frame->sf.gprs[12 - 6] = (unsigned long)p;
/* new return point is ret_from_fork */
- frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork;
/* fake return stack for resume(), don't go back to schedule */
- frame->sf.gprs[9] = (unsigned long) frame;
+ frame->sf.gprs[15 - 6] = (unsigned long)frame;
/* Store access registers to kernel stack of new process. */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- frame->childregs.psw.addr = PSW_ADDR_AMODE |
- (unsigned long) kernel_thread_starter;
- frame->childregs.gprs[9] = new_stackp; /* function */
- frame->childregs.gprs[10] = arg;
- frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.gprs[9] = (unsigned long)args->fn;
+ frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
frame->childregs.orig_gpr2 = -1;
-
+ frame->childregs.last_break = 1;
return 0;
}
frame->childregs = *current_pt_regs();
frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
-
- /* Don't copy runtime instrumentation info */
- p->thread.ri_cb = NULL;
- p->thread.ri_signum = 0;
- frame->childregs.psw.mask &= ~PSW_MASK_RI;
-
-#ifndef CONFIG_64BIT
/*
- * save fprs to current->thread.fp_regs to merge them with
- * the emulated registers and then copy the result to the child.
+ * Clear the runtime instrumentation flag after the above childregs
+ * copy. The CB pointer was already cleared in arch_dup_task_struct().
*/
- save_fp_regs(&current->thread.fp_regs);
- memcpy(&p->thread.fp_regs, &current->thread.fp_regs,
- sizeof(s390_fp_regs));
- /* Set a new TLS ? */
- if (clone_flags & CLONE_SETTLS)
- p->thread.acrs[0] = frame->childregs.gprs[6];
-#else /* CONFIG_64BIT */
- /* Save the fpu registers to new thread structure. */
- save_fp_regs(&p->thread.fp_regs);
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
- unsigned long tls = frame->childregs.gprs[6];
- if (is_compat_task()) {
- p->thread.acrs[0] = (unsigned int)tls;
- } else {
- p->thread.acrs[0] = (unsigned int)(tls >> 32);
- p->thread.acrs[1] = (unsigned int)tls;
- }
+ p->thread.acrs[0] = (unsigned int)(tls >> 32);
+ p->thread.acrs[1] = (unsigned int)tls;
}
-#endif /* CONFIG_64BIT */
+ /*
+ * s390 stores the svc return address in arch_data when calling
+ * sigreturn()/restart_syscall() via vdso. 1 means no valid address
+ * stored.
+ */
+ p->restart_block.arch_data = 1;
return 0;
}
-asmlinkage void execve_tail(void)
+void execve_tail(void)
{
- current->thread.fp_regs.fpc = 0;
- if (MACHINE_HAS_IEEE)
- asm volatile("sfpc %0,%0" : : "d" (0));
+ current->thread.ufpu.fpc = 0;
+ fpu_sfpc(0);
}
-/*
- * fill in the FPU structure for a core dump.
- */
-int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
+struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next)
{
-#ifndef CONFIG_64BIT
- /*
- * save fprs to current->thread.fp_regs to merge them with
- * the emulated registers and then copy the result to the dump.
- */
- save_fp_regs(&current->thread.fp_regs);
- memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs));
-#else /* CONFIG_64BIT */
- save_fp_regs(fpregs);
-#endif /* CONFIG_64BIT */
- return 1;
+ save_user_fpu_regs();
+ save_kernel_fpu_regs(&prev->thread);
+ save_access_regs(&prev->thread.acrs[0]);
+ save_ri_cb(prev->thread.ri_cb);
+ save_gs_cb(prev->thread.gs_cb);
+ update_cr_regs(next);
+ restore_kernel_fpu_regs(&next->thread);
+ restore_access_regs(&next->thread.acrs[0]);
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);
+ restore_gs_cb(next->thread.gs_cb);
+ return __switch_to_asm(prev, next);
}
-EXPORT_SYMBOL(dump_fpu);
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
- struct stack_frame *sf, *low, *high;
- unsigned long return_address;
- int count;
+ struct unwind_state state;
+ unsigned long ip = 0;
- if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
+ if (!task_stack_page(p))
return 0;
- low = task_stack_page(p);
- high = (struct stack_frame *) task_pt_regs(p);
- sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
- if (sf <= low || sf > high)
+
+ if (!try_get_task_stack(p))
return 0;
- for (count = 0; count < 16; count++) {
- sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
- if (sf <= low || sf > high)
- return 0;
- return_address = sf->gprs[8] & PSW_ADDR_INSN;
- if (!in_sched_functions(return_address))
- return return_address;
+
+ unwind_for_each_frame(&state, p, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK) {
+ ip = 0;
+ break;
+ }
+
+ ip = unwind_get_return_address(&state);
+ if (!ip)
+ break;
+
+ if (!in_sched_functions(ip))
+ break;
}
- return 0;
+
+ put_task_stack(p);
+ return ip;
}
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= get_random_u32_below(PAGE_SIZE);
return sp & ~0xf;
}
static inline unsigned long brk_rnd(void)
{
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
- else
- return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+ return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
-
- if (ret < mm->brk)
- return mm->brk;
- return ret;
-}
-
-unsigned long randomize_et_dyn(unsigned long base)
-{
- unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+ unsigned long ret;
- if (!(current->flags & PF_RANDOMIZE))
- return base;
- if (ret < base)
- return base;
- return ret;
+ ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ return (ret > mm->brk) ? ret : mm->brk;
}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 753c41d0ffd3..e33a3eccda56 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -1,91 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
+#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
+#include <linux/bitops.h>
#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/sched/mm.h>
#include <linux/init.h>
-#include <linux/smp.h>
#include <linux/seq_file.h>
+#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <asm/text-patching.h>
+#include <asm/machine.h>
+#include <asm/diag.h>
+#include <asm/facility.h>
#include <asm/elf.h>
#include <asm/lowcore.h>
#include <asm/param.h>
+#include <asm/sclp.h>
+#include <asm/smp.h>
+
+unsigned long __read_mostly elf_hwcap;
+char elf_platform[ELF_PLATFORM_SIZE];
+
+struct cpu_info {
+ unsigned int cpu_mhz_dynamic;
+ unsigned int cpu_mhz_static;
+ struct cpuid cpu_id;
+};
+
+static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry);
+
+static bool machine_has_cpu_mhz;
+
+void __init cpu_detect_mhz_feature(void)
+{
+ if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL)
+ machine_has_cpu_mhz = true;
+}
+
+static void update_cpu_mhz(void *arg)
+{
+ unsigned long mhz;
+ struct cpu_info *c;
+
+ mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+ c = this_cpu_ptr(&cpu_info);
+ c->cpu_mhz_dynamic = mhz >> 32;
+ c->cpu_mhz_static = mhz & 0xffffffff;
+}
+
+void s390_update_cpu_mhz(void)
+{
+ s390_adjust_jiffies();
+ if (machine_has_cpu_mhz)
+ on_each_cpu(update_cpu_mhz, NULL, 0);
+}
+
+void notrace stop_machine_yield(const struct cpumask *cpumask)
+{
+ int cpu, this_cpu;
+
+ this_cpu = smp_processor_id();
+ if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
+ __this_cpu_write(cpu_relax_retry, 0);
+ cpu = cpumask_next_wrap(this_cpu, cpumask);
+ if (cpu >= nr_cpu_ids)
+ return;
+ if (arch_vcpu_is_preempted(cpu))
+ smp_yield_cpu(cpu);
+ }
+}
+
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
-static DEFINE_PER_CPU(struct cpuid, cpu_id);
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
/*
* cpu_init - initializes state that is per-CPU.
*/
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
- struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
- struct cpuid *id = &__get_cpu_var(cpu_id);
+ struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id);
get_cpu_id(id);
- atomic_inc(&init_mm.mm_count);
+ if (machine_has_cpu_mhz)
+ update_cpu_mhz(NULL);
+ mmgrab(&init_mm);
current->active_mm = &init_mm;
BUG_ON(current->mm);
enter_lazy_tlb(&init_mm, current);
- memset(idle, 0, sizeof(*idle));
}
-/*
- * show_cpuinfo - Get information on one CPU for use by procfs.
- */
-static int show_cpuinfo(struct seq_file *m, void *v)
+static void show_facilities(struct seq_file *m)
+{
+ unsigned int bit;
+
+ seq_puts(m, "facilities :");
+ for_each_set_bit_inv(bit, (long *)&stfle_fac_list, MAX_FACILITY_BIT)
+ seq_printf(m, " %d", bit);
+ seq_putc(m, '\n');
+}
+
+static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
- "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te"
+ [HWCAP_NR_ESAN3] = "esan3",
+ [HWCAP_NR_ZARCH] = "zarch",
+ [HWCAP_NR_STFLE] = "stfle",
+ [HWCAP_NR_MSA] = "msa",
+ [HWCAP_NR_LDISP] = "ldisp",
+ [HWCAP_NR_EIMM] = "eimm",
+ [HWCAP_NR_DFP] = "dfp",
+ [HWCAP_NR_HPAGE] = "edat",
+ [HWCAP_NR_ETF3EH] = "etf3eh",
+ [HWCAP_NR_HIGH_GPRS] = "highgprs",
+ [HWCAP_NR_TE] = "te",
+ [HWCAP_NR_VXRS] = "vx",
+ [HWCAP_NR_VXRS_BCD] = "vxd",
+ [HWCAP_NR_VXRS_EXT] = "vxe",
+ [HWCAP_NR_GS] = "gs",
+ [HWCAP_NR_VXRS_EXT2] = "vxe2",
+ [HWCAP_NR_VXRS_PDE] = "vxp",
+ [HWCAP_NR_SORT] = "sort",
+ [HWCAP_NR_DFLT] = "dflt",
+ [HWCAP_NR_VXRS_PDE2] = "vxp2",
+ [HWCAP_NR_NNPA] = "nnpa",
+ [HWCAP_NR_PCI_MIO] = "pcimio",
+ [HWCAP_NR_SIE] = "sie",
};
- unsigned long n = (unsigned long) v - 1;
- int i;
-
- if (!n) {
- s390_adjust_jiffies();
- seq_printf(m, "vendor_id : IBM/S390\n"
- "# processors : %i\n"
- "bogomips per cpu: %lu.%02lu\n",
- num_online_cpus(), loops_per_jiffy/(500000/HZ),
- (loops_per_jiffy/(5000/HZ))%100);
- seq_puts(m, "features\t: ");
- for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
- if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
- seq_printf(m, "%s ", hwcap_str[i]);
- seq_puts(m, "\n");
- show_cacheinfo(m);
- }
- get_online_cpus();
- if (cpu_online(n)) {
- struct cpuid *id = &per_cpu(cpu_id, n);
- seq_printf(m, "processor %li: "
+ int i, cpu;
+
+ BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX);
+ seq_printf(m, "vendor_id : IBM/S390\n"
+ "# processors : %i\n"
+ "bogomips per cpu: %lu.%02lu\n",
+ num_online_cpus(), loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ))%100);
+ seq_printf(m, "max thread id : %d\n", smp_cpu_mtid);
+ seq_puts(m, "features\t: ");
+ for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+ if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", hwcap_str[i]);
+ seq_puts(m, "\n");
+ show_facilities(m);
+ show_cacheinfo(m);
+ for_each_online_cpu(cpu) {
+ struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
+
+ seq_printf(m, "processor %d: "
"version = %02X, "
"identification = %06X, "
"machine = %04X\n",
- n, id->version, id->ident, id->machine);
+ cpu, id->version, id->ident, id->machine);
+ }
+}
+
+static int __init setup_hwcaps(void)
+{
+ /* instructions named N3, "backported" to esa-mode */
+ elf_hwcap |= HWCAP_ESAN3;
+
+ /* z/Architecture mode active */
+ elf_hwcap |= HWCAP_ZARCH;
+
+ /* store-facility-list-extended */
+ if (test_facility(7))
+ elf_hwcap |= HWCAP_STFLE;
+
+ /* message-security assist */
+ if (test_facility(17))
+ elf_hwcap |= HWCAP_MSA;
+
+ /* long-displacement */
+ if (test_facility(19))
+ elf_hwcap |= HWCAP_LDISP;
+
+ /* extended-immediate */
+ elf_hwcap |= HWCAP_EIMM;
+
+ /* extended-translation facility 3 enhancement */
+ if (test_facility(22) && test_facility(30))
+ elf_hwcap |= HWCAP_ETF3EH;
+
+ /* decimal floating point & perform floating point operation */
+ if (test_facility(42) && test_facility(44))
+ elf_hwcap |= HWCAP_DFP;
+
+ /* huge page support */
+ if (cpu_has_edat1())
+ elf_hwcap |= HWCAP_HPAGE;
+
+ /* 64-bit register support for 31-bit processes */
+ elf_hwcap |= HWCAP_HIGH_GPRS;
+
+ /* transactional execution */
+ if (machine_has_tx())
+ elf_hwcap |= HWCAP_TE;
+
+ /* vector */
+ if (test_facility(129)) {
+ elf_hwcap |= HWCAP_VXRS;
+ if (test_facility(134))
+ elf_hwcap |= HWCAP_VXRS_BCD;
+ if (test_facility(135))
+ elf_hwcap |= HWCAP_VXRS_EXT;
+ if (test_facility(148))
+ elf_hwcap |= HWCAP_VXRS_EXT2;
+ if (test_facility(152))
+ elf_hwcap |= HWCAP_VXRS_PDE;
+ if (test_facility(192))
+ elf_hwcap |= HWCAP_VXRS_PDE2;
+ }
+
+ if (test_facility(150))
+ elf_hwcap |= HWCAP_SORT;
+
+ if (test_facility(151))
+ elf_hwcap |= HWCAP_DFLT;
+
+ if (test_facility(165))
+ elf_hwcap |= HWCAP_NNPA;
+
+ /* guarded storage */
+ if (cpu_has_gs())
+ elf_hwcap |= HWCAP_GS;
+
+ if (test_machine_feature(MFEATURE_PCI_MIO))
+ elf_hwcap |= HWCAP_PCI_MIO;
+
+ /* virtualization support */
+ if (sclp.has_sief2)
+ elf_hwcap |= HWCAP_SIE;
+
+ return 0;
+}
+arch_initcall(setup_hwcaps);
+
+static int __init setup_elf_platform(void)
+{
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ add_device_randomness(&cpu_id, sizeof(cpu_id));
+ switch (cpu_id.machine) {
+ default: /* Use "z10" as default. */
+ strscpy(elf_platform, "z10");
+ break;
+ case 0x2817:
+ case 0x2818:
+ strscpy(elf_platform, "z196");
+ break;
+ case 0x2827:
+ case 0x2828:
+ strscpy(elf_platform, "zEC12");
+ break;
+ case 0x2964:
+ case 0x2965:
+ strscpy(elf_platform, "z13");
+ break;
+ case 0x3906:
+ case 0x3907:
+ strscpy(elf_platform, "z14");
+ break;
+ case 0x8561:
+ case 0x8562:
+ strscpy(elf_platform, "z15");
+ break;
+ case 0x3931:
+ case 0x3932:
+ strscpy(elf_platform, "z16");
+ break;
+ case 0x9175:
+ case 0x9176:
+ strscpy(elf_platform, "z17");
+ break;
}
- put_online_cpus();
return 0;
}
+arch_initcall(setup_elf_platform);
+
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+ seq_printf(m, "physical id : %d\n", topology_physical_package_id(n));
+ seq_printf(m, "core id : %d\n", topology_core_id(n));
+ seq_printf(m, "book id : %d\n", topology_book_id(n));
+ seq_printf(m, "drawer id : %d\n", topology_drawer_id(n));
+ seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n));
+ seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n));
+ seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n)));
+ seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+ struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+ seq_printf(m, "version : %02X\n", id->version);
+ seq_printf(m, "identification : %06X\n", id->ident);
+ seq_printf(m, "machine : %04X\n", id->machine);
+}
+
+static void show_cpu_mhz(struct seq_file *m, unsigned long n)
+{
+ struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+
+ if (!machine_has_cpu_mhz)
+ return;
+ seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
+ seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ unsigned long n = (unsigned long) v - 1;
+ unsigned long first = cpumask_first(cpu_online_mask);
+
+ if (n == first)
+ show_cpu_summary(m, v);
+ seq_printf(m, "\ncpu number : %ld\n", n);
+ show_cpu_topology(m, n);
+ show_cpu_ids(m, n);
+ show_cpu_mhz(m, n);
+ return 0;
+}
+
+static inline void *c_update(loff_t *pos)
+{
+ if (*pos)
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ else
+ *pos = cpumask_first(cpu_online_mask);
+ return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
+}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+ cpus_read_lock();
+ return c_update(pos);
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
- return c_start(m, pos);
+ return c_update(pos);
}
static void c_stop(struct seq_file *m, void *v)
{
+ cpus_read_unlock();
}
const struct seq_operations cpuinfo_op = {
@@ -94,4 +384,3 @@ const struct seq_operations cpuinfo_op = {
.stop = c_stop,
.show = show_cpuinfo,
};
-
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index a314c57f4e94..ceaa1726e328 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Ptrace user space interface.
*
@@ -8,6 +9,8 @@
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
@@ -18,96 +21,117 @@
#include <linux/signal.h>
#include <linux/elf.h>
#include <linux/regset.h>
-#include <linux/tracehook.h>
#include <linux/seccomp.h>
-#include <linux/compat.h>
#include <trace/syscall.h>
-#include <asm/segment.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unistd.h>
-#include <asm/switch_to.h>
-#include "entry.h"
-
-#ifdef CONFIG_COMPAT
-#include "compat_ptrace.h"
-#endif
+#include <asm/runtime_instr.h>
+#include <asm/facility.h>
+#include <asm/machine.h>
+#include <asm/ptrace.h>
+#include <asm/rwonce.h>
+#include <asm/fpu.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-enum s390_regset {
- REGSET_GENERAL,
- REGSET_FP,
- REGSET_LAST_BREAK,
- REGSET_TDB,
- REGSET_SYSTEM_CALL,
- REGSET_GENERAL_EXTENDED,
-};
+#include "entry.h"
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
- struct per_regs old, new;
-
-#ifdef CONFIG_64BIT
+ union ctlreg0 cr0_old, cr0_new;
+ union ctlreg2 cr2_old, cr2_new;
+ int cr0_changed, cr2_changed;
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } old, new;
+
+ local_ctl_store(0, &cr0_old.reg);
+ local_ctl_store(2, &cr2_old.reg);
+ cr0_new = cr0_old;
+ cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE) {
- unsigned long cr0, cr0_new;
-
- __ctl_store(cr0, 0, 0);
- /* set or clear transaction execution bits 8 and 9. */
+ if (machine_has_tx()) {
+ /* Set or clear transaction execution TXC bit 8. */
+ cr0_new.tcx = 1;
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr0_new = cr0 & ~(3UL << 54);
- else
- cr0_new = cr0 | (3UL << 54);
- /* Only load control register 0 if necessary. */
- if (cr0 != cr0_new)
- __ctl_load(cr0_new, 0, 0);
+ cr0_new.tcx = 0;
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ cr2_new.tdc = 0;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr2_new.tdc = 1;
+ else
+ cr2_new.tdc = 2;
+ }
+ }
+ /* Take care of enable/disable of guarded storage. */
+ if (cpu_has_gs()) {
+ cr2_new.gse = 0;
+ if (task->thread.gs_cb)
+ cr2_new.gse = 1;
}
-#endif
+ /* Load control register 0/2 iff changed */
+ cr0_changed = cr0_new.val != cr0_old.val;
+ cr2_changed = cr2_new.val != cr2_old.val;
+ if (cr0_changed)
+ local_ctl_load(0, &cr0_new.reg);
+ if (cr2_changed)
+ local_ctl_load(2, &cr2_new.reg);
/* Copy user specified PER registers */
- new.control = thread->per_user.control;
- new.start = thread->per_user.start;
- new.end = thread->per_user.end;
+ new.control.val = thread->per_user.control;
+ new.start.val = thread->per_user.start;
+ new.end.val = thread->per_user.end;
/* merge TIF_SINGLE_STEP into user specified PER registers. */
- if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
- new.control |= PER_EVENT_IFETCH;
-#ifdef CONFIG_64BIT
- new.control |= PER_CONTROL_SUSPENSION;
- new.control |= PER_EVENT_TRANSACTION_END;
-#endif
- new.start = 0;
- new.end = PSW_ADDR_INSN;
+ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+ test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
+ if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+ new.control.val |= PER_EVENT_BRANCH;
+ else
+ new.control.val |= PER_EVENT_IFETCH;
+ new.control.val |= PER_CONTROL_SUSPENSION;
+ new.control.val |= PER_EVENT_TRANSACTION_END;
+ if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+ new.control.val |= PER_EVENT_IFETCH;
+ new.start.val = 0;
+ new.end.val = -1UL;
}
/* Take care of the PER enablement bit in the PSW. */
- if (!(new.control & PER_EVENT_MASK)) {
+ if (!(new.control.val & PER_EVENT_MASK)) {
regs->psw.mask &= ~PSW_MASK_PER;
return;
}
regs->psw.mask |= PSW_MASK_PER;
- __ctl_store(old, 9, 11);
+ __local_ctl_store(9, 11, old.regs);
if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
- __ctl_load(new, 9, 11);
+ __local_ctl_load(9, 11, new.regs);
}
void user_enable_single_step(struct task_struct *task)
{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
set_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_per_regs(task);
}
void user_disable_single_step(struct task_struct *task)
{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_per_regs(task);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ set_tsk_thread_flag(task, TIF_BLOCK_STEP);
}
/*
@@ -124,47 +148,41 @@ void ptrace_disable(struct task_struct *task)
task->thread.per_flags = 0;
}
-#ifndef CONFIG_64BIT
-# define __ADDR_MASK 3
-#else
-# define __ADDR_MASK 7
-#endif
+#define __ADDR_MASK 7
static inline unsigned long __peek_user_per(struct task_struct *child,
addr_t addr)
{
- struct per_struct_kernel *dummy = NULL;
-
- if (addr == (addr_t) &dummy->cr9)
+ if (addr == offsetof(struct per_struct_kernel, cr9))
/* Control bits of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
PER_EVENT_IFETCH : child->thread.per_user.control;
- else if (addr == (addr_t) &dummy->cr10)
+ else if (addr == offsetof(struct per_struct_kernel, cr10))
/* Start address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
0 : child->thread.per_user.start;
- else if (addr == (addr_t) &dummy->cr11)
+ else if (addr == offsetof(struct per_struct_kernel, cr11))
/* End address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
- PSW_ADDR_INSN : child->thread.per_user.end;
- else if (addr == (addr_t) &dummy->bits)
+ -1UL : child->thread.per_user.end;
+ else if (addr == offsetof(struct per_struct_kernel, bits))
/* Single-step bit. */
return test_thread_flag(TIF_SINGLE_STEP) ?
(1UL << (BITS_PER_LONG - 1)) : 0;
- else if (addr == (addr_t) &dummy->starting_addr)
+ else if (addr == offsetof(struct per_struct_kernel, starting_addr))
/* Start address of the user specified per set. */
return child->thread.per_user.start;
- else if (addr == (addr_t) &dummy->ending_addr)
+ else if (addr == offsetof(struct per_struct_kernel, ending_addr))
/* End address of the user specified per set. */
return child->thread.per_user.end;
- else if (addr == (addr_t) &dummy->perc_atmid)
+ else if (addr == offsetof(struct per_struct_kernel, perc_atmid))
/* PER code, ATMID and AI of the last PER trap */
return (unsigned long)
child->thread.per_event.cause << (BITS_PER_LONG - 16);
- else if (addr == (addr_t) &dummy->address)
+ else if (addr == offsetof(struct per_struct_kernel, address))
/* Address of the last PER trap */
return child->thread.per_event.address;
- else if (addr == (addr_t) &dummy->access_id)
+ else if (addr == offsetof(struct per_struct_kernel, access_id))
/* Access id of the last PER trap */
return (unsigned long)
child->thread.per_event.paid << (BITS_PER_LONG - 8);
@@ -182,63 +200,65 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
*/
static unsigned long __peek_user(struct task_struct *child, addr_t addr)
{
- struct user *dummy = NULL;
addr_t offset, tmp;
- if (addr < (addr_t) &dummy->regs.acrs) {
+ if (addr < offsetof(struct user, regs.acrs)) {
/*
* psw and gprs are stored on the stack
*/
tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
- if (addr == (addr_t) &dummy->regs.psw.mask)
+ if (addr == offsetof(struct user, regs.psw.mask)) {
/* Return a clean psw mask. */
- tmp = psw_user_bits | (tmp & PSW_MASK_USER);
+ tmp &= PSW_MASK_USER | PSW_MASK_RI;
+ tmp |= PSW_USER_BITS;
+ }
- } else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr < offsetof(struct user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_64BIT
+ offset = addr - offsetof(struct user, regs.acrs);
/*
* Very special case: old & broken 64 bit gdb reading
* from acrs[15]. Result is a 64 bit value. Read the
* 32 bit acrs[15] value and shift it by 32. Sick...
*/
- if (addr == (addr_t) &dummy->regs.acrs[15])
+ if (addr == offsetof(struct user, regs.acrs[15]))
tmp = ((unsigned long) child->thread.acrs[15]) << 32;
else
-#endif
- tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+ tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
- } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr == offsetof(struct user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
- } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ } else if (addr < offsetof(struct user, regs.fp_regs)) {
/*
* prevent reads of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
tmp = 0;
- } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
- /*
- * floating point regs. are stored in the thread structure
+ } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
+ /*
+ * floating point control reg. is in the thread structure
*/
- offset = addr - (addr_t) &dummy->regs.fp_regs;
- tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
- tmp &= (unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32);
+ tmp = child->thread.ufpu.fpc;
+ tmp <<= BITS_PER_LONG - 32;
- } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+ /*
+ * floating point regs. are in the child->thread.ufpu.vxrs array
+ */
+ offset = addr - offsetof(struct user, regs.fp_regs.fprs);
+ tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
+ } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy->regs.per_info;
+ addr -= offsetof(struct user, regs.per_info);
tmp = __peek_user_per(child, addr);
} else
@@ -257,11 +277,9 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
* an alignment of 4. Programmers from hell...
*/
mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
- if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
- addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ if (addr >= offsetof(struct user, regs.acrs) &&
+ addr < offsetof(struct user, regs.orig_gpr2))
mask = 3;
-#endif
if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
@@ -272,8 +290,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
static inline void __poke_user_per(struct task_struct *child,
addr_t addr, addr_t data)
{
- struct per_struct_kernel *dummy = NULL;
-
/*
* There are only three fields in the per_info struct that the
* debugger user can write to.
@@ -286,14 +302,14 @@ static inline void __poke_user_per(struct task_struct *child,
* addresses are used only if single stepping is not in effect.
* Writes to any other field in per_info are ignored.
*/
- if (addr == (addr_t) &dummy->cr9)
+ if (addr == offsetof(struct per_struct_kernel, cr9))
/* PER event mask of the user specified per set. */
child->thread.per_user.control =
data & (PER_EVENT_MASK | PER_CONTROL_MASK);
- else if (addr == (addr_t) &dummy->starting_addr)
+ else if (addr == offsetof(struct per_struct_kernel, starting_addr))
/* Starting address of the user specified per set. */
child->thread.per_user.start = data;
- else if (addr == (addr_t) &dummy->ending_addr)
+ else if (addr == offsetof(struct per_struct_kernel, ending_addr))
/* Ending address of the user specified per set. */
child->thread.per_user.end = data;
}
@@ -306,67 +322,84 @@ static inline void __poke_user_per(struct task_struct *child,
*/
static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
{
- struct user *dummy = NULL;
addr_t offset;
- if (addr < (addr_t) &dummy->regs.acrs) {
+
+ if (addr < offsetof(struct user, regs.acrs)) {
+ struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
- if (addr == (addr_t) &dummy->regs.psw.mask &&
- ((data & ~PSW_MASK_USER) != psw_user_bits ||
- ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
- /* Invalid psw mask. */
- return -EINVAL;
- *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
+ if (addr == offsetof(struct user, regs.psw.mask)) {
+ unsigned long mask = PSW_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
+ return -EINVAL;
+ }
- } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct user, regs.gprs[2])) {
+ struct pt_regs *regs = task_pt_regs(child);
+
+ regs->int_code = 0x20000 | (data & 0xffff);
+ }
+ *(addr_t *)((addr_t) &regs->psw + addr) = data;
+ } else if (addr < offsetof(struct user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_64BIT
+ offset = addr - offsetof(struct user, regs.acrs);
/*
* Very special case: old & broken 64 bit gdb writing
* to acrs[15] with a 64 bit value. Ignore the lower
* half of the value and write the upper 32 bit to
* acrs[15]. Sick...
*/
- if (addr == (addr_t) &dummy->regs.acrs[15])
+ if (addr == offsetof(struct user, regs.acrs[15]))
child->thread.acrs[15] = (unsigned int) (data >> 32);
else
-#endif
- *(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+ *(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
- } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr == offsetof(struct user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
task_pt_regs(child)->orig_gpr2 = data;
- } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ } else if (addr < offsetof(struct user, regs.fp_regs)) {
/*
* prevent writes of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
return 0;
- } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
/*
- * floating point regs. are stored in the thread structure
+ * floating point control reg. is in the thread structure
*/
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc &&
- (data & ~((unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32))) != 0)
+ if ((unsigned int)data != 0)
return -EINVAL;
- offset = addr - (addr_t) &dummy->regs.fp_regs;
- *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
+ child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32);
- } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+ /*
+ * floating point regs. are in the child->thread.ufpu.vxrs array
+ */
+ offset = addr - offsetof(struct user, regs.fp_regs.fprs);
+ *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
+ } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy->regs.per_info;
+ addr -= offsetof(struct user, regs.per_info);
__poke_user_per(child, addr, data);
}
@@ -383,11 +416,9 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
* an alignment of 4. Programmers from hell indeed...
*/
mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
- if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
- addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ if (addr >= offsetof(struct user, regs.acrs) &&
+ addr < offsetof(struct user, regs.orig_gpr2))
mask = 3;
-#endif
if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
@@ -435,365 +466,40 @@ long arch_ptrace(struct task_struct *child, long request,
}
return 0;
case PTRACE_GET_LAST_BREAK:
- put_user(task_thread_info(child)->last_break,
- (unsigned long __user *) data);
- return 0;
+ return put_user(child->thread.last_break, (unsigned long __user *)data);
case PTRACE_ENABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags &= ~PER_FLAG_NO_TE;
return 0;
case PTRACE_DISABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
return 0;
- default:
- /* Removing high order bit from addr (only for 31 bit). */
- addr &= PSW_ADDR_INSN;
- return ptrace_request(child, request, addr, data);
- }
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * Now the fun part starts... a 31 bit program running in the
- * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
- * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
- * to handle, the difference to the 64 bit versions of the requests
- * is that the access is done in multiples of 4 byte instead of
- * 8 bytes (sizeof(unsigned long) on 31/64 bit).
- * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
- * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
- * is a 31 bit program too, the content of struct user can be
- * emulated. A 31 bit program peeking into the struct user of
- * a 64 bit program is a no-no.
- */
-
-/*
- * Same as peek_user_per but for a 31 bit program.
- */
-static inline __u32 __peek_user_per_compat(struct task_struct *child,
- addr_t addr)
-{
- struct compat_per_struct_kernel *dummy32 = NULL;
-
- if (addr == (addr_t) &dummy32->cr9)
- /* Control bits of the active per set. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- PER_EVENT_IFETCH : child->thread.per_user.control;
- else if (addr == (addr_t) &dummy32->cr10)
- /* Start address of the active per set. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- 0 : child->thread.per_user.start;
- else if (addr == (addr_t) &dummy32->cr11)
- /* End address of the active per set. */
- return test_thread_flag(TIF_SINGLE_STEP) ?
- PSW32_ADDR_INSN : child->thread.per_user.end;
- else if (addr == (addr_t) &dummy32->bits)
- /* Single-step bit. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- 0x80000000 : 0;
- else if (addr == (addr_t) &dummy32->starting_addr)
- /* Start address of the user specified per set. */
- return (__u32) child->thread.per_user.start;
- else if (addr == (addr_t) &dummy32->ending_addr)
- /* End address of the user specified per set. */
- return (__u32) child->thread.per_user.end;
- else if (addr == (addr_t) &dummy32->perc_atmid)
- /* PER code, ATMID and AI of the last PER trap */
- return (__u32) child->thread.per_event.cause << 16;
- else if (addr == (addr_t) &dummy32->address)
- /* Address of the last PER trap */
- return (__u32) child->thread.per_event.address;
- else if (addr == (addr_t) &dummy32->access_id)
- /* Access id of the last PER trap */
- return (__u32) child->thread.per_event.paid << 24;
- return 0;
-}
-
-/*
- * Same as peek_user but for a 31 bit program.
- */
-static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
-{
- struct compat_user *dummy32 = NULL;
- addr_t offset;
- __u32 tmp;
-
- if (addr < (addr_t) &dummy32->regs.acrs) {
- struct pt_regs *regs = task_pt_regs(child);
- /*
- * psw and gprs are stored on the stack
- */
- if (addr == (addr_t) &dummy32->regs.psw.mask) {
- /* Fake a 31 bit psw mask. */
- tmp = (__u32)(regs->psw.mask >> 32);
- tmp = psw32_user_bits | (tmp & PSW32_MASK_USER);
- } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
- /* Fake a 31 bit psw address. */
- tmp = (__u32) regs->psw.addr |
- (__u32)(regs->psw.mask & PSW_MASK_BA);
- } else {
- /* gpr 0-15 */
- tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
- }
- } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
- /*
- * access registers are stored in the thread structure
- */
- offset = addr - (addr_t) &dummy32->regs.acrs;
- tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
-
- } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
- /*
- * orig_gpr2 is stored on the kernel stack
- */
- tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
-
- } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
- /*
- * prevent reads of padding hole between
- * orig_gpr2 and fp_regs on s390.
- */
- tmp = 0;
-
- } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
- /*
- * floating point regs. are stored in the thread structure
- */
- offset = addr - (addr_t) &dummy32->regs.fp_regs;
- tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset);
-
- } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
- /*
- * Handle access to the per_info structure.
- */
- addr -= (addr_t) &dummy32->regs.per_info;
- tmp = __peek_user_per_compat(child, addr);
-
- } else
- tmp = 0;
-
- return tmp;
-}
-
-static int peek_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- __u32 tmp;
-
- if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
- return -EIO;
-
- tmp = __peek_user_compat(child, addr);
- return put_user(tmp, (__u32 __user *) data);
-}
-
-/*
- * Same as poke_user_per but for a 31 bit program.
- */
-static inline void __poke_user_per_compat(struct task_struct *child,
- addr_t addr, __u32 data)
-{
- struct compat_per_struct_kernel *dummy32 = NULL;
-
- if (addr == (addr_t) &dummy32->cr9)
- /* PER event mask of the user specified per set. */
- child->thread.per_user.control =
- data & (PER_EVENT_MASK | PER_CONTROL_MASK);
- else if (addr == (addr_t) &dummy32->starting_addr)
- /* Starting address of the user specified per set. */
- child->thread.per_user.start = data;
- else if (addr == (addr_t) &dummy32->ending_addr)
- /* Ending address of the user specified per set. */
- child->thread.per_user.end = data;
-}
-
-/*
- * Same as poke_user but for a 31 bit program.
- */
-static int __poke_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- struct compat_user *dummy32 = NULL;
- __u32 tmp = (__u32) data;
- addr_t offset;
-
- if (addr < (addr_t) &dummy32->regs.acrs) {
- struct pt_regs *regs = task_pt_regs(child);
- /*
- * psw, gprs, acrs and orig_gpr2 are stored on the stack
- */
- if (addr == (addr_t) &dummy32->regs.psw.mask) {
- /* Build a 64 bit psw mask from 31 bit mask. */
- if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits)
- /* Invalid psw mask. */
- return -EINVAL;
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (regs->psw.mask & PSW_MASK_BA) |
- (__u64)(tmp & PSW32_MASK_USER) << 32;
- } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
- /* Build a 64 bit psw address from 31 bit address. */
- regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
- /* Transfer 31 bit amode bit to psw mask. */
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
- (__u64)(tmp & PSW32_ADDR_AMODE);
- } else {
- /* gpr 0-15 */
- *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
- }
- } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
- /*
- * access registers are stored in the thread structure
- */
- offset = addr - (addr_t) &dummy32->regs.acrs;
- *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
-
- } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
- /*
- * orig_gpr2 is stored on the kernel stack
- */
- *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
-
- } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
- /*
- * prevent writess of padding hole between
- * orig_gpr2 and fp_regs on s390.
- */
- return 0;
-
- } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
- /*
- * floating point regs. are stored in the thread structure
- */
- if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
- (tmp & ~FPC_VALID_MASK) != 0)
- /* Invalid floating point control. */
+ case PTRACE_TE_ABORT_RAND:
+ if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
return -EINVAL;
- offset = addr - (addr_t) &dummy32->regs.fp_regs;
- *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
-
- } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
- /*
- * Handle access to the per_info structure.
- */
- addr -= (addr_t) &dummy32->regs.per_info;
- __poke_user_per_compat(child, addr, data);
- }
-
- return 0;
-}
-
-static int poke_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- if (!is_compat_task() || (addr & 3) ||
- addr > sizeof(struct compat_user) - 3)
- return -EIO;
-
- return __poke_user_compat(child, addr, data);
-}
-
-long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
- compat_ulong_t caddr, compat_ulong_t cdata)
-{
- unsigned long addr = caddr;
- unsigned long data = cdata;
- compat_ptrace_area parea;
- int copied, ret;
-
- switch (request) {
- case PTRACE_PEEKUSR:
- /* read the word at location addr in the USER area. */
- return peek_user_compat(child, addr, data);
-
- case PTRACE_POKEUSR:
- /* write the word at location addr in the USER area */
- return poke_user_compat(child, addr, data);
-
- case PTRACE_PEEKUSR_AREA:
- case PTRACE_POKEUSR_AREA:
- if (copy_from_user(&parea, (void __force __user *) addr,
- sizeof(parea)))
- return -EFAULT;
- addr = parea.kernel_addr;
- data = parea.process_addr;
- copied = 0;
- while (copied < parea.len) {
- if (request == PTRACE_PEEKUSR_AREA)
- ret = peek_user_compat(child, addr, data);
- else {
- __u32 utmp;
- if (get_user(utmp,
- (__u32 __force __user *) data))
- return -EFAULT;
- ret = poke_user_compat(child, addr, utmp);
- }
- if (ret)
- return ret;
- addr += sizeof(unsigned int);
- data += sizeof(unsigned int);
- copied += sizeof(unsigned int);
}
return 0;
- case PTRACE_GET_LAST_BREAK:
- put_user(task_thread_info(child)->last_break,
- (unsigned int __user *) data);
- return 0;
- }
- return compat_ptrace_request(child, request, addr, data);
-}
-#endif
-
-asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
-{
- long ret = 0;
-
- /* Do the secure computing check first. */
- if (secure_computing(regs->gprs[2])) {
- /* seccomp failures shouldn't expose any additional code. */
- ret = -1;
- goto out;
- }
-
- /*
- * The sysc_tracesys code in entry.S stored the system
- * call number to gprs[2].
- */
- if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- (tracehook_report_syscall_entry(regs) ||
- regs->gprs[2] >= NR_syscalls)) {
- /*
- * Tracing decided this syscall should not happen or the
- * debugger stored an invalid system call number. Skip
- * the system call and the system call restart handling.
- */
- clear_thread_flag(TIF_SYSCALL);
- ret = -1;
+ default:
+ return ptrace_request(child, request, addr, data);
}
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gprs[2]);
-
- audit_syscall_entry(is_compat_task() ?
- AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
- regs->gprs[2], regs->orig_gpr2,
- regs->gprs[3], regs->gprs[4],
- regs->gprs[5]);
-out:
- return ret ?: regs->gprs[2];
-}
-
-asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
-{
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->gprs[2]);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
}
/*
@@ -802,28 +508,14 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
static int s390_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned pos;
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- unsigned long *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- unsigned long __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+ membuf_store(&to, __peek_user(target, pos));
return 0;
}
@@ -864,14 +556,18 @@ static int s390_regs_set(struct task_struct *target,
}
static int s390_fpregs_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
+ _s390_fp_regs fp_regs;
+
if (target == current)
- save_fp_regs(&target->thread.fp_regs);
+ save_user_fpu_regs();
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_regs, 0, -1);
+ fp_regs.fpc = target->thread.ufpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.ufpu);
+
+ return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
static int s390_fpregs_set(struct task_struct *target,
@@ -880,51 +576,36 @@ static int s390_fpregs_set(struct task_struct *target,
const void __user *ubuf)
{
int rc = 0;
+ freg_t fprs[__NUM_FPRS];
if (target == current)
- save_fp_regs(&target->thread.fp_regs);
-
- /* If setting FPC, must validate it first. */
+ save_user_fpu_regs();
+ convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 fpc[2] = { target->thread.fp_regs.fpc, 0 };
- rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc,
+ u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
- if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0)
+ if (ufpc[1] != 0)
return -EINVAL;
- target->thread.fp_regs.fpc = fpc[0];
+ target->thread.ufpu.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.fp_regs.fprs,
- offsetof(s390_fp_regs, fprs), -1);
-
- if (rc == 0 && target == current)
- restore_fp_regs(&target->thread.fp_regs);
-
+ fprs, offsetof(s390_fp_regs, fprs), -1);
+ if (rc)
+ return rc;
+ convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
return rc;
}
-#ifdef CONFIG_64BIT
-
static int s390_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (count > 0) {
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = task_thread_info(target)->last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(task_thread_info(target)->last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, target->thread.last_break);
}
static int s390_last_break_set(struct task_struct *target,
@@ -937,16 +618,15 @@ static int s390_last_break_set(struct task_struct *target,
static int s390_tdb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct pt_regs *regs = task_pt_regs(target);
- unsigned char *data;
+ size_t size;
if (!(regs->int_code & 0x200))
return -ENODATA;
- data = target->thread.trap_tdb;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+ size = sizeof(target->thread.trap_tdb.data);
+ return membuf_write(&to, target->thread.trap_tdb.data, size);
}
static int s390_tdb_set(struct task_struct *target,
@@ -957,16 +637,80 @@ static int s390_tdb_set(struct task_struct *target,
return 0;
}
-#endif
+static int s390_vxrs_low_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ if (!cpu_has_vx())
+ return -ENODEV;
+ if (target == current)
+ save_user_fpu_regs();
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
+ return membuf_write(&to, vxrs, sizeof(vxrs));
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i, rc;
+
+ if (!cpu_has_vx())
+ return -ENODEV;
+ if (target == current)
+ save_user_fpu_regs();
+
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ if (rc == 0)
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ target->thread.ufpu.vxrs[i].low = vxrs[i];
+
+ return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_vx())
+ return -ENODEV;
+ if (target == current)
+ save_user_fpu_regs();
+ return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW,
+ __NUM_VXRS_HIGH * sizeof(__vector128));
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc;
+
+ if (!cpu_has_vx())
+ return -ENODEV;
+ if (target == current)
+ save_user_fpu_regs();
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+ return rc;
+}
static int s390_system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned int *data = &task_thread_info(target)->system_call;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(unsigned int));
+ return membuf_store(&to, target->thread.system_call);
}
static int s390_system_call_set(struct task_struct *target,
@@ -974,282 +718,280 @@ static int s390_system_call_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned int *data = &task_thread_info(target)->system_call;
+ unsigned int *data = &target->thread.system_call;
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(unsigned int));
}
-static const struct user_regset s390_regsets[] = {
- [REGSET_GENERAL] = {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(s390_regs) / sizeof(long),
- .size = sizeof(long),
- .align = sizeof(long),
- .get = s390_regs_get,
- .set = s390_regs_set,
- },
- [REGSET_FP] = {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(s390_fp_regs) / sizeof(long),
- .size = sizeof(long),
- .align = sizeof(long),
- .get = s390_fpregs_get,
- .set = s390_fpregs_set,
- },
-#ifdef CONFIG_64BIT
- [REGSET_LAST_BREAK] = {
- .core_note_type = NT_S390_LAST_BREAK,
- .n = 1,
- .size = sizeof(long),
- .align = sizeof(long),
- .get = s390_last_break_get,
- .set = s390_last_break_set,
- },
- [REGSET_TDB] = {
- .core_note_type = NT_S390_TDB,
- .n = 1,
- .size = 256,
- .align = 1,
- .get = s390_tdb_get,
- .set = s390_tdb_set,
- },
-#endif
- [REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_S390_SYSTEM_CALL,
- .n = 1,
- .size = sizeof(unsigned int),
- .align = sizeof(unsigned int),
- .get = s390_system_call_get,
- .set = s390_system_call_set,
- },
-};
+static int s390_gs_cb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct gs_cb *data = target->thread.gs_cb;
-static const struct user_regset_view user_s390_view = {
- .name = UTS_MACHINE,
- .e_machine = EM_S390,
- .regsets = s390_regsets,
- .n = ARRAY_SIZE(s390_regsets)
-};
+ if (!cpu_has_gs())
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ if (target == current)
+ save_gs_cb(data);
+ return membuf_write(&to, data, sizeof(struct gs_cb));
+}
-#ifdef CONFIG_COMPAT
-static int s390_compat_regs_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+static int s390_gs_cb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
{
- if (target == current)
- save_access_regs(target->thread.acrs);
+ struct gs_cb gs_cb = { }, *data = NULL;
+ int rc;
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user_compat(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user_compat(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
+ if (!cpu_has_gs())
+ return -ENODEV;
+ if (!target->thread.gs_cb) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
}
- return 0;
+ if (!target->thread.gs_cb)
+ gs_cb.gsd = 25;
+ else if (target == current)
+ save_gs_cb(&gs_cb);
+ else
+ gs_cb = *target->thread.gs_cb;
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &gs_cb, 0, sizeof(gs_cb));
+ if (rc) {
+ kfree(data);
+ return -EFAULT;
+ }
+ preempt_disable();
+ if (!target->thread.gs_cb)
+ target->thread.gs_cb = data;
+ *target->thread.gs_cb = gs_cb;
+ if (target == current) {
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
+ restore_gs_cb(target->thread.gs_cb);
+ }
+ preempt_enable();
+ return rc;
}
-static int s390_compat_regs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+static int s390_gs_bc_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
{
- int rc = 0;
+ struct gs_cb *data = target->thread.gs_bc_cb;
- if (target == current)
- save_access_regs(target->thread.acrs);
+ if (!cpu_has_gs())
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return membuf_write(&to, data, sizeof(struct gs_cb));
+}
- if (kbuf) {
- const compat_ulong_t *k = kbuf;
- while (count > 0 && !rc) {
- rc = __poke_user_compat(target, pos, *k++);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- const compat_ulong_t __user *u = ubuf;
- while (count > 0 && !rc) {
- compat_ulong_t word;
- rc = __get_user(word, u++);
- if (rc)
- break;
- rc = __poke_user_compat(target, pos, word);
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
+static int s390_gs_bc_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_bc_cb;
+
+ if (!cpu_has_gs())
+ return -ENODEV;
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ target->thread.gs_bc_cb = data;
}
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static bool is_ri_cb_valid(struct runtime_instr_cb *cb)
+{
+ return (cb->rca & 0x1f) == 0 &&
+ (cb->roa & 0xfff) == 0 &&
+ (cb->rla & 0xfff) == 0xfff &&
+ cb->s == 1 &&
+ cb->k == 1 &&
+ cb->h == 0 &&
+ cb->reserved1 == 0 &&
+ cb->ps == 1 &&
+ cb->qs == 0 &&
+ cb->pc == 1 &&
+ cb->qc == 0 &&
+ cb->reserved2 == 0 &&
+ cb->reserved3 == 0 &&
+ cb->reserved4 == 0 &&
+ cb->reserved5 == 0 &&
+ cb->reserved6 == 0 &&
+ cb->reserved7 == 0 &&
+ cb->reserved8 == 0 &&
+ cb->rla >= cb->roa &&
+ cb->rca >= cb->roa &&
+ cb->rca <= cb->rla+1 &&
+ cb->m < 3;
+}
+
+static int s390_runtime_instr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct runtime_instr_cb *data = target->thread.ri_cb;
- if (rc == 0 && target == current)
- restore_access_regs(target->thread.acrs);
+ if (!test_facility(64))
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
- return rc;
+ return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
}
-static int s390_compat_regs_high_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+static int s390_runtime_instr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
{
- compat_ulong_t *gprs_high;
+ struct runtime_instr_cb ri_cb = { }, *data = NULL;
+ int rc;
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = *gprs_high;
- gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(*gprs_high, u++))
- return -EFAULT;
- gprs_high += 2;
- count -= sizeof(*u);
- }
- }
- return 0;
-}
+ if (!test_facility(64))
+ return -ENODEV;
-static int s390_compat_regs_high_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- compat_ulong_t *gprs_high;
- int rc = 0;
+ if (!target->thread.ri_cb) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ }
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- const compat_ulong_t *k = kbuf;
- while (count > 0) {
- *gprs_high = *k++;
- *gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- const compat_ulong_t __user *u = ubuf;
- while (count > 0 && !rc) {
- unsigned long word;
- rc = __get_user(word, u++);
- if (rc)
- break;
- *gprs_high = word;
- *gprs_high += 2;
- count -= sizeof(*u);
- }
+ if (target->thread.ri_cb) {
+ if (target == current)
+ store_runtime_instr_cb(&ri_cb);
+ else
+ ri_cb = *target->thread.ri_cb;
}
- return rc;
-}
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &ri_cb, 0, sizeof(struct runtime_instr_cb));
+ if (rc) {
+ kfree(data);
+ return -EFAULT;
+ }
-static int s390_compat_last_break_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- compat_ulong_t last_break;
-
- if (count > 0) {
- last_break = task_thread_info(target)->last_break;
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(last_break, u))
- return -EFAULT;
- }
+ if (!is_ri_cb_valid(&ri_cb)) {
+ kfree(data);
+ return -EINVAL;
}
- return 0;
-}
+ /*
+ * Override access key in any case, since user space should
+ * not be able to set it, nor should it care about it.
+ */
+ ri_cb.key = PAGE_DEFAULT_KEY >> 4;
+ preempt_disable();
+ if (!target->thread.ri_cb)
+ target->thread.ri_cb = data;
+ *target->thread.ri_cb = ri_cb;
+ if (target == current)
+ load_runtime_instr_cb(target->thread.ri_cb);
+ preempt_enable();
-static int s390_compat_last_break_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
return 0;
}
-static const struct user_regset s390_compat_regsets[] = {
- [REGSET_GENERAL] = {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .get = s390_compat_regs_get,
- .set = s390_compat_regs_set,
+static const struct user_regset s390_regsets[] = {
+ {
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
+ .n = sizeof(s390_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = s390_regs_get,
+ .set = s390_regs_set,
},
- [REGSET_FP] = {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .get = s390_fpregs_get,
+ {
+ USER_REGSET_NOTE_TYPE(PRFPREG),
+ .n = sizeof(s390_fp_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
- [REGSET_LAST_BREAK] = {
- .core_note_type = NT_S390_LAST_BREAK,
+ {
+ USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
+ .n = 1,
+ .size = sizeof(unsigned int),
+ .align = sizeof(unsigned int),
+ .regset_get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
+ USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_compat_last_break_get,
- .set = s390_compat_last_break_set,
+ .regset_get = s390_last_break_get,
+ .set = s390_last_break_set,
},
- [REGSET_TDB] = {
- .core_note_type = NT_S390_TDB,
+ {
+ USER_REGSET_NOTE_TYPE(S390_TDB),
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
- [REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_S390_SYSTEM_CALL,
- .n = 1,
- .size = sizeof(compat_uint_t),
- .align = sizeof(compat_uint_t),
- .get = s390_system_call_get,
- .set = s390_system_call_set,
+ {
+ USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .regset_get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .regset_get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
},
- [REGSET_GENERAL_EXTENDED] = {
- .core_note_type = NT_S390_HIGH_GPRS,
- .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .get = s390_compat_regs_high_get,
- .set = s390_compat_regs_high_set,
+ {
+ USER_REGSET_NOTE_TYPE(S390_GS_CB),
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .regset_get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
+ {
+ USER_REGSET_NOTE_TYPE(S390_GS_BC),
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .regset_get = s390_gs_bc_get,
+ .set = s390_gs_bc_set,
+ },
+ {
+ USER_REGSET_NOTE_TYPE(S390_RI_CB),
+ .n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .regset_get = s390_runtime_instr_get,
+ .set = s390_runtime_instr_set,
},
};
-static const struct user_regset_view user_s390_compat_view = {
- .name = "s390",
+static const struct user_regset_view user_s390_view = {
+ .name = "s390x",
.e_machine = EM_S390,
- .regsets = s390_compat_regsets,
- .n = ARRAY_SIZE(s390_compat_regsets)
+ .regsets = s390_regsets,
+ .n = ARRAY_SIZE(s390_regsets)
};
-#endif
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
- return &user_s390_compat_view;
-#endif
return &user_s390_view;
}
@@ -1258,20 +1000,13 @@ static const char *gpr_names[NUM_GPRS] = {
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
- if (offset >= NUM_GPRS)
- return 0;
- return regs->gprs[offset];
-}
-
int regs_query_register_offset(const char *name)
{
unsigned long offset;
if (!name || *name != 'r')
return -EINVAL;
- if (strict_strtoul(name + 1, 10, &offset))
+ if (kstrtoul(name + 1, 10, &offset))
return -EINVAL;
if (offset >= NUM_GPRS)
return -EINVAL;
@@ -1284,29 +1019,3 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
return gpr_names[offset];
}
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long ksp = kernel_stack_pointer(regs);
-
- return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
- unsigned long addr;
-
- addr = kernel_stack_pointer(regs) + n * sizeof(long);
- if (!regs_within_kernel_stack(regs, addr))
- return 0;
- return *(unsigned long *)addr;
-}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index dd8016b0477e..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -1,92 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * S390 version
- * Copyright IBM Corp. 2000
- * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ * Copyright IBM Corp 2000, 2011
+ * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Denis Joseph Barrow,
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
-#
-# store_status: Empty implementation until kdump is supported on 31 bit
-#
-ENTRY(store_status)
- br %r14
+ GEN_BR_THUNK %r9
#
-# do_reipl_asm
-# Parameter: r2 = schid of reipl device
+# Issue "store status" for the current CPU to its prefix page
+# and call passed function afterwards
+#
+# r2 = Function to be called after store status
+# r3 = Parameter for function
#
-ENTRY(do_reipl_asm)
- basr %r13,0
-.Lpg0: lpsw .Lnewpsw-.Lpg0(%r13)
-.Lpg1: # do store status of all registers
+SYM_CODE_START(store_status)
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
+ /* General purpose registers */
+ GET_LC %r13
+ /* Control registers */
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
+ /* Access registers */
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
+ /* Floating point registers */
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
+ std %f0, 0x00(%r1)
+ std %f1, 0x08(%r1)
+ std %f2, 0x10(%r1)
+ std %f3, 0x18(%r1)
+ std %f4, 0x20(%r1)
+ std %f5, 0x28(%r1)
+ std %f6, 0x30(%r1)
+ std %f7, 0x38(%r1)
+ std %f8, 0x40(%r1)
+ std %f9, 0x48(%r1)
+ std %f10,0x50(%r1)
+ std %f11,0x58(%r1)
+ std %f12,0x60(%r1)
+ std %f13,0x68(%r1)
+ std %f14,0x70(%r1)
+ std %f15,0x78(%r1)
+ /* Floating point control register */
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
+ stfpc 0(%r1)
+ /* CPU timer */
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ stpt 0(%r1)
+ /* Store prefix register */
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
+ stpx 0(%r1)
+ /* Clock comparator - seven bytes */
+ larl %r4,clkcmp
+ stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
+ mvc 1(7,%r1),1(%r4)
+ /* Program status word */
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
+ epsw %r4,%r5
+ st %r4,0(%r1)
+ st %r5,4(%r1)
+ stg %r2,8(%r1)
+ lgr %r9,%r2
+ lgr %r2,%r3
+ BR_EX %r9
+SYM_CODE_END(store_status)
- stm %r0,%r15,__LC_GPREGS_SAVE_AREA
- stctl %c0,%c15,__LC_CREGS_SAVE_AREA
- stam %a0,%a15,__LC_AREGS_SAVE_AREA
- l %r10,.Ldump_pfx-.Lpg0(%r13)
- mvc __LC_PREFIX_SAVE_AREA(4),0(%r10)
- stckc .Lclkcmp-.Lpg0(%r13)
- mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13)
- stpt __LC_CPU_TIMER_SAVE_AREA
- st %r13, __LC_PSW_SAVE_AREA+4
- lctl %c6,%c6,.Lall-.Lpg0(%r13)
- lr %r1,%r2
- mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13)
- stsch .Lschib-.Lpg0(%r13)
- oi .Lschib+5-.Lpg0(%r13),0x84
-.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01
- msch .Lschib-.Lpg0(%r13)
- lhi %r0,5
-.Lssch: ssch .Liplorb-.Lpg0(%r13)
- jz .L001
- brct %r0,.Lssch
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L001: mvc __LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13)
-.Ltpi: lpsw .Lwaitpsw-.Lpg0(%r13)
-.Lcont: c %r1,__LC_SUBCHANNEL_ID
- jnz .Ltpi
- clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
- jnz .Ltpi
- tsch .Liplirb-.Lpg0(%r13)
- tm .Liplirb+9-.Lpg0(%r13),0xbf
- jz .L002
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3
- jz .L003
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L003: st %r1,__LC_SUBCHANNEL_ID
- lpsw 0
- sigp 0,0,SIGP_RESTART
-.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13)
- lpsw .Ldispsw-.Lpg0(%r13)
- .align 8
-.Lclkcmp: .quad 0x0000000000000000
-.Lall: .long 0xff000000
-.Ldump_pfx: .long dump_prefix_page
- .align 8
-.Lnewpsw: .long 0x00080000,0x80000000+.Lpg1
-.Lpcnew: .long 0x00080000,0x80000000+.Lecs
-.Lionew: .long 0x00080000,0x80000000+.Lcont
-.Lwaitpsw: .long 0x020a0000,0x00000000+.Ltpi
-.Ldispsw: .long 0x000a0000,0x00000000
-.Liplccws: .long 0x02000000,0x60000018
- .long 0x08000008,0x20000001
-.Liplorb: .long 0x0049504c,0x0040ff80
- .long 0x00000000+.Liplccws
-.Lschib: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
-.Liplirb: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
+ .section .bss
+ .balign 8
+SYM_DATA_LOCAL(clkcmp, .quad 0x0000000000000000)
+ .previous
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
deleted file mode 100644
index dc3b1273c4dc..000000000000
--- a/arch/s390/kernel/reipl64.S
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright IBM Corp 2000, 2011
- * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
- * Denis Joseph Barrow,
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/sigp.h>
-
-#
-# store_status
-#
-# Prerequisites to run this function:
-# - Prefix register is set to zero
-# - Original prefix register is stored in "dump_prefix_page"
-# - Lowcore protection is off
-#
-ENTRY(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
- lghi %r1,SAVE_AREA_BASE
- /* General purpose registers */
- stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- lg %r2,__LC_SAVE_AREA_RESTART
- stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
- /* Control registers */
- stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* Access registers */
- stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* Floating point registers */
- std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* Floating point control register */
- stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* CPU timer */
- stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
- /* Saved prefix register */
- larl %r2,dump_prefix_page
- mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
- /* Clock comparator - seven bytes */
- larl %r2,.Lclkcmp
- stckc 0(%r2)
- mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
- /* Program status word */
- epsw %r2,%r3
- st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
- st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
- larl %r2,store_status
- stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
- br %r14
-
- .section .bss
- .align 8
-.Lclkcmp: .quad 0x0000000000000000
- .previous
-
-#
-# do_reipl_asm
-# Parameter: r2 = schid of reipl device
-#
-
-ENTRY(do_reipl_asm)
- basr %r13,0
-.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13)
-.Lpg1: brasl %r14,store_status
-
- lctlg %c6,%c6,.Lall-.Lpg0(%r13)
- lgr %r1,%r2
- mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
- stsch .Lschib-.Lpg0(%r13)
- oi .Lschib+5-.Lpg0(%r13),0x84
-.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01
- msch .Lschib-.Lpg0(%r13)
- lghi %r0,5
-.Lssch: ssch .Liplorb-.Lpg0(%r13)
- jz .L001
- brct %r0,.Lssch
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
-.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13)
-.Lcont: c %r1,__LC_SUBCHANNEL_ID
- jnz .Ltpi
- clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
- jnz .Ltpi
- tsch .Liplirb-.Lpg0(%r13)
- tm .Liplirb+9-.Lpg0(%r13),0xbf
- jz .L002
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3
- jz .L003
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L003: st %r1,__LC_SUBCHANNEL_ID
- lhi %r1,0 # mode 0 = esa
- slr %r0,%r0 # set cpuid to zero
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
- lpsw 0
-.Ldisab: sll %r14,1
- srl %r14,1 # need to kill hi bit to avoid specification exceptions.
- st %r14,.Ldispsw+12-.Lpg0(%r13)
- lpswe .Ldispsw-.Lpg0(%r13)
- .align 8
-.Lall: .quad 0x00000000ff000000
- .align 16
-/*
- * These addresses have to be 31 bit otherwise
- * the sigp will throw a specifcation exception
- * when switching to ESA mode as bit 31 be set
- * in the ESA psw.
- * Bit 31 of the addresses has to be 0 for the
- * 31bit lpswe instruction a fact they appear to have
- * omitted from the pop.
- */
-.Lnewpsw: .quad 0x0000000080000000
- .quad .Lpg1
-.Lpcnew: .quad 0x0000000080000000
- .quad .Lecs
-.Lionew: .quad 0x0000000080000000
- .quad .Lcont
-.Lwaitpsw: .quad 0x0202000080000000
- .quad .Ltpi
-.Ldispsw: .quad 0x0002000080000000
- .quad 0x0000000000000000
-.Liplccws: .long 0x02000000,0x60000018
- .long 0x08000008,0x20000001
-.Liplorb: .long 0x0049504c,0x0040ff80
- .long 0x00000000+.Liplccws
-.Lschib: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
-.Liplirb: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index f4e6f20e117a..0ae297c82afd 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -1,118 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright IBM Corp. 2005
*
- * Author(s): Rolf Adelsberger,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Author(s): Rolf Adelsberger
*
*/
#include <linux/linkage.h>
+#include <asm/page.h>
#include <asm/sigp.h>
/*
* moves the new kernel to its destination...
* %r2 = pointer to first kimage_entry_t
* %r3 = start address - where to jump to after the job is done...
+ * %r4 = subcode
*
* %r5 will be used as temp. storage
* %r6 holds the destination address
* %r7 = PAGE_SIZE
* %r8 holds the source address
* %r9 = PAGE_SIZE
- * %r10 is a page mask
+ *
+ * 0xf000 is a page_mask
*/
.text
-ENTRY(relocate_kernel)
- basr %r13,0 # base address
- .base:
- stnsm sys_msk-.base(%r13),0xfb # disable DAT
- stctl %c0,%c15,ctlregs-.base(%r13)
- stm %r0,%r15,gprregs-.base(%r13)
- la %r1,load_psw-.base(%r13)
- mvc 0(8,%r0),0(%r1)
- la %r0,.back-.base(%r13)
- st %r0,4(%r0)
- oi 4(%r0),0x80
- mvc 0x68(8,%r0),0(%r1)
- la %r0,.back_pgm-.base(%r13)
- st %r0,0x6c(%r0)
- oi 0x6c(%r0),0x80
- lhi %r0,0
- diag %r0,%r0,0x308
- .back:
- basr %r13,0
- .back_base:
- oi have_diag308-.back_base(%r13),0x01
- lctl %c0,%c15,ctlregs-.back_base(%r13)
- lm %r0,%r15,gprregs-.back_base(%r13)
- j .start_reloc
- .back_pgm:
- lm %r0,%r15,gprregs-.base(%r13)
- .start_reloc:
- lhi %r10,-1 # preparing the mask
- sll %r10,12 # shift it such that it becomes 0xf000
- .top:
- lhi %r7,4096 # load PAGE_SIZE in r7
- lhi %r9,4096 # load PAGE_SIZE in r9
- l %r5,0(%r2) # read another word for indirection page
- ahi %r2,4 # increment pointer
- tml %r5,0x1 # is it a destination page?
- je .indir_check # NO, goto "indir_check"
- lr %r6,%r5 # r6 = r5
- nr %r6,%r10 # mask it out and...
- j .top # ...next iteration
- .indir_check:
- tml %r5,0x2 # is it a indirection page?
- je .done_test # NO, goto "done_test"
- nr %r5,%r10 # YES, mask out,
- lr %r2,%r5 # move it into the right register,
- j .top # and read next...
- .done_test:
- tml %r5,0x4 # is it the done indicator?
- je .source_test # NO! Well, then it should be the source indicator...
- j .done # ok, lets finish it here...
- .source_test:
- tml %r5,0x8 # it should be a source indicator...
- je .top # NO, ignore it...
- lr %r8,%r5 # r8 = r5
- nr %r8,%r10 # masking
- 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
- jo 0b
- j .top
- .done:
- sr %r0,%r0 # clear register r0
- la %r4,load_psw-.base(%r13) # load psw-address into the register
- o %r3,4(%r4) # or load address into psw
- st %r3,4(%r4)
- mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
- tm have_diag308-.base(%r13),0x01
- jno .no_diag308
- diag %r0,%r0,0x308
- .no_diag308:
- sr %r1,%r1 # clear %r1
- sr %r2,%r2 # clear %r2
- sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
- lpsw 0 # hopefully start new kernel...
+SYM_CODE_START(relocate_kernel)
+ basr %r13,0 # base address
+.base:
+ lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
+ lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
+ lg %r5,0(%r2) # read another word for indirection page
+ aghi %r2,8 # increment pointer
+ tml %r5,0x1 # is it a destination page?
+ je .indir_check # NO, goto "indir_check"
+ lgr %r6,%r5 # r6 = r5
+ nill %r6,0xf000 # mask it out and...
+ j .base # ...next iteration
+.indir_check:
+ tml %r5,0x2 # is it a indirection page?
+ je .done_test # NO, goto "done_test"
+ nill %r5,0xf000 # YES, mask out,
+ lgr %r2,%r5 # move it into the right register,
+ j .base # and read next...
+.done_test:
+ tml %r5,0x4 # is it the done indicator?
+ je .source_test # NO! Well, then it should be the source indicator...
+ j .done # ok, lets finish it here...
+.source_test:
+ tml %r5,0x8 # it should be a source indicator...
+ je .base # NO, ignore it...
+ lgr %r8,%r5 # r8 = r5
+ nill %r8,0xf000 # masking
+0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+ jo 0b
+ j .base
+.done:
+ lgr %r0,%r4 # subcode
+ cghi %r3,0
+ je .diag
+ la %r4,load_psw-.base(%r13) # load psw-address into the register
+ o %r3,4(%r4) # or load address into psw
+ st %r3,4(%r4)
+ mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+.diag:
+ diag %r0,%r0,0x308
+SYM_CODE_END(relocate_kernel)
- .align 8
- load_psw:
- .long 0x00080000,0x80000000
- sys_msk:
- .quad 0
- ctlregs:
- .rept 16
- .long 0
- .endr
- gprregs:
- .rept 16
- .long 0
- .endr
- have_diag308:
- .byte 0
- .align 8
- relocate_kernel_end:
- .align 8
- .globl relocate_kernel_len
- relocate_kernel_len:
- .quad relocate_kernel_end - relocate_kernel
+ .balign 8
+SYM_DATA_START_LOCAL(load_psw)
+ .long 0x00080000,0x80000000
+SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end)
+ .balign 8
+SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel)
diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S
deleted file mode 100644
index cfac28330b03..000000000000
--- a/arch/s390/kernel/relocate_kernel64.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright IBM Corp. 2005
- *
- * Author(s): Rolf Adelsberger,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/sigp.h>
-
-/*
- * moves the new kernel to its destination...
- * %r2 = pointer to first kimage_entry_t
- * %r3 = start address - where to jump to after the job is done...
- *
- * %r5 will be used as temp. storage
- * %r6 holds the destination address
- * %r7 = PAGE_SIZE
- * %r8 holds the source address
- * %r9 = PAGE_SIZE
- *
- * 0xf000 is a page_mask
- */
-
- .text
-ENTRY(relocate_kernel)
- basr %r13,0 # base address
- .base:
- stnsm sys_msk-.base(%r13),0xfb # disable DAT
- stctg %c0,%c15,ctlregs-.base(%r13)
- stmg %r0,%r15,gprregs-.base(%r13)
- lghi %r0,3
- sllg %r0,%r0,31
- stg %r0,0x1d0(%r0)
- la %r0,.back_pgm-.base(%r13)
- stg %r0,0x1d8(%r0)
- la %r1,load_psw-.base(%r13)
- mvc 0(8,%r0),0(%r1)
- la %r0,.back-.base(%r13)
- st %r0,4(%r0)
- oi 4(%r0),0x80
- lghi %r0,0
- diag %r0,%r0,0x308
- .back:
- lhi %r1,1 # mode 1 = esame
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
- sam64 # switch to 64 bit addressing mode
- basr %r13,0
- .back_base:
- oi have_diag308-.back_base(%r13),0x01
- lctlg %c0,%c15,ctlregs-.back_base(%r13)
- lmg %r0,%r15,gprregs-.back_base(%r13)
- j .top
- .back_pgm:
- lmg %r0,%r15,gprregs-.base(%r13)
- .top:
- lghi %r7,4096 # load PAGE_SIZE in r7
- lghi %r9,4096 # load PAGE_SIZE in r9
- lg %r5,0(%r2) # read another word for indirection page
- aghi %r2,8 # increment pointer
- tml %r5,0x1 # is it a destination page?
- je .indir_check # NO, goto "indir_check"
- lgr %r6,%r5 # r6 = r5
- nill %r6,0xf000 # mask it out and...
- j .top # ...next iteration
- .indir_check:
- tml %r5,0x2 # is it a indirection page?
- je .done_test # NO, goto "done_test"
- nill %r5,0xf000 # YES, mask out,
- lgr %r2,%r5 # move it into the right register,
- j .top # and read next...
- .done_test:
- tml %r5,0x4 # is it the done indicator?
- je .source_test # NO! Well, then it should be the source indicator...
- j .done # ok, lets finish it here...
- .source_test:
- tml %r5,0x8 # it should be a source indicator...
- je .top # NO, ignore it...
- lgr %r8,%r5 # r8 = r5
- nill %r8,0xf000 # masking
- 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
- jo 0b
- j .top
- .done:
- sgr %r0,%r0 # clear register r0
- la %r4,load_psw-.base(%r13) # load psw-address into the register
- o %r3,4(%r4) # or load address into psw
- st %r3,4(%r4)
- mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
- tm have_diag308-.base(%r13),0x01
- jno .no_diag308
- diag %r0,%r0,0x308
- .no_diag308:
- sam31 # 31 bit mode
- sr %r1,%r1 # erase register r1
- sr %r2,%r2 # erase register r2
- sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
- lpsw 0 # hopefully start new kernel...
-
- .align 8
- load_psw:
- .long 0x00080000,0x80000000
- sys_msk:
- .quad 0
- ctlregs:
- .rept 16
- .quad 0
- .endr
- gprregs:
- .rept 16
- .quad 0
- .endr
- have_diag308:
- .byte 0
- .align 8
- relocate_kernel_end:
- .align 8
- .globl relocate_kernel_len
- relocate_kernel_len:
- .quad relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 000000000000..af10e6bdd34e
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+ rh->ret_addr = regs->gprs[14];
+ rh->frame = regs->gprs[15];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+ unsigned long correct_ret_addr)
+{
+ /* Replace fake return address with real one. */
+ regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+ return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 000000000000..32f069eed3f3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 077a99389b07..1788a5454b6f 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2012
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
@@ -11,23 +12,35 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
+#include <linux/sched/task_stack.h>
+
#include <asm/runtime_instr.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
+#include "entry.h"
+
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
-static int runtime_instr_avail(void)
+void runtime_instr_release(struct task_struct *tsk)
{
- return test_facility(64);
+ kfree(tsk->thread.ri_cb);
}
static void disable_runtime_instr(void)
{
- struct pt_regs *regs = task_pt_regs(current);
+ struct task_struct *task = current;
+ struct pt_regs *regs;
+ if (!task->thread.ri_cb)
+ return;
+ regs = task_pt_regs(task);
+ preempt_disable();
load_runtime_instr_cb(&runtime_instr_empty_cb);
+ kfree(task->thread.ri_cb);
+ task->thread.ri_cb = NULL;
+ preempt_enable();
/*
* Make sure the RI bit is deleted from the PSW. If the user did not
@@ -39,77 +52,34 @@ static void disable_runtime_instr(void)
static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
{
- cb->buf_limit = 0xfff;
- if (s390_user_mode == HOME_SPACE_MODE)
- cb->home_space = 1;
- cb->int_requested = 1;
- cb->pstate = 1;
- cb->pstate_set_buf = 1;
- cb->pstate_sample = 1;
- cb->pstate_collect = 1;
- cb->key = PAGE_DEFAULT_KEY;
- cb->valid = 1;
-}
-
-void exit_thread_runtime_instr(void)
-{
- struct task_struct *task = current;
-
- if (!task->thread.ri_cb)
- return;
- disable_runtime_instr();
- kfree(task->thread.ri_cb);
- task->thread.ri_signum = 0;
- task->thread.ri_cb = NULL;
-}
-
-static void runtime_instr_int_handler(struct ext_code ext_code,
- unsigned int param32, unsigned long param64)
-{
- struct siginfo info;
-
- if (!(param32 & CPU_MF_INT_RI_MASK))
- return;
-
- inc_irq_stat(IRQEXT_CMR);
-
- if (!current->thread.ri_cb)
- return;
- if (current->thread.ri_signum < SIGRTMIN ||
- current->thread.ri_signum > SIGRTMAX) {
- WARN_ON_ONCE(1);
- return;
- }
-
- memset(&info, 0, sizeof(info));
- info.si_signo = current->thread.ri_signum;
- info.si_code = SI_QUEUE;
- if (param32 & CPU_MF_INT_RI_BUF_FULL)
- info.si_int = ENOBUFS;
- else if (param32 & CPU_MF_INT_RI_HALTED)
- info.si_int = ECANCELED;
- else
- return; /* unknown reason */
-
- send_sig_info(current->thread.ri_signum, &info, current);
+ cb->rla = 0xfff;
+ cb->s = 1;
+ cb->k = 1;
+ cb->ps = 1;
+ cb->pc = 1;
+ cb->key = PAGE_DEFAULT_KEY >> 4;
+ cb->v = 1;
}
+/*
+ * The signum argument is unused. In older kernels it was used to
+ * specify a real-time signal. For backwards compatibility user space
+ * should pass a valid real-time signal number (the signum argument
+ * was checked in older kernels).
+ */
SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
{
struct runtime_instr_cb *cb;
- if (!runtime_instr_avail())
+ if (!test_facility(64))
return -EOPNOTSUPP;
if (command == S390_RUNTIME_INSTR_STOP) {
- preempt_disable();
- exit_thread_runtime_instr();
- preempt_enable();
+ disable_runtime_instr();
return 0;
}
- if (command != S390_RUNTIME_INSTR_START ||
- (signum < SIGRTMIN || signum > SIGRTMAX))
+ if (command != S390_RUNTIME_INSTR_START)
return -EINVAL;
if (!current->thread.ri_cb) {
@@ -122,7 +92,6 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
}
init_runtime_instr_cb(cb);
- current->thread.ri_signum = signum;
/* now load the control block to make it available */
preempt_disable();
@@ -131,20 +100,3 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
preempt_enable();
return 0;
}
-
-static int __init runtime_instr_init(void)
-{
- int rc;
-
- if (!runtime_instr_avail())
- return 0;
-
- measurement_alert_subclass_register();
- rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
- if (rc)
- measurement_alert_subclass_unregister();
- else
- pr_info("Runtime instrumentation facility initialized\n");
- return rc;
-}
-device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
deleted file mode 100644
index 3bac589844a7..000000000000
--- a/arch/s390/kernel/s390_ksyms.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <linux/module.h>
-#include <linux/kvm_host.h>
-#include <asm/ftrace.h>
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-EXPORT_SYMBOL(sie64a);
-EXPORT_SYMBOL(sie_exit);
-#endif
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
deleted file mode 100644
index 29bd7bec4176..000000000000
--- a/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-
-LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
-LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
-LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter
-LC_EXT_INT_CODE = 0x86 # addr of ext int code
-LC_AR_MODE_ID = 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-# R2 = 0 on interrupt, 2 on timeout
-# R3 = external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
- stm %r6,%r15,24(%r15) # save registers
- basr %r13,0 # get base register
-.LbaseS1:
- ahi %r15,-96 # create stack frame
- la %r8,LC_EXT_NEW_PSW # register int handler
- la %r9,.LextpswS1-.LbaseS1(%r13)
-#ifdef CONFIG_64BIT
- tm LC_AR_MODE_ID,1
- jno .Lesa1
- la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit
- la %r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
-#endif
- mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8)
- mvc 0(16,%r8),0(%r9)
-#ifdef CONFIG_64BIT
- epsw %r6,%r7 # set current addressing mode
- nill %r6,0x1 # in new psw (31 or 64 bit mode)
- nilh %r7,0x8000
- stm %r6,%r7,0(%r8)
-#endif
- lhi %r6,0x0200 # cr mask for ext int (cr0.54)
- ltr %r2,%r2
- jz .LsetctS1
- ahi %r6,0x0800 # cr mask for clock int (cr0.52)
- stck .LtimeS1-.LbaseS1(%r13) # initiate timeout
- al %r2,.LtimeS1-.LbaseS1(%r13)
- st %r2,.LtimeS1-.LbaseS1(%r13)
- sckc .LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
- stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts
- l %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r1,~(0x200 | 0x800) # clear old values
- nr %r1,%r0
- or %r1,%r6 # set new value
- st %r1,.LctlS1-.LbaseS1(%r13)
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13)
- st %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r2,2 # return code for timeout
-.LloopS1:
- lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
-.LwaitS1:
- lh %r7,LC_EXT_INT_CODE
- chi %r7,0x1004 # timeout?
- je .LtimeoutS1
- chi %r7,0x2401 # service int?
- jne .LloopS1
- sr %r2,%r2
- l %r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting
- # restore old handler
- mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
- lm %r6,%r15,120(%r15) # restore registers
- br %r14 # return to caller
-
- .align 8
-.LoldpswS1:
- .long 0, 0, 0, 0 # old ext int PSW
-.LextpswS1:
- .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int
-#ifdef CONFIG_64BIT
-.LextpswS1_64:
- .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit
-#endif
-.LwaitpswS1:
- .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int
-.LtimeS1:
- .quad 0 # current time
-.LctlS1:
- .long 0 # CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-# R2 = command word
-# R3 = sccb address
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-# R3 = sccb response code if R2 = 0
-#
-
-_sclp_servc:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- lr %r6,%r2 # save command word
- lr %r7,%r3 # save sccb address
-.LretryS2:
- lhi %r2,1 # error return code
- .insn rre,0xb2200000,%r6,%r7 # servc
- brc 1,.LendS2 # exit if not operational
- brc 8,.LnotbusyS2 # go on if not busy
- sr %r2,%r2 # wait until no longer busy
- bras %r14,_sclp_wait_int
- j .LretryS2 # retry
-.LnotbusyS2:
- sr %r2,%r2 # wait until result
- bras %r14,_sclp_wait_int
- sr %r2,%r2
- lh %r3,6(%r7)
-.LendS2:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-# R2 = 0 to activate, non-zero to deactivate
-#
-# Returns:
-# R2 = 0 on success, non-zero on failure
-#
-
-_sclp_setup:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS3:
- l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb
- mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
- ltr %r2,%r2 # initialization?
- jz .LdoinitS3 # go ahead
- # clear masks
- xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
- l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
- lr %r3,%r6 # get sccb address
- bras %r14,_sclp_servc # issue service call
- ltr %r2,%r2 # servc successful?
- jnz .LerrorS3
- chi %r3,0x20 # write mask successful?
- jne .LerrorS3
- # check masks
- la %r2,.LinitmaskS3-.LinitsccbS3(%r6)
- l %r1,0(%r2) # receive mask ok?
- n %r1,12(%r2)
- cl %r1,0(%r2)
- jne .LerrorS3
- l %r1,4(%r2) # send mask ok?
- n %r1,8(%r2)
- cl %r1,4(%r2)
- sr %r2,%r2
- je .LendS3
-.LerrorS3:
- lhi %r2,1 # error return code
-.LendS3:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-.LwritemaskS3:
- .long 0x00780005 # SCLP command for write mask
-.LinitsccbS3:
- .word .LinitendS3-.LinitsccbS3
- .byte 0,0,0,0
- .word 0
- .word 0
- .word 4
-.LinitmaskS3:
- .long 0x80000000
- .long 0x40000000
- .long 0
- .long 0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-_sclp_print:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS4:
- l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb
- mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
- la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr
- sr %r0,%r0
- l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table
-.LinitmtoS4:
- # initialize mto
- mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
- lhi %r6,.LmtoendS4-.LmtoS4 # current mto length
-.LloopS4:
- ic %r0,0(%r2) # get character
- ahi %r2,1
- ltr %r0,%r0 # end of string?
- jz .LfinalizemtoS4
- chi %r0,0x0a # end of line (NL)?
- jz .LfinalizemtoS4
- stc %r0,0(%r6,%r7) # copy to mto
- la %r11,0(%r6,%r7)
- tr 0(1,%r11),0(%r10) # translate to EBCDIC
- ahi %r6,1
- j .LloopS4
-.LfinalizemtoS4:
- sth %r6,0(%r7) # update mto length
- lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length
- ar %r9,%r6
- sth %r9,.LmdbS4-.LwritesccbS4(%r8)
- lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
- ar %r9,%r6
- sth %r9,.LevbufS4-.LwritesccbS4(%r8)
- lh %r9,0(%r8) # update sccb length
- ar %r9,%r6
- sth %r9,0(%r8)
- ar %r7,%r6 # update current mto address
- ltr %r0,%r0 # more characters?
- jnz .LinitmtoS4
- l %r2,.LwritedataS4-.LbaseS4(%r13)# write data
- lr %r3,%r8
- bras %r14,_sclp_servc
- ltr %r2,%r2 # servc successful?
- jnz .LendS4
- chi %r3,0x20 # write data successful?
- je .LendS4
- lhi %r2,1 # error return code
-.LendS4:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
-#ifdef CONFIG_64BIT
- tm LC_AR_MODE_ID,1
- jno .Lesa2
- ahi %r15,-80
- stmh %r6,%r15,96(%r15) # store upper register halves
-.Lesa2:
-#endif
- lr %r10,%r2 # save string pointer
- lhi %r2,0
- bras %r14,_sclp_setup # enable console
- ltr %r2,%r2
- jnz .LendS5
- lr %r2,%r10
- bras %r14,_sclp_print # print string
- ltr %r2,%r2
- jnz .LendS5
- lhi %r2,1
- bras %r14,_sclp_setup # disable console
-.LendS5:
-#ifdef CONFIG_64BIT
- tm LC_AR_MODE_ID,1
- jno .Lesa3
- lmh %r6,%r15,96(%r15) # store upper register halves
- ahi %r15,80
-.Lesa3:
-#endif
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-.LwritedataS4:
- .long 0x00760005 # SCLP command for write data
-.LwritesccbS4:
- # sccb
- .word .LmtoS4-.LwritesccbS4
- .byte 0
- .byte 0,0,0
- .word 0
-
- # evbuf
-.LevbufS4:
- .word .LmtoS4-.LevbufS4
- .byte 0x02
- .byte 0
- .word 0
-
-.LmdbS4:
- # mdb
- .word .LmtoS4-.LmdbS4
- .word 1
- .long 0xd4c4c240
- .long 1
-
- # go
-.LgoS4:
- .word .LmtoS4-.LgoS4
- .word 1
- .long 0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0
- .byte 0
- .byte 0,0,0,0,0,0,0
- .byte 0
- .word 0
- .byte 0,0,0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
-
-.LmtoS4:
- .word .LmtoendS4-.LmtoS4
- .word 4
- .word 0x1000
- .byte 0
- .byte 0,0,0
-.LmtoendS4:
-
- # Global constants
-.LsccbS0:
- .long _sclp_work_area
-.Lascebc:
- .long _ascebc
-
-.section .data,"aw",@progbits
- .balign 4096
-_sclp_work_area:
- .fill 4096
-.previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 497451ec5e26..c1fe0b53c5ac 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S390 version
* Copyright IBM Corp. 1999, 2012
@@ -12,70 +13,72 @@
* This file handles the architecture-dependent parts of initialization
*/
-#define KMSG_COMPONENT "setup"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "setup: " fmt
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
+#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
-#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
+#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
-#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
-#include <linux/compat.h>
+#include <linux/start_kernel.h>
+#include <linux/hugetlb.h>
+#include <linux/kmemleak.h>
+#include <asm/archrandom.h>
+#include <asm/boot_data.h>
+#include <asm/machine.h>
#include <asm/ipl.h>
-#include <asm/uaccess.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
+#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
-#include <asm/kvm_virtio.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
+#include <asm/stacktrace.h>
+#include <asm/sysinfo.h>
+#include <asm/numa.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
+#include <asm/uv.h>
+#include <asm/asm-offsets.h>
#include "entry.h"
-long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
- PSW_MASK_EA | PSW_MASK_BA;
-long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
- PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_ASC_HOME;
-
-/*
- * User copy operations.
- */
-struct uaccess_ops uaccess;
-EXPORT_SYMBOL(uaccess);
-
/*
* Machine setup..
*/
@@ -88,33 +91,101 @@ EXPORT_SYMBOL(console_devno);
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
-unsigned long elf_hwcap = 0;
-char elf_platform[ELF_PLATFORM_SIZE];
+/*
+ * Some code and data needs to stay below 2 GB, even when the kernel would be
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .amode31 section.
+ */
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
+struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
+struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
-struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
+/*
+ * Control registers CR2, CR5 and CR15 are initialized with addresses
+ * of tables that must be placed below 2G which is handled by the AMODE31
+ * sections.
+ * Because the AMODE31 sections are relocated below 2G at startup,
+ * the content of control registers CR2, CR5 and CR15 must be updated
+ * with new addresses after the relocation. The initial initialization of
+ * control registers occurs in head.S and then gets updated again after AMODE31
+ * relocation. We must access the relevant AMODE31 tables indirectly via
+ * pointers placed in the .amode31.refs linker section. Those pointers get
+ * updated automatically during AMODE31 relocation and always contain a valid
+ * address within AMODE31 sections.
+ */
-int __initdata memory_end_set;
-unsigned long __initdata memory_end;
+static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);
-unsigned long VMALLOC_START;
+static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
+ [1] = 0xffffffffffffffff
+};
+
+static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0,
+ 0x80000000, 0, 0, 0
+};
+
+static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
+ 0, 0, 0x89000000, 0,
+ 0, 0, 0x8a000000, 0
+};
+
+static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
+static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
+static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
+static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
+
+unsigned long __bootdata_preserved(max_mappable);
+struct physmem_info __bootdata(physmem_info);
+
+struct vm_layout __bootdata_preserved(vm_layout);
+EXPORT_SYMBOL(vm_layout);
+int __bootdata_preserved(__kaslr_enabled);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
+u64 __bootdata_preserved(stfle_fac_list[16]);
+EXPORT_SYMBOL(stfle_fac_list);
+struct oldmem_data __bootdata_preserved(oldmem_data);
+
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
+unsigned long __bootdata_preserved(vmemmap_size);
-#ifdef CONFIG_64BIT
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
-#endif
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
-struct _lowcore *lowcore_ptr[NR_CPUS];
+struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
/*
+ * The Write Back bit position in the physaddr is given by the SLPC PCI.
+ * Leaving the mask zero always uses write through which is safe
+ */
+unsigned long mio_wb_bit_mask __ro_after_init;
+
+/*
* This is set up by the setup-routine at boot-time
* for S390 need to find out, what we have to setup
* using address 0x10400 ...
@@ -142,31 +213,28 @@ __setup("condev=", condev_setup);
static void __init set_preferred_console(void)
{
- if (MACHINE_IS_KVM) {
- if (sclp_has_vt220())
- add_preferred_console("ttyS", 1, NULL);
- else if (sclp_has_linemode())
- add_preferred_console("ttyS", 0, NULL);
- else
- add_preferred_console("hvc", 0, NULL);
- } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+ if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
add_preferred_console("ttyS", 0, NULL);
else if (CONSOLE_IS_3270)
add_preferred_console("tty3270", 0, NULL);
+ else if (CONSOLE_IS_VT220)
+ add_preferred_console("ttysclp", 0, NULL);
+ else if (CONSOLE_IS_HVC)
+ add_preferred_console("hvc", 0, NULL);
}
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
- if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
+ if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
- if (strncmp(str, "3215", 5) == 0)
+ if (!strcmp(str, "3215"))
SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
- if (strncmp(str, "3270", 5) == 0)
+ if (!strcmp(str, "3270"))
SET_CONSOLE_3270;
#endif
set_preferred_console();
@@ -180,7 +248,7 @@ static void __init conmode_default(void)
char query_buffer[1024];
char *ptr;
- if (MACHINE_IS_VM) {
+ if (machine_is_vm()) {
cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
ptr = strstr(query_buffer, "SUBCHANNEL =");
@@ -201,7 +269,7 @@ static void __init conmode_default(void)
#endif
return;
}
- if (strncmp(ptr + 8, "3270", 4) == 0) {
+ if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
@@ -209,7 +277,7 @@ static void __init conmode_default(void)
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
SET_CONSOLE_SCLP;
#endif
- } else if (strncmp(ptr + 8, "3215", 4) == 0) {
+ } else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
@@ -218,6 +286,13 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
+ } else if (machine_is_kvm()) {
+ if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
+ SET_CONSOLE_VT220;
+ else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
+ SET_CONSOLE_SCLP;
+ else
+ SET_CONSOLE_HVC;
} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
SET_CONSOLE_SCLP;
@@ -225,19 +300,19 @@ static void __init conmode_default(void)
}
}
-#ifdef CONFIG_ZFCPDUMP
+#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ if (!is_ipl_type_dump())
return;
- if (OLDMEM_BASE)
+ if (oldmem_data.start)
return;
- strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+ strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
-#endif /* CONFIG_ZFCPDUMP */
+#endif /* CONFIG_CRASH_DUMP */
/*
* Reboot, halt and power_off stubs. They just call _machine_restart,
@@ -283,163 +358,127 @@ void machine_power_off(void)
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
-static int __init early_parse_mem(char *p)
-{
- memory_end = memparse(p, &p);
- memory_end_set = 1;
- return 0;
-}
-early_param("mem", early_parse_mem);
+void *restart_stack;
-static int __init parse_vmalloc(char *arg)
+unsigned long stack_alloc(void)
{
- if (!arg)
- return -EINVAL;
- VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
- return 0;
-}
-early_param("vmalloc", parse_vmalloc);
+ void *stack;
-unsigned int s390_user_mode = PRIMARY_SPACE_MODE;
-EXPORT_SYMBOL_GPL(s390_user_mode);
-
-static void __init set_user_mode_primary(void)
-{
- psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME;
- psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY;
-#ifdef CONFIG_COMPAT
- psw32_user_bits =
- (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY;
-#endif
- uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos_switch : uaccess_pt;
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ kmemleak_not_leak(stack);
+ return (unsigned long)stack;
}
-static int __init early_parse_user_mode(char *p)
+void stack_free(unsigned long stack)
{
- if (p && strcmp(p, "primary") == 0)
- s390_user_mode = PRIMARY_SPACE_MODE;
- else if (!p || strcmp(p, "home") == 0)
- s390_user_mode = HOME_SPACE_MODE;
- else
- return 1;
- return 0;
+ vfree((void *)stack);
}
-early_param("user_mode", early_parse_user_mode);
-static void __init setup_addressing_mode(void)
+static unsigned long __init stack_alloc_early(void)
{
- if (s390_user_mode != PRIMARY_SPACE_MODE)
- return;
- set_user_mode_primary();
- if (MACHINE_HAS_MVCOS)
- pr_info("Address spaces switched, mvcos available\n");
- else
- pr_info("Address spaces switched, mvcos not available\n");
-}
+ unsigned long stack;
-void *restart_stack __attribute__((__section__(".data")));
+ stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
+ return stack;
+}
static void __init setup_lowcore(void)
{
- struct _lowcore *lc;
+ struct lowcore *lc, *abs_lc;
/*
* Setup lowcore for boot cpu
*/
- BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
- lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
- lc->restart_psw.mask = psw_kernel_bits;
- lc->restart_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->external_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
- lc->program_new_psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->program_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = psw_kernel_bits;
- lc->mcck_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_MCHECK;
- lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
- lc->clock_comparator = -1ULL;
- lc->kernel_stack = ((unsigned long) &init_thread_union)
- + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->async_stack = (unsigned long)
- __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
- + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->panic_stack = (unsigned long)
- __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
- + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->current_task = (unsigned long) init_thread_union.thread_info.task;
- lc->thread_info = (unsigned long) &init_thread_union;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
- memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE) {
- lc->extended_save_area_addr = (__u32)
- __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
- /* enable extended save area */
- __ctl_set_bit(14, 29);
- }
-#else
- lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
-#endif
- lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
- lc->async_enter_timer = S390_lowcore.async_enter_timer;
- lc->exit_timer = S390_lowcore.exit_timer;
- lc->user_timer = S390_lowcore.user_timer;
- lc->system_timer = S390_lowcore.system_timer;
- lc->steal_timer = S390_lowcore.steal_timer;
- lc->last_update_timer = S390_lowcore.last_update_timer;
- lc->last_update_clock = S390_lowcore.last_update_clock;
- lc->ftrace_func = S390_lowcore.ftrace_func;
-
- restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
- restart_stack += ASYNC_SIZE;
-
+ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
+ lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
+ if (!lc)
+ panic("%s: Failed to allocate %zu bytes align=%zx\n",
+ __func__, sizeof(*lc), sizeof(*lc));
+
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS;
+ lc->external_new_psw.addr = (unsigned long) ext_int_handler;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS;
+ lc->svc_new_psw.addr = (unsigned long) system_call;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS;
+ lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+ lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS;
+ lc->io_new_psw.addr = (unsigned long) io_int_handler;
+ lc->clock_comparator = clock_comparator_max;
+ lc->current_task = (unsigned long)&init_task;
+ lc->lpp = LPP_MAGIC;
+ lc->preempt_count = get_lowcore()->preempt_count;
+ nmi_alloc_mcesa_early(&lc->mcesad);
+ lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+ lc->exit_timer = get_lowcore()->exit_timer;
+ lc->user_timer = get_lowcore()->user_timer;
+ lc->system_timer = get_lowcore()->system_timer;
+ lc->steal_timer = get_lowcore()->steal_timer;
+ lc->last_update_timer = get_lowcore()->last_update_timer;
+ lc->last_update_clock = get_lowcore()->last_update_clock;
+ /*
+ * Allocate the global restart stack which is the same for
+ * all CPUs in case *one* of them does a PSW restart.
+ */
+ restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
+ lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->kernel_stack = get_lowcore()->kernel_stack;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
- * restart data to the absolute zero lowcore. This is necesary if
+ * restart data to the absolute zero lowcore. This is necessary if
* PSW restart is done on an offline CPU that has lowcore zero.
*/
lc->restart_stack = (unsigned long) restart_stack;
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
- lc->restart_source = -1UL;
-
- /* Setup absolute zero lowcore */
- mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
- mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
- mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
- mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
- mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
-
- set_prefix((u32)(unsigned long) lc);
+ lc->restart_source = -1U;
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
+ arch_spin_lock_setup(0);
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = get_lowcore()->user_asce;
+
+ system_ctlreg_init_save_area(lc);
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc);
+
+ set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
+ if (abs_lowcore_map(0, lowcore_ptr[0], false))
+ panic("Couldn't setup absolute lowcore");
}
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata *standard_resources[] = {
@@ -451,33 +490,29 @@ static struct resource __initdata *standard_resources[] = {
static void __init setup_resources(void)
{
struct resource *res, *std_res, *sub_res;
- int i, j;
-
- code_resource.start = (unsigned long) &_text;
- code_resource.end = (unsigned long) &_etext - 1;
- data_resource.start = (unsigned long) &_etext;
- data_resource.end = (unsigned long) &_edata - 1;
- bss_resource.start = (unsigned long) &__bss_start;
- bss_resource.end = (unsigned long) &__bss_stop - 1;
-
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (!memory_chunk[i].size)
- continue;
- res = alloc_bootmem_low(sizeof(*res));
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
- switch (memory_chunk[i].type) {
- case CHUNK_READ_WRITE:
- res->name = "System RAM";
- break;
- case CHUNK_READ_ONLY:
- res->name = "System ROM";
- res->flags |= IORESOURCE_READONLY;
- break;
- default:
- res->name = "reserved";
- }
- res->start = memory_chunk[i].addr;
- res->end = res->start + memory_chunk[i].size - 1;
+ phys_addr_t start, end;
+ int j;
+ u64 i;
+
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext) - 1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata) - 1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop) - 1;
+
+ for_each_mem_range(i, &start, &end) {
+ res = memblock_alloc_or_panic(sizeof(*res), 8);
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+
+ res->name = "System RAM";
+ res->start = start;
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in resources, end points to the last byte in
+ * the range.
+ */
+ res->end = end - 1;
request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -486,7 +521,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = alloc_bootmem_low(sizeof(*sub_res));
+ sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -496,203 +531,47 @@ static void __init setup_resources(void)
}
}
}
-}
-
-static void __init setup_memory_end(void)
-{
- unsigned long vmax, vmalloc_size, tmp;
- unsigned long real_memory_size = 0;
- int i;
-
-
-#ifdef CONFIG_ZFCPDUMP
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
- memory_end = ZFCPDUMP_HSA_SIZE;
- memory_end_set = 1;
- }
-#endif
- memory_end &= PAGE_MASK;
-
+#ifdef CONFIG_CRASH_DUMP
/*
- * Make sure all chunks are MAX_ORDER aligned so we don't need the
- * extra checks that HOLES_IN_ZONE would require.
+ * Re-add removed crash kernel memory as reserved memory. This makes
+ * sure it will be mapped with the identity mapping and struct pages
+ * will be created, so it can be resized later on.
+ * However add it later since the crash kernel resource should not be
+ * part of the System RAM resource.
*/
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- unsigned long start, end;
- struct mem_chunk *chunk;
- unsigned long align;
-
- chunk = &memory_chunk[i];
- if (!chunk->size)
- continue;
- align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
- start = (chunk->addr + align - 1) & ~(align - 1);
- end = (chunk->addr + chunk->size) & ~(align - 1);
- if (start >= end)
- memset(chunk, 0, sizeof(*chunk));
- else {
- chunk->addr = start;
- chunk->size = end - start;
- }
- real_memory_size = max(real_memory_size,
- chunk->addr + chunk->size);
+ if (crashk_res.end) {
+ memblock_add_node(crashk_res.start, resource_size(&crashk_res),
+ 0, MEMBLOCK_NONE);
+ memblock_reserve(crashk_res.start, resource_size(&crashk_res));
+ insert_resource(&iomem_resource, &crashk_res);
}
-
- /* Choose kernel address space layout: 2, 3, or 4 levels. */
-#ifdef CONFIG_64BIT
- vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
- tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
- tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
- if (tmp <= (1UL << 42))
- vmax = 1UL << 42; /* 3-level kernel page table */
- else
- vmax = 1UL << 53; /* 4-level kernel page table */
- /* module area is at the end of the kernel address space. */
- MODULES_END = vmax;
- MODULES_VADDR = MODULES_END - MODULES_LEN;
- VMALLOC_END = MODULES_VADDR;
-#else
- vmalloc_size = VMALLOC_END ?: 96UL << 20;
- vmax = 1UL << 31; /* 2-level kernel page table */
- /* vmalloc area is at the end of the kernel address space. */
- VMALLOC_END = vmax;
#endif
- VMALLOC_START = vmax - vmalloc_size;
-
- /* Split remaining virtual space between 1:1 mapping & vmemmap array */
- tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
- /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
- tmp = SECTION_ALIGN_UP(tmp);
- tmp = VMALLOC_START - tmp * sizeof(struct page);
- tmp &= ~((vmax >> 11) - 1); /* align to page table level */
- tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
- vmemmap = (struct page *) tmp;
-
- /* Take care that memory_end is set and <= vmemmap */
- memory_end = min(memory_end ?: real_memory_size, tmp);
-
- /* Fixup memory chunk array to fit into 0..memory_end */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- struct mem_chunk *chunk = &memory_chunk[i];
-
- if (!chunk->size)
- continue;
- if (chunk->addr >= memory_end) {
- memset(chunk, 0, sizeof(*chunk));
- continue;
- }
- if (chunk->addr + chunk->size > memory_end)
- chunk->size = memory_end - chunk->addr;
- }
}
-static void __init setup_vmcoreinfo(void)
+static void __init setup_memory_end(void)
{
- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+ max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
+ pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}
#ifdef CONFIG_CRASH_DUMP
/*
- * Find suitable location for crashkernel memory
- */
-static unsigned long __init find_crash_base(unsigned long crash_size,
- char **msg)
-{
- unsigned long crash_base;
- struct mem_chunk *chunk;
- int i;
-
- if (memory_chunk[0].size < crash_size) {
- *msg = "first memory chunk must be at least crashkernel size";
- return 0;
- }
- if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
- return OLDMEM_BASE;
-
- for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
- chunk = &memory_chunk[i];
- if (chunk->size == 0)
- continue;
- if (chunk->type != CHUNK_READ_WRITE)
- continue;
- if (chunk->size < crash_size)
- continue;
- crash_base = (chunk->addr + chunk->size) - crash_size;
- if (crash_base < crash_size)
- continue;
- if (crash_base < ZFCPDUMP_HSA_SIZE_MAX)
- continue;
- if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE)
- continue;
- return crash_base;
- }
- *msg = "no suitable area found";
- return 0;
-}
-
-/*
- * Check if crash_base and crash_size is valid
- */
-static int __init verify_crash_base(unsigned long crash_base,
- unsigned long crash_size,
- char **msg)
-{
- struct mem_chunk *chunk;
- int i;
-
- /*
- * Because we do the swap to zero, we must have at least 'crash_size'
- * bytes free space before crash_base
- */
- if (crash_size > crash_base) {
- *msg = "crashkernel offset must be greater than size";
- return -EINVAL;
- }
-
- /* First memory chunk must be at least crash_size */
- if (memory_chunk[0].size < crash_size) {
- *msg = "first memory chunk must be at least crashkernel size";
- return -EINVAL;
- }
- /* Check if we fit into the respective memory chunk */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- chunk = &memory_chunk[i];
- if (chunk->size == 0)
- continue;
- if (crash_base < chunk->addr)
- continue;
- if (crash_base >= chunk->addr + chunk->size)
- continue;
- /* we have found the memory chunk */
- if (crash_base + crash_size > chunk->addr + chunk->size) {
- *msg = "selected memory chunk is too small for "
- "crashkernel memory";
- return -EINVAL;
- }
- return 0;
- }
- *msg = "invalid memory range specified";
- return -EINVAL;
-}
-
-/*
- * When kdump is enabled, we have to ensure that no memory from
- * the area [0 - crashkernel memory size] and
- * [crashk_res.start - crashk_res.end] is set offline.
+ * When kdump is enabled, we have to ensure that no memory from the area
+ * [0 - crashkernel memory size] is set offline - it will be exchanged with
+ * the crashkernel memory region when kdump is triggered. The crashkernel
+ * memory region can never get offlined (pages are unmovable).
*/
static int kdump_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct memory_notify *arg = data;
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
return NOTIFY_BAD;
- if (arg->start_pfn > PFN_DOWN(crashk_res.end))
- return NOTIFY_OK;
- if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
- return NOTIFY_OK;
- return NOTIFY_BAD;
+ return NOTIFY_OK;
}
static struct notifier_block kdump_mem_nb = {
@@ -702,24 +581,15 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
- * Make sure that oldmem, where the dump is stored, is protected
+ * Reserve page tables created by decompressor
*/
-static void reserve_oldmem(void)
+static void __init reserve_pgtables(void)
{
-#ifdef CONFIG_CRASH_DUMP
- unsigned long real_size = 0;
- int i;
+ unsigned long start, end;
+ struct reserved_range *range;
- if (!OLDMEM_BASE)
- return;
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- struct mem_chunk *chunk = &memory_chunk[i];
-
- real_size = max(real_size, chunk->addr + chunk->size);
- }
- create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
- create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
-#endif
+ for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
+ memblock_reserve(start, end - start);
}
/*
@@ -729,277 +599,297 @@ static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
unsigned long long crash_base, crash_size;
- char *msg = NULL;
+ phys_addr_t low, high;
int rc;
- rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
- &crash_base);
- if (rc || crash_size == 0)
- return;
+ rc = parse_crashkernel(boot_command_line, ident_map_size,
+ &crash_size, &crash_base, NULL, NULL, NULL);
+
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
- if (register_memory_notifier(&kdump_mem_nb))
+ if (rc || crash_size == 0)
return;
- if (!crash_base)
- crash_base = find_crash_base(crash_size, &msg);
+
+ if (memblock.memory.regions[0].size < crash_size) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "first memory chunk must be at least crashkernel size");
+ return;
+ }
+
+ low = crash_base ?: oldmem_data.start;
+ high = low + crash_size;
+ if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
+ /* The crashkernel fits into OLDMEM, reuse OLDMEM */
+ crash_base = low;
+ } else {
+ /* Find suitable area in free memory */
+ low = max_t(unsigned long, crash_size, sclp.hsa_size);
+ high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+ if (crash_base && crash_base < low) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "crash_base too low");
+ return;
+ }
+ low = crash_base ?: low;
+ crash_base = memblock_phys_alloc_range(crash_size,
+ KEXEC_CRASH_MEM_ALIGN,
+ low, high);
+ }
+
if (!crash_base) {
- pr_info("crashkernel reservation failed: %s\n", msg);
- unregister_memory_notifier(&kdump_mem_nb);
+ pr_info("crashkernel reservation failed: %s\n",
+ "no suitable area found");
return;
}
- if (verify_crash_base(crash_base, crash_size, &msg)) {
- pr_info("crashkernel reservation failed: %s\n", msg);
- unregister_memory_notifier(&kdump_mem_nb);
+
+ if (register_memory_notifier(&kdump_mem_nb)) {
+ memblock_phys_free(crash_base, crash_size);
return;
}
- if (!OLDMEM_BASE && MACHINE_IS_VM)
+
+ if (!oldmem_data.start && machine_is_vm())
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
- insert_resource(&iomem_resource, &crashk_res);
- create_mem_hole(memory_chunk, crash_base, crash_size);
+ memblock_remove(crash_base, crash_size);
pr_info("Reserving %lluMB of memory at %lluMB "
"for crashkernel (System RAM: %luMB)\n",
- crash_size >> 20, crash_base >> 20, memory_end >> 20);
+ crash_size >> 20, crash_base >> 20,
+ (unsigned long)memblock.memory.total_size >> 20);
os_info_crashkernel_add(crash_base, crash_size);
#endif
}
-static void __init setup_memory(void)
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
{
- unsigned long bootmap_size;
- unsigned long start_pfn, end_pfn;
- int i;
+ unsigned long addr, size;
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- start_pfn = PFN_UP(__pa(&_end));
- end_pfn = max_pfn = PFN_DOWN(memory_end);
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
+ return;
+ initrd_start = (unsigned long)__va(addr);
+ initrd_end = initrd_start + size;
+ memblock_reserve(addr, size);
+}
-#ifdef CONFIG_BLK_DEV_INITRD
- /*
- * Move the initrd in case the bitmap of the bootmem allocater
- * would overwrite it.
- */
+/*
+ * Reserve the memory area used to pass the certificate lists
+ */
+static void __init reserve_certificate_list(void)
+{
+ if (ipl_cert_list_addr)
+ memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
+}
- if (INITRD_START && INITRD_SIZE) {
- unsigned long bmap_size;
- unsigned long start;
+static void __init reserve_physmem_info(void)
+{
+ unsigned long addr, size;
- bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
- bmap_size = PFN_PHYS(bmap_size);
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+ memblock_reserve(addr, size);
+}
- if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
- start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
+static void __init free_physmem_info(void)
+{
+ unsigned long addr, size;
-#ifdef CONFIG_CRASH_DUMP
- if (OLDMEM_BASE) {
- /* Move initrd behind kdump oldmem */
- if (start + INITRD_SIZE > OLDMEM_BASE &&
- start < OLDMEM_BASE + OLDMEM_SIZE)
- start = OLDMEM_BASE + OLDMEM_SIZE;
- }
-#endif
- if (start + INITRD_SIZE > memory_end) {
- pr_err("initrd extends beyond end of "
- "memory (0x%08lx > 0x%08lx) "
- "disabling initrd\n",
- start + INITRD_SIZE, memory_end);
- INITRD_START = INITRD_SIZE = 0;
- } else {
- pr_info("Moving initrd (0x%08lx -> "
- "0x%08lx, size: %ld)\n",
- INITRD_START, start, INITRD_SIZE);
- memmove((void *) start, (void *) INITRD_START,
- INITRD_SIZE);
- INITRD_START = start;
- }
- }
- }
-#endif
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+ memblock_phys_free(addr, size);
+}
- /*
- * Initialize the boot-time allocator
- */
- bootmap_size = init_bootmem(start_pfn, end_pfn);
+static void __init memblock_add_physmem_info(void)
+{
+ unsigned long start, end;
+ int i;
- /*
- * Register RAM areas with the bootmem allocator.
- */
+ pr_debug("physmem info source: %s (%hhd)\n",
+ get_physmem_info_source(), physmem_info.info_source);
+ /* keep memblock lists close to the kernel */
+ memblock_set_bottom_up(true);
+ for_each_physmem_usable_range(i, &start, &end)
+ memblock_add(start, end - start);
+ for_each_physmem_online_range(i, &start, &end)
+ memblock_physmem_add(start, end - start);
+ memblock_set_bottom_up(false);
+ memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
+}
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- unsigned long start_chunk, end_chunk, pfn;
-
- if (!memory_chunk[i].size)
- continue;
- start_chunk = PFN_DOWN(memory_chunk[i].addr);
- end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
- end_chunk = min(end_chunk, end_pfn);
- if (start_chunk >= end_chunk)
- continue;
- memblock_add_node(PFN_PHYS(start_chunk),
- PFN_PHYS(end_chunk - start_chunk), 0);
- pfn = max(start_chunk, start_pfn);
- storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
+static void __init setup_high_memory(void)
+{
+ high_memory = __va(ident_map_size);
+}
+
+/*
+ * Reserve memory used for lowcore.
+ */
+static void __init reserve_lowcore(void)
+{
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if (absolute_pointer(__identity_base) < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
}
+}
- psw_set_key(PAGE_DEFAULT_KEY);
+/*
+ * Reserve memory used for absolute lowcore/command line/kernel image.
+ */
+static void __init reserve_kernel(void)
+{
+ memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+ memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
+ memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
+ memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
+ memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
+ memblock_reserve(__pa(_stext), _end - _stext);
+}
- free_bootmem_with_active_regions(0, max_pfn);
+static void __init setup_memory(void)
+{
+ phys_addr_t start, end;
+ u64 i;
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Init storage key for present memory
*/
- reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
- reserve_bootmem((unsigned long)_stext,
- PFN_PHYS(start_pfn) - (unsigned long)_stext,
- BOOTMEM_DEFAULT);
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
- BOOTMEM_DEFAULT);
+ for_each_mem_range(i, &start, &end)
+ storage_key_init_range(start, end);
-#ifdef CONFIG_CRASH_DUMP
- if (crashk_res.start)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1,
- BOOTMEM_DEFAULT);
- if (is_kdump_kernel())
- reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
- PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
- if (INITRD_START && INITRD_SIZE) {
- if (INITRD_START + INITRD_SIZE <= memory_end) {
- reserve_bootmem(INITRD_START, INITRD_SIZE,
- BOOTMEM_DEFAULT);
- initrd_start = INITRD_START;
- initrd_end = initrd_start + INITRD_SIZE;
- } else {
- pr_err("initrd extends beyond end of "
- "memory (0x%08lx > 0x%08lx) "
- "disabling initrd\n",
- initrd_start + INITRD_SIZE, memory_end);
- initrd_start = initrd_end = 0;
- }
- }
-#endif
+ psw_set_key(PAGE_DEFAULT_KEY);
+}
+
+static void __init relocate_amode31_section(void)
+{
+ unsigned long amode31_size = __eamode31 - __samode31;
+ long amode31_offset, *ptr;
+
+ amode31_offset = AMODE31_START - (unsigned long)__samode31;
+ pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
+
+ /* Move original AMODE31 section to the new one */
+ memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
+ /* Zero out the old AMODE31 section to catch invalid accesses within it */
+ memset(__samode31, 0, amode31_size);
+
+ /* Update all AMODE31 region references */
+ for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
+ *ptr += amode31_offset;
+}
+
+/* This must be called after AMODE31 relocation */
+static void __init setup_cr(void)
+{
+ union ctlreg2 cr2;
+ union ctlreg5 cr5;
+ union ctlreg15 cr15;
+
+ __ctl_duct[1] = (unsigned long)__ctl_aste;
+ __ctl_duct[2] = (unsigned long)__ctl_aste;
+ __ctl_duct[4] = (unsigned long)__ctl_duald;
+
+ /* Update control registers CR2, CR5 and CR15 */
+ local_ctl_store(2, &cr2.reg);
+ local_ctl_store(5, &cr5.reg);
+ local_ctl_store(15, &cr15.reg);
+ cr2.ducto = (unsigned long)__ctl_duct >> 6;
+ cr5.pasteo = (unsigned long)__ctl_duct >> 6;
+ cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
+ system_ctl_load(2, &cr2.reg);
+ system_ctl_load(5, &cr5.reg);
+ system_ctl_load(15, &cr15.reg);
}
/*
- * Setup hardware capabilities.
+ * Add system information as device randomness
*/
-static void __init setup_hwcaps(void)
+static void __init setup_randomness(void)
{
- static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
- struct cpuid cpu_id;
- int i;
+ struct sysinfo_3_2_2 *vmms;
- /*
- * The store facility list bits numbers as found in the principles
- * of operation are numbered with bit 1UL<<31 as number 0 to
- * bit 1UL<<0 as number 31.
- * Bit 0: instructions named N3, "backported" to esa-mode
- * Bit 2: z/Architecture mode is active
- * Bit 7: the store-facility-list-extended facility is installed
- * Bit 17: the message-security assist is installed
- * Bit 19: the long-displacement facility is installed
- * Bit 21: the extended-immediate facility is installed
- * Bit 22: extended-translation facility 3 is installed
- * Bit 30: extended-translation facility 3 enhancement facility
- * These get translated to:
- * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
- * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
- * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
- * HWCAP_S390_ETF3EH bit 8 (22 && 30).
- */
- for (i = 0; i < 6; i++)
- if (test_facility(stfl_bits[i]))
- elf_hwcap |= 1UL << i;
+ vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
+ if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+ memblock_free(vmms, PAGE_SIZE);
- if (test_facility(22) && test_facility(30))
- elf_hwcap |= HWCAP_S390_ETF3EH;
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ static_branch_enable(&s390_arch_random_available);
+}
- /*
- * Check for additional facilities with store-facility-list-extended.
- * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
- * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
- * as stored by stfl, bits 32-xxx contain additional facilities.
- * How many facility words are stored depends on the number of
- * doublewords passed to the instruction. The additional facilities
- * are:
- * Bit 42: decimal floating point facility is installed
- * Bit 44: perform floating point operation facility is installed
- * translated to:
- * HWCAP_S390_DFP bit 6 (42 && 44).
- */
- if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
- elf_hwcap |= HWCAP_S390_DFP;
+/*
+ * Issue diagnose 318 to set the control program name and
+ * version codes.
+ */
+static void __init setup_control_program_code(void)
+{
+ union diag318_info diag318_info = {
+ .cpnc = CPNC_LINUX,
+ .cpvc = 0,
+ };
- /*
- * Huge page support HWCAP_S390_HPAGE is bit 7.
- */
- if (MACHINE_HAS_HPAGE)
- elf_hwcap |= HWCAP_S390_HPAGE;
+ if (!sclp.has_diag318)
+ return;
-#if defined(CONFIG_64BIT)
- /*
- * 64-bit register support for 31-bit processes
- * HWCAP_S390_HIGH_GPRS is bit 9.
- */
- elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+ diag_stat_inc(DIAG_STAT_X318);
+ asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val));
+}
- /*
- * Transactional execution support HWCAP_S390_TE is bit 10.
- */
- if (test_facility(50) && test_facility(73))
- elf_hwcap |= HWCAP_S390_TE;
-#endif
+/*
+ * Print the component list from the IPL report
+ */
+static void __init log_component_list(void)
+{
+ struct ipl_rb_component_entry *ptr, *end;
+ char *str;
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672:
-#if !defined(CONFIG_64BIT)
- default: /* Use "g5" as default for 31 bit kernels. */
-#endif
- strcpy(elf_platform, "g5");
- break;
- case 0x2064:
- case 0x2066:
-#if defined(CONFIG_64BIT)
- default: /* Use "z900" as default for 64 bit kernels. */
-#endif
- strcpy(elf_platform, "z900");
- break;
- case 0x2084:
- case 0x2086:
- strcpy(elf_platform, "z990");
- break;
- case 0x2094:
- case 0x2096:
- strcpy(elf_platform, "z9-109");
- break;
- case 0x2097:
- case 0x2098:
- strcpy(elf_platform, "z10");
- break;
- case 0x2817:
- case 0x2818:
- strcpy(elf_platform, "z196");
- break;
- case 0x2827:
- strcpy(elf_platform, "zEC12");
- break;
+ if (!early_ipl_comp_list_addr)
+ return;
+ if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
+ pr_info("Linux is running with Secure-IPL enabled\n");
+ else
+ pr_info("Linux is running with Secure-IPL disabled\n");
+ ptr = __va(early_ipl_comp_list_addr);
+ end = (void *) ptr + early_ipl_comp_list_size;
+ pr_info("The IPL report contains the following components:\n");
+ while (ptr < end) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
+ str = "signed, verified";
+ else
+ str = "signed, verification failed";
+ } else {
+ str = "not signed";
+ }
+ pr_info("%016llx - %016llx (%s)\n",
+ ptr->addr, ptr->addr + ptr->len, str);
+ ptr++;
}
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -1009,27 +899,23 @@ void __init setup_arch(char **cmdline_p)
/*
* print what head.S has found out about the machine
*/
-#ifndef CONFIG_64BIT
- if (MACHINE_IS_VM)
- pr_info("Linux is running as a z/VM "
- "guest operating system in 31-bit mode\n");
- else if (MACHINE_IS_LPAR)
- pr_info("Linux is running natively in 31-bit mode\n");
- if (MACHINE_HAS_IEEE)
- pr_info("The hardware system has IEEE compatible "
- "floating point units\n");
- else
- pr_info("The hardware system has no IEEE compatible "
- "floating point units\n");
-#else /* CONFIG_64BIT */
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
pr_info("Linux is running as a z/VM "
"guest operating system in 64-bit mode\n");
- else if (MACHINE_IS_KVM)
+ else if (machine_is_kvm())
pr_info("Linux is running under KVM in 64-bit mode\n");
- else if (MACHINE_IS_LPAR)
+ else if (machine_is_lpar())
pr_info("Linux is running natively in 64-bit mode\n");
-#endif /* CONFIG_64BIT */
+ else
+ pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
+
+ if (machine_has_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
+
+ log_component_list();
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */
@@ -1037,46 +923,96 @@ void __init setup_arch(char **cmdline_p)
ROOT_DEV = Root_RAM0;
- init_mm.start_code = PAGE_OFFSET;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
- if (MACHINE_HAS_MVCOS)
- memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
- else
- memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
+ if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
+ nospec_auto_detect();
+ jump_label_init();
parse_early_param();
- detect_memory_layout(memory_chunk, memory_end);
+#ifdef CONFIG_CRASH_DUMP
+ /* Deactivate elfcorehdr= kernel parameter */
+ elfcorehdr_addr = ELFCORE_ADDR_MAX;
+#endif
+
os_info_init();
setup_ipl();
- reserve_oldmem();
+ setup_control_program_code();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
+ reserve_lowcore();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_certificate_list();
+ reserve_physmem_info();
+ memblock_set_current_limit(ident_map_size);
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+ memblock_add_physmem_info();
+
+ free_physmem_info();
setup_memory_end();
- setup_addressing_mode();
- reserve_crashkernel();
+ setup_high_memory();
+ memblock_dump_all();
setup_memory();
- setup_resources();
- setup_vmcoreinfo();
- setup_lowcore();
- cpu_init();
- s390_init_cpu_topology();
+ relocate_amode31_section();
+ setup_cr();
+ setup_uv();
+ dma_contiguous_reserve(ident_map_size);
+ vmcp_cma_reserve();
+ if (cpu_has_edat2())
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+ reserve_crashkernel();
+#ifdef CONFIG_CRASH_DUMP
/*
- * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
+ * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
+ * Therefore CPU and device initialization should be done afterwards.
*/
- setup_hwcaps();
+ smp_save_dump_secondary_cpus();
+#endif
+ setup_resources();
+ setup_lowcore();
+ smp_fill_possible_mask();
+ cpu_detect_mhz_feature();
+ cpu_init();
+ numa_setup();
+ smp_detect_cpus();
+ topology_init_early();
+ setup_protection_map();
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
+ /*
+ * After paging_init created the kernel page table, the new PSWs
+ * in lowcore can now run with DAT enabled.
+ */
+#ifdef CONFIG_CRASH_DUMP
+ smp_save_dump_ipl_cpu();
+#endif
+
/* Setup default console */
conmode_default();
set_preferred_console();
- /* Setup zfcpdump support */
+ apply_alternative_instructions();
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_init_branches();
+
+ /* Setup zfcp/nvme dump support */
setup_zfcpdump();
+
+ /* Add system specific data to the random pool */
+ setup_randomness();
+}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c45becf82e01..4874de5edea0 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 1999, 2006
* Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
@@ -10,10 +11,13 @@
*/
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/rseq.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/signal.h>
+#include <linux/entry-common.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
@@ -22,75 +26,135 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
-#include <linux/tracehook.h>
#include <linux/syscalls.h>
-#include <linux/compat.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
+#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/switch_to.h>
#include "entry.h"
-typedef struct
+/*
+ * Layout of an old-style signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | struct sigcontext |
+ * | oldmask |
+ * | _sigregs * |
+ * -----------------------------------------
+ * | _sigregs with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * -----------------------------------------
+ * | int signo |
+ * -----------------------------------------
+ * | _sigregs_ext with |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt) |
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * | __u16 svc_insn |
+ * -----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
struct sigcontext sc;
_sigregs sregs;
int signo;
- __u8 retcode[S390_SYSCALL_SIZE];
-} sigframe;
+ _sigregs_ext sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+};
-typedef struct
+/*
+ * Layout of an rt signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | svc __NR_rt_sigreturn 2 byte |
+ * -----------------------------------------
+ * | struct siginfo |
+ * -----------------------------------------
+ * | struct ucontext_extended with |
+ * | unsigned long uc_flags |
+ * | struct ucontext *uc_link |
+ * | stack_t uc_stack |
+ * | _sigregs uc_mcontext with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * | sigset_t uc_sigmask |
+ * | _sigregs_ext uc_mcontext_ext |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt)|
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
- __u8 retcode[S390_SYSCALL_SIZE];
+ __u16 svc_insn;
struct siginfo info;
- struct ucontext uc;
-} rt_sigframe;
+ struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ save_access_regs(current->thread.acrs);
+ save_user_fpu_regs();
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ restore_access_regs(current->thread.acrs);
+}
/* Returns non-zero on fault. */
static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
_sigregs user_sregs;
- save_access_regs(current->thread.acrs);
-
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
- user_sregs.regs.psw.mask = psw_user_bits |
- (regs->psw.mask & PSW_MASK_USER);
+ user_sregs.regs.psw.mask = PSW_USER_BITS |
+ (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
user_sregs.regs.psw.addr = regs->psw.addr;
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
- sizeof(sregs->regs.acrs));
- /*
- * We have to store the fp registers to current->thread.fp_regs
- * to merge them with the emulated registers.
- */
- save_fp_regs(&current->thread.fp_regs);
- memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(s390_fp_regs));
- return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs));
+ sizeof(user_sregs.regs.acrs));
+ fpregs_store(&user_sregs.fpregs, &current->thread.ufpu);
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+ return -EFAULT;
+ return 0;
}
-/* Returns positive number on error */
static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
- int err;
_sigregs user_sregs;
- /* Alwys make any pending restarted system call return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ /* Always make any pending restarted system call return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
- err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
- if (err)
- return err;
- /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (user_sregs.regs.psw.mask & PSW_MASK_USER);
+ if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
/* Check for invalid user address space control. */
- if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
- regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
(regs->psw.mask & ~PSW_MASK_ASC);
/* Check for invalid amode */
if (regs->psw.mask & PSW_MASK_EA)
@@ -98,60 +162,102 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
regs->psw.addr = user_sregs.regs.psw.addr;
memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
- sizeof(sregs->regs.acrs));
- restore_access_regs(current->thread.acrs);
+ sizeof(current->thread.acrs));
+
+ fpregs_load(&user_sregs.fpregs, &current->thread.ufpu);
+
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
+}
- memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(s390_fp_regs));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save vector registers to signal stack */
+ if (cpu_has_vx()) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = current->thread.ufpu.vxrs[i].low;
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+ return 0;
+}
- restore_fp_regs(&current->thread.fp_regs);
- clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */
+static int restore_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore vector registers from signal stack */
+ if (cpu_has_vx()) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ current->thread.ufpu.vxrs[i].low = vxrs[i];
+ }
return 0;
}
SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
- sigframe __user *frame = (sigframe __user *)regs->gprs[15];
+ struct sigframe __user *frame =
+ (struct sigframe __user *) regs->gprs[15];
sigset_t set;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
+ if (restore_sigregs_ext(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
- rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15];
+ struct rt_sigframe __user *frame =
+ (struct rt_sigframe __user *)regs->gprs[15];
sigset_t set;
if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (restore_altstack(&frame->uc.uc_stack))
+ if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
/*
- * Set up a signal frame.
- */
-
-
-/*
* Determine which stack to use..
*/
static inline void __user *
@@ -175,60 +281,63 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
return (void __user *)((sp - frame_size) & -8ul);
}
-static inline int map_signal(int sig)
-{
- if (current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32)
- return current_thread_info()->exec_domain->signal_invmap[sig];
- else
- return sig;
-}
-
static int setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
- sigframe __user *frame;
-
- frame = get_sigframe(ka, regs, sizeof(sigframe));
+ struct sigframe __user *frame;
+ struct sigcontext sc;
+ unsigned long restorer;
+ size_t frame_size;
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+ if (cpu_has_vx())
+ frame_size += sizeof(frame->sregs_ext);
+ frame = get_sigframe(ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+ sc.sregs = (_sigregs __user __force *) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
- if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
- goto give_sigsegv;
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+ /* Create _sigregs on the signal stack */
if (save_sigregs(regs, &frame->sregs))
- goto give_sigsegv;
- if (__put_user(&frame->sregs, &frame->sc.sregs))
- goto give_sigsegv;
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext on the signal stack */
+ if (save_sigregs_ext(regs, &frame->sregs_ext))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (unsigned long)
- ka->sa.sa_restorer | PSW_ADDR_AMODE;
- } else {
- regs->gprs[14] = (unsigned long)
- frame->retcode | PSW_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
- (u16 __user *)(frame->retcode)))
- goto give_sigsegv;
- }
-
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (addr_t __user *) frame))
- goto give_sigsegv;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = (unsigned long) ka->sa.sa_restorer;
+ else
+ restorer = VDSO_SYMBOL(current, sigreturn);
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) ka->sa.sa_handler;
- regs->gprs[2] = map_signal(sig);
+ regs->gprs[2] = sig;
regs->gprs[3] = (unsigned long) &frame->sc;
/* We forgot to include these in the sigcontext.
@@ -238,93 +347,83 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* set extra registers only for synchronous signals */
regs->gprs[4] = regs->int_code & 127;
regs->gprs[5] = regs->int_parm_long;
- regs->gprs[6] = task_thread_info(current)->last_break;
+ regs->gprs[6] = current->thread.last_break;
}
-
- /* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
- goto give_sigsegv;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
{
- int err = 0;
- rt_sigframe __user *frame;
-
- frame = get_sigframe(ka, regs, sizeof(rt_sigframe));
-
+ struct rt_sigframe __user *frame;
+ unsigned long uc_flags, restorer;
+ size_t frame_size;
+
+ frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+ uc_flags = 0;
+ if (cpu_has_vx()) {
+ frame_size += sizeof(_sigregs_ext);
+ uc_flags |= UC_VXRS;
+ }
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
- if (copy_siginfo_to_user(&frame->info, info))
- goto give_sigsegv;
+ return -EFAULT;
- /* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(NULL, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
- err |= save_sigregs(regs, &frame->uc.uc_mcontext);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (unsigned long)
- ka->sa.sa_restorer | PSW_ADDR_AMODE;
- } else {
- regs->gprs[14] = (unsigned long)
- frame->retcode | PSW_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __user *)(frame->retcode)))
- goto give_sigsegv;
- }
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = (unsigned long) ksig->ka.sa.sa_restorer;
+ else
+ restorer = VDSO_SYMBOL(current, rt_sigreturn);
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (addr_t __user *) frame))
- goto give_sigsegv;
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(NULL, &frame->uc.uc_link) ||
+ __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
- regs->gprs[2] = map_signal(sig);
+ regs->gprs[2] = ksig->sig;
regs->gprs[3] = (unsigned long) &frame->info;
regs->gprs[4] = (unsigned long) &frame->uc;
- regs->gprs[5] = task_thread_info(current)->last_break;
+ regs->gprs[5] = current->thread.last_break;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static void handle_signal(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset,
- struct pt_regs *regs)
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
{
int ret;
/* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame(ksig, oldset, regs);
else
- ret = setup_frame(sig, ka, oldset, regs);
- if (ret)
- return;
- signal_delivered(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLE_STEP));
+ ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
}
/*
@@ -336,11 +435,10 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka,
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
-void do_signal(struct pt_regs *regs)
+
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
- siginfo_t info;
- int signr;
- struct k_sigaction ka;
+ struct ksignal ksig;
sigset_t *oldset = sigmask_to_save();
/*
@@ -348,14 +446,13 @@ void do_signal(struct pt_regs *regs)
* the debugger may change all our registers, including the system
* call information.
*/
- current_thread_info()->system_call =
- test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0;
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ current->thread.system_call =
+ test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
- if (signr > 0) {
+ if (get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
- if (current_thread_info()->system_call) {
- regs->int_code = current_thread_info()->system_call;
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
/* Check for system call restarting. */
switch (regs->gprs[2]) {
case -ERESTART_RESTARTBLOCK:
@@ -363,11 +460,11 @@ void do_signal(struct pt_regs *regs)
regs->gprs[2] = -EINTR;
break;
case -ERESTARTSYS:
- if (!(ka.sa.sa_flags & SA_RESTART)) {
+ if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
regs->gprs[2] = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
regs->gprs[2] = regs->orig_gpr2;
regs->psw.addr =
@@ -377,32 +474,33 @@ void do_signal(struct pt_regs *regs)
}
}
/* No longer in a system call */
- clear_thread_flag(TIF_SYSCALL);
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
- if (is_compat_task())
- handle_signal32(signr, &ka, &info, oldset, regs);
- else
- handle_signal(signr, &ka, &info, oldset, regs);
+ rseq_signal_deliver(&ksig, regs);
+ handle_signal(&ksig, oldset, regs);
return;
}
/* No handlers present - check for system call restart */
- clear_thread_flag(TIF_SYSCALL);
- if (current_thread_info()->system_call) {
- regs->int_code = current_thread_info()->system_call;
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
switch (regs->gprs[2]) {
case -ERESTART_RESTARTBLOCK:
/* Restart with sys_restart_syscall */
- regs->int_code = __NR_restart_syscall;
- /* fallthrough */
+ regs->gprs[2] = regs->orig_gpr2;
+ current->restart_block.arch_data = regs->psw.addr;
+ regs->psw.addr = VDSO_SYMBOL(current, restart_syscall);
+ if (test_thread_flag(TIF_SINGLE_STEP))
+ clear_thread_flag(TIF_PER_TRAP);
+ break;
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- /* Restart system call with magic TIF bit. */
regs->gprs[2] = regs->orig_gpr2;
- set_thread_flag(TIF_SYSCALL);
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
if (test_thread_flag(TIF_SINGLE_STEP))
- set_thread_flag(TIF_PER_TRAP);
+ clear_thread_flag(TIF_PER_TRAP);
break;
}
}
@@ -412,9 +510,3 @@ void do_signal(struct pt_regs *regs)
*/
restore_saved_sigmask();
}
-
-void do_notify_resume(struct pt_regs *regs)
-{
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
-}
diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c
new file mode 100644
index 000000000000..cc869de6e3a5
--- /dev/null
+++ b/arch/s390/kernel/skey.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/rwonce.h>
+#include <asm/page.h>
+#include <asm/skey.h>
+
+int skey_regions_initialized;
+
+static inline unsigned long load_real_address(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %[real],0(%[address])"
+ : [real] "=d" (real)
+ : [address] "a" (address)
+ : "cc");
+ return real;
+}
+
+/*
+ * Initialize storage keys of registered memory regions with the
+ * default key. This is useful for code which is executed with a
+ * non-default access key.
+ */
+void __skey_regions_initialize(void)
+{
+ unsigned long address, real;
+ struct skey_region *r, *end;
+
+ r = __skey_region_start;
+ end = __skey_region_end;
+ while (r < end) {
+ address = r->start & PAGE_MASK;
+ do {
+ real = load_real_address(address);
+ page_set_storage_key(real, PAGE_DEFAULT_KEY, 1);
+ address += PAGE_SIZE;
+ } while (address < r->end);
+ r++;
+ }
+ /*
+ * Make sure storage keys are initialized before
+ * skey_regions_initialized is changed.
+ */
+ barrier();
+ WRITE_ONCE(skey_regions_initialized, 1);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 15a016c10563..b7429f30afc1 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SMP related functions
*
* Copyright IBM Corp. 1999, 2012
* Author(s): Denis Joseph Barrow,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*
* based on other smp stuff by
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
@@ -15,11 +15,12 @@
* operates on physical cpu numbers needs to go into smp.c.
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/err.h>
@@ -28,29 +29,45 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
+#include <linux/irq_work.h>
#include <linux/cpu.h>
#include <linux/slab.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
+#include <linux/kprobes.h>
+#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
-#include <asm/switch_to.h>
+#include <asm/machine.h>
+#include <asm/ctlreg.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
#include <asm/facility.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
-#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
+#include <asm/idle.h>
+#include <asm/nmi.h>
+#include <asm/stacktrace.h>
+#include <asm/topology.h>
+#include <asm/vdso.h>
+#include <asm/maccess.h>
#include "entry.h"
enum {
ec_schedule = 0,
ec_call_function_single,
ec_stop_cpu,
+ ec_mcck_pending,
+ ec_irq_work,
};
enum {
@@ -58,46 +75,46 @@ enum {
CPU_STATE_CONFIGURED,
};
-struct pcpu {
- struct cpu cpu;
- struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
- unsigned long async_stack; /* async stack for the cpu */
- unsigned long panic_stack; /* panic stack for the cpu */
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- int state; /* physical cpu state */
- int polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
+static u8 boot_core_type;
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
+
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
-static u8 boot_cpu_type;
-static u16 boot_cpu_address;
-static struct pcpu pcpu_devices[NR_CPUS];
+#ifdef CONFIG_CRASH_DUMP
+__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+#endif
+
+static unsigned int smp_max_threads __initdata = -1U;
+cpumask_t cpu_setup_mask;
+
+static int __init early_smt(char *s)
+{
+ get_option(&s, &smp_max_threads);
+ return 0;
+}
+early_param("smt", early_smt);
/*
* The smp_cpu_state_mutex must be held when changing the state or polarization
- * member of a pcpu data structure within the pcpu_devices arreay.
+ * member of a pcpu data structure within the pcpu_devices array.
*/
DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
- register unsigned int reg1 asm ("1") = parm;
- int cc;
-
- asm volatile(
- " sigp %1,%2,0(%3)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
- if (status && cc == 1)
- *status = reg1;
- return cc;
-}
-
-static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
{
int cc;
@@ -125,7 +142,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
static inline int pcpu_stopped(struct pcpu *pcpu)
{
- u32 uninitialized_var(status);
+ u32 status;
if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
0, &status) != SIGP_CC_STATUS_STORED)
@@ -145,159 +162,187 @@ static inline int pcpu_running(struct pcpu *pcpu)
/*
* Find struct pcpu by cpu address.
*/
-static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
{
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
- int order;
-
- set_bit(ec_bit, &pcpu->ec_mask);
- order = pcpu_running(pcpu) ?
- SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
- pcpu_sigp_retry(pcpu, order, 0);
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ pcpu->ec_clk = get_tod_clock_fast();
+ pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0);
}
-static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
- struct _lowcore *lc;
-
- if (pcpu != &pcpu_devices[0]) {
- pcpu->lowcore = (struct _lowcore *)
- __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- pcpu->panic_stack = __get_free_page(GFP_KERNEL);
- if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
- goto out;
- }
- lc = pcpu->lowcore;
- memcpy(lc, &S390_lowcore, 512);
+ unsigned long async_stack, nodat_stack, mcck_stack;
+ struct lowcore *lc;
+
+ lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+ nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+ async_stack = stack_alloc();
+ mcck_stack = stack_alloc();
+ if (!lc || !nodat_stack || !async_stack || !mcck_stack)
+ goto out;
+ memcpy(lc, get_lowcore(), 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
- lc->async_stack = pcpu->async_stack + ASYNC_SIZE
- - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
- - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->async_stack = async_stack + STACK_INIT_OFFSET;
+ lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
+ lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
lc->cpu_nr = cpu;
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE) {
- lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
- if (!lc->extended_save_area_addr)
- goto out;
- }
-#else
- if (vdso_alloc_per_cpu(lc))
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
+ if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
-#endif
+ if (abs_lowcore_map(cpu, lc, true))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
- pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
- if (pcpu != &pcpu_devices[0]) {
- free_page(pcpu->panic_stack);
- free_pages(pcpu->async_stack, ASYNC_ORDER);
- free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
- }
+ stack_free(mcck_stack);
+ stack_free(async_stack);
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
+ free_pages((unsigned long) lc, LC_ORDER);
return -ENOMEM;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
- pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
- lowcore_ptr[pcpu - pcpu_devices] = NULL;
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE) {
- struct _lowcore *lc = pcpu->lowcore;
+ unsigned long async_stack, nodat_stack, mcck_stack;
+ struct lowcore *lc;
- free_page((unsigned long) lc->extended_save_area_addr);
- lc->extended_save_area_addr = 0;
- }
-#else
- vdso_free_per_cpu(pcpu->lowcore);
-#endif
- if (pcpu != &pcpu_devices[0]) {
- free_page(pcpu->panic_stack);
- free_pages(pcpu->async_stack, ASYNC_ORDER);
- free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
- }
+ lc = lowcore_ptr[cpu];
+ nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
+ async_stack = lc->async_stack - STACK_INIT_OFFSET;
+ mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
+ lowcore_ptr[cpu] = NULL;
+ abs_lowcore_unmap(cpu);
+ nmi_free_mcesa(&lc->mcesad);
+ stack_free(async_stack);
+ stack_free(mcck_stack);
+ free_pages(nodat_stack, THREAD_SIZE_ORDER);
+ free_pages((unsigned long) lc, LC_ORDER);
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
- struct _lowcore *lc = pcpu->lowcore;
+ struct lowcore *lc, *abs_lc;
- atomic_inc(&init_mm.context.attach_count);
+ lc = lowcore_ptr[cpu];
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
+ lc->restart_flags = RESTART_FLAG_CTLREGS;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
- lc->kernel_asce = S390_lowcore.kernel_asce;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->ftrace_func = S390_lowcore.ftrace_func;
- lc->user_timer = lc->system_timer = lc->steal_timer = 0;
- __ctl_store(lc->cregs_save_area, 0, 15);
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = s390_invalid_asce;
+ lc->user_timer = lc->system_timer =
+ lc->steal_timer = lc->avg_steal_timer = 0;
+ abs_lc = get_abs_lowcore();
+ memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+ put_abs_lowcore(abs_lc);
+ lc->cregs_save_area[1] = lc->user_asce;
+ lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
- memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
+ arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
- struct _lowcore *lc = pcpu->lowcore;
- struct thread_info *ti = task_thread_info(tsk);
-
- lc->kernel_stack = (unsigned long) task_stack_page(tsk)
- + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->thread_info = (unsigned long) task_thread_info(tsk);
- lc->current_task = (unsigned long) tsk;
- lc->user_timer = ti->user_timer;
- lc->system_timer = ti->system_timer;
+ struct lowcore *lc;
+
+ lc = lowcore_ptr[cpu];
+ lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
+ lc->current_task = (unsigned long)tsk;
+ lc->lpp = LPP_MAGIC;
+ lc->current_pid = tsk->pid;
+ lc->user_timer = tsk->thread.user_timer;
+ lc->guest_timer = tsk->thread.guest_timer;
+ lc->system_timer = tsk->thread.system_timer;
+ lc->hardirq_timer = tsk->thread.hardirq_timer;
+ lc->softirq_timer = tsk->thread.softirq_timer;
lc->steal_timer = 0;
+#ifdef CONFIG_STACKPROTECTOR
+ lc->stack_canary = tsk->stack_canary;
+#endif
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
- struct _lowcore *lc = pcpu->lowcore;
+ struct lowcore *lc;
+ lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
- lc->restart_source = -1UL;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ lc->restart_source = -1U;
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
+typedef void (pcpu_delegate_fn)(void *);
+
/*
* Call function via PSW restart on pcpu and stop the current cpu.
*/
-static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
- void *data, unsigned long stack)
+static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
+{
+ func(data); /* should not return */
+}
+
+static void __noreturn pcpu_delegate(struct pcpu *pcpu, int cpu,
+ pcpu_delegate_fn *func,
+ void *data, unsigned long stack)
{
- struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned long source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
- __load_psw_mask(psw_kernel_bits);
- if (pcpu->address == source_cpu)
- func(data); /* should not return */
+ lc = lowcore_ptr[cpu];
+ source_cpu = stap();
+
+ if (pcpu->address == source_cpu) {
+ call_on_stack(2, stack, void, __pcpu_delegate,
+ pcpu_delegate_fn *, func, void *, data);
+ }
/* Stop target cpu (if func returns this stops the current cpu). */
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
/* Restart func on the target cpu and stop the current cpu. */
- mem_assign_absolute(lc->restart_stack, stack);
- mem_assign_absolute(lc->restart_fn, (unsigned long) func);
- mem_assign_absolute(lc->restart_data, (unsigned long) data);
- mem_assign_absolute(lc->restart_source, source_cpu);
+ if (lc) {
+ lc->restart_stack = stack;
+ lc->restart_fn = (unsigned long)func;
+ lc->restart_data = (unsigned long)data;
+ lc->restart_source = source_cpu;
+ } else {
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc);
+ }
asm volatile(
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
"1: sigp 0,%1,%3 # sigp stop to current cpu\n"
- " brc 2,1b # busy, try again\n"
+ " brc 2,1b # busy, try again"
: : "d" (pcpu->address), "d" (source_cpu),
"K" (SIGP_RESTART), "K" (SIGP_STOP)
: "0", "1", "cc");
@@ -305,27 +350,36 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
}
/*
- * Call function on an online CPU.
+ * Enable additional logical cpus for multi-threading.
*/
-void smp_call_online_cpu(void (*func)(void *), void *data)
+static int pcpu_set_smt(unsigned int mtid)
{
- struct pcpu *pcpu;
+ int cc;
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
+ if (smp_cpu_mtid == mtid)
+ return 0;
+ cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
+ if (cc == 0) {
+ smp_cpu_mtid = mtid;
+ smp_cpu_mt_shift = 0;
+ while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+ smp_cpu_mt_shift++;
+ per_cpu(pcpu_devices, 0).address = stap();
+ }
+ return cc;
}
/*
* Call function on the ipl CPU.
*/
-void smp_call_ipl_cpu(void (*func)(void *), void *data)
+void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data)
{
- pcpu_delegate(&pcpu_devices[0], func, data,
- pcpu_devices->panic_stack + PAGE_SIZE);
+ struct lowcore *lc = lowcore_ptr[0];
+
+ if (ipl_pcpu->address == stap())
+ lc = get_lowcore();
+
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -333,81 +387,94 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
-int smp_vcpu_scheduled(int cpu)
+void schedule_mcck_handler(void)
{
- return pcpu_running(pcpu_devices + cpu);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
-void smp_yield(void)
+bool notrace arch_vcpu_is_preempted(int cpu)
{
- if (MACHINE_HAS_DIAG44)
- asm volatile("diag 0,0,0x44");
+ if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
+ return false;
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
+ return false;
+ return true;
}
+EXPORT_SYMBOL(arch_vcpu_is_preempted);
-void smp_yield_cpu(int cpu)
+void notrace smp_yield_cpu(int cpu)
{
- if (MACHINE_HAS_DIAG9C)
- asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
- else if (MACHINE_HAS_DIAG44)
- asm volatile("diag 0,0,0x44");
+ if (!machine_has_diag9c())
+ return;
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
+EXPORT_SYMBOL_GPL(smp_yield_cpu);
/*
* Send cpus emergency shutdown signal. This gives the cpus the
* opportunity to complete outstanding interrupts.
*/
-void smp_emergency_stop(cpumask_t *cpumask)
+void notrace smp_emergency_stop(void)
{
+ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ static cpumask_t cpumask;
u64 end;
int cpu;
- end = get_tod_clock() + (1000000UL << 12);
- for_each_cpu(cpu, cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ arch_spin_lock(&lock);
+ cpumask_copy(&cpumask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &cpumask);
+
+ end = get_tod_clock_monotonic() + (1000000UL << 12);
+ for_each_cpu(cpu, &cpumask) {
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
- get_tod_clock() < end)
+ get_tod_clock_monotonic() < end)
cpu_relax();
}
- while (get_tod_clock() < end) {
- for_each_cpu(cpu, cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
- cpumask_clear_cpu(cpu, cpumask);
- if (cpumask_empty(cpumask))
+ while (get_tod_clock_monotonic() < end) {
+ for_each_cpu(cpu, &cpumask)
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
+ cpumask_clear_cpu(cpu, &cpumask);
+ if (cpumask_empty(&cpumask))
break;
cpu_relax();
}
+ arch_spin_unlock(&lock);
}
+NOKPROBE_SYMBOL(smp_emergency_stop);
/*
* Stop all cpus but the current one.
*/
void smp_send_stop(void)
{
- cpumask_t cpumask;
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
- cpumask_copy(&cpumask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &cpumask);
if (oops_in_progress)
- smp_emergency_stop(&cpumask);
+ smp_emergency_stop();
/* stop all processors */
- for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
while (!pcpu_stopped(pcpu))
cpu_relax();
@@ -415,15 +482,6 @@ void smp_send_stop(void)
}
/*
- * Stop the current cpu.
- */
-void smp_stop_cpu(void)
-{
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
- for (;;) ;
-}
-
-/*
* This is the main routine where commands issued by other
* cpus are handled.
*/
@@ -432,13 +490,17 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
scheduler_ipi();
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
+ if (test_bit(ec_mcck_pending, &bits))
+ s390_handle_mcck();
+ if (test_bit(ec_irq_work, &bits))
+ irq_work_run();
}
static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -453,267 +515,349 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
-#ifndef CONFIG_64BIT
-/*
- * this function sends a 'purge tlb' signal to another CPU.
- */
-static void smp_ptlb_callback(void *info)
-{
- __tlb_flush_local();
-}
-
-void smp_ptlb_all(void)
-{
- on_each_cpu(smp_ptlb_callback, NULL, 1);
-}
-EXPORT_SYMBOL(smp_ptlb_all);
-#endif /* ! CONFIG_64BIT */
-
/*
* this function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing
* anything. Worst case is that we lose a reschedule ...
*/
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
-/*
- * parameter area for the set/clear control bit callbacks
- */
-struct ec_creg_mask_parms {
- unsigned long orval;
- unsigned long andval;
- int cr;
-};
-
-/*
- * callback for setting/clearing control bits
- */
-static void smp_ctl_bit_callback(void *info)
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
{
- struct ec_creg_mask_parms *pp = info;
- unsigned long cregs[16];
-
- __ctl_store(cregs, 0, 15);
- cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
- __ctl_load(cregs, 0, 15);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
+#endif
-/*
- * Set a bit in a control register of all cpus
- */
-void smp_ctl_set_bit(int cr, int bit)
+#ifdef CONFIG_CRASH_DUMP
+
+int smp_store_status(int cpu)
{
- struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+ struct lowcore *lc;
+ struct pcpu *pcpu;
+ unsigned long pa;
- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ lc = lowcore_ptr[cpu];
+ pa = __pa(&lc->floating_pt_save_area);
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!cpu_has_vx() && !cpu_has_gs())
+ return 0;
+ pa = lc->mcesad & MCESA_ORIGIN_MASK;
+ if (cpu_has_gs())
+ pa |= lc->mcesad & MCESA_LC_MASK;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ return 0;
}
-EXPORT_SYMBOL(smp_ctl_set_bit);
/*
- * Clear a bit in a control register of all cpus
+ * Collect CPU state of the previous, crashed system.
+ * There are three cases:
+ * 1) standard zfcp/nvme dump
+ * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The boot CPU state is located in
+ * the absolute lowcore of the memory stored in the HSA. The zcore code
+ * will copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The firmware or the boot-loader
+ * stored the registers of the boot CPU in the absolute lowcore in the
+ * memory of the old system.
+ * 3) kdump or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The kexec code or the boot-loader
+ * stored the registers of the boot CPU in the memory of the old system.
+ *
+ * Note that the legacy kdump mode where the old kernel stored the CPU states
+ * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr=
+ * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
+ * of the elfcorehdr= parameter.
*/
-void smp_ctl_clear_bit(int cr, int bit)
+static bool dump_available(void)
{
- struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
-
- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+ return oldmem_data.start || is_ipl_type_dump();
}
-EXPORT_SYMBOL(smp_ctl_clear_bit);
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
+void __init smp_save_dump_ipl_cpu(void)
+{
+ struct save_area *sa;
+ void *regs;
-struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
-EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
+ if (!dump_available())
+ return;
+ sa = save_area_alloc(true);
+ regs = memblock_alloc_or_panic(512, 8);
+ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
+ save_area_add_regs(sa, regs);
+ memblock_free(regs, 512);
+ if (cpu_has_vx())
+ save_area_add_vxrs(sa, boot_cpu_vector_save_area);
+}
-static void __init smp_get_save_area(int cpu, u16 address)
+void __init smp_save_dump_secondary_cpus(void)
{
- void *lc = pcpu_devices[0].lowcore;
- struct save_area *save_area;
+ int addr, boot_cpu_addr, max_cpu_addr;
+ struct save_area *sa;
+ void *page;
- if (is_kdump_kernel())
- return;
- if (!OLDMEM_BASE && (address == boot_cpu_address ||
- ipl_info.type != IPL_TYPE_FCP_DUMP))
- return;
- if (cpu >= NR_CPUS) {
- pr_warning("CPU %i exceeds the maximum %i and is excluded "
- "from the dump\n", cpu, NR_CPUS - 1);
- return;
- }
- save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
- if (!save_area)
- panic("could not allocate memory for save area\n");
- zfcpdump_save_areas[cpu] = save_area;
-#ifdef CONFIG_CRASH_DUMP
- if (address == boot_cpu_address) {
- /* Copy the registers of the boot cpu. */
- copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
- SAVE_AREA_BASE - PAGE_SIZE, 0);
+ if (!dump_available())
return;
+ /* Allocate a page as dumping area for the store status sigps */
+ page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+ if (!page)
+ panic("ERROR: Failed to allocate %lx bytes below %lx\n",
+ PAGE_SIZE, 1UL << 31);
+
+ /* Set multi-threading state to the previous system. */
+ pcpu_set_smt(sclp.mtid_prev);
+ boot_cpu_addr = stap();
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (addr == boot_cpu_addr)
+ continue;
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ sa = save_area_alloc(false);
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+ save_area_add_regs(sa, page);
+ if (cpu_has_vx()) {
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+ save_area_add_vxrs(sa, page);
+ }
}
-#endif
- /* Get the registers of a non-boot cpu. */
- __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
- memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
+ memblock_free(page, PAGE_SIZE);
+ diag_amode31_ops.diag308_reset();
+ pcpu_set_smt(0);
}
+#endif /* CONFIG_CRASH_DUMP */
-int smp_store_status(int cpu)
+void smp_cpu_set_polarization(int cpu, int val)
{
- struct pcpu *pcpu;
-
- pcpu = pcpu_devices + cpu;
- if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
- 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
- return -EIO;
- return 0;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
-#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+int smp_cpu_get_polarization(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).polarization;
+}
-static inline void smp_get_save_area(int cpu, u16 address) { }
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+ per_cpu(pcpu_devices, cpu).capacity = val;
+}
-#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).capacity;
+}
-void smp_cpu_set_polarization(int cpu, int val)
+void smp_set_core_capacity(int cpu, unsigned long val)
{
- pcpu_devices[cpu].polarization = val;
+ int i;
+
+ cpu = smp_get_base_cpu(cpu);
+ for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+ smp_cpu_set_capacity(i, val);
}
-int smp_cpu_get_polarization(int cpu)
+int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).address;
}
-static struct sclp_cpu_info *smp_get_cpu_info(void)
+static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
static int use_sigp_detection;
- struct sclp_cpu_info *info;
int address;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ if (use_sigp_detection || sclp_get_core_info(info, early)) {
use_sigp_detection = 1;
- for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
- if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
+ for (address = 0;
+ address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
+ address += (1U << smp_cpu_mt_shift)) {
+ if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- info->cpu[info->configured].address = address;
+ info->core[info->configured].core_id =
+ address >> smp_cpu_mt_shift;
+ info->core[info->configured].type = boot_core_type;
info->configured++;
}
info->combined = info->configured;
}
- return info;
}
-static int __cpuinit smp_add_present_cpu(int cpu);
-
-static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info,
- int sysfs_add)
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+ bool configured, bool early)
{
struct pcpu *pcpu;
- cpumask_t avail;
int cpu, nr, i;
+ u16 address;
nr = 0;
- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
- cpu = cpumask_first(&avail);
- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
- continue;
- if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
+ if (sclp.has_core_type && core->type != boot_core_type)
+ return nr;
+ cpu = cpumask_first(avail);
+ address = core->core_id << smp_cpu_mt_shift;
+ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+ if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
- pcpu->address = info->cpu[i].address;
- pcpu->state = (i >= info->configured) ?
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu->address = address + i;
+ if (configured)
+ pcpu->state = CPU_STATE_CONFIGURED;
+ else
+ pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
set_cpu_present(cpu, true);
- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+ if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
else
nr++;
- cpu = cpumask_next(cpu, &avail);
+ cpumask_clear_cpu(cpu, avail);
+ cpu = cpumask_next(cpu, avail);
}
return nr;
}
-static void __init smp_detect_cpus(void)
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
{
- unsigned int cpu, c_cpus, s_cpus;
- struct sclp_cpu_info *info;
+ struct sclp_core_entry *core;
+ static cpumask_t avail;
+ bool configured;
+ u16 core_id;
+ int nr, i;
- info = smp_get_cpu_info();
- if (!info)
- panic("smp_detect_cpus failed to allocate memory\n");
- if (info->has_cpu_type) {
- for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->cpu[cpu].address != boot_cpu_address)
- continue;
- /* The boot cpu dictates the cpu type. */
- boot_cpu_type = info->cpu[cpu].type;
- break;
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ /*
+ * Add IPL core first (which got logical CPU number 0) to make sure
+ * that all SMT threads get subsequent logical CPU numbers.
+ */
+ if (early) {
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
+ for (i = 0; i < info->configured; i++) {
+ core = &info->core[i];
+ if (core->core_id == core_id) {
+ nr += smp_add_core(core, &avail, true, early);
+ break;
+ }
}
}
+ for (i = 0; i < info->combined; i++) {
+ configured = i < info->configured;
+ nr += smp_add_core(&info->core[i], &avail, configured, early);
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return nr;
+}
+
+void __init smp_detect_cpus(void)
+{
+ unsigned int cpu, mtid, c_cpus, s_cpus;
+ struct sclp_core_info *info;
+ u16 address;
+
+ /* Get CPU information */
+ info = memblock_alloc_or_panic(sizeof(*info), 8);
+ smp_get_core_info(info, 1);
+ /* Find boot CPU type */
+ if (sclp.has_core_type) {
+ address = stap();
+ for (cpu = 0; cpu < info->combined; cpu++)
+ if (info->core[cpu].core_id == address) {
+ /* The boot cpu dictates the cpu type. */
+ boot_core_type = info->core[cpu].type;
+ break;
+ }
+ if (cpu >= info->combined)
+ panic("Could not find boot CPU type");
+ }
+
+ /* Set multi-threading state for the current system */
+ mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
+ mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+ pcpu_set_smt(mtid);
+ cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1);
+
+ /* Print number of CPUs */
c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+ if (sclp.has_core_type &&
+ info->core[cpu].type != boot_core_type)
continue;
- if (cpu < info->configured) {
- smp_get_save_area(c_cpus, info->cpu[cpu].address);
- c_cpus++;
- } else
- s_cpus++;
+ if (cpu < info->configured)
+ c_cpus += smp_cpu_mtid + 1;
+ else
+ s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
- get_online_cpus();
- __smp_rescan_cpus(info, 0);
- put_online_cpus();
- kfree(info);
+ memblock_free(info, sizeof(*info));
}
/*
* Activate a secondary processor.
*/
-static void __cpuinit smp_start_secondary(void *cpuvoid)
+static void smp_start_secondary(void *cpuvoid)
{
- S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long) restart_stack;
- S390_lowcore.restart_fn = (unsigned long) do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1UL;
- restore_access_regs(S390_lowcore.access_regs_save_area);
- __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
- __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
+ struct lowcore *lc = get_lowcore();
+ int cpu = raw_smp_processor_id();
+
+ lc->last_update_clock = get_tod_clock();
+ lc->restart_stack = (unsigned long)restart_stack;
+ lc->restart_fn = (unsigned long)do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1U;
+ lc->restart_flags = 0;
+ restore_access_regs(lc->access_regs_save_area);
cpu_init();
- preempt_disable();
+ rcutree_report_cpu_starting(cpu);
init_cpu_timer();
- init_cpu_vtimer();
+ vtime_init();
+ vdso_getcpu_init();
pfault_init();
- notify_cpu_starting(smp_processor_id());
- set_cpu_online(smp_processor_id(), true);
+ cpumask_set_cpu(cpu, &cpu_setup_mask);
+ update_cpu_masks();
+ notify_cpu_starting(cpu);
+ if (topology_cpu_dedicated(cpu))
+ set_cpu_flag(CIF_DEDICATED_CPU);
+ else
+ clear_cpu_flag(CIF_DEDICATED_CPU);
+ set_cpu_online(cpu, true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
- pcpu = pcpu_devices + cpu;
if (pcpu->state != CPU_STATE_CONFIGURED)
return -EIO;
if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
@@ -723,44 +867,50 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
rc = pcpu_alloc_lowcore(pcpu, cpu);
if (rc)
return rc;
+ /*
+ * Make sure global control register contents do not change
+ * until new CPU has initialized control registers.
+ */
+ system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
+ /* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
+ system_ctlreg_unlock();
return 0;
}
-static int __init setup_possible_cpus(char *s)
-{
- int max, cpu;
+static unsigned int setup_possible_cpus __initdata;
- if (kstrtoint(s, 0, &max) < 0)
- return 0;
- init_cpu_possible(cpumask_of(0));
- for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
- set_cpu_possible(cpu, true);
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
return 0;
}
-early_param("possible_cpus", setup_possible_cpus);
-
-#ifdef CONFIG_HOTPLUG_CPU
+early_param("possible_cpus", _setup_possible_cpus);
int __cpu_disable(void)
{
- unsigned long cregs[16];
+ struct ctlreg cregs[16];
+ int cpu;
/* Handle possible pending IPIs */
smp_handle_ext_call();
- set_cpu_online(smp_processor_id(), false);
+ cpu = smp_processor_id();
+ set_cpu_online(cpu, false);
+ cpumask_clear_cpu(cpu, &cpu_setup_mask);
+ update_cpu_masks();
/* Disable pseudo page faults on this cpu. */
pfault_fini();
/* Disable interrupt sources via control register. */
- __ctl_store(cregs, 0, 15);
- cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
- cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
- cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
- __ctl_load(cregs, 0, 15);
+ __local_ctl_store(0, 15, cregs);
+ cregs[0].val &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6].val &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14].val &= ~0x1f000000UL; /* disable most machine checks */
+ __local_ctl_load(0, 15, cregs);
+ clear_cpu_flag(CIF_NOHZ_DELAY);
return 0;
}
@@ -769,58 +919,67 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
- atomic_dec(&init_mm.context.attach_count);
+ pcpu_free_lowcore(pcpu, cpu);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
-#endif /* CONFIG_HOTPLUG_CPU */
+void __init smp_fill_possible_mask(void)
+{
+ unsigned int possible, sclp_max, cpu;
+
+ sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
+ sclp_max = min(smp_max_threads, sclp_max);
+ sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
+ possible = setup_possible_cpus ?: nr_cpu_ids;
+ possible = min(possible, sclp_max);
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- /* request the 0x1201 emergency signal external interrupt */
- if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
- /* request the 0x1202 external call external interrupt */
- if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
+ system_ctl_set_bit(0, 14);
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
- smp_detect_cpus();
+ system_ctl_set_bit(0, 13);
+ smp_rescan_cpus(true);
}
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
-
- boot_cpu_address = stap();
- pcpu->state = CPU_STATE_CONFIGURED;
- pcpu->address = boot_cpu_address;
- pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
- pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
- + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
- + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- S390_lowcore.percpu_offset = __per_cpu_offset[0];
- smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
- set_cpu_present(0, true);
- set_cpu_online(0, true);
-}
+ struct lowcore *lc = get_lowcore();
-void __init smp_cpus_done(unsigned int max_cpus)
-{
+ WARN_ON(!cpu_present(0) || !cpu_online(0));
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
+ smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}
void __init smp_setup_processor_id(void)
{
- S390_lowcore.cpu_nr = 0;
+ struct lowcore *lc = get_lowcore();
+
+ lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
}
/*
@@ -834,14 +993,13 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -851,41 +1009,53 @@ static ssize_t cpu_configure_store(struct device *dev,
const char *buf, size_t count)
{
struct pcpu *pcpu;
- int cpu, val, rc;
+ int cpu, val, rc, i;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
- /* disallow configuration changes of online cpus and cpu 0 */
+ /* disallow configuration changes of online cpus */
cpu = dev->id;
- if (cpu_online(cpu) || cpu == 0)
- goto out;
- pcpu = pcpu_devices + cpu;
+ cpu = smp_get_base_cpu(cpu);
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_online(cpu + i))
+ goto out;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
if (pcpu->state != CPU_STATE_CONFIGURED)
break;
- rc = sclp_cpu_deconfigure(pcpu->address);
+ rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
- pcpu->state = CPU_STATE_STANDBY;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
topology_expect_change();
break;
case 1:
if (pcpu->state != CPU_STATE_STANDBY)
break;
- rc = sclp_cpu_configure(pcpu->address);
+ rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
- pcpu->state = CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
topology_expect_change();
break;
default:
@@ -893,23 +1063,20 @@ static ssize_t cpu_configure_store(struct device *dev,
}
out:
mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
-#endif /* CONFIG_HOTPLUG_CPU */
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
static struct attribute *cpu_common_attrs[] = {
-#ifdef CONFIG_HOTPLUG_CPU
&dev_attr_configure.attr,
-#endif
&dev_attr_address.attr,
NULL,
};
@@ -918,42 +1085,6 @@ static struct attribute_group cpu_common_attr_group = {
.attrs = cpu_common_attrs,
};
-static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long idle_count;
- unsigned int sequence;
-
- do {
- sequence = ACCESS_ONCE(idle->sequence);
- idle_count = ACCESS_ONCE(idle->idle_count);
- if (ACCESS_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while ((sequence & 1) || (idle->sequence != sequence));
- return sprintf(buf, "%llu\n", idle_count);
-}
-static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
-
-static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long now, idle_time, idle_enter, idle_exit;
- unsigned int sequence;
-
- do {
- now = get_tod_clock();
- sequence = ACCESS_ONCE(idle->sequence);
- idle_time = ACCESS_ONCE(idle->idle_time);
- idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
- idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (idle->sequence != sequence));
- idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
- return sprintf(buf, "%llu\n", idle_time >> 12);
-}
-static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-
static struct attribute *cpu_online_attrs[] = {
&dev_attr_idle_count.attr,
&dev_attr_idle_time_us.attr,
@@ -964,76 +1095,61 @@ static struct attribute_group cpu_online_attr_group = {
.attrs = cpu_online_attrs,
};
-static int __cpuinit smp_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int smp_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = &pcpu_devices[cpu].cpu;
- struct device *s = &c->dev;
- int err = 0;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
- break;
- case CPU_DEAD:
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
- break;
- }
- return notifier_from_errno(err);
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
+
+ return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
+}
+
+static int smp_cpu_pre_down(unsigned int cpu)
+{
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
+
+ sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
+ return 0;
}
-static int __cpuinit smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- struct cpu *c = &pcpu_devices[cpu].cpu;
- struct device *s = &c->dev;
+ return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
+{
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
- c->hotpluggable = 1;
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
rc = register_cpu(c, cpu);
if (rc)
goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
- if (cpu_online(cpu)) {
- rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
- if (rc)
- goto out_online;
- }
rc = topology_cpu_init(c);
if (rc)
goto out_topology;
return 0;
out_topology:
- if (cpu_online(cpu))
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
-out_online:
- sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
-#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu(c);
-#endif
out:
return rc;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
{
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int nr;
- info = smp_get_cpu_info();
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
- get_online_cpus();
- mutex_lock(&smp_cpu_state_mutex);
- nr = __smp_rescan_cpus(info, 1);
- mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ smp_get_core_info(info, 0);
+ nr = __smp_rescan_cpus(info, early);
kfree(info);
if (nr)
topology_schedule_update();
@@ -1047,27 +1163,30 @@ static ssize_t __ref rescan_store(struct device *dev,
{
int rc;
- rc = smp_rescan_cpus();
+ rc = lock_device_hotplug_sysfs();
+ if (rc)
+ return rc;
+ rc = smp_rescan_cpus(false);
+ unlock_device_hotplug();
return rc ? rc : count;
}
-static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
-#endif /* CONFIG_HOTPLUG_CPU */
+static DEVICE_ATTR_WO(rescan);
static int __init s390_smp_init(void)
{
- int cpu, rc;
+ struct device *dev_root;
+ int rc;
- hotcpu_notifier(smp_cpu_notify, 0);
-#ifdef CONFIG_HOTPLUG_CPU
- rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
- if (rc)
- return rc;
-#endif
- for_each_present_cpu(cpu) {
- rc = smp_add_present_cpu(cpu);
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ rc = device_create_file(dev_root, &dev_attr_rescan);
+ put_device(dev_root);
if (rc)
return rc;
}
- return 0;
+ rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
+ smp_cpu_online, smp_cpu_pre_down);
+ rc = rc <= 0 ? rc : 0;
+ return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c
new file mode 100644
index 000000000000..d4e40483f008
--- /dev/null
+++ b/arch/s390/kernel/stackprotector.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) "stackprot: " fmt
+#endif
+
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+#include <asm/asm-offsets.h>
+#include <asm/arch-stackprotector.h>
+
+#ifdef __DECOMPRESSOR
+
+#define DEBUGP boot_debug
+#define EMERGP boot_emerg
+#define PANIC boot_panic
+
+#else /* __DECOMPRESSOR */
+
+#define DEBUGP pr_debug
+#define EMERGP pr_emerg
+#define PANIC panic
+
+#endif /* __DECOMPRESSOR */
+
+int __bootdata_preserved(stack_protector_debug);
+
+unsigned long __stack_chk_guard;
+EXPORT_SYMBOL(__stack_chk_guard);
+
+struct insn_ril {
+ u8 opc1 : 8;
+ u8 r1 : 4;
+ u8 opc2 : 4;
+ u32 imm;
+} __packed;
+
+/*
+ * Convert a virtual instruction address to a real instruction address. The
+ * decompressor needs to patch instructions within the kernel image based on
+ * their virtual addresses, while dynamic address translation is still
+ * disabled. Therefore a translation from virtual kernel image addresses to
+ * the corresponding physical addresses is required.
+ *
+ * After dynamic address translation is enabled and when the kernel needs to
+ * patch instructions such a translation is not required since the addresses
+ * are identical.
+ */
+static struct insn_ril *vaddress_to_insn(unsigned long vaddress)
+{
+#ifdef __DECOMPRESSOR
+ return (struct insn_ril *)__kernel_pa(vaddress);
+#else
+ return (struct insn_ril *)vaddress;
+#endif
+}
+
+static unsigned long insn_to_vaddress(struct insn_ril *insn)
+{
+#ifdef __DECOMPRESSOR
+ return (unsigned long)__kernel_va(insn);
+#else
+ return (unsigned long)insn;
+#endif
+}
+
+#define INSN_RIL_STRING_SIZE (sizeof(struct insn_ril) * 2 + 1)
+
+static void insn_ril_to_string(char *str, struct insn_ril *insn)
+{
+ u8 *ptr = (u8 *)insn;
+ int i;
+
+ for (i = 0; i < sizeof(*insn); i++)
+ hex_byte_pack(&str[2 * i], ptr[i]);
+ str[2 * i] = 0;
+}
+
+static void stack_protector_dump(struct insn_ril *old, struct insn_ril *new)
+{
+ char ostr[INSN_RIL_STRING_SIZE];
+ char nstr[INSN_RIL_STRING_SIZE];
+
+ insn_ril_to_string(ostr, old);
+ insn_ril_to_string(nstr, new);
+ DEBUGP("%016lx: %s -> %s\n", insn_to_vaddress(old), ostr, nstr);
+}
+
+static int stack_protector_verify(struct insn_ril *insn, unsigned long kernel_start)
+{
+ char istr[INSN_RIL_STRING_SIZE];
+ unsigned long vaddress, offset;
+
+ /* larl */
+ if (insn->opc1 == 0xc0 && insn->opc2 == 0x0)
+ return 0;
+ /* lgrl */
+ if (insn->opc1 == 0xc4 && insn->opc2 == 0x8)
+ return 0;
+ insn_ril_to_string(istr, insn);
+ vaddress = insn_to_vaddress(insn);
+ if (__is_defined(__DECOMPRESSOR)) {
+ offset = (unsigned long)insn - kernel_start + TEXT_OFFSET;
+ EMERGP("Unexpected instruction at %016lx/%016lx: %s\n", vaddress, offset, istr);
+ PANIC("Stackprotector error\n");
+ } else {
+ EMERGP("Unexpected instruction at %016lx: %s\n", vaddress, istr);
+ }
+ return -EINVAL;
+}
+
+int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start)
+{
+ unsigned long canary, *loc;
+ struct insn_ril *insn, new;
+ int rc;
+
+ /*
+ * Convert LARL/LGRL instructions to LLILF so register R1 contains the
+ * address of the per-cpu / per-process stack canary:
+ *
+ * LARL/LGRL R1,__stack_chk_guard => LLILF R1,__lc_stack_canary
+ */
+ canary = __LC_STACK_CANARY;
+ if (machine_has_relocated_lowcore())
+ canary += LOWCORE_ALT_ADDRESS;
+ for (loc = start; loc < end; loc++) {
+ insn = vaddress_to_insn(*loc);
+ rc = stack_protector_verify(insn, kernel_start);
+ if (rc)
+ return rc;
+ new = *insn;
+ new.opc1 = 0xc0;
+ new.opc2 = 0xf;
+ new.imm = canary;
+ if (stack_protector_debug)
+ stack_protector_dump(insn, &new);
+ s390_kernel_write(insn, &new, sizeof(*insn));
+ }
+ return 0;
+}
+
+#ifdef __DECOMPRESSOR
+void __stack_protector_apply_early(unsigned long kernel_start)
+{
+ unsigned long *start, *end;
+
+ start = (unsigned long *)vmlinux.stack_prot_start;
+ end = (unsigned long *)vmlinux.stack_prot_end;
+ __stack_protector_apply(start, end, kernel_start);
+}
+#endif
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 1785cd82253c..3aae7f70e6ab 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -1,96 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Stack trace management functions
*
* Copyright IBM Corp. 2006
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/sched.h>
+#include <linux/perf_event.h>
#include <linux/stacktrace.h>
-#include <linux/kallsyms.h>
-#include <linux/module.h>
-
-static unsigned long save_context_stack(struct stack_trace *trace,
- unsigned long sp,
- unsigned long low,
- unsigned long high,
- int savesched)
+#include <linux/uaccess.h>
+#include <asm/asm-offsets.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
+ struct unwind_state state;
unsigned long addr;
- while(1) {
- sp &= PSW_ADDR_INSN;
- if (sp < low || sp > high)
- return sp;
- sf = (struct stack_frame *)sp;
- while(1) {
- addr = sf->gprs[8] & PSW_ADDR_INSN;
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *)sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long)(sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *)sp;
- addr = regs->psw.addr & PSW_ADDR_INSN;
- if (savesched || !in_sched_functions(addr)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- }
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = regs->gprs[15];
+ unwind_for_each_frame(&state, task, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || !consume_entry(cookie, addr))
+ break;
+ }
+}
+
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ unwind_for_each_frame(&state, task, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK)
+ return -EINVAL;
+
+ if (state.regs)
+ return -EINVAL;
+
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ return -EINVAL;
+
+#ifdef CONFIG_RETHOOK
+ /*
+ * Mark stacktraces with krethook functions on them
+ * as unreliable.
+ */
+ if (state.ip == (unsigned long)arch_rethook_trampoline)
+ return -EINVAL;
+#endif
+
+ if (!consume_entry(cookie, addr))
+ return -EINVAL;
+ }
+
+ /* Check for stack corruption */
+ if (unwind_error(&state))
+ return -EINVAL;
+ return 0;
+}
+
+static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry, bool perf,
+ unsigned long ip)
+{
+#ifdef CONFIG_PERF_EVENTS
+ if (perf) {
+ if (perf_callchain_store(entry, ip))
+ return false;
+ return true;
}
+#endif
+ return consume_entry(cookie, ip);
+}
+
+static inline bool ip_invalid(unsigned long ip)
+{
+ /*
+ * Perform some basic checks if an instruction address taken
+ * from unreliable source is invalid.
+ */
+ if (ip & 1)
+ return true;
+ if (ip < mmap_min_addr)
+ return true;
+ if (ip >= current->mm->context.asce_limit)
+ return true;
+ return false;
}
-void save_stack_trace(struct stack_trace *trace)
+static inline bool ip_within_vdso(unsigned long ip)
{
- register unsigned long sp asm ("15");
- unsigned long orig_sp, new_sp;
-
- orig_sp = sp & PSW_ADDR_INSN;
- new_sp = save_context_stack(trace, orig_sp,
- S390_lowcore.panic_stack - PAGE_SIZE,
- S390_lowcore.panic_stack, 1);
- if (new_sp != orig_sp)
+ return in_range(ip, current->mm->context.vdso_base, vdso_text_size());
+}
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry,
+ const struct pt_regs *regs, bool perf)
+{
+ struct stack_frame_vdso_wrapper __user *sf_vdso;
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (!current->mm)
return;
- new_sp = save_context_stack(trace, new_sp,
- S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack, 1);
- if (new_sp != orig_sp)
+ ip = instruction_pointer(regs);
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
return;
- save_context_stack(trace, new_sp,
- S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE, 1);
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (1) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ /*
+ * VDSO entry code has a non-standard stack frame layout.
+ * See VDSO user wrapper code for details.
+ */
+ if (!sp && ip_within_vdso(ip)) {
+ sf_vdso = (void __user *)sf;
+ if (__get_user(ip, &sf_vdso->return_address))
+ break;
+ sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD;
+ sf = (void __user *)sp;
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ } else {
+ sf = (void __user *)sp;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ }
+ /* Sanity check: ABI requires SP to be 8 byte aligned. */
+ if (sp & 0x7)
+ break;
+ if (ip_invalid(ip)) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (!first)
+ break;
+ ip = regs->gprs[14];
+ if (ip_invalid(ip))
+ break;
+ }
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
+ break;
+ first = false;
+ }
+ pagefault_enable();
}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
{
- unsigned long sp, low, high;
-
- sp = tsk->thread.ksp & PSW_ADDR_INSN;
- low = (unsigned long) task_stack_page(tsk);
- high = (unsigned long) task_pt_regs(tsk);
- save_context_stack(trace, sp, low, high, 0);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
new file mode 100644
index 000000000000..5eae2e25997a
--- /dev/null
+++ b/arch/s390/kernel/sthyi.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/syscalls.h>
+#include <linux/mutex.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/facility.h>
+#include <asm/sthyi.h>
+#include <asm/asm.h>
+#include "entry.h"
+
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
+ * as they are justified with spaces.
+ */
+#define CP 0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
+
+enum hdr_flags {
+ HDR_NOT_LPAR = 0x10,
+ HDR_STACK_INCM = 0x20,
+ HDR_STSI_UNAV = 0x40,
+ HDR_PERF_UNAV = 0x80,
+};
+
+enum mac_validity {
+ MAC_NAME_VLD = 0x20,
+ MAC_ID_VLD = 0x40,
+ MAC_CNT_VLD = 0x80,
+};
+
+enum par_flag {
+ PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+ PAR_GRP_VLD = 0x08,
+ PAR_ID_VLD = 0x10,
+ PAR_ABS_VLD = 0x20,
+ PAR_WGHT_VLD = 0x40,
+ PAR_PCNT_VLD = 0x80,
+};
+
+struct hdr_sctn {
+ u8 infhflg1;
+ u8 infhflg2; /* reserved */
+ u8 infhval1; /* reserved */
+ u8 infhval2; /* reserved */
+ u8 reserved[3];
+ u8 infhygct;
+ u16 infhtotl;
+ u16 infhdln;
+ u16 infmoff;
+ u16 infmlen;
+ u16 infpoff;
+ u16 infplen;
+ u16 infhoff1;
+ u16 infhlen1;
+ u16 infgoff1;
+ u16 infglen1;
+ u16 infhoff2;
+ u16 infhlen2;
+ u16 infgoff2;
+ u16 infglen2;
+ u16 infhoff3;
+ u16 infhlen3;
+ u16 infgoff3;
+ u16 infglen3;
+ u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+ u8 infmflg1; /* reserved */
+ u8 infmflg2; /* reserved */
+ u8 infmval1;
+ u8 infmval2; /* reserved */
+ u16 infmscps;
+ u16 infmdcps;
+ u16 infmsifl;
+ u16 infmdifl;
+ char infmname[8];
+ char infmtype[4];
+ char infmmanu[16];
+ char infmseq[16];
+ char infmpman[4];
+ u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+ u8 infpflg1;
+ u8 infpflg2; /* reserved */
+ u8 infpval1;
+ u8 infpval2; /* reserved */
+ u16 infppnum;
+ u16 infpscps;
+ u16 infpdcps;
+ u16 infpsifl;
+ u16 infpdifl;
+ u16 reserved;
+ char infppnam[8];
+ u32 infpwbcp;
+ u32 infpabcp;
+ u32 infpwbif;
+ u32 infpabif;
+ char infplgnm[8];
+ u32 infplgcp;
+ u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+ struct hdr_sctn hdr;
+ struct mac_sctn mac;
+ struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+ u64 lpar_cap;
+ u64 lpar_grp_cap;
+ u64 lpar_weight;
+ u64 all_weight;
+ int cpu_num_ded;
+ int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+ struct cpu_inf cp;
+ struct cpu_inf ifl;
+};
+
+/*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is very computational/memory expensive. Therefore we
+ * cache the retrieved data whose valid period is 1s.
+ */
+#define CACHE_VALID_JIFFIES HZ
+
+struct sthyi_info {
+ void *info;
+ unsigned long end;
+};
+
+static DEFINE_MUTEX(sthyi_mutex);
+static struct sthyi_info sthyi_cache;
+
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+ return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
+ */
+static u32 scale_cap(u32 in)
+{
+ return (0x10000 * in) / 100;
+}
+
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+ sctns->hdr.infhdln = sizeof(sctns->hdr);
+ sctns->hdr.infmoff = sizeof(sctns->hdr);
+ sctns->hdr.infmlen = sizeof(sctns->mac);
+ sctns->hdr.infplen = sizeof(sctns->par);
+ sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+ sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+ struct sysinfo_1_1_1 *sysinfo)
+{
+ sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+ if (*(u64 *)sctns->mac.infmname != 0)
+ sctns->mac.infmval1 |= MAC_NAME_VLD;
+
+ if (stsi(sysinfo, 1, 1, 1))
+ return;
+
+ memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+ memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+ memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+ memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+ sctns->mac.infmval1 |= MAC_ID_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+ struct sysinfo_2_2_2 *sysinfo)
+{
+ if (stsi(sysinfo, 2, 2, 2))
+ return;
+
+ sctns->par.infppnum = sysinfo->lpar_number;
+ memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+ sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+ void *sysinfo;
+
+ /* Errors are handled through the validity bits in the response. */
+ sysinfo = (void *)__get_free_page(GFP_KERNEL);
+ if (!sysinfo)
+ return;
+
+ fill_stsi_mac(sctns, sysinfo);
+ fill_stsi_par(sctns, sysinfo);
+
+ free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+ struct diag204_x_phys_block *block,
+ void *diag224_buf)
+{
+ int i;
+
+ for (i = 0; i < block->hdr.cpus; i++) {
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdcps++;
+ else
+ sctns->mac.infmscps++;
+ break;
+ case IFL:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdifl++;
+ else
+ sctns->mac.infmsifl++;
+ break;
+ }
+ }
+ sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+ bool this_lpar,
+ void *diag224_buf,
+ struct diag204_x_part_block *block)
+{
+ int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+ struct cpu_inf *cpu_inf;
+
+ for (i = 0; i < block->hdr.rcpus; i++) {
+ if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+ continue;
+
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ cpu_inf = &part_inf->cp;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_cp |= block->cpus[i].cur_weight;
+ break;
+ case IFL:
+ cpu_inf = &part_inf->ifl;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_ifl |= block->cpus[i].cur_weight;
+ break;
+ default:
+ continue;
+ }
+
+ if (!this_lpar)
+ continue;
+
+ capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+ cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+ cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+ if (block->cpus[i].weight == DED_WEIGHT)
+ cpu_inf->cpu_num_ded += 1;
+ else
+ cpu_inf->cpu_num_shd += 1;
+ }
+
+ if (this_lpar && capped) {
+ part_inf->cp.lpar_weight = weight_cp;
+ part_inf->ifl.lpar_weight = weight_ifl;
+ }
+ part_inf->cp.all_weight += weight_cp;
+ part_inf->ifl.all_weight += weight_ifl;
+ return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void *diag204_get_data(bool diag204_allow_busy)
+{
+ unsigned long subcode;
+ void *diag204_buf;
+ int pages, rc;
+
+ subcode = DIAG204_SUBC_RSI;
+ subcode |= DIAG204_INFO_EXT;
+ pages = diag204(subcode, 0, NULL);
+ if (pages < 0)
+ return ERR_PTR(pages);
+ if (pages == 0)
+ return ERR_PTR(-ENODATA);
+ diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+ PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!diag204_buf)
+ return ERR_PTR(-ENOMEM);
+ subcode = DIAG204_SUBC_STIB7;
+ subcode |= DIAG204_INFO_EXT;
+ if (diag204_has_bif() && diag204_allow_busy)
+ subcode |= DIAG204_BIF_BIT;
+ rc = diag204(subcode, pages, diag204_buf);
+ if (rc < 0) {
+ vfree(diag204_buf);
+ return ERR_PTR(rc);
+ }
+ return diag204_buf;
+}
+
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+ /*
+ * Check if validity bits are set when diag204 data
+ * is gathered.
+ */
+ if (sctns->par.infpval1)
+ return true;
+ return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+ int i;
+ bool this_lpar;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
+
+ diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+ if (!diag224_buf || diag224(diag224_buf))
+ goto out;
+
+ ti_hdr = diag204_buf;
+ part_block = diag204_buf + sizeof(*ti_hdr);
+
+ for (i = 0; i < ti_hdr->npar; i++) {
+ /*
+ * For the calling lpar we also need to get the cpu
+ * caps and weights. The time information block header
+ * specifies the offset to the partition block of the
+ * caller lpar, so we know when we process its data.
+ */
+ this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+ part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+ part_block);
+ }
+
+ phys_block = (struct diag204_x_phys_block *)part_block;
+ part_block = diag204_buf + ti_hdr->this_part;
+ if (part_block->hdr.mtid)
+ sctns->par.infpflg1 = PAR_MT_EN;
+
+ sctns->par.infpval1 |= PAR_GRP_VLD;
+ sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+ sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+ memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+ sizeof(sctns->par.infplgnm));
+
+ sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+ sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+ sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+ sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+ sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+ sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+ sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+ sctns->par.infpval1 |= PAR_ABS_VLD;
+
+ /*
+ * Everything below needs global performance data to be
+ * meaningful.
+ */
+ if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+ sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+ goto out;
+ }
+
+ fill_diag_mac(sctns, phys_block, diag224_buf);
+
+ if (lpar_inf.cp.lpar_weight) {
+ sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+ lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+ }
+
+ if (lpar_inf.ifl.lpar_weight) {
+ sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+ lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+ }
+ sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+ free_page((unsigned long)diag224_buf);
+}
+
+static int sthyi(u64 vaddr, u64 *rc)
+{
+ union register_pair r1 = { .even = 0, }; /* subcode */
+ union register_pair r2 = { .even = vaddr, };
+ int cc;
+
+ asm volatile(
+ ".insn rre,0xB2560000,%[r1],%[r2]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
+ : [r1] "d" (r1.pair)
+ : CC_CLOBBER_LIST("memory"));
+ *rc = r2.odd;
+ return CC_TRANSFORM(cc);
+}
+
+static int fill_dst(void *dst, u64 *rc)
+{
+ void *diag204_buf;
+
+ struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
+
+ /*
+ * If the facility is on, we don't want to emulate the instruction.
+ * We ask the hypervisor to provide the data.
+ */
+ if (test_facility(74)) {
+ memset(dst, 0, PAGE_SIZE);
+ return sthyi((u64)dst, rc);
+ }
+ /*
+ * When emulating, if diag204 returns BUSY don't reset dst buffer
+ * and use cached data.
+ */
+ *rc = 0;
+ diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+ if (IS_ERR(diag204_buf))
+ return PTR_ERR(diag204_buf);
+ memset(dst, 0, PAGE_SIZE);
+ fill_hdr(sctns);
+ fill_stsi(sctns);
+ fill_diag(sctns, diag204_buf);
+ vfree(diag204_buf);
+ return 0;
+}
+
+static int sthyi_init_cache(void)
+{
+ if (sthyi_cache.info)
+ return 0;
+ sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!sthyi_cache.info)
+ return -ENOMEM;
+ sthyi_cache.end = jiffies - 1; /* expired */
+ return 0;
+}
+
+static int sthyi_update_cache(u64 *rc)
+{
+ int r;
+
+ r = fill_dst(sthyi_cache.info, rc);
+ if (r == 0) {
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ } else if (r == -EBUSY) {
+ /* mark as expired and return 0 to keep using cached data */
+ sthyi_cache.end = jiffies - 1;
+ r = 0;
+ }
+ return r;
+}
+
+/*
+ * sthyi_fill - Fill page with data returned by the STHYI instruction
+ *
+ * @dst: Pointer to zeroed page
+ * @rc: Pointer for storing the return code of the instruction
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned, the rc parameter is the
+ * return code which is passed in register R2 + 1.
+ */
+int sthyi_fill(void *dst, u64 *rc)
+{
+ int r;
+
+ mutex_lock(&sthyi_mutex);
+ r = sthyi_init_cache();
+ if (r)
+ goto out;
+
+ if (time_is_before_jiffies(sthyi_cache.end)) {
+ /* cache expired */
+ r = sthyi_update_cache(rc);
+ if (r)
+ goto out;
+ }
+ *rc = 0;
+ memcpy(dst, sthyi_cache.info, PAGE_SIZE);
+out:
+ mutex_unlock(&sthyi_mutex);
+ return r;
+}
+EXPORT_SYMBOL_GPL(sthyi_fill);
+
+SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
+ u64 __user *, return_code, unsigned long, flags)
+{
+ u64 sthyi_rc;
+ void *info;
+ int r;
+
+ if (flags)
+ return -EINVAL;
+ if (function_code != STHYI_FC_CP_IFL_CAP)
+ return -EOPNOTSUPP;
+ info = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ r = sthyi_fill(info, &sthyi_rc);
+ if (r < 0)
+ goto out;
+ if (return_code && put_user(sthyi_rc, return_code)) {
+ r = -EFAULT;
+ goto out;
+ }
+ if (copy_to_user(buffer, info, PAGE_SIZE))
+ r = -EFAULT;
+out:
+ free_page((unsigned long)info);
+ return r;
+}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
deleted file mode 100644
index c479d2f9605b..000000000000
--- a/arch/s390/kernel/suspend.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/pfn.h>
-#include <linux/suspend.h>
-#include <linux/mm.h>
-#include <asm/ctl_reg.h>
-
-/*
- * References to section boundaries
- */
-extern const void __nosave_begin, __nosave_end;
-
-/*
- * The restore of the saved pages in an hibernation image will set
- * the change and referenced bits in the storage key for each page.
- * Overindication of the referenced bits after an hibernation cycle
- * does not cause any harm but the overindication of the change bits
- * would cause trouble.
- * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
- * page to the most significant byte of the associated page frame
- * number in the hibernation image.
- */
-
-/*
- * Key storage is allocated as a linked list of pages.
- * The size of the keys array is (PAGE_SIZE - sizeof(long))
- */
-struct page_key_data {
- struct page_key_data *next;
- unsigned char data[];
-};
-
-#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
-
-static struct page_key_data *page_key_data;
-static struct page_key_data *page_key_rp, *page_key_wp;
-static unsigned long page_key_rx, page_key_wx;
-unsigned long suspend_zero_pages;
-
-/*
- * For each page in the hibernation image one additional byte is
- * stored in the most significant byte of the page frame number.
- * On suspend no additional memory is required but on resume the
- * keys need to be memorized until the page data has been restored.
- * Only then can the storage keys be set to their old state.
- */
-unsigned long page_key_additional_pages(unsigned long pages)
-{
- return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-}
-
-/*
- * Free page_key_data list of arrays.
- */
-void page_key_free(void)
-{
- struct page_key_data *pkd;
-
- while (page_key_data) {
- pkd = page_key_data;
- page_key_data = pkd->next;
- free_page((unsigned long) pkd);
- }
-}
-
-/*
- * Allocate page_key_data list of arrays with enough room to store
- * one byte for each page in the hibernation image.
- */
-int page_key_alloc(unsigned long pages)
-{
- struct page_key_data *pk;
- unsigned long size;
-
- size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
- while (size--) {
- pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
- if (!pk) {
- page_key_free();
- return -ENOMEM;
- }
- pk->next = page_key_data;
- page_key_data = pk;
- }
- page_key_rp = page_key_wp = page_key_data;
- page_key_rx = page_key_wx = 0;
- return 0;
-}
-
-/*
- * Save the storage key into the upper 8 bits of the page frame number.
- */
-void page_key_read(unsigned long *pfn)
-{
- unsigned long addr;
-
- addr = (unsigned long) page_address(pfn_to_page(*pfn));
- *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
-}
-
-/*
- * Extract the storage key from the upper 8 bits of the page frame number
- * and store it in the page_key_data list of arrays.
- */
-void page_key_memorize(unsigned long *pfn)
-{
- page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
- *(unsigned char *) pfn = 0;
- if (++page_key_wx < PAGE_KEY_DATA_SIZE)
- return;
- page_key_wp = page_key_wp->next;
- page_key_wx = 0;
-}
-
-/*
- * Get the next key from the page_key_data list of arrays and set the
- * storage key of the page referred by @address. If @address refers to
- * a "safe" page the swsusp_arch_resume code will transfer the storage
- * key from the buffer page to the original page.
- */
-void page_key_write(void *address)
-{
- page_set_storage_key((unsigned long) address,
- page_key_rp->data[page_key_rx], 0);
- if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
- return;
- page_key_rp = page_key_rp->next;
- page_key_rx = 0;
-}
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
- unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
-
- /* Always save lowcore pages (LC protection might be enabled). */
- if (pfn <= LC_PAGES)
- return 0;
- if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
- return 1;
- /* Skip memory holes and read-only pages (NSS, DCSS, ...). */
- if (tprot(PFN_PHYS(pfn)))
- return 1;
- return 0;
-}
-
-/*
- * PM notifier callback for suspend
- */
-static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
- if (!suspend_zero_pages)
- return NOTIFY_BAD;
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- free_pages(suspend_zero_pages, LC_ORDER);
- break;
- default:
- return NOTIFY_DONE;
- }
- return NOTIFY_OK;
-}
-
-static int __init suspend_pm_init(void)
-{
- pm_notifier(suspend_pm_cb, 0);
- return 0;
-}
-arch_initcall(suspend_pm_init);
-
-void save_processor_state(void)
-{
- /* swsusp_arch_suspend() actually saves all cpu register contents.
- * Machine checks must be disabled since swsusp_arch_suspend() stores
- * register contents to their lowcore save areas. That's the same
- * place where register contents on machine checks would be saved.
- * To avoid register corruption disable machine checks.
- * We must also disable machine checks in the new psw mask for
- * program checks, since swsusp_arch_suspend() may generate program
- * checks. Disabling machine checks for all other new psw masks is
- * just paranoia.
- */
- local_mcck_disable();
- /* Disable lowcore protection */
- __ctl_clear_bit(0,28);
- S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
- S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
- /* Enable lowcore protection */
- __ctl_set_bit(0,28);
- local_mcck_enable();
-}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
deleted file mode 100644
index c487be4cfc81..000000000000
--- a/arch/s390/kernel/swsusp_asm64.S
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * S390 64-bit swsusp implementation
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/sigp.h>
-
-/*
- * Save register context in absolute 0 lowcore and call swsusp_save() to
- * create in-memory kernel image. The context is saved in the designated
- * "store status" memory locations (see POP).
- * We return from this function twice. The first time during the suspend to
- * disk process. The second time via the swsusp_arch_resume() function
- * (see below) in the resume process.
- * This function runs with disabled interrupts.
- */
- .section .text
-ENTRY(swsusp_arch_suspend)
- stmg %r6,%r15,__SF_GPRS(%r15)
- lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Store prefix register on stack */
- stpx __SF_EMPTY(%r15)
-
- /* Save prefix register contents for lowcore copy */
- llgf %r10,__SF_EMPTY(%r15)
-
- /* Get pointer to save area */
- lghi %r1,0x1000
-
- /* Save CPU address */
- stap __LC_EXT_CPU_ADDR(%r0)
-
- /* Store registers */
- mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
- stfpc 0x31c(%r1) /* store fpu control */
- std 0,0x200(%r1) /* store f0 */
- std 1,0x208(%r1) /* store f1 */
- std 2,0x210(%r1) /* store f2 */
- std 3,0x218(%r1) /* store f3 */
- std 4,0x220(%r1) /* store f4 */
- std 5,0x228(%r1) /* store f5 */
- std 6,0x230(%r1) /* store f6 */
- std 7,0x238(%r1) /* store f7 */
- std 8,0x240(%r1) /* store f8 */
- std 9,0x248(%r1) /* store f9 */
- std 10,0x250(%r1) /* store f10 */
- std 11,0x258(%r1) /* store f11 */
- std 12,0x260(%r1) /* store f12 */
- std 13,0x268(%r1) /* store f13 */
- std 14,0x270(%r1) /* store f14 */
- std 15,0x278(%r1) /* store f15 */
- stam %a0,%a15,0x340(%r1) /* store access registers */
- stctg %c0,%c15,0x380(%r1) /* store control registers */
- stmg %r0,%r15,0x280(%r1) /* store general registers */
-
- stpt 0x328(%r1) /* store timer */
- stck __SF_EMPTY(%r15) /* store clock */
- stckc 0x330(%r1) /* store clock comparator */
-
- /* Update cputime accounting before going to sleep */
- lg %r0,__LC_LAST_UPDATE_TIMER
- slg %r0,0x328(%r1)
- alg %r0,__LC_SYSTEM_TIMER
- stg %r0,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
- lg %r0,__LC_LAST_UPDATE_CLOCK
- slg %r0,__SF_EMPTY(%r15)
- alg %r0,__LC_STEAL_TIMER
- stg %r0,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Save absolute zero pages */
- larl %r2,suspend_zero_pages
- lg %r2,0(%r2)
- lghi %r4,0
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Copy lowcore to absolute zero lowcore */
- lghi %r2,0
- lgr %r4,%r10
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Save image */
- brasl %r14,swsusp_save
-
- /* Restore prefix register and return */
- lghi %r1,0x1000
- spx 0x318(%r1)
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- br %r14
-
-/*
- * Restore saved memory image to correct place and restore register context.
- * Then we return to the function that called swsusp_arch_suspend().
- * swsusp_arch_resume() runs with disabled interrupts.
- */
-ENTRY(swsusp_arch_resume)
- stmg %r6,%r15,__SF_GPRS(%r15)
- lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
-
- /* Make all free pages stable */
- lghi %r2,1
- brasl %r14,arch_set_page_states
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Restore saved image */
- larl %r1,restore_pblist
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 2f
-0:
- lg %r2,8(%r1)
- lg %r4,0(%r1)
- iske %r0,%r4
- lghi %r3,PAGE_SIZE
- lghi %r5,PAGE_SIZE
-1:
- mvcle %r2,%r4,0
- jo 1b
- lg %r2,8(%r1)
- sske %r0,%r2
- lg %r1,16(%r1)
- ltgr %r1,%r1
- jnz 0b
-2:
- ptlb /* flush tlb */
-
- /* Reset System */
- larl %r1,restart_entry
- larl %r2,.Lrestart_diag308_psw
- og %r1,0(%r2)
- stg %r1,0(%r0)
- larl %r1,.Lnew_pgm_check_psw
- epsw %r2,%r3
- stm %r2,%r3,0(%r1)
- mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
- lghi %r0,0
- diag %r0,%r0,0x308
-restart_entry:
- lhi %r1,1
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- sam64
- larl %r1,.Lnew_pgm_check_psw
- lpswe 0(%r1)
-pgm_check_entry:
-
- /* Switch to original suspend CPU */
- larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
- stap 0(%r1)
- llgh %r2,0(%r1)
- llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
- cgr %r1,%r2
- je restore_registers /* r1 = r2 -> nothing to do */
- larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
- mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
-3:
- sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
- brc 8,4f /* accepted */
- brc 2,3b /* busy, try again */
-
- /* Suspend CPU not available -> panic */
- larl %r15,init_thread_union
- ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER)
- larl %r2,.Lpanic_string
- larl %r3,_sclp_print_early
- lghi %r1,0
- sam31
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- basr %r14,%r3
- larl %r3,.Ldisabled_wait_31
- lpsw 0(%r3)
-4:
- /* Switch to suspend CPU */
- sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
- brc 2,4b /* busy, try again */
-5:
- sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
- brc 2,5b /* busy, try again */
-6: j 6b
-
-restart_suspend:
- larl %r1,.Lresume_cpu
- llgh %r2,0(%r1)
-7:
- sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
- brc 8,7b /* accepted, status 0, still running */
- brc 2,7b /* busy, try again */
- tmll %r9,0x40 /* Test if resume CPU is stopped */
- jz 7b
-
-restore_registers:
- /* Restore registers */
- lghi %r13,0x1000 /* %r1 = pointer to save area */
-
- /* Ignore time spent in suspended state. */
- llgf %r1,0x318(%r13)
- stck __LC_LAST_UPDATE_CLOCK(%r1)
- spt 0x328(%r13) /* reprogram timer */
- //sckc 0x330(%r13) /* set clock comparator */
-
- lctlg %c0,%c15,0x380(%r13) /* load control registers */
- lam %a0,%a15,0x340(%r13) /* load access registers */
-
- lfpc 0x31c(%r13) /* load fpu control */
- ld 0,0x200(%r13) /* load f0 */
- ld 1,0x208(%r13) /* load f1 */
- ld 2,0x210(%r13) /* load f2 */
- ld 3,0x218(%r13) /* load f3 */
- ld 4,0x220(%r13) /* load f4 */
- ld 5,0x228(%r13) /* load f5 */
- ld 6,0x230(%r13) /* load f6 */
- ld 7,0x238(%r13) /* load f7 */
- ld 8,0x240(%r13) /* load f8 */
- ld 9,0x248(%r13) /* load f9 */
- ld 10,0x250(%r13) /* load f10 */
- ld 11,0x258(%r13) /* load f11 */
- ld 12,0x260(%r13) /* load f12 */
- ld 13,0x268(%r13) /* load f13 */
- ld 14,0x270(%r13) /* load f14 */
- ld 15,0x278(%r13) /* load f15 */
-
- /* Load old stack */
- lg %r15,0x2f8(%r13)
-
- /* Save prefix register */
- mvc __SF_EMPTY(4,%r15),0x318(%r13)
-
- /* Restore absolute zero pages */
- lghi %r2,0
- larl %r4,suspend_zero_pages
- lg %r4,0(%r4)
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Restore prefix register */
- spx __SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Make all free pages unstable */
- lghi %r2,0
- brasl %r14,arch_set_page_states
-
- /* Log potential guest relocation */
- brasl %r14,lgr_info_log
-
- /* Reinitialize the channel subsystem */
- brasl %r14,channel_subsystem_reinit
-
- /* Return 0 */
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- br %r14
-
- .section .data..nosave,"aw",@progbits
- .align 8
-.Ldisabled_wait_31:
- .long 0x000a0000,0x00000000
-.Lpanic_string:
- .asciz "Resume not possible because suspend CPU is no longer available"
- .align 8
-.Lrestart_diag308_psw:
- .long 0x00080000,0x80000000
-.Lrestart_suspend_psw:
- .quad 0x0000000180000000,restart_suspend
-.Lnew_pgm_check_psw:
- .quad 0,pgm_check_entry
-.Lresume_cpu:
- .byte 0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
deleted file mode 100644
index 23eb222c1658..000000000000
--- a/arch/s390/kernel/sys_s390.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * S390 version
- * Copyright IBM Corp. 1999, 2000
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Thomas Spatzier (tspat@de.ibm.com)
- *
- * Derived from "arch/i386/kernel/sys_i386.c"
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/s390
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/personality.h>
-#include <linux/unistd.h>
-#include <linux/ipc.h>
-#include <asm/uaccess.h>
-#include "entry.h"
-
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls.
- */
-SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
- unsigned long, third, void __user *, ptr)
-{
- if (call >> 16)
- return -EINVAL;
- /* The s390 sys_ipc variant has only five parameters instead of six
- * like the generic variant. The only difference is the handling of
- * the SEMTIMEDOP subcall where on s390 the third parameter is used
- * as a pointer to a struct timespec where the generic variant uses
- * the fifth parameter.
- * Therefore we can call the generic variant by simply passing the
- * third parameter also as fifth parameter.
- */
- return sys_ipc(call, first, second, third, ptr, third);
-}
-
-#ifdef CONFIG_64BIT
-SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
-{
- unsigned int ret;
-
- if (personality(current->personality) == PER_LINUX32 &&
- personality(personality) == PER_LINUX)
- personality |= PER_LINUX32;
- ret = sys_personality(personality);
- if (personality(ret) == PER_LINUX32)
- ret &= ~PER_LINUX32;
-
- return ret;
-}
-#endif /* CONFIG_64BIT */
-
-/*
- * Wrapper function for sys_fadvise64/fadvise64_64
- */
-#ifndef CONFIG_64BIT
-
-SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low,
- size_t, len, int, advice)
-{
- return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low,
- len, advice);
-}
-
-struct fadvise64_64_args {
- int fd;
- long long offset;
- long long len;
- int advice;
-};
-
-SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
-{
- struct fadvise64_64_args a;
-
- if ( copy_from_user(&a, args, sizeof(a)) )
- return -EFAULT;
- return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
-}
-
-/*
- * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last
- * 64 bit argument "len" is split into the upper and lower 32 bits. The
- * system call wrapper in the user space loads the value to %r6/%r7.
- * The code in entry.S keeps the values in %r2 - %r6 where they are and
- * stores %r7 to 96(%r15). But the standard C linkage requires that
- * the whole 64 bit value for len is stored on the stack and doesn't
- * use %r6 at all. So s390_fallocate has to convert the arguments from
- * %r2: fd, %r3: mode, %r4/%r5: offset, %r6/96(%r15)-99(%r15): len
- * to
- * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len
- */
-SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset,
- u32, len_high, u32, len_low)
-{
- return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low);
-}
-#endif
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
new file mode 100644
index 000000000000..795b6cca74c9
--- /dev/null
+++ b/arch/s390/kernel/syscall.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/s390
+ * platform.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/thread_info.h>
+#include <linux/entry-common.h>
+
+#include <asm/ptrace.h>
+#include <asm/vtime.h>
+
+#include "entry.h"
+
+#define __SYSCALL(nr, sym) long __s390x_##sym(struct pt_regs *);
+#include <asm/syscall_table.h>
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) [nr] = (__s390x_##sym),
+const sys_call_ptr_t sys_call_table[__NR_syscalls] = {
+#include <asm/syscall_table.h>
+};
+#undef __SYSCALL
+
+#ifdef CONFIG_SYSVIPC
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+ unsigned long, third, void __user *, ptr)
+{
+ if (call >> 16)
+ return -EINVAL;
+ /* The s390 sys_ipc variant has only five parameters instead of six
+ * like the generic variant. The only difference is the handling of
+ * the SEMTIMEDOP subcall where on s390 the third parameter is used
+ * as a pointer to a struct timespec where the generic variant uses
+ * the fifth parameter.
+ * Therefore we can call the generic variant by simply passing the
+ * third parameter also as fifth parameter.
+ */
+ return ksys_ipc(call, first, second, third, ptr, third);
+}
+#endif /* CONFIG_SYSVIPC */
+
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+ unsigned int ret = current->personality;
+
+ if (personality(current->personality) == PER_LINUX32 &&
+ personality(personality) == PER_LINUX)
+ personality |= PER_LINUX32;
+
+ if (personality != 0xffffffff)
+ set_personality(personality);
+
+ if (personality(ret) == PER_LINUX32)
+ ret &= ~PER_LINUX32;
+
+ return ret;
+}
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+ return -ENOSYS;
+}
+
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
+{
+ unsigned long nr;
+
+ add_random_kstack_offset();
+ enter_from_user_mode(regs);
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ local_irq_enable();
+ regs->orig_gpr2 = regs->gprs[2];
+ if (unlikely(per_trap))
+ set_thread_flag(TIF_PER_TRAP);
+ regs->flags = 0;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ nr = regs->int_code & 0xffff;
+ if (likely(!nr)) {
+ nr = regs->gprs[1] & 0xffff;
+ regs->int_code &= ~0xffffUL;
+ regs->int_code |= nr;
+ }
+ regs->gprs[2] = nr;
+ if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
+ regs->psw.addr = current->restart_block.arch_data;
+ current->restart_block.arch_data = 1;
+ }
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ /*
+ * In the s390 ptrace ABI, both the syscall number and the return value
+ * use gpr2. However, userspace puts the syscall number either in the
+ * svc instruction itself, or uses gpr1. To make at least skipping syscalls
+ * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
+ * and if set, the syscall will be skipped.
+ */
+ if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
+ goto out;
+ regs->gprs[2] = -ENOSYS;
+ if (likely(nr < NR_syscalls))
+ regs->gprs[2] = sys_call_table[nr](regs);
+out:
+ syscall_exit_to_user_mode(regs);
+}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
deleted file mode 100644
index 913410bd74a3..000000000000
--- a/arch/s390/kernel/syscalls.S
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * definitions for sys_call_table, each line represents an
- * entry in the table in the form
- * SYSCALL(31 bit syscall, 64 bit syscall, 31 bit emulated syscall)
- *
- * this file is meant to be included from entry.S and entry64.S
- */
-
-#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall)
-
-NI_SYSCALL /* 0 */
-SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper)
-SYSCALL(sys_fork,sys_fork,sys_fork)
-SYSCALL(sys_read,sys_read,sys32_read_wrapper)
-SYSCALL(sys_write,sys_write,sys32_write_wrapper)
-SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */
-SYSCALL(sys_close,sys_close,sys32_close_wrapper)
-SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
-SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper)
-SYSCALL(sys_link,sys_link,sys32_link_wrapper)
-SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */
-SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper)
-SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper)
-SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */
-SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper)
-SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */
-SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/
-NI_SYSCALL /* old break syscall holder */
-NI_SYSCALL /* old stat syscall holder */
-SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek)
-SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */
-SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper)
-SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper)
-SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/
-SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/
-SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */
-SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper)
-SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper)
-NI_SYSCALL /* old fstat syscall */
-SYSCALL(sys_pause,sys_pause,sys_pause)
-SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */
-NI_SYSCALL /* old stty syscall */
-NI_SYSCALL /* old gtty syscall */
-SYSCALL(sys_access,sys_access,sys32_access_wrapper)
-SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper)
-NI_SYSCALL /* 35 old ftime syscall */
-SYSCALL(sys_sync,sys_sync,sys_sync)
-SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper)
-SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper)
-SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper)
-SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */
-SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper)
-SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper)
-SYSCALL(sys_times,sys_times,compat_sys_times_wrapper)
-NI_SYSCALL /* old prof syscall */
-SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */
-SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/
-SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/
-SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper)
-SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */
-SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */
-SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper)
-SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper)
-NI_SYSCALL /* old lock syscall */
-SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper)
-SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */
-NI_SYSCALL /* intel mpx syscall */
-SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper)
-NI_SYSCALL /* old ulimit syscall */
-NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */
-SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper)
-SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper)
-SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper)
-SYSCALL(sys_getppid,sys_getppid,sys_getppid)
-SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */
-SYSCALL(sys_setsid,sys_setsid,sys_setsid)
-SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction)
-NI_SYSCALL /* old sgetmask syscall*/
-NI_SYSCALL /* old ssetmask syscall*/
-SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */
-SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */
-SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper)
-SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper)
-SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper)
-SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */
-SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper)
-SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage)
-SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper)
-SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper)
-SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */
-SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */
-NI_SYSCALL /* old select syscall */
-SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper)
-NI_SYSCALL /* old lstat syscall */
-SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */
-SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper)
-SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper)
-SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper)
-SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */
-SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */
-SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper)
-SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate)
-SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate)
-SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper)
-SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/
-SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper)
-SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper)
-NI_SYSCALL /* old profil syscall */
-SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper)
-SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */
-NI_SYSCALL /* ioperm for i386 */
-SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper)
-SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper)
-SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer)
-SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */
-SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper)
-SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper)
-SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper)
-NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */
-SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup)
-NI_SYSCALL /* old "idle" system call */
-NI_SYSCALL /* vm86old for i386 */
-SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4)
-SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */
-SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
-SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc)
-SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
-SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn)
-SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */
-SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper)
-SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper)
-NI_SYSCALL /* modify_ldt for i386 */
-SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper)
-SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */
-SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask)
-NI_SYSCALL /* old "create module" */
-SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper)
-SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper)
-NI_SYSCALL /* 130: old get_kernel_syms */
-SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper)
-SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper)
-SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper)
-SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper)
-SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */
-SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper)
-NI_SYSCALL /* for afs_syscall */
-SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */
-SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */
-SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */
-SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper)
-SYSCALL(sys_select,sys_select,compat_sys_select_wrapper)
-SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper)
-SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper)
-SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */
-SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper)
-SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper)
-SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper)
-SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl)
-SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */
-SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper)
-SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper)
-SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall)
-SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper)
-SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */
-SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper)
-SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper)
-SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield)
-SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper)
-SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */
-SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
-SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper)
-SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper)
-SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */
-SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */
-NI_SYSCALL /* for vm86 */
-NI_SYSCALL /* old sys_query_module */
-SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-NI_SYSCALL /* old nfsservctl */
-SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */
-SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */
-SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
-SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn)
-SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction)
-SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */
-SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending)
-SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
-SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
-SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
-SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */
-SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper)
-SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */
-SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper)
-SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper)
-SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */
-SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack)
-SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile)
-NI_SYSCALL /* streams1 */
-NI_SYSCALL /* streams2 */
-SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */
-SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper)
-SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper)
-SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper)
-SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper)
-SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */
-SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper)
-SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper)
-SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper)
-SYSCALL(sys_getuid,sys_getuid,sys_getuid)
-SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */
-SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid)
-SYSCALL(sys_getegid,sys_getegid,sys_getegid)
-SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper)
-SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper)
-SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */
-SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper)
-SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper)
-SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper)
-SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper)
-SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */
-SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper)
-SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper)
-SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper)
-SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper)
-SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */
-SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper)
-SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper)
-SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper)
-SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper)
-SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */
-SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper)
-SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper)
-SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64)
-SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper)
-SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */
-SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper)
-SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper)
-SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper)
-SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper)
-SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */
-SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper)
-SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper)
-SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper)
-SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper)
-SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */
-SYSCALL(sys_gettid,sys_gettid,sys_gettid)
-SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex)
-SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper)
-SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */
-SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper)
-NI_SYSCALL /* reserved for TUX */
-SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper)
-SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper)
-SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */
-SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper)
-SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper)
-SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper)
-SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper)
-SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */
-SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper)
-SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper)
-SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper)
-SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper)
-SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */
-SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper)
-SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper)
-SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper)
-SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper)
-SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */
-SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
-SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
-NI_SYSCALL /* reserved for vserver */
-SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
-SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
-SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
-SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper)
-NI_SYSCALL /* 268 sys_mbind */
-NI_SYSCALL /* 269 sys_get_mempolicy */
-NI_SYSCALL /* 270 sys_set_mempolicy */
-SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper)
-SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper)
-SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper)
-SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper)
-SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */
-SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper)
-SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper)
-SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper)
-SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper)
-SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */
-SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid)
-SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper)
-SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper)
-SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
-SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */
-SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper)
-NI_SYSCALL /* 287 sys_migrate_pages */
-SYSCALL(sys_openat,sys_openat,compat_sys_openat)
-SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper)
-SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */
-SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper)
-SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper)
-SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper)
-SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper)
-SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */
-SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper)
-SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper)
-SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper)
-SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper)
-SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */
-SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
-SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
-SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
-SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list)
-SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list)
-SYSCALL(sys_splice,sys_splice,sys_splice_wrapper)
-SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper)
-SYSCALL(sys_tee,sys_tee,sys_tee_wrapper)
-SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL /* 310 sys_move_pages */
-SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper)
-SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait)
-SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper)
-SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper)
-SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */
-SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd)
-NI_SYSCALL /* 317 old sys_timer_fd */
-SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper)
-SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper)
-SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
-SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime)
-SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4)
-SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper)
-SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper)
-SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */
-SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper)
-SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
-SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv)
-SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev)
-SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
-SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper)
-SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper)
-SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark)
-SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper)
-SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */
-SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at)
-SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
-SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
-SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
-SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
-SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
-SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)
-SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper)
-SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper)
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
new file mode 100644
index 000000000000..d5fca0ca0890
--- /dev/null
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+
+$(shell mkdir -p $(uapi) $(kapi))
+
+syscall := $(src)/syscall.tbl
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis common,$* $< $@
+
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis common,$* $< $@
+
+$(uapi)/unistd_%.h: $(syscall) $(syshdr) FORCE
+ $(call if_changed,syshdr)
+
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,systbl)
+
+uapisyshdr-y += unistd_64.h
+kapisyshdr-y += syscall_table.h
+
+uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
+
+PHONY += all
+all: $(uapisyshdr-y) $(kapisyshdr-y)
+ @:
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
new file mode 100644
index 000000000000..417ed16b3c63
--- /dev/null
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -0,0 +1,399 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+# <nr> <abi> <syscall> <entry>
+#
+# <abi> is always common.
+
+1 common exit sys_exit
+2 common fork sys_fork
+3 common read sys_read
+4 common write sys_write
+5 common open sys_open
+6 common close sys_close
+7 common restart_syscall sys_restart_syscall
+8 common creat sys_creat
+9 common link sys_link
+10 common unlink sys_unlink
+11 common execve sys_execve
+12 common chdir sys_chdir
+14 common mknod sys_mknod
+15 common chmod sys_chmod
+19 common lseek sys_lseek
+20 common getpid sys_getpid
+21 common mount sys_mount
+22 common umount sys_oldumount
+26 common ptrace sys_ptrace
+27 common alarm sys_alarm
+29 common pause sys_pause
+30 common utime sys_utime
+33 common access sys_access
+34 common nice sys_nice
+36 common sync sys_sync
+37 common kill sys_kill
+38 common rename sys_rename
+39 common mkdir sys_mkdir
+40 common rmdir sys_rmdir
+41 common dup sys_dup
+42 common pipe sys_pipe
+43 common times sys_times
+45 common brk sys_brk
+48 common signal sys_signal
+51 common acct sys_acct
+52 common umount2 sys_umount
+54 common ioctl sys_ioctl
+55 common fcntl sys_fcntl
+57 common setpgid sys_setpgid
+60 common umask sys_umask
+61 common chroot sys_chroot
+62 common ustat sys_ustat
+63 common dup2 sys_dup2
+64 common getppid sys_getppid
+65 common getpgrp sys_getpgrp
+66 common setsid sys_setsid
+67 common sigaction sys_sigaction
+72 common sigsuspend sys_sigsuspend
+73 common sigpending sys_sigpending
+74 common sethostname sys_sethostname
+75 common setrlimit sys_setrlimit
+77 common getrusage sys_getrusage
+78 common gettimeofday sys_gettimeofday
+79 common settimeofday sys_settimeofday
+83 common symlink sys_symlink
+85 common readlink sys_readlink
+86 common uselib sys_uselib
+87 common swapon sys_swapon
+88 common reboot sys_reboot
+89 common readdir sys_ni_syscall
+90 common mmap sys_old_mmap
+91 common munmap sys_munmap
+92 common truncate sys_truncate
+93 common ftruncate sys_ftruncate
+94 common fchmod sys_fchmod
+96 common getpriority sys_getpriority
+97 common setpriority sys_setpriority
+99 common statfs sys_statfs
+100 common fstatfs sys_fstatfs
+102 common socketcall sys_socketcall
+103 common syslog sys_syslog
+104 common setitimer sys_setitimer
+105 common getitimer sys_getitimer
+106 common stat sys_newstat
+107 common lstat sys_newlstat
+108 common fstat sys_newfstat
+110 common lookup_dcookie sys_ni_syscall
+111 common vhangup sys_vhangup
+112 common idle sys_ni_syscall
+114 common wait4 sys_wait4
+115 common swapoff sys_swapoff
+116 common sysinfo sys_sysinfo
+117 common ipc sys_s390_ipc
+118 common fsync sys_fsync
+119 common sigreturn sys_sigreturn
+120 common clone sys_clone
+121 common setdomainname sys_setdomainname
+122 common uname sys_newuname
+124 common adjtimex sys_adjtimex
+125 common mprotect sys_mprotect
+126 common sigprocmask sys_sigprocmask
+127 common create_module sys_ni_syscall
+128 common init_module sys_init_module
+129 common delete_module sys_delete_module
+130 common get_kernel_syms sys_ni_syscall
+131 common quotactl sys_quotactl
+132 common getpgid sys_getpgid
+133 common fchdir sys_fchdir
+134 common bdflush sys_ni_syscall
+135 common sysfs sys_sysfs
+136 common personality sys_s390_personality
+137 common afs_syscall sys_ni_syscall
+141 common getdents sys_getdents
+142 common select sys_select
+143 common flock sys_flock
+144 common msync sys_msync
+145 common readv sys_readv
+146 common writev sys_writev
+147 common getsid sys_getsid
+148 common fdatasync sys_fdatasync
+149 common _sysctl sys_ni_syscall
+150 common mlock sys_mlock
+151 common munlock sys_munlock
+152 common mlockall sys_mlockall
+153 common munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam
+155 common sched_getparam sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep
+163 common mremap sys_mremap
+167 common query_module sys_ni_syscall
+168 common poll sys_poll
+169 common nfsservctl sys_ni_syscall
+172 common prctl sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend
+180 common pread64 sys_pread64
+181 common pwrite64 sys_pwrite64
+183 common getcwd sys_getcwd
+184 common capget sys_capget
+185 common capset sys_capset
+186 common sigaltstack sys_sigaltstack
+187 common sendfile sys_sendfile64
+188 common getpmsg sys_ni_syscall
+189 common putpmsg sys_ni_syscall
+190 common vfork sys_vfork
+191 common getrlimit sys_getrlimit
+198 common lchown sys_lchown
+199 common getuid sys_getuid
+200 common getgid sys_getgid
+201 common geteuid sys_geteuid
+202 common getegid sys_getegid
+203 common setreuid sys_setreuid
+204 common setregid sys_setregid
+205 common getgroups sys_getgroups
+206 common setgroups sys_setgroups
+207 common fchown sys_fchown
+208 common setresuid sys_setresuid
+209 common getresuid sys_getresuid
+210 common setresgid sys_setresgid
+211 common getresgid sys_getresgid
+212 common chown sys_chown
+213 common setuid sys_setuid
+214 common setgid sys_setgid
+215 common setfsuid sys_setfsuid
+216 common setfsgid sys_setfsgid
+217 common pivot_root sys_pivot_root
+218 common mincore sys_mincore
+219 common madvise sys_madvise
+220 common getdents64 sys_getdents64
+222 common readahead sys_readahead
+224 common setxattr sys_setxattr
+225 common lsetxattr sys_lsetxattr
+226 common fsetxattr sys_fsetxattr
+227 common getxattr sys_getxattr
+228 common lgetxattr sys_lgetxattr
+229 common fgetxattr sys_fgetxattr
+230 common listxattr sys_listxattr
+231 common llistxattr sys_llistxattr
+232 common flistxattr sys_flistxattr
+233 common removexattr sys_removexattr
+234 common lremovexattr sys_lremovexattr
+235 common fremovexattr sys_fremovexattr
+236 common gettid sys_gettid
+237 common tkill sys_tkill
+238 common futex sys_futex
+239 common sched_setaffinity sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity
+241 common tgkill sys_tgkill
+243 common io_setup sys_io_setup
+244 common io_destroy sys_io_destroy
+245 common io_getevents sys_io_getevents
+246 common io_submit sys_io_submit
+247 common io_cancel sys_io_cancel
+248 common exit_group sys_exit_group
+249 common epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait
+252 common set_tid_address sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64
+254 common timer_create sys_timer_create
+255 common timer_settime sys_timer_settime
+256 common timer_gettime sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime
+260 common clock_gettime sys_clock_gettime
+261 common clock_getres sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep
+265 common statfs64 sys_statfs64
+266 common fstatfs64 sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages
+268 common mbind sys_mbind
+269 common get_mempolicy sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy
+271 common mq_open sys_mq_open
+272 common mq_unlink sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive
+275 common mq_notify sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr
+277 common kexec_load sys_kexec_load
+278 common add_key sys_add_key
+279 common request_key sys_request_key
+280 common keyctl sys_keyctl
+281 common waitid sys_waitid
+282 common ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages
+288 common openat sys_openat
+289 common mkdirat sys_mkdirat
+290 common mknodat sys_mknodat
+291 common fchownat sys_fchownat
+292 common futimesat sys_futimesat
+293 common newfstatat sys_newfstatat
+294 common unlinkat sys_unlinkat
+295 common renameat sys_renameat
+296 common linkat sys_linkat
+297 common symlinkat sys_symlinkat
+298 common readlinkat sys_readlinkat
+299 common fchmodat sys_fchmodat
+300 common faccessat sys_faccessat
+301 common pselect6 sys_pselect6
+302 common ppoll sys_ppoll
+303 common unshare sys_unshare
+304 common set_robust_list sys_set_robust_list
+305 common get_robust_list sys_get_robust_list
+306 common splice sys_splice
+307 common sync_file_range sys_sync_file_range
+308 common tee sys_tee
+309 common vmsplice sys_vmsplice
+310 common move_pages sys_move_pages
+311 common getcpu sys_getcpu
+312 common epoll_pwait sys_epoll_pwait
+313 common utimes sys_utimes
+314 common fallocate sys_fallocate
+315 common utimensat sys_utimensat
+316 common signalfd sys_signalfd
+317 common timerfd sys_ni_syscall
+318 common eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4
+323 common eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2
+326 common dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv
+329 common pwritev sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open
+332 common fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark
+334 common prlimit64 sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime
+338 common syncfs sys_syncfs
+339 common setns sys_setns
+340 common process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp
+344 common finit_module sys_finit_module
+345 common sched_setattr sys_sched_setattr
+346 common sched_getattr sys_sched_getattr
+347 common renameat2 sys_renameat2
+348 common seccomp sys_seccomp
+349 common getrandom sys_getrandom
+350 common memfd_create sys_memfd_create
+351 common bpf sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read
+354 common execveat sys_execveat
+355 common userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg
+358 common sendmmsg sys_sendmmsg
+359 common socket sys_socket
+360 common socketpair sys_socketpair
+361 common bind sys_bind
+362 common connect sys_connect
+363 common listen sys_listen
+364 common accept4 sys_accept4
+365 common getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt
+367 common getsockname sys_getsockname
+368 common getpeername sys_getpeername
+369 common sendto sys_sendto
+370 common sendmsg sys_sendmsg
+371 common recvfrom sys_recvfrom
+372 common recvmsg sys_recvmsg
+373 common shutdown sys_shutdown
+374 common mlock2 sys_mlock2
+375 common copy_file_range sys_copy_file_range
+376 common preadv2 sys_preadv2
+377 common pwritev2 sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage
+379 common statx sys_statx
+380 common s390_sthyi sys_s390_sthyi
+381 common kexec_file_load sys_kexec_file_load
+382 common io_pgetevents sys_io_pgetevents
+383 common rseq sys_rseq
+384 common pkey_mprotect sys_pkey_mprotect
+385 common pkey_alloc sys_pkey_alloc
+386 common pkey_free sys_pkey_free
+# room for arch specific syscalls
+392 common semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl
+397 common shmat sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd
+401 common msgrcv sys_msgrcv
+402 common msgctl sys_msgctl
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+436 common close_range sys_close_range
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
+441 common epoll_pwait2 sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
+443 common quotactl_fd sys_quotactl_fd
+444 common landlock_create_ruleset sys_landlock_create_ruleset
+445 common landlock_add_rule sys_landlock_add_rule
+446 common landlock_restrict_self sys_landlock_restrict_self
+447 common memfd_secret sys_memfd_secret
+448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_map_shadow_stack
+454 common futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
+463 common setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
+470 common listns sys_listns
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 62f89d98e880..33ca3e47a0e6 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -1,62 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2001, 2009
* Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
*/
+#include <linux/cpufeature.h>
+#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/slab.h>
+#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
+#include <asm/debug.h>
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
-
-/* Sigh, math-emu. Don't ask. */
-#include <asm/sfp-util.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
+#include <asm/fpu.h>
+#include <asm/asm.h>
int topology_max_mnest;
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
+#ifdef CONFIG_PROC_FS
+
+static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
{
- register int r0 asm("0") = (fc << 28) | sel1;
- register int r1 asm("1") = sel2;
- int rc = 0;
-
- asm volatile(
- " stsi 0(%3)\n"
- "0: jz 2f\n"
- "1: lhi %1,%4\n"
- "2:\n"
- EX_TABLE(0b, 1b)
- : "+d" (r0), "+d" (rc)
- : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
- : "cc", "memory");
- if (rc)
- return rc;
- return fc ? 0 : ((unsigned int) r0) >> 28;
+ switch (encoding) {
+ case 1: /* EBCDIC */
+ EBCASC(name, len);
+ break;
+ case 2: /* UTF-8 */
+ break;
+ default:
+ return false;
+ }
+ return true;
}
-EXPORT_SYMBOL(stsi);
static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
{
+ bool has_var_cap;
int i;
if (stsi(info, 1, 1, 1))
return;
+ has_var_cap = !!info->model_var_cap[0];
EBCASC(info->manufacturer, sizeof(info->manufacturer));
EBCASC(info->type, sizeof(info->type));
EBCASC(info->model, sizeof(info->model));
@@ -65,8 +59,12 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
EBCASC(info->model_capacity, sizeof(info->model_capacity));
EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ if (has_var_cap)
+ EBCASC(info->model_var_cap, sizeof(info->model_var_cap));
seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
seq_printf(m, "Type: %-4.4s\n", info->type);
+ if (info->lic)
+ seq_printf(m, "LIC Identifier: %016lx\n", info->lic);
/*
* Sigh: the model field has been renamed with System z9
* to model_capacity and a new model field has been added
@@ -90,12 +88,18 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
info->model_temp_cap,
info->model_temp_cap_rating);
+ if (has_var_cap && info->model_var_cap_rating)
+ seq_printf(m, "Model Var. Capacity: %-16.16s %08u\n",
+ info->model_var_cap,
+ info->model_var_cap_rating);
if (info->ncr)
seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
if (info->npr)
seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
if (info->ntr)
seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+ if (has_var_cap && info->nvr)
+ seq_printf(m, "Nominal Var. Rating: %08u\n", info->nvr);
if (info->cai) {
seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
@@ -111,11 +115,10 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
{
- static int max_mnest;
- int i, rc;
+ int i;
seq_putc(m, '\n');
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
if (stsi(info, 15, 1, topology_max_mnest))
return;
@@ -123,7 +126,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
seq_printf(m, " %d", info->mag[i]);
seq_putc(m, '\n');
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_TOPOLOGY
store_topology(info);
seq_printf(m, "CPU Topology SW: ");
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
@@ -145,6 +148,10 @@ static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+ if (info->mt_installed) {
+ seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid);
+ }
/*
* Sigh 2. According to the specification the alternate
* capability field is a 32 bit floating point number
@@ -194,6 +201,33 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
+ if (info->mt_installed) {
+ seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid);
+ seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid);
+ }
+ if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) {
+ seq_printf(m, "LPAR Extended Name: %-.256s\n", info->ext_name);
+ seq_printf(m, "LPAR UUID: %pUb\n", &info->uuid);
+ }
+}
+
+static void print_ext_name(struct seq_file *m, int lvl,
+ struct sysinfo_3_2_2 *info)
+{
+ size_t len = sizeof(info->ext_names[lvl]);
+
+ if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len))
+ return;
+ seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl,
+ info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+ if (uuid_is_null(&info->vm[i].uuid))
+ return;
+ seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
}
static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
@@ -213,6 +247,8 @@ static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby);
seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved);
+ print_ext_name(m, i, info);
+ print_uuid(m, i, info);
}
}
@@ -238,25 +274,15 @@ static int sysinfo_show(struct seq_file *m, void *v)
return 0;
}
-static int sysinfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sysinfo_show, NULL);
-}
-
-static const struct file_operations sysinfo_fops = {
- .open = sysinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init sysinfo_create_proc(void)
{
- proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
+ proc_create_single("sysinfo", 0444, NULL, sysinfo_show);
return 0;
}
device_initcall(sysinfo_create_proc);
+#endif /* CONFIG_PROC_FS */
+
/*
* Service levels interface.
*/
@@ -330,24 +356,12 @@ static const struct seq_operations service_level_seq_ops = {
.show = service_level_show
};
-static int service_level_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &service_level_seq_ops);
-}
-
-static const struct file_operations service_level_ops = {
- .open = service_level_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
static void service_level_vm_print(struct seq_file *m,
struct service_level *slr)
{
char *query_buffer, *str;
- query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+ query_buffer = kmalloc(1024, GFP_KERNEL);
if (!query_buffer)
return;
cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
@@ -364,8 +378,8 @@ static struct service_level service_level_vm = {
static __init int create_proc_service_level(void)
{
- proc_create("service_levels", 0, NULL, &service_level_ops);
- if (MACHINE_IS_VM)
+ proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
+ if (machine_is_vm())
register_service_level(&service_level_vm);
return 0;
}
@@ -376,11 +390,9 @@ subsys_initcall(create_proc_service_level);
*/
void s390_adjust_jiffies(void)
{
+ DECLARE_KERNEL_FPU_ONSTACK16(fpu);
struct sysinfo_1_2_2 *info;
- const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */
- FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
- FP_DECL_EX;
- unsigned int capability;
+ unsigned long capability;
info = (void *) get_zeroed_page(GFP_KERNEL);
if (!info)
@@ -396,15 +408,18 @@ void s390_adjust_jiffies(void)
* higher cpu capacity. Bogomips are the other way round.
* To get to a halfway suitable number we divide 1e7
* by the cpu capability number. Yes, that means a floating
- * point division .. math-emu here we come :-)
+ * point division ..
*/
- FP_UNPACK_SP(SA, &fmil);
- if ((info->capability >> 23) == 0)
- FP_FROM_INT_S(SB, (long) info->capability, 64, long);
+ kernel_fpu_begin(&fpu, KERNEL_FPR);
+ fpu_sfpc(0);
+ if (info->capability & 0xff800000)
+ fpu_ldgr(2, info->capability);
else
- FP_UNPACK_SP(SB, &info->capability);
- FP_DIV_S(SR, SA, SB);
- FP_TO_INT_S(capability, SR, 32, 0);
+ fpu_cefbr(2, info->capability);
+ fpu_cefbr(0, 10000000);
+ fpu_debr(0, 2);
+ capability = fpu_cgebr(0, 5);
+ kernel_fpu_end(&fpu, KERNEL_FPR);
} else
/*
* Really old machine without stsi block for basic
@@ -418,7 +433,7 @@ void s390_adjust_jiffies(void)
/*
* calibrate the delay loop
*/
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
{
s390_adjust_jiffies();
/* Print the good old Bogomips line .. */
@@ -426,3 +441,96 @@ void __cpuinit calibrate_delay(void)
"%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2) \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{ \
+ file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \
+ if (!file->private_data) \
+ return -ENOMEM; \
+ if (stsi(file->private_data, fc, s1, s2)) { \
+ free_page((unsigned long)file->private_data); \
+ file->private_data = NULL; \
+ return -EACCES; \
+ } \
+ return nonseekable_open(inode, file); \
+} \
+ \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
+ .open = stsi_open_##fc##_##s1##_##s2, \
+ .release = stsi_release, \
+ .read = stsi_read, \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{
+ free_page((unsigned long)file->private_data);
+ return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
+
+struct stsi_file {
+ const struct file_operations *fops;
+ char *name;
+};
+
+static struct stsi_file stsi_file[] __initdata = {
+ {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"},
+ {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"},
+ {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"},
+ {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"},
+ {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"},
+ {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"},
+ {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+ {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+ {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+ {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+ {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+ struct dentry *stsi_root;
+ struct stsi_file *sf;
+ int lvl, i;
+
+ stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+ lvl = stsi(NULL, 0, 0, 0);
+ if (lvl > 0)
+ stsi_0_0_0 = lvl;
+ debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+ for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+ sf = &stsi_file[i];
+ debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+ }
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) {
+ char link_to[10];
+
+ snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit());
+ debugfs_create_symlink("topology", stsi_root, link_to);
+ }
+ return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
new file mode 100644
index 000000000000..26f2981aa09e
--- /dev/null
+++ b/arch/s390/kernel/text_amode31.S
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code that needs to run below 2 GB.
+ *
+ * Copyright IBM Corp. 2019
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-extable.h>
+#include <asm/errno.h>
+#include <asm/sigp.h>
+
+ .section .amode31.text,"ax"
+/*
+ * Simplified version of expoline thunk. The normal thunks can not be used here,
+ * because they might be more than 2 GB away, and not reachable by the relative
+ * branch. No comdat, exrl, etc. optimizations used here, because it only
+ * affects a few functions that are not performance-relevant.
+ */
+ .macro BR_EX_AMODE31_r14
+ exrl 0,0f
+ j .
+0: br %r14
+ .endm
+
+/*
+ * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
+ */
+SYM_FUNC_START(_diag14_amode31)
+ lgr %r1,%r2
+ lgr %r2,%r3
+ lgr %r3,%r4
+ lhi %r5,-EIO
+ sam31
+ diag %r1,%r2,0x14
+.Ldiag14_ex:
+ ipm %r5
+ srl %r5,28
+.Ldiag14_fault:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
+SYM_FUNC_END(_diag14_amode31)
+
+/*
+ * int _diag210_amode31(struct diag210 *addr)
+ */
+SYM_FUNC_START(_diag210_amode31)
+ lgr %r1,%r2
+ lhi %r2,-1
+ sam31
+ diag %r1,%r0,0x210
+.Ldiag210_ex:
+ ipm %r2
+ srl %r2,28
+.Ldiag210_fault:
+ sam64
+ lgfr %r2,%r2
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
+SYM_FUNC_END(_diag210_amode31)
+
+/*
+ * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+*/
+SYM_FUNC_START(_diag8c_amode31)
+ llgf %r3,0(%r3)
+ sam31
+ diag %r2,%r4,0x8c
+.Ldiag8c_ex:
+ sam64
+ lgfr %r2,%r3
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+SYM_FUNC_END(_diag8c_amode31)
+/*
+ * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
+ */
+SYM_FUNC_START(_diag26c_amode31)
+ lghi %r5,-EOPNOTSUPP
+ sam31
+ diag %r2,%r4,0x26c
+.Ldiag26c_ex:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
+SYM_FUNC_END(_diag26c_amode31)
+
+/*
+ * void _diag0c_amode31(unsigned long rx)
+ */
+SYM_FUNC_START(_diag0c_amode31)
+ sam31
+ diag %r2,%r2,0x0c
+ sam64
+ BR_EX_AMODE31_r14
+SYM_FUNC_END(_diag0c_amode31)
+
+/*
+ * void _diag308_reset_amode31(void)
+ *
+ * Calls diag 308 subcode 1 and continues execution
+ */
+SYM_FUNC_START(_diag308_reset_amode31)
+ larl %r4,ctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,ctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
+ larl %r4,fpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,prefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,prefix_zero # Set prefix register to 0
+ spx 0(%r4)
+ larl %r4,continue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0
+ larl %r3,restart_diag308_psw
+ og %r4,0(%r3) # Save PSW
+ lghi %r3,0
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ lghi %r0,0
+ diag %r0,%r1,0x308
+.Lrestart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,ctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,fpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,prefix # Restore prefix register
+ spx 0(%r4)
+ larl %r4,continue_psw # Restore PSW flags
+ larl %r2,.Lcontinue
+ stg %r2,8(%r4)
+ lpswe 0(%r4)
+.Lcontinue:
+ BR_EX_AMODE31_r14
+SYM_FUNC_END(_diag308_reset_amode31)
+
+ .section .amode31.data,"aw",@progbits
+ .balign 8
+SYM_DATA_LOCAL(restart_diag308_psw, .long 0x00080000,0x80000000)
+SYM_DATA_LOCAL(continue_psw, .quad 0,0)
+SYM_DATA_LOCAL(ctlreg0, .quad 0)
+SYM_DATA_LOCAL(ctlregs, .fill 16,8,0)
+SYM_DATA_LOCAL(fpctl, .long 0)
+SYM_DATA_LOCAL(prefix, .long 0)
+SYM_DATA_LOCAL(prefix_zero, .long 0)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 876546b9cfa1..bd0df61d1907 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Time of day based timer functions.
*
@@ -11,13 +12,13 @@
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*/
-#define KMSG_COMPONENT "time"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "time: " fmt
#include <linux/kernel_stat.h>
#include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
@@ -34,104 +35,107 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/kprobes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <vdso/vsyscall.h>
+#include <vdso/clocksource.h>
+#include <vdso/helpers.h>
+#include <asm/facility.h>
#include <asm/delay.h>
#include <asm/div64.h>
#include <asm/vdso.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/vtimer.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
#include <asm/cio.h>
#include "entry.h"
-/* change this if you have some constant time drift */
-#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ)
-#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
+union tod_clock __bootdata_preserved(tod_clock_base);
+EXPORT_SYMBOL_GPL(tod_clock_base);
-u64 sched_clock_base_cc = -1; /* Force to data section. */
-EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+u64 __bootdata_preserved(clock_comparator_max);
+EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
+unsigned char ptff_function_mask[16];
+
+static unsigned long lpar_offset;
+static unsigned long initial_leap_seconds;
+
/*
- * Scheduler clock - returns current time in nanosec units.
+ * Get time offsets with PTFF
*/
-unsigned long long notrace __kprobes sched_clock(void)
+void __init time_early_init(void)
{
- return tod_to_ns(get_tod_clock_monotonic());
+ struct ptff_qto qto;
+ struct ptff_qui qui;
+
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
+
+ if (!test_facility(28))
+ return;
+
+ ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+ /* get LPAR offset */
+ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+ lpar_offset = qto.tod_epoch_difference;
+
+ /* get initial leap seconds */
+ if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+ initial_leap_seconds = (unsigned long)
+ ((long) qui.old_leap * 4096000000L);
+}
+
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return tod_to_ns(__get_tod_clock_monotonic());
}
/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
+ * Scheduler clock - returns current time in nanosec units.
*/
-unsigned long long monotonic_clock(void)
+unsigned long long notrace sched_clock(void)
{
- return sched_clock();
+ return tod_to_ns(get_tod_clock_monotonic());
}
-EXPORT_SYMBOL(monotonic_clock);
+NOKPROBE_SYMBOL(sched_clock);
-void tod_to_timeval(__u64 todval, struct timespec *xt)
+static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt)
{
- unsigned long long sec;
+ unsigned long rem, sec, nsec;
- sec = todval >> 12;
- do_div(sec, 1000000);
+ sec = clk->us;
+ rem = do_div(sec, 1000000);
+ nsec = ((clk->sus + (rem << 12)) * 125) >> 9;
xt->tv_sec = sec;
- todval -= (sec * 1000000) << 12;
- xt->tv_nsec = ((todval * 1000) >> 12);
+ xt->tv_nsec = nsec;
}
-EXPORT_SYMBOL(tod_to_timeval);
void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
- cd = &__get_cpu_var(comparators);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
-/*
- * Fixup the clock comparator.
- */
-static void fixup_clock_comparator(unsigned long long delta)
-{
- /* If nobody is waiting there's nothing to fix. */
- if (S390_lowcore.clock_comparator == -1ULL)
- return;
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
-}
-
-static int s390_next_ktime(ktime_t expires,
+static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- struct timespec ts;
- u64 nsecs;
-
- ts.tv_sec = ts.tv_nsec = 0;
- monotonic_to_bootbased(&ts);
- nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires));
- do_div(nsecs, 125);
- S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9);
- /* Program the maximum value if we have an overflow (== year 2042) */
- if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc))
- S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return 0;
}
-static void s390_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
-}
-
/*
* Set up lowcore and control register of the current cpu to
* enable TOD clock and clock comparator interrupts.
@@ -141,30 +145,30 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
cd->name = "comparator";
- cd->features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_KTIME;
+ cd->features = CLOCK_EVT_FEAT_ONESHOT;
cd->mult = 16777;
cd->shift = 12;
cd->min_delta_ns = 1;
+ cd->min_delta_ticks = 1;
cd->max_delta_ns = LONG_MAX;
+ cd->max_delta_ticks = ULONG_MAX;
cd->rating = 400;
cd->cpumask = cpumask_of(cpu);
- cd->set_next_ktime = s390_next_ktime;
- cd->set_mode = s390_set_mode;
+ cd->set_next_event = s390_next_event;
clockevents_register_device(cd);
/* Enable clock comparator timer interrupt. */
- __ctl_set_bit(0,11);
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT);
/* Always allow the timing alert external interrupt. */
- __ctl_set_bit(0, 4);
+ local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT);
}
static void clock_comparator_interrupt(struct ext_code ext_code,
@@ -172,49 +176,64 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == -1ULL)
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator == clock_comparator_max)
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
-static void etr_timing_alert(struct etr_irq_parm *);
static void stp_timing_alert(struct stp_irq_parm *);
static void timing_alert_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
inc_irq_stat(IRQEXT_TLA);
- if (param32 & 0x00c40000)
- etr_timing_alert((struct etr_irq_parm *) &param32);
if (param32 & 0x00038000)
stp_timing_alert((struct stp_irq_parm *) &param32);
}
-static void etr_reset(void);
static void stp_reset(void);
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
- tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
+ union tod_clock clk;
+ u64 delta;
+
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ store_tod_clock_ext(&clk);
+ clk.eitod -= delta;
+ ext_to_timespec64(&clk, ts);
}
-void read_boot_clock(struct timespec *ts)
+void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+ struct timespec64 *boot_offset)
{
- tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+ struct timespec64 boot_time;
+ union tod_clock clk;
+ u64 delta;
+
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ clk = tod_clock_base;
+ clk.eitod -= delta;
+ ext_to_timespec64(&clk, &boot_time);
+
+ read_persistent_clock64(wall_time);
+ *boot_offset = timespec64_sub(*wall_time, boot_time);
}
-static cycle_t read_tod_clock(struct clocksource *cs)
+static u64 read_tod_clock(struct clocksource *cs)
{
- return get_tod_clock();
+ return get_tod_clock_monotonic();
}
static struct clocksource clocksource_tod = {
.name = "tod",
.rating = 400,
.read = read_tod_clock,
- .mask = -1ULL,
- .mult = 1000,
- .shift = 12,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 4096000,
+ .shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+ .id = CSID_S390_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
@@ -222,38 +241,6 @@ struct clocksource * __init clocksource_default_clock(void)
return &clocksource_tod;
}
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult)
-{
- if (clock != &clocksource_tod)
- return;
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_wmb();
- vdso_data->xtime_tod_stamp = clock->cycle_last;
- vdso_data->xtime_clock_sec = wall_time->tv_sec;
- vdso_data->xtime_clock_nsec = wall_time->tv_nsec;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->ntp_mult = mult;
- smp_wmb();
- ++vdso_data->tb_update_count;
-}
-
-extern struct timezone sys_tz;
-
-void update_vsyscall_tz(void)
-{
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_wmb();
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
- smp_wmb();
- ++vdso_data->tb_update_count;
-}
-
/*
* Initialize the TOD clock and the CPU timer of
* the boot cpu.
@@ -261,18 +248,17 @@ void update_vsyscall_tz(void)
void __init time_init(void)
{
/* Reset time synchronization interfaces. */
- etr_reset();
stp_reset();
/* request the clock comparator external interrupt */
- if (register_external_interrupt(0x1004, clock_comparator_interrupt))
- panic("Couldn't request external interrupt 0x1004");
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
/* request the timing alert external interrupt */
- if (register_external_interrupt(0x1406, timing_alert_interrupt))
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
- if (clocksource_register(&clocksource_tod) != 0)
+ if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */
@@ -282,106 +268,66 @@ void __init time_init(void)
vtime_init();
}
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long adjust_time(unsigned long long old,
- unsigned long long clock,
- unsigned long long delay)
-{
- unsigned long long delta, ticks;
- struct timex adjust;
-
- if (clock > old) {
- /* It is later than we thought. */
- delta = ticks = clock - old;
- delta = ticks = (delta < delay) ? 0 : delta - delay;
- delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- adjust.offset = ticks * (1000000 / HZ);
- } else {
- /* It is earlier than we thought. */
- delta = ticks = old - clock;
- delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- delta = -delta;
- adjust.offset = -ticks * (1000000 / HZ);
- }
- sched_clock_base_cc += delta;
- if (adjust.offset != 0) {
- pr_notice("The ETR interface has adjusted the clock "
- "by %li microseconds\n", adjust.offset);
- adjust.modes = ADJ_OFFSET_SINGLESHOT;
- do_adjtimex(&adjust);
- }
- return delta;
-}
-
static DEFINE_PER_CPU(atomic_t, clock_sync_word);
-static DEFINE_MUTEX(clock_sync_mutex);
+static DEFINE_MUTEX(stp_mutex);
static unsigned long clock_sync_flags;
-#define CLOCK_SYNC_HAS_ETR 0
-#define CLOCK_SYNC_HAS_STP 1
-#define CLOCK_SYNC_ETR 2
-#define CLOCK_SYNC_STP 3
+#define CLOCK_SYNC_HAS_STP 0
+#define CLOCK_SYNC_STP 1
+#define CLOCK_SYNC_STPINFO_VALID 2
/*
- * The synchronous get_clock function. It will write the current clock
- * value to the clock pointer and return 0 if the clock is in sync with
- * the external time source. If the clock mode is local it will return
- * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
- * reference.
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. If the clock mode is local it will return -EOPNOTSUPP and
+ * -EAGAIN if the clock is not in sync with the external reference.
*/
-int get_sync_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long *clock)
{
atomic_t *sw_ptr;
unsigned int sw0, sw1;
sw_ptr = &get_cpu_var(clock_sync_word);
sw0 = atomic_read(sw_ptr);
- *clock = get_tod_clock();
+ *clock = get_tod_clock() - lpar_offset;
sw1 = atomic_read(sw_ptr);
put_cpu_var(clock_sync_word);
if (sw0 == sw1 && (sw0 & 0x80000000U))
/* Success: time is in sync. */
return 0;
- if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
- !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
- if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
- !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
return -EACCES;
return -EAGAIN;
}
-EXPORT_SYMBOL(get_sync_clock);
+EXPORT_SYMBOL(get_phys_clock);
/*
- * Make get_sync_clock return -EAGAIN.
+ * Make get_phys_clock() return -EAGAIN.
*/
static void disable_sync_clock(void *dummy)
{
- atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
/*
- * Clear the in-sync bit 2^31. All get_sync_clock calls will
+ * Clear the in-sync bit 2^31. All get_phys_clock calls will
* fail until the sync bit is turned back on. In addition
* increase the "sequence" counter to avoid the race of an
- * etr event and the complete recovery against get_sync_clock.
+ * stp event and the complete recovery against get_phys_clock.
*/
- atomic_clear_mask(0x80000000, sw_ptr);
+ atomic_andnot(0x80000000, sw_ptr);
atomic_inc(sw_ptr);
}
/*
- * Make get_sync_clock return 0 again.
+ * Make get_phys_clock() return 0 again.
* Needs to be called from a context disabled for preemption.
*/
static void enable_sync_clock(void)
{
- atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
- atomic_set_mask(0x80000000, sw_ptr);
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+ atomic_or(0x80000000, sw_ptr);
}
/*
@@ -398,1039 +344,69 @@ static inline int check_sync_clock(void)
return rc;
}
-/* Single threaded workqueue used for etr and stp sync events */
-static struct workqueue_struct *time_sync_wq;
-
-static void __init time_init_wq(void)
-{
- if (time_sync_wq)
- return;
- time_sync_wq = create_singlethread_workqueue("timesync");
-}
-
-/*
- * External Time Reference (ETR) code.
- */
-static int etr_port0_online;
-static int etr_port1_online;
-static int etr_steai_available;
-
-static int __init early_parse_etr(char *p)
-{
- if (strncmp(p, "off", 3) == 0)
- etr_port0_online = etr_port1_online = 0;
- else if (strncmp(p, "port0", 5) == 0)
- etr_port0_online = 1;
- else if (strncmp(p, "port1", 5) == 0)
- etr_port1_online = 1;
- else if (strncmp(p, "on", 2) == 0)
- etr_port0_online = etr_port1_online = 1;
- return 0;
-}
-early_param("etr", early_parse_etr);
-
-enum etr_event {
- ETR_EVENT_PORT0_CHANGE,
- ETR_EVENT_PORT1_CHANGE,
- ETR_EVENT_PORT_ALERT,
- ETR_EVENT_SYNC_CHECK,
- ETR_EVENT_SWITCH_LOCAL,
- ETR_EVENT_UPDATE,
-};
-
/*
- * Valid bit combinations of the eacr register are (x = don't care):
- * e0 e1 dp p0 p1 ea es sl
- * 0 0 x 0 0 0 0 0 initial, disabled state
- * 0 0 x 0 1 1 0 0 port 1 online
- * 0 0 x 1 0 1 0 0 port 0 online
- * 0 0 x 1 1 1 0 0 both ports online
- * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode
- * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode
- * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync
- * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync
- * 0 1 x 1 1 1 0 0 both ports online, port 1 usable
- * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync
- * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync
- * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode
- * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode
- * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync
- * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync
- * 1 0 x 1 1 1 0 0 both ports online, port 0 usable
- * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync
- * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync
- * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync
- * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync
+ * Apply clock delta to the global data structures.
+ * This is called once on the CPU that performed the clock sync.
*/
-static struct etr_eacr etr_eacr;
-static u64 etr_tolec; /* time of last eacr update */
-static struct etr_aib etr_port0;
-static int etr_port0_uptodate;
-static struct etr_aib etr_port1;
-static int etr_port1_uptodate;
-static unsigned long etr_events;
-static struct timer_list etr_timer;
-
-static void etr_timeout(unsigned long dummy);
-static void etr_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(etr_work_mutex);
-static DECLARE_WORK(etr_work, etr_work_fn);
-
-/*
- * Reset ETR attachment.
- */
-static void etr_reset(void)
+static void clock_sync_global(long delta)
{
- etr_eacr = (struct etr_eacr) {
- .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
- .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
- .es = 0, .sl = 0 };
- if (etr_setr(&etr_eacr) == 0) {
- etr_tolec = get_tod_clock();
- set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
- if (etr_port0_online && etr_port1_online)
- set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- } else if (etr_port0_online || etr_port1_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an ETR interface\n");
- etr_port0_online = etr_port1_online = 0;
- }
-}
-
-static int __init etr_init(void)
-{
- struct etr_aib aib;
+ struct ptff_qto qto;
- if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
- return 0;
- time_init_wq();
- /* Check if this machine has the steai instruction. */
- if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
- etr_steai_available = 1;
- setup_timer(&etr_timer, etr_timeout, 0UL);
- if (etr_port0_online) {
- set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- queue_work(time_sync_wq, &etr_work);
- }
- if (etr_port1_online) {
- set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- queue_work(time_sync_wq, &etr_work);
- }
- return 0;
+ /* Fixup the monotonic sched clock. */
+ tod_clock_base.eitod += delta;
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
+ /* Update LPAR offset. */
+ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+ lpar_offset = qto.tod_epoch_difference;
+ /* Call the TOD clock change notifier. */
+ atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta);
}
-arch_initcall(etr_init);
-
-/*
- * Two sorts of ETR machine checks. The architecture reads:
- * "When a machine-check niterruption occurs and if a switch-to-local or
- * ETR-sync-check interrupt request is pending but disabled, this pending
- * disabled interruption request is indicated and is cleared".
- * Which means that we can get etr_switch_to_local events from the machine
- * check handler although the interruption condition is disabled. Lovely..
- */
-
/*
- * Switch to local machine check. This is called when the last usable
- * ETR port goes inactive. After switch to local the clock is not in sync.
+ * Apply clock delta to the per-CPU data structures of this CPU.
+ * This is called for each online CPU after the call to clock_sync_global.
*/
-void etr_switch_to_local(void)
+static void clock_sync_local(long delta)
{
- if (!etr_eacr.sl)
- return;
- disable_sync_clock(NULL);
- if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
- etr_eacr.es = etr_eacr.sl = 0;
- etr_setr(&etr_eacr);
- queue_work(time_sync_wq, &etr_work);
+ /* Add the delta to the clock comparator. */
+ if (get_lowcore()->clock_comparator != clock_comparator_max) {
+ get_lowcore()->clock_comparator += delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
+ /* Adjust the last_update_clock time-stamp. */
+ get_lowcore()->last_update_clock += delta;
}
-/*
- * ETR sync check machine check. This is called when the ETR OTE and the
- * local clock OTE are farther apart than the ETR sync check tolerance.
- * After a ETR sync check the clock is not in sync. The machine check
- * is broadcasted to all cpus at the same time.
- */
-void etr_sync_check(void)
-{
- if (!etr_eacr.es)
- return;
- disable_sync_clock(NULL);
- if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
- etr_eacr.es = 0;
- etr_setr(&etr_eacr);
- queue_work(time_sync_wq, &etr_work);
- }
-}
-
-/*
- * ETR timing alert. There are two causes:
- * 1) port state change, check the usability of the port
- * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
- * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
- * or ETR-data word 4 (edf4) has changed.
- */
-static void etr_timing_alert(struct etr_irq_parm *intparm)
-{
- if (intparm->pc0)
- /* ETR port 0 state change. */
- set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- if (intparm->pc1)
- /* ETR port 1 state change. */
- set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- if (intparm->eai)
- /*
- * ETR port alert on either port 0, 1 or both.
- * Both ports are not up-to-date now.
- */
- set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
- queue_work(time_sync_wq, &etr_work);
-}
-
-static void etr_timeout(unsigned long dummy)
-{
- set_bit(ETR_EVENT_UPDATE, &etr_events);
- queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * Check if the etr mode is pss.
- */
-static inline int etr_mode_is_pps(struct etr_eacr eacr)
-{
- return eacr.es && !eacr.sl;
-}
-
-/*
- * Check if the etr mode is etr.
- */
-static inline int etr_mode_is_etr(struct etr_eacr eacr)
-{
- return eacr.es && eacr.sl;
-}
-
-/*
- * Check if the port can be used for TOD synchronization.
- * For PPS mode the port has to receive OTEs. For ETR mode
- * the port has to receive OTEs, the ETR stepping bit has to
- * be zero and the validity bits for data frame 1, 2, and 3
- * have to be 1.
- */
-static int etr_port_valid(struct etr_aib *aib, int port)
-{
- unsigned int psc;
-
- /* Check that this port is receiving OTEs. */
- if (aib->tsp == 0)
- return 0;
-
- psc = port ? aib->esw.psc1 : aib->esw.psc0;
- if (psc == etr_lpsc_pps_mode)
- return 1;
- if (psc == etr_lpsc_operational_step)
- return !aib->esw.y && aib->slsw.v1 &&
- aib->slsw.v2 && aib->slsw.v3;
- return 0;
-}
-
-/*
- * Check if two ports are on the same network.
- */
-static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
-{
- // FIXME: any other fields we have to compare?
- return aib1->edf1.net_id == aib2->edf1.net_id;
-}
-
-/*
- * Wrapper for etr_stei that converts physical port states
- * to logical port states to be consistent with the output
- * of stetr (see etr_psc vs. etr_lpsc).
- */
-static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
-{
- BUG_ON(etr_steai(aib, func) != 0);
- /* Convert port state to logical port state. */
- if (aib->esw.psc0 == 1)
- aib->esw.psc0 = 2;
- else if (aib->esw.psc0 == 0 && aib->esw.p == 0)
- aib->esw.psc0 = 1;
- if (aib->esw.psc1 == 1)
- aib->esw.psc1 = 2;
- else if (aib->esw.psc1 == 0 && aib->esw.p == 1)
- aib->esw.psc1 = 1;
-}
+/* Single threaded workqueue used for stp sync events */
+static struct workqueue_struct *time_sync_wq;
-/*
- * Check if the aib a2 is still connected to the same attachment as
- * aib a1, the etv values differ by one and a2 is valid.
- */
-static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
+static void __init time_init_wq(void)
{
- int state_a1, state_a2;
-
- /* Paranoia check: e0/e1 should better be the same. */
- if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
- a1->esw.eacr.e1 != a2->esw.eacr.e1)
- return 0;
-
- /* Still connected to the same etr ? */
- state_a1 = p ? a1->esw.psc1 : a1->esw.psc0;
- state_a2 = p ? a2->esw.psc1 : a2->esw.psc0;
- if (state_a1 == etr_lpsc_operational_step) {
- if (state_a2 != etr_lpsc_operational_step ||
- a1->edf1.net_id != a2->edf1.net_id ||
- a1->edf1.etr_id != a2->edf1.etr_id ||
- a1->edf1.etr_pn != a2->edf1.etr_pn)
- return 0;
- } else if (state_a2 != etr_lpsc_pps_mode)
- return 0;
-
- /* The ETV value of a2 needs to be ETV of a1 + 1. */
- if (a1->edf2.etv + 1 != a2->edf2.etv)
- return 0;
-
- if (!etr_port_valid(a2, p))
- return 0;
-
- return 1;
+ if (time_sync_wq)
+ return;
+ time_sync_wq = create_singlethread_workqueue("timesync");
}
struct clock_sync_data {
atomic_t cpus;
int in_sync;
- unsigned long long fixup_cc;
- int etr_port;
- struct etr_aib *etr_aib;
+ long clock_delta;
};
-static void clock_sync_cpu(struct clock_sync_data *sync)
-{
- atomic_dec(&sync->cpus);
- enable_sync_clock();
- /*
- * This looks like a busy wait loop but it isn't. etr_sync_cpus
- * is called on all other cpus while the TOD clocks is stopped.
- * __udelay will stop the cpu on an enabled wait psw until the
- * TOD is running again.
- */
- while (sync->in_sync == 0) {
- __udelay(1);
- /*
- * A different cpu changes *in_sync. Therefore use
- * barrier() to force memory access.
- */
- barrier();
- }
- if (sync->in_sync != 1)
- /* Didn't work. Clear per-cpu in sync bit again. */
- disable_sync_clock(NULL);
- /*
- * This round of TOD syncing is done. Set the clock comparator
- * to the next tick and let the processor continue.
- */
- fixup_clock_comparator(sync->fixup_cc);
-}
-
-/*
- * Sync the TOD clock using the port referred to by aibp. This port
- * has to be enabled and the other port has to be disabled. The
- * last eacr update has to be more than 1.6 seconds in the past.
- */
-static int etr_sync_clock(void *data)
-{
- static int first;
- unsigned long long clock, old_clock, delay, delta;
- struct clock_sync_data *etr_sync;
- struct etr_aib *sync_port, *aib;
- int port;
- int rc;
-
- etr_sync = data;
-
- if (xchg(&first, 1) == 1) {
- /* Slave */
- clock_sync_cpu(etr_sync);
- return 0;
- }
-
- /* Wait until all other cpus entered the sync function. */
- while (atomic_read(&etr_sync->cpus) != 0)
- cpu_relax();
-
- port = etr_sync->etr_port;
- aib = etr_sync->etr_aib;
- sync_port = (port == 0) ? &etr_port0 : &etr_port1;
- enable_sync_clock();
-
- /* Set clock to next OTE. */
- __ctl_set_bit(14, 21);
- __ctl_set_bit(0, 29);
- clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
- old_clock = get_tod_clock();
- if (set_tod_clock(clock) == 0) {
- __udelay(1); /* Wait for the clock to start. */
- __ctl_clear_bit(0, 29);
- __ctl_clear_bit(14, 21);
- etr_stetr(aib);
- /* Adjust Linux timing variables. */
- delay = (unsigned long long)
- (aib->edf2.etv - sync_port->edf2.etv) << 32;
- delta = adjust_time(old_clock, clock, delay);
- etr_sync->fixup_cc = delta;
- fixup_clock_comparator(delta);
- /* Verify that the clock is properly set. */
- if (!etr_aib_follows(sync_port, aib, port)) {
- /* Didn't work. */
- disable_sync_clock(NULL);
- etr_sync->in_sync = -EAGAIN;
- rc = -EAGAIN;
- } else {
- etr_sync->in_sync = 1;
- rc = 0;
- }
- } else {
- /* Could not set the clock ?!? */
- __ctl_clear_bit(0, 29);
- __ctl_clear_bit(14, 21);
- disable_sync_clock(NULL);
- etr_sync->in_sync = -EAGAIN;
- rc = -EAGAIN;
- }
- xchg(&first, 0);
- return rc;
-}
-
-static int etr_sync_clock_stop(struct etr_aib *aib, int port)
-{
- struct clock_sync_data etr_sync;
- struct etr_aib *sync_port;
- int follows;
- int rc;
-
- /* Check if the current aib is adjacent to the sync port aib. */
- sync_port = (port == 0) ? &etr_port0 : &etr_port1;
- follows = etr_aib_follows(sync_port, aib, port);
- memcpy(sync_port, aib, sizeof(*aib));
- if (!follows)
- return -EAGAIN;
- memset(&etr_sync, 0, sizeof(etr_sync));
- etr_sync.etr_aib = aib;
- etr_sync.etr_port = port;
- get_online_cpus();
- atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
- rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
- put_online_cpus();
- return rc;
-}
-
-/*
- * Handle the immediate effects of the different events.
- * The port change event is used for online/offline changes.
- */
-static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
-{
- if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
- eacr.es = 0;
- if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
- eacr.es = eacr.sl = 0;
- if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
- etr_port0_uptodate = etr_port1_uptodate = 0;
-
- if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
- if (eacr.e0)
- /*
- * Port change of an enabled port. We have to
- * assume that this can have caused an stepping
- * port switch.
- */
- etr_tolec = get_tod_clock();
- eacr.p0 = etr_port0_online;
- if (!eacr.p0)
- eacr.e0 = 0;
- etr_port0_uptodate = 0;
- }
- if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
- if (eacr.e1)
- /*
- * Port change of an enabled port. We have to
- * assume that this can have caused an stepping
- * port switch.
- */
- etr_tolec = get_tod_clock();
- eacr.p1 = etr_port1_online;
- if (!eacr.p1)
- eacr.e1 = 0;
- etr_port1_uptodate = 0;
- }
- clear_bit(ETR_EVENT_UPDATE, &etr_events);
- return eacr;
-}
-
-/*
- * Set up a timer that expires after the etr_tolec + 1.6 seconds if
- * one of the ports needs an update.
- */
-static void etr_set_tolec_timeout(unsigned long long now)
-{
- unsigned long micros;
-
- if ((!etr_eacr.p0 || etr_port0_uptodate) &&
- (!etr_eacr.p1 || etr_port1_uptodate))
- return;
- micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
- micros = (micros > 1600000) ? 0 : 1600000 - micros;
- mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1);
-}
-
-/*
- * Set up a time that expires after 1/2 second.
- */
-static void etr_set_sync_timeout(void)
-{
- mod_timer(&etr_timer, jiffies + HZ/2);
-}
-
-/*
- * Update the aib information for one or both ports.
- */
-static struct etr_eacr etr_handle_update(struct etr_aib *aib,
- struct etr_eacr eacr)
-{
- /* With both ports disabled the aib information is useless. */
- if (!eacr.e0 && !eacr.e1)
- return eacr;
-
- /* Update port0 or port1 with aib stored in etr_work_fn. */
- if (aib->esw.q == 0) {
- /* Information for port 0 stored. */
- if (eacr.p0 && !etr_port0_uptodate) {
- etr_port0 = *aib;
- if (etr_port0_online)
- etr_port0_uptodate = 1;
- }
- } else {
- /* Information for port 1 stored. */
- if (eacr.p1 && !etr_port1_uptodate) {
- etr_port1 = *aib;
- if (etr_port0_online)
- etr_port1_uptodate = 1;
- }
- }
-
- /*
- * Do not try to get the alternate port aib if the clock
- * is not in sync yet.
- */
- if (!eacr.es || !check_sync_clock())
- return eacr;
-
- /*
- * If steai is available we can get the information about
- * the other port immediately. If only stetr is available the
- * data-port bit toggle has to be used.
- */
- if (etr_steai_available) {
- if (eacr.p0 && !etr_port0_uptodate) {
- etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
- etr_port0_uptodate = 1;
- }
- if (eacr.p1 && !etr_port1_uptodate) {
- etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
- etr_port1_uptodate = 1;
- }
- } else {
- /*
- * One port was updated above, if the other
- * port is not uptodate toggle dp bit.
- */
- if ((eacr.p0 && !etr_port0_uptodate) ||
- (eacr.p1 && !etr_port1_uptodate))
- eacr.dp ^= 1;
- else
- eacr.dp = 0;
- }
- return eacr;
-}
-
-/*
- * Write new etr control register if it differs from the current one.
- * Return 1 if etr_tolec has been updated as well.
- */
-static void etr_update_eacr(struct etr_eacr eacr)
-{
- int dp_changed;
-
- if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
- /* No change, return. */
- return;
- /*
- * The disable of an active port of the change of the data port
- * bit can/will cause a change in the data port.
- */
- dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
- (etr_eacr.dp ^ eacr.dp) != 0;
- etr_eacr = eacr;
- etr_setr(&etr_eacr);
- if (dp_changed)
- etr_tolec = get_tod_clock();
-}
-
-/*
- * ETR work. In this function you'll find the main logic. In
- * particular this is the only function that calls etr_update_eacr(),
- * it "controls" the etr control register.
- */
-static void etr_work_fn(struct work_struct *work)
-{
- unsigned long long now;
- struct etr_eacr eacr;
- struct etr_aib aib;
- int sync_port;
-
- /* prevent multiple execution. */
- mutex_lock(&etr_work_mutex);
-
- /* Create working copy of etr_eacr. */
- eacr = etr_eacr;
-
- /* Check for the different events and their immediate effects. */
- eacr = etr_handle_events(eacr);
-
- /* Check if ETR is supposed to be active. */
- eacr.ea = eacr.p0 || eacr.p1;
- if (!eacr.ea) {
- /* Both ports offline. Reset everything. */
- eacr.dp = eacr.es = eacr.sl = 0;
- on_each_cpu(disable_sync_clock, NULL, 1);
- del_timer_sync(&etr_timer);
- etr_update_eacr(eacr);
- goto out_unlock;
- }
-
- /* Store aib to get the current ETR status word. */
- BUG_ON(etr_stetr(&aib) != 0);
- etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */
- now = get_tod_clock();
-
- /*
- * Update the port information if the last stepping port change
- * or data port change is older than 1.6 seconds.
- */
- if (now >= etr_tolec + (1600000 << 12))
- eacr = etr_handle_update(&aib, eacr);
-
- /*
- * Select ports to enable. The preferred synchronization mode is PPS.
- * If a port can be enabled depends on a number of things:
- * 1) The port needs to be online and uptodate. A port is not
- * disabled just because it is not uptodate, but it is only
- * enabled if it is uptodate.
- * 2) The port needs to have the same mode (pps / etr).
- * 3) The port needs to be usable -> etr_port_valid() == 1
- * 4) To enable the second port the clock needs to be in sync.
- * 5) If both ports are useable and are ETR ports, the network id
- * has to be the same.
- * The eacr.sl bit is used to indicate etr mode vs. pps mode.
- */
- if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
- eacr.sl = 0;
- eacr.e0 = 1;
- if (!etr_mode_is_pps(etr_eacr))
- eacr.es = 0;
- if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
- eacr.e1 = 0;
- // FIXME: uptodate checks ?
- else if (etr_port0_uptodate && etr_port1_uptodate)
- eacr.e1 = 1;
- sync_port = (etr_port0_uptodate &&
- etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
- eacr.sl = 0;
- eacr.e0 = 0;
- eacr.e1 = 1;
- if (!etr_mode_is_pps(etr_eacr))
- eacr.es = 0;
- sync_port = (etr_port1_uptodate &&
- etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
- eacr.sl = 1;
- eacr.e0 = 1;
- if (!etr_mode_is_etr(etr_eacr))
- eacr.es = 0;
- if (!eacr.es || !eacr.p1 ||
- aib.esw.psc1 != etr_lpsc_operational_alt)
- eacr.e1 = 0;
- else if (etr_port0_uptodate && etr_port1_uptodate &&
- etr_compare_network(&etr_port0, &etr_port1))
- eacr.e1 = 1;
- sync_port = (etr_port0_uptodate &&
- etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
- eacr.sl = 1;
- eacr.e0 = 0;
- eacr.e1 = 1;
- if (!etr_mode_is_etr(etr_eacr))
- eacr.es = 0;
- sync_port = (etr_port1_uptodate &&
- etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- } else {
- /* Both ports not usable. */
- eacr.es = eacr.sl = 0;
- sync_port = -1;
- }
-
- /*
- * If the clock is in sync just update the eacr and return.
- * If there is no valid sync port wait for a port update.
- */
- if ((eacr.es && check_sync_clock()) || sync_port < 0) {
- etr_update_eacr(eacr);
- etr_set_tolec_timeout(now);
- goto out_unlock;
- }
-
- /*
- * Prepare control register for clock syncing
- * (reset data port bit, set sync check control.
- */
- eacr.dp = 0;
- eacr.es = 1;
-
- /*
- * Update eacr and try to synchronize the clock. If the update
- * of eacr caused a stepping port switch (or if we have to
- * assume that a stepping port switch has occurred) or the
- * clock syncing failed, reset the sync check control bit
- * and set up a timer to try again after 0.5 seconds
- */
- etr_update_eacr(eacr);
- if (now < etr_tolec + (1600000 << 12) ||
- etr_sync_clock_stop(&aib, sync_port) != 0) {
- /* Sync failed. Try again in 1/2 second. */
- eacr.es = 0;
- etr_update_eacr(eacr);
- etr_set_sync_timeout();
- } else
- etr_set_tolec_timeout(now);
-out_unlock:
- mutex_unlock(&etr_work_mutex);
-}
-
-/*
- * Sysfs interface functions
- */
-static struct bus_type etr_subsys = {
- .name = "etr",
- .dev_name = "etr",
-};
-
-static struct device etr_port0_dev = {
- .id = 0,
- .bus = &etr_subsys,
-};
-
-static struct device etr_port1_dev = {
- .id = 1,
- .bus = &etr_subsys,
-};
-
-/*
- * ETR subsys attributes
- */
-static ssize_t etr_stepping_port_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%i\n", etr_port0.esw.p);
-}
-
-static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
-
-static ssize_t etr_stepping_mode_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- char *mode_str;
-
- if (etr_mode_is_pps(etr_eacr))
- mode_str = "pps";
- else if (etr_mode_is_etr(etr_eacr))
- mode_str = "etr";
- else
- mode_str = "local";
- return sprintf(buf, "%s\n", mode_str);
-}
-
-static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
-
-/*
- * ETR port attributes
- */
-static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
-{
- if (dev == &etr_port0_dev)
- return etr_port0_online ? &etr_port0 : NULL;
- else
- return etr_port1_online ? &etr_port1 : NULL;
-}
-
-static ssize_t etr_online_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- unsigned int online;
-
- online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online;
- return sprintf(buf, "%i\n", online);
-}
-
-static ssize_t etr_online_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned int value;
-
- value = simple_strtoul(buf, NULL, 0);
- if (value != 0 && value != 1)
- return -EINVAL;
- if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
- return -EOPNOTSUPP;
- mutex_lock(&clock_sync_mutex);
- if (dev == &etr_port0_dev) {
- if (etr_port0_online == value)
- goto out; /* Nothing to do. */
- etr_port0_online = value;
- if (etr_port0_online && etr_port1_online)
- set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- else
- clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- queue_work(time_sync_wq, &etr_work);
- } else {
- if (etr_port1_online == value)
- goto out; /* Nothing to do. */
- etr_port1_online = value;
- if (etr_port0_online && etr_port1_online)
- set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- else
- clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- queue_work(time_sync_wq, &etr_work);
- }
-out:
- mutex_unlock(&clock_sync_mutex);
- return count;
-}
-
-static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
-
-static ssize_t etr_stepping_control_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
- etr_eacr.e0 : etr_eacr.e1);
-}
-
-static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
-
-static ssize_t etr_mode_code_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- if (!etr_port0_online && !etr_port1_online)
- /* Status word is not uptodate if both ports are offline. */
- return -ENODATA;
- return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
- etr_port0.esw.psc0 : etr_port0.esw.psc1);
-}
-
-static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
-
-static ssize_t etr_untuned_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v1)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf1.u);
-}
-
-static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
-
-static ssize_t etr_network_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v1)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf1.net_id);
-}
-
-static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
-
-static ssize_t etr_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v1)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf1.etr_id);
-}
-
-static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
-
-static ssize_t etr_port_number_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v1)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf1.etr_pn);
-}
-
-static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
-
-static ssize_t etr_coupled_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v3)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf3.c);
-}
-
-static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
-
-static ssize_t etr_local_time_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v3)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf3.blto);
-}
-
-static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
-
-static ssize_t etr_utc_offset_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct etr_aib *aib = etr_aib_from_dev(dev);
-
- if (!aib || !aib->slsw.v3)
- return -ENODATA;
- return sprintf(buf, "%i\n", aib->edf3.buo);
-}
-
-static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
-
-static struct device_attribute *etr_port_attributes[] = {
- &dev_attr_online,
- &dev_attr_stepping_control,
- &dev_attr_state_code,
- &dev_attr_untuned,
- &dev_attr_network,
- &dev_attr_id,
- &dev_attr_port,
- &dev_attr_coupled,
- &dev_attr_local_time,
- &dev_attr_utc_offset,
- NULL
-};
-
-static int __init etr_register_port(struct device *dev)
-{
- struct device_attribute **attr;
- int rc;
-
- rc = device_register(dev);
- if (rc)
- goto out;
- for (attr = etr_port_attributes; *attr; attr++) {
- rc = device_create_file(dev, *attr);
- if (rc)
- goto out_unreg;
- }
- return 0;
-out_unreg:
- for (; attr >= etr_port_attributes; attr--)
- device_remove_file(dev, *attr);
- device_unregister(dev);
-out:
- return rc;
-}
-
-static void __init etr_unregister_port(struct device *dev)
-{
- struct device_attribute **attr;
-
- for (attr = etr_port_attributes; *attr; attr++)
- device_remove_file(dev, *attr);
- device_unregister(dev);
-}
-
-static int __init etr_init_sysfs(void)
-{
- int rc;
-
- rc = subsys_system_register(&etr_subsys, NULL);
- if (rc)
- goto out;
- rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port);
- if (rc)
- goto out_unreg_subsys;
- rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
- if (rc)
- goto out_remove_stepping_port;
- rc = etr_register_port(&etr_port0_dev);
- if (rc)
- goto out_remove_stepping_mode;
- rc = etr_register_port(&etr_port1_dev);
- if (rc)
- goto out_remove_port0;
- return 0;
-
-out_remove_port0:
- etr_unregister_port(&etr_port0_dev);
-out_remove_stepping_mode:
- device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
-out_remove_stepping_port:
- device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port);
-out_unreg_subsys:
- bus_unregister(&etr_subsys);
-out:
- return rc;
-}
-
-device_initcall(etr_init_sysfs);
-
/*
* Server Time Protocol (STP) code.
*/
-static int stp_online;
+static bool stp_online = true;
static struct stp_sstpi stp_info;
static void *stp_page;
static void stp_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(stp_work_mutex);
static DECLARE_WORK(stp_work, stp_work_fn);
static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
- if (strncmp(p, "off", 3) == 0)
- stp_online = 0;
- else if (strncmp(p, "on", 2) == 0)
- stp_online = 1;
- return 0;
+ return kstrtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
@@ -1442,19 +418,23 @@ static void __init stp_reset(void)
int rc;
stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
- rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
- stp_online = 0;
+ stp_online = false;
}
}
-static void stp_timeout(unsigned long dummy)
+bool stp_enabled(void)
+{
+ return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online;
+}
+EXPORT_SYMBOL(stp_enabled);
+
+static void stp_timeout(struct timer_list *unused)
{
queue_work(time_sync_wq, &stp_work);
}
@@ -1463,7 +443,7 @@ static int __init stp_init(void)
{
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return 0;
- setup_timer(&stp_timer, stp_timeout, 0UL);
+ timer_setup(&stp_timer, stp_timeout, 0);
time_init_wq();
if (!stp_online)
return 0;
@@ -1493,10 +473,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm)
* After a STP sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time.
*/
-void stp_sync_check(void)
+int stp_sync_check(void)
{
disable_sync_clock(NULL);
- queue_work(time_sync_wq, &stp_work);
+ return 1;
}
/*
@@ -1505,55 +485,74 @@ void stp_sync_check(void)
* have matching CTN ids and have a valid stratum-1 configuration
* but the configurations do not match.
*/
-void stp_island_check(void)
+int stp_island_check(void)
{
disable_sync_clock(NULL);
+ return 1;
+}
+
+void stp_queue_work(void)
+{
queue_work(time_sync_wq, &stp_work);
}
+static int __store_stpinfo(void)
+{
+ int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+
+ if (rc)
+ clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ else
+ set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ return rc;
+}
+
+static int stpinfo_valid(void)
+{
+ return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+}
static int stp_sync_clock(void *data)
{
+ struct clock_sync_data *sync = data;
+ long clock_delta, flags;
static int first;
- unsigned long long old_clock, delta;
- struct clock_sync_data *stp_sync;
int rc;
- stp_sync = data;
-
- if (xchg(&first, 1) == 1) {
- /* Slave */
- clock_sync_cpu(stp_sync);
- return 0;
- }
-
- /* Wait until all other cpus entered the sync function. */
- while (atomic_read(&stp_sync->cpus) != 0)
- cpu_relax();
-
enable_sync_clock();
-
- rc = 0;
- if (stp_info.todoff[0] || stp_info.todoff[1] ||
- stp_info.todoff[2] || stp_info.todoff[3] ||
- stp_info.tmd != 2) {
- old_clock = get_tod_clock();
- rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
- if (rc == 0) {
- delta = adjust_time(old_clock, get_tod_clock(), 0);
- fixup_clock_comparator(delta);
- rc = chsc_sstpi(stp_page, &stp_info,
- sizeof(struct stp_sstpi));
- if (rc == 0 && stp_info.tmd != 2)
- rc = -EAGAIN;
+ if (xchg(&first, 1) == 0) {
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&sync->cpus) != 0)
+ cpu_relax();
+ rc = 0;
+ if (stp_info.todoff || stp_info.tmd != 2) {
+ flags = vdso_update_begin();
+ rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
+ &clock_delta);
+ if (rc == 0) {
+ sync->clock_delta = clock_delta;
+ clock_sync_global(clock_delta);
+ rc = __store_stpinfo();
+ if (rc == 0 && stp_info.tmd != 2)
+ rc = -EAGAIN;
+ }
+ vdso_update_end(flags);
}
+ sync->in_sync = rc ? -EAGAIN : 1;
+ xchg(&first, 0);
+ } else {
+ /* Slave */
+ atomic_dec(&sync->cpus);
+ /* Wait for in_sync to be set. */
+ while (READ_ONCE(sync->in_sync) == 0)
+ ;
}
- if (rc) {
+ if (sync->in_sync != 1)
+ /* Didn't work. Clear per-cpu in sync bit again. */
disable_sync_clock(NULL);
- stp_sync->in_sync = -EAGAIN;
- } else
- stp_sync->in_sync = 1;
- xchg(&first, 0);
+ /* Apply clock delta to per-CPU fields of this CPU. */
+ clock_sync_local(sync->clock_delta);
+
return 0;
}
@@ -1567,160 +566,222 @@ static void stp_work_fn(struct work_struct *work)
int rc;
/* prevent multiple execution. */
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (!stp_online) {
- chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
- del_timer_sync(&stp_timer);
+ chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
+ timer_delete_sync(&stp_timer);
goto out_unlock;
}
- rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL);
if (rc)
goto out_unlock;
- rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ rc = __store_stpinfo();
if (rc || stp_info.c == 0)
goto out_unlock;
/* Skip synchronization if the clock is already in sync. */
- if (check_sync_clock())
- goto out_unlock;
-
- memset(&stp_sync, 0, sizeof(stp_sync));
- get_online_cpus();
- atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
- put_online_cpus();
+ if (!check_sync_clock()) {
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ cpus_read_lock();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
+ }
if (!check_sync_clock())
/*
- * There is a usable clock but the synchonization failed.
+ * There is a usable clock but the synchronization failed.
* Retry after a second.
*/
- mod_timer(&stp_timer, jiffies + HZ);
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
out_unlock:
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
}
/*
* STP subsys sysfs interface functions
*/
-static struct bus_type stp_subsys = {
+static const struct bus_type stp_subsys = {
.name = "stp",
.dev_name = "stp",
};
-static ssize_t stp_ctn_id_show(struct device *dev,
+static ssize_t ctn_id_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%016llx\n",
- *(unsigned long long *) stp_info.ctnid);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sysfs_emit(buf, "%016lx\n",
+ *(unsigned long *)stp_info.ctnid);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+static DEVICE_ATTR_RO(ctn_id);
-static ssize_t stp_ctn_type_show(struct device *dev,
+static ssize_t ctn_type_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.ctn);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sysfs_emit(buf, "%i\n", stp_info.ctn);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+static DEVICE_ATTR_RO(ctn_type);
-static ssize_t stp_dst_offset_show(struct device *dev,
+static ssize_t dst_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x2000))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x2000))
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+static DEVICE_ATTR_RO(dst_offset);
-static ssize_t stp_leap_seconds_show(struct device *dev,
+static ssize_t leap_seconds_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x8000))
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x8000))
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps);
+ mutex_unlock(&stp_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR_RO(leap_seconds);
+
+static ssize_t leap_seconds_scheduled_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct stp_stzi stzi;
+ ssize_t ret;
+
+ mutex_lock(&stp_mutex);
+ if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) {
+ mutex_unlock(&stp_mutex);
return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ }
+
+ ret = chsc_stzi(stp_page, &stzi, sizeof(stzi));
+ mutex_unlock(&stp_mutex);
+ if (ret < 0)
+ return ret;
+
+ if (!stzi.lsoib.p)
+ return sysfs_emit(buf, "0,0\n");
+
+ return sysfs_emit(buf, "%lu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
}
-static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+static DEVICE_ATTR_RO(leap_seconds_scheduled);
-static ssize_t stp_stratum_show(struct device *dev,
+static ssize_t stratum_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+static DEVICE_ATTR_RO(stratum);
-static ssize_t stp_time_offset_show(struct device *dev,
+static ssize_t time_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x0800))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int) stp_info.tto);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x0800))
+ ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+static DEVICE_ATTR_RO(time_offset);
-static ssize_t stp_time_zone_offset_show(struct device *dev,
+static ssize_t time_zone_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x4000))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x4000))
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(time_zone_offset, 0400,
- stp_time_zone_offset_show, NULL);
+static DEVICE_ATTR_RO(time_zone_offset);
-static ssize_t stp_timing_mode_show(struct device *dev,
+static ssize_t timing_mode_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.tmd);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sysfs_emit(buf, "%i\n", stp_info.tmd);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+static DEVICE_ATTR_RO(timing_mode);
-static ssize_t stp_timing_state_show(struct device *dev,
+static ssize_t timing_state_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.tst);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sysfs_emit(buf, "%i\n", stp_info.tst);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
-static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+static DEVICE_ATTR_RO(timing_state);
-static ssize_t stp_online_show(struct device *dev,
+static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%i\n", stp_online);
+ return sysfs_emit(buf, "%i\n", stp_online);
}
-static ssize_t stp_online_store(struct device *dev,
+static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -1731,14 +792,14 @@ static ssize_t stp_online_store(struct device *dev,
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
- mutex_lock(&clock_sync_mutex);
+ mutex_lock(&stp_mutex);
stp_online = value;
if (stp_online)
set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
else
clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
queue_work(time_sync_wq, &stp_work);
- mutex_unlock(&clock_sync_mutex);
+ mutex_unlock(&stp_mutex);
return count;
}
@@ -1746,46 +807,27 @@ static ssize_t stp_online_store(struct device *dev,
* Can't use DEVICE_ATTR because the attribute should be named
* stp/online but dev_attr_online already exists in this file ..
*/
-static struct device_attribute dev_attr_stp_online = {
- .attr = { .name = "online", .mode = 0600 },
- .show = stp_online_show,
- .store = stp_online_store,
-};
-
-static struct device_attribute *stp_attributes[] = {
- &dev_attr_ctn_id,
- &dev_attr_ctn_type,
- &dev_attr_dst_offset,
- &dev_attr_leap_seconds,
- &dev_attr_stp_online,
- &dev_attr_stratum,
- &dev_attr_time_offset,
- &dev_attr_time_zone_offset,
- &dev_attr_timing_mode,
- &dev_attr_timing_state,
+static DEVICE_ATTR_RW(online);
+
+static struct attribute *stp_dev_attrs[] = {
+ &dev_attr_ctn_id.attr,
+ &dev_attr_ctn_type.attr,
+ &dev_attr_dst_offset.attr,
+ &dev_attr_leap_seconds.attr,
+ &dev_attr_online.attr,
+ &dev_attr_leap_seconds_scheduled.attr,
+ &dev_attr_stratum.attr,
+ &dev_attr_time_offset.attr,
+ &dev_attr_time_zone_offset.attr,
+ &dev_attr_timing_mode.attr,
+ &dev_attr_timing_state.attr,
NULL
};
+ATTRIBUTE_GROUPS(stp_dev);
static int __init stp_init_sysfs(void)
{
- struct device_attribute **attr;
- int rc;
-
- rc = subsys_system_register(&stp_subsys, NULL);
- if (rc)
- goto out;
- for (attr = stp_attributes; *attr; attr++) {
- rc = device_create_file(stp_subsys.dev_root, *attr);
- if (rc)
- goto out_unreg;
- }
- return 0;
-out_unreg:
- for (; attr >= stp_attributes; attr--)
- device_remove_file(stp_subsys.dev_root, *attr);
- bus_unregister(&stp_subsys);
-out:
- return rc;
+ return subsys_system_register(&stp_subsys, stp_dev_groups);
}
device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 4b2e3e317004..1913a5566ac2 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,92 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2011
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/init.h>
+#include <linux/sched/topology.h>
#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
+#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
+#include <asm/asm.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
+enum {
+ TOPOLOGY_MODE_HW,
+ TOPOLOGY_MODE_SINGLE,
+ TOPOLOGY_MODE_PACKAGE,
+ TOPOLOGY_MODE_UNINITIALIZED
+};
+
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
-static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and cpu_topology updates are
+ * protected by "sched_domains_mutex".
+ */
static struct mask_info socket_info;
static struct mask_info book_info;
+static struct mask_info drawer_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
-static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
- cpumask_t mask;
+ static cpumask_t mask;
+
+ cpumask_clear(&mask);
+ if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
+ goto out;
+ cpumask_set_cpu(cpu, &mask);
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+ if (cpumask_test_cpu(cpu, &info->mask)) {
+ cpumask_copy(&mask, &info->mask);
+ break;
+ }
+ info = info->next;
+ }
+ break;
+ case TOPOLOGY_MODE_PACKAGE:
+ cpumask_copy(&mask, cpu_present_mask);
+ break;
+ default:
+ fallthrough;
+ case TOPOLOGY_MODE_SINGLE:
+ break;
+ }
+ cpumask_and(&mask, &mask, &cpu_setup_mask);
+out:
+ cpumask_copy(dst, &mask);
+}
+
+static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
+{
+ static cpumask_t mask;
+ unsigned int max_cpu;
- cpumask_copy(&mask, cpumask_of(cpu));
- if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
- return mask;
- for (; info; info = info->next) {
- if (cpumask_test_cpu(cpu, &info->mask))
- return info->mask;
+ cpumask_clear(&mask);
+ if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
+ goto out;
+ cpumask_set_cpu(cpu, &mask);
+ if (topology_mode != TOPOLOGY_MODE_HW)
+ goto out;
+ cpu -= cpu % (smp_cpu_mtid + 1);
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ if (cpumask_test_cpu(cpu, &cpu_setup_mask))
+ cpumask_set_cpu(cpu, &mask);
}
- return mask;
+out:
+ cpumask_copy(dst, &mask);
}
-static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
- struct mask_info *book,
- struct mask_info *socket,
- int one_socket_per_cpu)
+#define TOPOLOGY_CORE_BITS 64
+
+static void add_cpus_to_mask(struct topology_core *tl_core,
+ struct mask_info *drawer,
+ struct mask_info *book,
+ struct mask_info *socket)
{
- unsigned int cpu;
+ struct cpu_topology_s390 *topo;
+ unsigned int core;
- for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) {
- unsigned int rcpu;
- int lcpu;
+ for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
+ unsigned int max_cpu, rcore;
+ int cpu;
- rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
- lcpu = smp_find_processor_id(rcpu);
- if (lcpu < 0)
+ rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+ cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+ if (cpu < 0)
continue;
- cpumask_set_cpu(lcpu, &book->mask);
- cpu_topology[lcpu].book_id = book->id;
- cpumask_set_cpu(lcpu, &socket->mask);
- cpu_topology[lcpu].core_id = rcpu;
- if (one_socket_per_cpu) {
- cpu_topology[lcpu].socket_id = rcpu;
- socket = socket->next;
- } else {
- cpu_topology[lcpu].socket_id = socket->id;
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ topo = &cpu_topology[cpu];
+ topo->drawer_id = drawer->id;
+ topo->book_id = book->id;
+ topo->socket_id = socket->id;
+ topo->core_id = rcore;
+ topo->thread_id = cpu;
+ topo->dedicated = tl_core->d;
+ cpumask_set_cpu(cpu, &drawer->mask);
+ cpumask_set_cpu(cpu, &book->mask);
+ cpumask_set_cpu(cpu, &socket->mask);
+ smp_cpu_set_polarization(cpu, tl_core->pp);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
}
- smp_cpu_set_polarization(lcpu, tl_cpu->pp);
}
- return socket;
}
static void clear_masks(void)
@@ -103,25 +166,36 @@ static void clear_masks(void)
cpumask_clear(&info->mask);
info = info->next;
}
+ info = &drawer_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
}
static union topology_entry *next_tle(union topology_entry *tle)
{
if (!tle->nl)
- return (union topology_entry *)((struct topology_cpu *)tle + 1);
+ return (union topology_entry *)((struct topology_core *)tle + 1);
return (union topology_entry *)((struct topology_container *)tle + 1);
}
-static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
+static void tl_to_masks(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
+ struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;
+ clear_masks();
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
+ case 3:
+ drawer = drawer->next;
+ drawer->id = tle->container.id;
+ break;
case 2:
book = book->next;
book->id = tle->container.id;
@@ -131,32 +205,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
socket->id = tle->container.id;
break;
case 0:
- add_cpus_to_mask(&tle->cpu, book, socket, 0);
- break;
- default:
- clear_masks();
- return;
- }
- tle = next_tle(tle);
- }
-}
-
-static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
-{
- struct mask_info *socket = &socket_info;
- struct mask_info *book = &book_info;
- union topology_entry *tle, *end;
-
- tle = info->tle;
- end = (union topology_entry *)((unsigned long)info + info->length);
- while (tle < end) {
- switch (tle->nl) {
- case 1:
- book = book->next;
- book->id = tle->container.id;
- break;
- case 0:
- socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+ add_cpus_to_mask(&tle->cpu, drawer, book, socket);
break;
default:
clear_masks();
@@ -166,52 +215,32 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
}
}
-static void tl_to_masks(struct sysinfo_15_1_x *info)
-{
- struct cpuid cpu_id;
-
- spin_lock_irq(&topology_lock);
- get_cpu_id(&cpu_id);
- clear_masks();
- switch (cpu_id.machine) {
- case 0x2097:
- case 0x2098:
- __tl_to_masks_z10(info);
- break;
- default:
- __tl_to_masks_generic(info);
- }
- spin_unlock_irq(&topology_lock);
-}
-
static void topology_update_polarization_simple(void)
{
int cpu;
- mutex_lock(&smp_cpu_state_mutex);
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
- mutex_unlock(&smp_cpu_state_mutex);
}
static int ptf(unsigned long fc)
{
- int rc;
+ int cc;
asm volatile(
- " .insn rre,0xb9a20000,%1,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc)
- : "d" (fc) : "cc");
- return rc;
+ " .insn rre,0xb9a20000,%[fc],%[fc]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [fc] "d" (fc)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
@@ -224,51 +253,91 @@ int topology_set_cpu_management(int fc)
return rc;
}
-static void update_cpu_masks(void)
+void update_cpu_masks(void)
{
- unsigned long flags;
- int cpu;
+ struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+ int cpu, sibling, pkg_first, smt_first, id;
- spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
- cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
- cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
- if (!MACHINE_HAS_TOPOLOGY) {
- cpu_topology[cpu].core_id = cpu;
- cpu_topology[cpu].socket_id = cpu;
- cpu_topology[cpu].book_id = cpu;
+ topo = &cpu_topology[cpu];
+ cpu_thread_map(&topo->thread_mask, cpu);
+ cpu_group_map(&topo->core_mask, &socket_info, cpu);
+ cpu_group_map(&topo->book_mask, &book_info, cpu);
+ cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
+ topo->booted_cores = 0;
+ if (topology_mode != TOPOLOGY_MODE_HW) {
+ id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
+ topo->thread_id = cpu;
+ topo->core_id = cpu;
+ topo->socket_id = id;
+ topo->book_id = id;
+ topo->drawer_id = id;
+ }
+ }
+ hd_reset_state();
+ for_each_online_cpu(cpu) {
+ topo = &cpu_topology[cpu];
+ pkg_first = cpumask_first(&topo->core_mask);
+ topo_package = &cpu_topology[pkg_first];
+ if (cpu == pkg_first) {
+ for_each_cpu(sibling, &topo->core_mask) {
+ topo_sibling = &cpu_topology[sibling];
+ smt_first = cpumask_first(&topo_sibling->thread_mask);
+ if (sibling == smt_first) {
+ topo_package->booted_cores++;
+ hd_add_core(sibling);
+ }
+ }
+ } else {
+ topo->booted_cores = topo_package->booted_cores;
}
}
- spin_unlock_irqrestore(&topology_lock, flags);
}
void store_topology(struct sysinfo_15_1_x *info)
{
- if (topology_max_mnest >= 3)
- stsi(info, 15, 1, 3);
+ stsi(info, 15, 1, topology_mnest_limit());
+}
+
+static void __arch_update_dedicated_flag(void *arg)
+{
+ if (topology_cpu_dedicated(smp_processor_id()))
+ set_cpu_flag(CIF_DEDICATED_CPU);
else
- stsi(info, 15, 1, 2);
+ clear_cpu_flag(CIF_DEDICATED_CPU);
}
-int arch_update_cpu_topology(void)
+static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- struct device *dev;
- int cpu;
+ int rc, hd_status;
- if (!MACHINE_HAS_TOPOLOGY) {
- update_cpu_masks();
- topology_update_polarization_simple();
- return 0;
+ hd_status = 0;
+ rc = 0;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_has_topology()) {
+ rc = 1;
+ store_topology(info);
+ tl_to_masks(info);
}
- store_topology(info);
- tl_to_masks(info);
update_cpu_masks();
- for_each_online_cpu(cpu) {
- dev = get_cpu_device(cpu);
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- }
- return 1;
+ if (!cpu_has_topology())
+ topology_update_polarization_simple();
+ if (cpu_management == 1)
+ hd_status = hd_enable_hiperdispatch();
+ mutex_unlock(&smp_cpu_state_mutex);
+ if (hd_status == 0)
+ hd_disable_hiperdispatch();
+ return rc;
+}
+
+int arch_update_cpu_topology(void)
+{
+ int rc;
+
+ rc = __arch_update_cpu_topology();
+ on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
+ return rc;
}
static void topology_work_fn(struct work_struct *work)
@@ -281,29 +350,33 @@ void topology_schedule_update(void)
schedule_work(&topology_work);
}
-static void topology_timer_fn(unsigned long ignored)
+static void topology_flush_work(void)
+{
+ flush_work(&topology_work);
+}
+
+static void topology_timer_fn(struct timer_list *unused)
{
if (ptf(PTF_CHECK))
topology_schedule_update();
set_topology_timer();
}
-static struct timer_list topology_timer =
- TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
+static struct timer_list topology_timer;
static atomic_t topology_poll = ATOMIC_INIT(0);
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
- mod_timer(&topology_timer, jiffies + HZ / 10);
+ mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + HZ * 60);
+ mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}
void topology_expect_change(void)
{
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
@@ -314,50 +387,25 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int __init early_parse_topology(char *p)
+static int set_polarization(int polarization)
{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
-}
-early_param("topology", early_parse_topology);
-
-static void __init alloc_masks(struct sysinfo_15_1_x *info,
- struct mask_info *mask, int offset)
-{
- int i, nr_masks;
+ int rc = 0;
- nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
- for (i = 0; i < info->mnest - offset; i++)
- nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
- nr_masks = max(nr_masks, 1);
- for (i = 0; i < nr_masks; i++) {
- mask->next = alloc_bootmem(sizeof(struct mask_info));
- mask = mask->next;
- }
-}
-
-void __init s390_init_cpu_topology(void)
-{
- struct sysinfo_15_1_x *info;
- int i;
-
- if (!MACHINE_HAS_TOPOLOGY)
- return;
- tl_info = alloc_bootmem_pages(PAGE_SIZE);
- info = tl_info;
- store_topology(info);
- pr_info("The CPU configuration topology of the machine is:");
- for (i = 0; i < TOPOLOGY_NR_MAG; i++)
- printk(KERN_CONT " %d", info->mag[i]);
- printk(KERN_CONT " / %d\n", info->mnest);
- alloc_masks(info, &socket_info, 1);
- alloc_masks(info, &book_info, 2);
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == polarization)
+ goto out;
+ rc = topology_set_cpu_management(polarization);
+ if (rc)
+ goto out;
+ cpu_management = polarization;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return rc;
}
-static int cpu_management;
-
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -365,7 +413,7 @@ static ssize_t dispatching_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
+ count = sysfs_emit(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -382,23 +430,10 @@ static ssize_t dispatching_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- rc = 0;
- get_online_cpus();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (rc)
- goto out;
- cpu_management = val;
- topology_expect_change();
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
+ rc = set_polarization(val);
return rc ? rc : count;
}
-static DEVICE_ATTR(dispatching, 0644, dispatching_show,
- dispatching_store);
+static DEVICE_ATTR_RW(dispatching);
static ssize_t cpu_polarization_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -409,19 +444,19 @@ static ssize_t cpu_polarization_show(struct device *dev,
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
+ count = sysfs_emit(buf, "horizontal\n");
break;
case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
+ count = sysfs_emit(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
+ count = sysfs_emit(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
+ count = sysfs_emit(buf, "vertical:high\n");
break;
default:
- count = sprintf(buf, "unknown\n");
+ count = sysfs_emit(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
@@ -438,20 +473,226 @@ static struct attribute_group topology_cpu_attr_group = {
.attrs = topology_cpu_attrs,
};
+static ssize_t cpu_dedicated_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);
+
+static struct attribute *topology_extra_cpu_attrs[] = {
+ &dev_attr_dedicated.attr,
+ NULL,
+};
+
+static struct attribute_group topology_extra_cpu_attr_group = {
+ .attrs = topology_extra_cpu_attrs,
+};
+
int topology_cpu_init(struct cpu *cpu)
{
- return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+ int rc;
+
+ rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+ if (rc || !cpu_has_topology())
+ return rc;
+ rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
+ if (rc)
+ sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+ return rc;
+}
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_mask;
+}
+
+static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+ return &cpu_topology[cpu].book_mask;
+}
+
+static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+ return &cpu_topology[cpu].drawer_mask;
+}
+
+static struct sched_domain_topology_level s390_topology[] = {
+ SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
+ SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
+ SDTL_INIT(tl_book_mask, NULL, BOOK),
+ SDTL_INIT(tl_drawer_mask, NULL, DRAWER),
+ SDTL_INIT(tl_pkg_mask, NULL, PKG),
+ { NULL, },
+};
+
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+ struct mask_info *mask, int offset)
+{
+ int i, nr_masks;
+
+ nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+ for (i = 0; i < info->mnest - offset; i++)
+ nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+ nr_masks = max(nr_masks, 1);
+ for (i = 0; i < nr_masks; i++) {
+ mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
+ mask = mask->next;
+ }
+}
+
+static int __init detect_polarization(union topology_entry *tle)
+{
+ struct topology_core *tl_core;
+
+ while (tle->nl)
+ tle = next_tle(tle);
+ tl_core = (struct topology_core *)tle;
+ return tl_core->pp != POLARIZATION_HRZ;
+}
+
+void __init topology_init_early(void)
+{
+ struct sysinfo_15_1_x *info;
+
+ set_sched_topology(s390_topology);
+ if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+ if (cpu_has_topology())
+ topology_mode = TOPOLOGY_MODE_HW;
+ else
+ topology_mode = TOPOLOGY_MODE_SINGLE;
+ }
+ if (!cpu_has_topology())
+ goto out;
+ tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
+ info = tl_info;
+ store_topology(info);
+ cpu_management = detect_polarization(info->tle);
+ pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
+ info->mag[0], info->mag[1], info->mag[2], info->mag[3],
+ info->mag[4], info->mag[5], info->mnest);
+ alloc_masks(info, &socket_info, 1);
+ alloc_masks(info, &book_info, 2);
+ alloc_masks(info, &drawer_info, 3);
+out:
+ cpumask_set_cpu(0, &cpu_setup_mask);
+ __arch_update_cpu_topology();
+ __arch_update_dedicated_flag(NULL);
+}
+
+static inline int topology_get_mode(int enabled)
+{
+ if (!enabled)
+ return TOPOLOGY_MODE_SINGLE;
+ return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
+static inline int topology_is_enabled(void)
+{
+ return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
+static int __init topology_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ topology_mode = topology_get_mode(enabled);
+ return 0;
+}
+early_param("topology", topology_setup);
+
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int enabled = topology_is_enabled();
+ int new_mode;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &enabled,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ new_mode = topology_get_mode(enabled);
+ if (topology_mode != new_mode) {
+ topology_mode = new_mode;
+ topology_schedule_update();
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ topology_flush_work();
+
+ return rc;
+}
+
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int polarization;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &polarization,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ polarization = cpu_management;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ return set_polarization(polarization);
+}
+
+static const struct ctl_table topology_ctl_table[] = {
+ {
+ .procname = "topology",
+ .mode = 0644,
+ .proc_handler = topology_ctl_handler,
+ },
+ {
+ .procname = "polarization",
+ .mode = 0644,
+ .proc_handler = polarization_ctl_handler,
+ },
+};
+
static int __init topology_init(void)
{
- if (!MACHINE_HAS_TOPOLOGY) {
+ struct device *dev_root;
+ int rc = 0;
+
+ timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
+ if (cpu_has_topology())
+ set_topology_timer();
+ else
topology_update_polarization_simple();
- goto out;
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+ set_polarization(1);
+ register_sysctl("s390", topology_ctl_table);
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ rc = device_create_file(dev_root, &dev_attr_dispatching);
+ put_device(dev_root);
}
- set_topology_timer();
-out:
- update_cpu_masks();
- return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+ return rc;
}
device_initcall(topology_init);
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
new file mode 100644
index 000000000000..11a669f3cc93
--- /dev/null
+++ b/arch/s390/kernel/trace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void notrace trace_s390_diagnose_norecursion(int diag_nr)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ /* Avoid lockdep recursion. */
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+ local_irq_save(flags);
+ depth = this_cpu_ptr(&diagnose_trace_depth);
+ if (*depth == 0) {
+ (*depth)++;
+ trace_s390_diagnose(diag_nr);
+ (*depth)--;
+ }
+ local_irq_restore(flags);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index c5762324d9ee..19687dab32f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -1,161 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* Derived from "arch/i386/kernel/traps.c"
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/randomize_kstack.h>
+#include <linux/extable.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/cpu.h>
+#include <linux/entry-common.h>
+#include <linux/kmsan.h>
+#include <asm/asm-extable.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/vtime.h>
+#include <asm/fpu.h>
+#include <asm/fault.h>
#include "entry.h"
-int show_unhandled_signals = 1;
-
static inline void __user *get_trap_ip(struct pt_regs *regs)
{
-#ifdef CONFIG_64BIT
unsigned long address;
if (regs->int_code & 0x200)
- address = *(unsigned long *)(current->thread.trap_tdb + 24);
+ address = current->thread.trap_tdb.data[3];
else
address = regs->psw.addr;
- return (void __user *)
- ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-#else
- return (void __user *)
- ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-#endif
-}
-
-static inline void report_user_fault(struct pt_regs *regs, int signr)
-{
- if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
- return;
- if (!unhandled_signal(current, signr))
- return;
- if (!printk_ratelimit())
- return;
- printk("User process fault: interruption code 0x%X ", regs->int_code);
- print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
- printk("\n");
- show_regs(regs);
+ return (void __user *)(address - (regs->int_code >> 16));
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return 1;
}
+#endif
-static void __kprobes do_trap(struct pt_regs *regs,
- int si_signo, int si_code, char *str)
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- siginfo_t info;
-
- if (notify_die(DIE_TRAP, str, regs, 0,
- regs->int_code, si_signo) == NOTIFY_STOP)
- return;
-
if (user_mode(regs)) {
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = get_trap_ip(regs);
- force_sig_info(si_signo, &info, current);
- report_user_fault(regs, si_signo);
- } else {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
- if (fixup)
- regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
- else {
- enum bug_trap_type btt;
-
- btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
- if (btt == BUG_TRAP_TYPE_WARN)
- return;
+ force_sig_fault(si_signo, si_code, get_trap_ip(regs));
+ report_user_fault(regs, si_signo, 0);
+ } else {
+ if (!fixup_exception(regs))
die(regs, str);
- }
- }
+ }
}
-void __kprobes do_per_trap(struct pt_regs *regs)
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- siginfo_t info;
+ if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP)
+ return;
+ do_report_trap(regs, si_signo, si_code, str);
+}
+NOKPROBE_SYMBOL(do_trap);
+void do_per_trap(struct pt_regs *regs)
+{
if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
return;
if (!current->ptrace)
return;
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr =
- (void __force __user *) current->thread.per_event.address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address);
}
+NOKPROBE_SYMBOL(do_per_trap);
-void default_trap_handler(struct pt_regs *regs)
+static void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
- report_user_fault(regs, SIGSEGV);
- do_exit(SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 0);
+ force_exit_sig(SIGSEGV);
} else
die(regs, "Unknown program exception");
}
#define DO_ERROR_INFO(name, signr, sicode, str) \
-void name(struct pt_regs *regs) \
+static void name(struct pt_regs *regs) \
{ \
do_trap(regs, signr, sicode, str); \
}
-DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
- "addressing exception")
-DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
- "execute exception")
-DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
- "fixpoint divide exception")
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
- "fixpoint overflow exception")
-DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
- "HFP overflow exception")
-DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
- "HFP underflow exception")
-DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
- "HFP significance exception")
-DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
- "HFP divide exception")
-DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
- "HFP square root exception")
-DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
- "operand exception")
-DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
- "privileged operation")
-DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
- "special operation exception")
-DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
- "translation exception")
-
-#ifdef CONFIG_64BIT
-DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
- "transaction constraint exception")
-#endif
-
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception")
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception");
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception")
+
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
+
/* FPC[2] is Data Exception Code */
if ((fpc & 0x00000300) == 0) {
/* bits 6 and 7 of DXC are 0 iff IEEE exception */
@@ -173,201 +129,89 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
do_trap(regs, SIGFPE, si_code, "floating point exception");
}
-void __kprobes illegal_op(struct pt_regs *regs)
+static void translation_specification_exception(struct pt_regs *regs)
{
- siginfo_t info;
- __u8 opcode[6];
- __u16 __user *location;
+ /* May never happen. */
+ panic("Translation-Specification Exception");
+}
+
+static void illegal_op(struct pt_regs *regs)
+{
+ int is_uprobe_insn = 0;
+ u16 __user *location;
int signal = 0;
+ u16 opcode;
location = get_trap_ip(regs);
-
if (user_mode(regs)) {
- if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ if (get_user(opcode, location))
return;
- if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
- if (current->ptrace) {
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = location;
- force_sig_info(SIGTRAP, &info, current);
- } else
+ if (opcode == S390_BREAKPOINT_U16) {
+ if (current->ptrace)
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
+ else
signal = SIGILL;
-#ifdef CONFIG_MATHEMU
- } else if (opcode[0] == 0xb3) {
- if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
- signal = math_emu_b3(opcode, regs);
- } else if (opcode[0] == 0xed) {
- if (get_user(*((__u32 *) (opcode+2)),
- (__u32 __user *)(location+1)))
- return;
- signal = math_emu_ed(opcode, regs);
- } else if (*((__u16 *) opcode) == 0xb299) {
- if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
- signal = math_emu_srnm(opcode, regs);
- } else if (*((__u16 *) opcode) == 0xb29c) {
- if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
- signal = math_emu_stfpc(opcode, regs);
- } else if (*((__u16 *) opcode) == 0xb29d) {
- if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
- signal = math_emu_lfpc(opcode, regs);
+#ifdef CONFIG_UPROBES
+ } else if (opcode == UPROBE_SWBP_INSN) {
+ is_uprobe_insn = 1;
#endif
- } else
+ } else {
signal = SIGILL;
- } else {
- /*
- * If we get an illegal op in kernel mode, send it through the
- * kprobes notifier. If kprobes doesn't pick it up, SIGILL
- */
- if (notify_die(DIE_BPT, "bpt", regs, 0,
- 3, SIGTRAP) != NOTIFY_STOP)
+ }
+ }
+ /*
+ * This is either an illegal op in kernel mode, or user space trapped
+ * on a uprobes illegal instruction. See if kprobes or uprobes picks
+ * it up. If not, SIGILL.
+ */
+ if (is_uprobe_insn || !user_mode(regs)) {
+ if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
}
-
-#ifdef CONFIG_MATHEMU
- if (signal == SIGFPE)
- do_fp_trap(regs, current->thread.fp_regs.fpc);
- else if (signal == SIGSEGV)
- do_trap(regs, signal, SEGV_MAPERR, "user address fault");
- else
-#endif
if (signal)
do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
}
+NOKPROBE_SYMBOL(illegal_op);
-
-#ifdef CONFIG_MATHEMU
-void specification_exception(struct pt_regs *regs)
+static void vector_exception(struct pt_regs *regs)
{
- __u8 opcode[6];
- __u16 __user *location = NULL;
- int signal = 0;
-
- location = (__u16 __user *) get_trap_ip(regs);
-
- if (user_mode(regs)) {
- get_user(*((__u16 *) opcode), location);
- switch (opcode[0]) {
- case 0x28: /* LDR Rx,Ry */
- signal = math_emu_ldr(opcode);
- break;
- case 0x38: /* LER Rx,Ry */
- signal = math_emu_ler(opcode);
- break;
- case 0x60: /* STD R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_std(opcode, regs);
- break;
- case 0x68: /* LD R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_ld(opcode, regs);
- break;
- case 0x70: /* STE R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_ste(opcode, regs);
- break;
- case 0x78: /* LE R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_le(opcode, regs);
- break;
- default:
- signal = SIGILL;
- break;
- }
- } else
- signal = SIGILL;
-
- if (signal == SIGFPE)
- do_fp_trap(regs, current->thread.fp_regs.fpc);
- else if (signal)
- do_trap(regs, signal, ILL_ILLOPN, "specification exception");
+ int si_code, vic;
+
+ /* get vector interrupt code from fpc */
+ save_user_fpu_regs();
+ vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
+ switch (vic) {
+ case 1: /* invalid vector operation */
+ si_code = FPE_FLTINV;
+ break;
+ case 2: /* division by zero */
+ si_code = FPE_FLTDIV;
+ break;
+ case 3: /* overflow */
+ si_code = FPE_FLTOVF;
+ break;
+ case 4: /* underflow */
+ si_code = FPE_FLTUND;
+ break;
+ case 5: /* inexact */
+ si_code = FPE_FLTRES;
+ break;
+ default: /* unknown cause */
+ si_code = 0;
+ }
+ do_trap(regs, SIGFPE, si_code, "vector exception");
}
-#else
-DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
- "specification exception");
-#endif
-void data_exception(struct pt_regs *regs)
+static void data_exception(struct pt_regs *regs)
{
- __u16 __user *location;
- int signal = 0;
-
- location = get_trap_ip(regs);
-
- if (MACHINE_HAS_IEEE)
- asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
-
-#ifdef CONFIG_MATHEMU
- else if (user_mode(regs)) {
- __u8 opcode[6];
- get_user(*((__u16 *) opcode), location);
- switch (opcode[0]) {
- case 0x28: /* LDR Rx,Ry */
- signal = math_emu_ldr(opcode);
- break;
- case 0x38: /* LER Rx,Ry */
- signal = math_emu_ler(opcode);
- break;
- case 0x60: /* STD R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_std(opcode, regs);
- break;
- case 0x68: /* LD R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_ld(opcode, regs);
- break;
- case 0x70: /* STE R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_ste(opcode, regs);
- break;
- case 0x78: /* LE R,D(X,B) */
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_le(opcode, regs);
- break;
- case 0xb3:
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_b3(opcode, regs);
- break;
- case 0xed:
- get_user(*((__u32 *) (opcode+2)),
- (__u32 __user *)(location+1));
- signal = math_emu_ed(opcode, regs);
- break;
- case 0xb2:
- if (opcode[1] == 0x99) {
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_srnm(opcode, regs);
- } else if (opcode[1] == 0x9c) {
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_stfpc(opcode, regs);
- } else if (opcode[1] == 0x9d) {
- get_user(*((__u16 *) (opcode+2)), location+1);
- signal = math_emu_lfpc(opcode, regs);
- } else
- signal = SIGILL;
- break;
- default:
- signal = SIGILL;
- break;
- }
- }
-#endif
- if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
- signal = SIGFPE;
+ save_user_fpu_regs();
+ if (current->thread.ufpu.fpc & FPC_DXC_MASK)
+ do_fp_trap(regs, current->thread.ufpu.fpc);
else
- signal = SIGILL;
- if (signal == SIGFPE)
- do_fp_trap(regs, current->thread.fp_regs.fpc);
- else if (signal)
- do_trap(regs, signal, ILL_ILLOPN, "data exception");
+ do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
-void space_switch_exception(struct pt_regs *regs)
+static void space_switch_exception(struct pt_regs *regs)
{
/* Set user psw back to home space mode. */
if (user_mode(regs))
@@ -376,16 +220,186 @@ void space_switch_exception(struct pt_regs *regs)
do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
-void __kprobes kernel_stack_overflow(struct pt_regs * regs)
+static void monitor_event_exception(struct pt_regs *regs)
{
+ if (user_mode(regs))
+ return;
+ switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+ case BUG_TRAP_TYPE_NONE:
+ fixup_exception(regs);
+ break;
+ case BUG_TRAP_TYPE_WARN:
+ break;
+ case BUG_TRAP_TYPE_BUG:
+ die(regs, "monitor event");
+ break;
+ }
+}
+
+void kernel_stack_invalid(struct pt_regs *regs)
+{
+ /*
+ * Normally regs are unpoisoned by the generic entry code, but
+ * kernel_stack_overflow() is a rare case that is called bypassing it.
+ */
+ kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
- printk("Kernel stack overflow.\n");
+ pr_emerg("Kernel stack pointer invalid\n");
show_regs(regs);
bust_spinlocks(0);
- panic("Corrupt kernel stack, can't continue.");
+ panic("Invalid kernel stack pointer, cannot continue");
+}
+NOKPROBE_SYMBOL(kernel_stack_invalid);
+
+static void __init test_monitor_call(void)
+{
+ int val = 1;
+
+ if (!IS_ENABLED(CONFIG_BUG))
+ return;
+ asm_inline volatile(
+ " mc 0,0\n"
+ "0: lhi %[val],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [val] "+d" (val));
+ if (!val)
+ panic("Monitor call doesn't work!\n");
}
void __init trap_init(void)
{
+ struct lowcore *lc = get_lowcore();
+ unsigned long flags;
+ struct ctlreg cr0;
+
+ local_irq_save(flags);
+ cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
+ psw_bits(lc->external_new_psw).mcheck = 1;
+ psw_bits(lc->program_new_psw).mcheck = 1;
+ psw_bits(lc->svc_new_psw).mcheck = 1;
+ psw_bits(lc->io_new_psw).mcheck = 1;
+ local_ctl_load(0, &cr0);
+ local_irq_restore(flags);
local_mcck_enable();
+ test_monitor_call();
+}
+
+static void (*pgm_check_table[128])(struct pt_regs *regs);
+
+void noinstr __do_pgm_check(struct pt_regs *regs)
+{
+ struct lowcore *lc = get_lowcore();
+ irqentry_state_t state;
+ unsigned int trapnr;
+ union teid teid;
+
+ teid.val = lc->trans_exc_code;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = teid.val;
+ /*
+ * In case of a guest fault, short-circuit the fault handler and return.
+ * This way the sie64a() function will return 0; fault address and
+ * other relevant bits are saved in current->thread.gmap_teid, and
+ * the fault number in current->thread.gmap_int_code. KVM will be
+ * able to use this information to handle the fault.
+ */
+ if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
+ current->thread.gmap_teid.val = regs->int_parm_long;
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
+ return;
+ }
+ state = irqentry_enter(regs);
+ if (user_mode(regs)) {
+ update_timer_sys();
+ if (!cpu_has_bear()) {
+ if (regs->last_break < 4096)
+ regs->last_break = 1;
+ }
+ current->thread.last_break = regs->last_break;
+ }
+ if (lc->pgm_code & 0x0200) {
+ /* transaction abort */
+ current->thread.trap_tdb = lc->pgm_tdb;
+ }
+ if (lc->pgm_code & PGM_INT_CODE_PER) {
+ if (user_mode(regs)) {
+ struct per_event *ev = &current->thread.per_event;
+
+ set_thread_flag(TIF_PER_TRAP);
+ ev->address = lc->per_address;
+ ev->cause = lc->per_code_combined;
+ ev->paid = lc->per_access_id;
+ } else {
+ /* PER event in kernel is kprobes */
+ __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+ do_per_trap(regs);
+ goto out;
+ }
+ }
+ if (!irqs_disabled_flags(regs->psw.mask))
+ trace_hardirqs_on();
+ __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+ trapnr = regs->int_code & PGM_INT_CODE_MASK;
+ if (trapnr)
+ pgm_check_table[trapnr](regs);
+out:
+ local_irq_disable();
+ irqentry_exit(regs, state);
}
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the function to be called corresponding to the program check
+ * interruption code.
+ */
+static void (*pgm_check_table[128])(struct pt_regs *regs) = {
+ [0x00] = default_trap_handler,
+ [0x01] = illegal_op,
+ [0x02] = privileged_op,
+ [0x03] = execute_exception,
+ [0x04] = do_protection_exception,
+ [0x05] = addressing_exception,
+ [0x06] = specification_exception,
+ [0x07] = data_exception,
+ [0x08] = overflow_exception,
+ [0x09] = divide_exception,
+ [0x0a] = overflow_exception,
+ [0x0b] = divide_exception,
+ [0x0c] = hfp_overflow_exception,
+ [0x0d] = hfp_underflow_exception,
+ [0x0e] = hfp_significance_exception,
+ [0x0f] = hfp_divide_exception,
+ [0x10] = do_dat_exception,
+ [0x11] = do_dat_exception,
+ [0x12] = translation_specification_exception,
+ [0x13] = special_op_exception,
+ [0x14] = default_trap_handler,
+ [0x15] = operand_exception,
+ [0x16] = default_trap_handler,
+ [0x17] = default_trap_handler,
+ [0x18] = transaction_exception,
+ [0x19] = default_trap_handler,
+ [0x1a] = default_trap_handler,
+ [0x1b] = vector_exception,
+ [0x1c] = space_switch_exception,
+ [0x1d] = hfp_sqrt_exception,
+ [0x1e ... 0x37] = default_trap_handler,
+ [0x38] = do_dat_exception,
+ [0x39] = do_dat_exception,
+ [0x3a] = do_dat_exception,
+ [0x3b] = do_dat_exception,
+ [0x3c] = default_trap_handler,
+ [0x3d] = do_secure_storage_access,
+ [0x3e] = default_trap_handler,
+ [0x3f] = default_trap_handler,
+ [0x40] = monitor_event_exception,
+ [0x41 ... 0x7f] = default_trap_handler,
+};
+
+#define COND_TRAP(x) asm( \
+ ".weak " __stringify(x) "\n\t" \
+ ".set " __stringify(x) "," \
+ __stringify(default_trap_handler))
+
+COND_TRAP(do_secure_storage_access);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
new file mode 100644
index 000000000000..0f88caca4eaf
--- /dev/null
+++ b/arch/s390/kernel/unwind_bc.c
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/interrupt.h>
+#include <asm/sections.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+ return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool outside_of_stack(struct unwind_state *state, unsigned long sp)
+{
+ return (sp <= state->sp) ||
+ (sp > state->stack_info.end - sizeof(struct stack_frame));
+}
+
+static bool update_stack_info(struct unwind_state *state, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long *mask = &state->stack_mask;
+
+ /* New stack pointer leaves the current stack */
+ if (get_stack_info(sp, state->task, info, mask) != 0 ||
+ !on_stack(info, sp, sizeof(struct stack_frame)))
+ /* 'sp' does not point to a valid stack */
+ return false;
+ return true;
+}
+
+static inline bool is_final_pt_regs(struct unwind_state *state,
+ struct pt_regs *regs)
+{
+ /* user mode or kernel thread pt_regs at the bottom of task stack */
+ if (task_pt_regs(state->task) == regs)
+ return true;
+
+ /* user mode pt_regs at the bottom of irq stack */
+ return state->stack_info.type == STACK_TYPE_IRQ &&
+ state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+ READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
+bool unwind_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+ bool reliable;
+
+ regs = state->regs;
+ if (unlikely(regs)) {
+ sp = state->sp;
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+ reliable = false;
+ regs = NULL;
+ /* skip bogus %r14 or if is the same as regs->psw.addr */
+ if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) {
+ state->regs = NULL;
+ return unwind_next_frame(state);
+ }
+ } else {
+ sf = (struct stack_frame *) state->sp;
+ sp = READ_ONCE_NOCHECK(sf->back_chain);
+ if (likely(sp)) {
+ /* Non-zero back-chain points to the previous frame */
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+ reliable = true;
+ } else {
+ /* No back-chain, look for a pt_regs structure */
+ sp = state->sp + STACK_FRAME_OVERHEAD;
+ if (!on_stack(info, sp, sizeof(struct pt_regs)))
+ goto out_err;
+ regs = (struct pt_regs *) sp;
+ if (is_final_pt_regs(state, regs))
+ goto out_stop;
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ reliable = true;
+ }
+ }
+
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto out_err;
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->regs = regs;
+ state->reliable = reliable;
+ state->ip = unwind_recover_ret_addr(state, ip);
+ return true;
+
+out_err:
+ state->error = true;
+out_stop:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long first_frame)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ unsigned long ip, sp;
+
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+ state->regs = regs;
+
+ /* Don't even attempt to start from user mode regs: */
+ if (regs && user_mode(regs)) {
+ info->type = STACK_TYPE_UNKNOWN;
+ return;
+ }
+
+ /* Get the instruction pointer from pt_regs or the stack frame */
+ if (regs) {
+ ip = regs->psw.addr;
+ sp = regs->gprs[15];
+ } else if (task == current) {
+ sp = current_frame_address();
+ } else {
+ sp = task->thread.ksp;
+ }
+
+ /* Get current stack pointer and initialize stack info */
+ if (!update_stack_info(state, sp)) {
+ /* Something is wrong with the stack pointer */
+ info->type = STACK_TYPE_UNKNOWN;
+ state->error = true;
+ return;
+ }
+
+ if (!regs) {
+ /* Stack frame is within valid stack */
+ sf = (struct stack_frame *)sp;
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+ }
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->reliable = true;
+ state->ip = unwind_recover_ret_addr(state, ip);
+
+ if (!first_frame)
+ return;
+ /* Skip through the call chain to the specified starting frame */
+ while (!unwind_done(state)) {
+ if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+ if (state->sp >= first_frame)
+ break;
+ }
+ unwind_next_frame(state);
+ }
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 000000000000..c624f3361e43
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/kdebug.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/facility.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ return probe_is_prohibited_opcode(auprobe->insn);
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
+ return -EINVAL;
+ if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
+ return -EINVAL;
+ clear_thread_flag(TIF_PER_TRAP);
+ auprobe->saved_per = psw_bits(regs->psw).per;
+ auprobe->saved_int_code = regs->int_code;
+ regs->int_code = UPROBE_TRAP_NR;
+ regs->psw.addr = current->utask->xol_vaddr;
+ set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ if (regs->int_code != UPROBE_TRAP_NR)
+ return true;
+ return false;
+}
+
+static int check_per_event(unsigned short cause, unsigned long control,
+ struct pt_regs *regs)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return 0;
+ /* user space single step */
+ if (control == 0)
+ return 1;
+ /* over indication for storage alteration */
+ if ((control & 0x20200000) && (cause & 0x2000))
+ return 1;
+ if (cause & 0x8000) {
+ /* all branches */
+ if ((control & 0x80800000) == 0x80000000)
+ return 1;
+ /* branch into selected range */
+ if (((control & 0x80800000) == 0x80800000) &&
+ regs->psw.addr >= current->thread.per_user.start &&
+ regs->psw.addr <= current->thread.per_user.end)
+ return 1;
+ }
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ int fixup = probe_get_fixup_type(auprobe->insn);
+ struct uprobe_task *utask = current->utask;
+
+ clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ psw_bits(regs->psw).per = auprobe->saved_per;
+ regs->int_code = auprobe->saved_int_code;
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (auprobe->insn[0] & 0xf0) >> 4;
+
+ regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+ }
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(auprobe->insn[0] >> 8);
+
+ if (regs->psw.addr - utask->xol_vaddr == ilen)
+ regs->psw.addr = utask->vaddr + ilen;
+ }
+ if (check_per_event(current->thread.per_event.cause,
+ current->thread.per_user.control, regs)) {
+ /* fix per address */
+ current->thread.per_event.address = utask->vaddr;
+ /* trigger per event */
+ set_thread_flag(TIF_PER_TRAP);
+ }
+ return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!user_mode(regs))
+ return NOTIFY_DONE;
+ if (regs->int_code & 0x200) /* Trap during transaction */
+ return NOTIFY_DONE;
+ switch (val) {
+ case DIE_BPT:
+ if (uprobe_pre_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (uprobe_post_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+ regs->int_code = auprobe->saved_int_code;
+ regs->psw.addr = current->utask->vaddr;
+ current->thread.per_event.address = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+ struct pt_regs *regs)
+{
+ unsigned long orig;
+
+ orig = regs->gprs[14];
+ regs->gprs[14] = trampoline;
+ return orig;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return user_stack_pointer(regs) <= ret->stack;
+ else
+ return user_stack_pointer(regs) < ret->stack;
+}
+
+/* Instruction Emulation */
+
+#define EMU_ILLEGAL_OP 1
+#define EMU_SPECIFICATION 2
+#define EMU_ADDRESSING 3
+
+#define emu_load_ril(ptr, output) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else \
+ *(output) = input; \
+ __rc; \
+})
+
+#define emu_store_ril(regs, ptr, input) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(ptr) __ptr = (ptr); \
+ int __rc = 0; \
+ \
+ if ((u64 __force)__ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (put_user(*(input), __ptr)) \
+ __rc = EMU_ADDRESSING; \
+ if (__rc == 0) \
+ sim_stor_event(regs, \
+ (void __force *)__ptr, \
+ mask + 1); \
+ __rc; \
+})
+
+#define emu_cmp_ril(regs, ptr, cmp) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else if (input > *(cmp)) \
+ psw_bits((regs)->psw).cc = 1; \
+ else if (input < *(cmp)) \
+ psw_bits((regs)->psw).cc = 2; \
+ else \
+ psw_bits((regs)->psw).cc = 0; \
+ __rc; \
+})
+
+struct insn_ril {
+ u8 opc0;
+ u8 reg : 4;
+ u8 opc1 : 4;
+ s32 disp;
+} __packed;
+
+union split_register {
+ u64 u64;
+ u32 u32[2];
+ u16 u16[4];
+ s64 s64;
+ s32 s32[2];
+ s16 s16[4];
+};
+
+/*
+ * If user per registers are setup to trace storage alterations and an
+ * emulated store took place on a fitting address a user trap is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return;
+ if (!(current->thread.per_user.control & PER_EVENT_STORE))
+ return;
+ if ((void *)current->thread.per_user.start > (addr + len))
+ return;
+ if ((void *)current->thread.per_user.end < addr)
+ return;
+ current->thread.per_event.address = regs->psw.addr;
+ current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+ set_thread_flag(TIF_PER_TRAP);
+}
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ union split_register *rx;
+ struct insn_ril *insn;
+ unsigned int ilen;
+ void *uptr;
+ int rc = 0;
+
+ insn = (struct insn_ril *) &auprobe->insn;
+ rx = (union split_register *) &regs->gprs[insn->reg];
+ uptr = (void *)(regs->psw.addr + (insn->disp * 2));
+ ilen = insn_length(insn->opc0);
+
+ switch (insn->opc0) {
+ case 0xc0:
+ switch (insn->opc1) {
+ case 0x00: /* larl */
+ rx->u64 = (unsigned long)uptr;
+ break;
+ }
+ break;
+ case 0xc4:
+ switch (insn->opc1) {
+ case 0x02: /* llhrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x04: /* lghrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+ break;
+ case 0x05: /* lhrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x06: /* llghrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x08: /* lgrl */
+ rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* lgfrl */
+ rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0d: /* lrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x0e: /* llgfrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* sthrl */
+ rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
+ break;
+ case 0x0b: /* stgrl */
+ rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* strl */
+ rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ case 0xc6:
+ switch (insn->opc1) {
+ case 0x04: /* cghrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+ break;
+ case 0x05: /* chrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x06: /* clghrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* clhrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x08: /* cgrl */
+ rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+ break;
+ case 0x0a: /* clgrl */
+ rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* cgfrl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+ break;
+ case 0x0d: /* crl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x0e: /* clgfrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* clrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ }
+ regs->psw.addr = __forward_psw(regs->psw, ilen);
+ switch (rc) {
+ case EMU_ILLEGAL_OP:
+ regs->int_code = ilen << 16 | 0x0001;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+ break;
+ case EMU_SPECIFICATION:
+ regs->int_code = ilen << 16 | 0x0006;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL);
+ break;
+ case EMU_ADDRESSING:
+ regs->int_code = ilen << 16 | 0x0005;
+ do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+ break;
+ }
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
+ (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)) {
+ regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+ do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+ return true;
+ }
+ if (probe_is_insn_relative_long(auprobe->insn)) {
+ handle_insn_ril(auprobe, regs);
+ return true;
+ }
+ return false;
+}
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 000000000000..ed46950be86f
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,947 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2024
+ */
+#define pr_fmt(fmt) "prot_virt: " fmt
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+int __bootdata_preserved(prot_virt_guest);
+EXPORT_SYMBOL(prot_virt_guest);
+
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
+struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
+
+int __bootdata_preserved(prot_virt_host);
+EXPORT_SYMBOL(prot_virt_host);
+
+static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
+{
+ struct uv_cb_init uvcb = {
+ .header.cmd = UVC_CMD_INIT_UV,
+ .header.len = sizeof(uvcb),
+ .stor_origin = stor_base,
+ .stor_len = stor_len,
+ };
+
+ if (uv_call(0, (uint64_t)&uvcb)) {
+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+ uvcb.header.rc, uvcb.header.rrc);
+ return -1;
+ }
+ return 0;
+}
+
+void __init setup_uv(void)
+{
+ void *uv_stor_base;
+
+ if (!is_prot_virt_host())
+ return;
+
+ uv_stor_base = memblock_alloc_try_nid(
+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+ if (!uv_stor_base) {
+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+ uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ pr_info("Reserving %luMB as ultravisor base storage\n",
+ uv_info.uv_base_stor_len >> 20);
+ return;
+fail:
+ pr_info("Disabling support for protected virtualization");
+ prot_virt_host = 0;
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(uv_pin_shared);
+
+/*
+ * Requests the Ultravisor to destroy a guest page and make it
+ * accessible to the host. The destroy clears the page instead of
+ * exporting.
+ *
+ * @paddr: Absolute host address of page to be destroyed
+ */
+static int uv_destroy(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_DESTR_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb)) {
+ /*
+ * Older firmware uses 107/d as an indication of a non secure
+ * page. Let us emulate the newer variant (no-op).
+ */
+ if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+ return 0;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * The caller must already hold a reference to the folio
+ */
+int uv_destroy_folio(struct folio *folio)
+{
+ int rc;
+
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_destroy(folio_to_phys(folio));
+ if (!rc)
+ clear_bit(PG_arch_1, &folio->flags.f);
+ folio_put(folio);
+ return rc;
+}
+EXPORT_SYMBOL(uv_destroy_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
+
+/*
+ * The caller must already hold a reference to the folio.
+ */
+int uv_convert_from_secure_folio(struct folio *folio)
+{
+ int rc;
+
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
+ if (!rc)
+ clear_bit(PG_arch_1, &folio->flags.f);
+ folio_put(folio);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+/*
+ * Calculate the expected ref_count for a folio that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * folio can not be a large folio, for example.
+ */
+static int expected_folio_refs(struct folio *folio)
+{
+ int res;
+
+ res = folio_mapcount(folio);
+ if (folio_test_swapcache(folio)) {
+ res++;
+ } else if (folio_mapping(folio)) {
+ res++;
+ if (folio->private)
+ res++;
+ }
+ return res;
+}
+
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+{
+ int expected, cc = 0;
+
+ if (folio_test_writeback(folio))
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
+ if (!folio_ref_freeze(folio, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &folio->flags.f);
+ /*
+ * If the UVC does not succeed or fail immediately, we don't want to
+ * loop for long, or we might get stall notifications.
+ * On the other hand, this is a complex scenario and we are holding a lot of
+ * locks, so we can't easily sleep and reschedule. We try only once,
+ * and if the UVC returned busy or partial completion, we return
+ * -EAGAIN and we let the callers deal with it.
+ */
+ cc = __uv_call(0, (u64)uvcb);
+ folio_ref_unfreeze(folio, expected);
+ /*
+ * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
+ * If busy or partially completed, return -EAGAIN.
+ */
+ if (cc == UVC_CC_OK)
+ return 0;
+ else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
+ return -EAGAIN;
+ return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+}
+
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
+{
+ int rc;
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
+}
+
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
+ */
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
+{
+ int rc, tried_splits;
+
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
+
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
+ folio_lock(folio);
+ rc = split_folio(folio);
+ if (rc != -EBUSY) {
+ folio_unlock(folio);
+ return rc;
+ }
+
+ /*
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock when trigger writeback manually and
+ * letting the caller look up the folio again in the page
+ * table (turning it dirty), immediately try to split again.
+ *
+ * This is only a problem for some mappings (e.g., XFS);
+ * mappings that do not support writeback (e.g., shmem) do not
+ * apply.
+ */
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
+ }
+
+ /*
+ * Ideally, we'd only trigger writeback on this exact folio. But
+ * there is no easy way to do that, so we'll stabilize the
+ * mapping while we still hold the folio lock, so we can drop
+ * the folio lock to trigger writeback on the range currently
+ * covered by the folio instead.
+ */
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
+ folio_unlock(folio);
+
+ if (unlikely(!inode))
+ break;
+
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
+ }
+ return -EAGAIN;
+}
+
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
+{
+ struct vm_area_struct *vma;
+ struct folio_walk fw;
+ struct folio *folio;
+ int rc;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
+
+ folio_get(folio);
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
+ folio_walk_end(&fw, vma);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
+ folio_put(folio);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(make_hva_secure);
+
+/*
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
+ */
+int arch_make_folio_accessible(struct folio *folio)
+{
+ int rc = 0;
+
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 2 places:
+ * 1. for storage keys of hugetlb folios and KVM
+ * 2. As an indication that this small folio might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never large folios, both variants can co-exists.
+ */
+ if (!test_bit(PG_arch_1, &folio->flags.f))
+ return 0;
+
+ rc = uv_pin_shared(folio_to_phys(folio));
+ if (!rc) {
+ clear_bit(PG_arch_1, &folio->flags.f);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(folio_to_phys(folio));
+ if (!rc) {
+ clear_bit(PG_arch_1, &folio->flags.f);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
+
+static ssize_t uv_query_facilities(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+ __ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+ __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+ __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+ __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+ __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+ __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
+static ssize_t uv_query_feature_indications(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
+}
+
+static struct kobj_attribute uv_query_feature_indications_attr =
+ __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+ __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+ __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+ __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+ __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+ __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n",
+ uv_info.max_assoc_secrets + uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+ __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
+static ssize_t uv_query_max_retr_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_retr_secrets_attr =
+ __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL);
+
+static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets);
+}
+
+static struct kobj_attribute uv_query_max_assoc_secrets_attr =
+ __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+ &uv_query_facilities_attr.attr,
+ &uv_query_feature_indications_attr.attr,
+ &uv_query_max_guest_cpus_attr.attr,
+ &uv_query_max_guest_vms_attr.attr,
+ &uv_query_max_guest_addr_attr.attr,
+ &uv_query_supp_se_hdr_ver_attr.attr,
+ &uv_query_supp_se_hdr_pcf_attr.attr,
+ &uv_query_dump_storage_state_len_attr.attr,
+ &uv_query_dump_finalize_len_attr.attr,
+ &uv_query_dump_cpu_len_attr.attr,
+ &uv_query_supp_att_req_hdr_ver_attr.attr,
+ &uv_query_supp_att_pflags_attr.attr,
+ &uv_query_supp_add_secret_req_ver_attr.attr,
+ &uv_query_supp_add_secret_pcf_attr.attr,
+ &uv_query_supp_secret_types_attr.attr,
+ &uv_query_max_secrets_attr.attr,
+ &uv_query_max_assoc_secrets_attr.attr,
+ &uv_query_max_retr_secrets_attr.attr,
+ NULL,
+};
+
+static inline struct uv_cb_query_keys uv_query_keys(void)
+{
+ struct uv_cb_query_keys uvcb = {
+ .header.cmd = UVC_CMD_QUERY_KEYS,
+ .header.len = sizeof(uvcb)
+ };
+
+ uv_call(0, (uint64_t)&uvcb);
+ return uvcb;
+}
+
+static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at)
+{
+ return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n",
+ hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]);
+}
+
+static ssize_t uv_keys_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_host_key_attr =
+ __ATTR(host_key, 0444, uv_keys_host_key, NULL);
+
+static ssize_t uv_keys_backup_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_backup_host_key_attr =
+ __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL);
+
+static ssize_t uv_keys_all(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++)
+ len += emit_hash(uvcb.key_hashes + i, buf, len);
+
+ return len;
+}
+
+static struct kobj_attribute uv_keys_all_attr =
+ __ATTR(all, 0444, uv_keys_all, NULL);
+
+static struct attribute_group uv_query_attr_group = {
+ .attrs = uv_query_attrs,
+};
+
+static struct attribute *uv_keys_attrs[] = {
+ &uv_keys_host_key_attr.attr,
+ &uv_keys_backup_host_key_attr.attr,
+ &uv_keys_all_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_keys_attr_group = {
+ .attrs = uv_keys_attrs,
+};
+
+static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
+}
+
+static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
+}
+
+static struct kobj_attribute uv_prot_virt_guest =
+ __ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL);
+
+static struct kobj_attribute uv_prot_virt_host =
+ __ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL);
+
+static const struct attribute *uv_prot_virt_attrs[] = {
+ &uv_prot_virt_guest.attr,
+ &uv_prot_virt_host.attr,
+ NULL,
+};
+
+static struct kset *uv_query_kset;
+static struct kset *uv_keys_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_sysfs_dir_init(const struct attribute_group *grp,
+ struct kset **uv_dir_kset, const char *name)
+{
+ struct kset *kset;
+ int rc;
+
+ kset = kset_create_and_add(name, NULL, uv_kobj);
+ if (!kset)
+ return -ENOMEM;
+ *uv_dir_kset = kset;
+
+ rc = sysfs_create_group(&kset->kobj, grp);
+ if (rc)
+ kset_unregister(kset);
+ return rc;
+}
+
+static int __init uv_sysfs_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!test_facility(158))
+ return 0;
+
+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+ if (!uv_kobj)
+ return -ENOMEM;
+
+ rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs);
+ if (rc)
+ goto out_kobj;
+
+ rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query");
+ if (rc)
+ goto out_ind_files;
+
+ /* Get installed key hashes if available, ignore any errors */
+ if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list))
+ uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys");
+
+ return 0;
+
+out_ind_files:
+ sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
+out_kobj:
+ kobject_del(uv_kobj);
+ kobject_put(uv_kobj);
+ return rc;
+}
+device_initcall(uv_sysfs_init);
+
+/*
+ * Locate a secret in the list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search for a secret with the given secret_id in the Ultravisor secret store.
+ *
+ * Context: might sleep.
+ */
+static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
+ const struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 i;
+
+ for (i = 0; i < list->total_num_secrets; i++) {
+ if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) {
+ *secret = list->secrets[i].hdr;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/**
+ * uv_find_secret() - search secret metadata for a given secret id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Context: might sleep.
+ */
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 start_idx = 0;
+ u16 list_rc;
+ int ret;
+
+ do {
+ uv_list_secrets(list, start_idx, &list_rc, NULL);
+ if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) {
+ if (list_rc == UVC_RC_INV_CMD)
+ return -ENODEV;
+ else
+ return -EIO;
+ }
+ ret = find_secret_in_page(secret_id, list, secret);
+ if (ret == 0)
+ return ret;
+ start_idx = list->next_secret_idx;
+ } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(uv_find_secret);
+
+/**
+ * uv_retrieve_secret() - get the secret value for the secret index.
+ * @secret_idx: Secret index for which the secret should be retrieved.
+ * @buf: Buffer to store retrieved secret.
+ * @buf_size: Size of the buffer. The correct buffer size is reported as part of
+ * the result from `uv_get_secret_metadata`.
+ *
+ * Calls the Retrieve Secret UVC and translates the UV return code into an errno.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0 - Entry found; buffer contains a valid secret.
+ * * %ENOENT: - No entry found or secret at the index is non-retrievable.
+ * * %ENODEV: - Not supported: UV not available or command not available.
+ * * %EINVAL: - Buffer too small for content.
+ * * %EIO: - Other unexpected UV error.
+ */
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size)
+{
+ struct uv_cb_retr_secr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_RETR_SECRET,
+ .secret_idx = secret_idx,
+ .buf_addr = (u64)buf,
+ .buf_size = buf_size,
+ };
+
+ uv_call_sched(0, (u64)&uvcb);
+
+ switch (uvcb.header.rc) {
+ case UVC_RC_EXECUTED:
+ return 0;
+ case UVC_RC_INV_CMD:
+ return -ENODEV;
+ case UVC_RC_RETR_SECR_STORE_EMPTY:
+ case UVC_RC_RETR_SECR_INV_SECRET:
+ case UVC_RC_RETR_SECR_INV_IDX:
+ return -ENOENT;
+ case UVC_RC_RETR_SECR_BUF_SMALL:
+ return -EINVAL;
+ default:
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(uv_retrieve_secret);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d7776281cb60..a27a90a199be 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -1,334 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* vdso setup for s390
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
-#include <linux/module.h>
+#include <linux/binfmts.h>
+#include <linux/elf.h>
#include <linux/errno.h>
-#include <linux/sched.h>
+#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/elf.h>
-#include <linux/security.h>
-#include <linux/bootmem.h>
-#include <linux/compat.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/sections.h>
+#include <linux/smp.h>
+#include <linux/random.h>
+#include <linux/vdso_datastore.h>
+#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
#include <asm/vdso.h>
-#include <asm/facility.h>
-
-#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
-#ifdef CONFIG_64BIT
-extern char vdso64_start, vdso64_end;
-static void *vdso64_kbase = &vdso64_start;
-static unsigned int vdso64_pages;
-static struct page **vdso64_pagelist;
-#endif /* CONFIG_64BIT */
-/*
- * Should the kernel map a VDSO page into processes and pass its
- * address down to glibc upon exec()?
- */
-unsigned int __read_mostly vdso_enabled = 1;
+extern char vdso_start[], vdso_end[];
-static int __init vdso_setup(char *s)
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma)
{
- unsigned long val;
- int rc;
-
- rc = 0;
- if (strncmp(s, "on", 3) == 0)
- vdso_enabled = 1;
- else if (strncmp(s, "off", 4) == 0)
- vdso_enabled = 0;
- else {
- rc = strict_strtoul(s, 0, &val);
- vdso_enabled = rc ? 0 : !!val;
- }
- return !rc;
+ current->mm->context.vdso_base = vma->vm_start;
+ return 0;
}
-__setup("vdso=", vdso_setup);
-/*
- * The vdso data page
- */
-static union {
- struct vdso_data data;
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+};
-/*
- * Setup vdso data page.
- */
-static void vdso_init_data(struct vdso_data *vd)
+int vdso_getcpu_init(void)
{
- vd->ectg_available =
- s390_user_mode != HOME_SPACE_MODE && test_facility(31);
+ set_tod_programmable_field(smp_processor_id());
+ return 0;
}
+early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
-#ifdef CONFIG_64BIT
-/*
- * Allocate/free per cpu vdso data.
- */
-#define SEGMENT_ORDER 2
-
-int vdso_alloc_per_cpu(struct _lowcore *lowcore)
+static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
- unsigned long segment_table, page_table, page_frame;
- u32 *psal, *aste;
- int i;
-
- lowcore->vdso_per_cpu_data = __LC_PASTE;
+ unsigned long vvar_start, vdso_text_start, vdso_text_len;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
- return 0;
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
- segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
- page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
- page_frame = get_zeroed_page(GFP_KERNEL);
- if (!segment_table || !page_table || !page_frame)
+ vdso_text_len = vdso_end - vdso_start;
+ vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
+ rc = vvar_start;
+ if (IS_ERR_VALUE(vvar_start))
goto out;
-
- clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
- PAGE_SIZE << SEGMENT_ORDER);
- clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
- 256*sizeof(unsigned long));
-
- *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
- *(unsigned long *) page_table = _PAGE_RO + page_frame;
-
- psal = (u32 *) (page_table + 256*sizeof(unsigned long));
- aste = psal + 32;
-
- for (i = 4; i < 32; i += 4)
- psal[i] = 0x80000000;
-
- lowcore->paste[4] = (u32)(addr_t) psal;
- psal[0] = 0x20000000;
- psal[2] = (u32)(addr_t) aste;
- *(unsigned long *) (aste + 2) = segment_table +
- _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
- aste[4] = (u32)(addr_t) psal;
- lowcore->vdso_per_cpu_data = page_frame;
-
- return 0;
-
+ vma = vdso_install_vvar_mapping(mm, vvar_start);
+ rc = PTR_ERR(vma);
+ if (IS_ERR(vma))
+ goto out;
+ vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE;
+ /* VM_MAYWRITE for COW so gdb can set breakpoints */
+ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
+ VM_READ|VM_EXEC|VM_SEALED_SYSMAP|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &vdso_mapping);
+ if (IS_ERR(vma)) {
+ do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
+ rc = PTR_ERR(vma);
+ } else {
+ current->mm->context.vdso_base = vdso_text_start;
+ rc = 0;
+ }
out:
- free_page(page_frame);
- free_page(page_table);
- free_pages(segment_table, SEGMENT_ORDER);
- return -ENOMEM;
+ mmap_write_unlock(mm);
+ return rc;
}
-void vdso_free_per_cpu(struct _lowcore *lowcore)
+static unsigned long vdso_addr(unsigned long start, unsigned long len)
{
- unsigned long segment_table, page_table, page_frame;
- u32 *psal, *aste;
-
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
- return;
+ unsigned long addr, end, offset;
- psal = (u32 *)(addr_t) lowcore->paste[4];
- aste = (u32 *)(addr_t) psal[2];
- segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
- page_table = *(unsigned long *) segment_table;
- page_frame = *(unsigned long *) page_table;
-
- free_page(page_frame);
- free_page(page_table);
- free_pages(segment_table, SEGMENT_ORDER);
+ /*
+ * Round up the start address. It can start out unaligned as a result
+ * of stack start randomization.
+ */
+ start = PAGE_ALIGN(start);
+
+ /* Round the lowest possible end address up to a PMD boundary. */
+ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+ if (end >= VDSO_BASE)
+ end = VDSO_BASE;
+ end -= len;
+
+ if (end > start) {
+ offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
+ addr = start + (offset << PAGE_SHIFT);
+ } else {
+ addr = start;
+ }
+ return addr;
}
-static void vdso_init_cr5(void)
+unsigned long vdso_text_size(void)
{
- unsigned long cr5;
-
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
- return;
- cr5 = offsetof(struct _lowcore, paste);
- __ctl_load(cr5, 5, 5);
+ return PAGE_ALIGN(vdso_end - vdso_start);
}
-#endif /* CONFIG_64BIT */
-/*
- * This is called from binfmt_elf, we create the special vma for the
- * vDSO and insert it into the mm struct tree
- */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+unsigned long vdso_size(void)
{
- struct mm_struct *mm = current->mm;
- struct page **vdso_pagelist;
- unsigned long vdso_pages;
- unsigned long vdso_base;
- int rc;
-
- if (!vdso_enabled)
- return 0;
- /*
- * Only map the vdso for dynamically linked elf binaries.
- */
- if (!uses_interp)
- return 0;
-
-#ifdef CONFIG_64BIT
- vdso_pagelist = vdso64_pagelist;
- vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
- if (is_compat_task()) {
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- }
-#endif
-#else
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
-#endif
-
- /*
- * vDSO has a problem and was disabled, just don't "enable" it for
- * the process
- */
- if (vdso_pages == 0)
- return 0;
-
- current->mm->context.vdso_base = 0;
-
- /*
- * pick a base address for the vDSO in process space. We try to put
- * it at vdso_base which is the "natural" base for it, but we might
- * fail and end up putting it elsewhere.
- */
- down_write(&mm->mmap_sem);
- vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- rc = vdso_base;
- goto out_up;
- }
-
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
- current->mm->context.vdso_base = vdso_base;
-
- /*
- * our vma flags don't have VM_WRITE so by default, the process
- * isn't allowed to write those pages.
- * gdb can break that with ptrace interface, and thus trigger COW
- * on those pages but it's then your responsibility to never do that
- * on the "data" page of the vDSO or you'll stop getting kernel
- * updates and your nice userland gettimeofday will be totally dead.
- * It's fine to use that for setting breakpoints in the vDSO code
- * pages though.
- */
- rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pagelist);
- if (rc)
- current->mm->context.vdso_base = 0;
-out_up:
- up_write(&mm->mmap_sem);
- return rc;
+ return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
- return "[vdso]";
- return NULL;
+ unsigned long addr = VDSO_BASE;
+ unsigned long size = vdso_size();
+
+ if (current->flags & PF_RANDOMIZE)
+ addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
+ return map_vdso(addr, size);
}
-static int __init vdso_init(void)
+static struct page ** __init vdso_setup_pages(void *start, void *end)
{
+ int pages = (end - start) >> PAGE_SHIFT;
+ struct page **pagelist;
int i;
- if (!vdso_enabled)
- return 0;
- vdso_init_data(vdso_data);
-#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
- /* Calculate the size of the 32 bit vDSO */
- vdso32_pages = ((&vdso32_end - &vdso32_start
- + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
- /* Make sure pages are in the correct state */
- vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1),
- GFP_KERNEL);
- BUG_ON(vdso32_pagelist == NULL);
- for (i = 0; i < vdso32_pages - 1; i++) {
- struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
- get_page(pg);
- vdso32_pagelist[i] = pg;
- }
- vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
- vdso32_pagelist[vdso32_pages] = NULL;
-#endif
-
-#ifdef CONFIG_64BIT
- /* Calculate the size of the 64 bit vDSO */
- vdso64_pages = ((&vdso64_end - &vdso64_start
- + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
- /* Make sure pages are in the correct state */
- vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1),
- GFP_KERNEL);
- BUG_ON(vdso64_pagelist == NULL);
- for (i = 0; i < vdso64_pages - 1; i++) {
- struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
- get_page(pg);
- vdso64_pagelist[i] = pg;
- }
- vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
- vdso64_pagelist[vdso64_pages] = NULL;
- if (vdso_alloc_per_cpu(&S390_lowcore))
- BUG();
- vdso_init_cr5();
-#endif /* CONFIG_64BIT */
-
- get_page(virt_to_page(vdso_data));
-
- smp_wmb();
-
- return 0;
+ pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pagelist)
+ panic("%s: Cannot allocate page list for VDSO", __func__);
+ for (i = 0; i < pages; i++)
+ pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+ return pagelist;
}
-early_initcall(vdso_init);
-int in_gate_area_no_mm(unsigned long addr)
+static void vdso_apply_alternatives(void)
{
- return 0;
+ const struct elf64_shdr *alt, *shdr;
+ struct alt_instr *start, *end;
+ const struct elf64_hdr *hdr;
+
+ hdr = (struct elf64_hdr *)vdso_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+ start = (void *)hdr + alt->sh_offset;
+ end = (void *)hdr + alt->sh_offset + alt->sh_size;
+ apply_alternatives(start, end);
}
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
+static int __init vdso_init(void)
{
+ vdso_apply_alternatives();
+ vdso_mapping.pages = vdso_setup_pages(vdso_start, vdso_end);
return 0;
}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
+arch_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso/.gitignore b/arch/s390/kernel/vdso/.gitignore
new file mode 100644
index 000000000000..652e31d82582
--- /dev/null
+++ b/arch/s390/kernel/vdso/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso.lds
diff --git a/arch/s390/kernel/vdso/Makefile b/arch/s390/kernel/vdso/Makefile
new file mode 100644
index 000000000000..2fa12d4ac106
--- /dev/null
+++ b/arch/s390/kernel/vdso/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+obj-vdso = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso = vdso_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
+# Build rules
+
+targets := $(obj-vdso) $(obj-cvdso) vdso.so vdso.so.dbg
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+obj-cvdso := $(addprefix $(obj)/, $(obj-cvdso))
+
+KBUILD_AFLAGS_VDSO := $(KBUILD_AFLAGS) -DBUILD_VDSO
+
+KBUILD_CFLAGS_VDSO := $(KBUILD_CFLAGS) -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_VDSO := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO += -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
+KBUILD_CFLAGS_VDSO += -fno-stack-protector
+ldflags-y := -shared -soname=linux-vdso.so.1 \
+ --hash-style=both --build-id=sha1 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_VDSO)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_VDSO)
+
+obj-y += vdso_wrapper.o
+targets += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso_wrapper.o : $(obj)/vdso.so
+
+quiet_cmd_vdso_and_check = VDSO $@
+ cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) $(obj-cvdso) FORCE
+ $(call if_changed,vdso_and_check)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+$(obj-cvdso): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+
+# actual build commands
+quiet_cmd_vdsoas = VDSOA $@
+ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso/gen_vdso_offsets.sh b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..359982fb002d
--- /dev/null
+++ b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso/getcpu.c b/arch/s390/kernel/vdso/getcpu.c
new file mode 100644
index 000000000000..5c5d4a848b76
--- /dev/null
+++ b/arch/s390/kernel/vdso/getcpu.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright IBM Corp. 2020 */
+
+#include <linux/compiler.h>
+#include <linux/getcpu.h>
+#include <asm/timex.h>
+#include "vdso.h"
+
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+{
+ union tod_clock clk;
+
+ /* CPU number is stored in the programmable field of the TOD clock */
+ store_tod_clock_ext(&clk);
+ if (cpu)
+ *cpu = clk.pf;
+ /* NUMA node is always zero */
+ if (node)
+ *node = 0;
+ return 0;
+}
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso/note.S
index 79a071e4357e..db19d0680a0a 100644
--- a/arch/s390/kernel/vdso64/note.S
+++ b/arch/s390/kernel/vdso/note.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
diff --git a/arch/s390/kernel/vdso/vdso.h b/arch/s390/kernel/vdso/vdso.h
new file mode 100644
index 000000000000..8cff033dd854
--- /dev/null
+++ b/arch/s390/kernel/vdso/vdso.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_KERNEL_VDSO_VDSO_H
+#define __ARCH_S390_KERNEL_VDSO_VDSO_H
+
+#include <vdso/datapage.h>
+
+struct getcpu_cache;
+
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
+
+#endif /* __ARCH_S390_KERNEL_VDSO_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso/vdso.lds.S
index 9f5979d102a9..7bec4de0e8e0 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso/vdso.lds.S
@@ -1,16 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This is the infamous ld script for the 64 bits vdso
* library
*/
+
+#include <asm/vdso/vsyscall.h>
+#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(_start)
SECTIONS
{
- . = VDSO64_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -36,6 +43,10 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ . = ALIGN(8);
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
.dynamic : { *(.dynamic) } :text :dynamic
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
@@ -44,56 +55,16 @@ SECTIONS
.rela.dyn ALIGN(8) : { *(.rela.dyn) }
.got ALIGN(8) : { *(.got .toc) }
+ .got.plt ALIGN(8) : { *(.got.plt) }
_end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
+ STABS_DEBUG
+ DWARF_DEBUG
.comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
- . = ALIGN(4096);
- PROVIDE(_vdso_data = .);
-
/DISCARD/ : {
*(.note.GNU-stack)
*(.branch_lt)
@@ -132,7 +103,11 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
-
+ __kernel_getcpu;
+ __kernel_restart_syscall;
+ __kernel_rt_sigreturn;
+ __kernel_sigreturn;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso/vdso_generic.c b/arch/s390/kernel/vdso/vdso_generic.c
new file mode 100644
index 000000000000..a9aa75643c08
--- /dev/null
+++ b/arch/s390/kernel/vdso/vdso_generic.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/vdso/gettimeofday.c"
+#include "vdso.h"
+
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_getres(clock, ts);
+}
diff --git a/arch/s390/kernel/vdso/vdso_user_wrapper.S b/arch/s390/kernel/vdso/vdso_user_wrapper.S
new file mode 100644
index 000000000000..aa06c85bcbd3
--- /dev/null
+++ b/arch/s390/kernel/vdso/vdso_user_wrapper.S
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/vdso.h>
+#include <asm/unistd.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+/*
+ * Older glibc version called vdso without allocating a stackframe. This wrapper
+ * is just used to allocate a stackframe. See
+ * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e
+ * for details.
+ */
+.macro vdso_func func
+SYM_FUNC_START(__kernel_\func)
+ CFI_STARTPROC
+ aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
+ CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD
+ stg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS
+ xc __SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15)
+ brasl %r14,__s390_vdso_\func
+ lg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_RESTORE 14
+ aghi %r15,STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__kernel_\func)
+.endm
+
+vdso_func gettimeofday
+vdso_func clock_getres
+vdso_func clock_gettime
+vdso_func getcpu
+
+.macro vdso_syscall func,syscall
+SYM_FUNC_START(__kernel_\func)
+ CFI_STARTPROC
+ svc \syscall
+ /* Make sure we notice when a syscall returns, which shouldn't happen */
+ .word 0
+ CFI_ENDPROC
+SYM_FUNC_END(__kernel_\func)
+.endm
+
+vdso_syscall restart_syscall,__NR_restart_syscall
+vdso_syscall sigreturn,__NR_sigreturn
+vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso/vdso_wrapper.S
index c245842b516f..f69e62a14978 100644
--- a/arch/s390/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/s390/kernel/vdso/vdso_wrapper.S
@@ -1,14 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/page.h>
__PAGE_ALIGNED_DATA
- .globl vdso64_start, vdso64_end
+ .globl vdso_start, vdso_end
.balign PAGE_SIZE
-vdso64_start:
- .incbin "arch/s390/kernel/vdso64/vdso64.so"
+vdso_start:
+ .incbin "arch/s390/kernel/vdso/vdso.so"
.balign PAGE_SIZE
-vdso64_end:
+vdso_end:
.previous
diff --git a/arch/s390/kernel/vdso/vgetrandom-chacha.S b/arch/s390/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0 %v0
+#define STATE1 %v1
+#define STATE2 %v2
+#define STATE3 %v3
+#define COPY0 %v4
+#define COPY1 %v5
+#define COPY2 %v6
+#define COPY3 %v7
+#define BEPERM %v19
+#define TMP0 %v20
+#define TMP1 %v21
+#define TMP2 %v22
+#define TMP3 %v23
+
+ .section .rodata
+
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+ .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+ .text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-bytes
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
+
+ /* COPY0 = "expand 32-byte k" */
+ VL COPY0,0,,%r1
+
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+ /* COPY1,COPY2 = key */
+ VLM COPY1,COPY2,0,%r3
+
+ /* COPY3 = counter || zero nonce */
+ lg %r3,0(%r4)
+ VZERO COPY3
+ VLVGG COPY3,%r3,0
+
+ lghi %r1,0
+.Lblock:
+ VLR STATE0,COPY0
+ VLR STATE1,COPY1
+ VLR STATE2,COPY2
+ VLR STATE3,COPY3
+
+ lghi %r0,10
+.Ldoubleround:
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+ VSLDB STATE1,STATE1,STATE1,4
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+ VSLDB STATE3,STATE3,STATE3,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+ VSLDB STATE1,STATE1,STATE1,12
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+ VSLDB STATE3,STATE3,STATE3,4
+ brctg %r0,.Ldoubleround
+
+ /* OUTPUT0 = STATE0 + COPY0 */
+ VAF STATE0,STATE0,COPY0
+ /* OUTPUT1 = STATE1 + COPY1 */
+ VAF STATE1,STATE1,COPY1
+ /* OUTPUT2 = STATE2 + COPY2 */
+ VAF STATE2,STATE2,COPY2
+ /* OUTPUT3 = STATE3 + COPY3 */
+ VAF STATE3,STATE3,COPY3
+
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
+
+ /* ++COPY3.COUNTER */
+ /* alsih %r3,1 */
+ .insn rilu,0xcc0a00000000,%r3,1
+ alcr %r3,%r1
+ VLVGG COPY3,%r3,0
+
+ /* OUTPUT += 64, --NBLOCKS */
+ aghi %r2,64
+ brctg %r5,.Lblock
+
+ /* COUNTER = COPY3.COUNTER */
+ stg %r3,0(%r4)
+
+ /* Zero out potentially sensitive regs */
+ VZERO STATE0
+ VZERO STATE1
+ VZERO STATE2
+ VZERO STATE3
+ VZERO COPY1
+ VZERO COPY2
+
+ /* Early exit if TMP0-TMP3 have not been used */
+ ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+ VZERO TMP0
+ VZERO TMP1
+ VZERO TMP2
+ VZERO TMP3
+
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso/vgetrandom.c b/arch/s390/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (test_facility(129))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9d0ced..000000000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index 8ad2b34ad151..000000000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling for VDSO code
-GCOV_PROFILE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
- $(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index 36aaa25d05da..000000000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- .cfi_startproc
- chi %r2,__CLOCK_REALTIME
- je 0f
- chi %r2,__CLOCK_MONOTONIC
- jne 3f
-0: ltr %r3,%r3
- jz 2f /* res == NULL */
- basr %r1,0
-1: l %r0,4f-1b(%r1)
- xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
- st %r0,4(%r3) /* store tp->tv_usec */
-2: lhi %r2,0
- br %r14
-3: lhi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
-4: .long __CLOCK_REALTIME_RES
- .cfi_endproc
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index b2224e0b974c..000000000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- .cfi_startproc
- basr %r5,0
-0: al %r5,21f-0b(%r5) /* get &_vdso_data */
- chi %r2,__CLOCK_REALTIME
- je 10f
- chi %r2,__CLOCK_MONOTONIC
- jne 19f
-
- /* CLOCK_MONOTONIC */
- ltr %r3,%r3
- jz 9f /* tp == NULL */
-1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,2f
- ahi %r0,-1
-2: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
- lr %r2,%r0
- l %r0,__VDSO_NTP_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 3f
- a %r0,__VDSO_NTP_MULT(%r5)
-3: alr %r0,%r2
- srdl %r0,12
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,4f
- ahi %r0,1
-4: l %r2,__VDSO_XTIME_SEC+4(%r5)
- al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */
- al %r1,__VDSO_WTOM_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: al %r2,__VDSO_WTOM_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,20f-6b(%r5)
- jl 8f
-7: ahi %r2,1
- sl %r1,20f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
-9: lhi %r2,0
- br %r14
-
- /* CLOCK_REALTIME */
-10: ltr %r3,%r3 /* tp == NULL */
- jz 18f
-11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 11b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,12f
- ahi %r0,-1
-12: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
- lr %r2,%r0
- l %r0,__VDSO_NTP_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 13f
- a %r0,__VDSO_NTP_MULT(%r5)
-13: alr %r0,%r2
- srdl %r0,12
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,14f
- ahi %r0,1
-14: l %r2,__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 11b
- basr %r5,0
-15: ltr %r0,%r0
- jnz 16f
- cl %r1,20f-15b(%r5)
- jl 17f
-16: ahi %r2,1
- sl %r1,20f-15b(%r5)
- brc 3,15b
- ahi %r0,-1
- j 15b
-17: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
-18: lhi %r2,0
- br %r14
-
- /* Fallback to system call */
-19: lhi %r1,__NR_clock_gettime
- svc 0
- br %r14
-
-20: .long 1000000000
-21: .long _vdso_data - 0b
- .cfi_endproc
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index 2d3633175e3b..000000000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- .cfi_startproc
- basr %r5,0
-0: al %r5,13f-0b(%r5) /* get &_vdso_data */
-1: ltr %r3,%r3 /* check if tz is NULL */
- je 2f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-2: ltr %r2,%r2 /* check if tv is NULL */
- je 10f
- l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,3f
- ahi %r0,-1
-3: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
- st %r0,24(%r15)
- l %r0,__VDSO_NTP_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 4f
- a %r0,__VDSO_NTP_MULT(%r5)
-4: al %r0,24(%r15)
- srdl %r0,12
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- l %r4,24(%r15) /* get tv_sec from stack */
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,11f-6b(%r5)
- jl 8f
-7: ahi %r4,1
- sl %r1,11f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r4,0(%r2) /* store tv->tv_sec */
- ltr %r1,%r1
- m %r0,12f-6b(%r5)
- jnm 9f
- al %r0,12f-6b(%r5)
-9: srl %r0,6
- st %r0,4(%r2) /* store tv->tv_usec */
-10: slr %r2,%r2
- br %r14
-11: .long 1000000000
-12: .long 274877907
-13: .long _vdso_data - 0b
- .cfi_endproc
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index 79a071e4357e..000000000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
- .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index a8c379fa1247..000000000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
- . = VDSO32_LBASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- .note : { *(.note.*) } :text :note
-
- . = ALIGN(16);
- .text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
- } :text
- PROVIDE(__etext = .);
- PROVIDE(_etext = .);
- PROVIDE(etext = .);
-
- /*
- * Other stuff is appended to the text segment:
- */
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
- .rodata1 : { *(.rodata1) }
-
- .dynamic : { *(.dynamic) } :text :dynamic
-
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
- .got ALIGN(8) : { *(.got .toc) }
-
- _end = .;
- PROVIDE(end = .);
-
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
- .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
- . = ALIGN(4096);
- PROVIDE(_vdso_data = .);
-
- /DISCARD/ : {
- *(.note.GNU-stack)
- *(.branch_lt)
- *(.data .data.* .gnu.linkonce.d.* .sdata*)
- *(.bss .sbss .dynbss .dynsbss)
- }
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME 0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- VDSO_VERSION_STRING {
- global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_gettimeofday;
- __kernel_clock_gettime;
- __kernel_clock_getres;
-
- local: *;
- };
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index ae42f8ce350b..000000000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
- __PAGE_ALIGNED_DATA
-
- .globl vdso32_start, vdso32_end
- .balign PAGE_SIZE
-vdso32_start:
- .incbin "arch/s390/kernel/vdso32/vdso32.so"
- .balign PAGE_SIZE
-vdso32_end:
-
- .previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
deleted file mode 100644
index 3fd18cf9fec2..000000000000
--- a/arch/s390/kernel/vdso64/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
deleted file mode 100644
index 2a8ddfd12a5b..000000000000
--- a/arch/s390/kernel/vdso64/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o
-
-# Build rules
-
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
-
-KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_64 += -m64 -s
-
-KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
-
-obj-y += vdso64_wrapper.o
-extra-y += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling for VDSO code
-GCOV_PROFILE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
- $(call if_changed,vdso64ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
- $(call if_changed_dep,vdso64as)
-
-# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
-quiet_cmd_vdso64as = VDSO64A $@
- cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
deleted file mode 100644
index 176e1f75f9aa..000000000000
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Userland implementation of clock_getres() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- .cfi_startproc
- cghi %r2,__CLOCK_REALTIME
- je 0f
- cghi %r2,__CLOCK_MONOTONIC
- je 0f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
- jne 2f
- larl %r5,_vdso_data
- icm %r0,15,__LC_ECTG_OK(%r5)
- jz 2f
-0: ltgr %r3,%r3
- jz 1f /* res == NULL */
- larl %r1,3f
- lg %r0,0(%r1)
- xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
- stg %r0,8(%r3) /* store tp->tv_usec */
-1: lghi %r2,0
- br %r14
-2: lghi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
-3: .quad __CLOCK_REALTIME_RES
- .cfi_endproc
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
deleted file mode 100644
index d46c95ed5f19..000000000000
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Userland implementation of clock_gettime() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- .cfi_startproc
- larl %r5,_vdso_data
- cghi %r2,__CLOCK_REALTIME
- je 4f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
- je 9f
- cghi %r2,__CLOCK_MONOTONIC
- jne 12f
-
- /* CLOCK_MONOTONIC */
- ltgr %r3,%r3
- jz 3f /* tp == NULL */
-0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stck 48(%r15) /* Store TOD clock */
- lg %r1,48(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- lg %r0,__VDSO_XTIME_SEC(%r5)
- alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */
- alg %r0,__VDSO_WTOM_SEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- larl %r5,13f
-1: clg %r1,0(%r5)
- jl 2f
- slg %r1,0(%r5)
- aghi %r0,1
- j 1b
-2: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
-3: lghi %r2,0
- br %r14
-
- /* CLOCK_REALTIME */
-4: ltr %r3,%r3 /* tp == NULL */
- jz 8f
-5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 5b
- stck 48(%r15) /* Store TOD clock */
- lg %r1,48(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- lg %r0,__VDSO_XTIME_SEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 5b
- larl %r5,13f
-6: clg %r1,0(%r5)
- jl 7f
- slg %r1,0(%r5)
- aghi %r0,1
- j 6b
-7: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
-8: lghi %r2,0
- br %r14
-
- /* CLOCK_THREAD_CPUTIME_ID for this thread */
-9: icm %r0,15,__VDSO_ECTG_OK(%r5)
- jz 12f
- ear %r2,%a4
- llilh %r4,0x0100
- sar %a4,%r4
- lghi %r4,0
- epsw %r5,0
- sacf 512 /* Magic ectg instruction */
- .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
- tml %r5,0x4000
- jo 11f
- tml %r5,0x8000
- jno 10f
- sacf 256
- j 11f
-10: sacf 0
-11: sar %a4,%r2
- algr %r1,%r0 /* r1 = cputime as TOD value */
- mghi %r1,1000 /* convert to nanoseconds */
- srlg %r1,%r1,12 /* r1 = cputime in nanosec */
- lgr %r4,%r1
- larl %r5,13f
- srlg %r1,%r1,9 /* divide by 1000000000 */
- mlg %r0,8(%r5)
- srlg %r0,%r0,11 /* r0 = tv_sec */
- stg %r0,0(%r3)
- msg %r0,0(%r5) /* calculate tv_nsec */
- slgr %r4,%r0 /* r4 = tv_nsec */
- stg %r4,8(%r3)
- lghi %r2,0
- br %r14
-
- /* Fallback to system call */
-12: lghi %r1,__NR_clock_gettime
- svc 0
- br %r14
-
-13: .quad 1000000000
-14: .quad 19342813113834067
- .cfi_endproc
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index 36ee674722ec..000000000000
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- .cfi_startproc
- larl %r5,_vdso_data
-0: ltgr %r3,%r3 /* check if tz is NULL */
- je 1f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-1: ltgr %r2,%r2 /* check if tv is NULL */
- je 4f
- lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stck 48(%r15) /* Store TOD clock */
- lg %r1,48(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- larl %r5,5f
-2: clg %r1,0(%r5)
- jl 3f
- slg %r1,0(%r5)
- aghi %r0,1
- j 2b
-3: stg %r0,0(%r2) /* store tv->tv_sec */
- slgr %r0,%r0 /* tv_nsec -> tv_usec */
- ml %r0,8(%r5)
- srlg %r0,%r0,6
- stg %r0,8(%r2) /* store tv->tv_usec */
-4: lghi %r2,0
- br %r14
-5: .quad 1000000000
- .long 274877907
- .cfi_endproc
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
new file mode 100644
index 000000000000..cc8933e04ff7
--- /dev/null
+++ b/arch/s390/kernel/vmcore_info.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ struct lowcore *abs_lc;
+
+ VMCOREINFO_SYMBOL(lowcore_ptr);
+ VMCOREINFO_SYMBOL(high_memory);
+ VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+ vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+ vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+ vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base);
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys);
+ abs_lc = get_abs_lowcore();
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc);
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 35b13ed0af5f..53bcbb91bb9b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -1,22 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* ld script to make s390 Linux kernel
* Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ftrace.lds.h>
+
+/*
+ * Put .bss..swapper_pg_dir as the first thing in .bss. This will
+ * make sure it has 16k alignment.
+ */
+#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \
+ *(.bss..invalid_pg_dir)
+
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
+/* Handle ro_after_init data on our own. */
+#define RO_AFTER_INIT_DATA
+
+#define RUNTIME_DISCARD_EXIT
+
+#define EMITS_PT_NOTE
+
#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
-#ifndef CONFIG_64BIT
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(startup)
-jiffies = jiffies_64 + 4;
-#else
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(startup)
+ENTRY(startup_continue)
jiffies = jiffies_64;
-#endif
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
@@ -26,46 +39,81 @@ PHDRS {
SECTIONS
{
- . = 0x00000000;
+ . = TEXT_OFFSET;
.text : {
- _text = .; /* Text and read-only data */
+ _stext = .; /* Start of text section */
+ _text = .; /* Text and read-only data */
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
- *(.fixup)
+ SOFTIRQENTRY_TEXT
+ FTRACE_HOTPATCH_TRAMPOLINES_TEXT
+ *(.text..*_indirect_*)
*(.gnu.warning)
+ . = ALIGN(PAGE_SIZE);
+ _etext = .; /* End of text section */
} :text = 0x0700
- _etext = .; /* End of text section */
+ RO_DATA(PAGE_SIZE)
- NOTES :text :note
-
- .dummy : { *(.dummy) } :data
+ . = ALIGN(PAGE_SIZE);
+ _sdata = .; /* Start of data section */
- RO_DATA_SECTION(PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
+ __start_ro_after_init = .;
+ .data..ro_after_init : {
+ *(.data..ro_after_init)
+ JUMP_TABLE_DATA
+ } :data
+ . = ALIGN(PAGE_SIZE);
+ __end_ro_after_init = .;
-#ifdef CONFIG_SHARED_KERNEL
- . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */
-#endif
+ . = ALIGN(8);
+ .skey_region_table : {
+ __skey_region_start = .;
+ KEEP(*(.skey_region))
+ __skey_region_end = .;
+ }
- . = ALIGN(PAGE_SIZE);
- _eshared = .; /* End of shareable data */
- _sdata = .; /* Start of data section */
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
- EXCEPTION_TABLE(16) :data
+ RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
+ .data.rel : {
+ *(.data.rel*)
+ }
+ BOOT_DATA_PRESERVED
- RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ . = ALIGN(8);
+ .amode31.refs : {
+ _start_amode31_refs = .;
+ *(.amode31.refs)
+ _end_amode31_refs = .;
+ }
+ . = ALIGN(PAGE_SIZE);
_edata = .; /* End of data section */
/* will be freed after init */
. = ALIGN(PAGE_SIZE); /* Init code and data */
__init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ . = ALIGN(PAGE_SIZE);
+ _einittext = .;
+ }
/*
* .exit.text is discarded at runtime, not link time,
@@ -79,22 +127,162 @@ SECTIONS
EXIT_DATA
}
+ /*
+ * struct alt_inst entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+ * Think locking instructions on spinlocks.
+ * Note, that it is a part of __init region.
+ */
+ . = ALIGN(8);
+ .altinstructions : {
+ __alt_instructions = .;
+ *(.altinstructions)
+ __alt_instructions_end = .;
+ }
+
+ /*
+ * And here are the replacement instructions. The linker sticks
+ * them as binary blobs. The .altinstructions has enough data to
+ * get the address and the length of them to patch the kernel safely.
+ * Note, that it is a part of __init region.
+ */
+ .altinstr_replacement : {
+ *(.altinstr_replacement)
+ }
+
+#ifdef CONFIG_STACKPROTECTOR
+ . = ALIGN(8);
+ .stack_prot_table : {
+ __stack_prot_start = .;
+ KEEP(*(__stack_protector_loc))
+ __stack_prot_end = .;
+ }
+#endif
+
+ /*
+ * Table with the patch locations to undo expolines
+ */
+ . = ALIGN(4);
+ .nospec_call_table : {
+ __nospec_call_start = . ;
+ *(.s390_indirect*)
+ __nospec_call_end = . ;
+ }
+ .nospec_return_table : {
+ __nospec_return_start = . ;
+ *(.s390_return*)
+ __nospec_return_end = . ;
+ }
+
+ BOOT_DATA
+
+ /*
+ * .amode31 section for code, data, ex_table that need to stay
+ * below 2 GB, even when the kernel is relocated above 2 GB.
+ */
+ . = ALIGN(PAGE_SIZE);
+ _samode31 = .;
+ .amode31.text : {
+ _stext_amode31 = .;
+ *(.amode31.text)
+ *(.amode31.text.*_indirect_*)
+ . = ALIGN(PAGE_SIZE);
+ _etext_amode31 = .;
+ }
+ . = ALIGN(16);
+ .amode31.ex_table : {
+ _start_amode31_ex_table = .;
+ KEEP(*(.amode31.ex_table))
+ _stop_amode31_ex_table = .;
+ }
+ . = ALIGN(PAGE_SIZE);
+ .amode31.data : {
+ *(.amode31.data)
+ }
+ . = _samode31 + AMODE31_SIZE;
+ _eamode31 = .;
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(0x100)
+
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
- BSS_SECTION(0, 2, 0)
+ BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
_end = . ;
/* Debugging sections. */
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
+
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the three reserved double words.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
+ /*
+ * uncompressed image info used by the decompressor
+ * it should match struct vmlinux_info
+ */
+ .vmlinux.info 0 (INFO) : {
+ QUAD(startup_continue) /* entry */
+ QUAD(__bss_start - _stext) /* image_size */
+ QUAD(__bss_stop - __bss_start) /* bss_size */
+ QUAD(__boot_data_start) /* bootdata_off */
+ QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */
+ QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
+ QUAD(__boot_data_preserved_end -
+ __boot_data_preserved_start) /* bootdata_preserved_size */
+ QUAD(__got_start) /* got_start */
+ QUAD(__got_end) /* got_end */
+ QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
+#ifdef CONFIG_STACKPROTECTOR
+ QUAD(__stack_prot_start)
+ QUAD(__stack_prot_end)
+#endif
+#ifdef CONFIG_KASAN
+ QUAD(kasan_early_shadow_page)
+ QUAD(kasan_early_shadow_pte)
+ QUAD(kasan_early_shadow_pmd)
+ QUAD(kasan_early_shadow_pud)
+ QUAD(kasan_early_shadow_p4d)
+#endif
+ } :NONE
/* Sections to be discarded */
DISCARDS
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(.interp)
+ }
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 3fb09359eda6..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Virtual cpu timer based timer functions.
*
@@ -6,49 +7,43 @@
*/
#include <linux/kernel_stat.h>
-#include <linux/notifier.h>
-#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-
-#include <asm/irq_regs.h>
+#include <asm/alternative.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
-#include <asm/irq.h>
+#include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
+
#include "entry.h"
static void virt_timer_expire(void);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
-
static LIST_HEAD(virt_timer_list);
static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
static atomic64_t virt_timer_elapsed;
-static inline u64 get_vtimer(void)
-{
- u64 timer;
-
- asm volatile("stpt %0" : "=m" (timer));
- return timer;
-}
+DEFINE_PER_CPU(u64, mt_cycles[8]);
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
static inline void set_vtimer(u64 expires)
{
+ struct lowcore *lc = get_lowcore();
u64 timer;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
- : "=m" (timer) : "m" (expires));
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
+ : "=Q" (timer) : "Q" (expires));
+ lc->system_timer += lc->last_update_timer - timer;
+ lc->last_update_timer = expires;
}
static inline int virt_timer_forward(u64 elapsed)
@@ -61,55 +56,141 @@ static inline int virt_timer_forward(u64 elapsed)
return elapsed >= atomic64_read(&virt_timer_current);
}
+static void update_mt_scaling(void)
+{
+ u64 cycles_new[8], *cycles_old;
+ u64 delta, fac, mult, div;
+ int i;
+
+ stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new);
+ cycles_old = this_cpu_ptr(mt_cycles);
+ fac = 1;
+ mult = div = 0;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ delta = cycles_new[i] - cycles_old[i];
+ div += delta;
+ mult *= i + 1;
+ mult += delta * fac;
+ fac *= i + 1;
+ }
+ div *= fac;
+ if (div > 0) {
+ /* Update scaling factor */
+ __this_cpu_write(mt_scaling_mult, mult);
+ __this_cpu_write(mt_scaling_div, div);
+ memcpy(cycles_old, cycles_new,
+ sizeof(u64) * (smp_cpu_mtid + 1));
+ }
+ __this_cpu_write(mt_scaling_jiffies, jiffies_64);
+}
+
+static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
+{
+ u64 delta;
+
+ delta = new - *tsk_vtime;
+ *tsk_vtime = new;
+ return delta;
+}
+
+
+static inline u64 scale_vtime(u64 vtime)
+{
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ if (smp_cpu_mtid)
+ return vtime * mult / div;
+ return vtime;
+}
+
+static void account_system_index_scaled(struct task_struct *p, u64 cputime,
+ enum cpu_usage_stat index)
+{
+ p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
+ account_system_index_time(p, cputime_to_nsecs(cputime), index);
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
+static int do_account_vtime(struct task_struct *tsk)
{
- struct thread_info *ti = task_thread_info(tsk);
- u64 timer, clock, user, system, steal;
+ u64 timer, clock, user, guest, system, hardirq, softirq;
+ struct lowcore *lc = get_lowcore();
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
+ timer = lc->last_update_timer;
+ clock = lc->last_update_clock;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
- " stck %1" /* Store current tod clock value */
- : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock));
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
-
- user = S390_lowcore.user_timer - ti->user_timer;
- S390_lowcore.steal_timer -= user;
- ti->user_timer = S390_lowcore.user_timer;
- account_user_time(tsk, user, user);
-
- system = S390_lowcore.system_timer - ti->system_timer;
- S390_lowcore.steal_timer -= system;
- ti->system_timer = S390_lowcore.system_timer;
- account_system_time(tsk, hardirq_offset, system, system);
-
- steal = S390_lowcore.steal_timer;
- if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
- account_steal_time(steal);
+ " stckf %1" /* Store current tod clock value */
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
+ : : "cc");
+ clock = lc->last_update_clock - clock;
+ timer -= lc->last_update_timer;
+
+ if (hardirq_count())
+ lc->hardirq_timer += timer;
+ else
+ lc->system_timer += timer;
+
+ /* Update MT utilization calculation */
+ if (smp_cpu_mtid &&
+ time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ update_mt_scaling();
+
+ /* Calculate cputime delta */
+ user = update_tsk_timer(&tsk->thread.user_timer,
+ READ_ONCE(lc->user_timer));
+ guest = update_tsk_timer(&tsk->thread.guest_timer,
+ READ_ONCE(lc->guest_timer));
+ system = update_tsk_timer(&tsk->thread.system_timer,
+ READ_ONCE(lc->system_timer));
+ hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
+ READ_ONCE(lc->hardirq_timer));
+ softirq = update_tsk_timer(&tsk->thread.softirq_timer,
+ READ_ONCE(lc->softirq_timer));
+ lc->steal_timer +=
+ clock - user - guest - system - hardirq - softirq;
+
+ /* Push account value */
+ if (user) {
+ account_user_time(tsk, cputime_to_nsecs(user));
+ tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
}
- return virt_timer_forward(user + system);
+ if (guest) {
+ account_guest_time(tsk, cputime_to_nsecs(guest));
+ tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
+ }
+
+ if (system)
+ account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
+ if (hardirq)
+ account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
+ if (softirq)
+ account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
+
+ return virt_timer_forward(user + guest + system + hardirq + softirq);
}
void vtime_task_switch(struct task_struct *prev)
{
- struct thread_info *ti;
-
- do_account_vtime(prev, 0);
- ti = task_thread_info(prev);
- ti->user_timer = S390_lowcore.user_timer;
- ti->system_timer = S390_lowcore.system_timer;
- ti = task_thread_info(current);
- S390_lowcore.user_timer = ti->user_timer;
- S390_lowcore.system_timer = ti->system_timer;
+ struct lowcore *lc = get_lowcore();
+
+ do_account_vtime(prev);
+ prev->thread.user_timer = lc->user_timer;
+ prev->thread.guest_timer = lc->guest_timer;
+ prev->thread.system_timer = lc->system_timer;
+ prev->thread.hardirq_timer = lc->hardirq_timer;
+ prev->thread.softirq_timer = lc->softirq_timer;
+ lc->user_timer = current->thread.user_timer;
+ lc->guest_timer = current->thread.guest_timer;
+ lc->system_timer = current->thread.system_timer;
+ lc->hardirq_timer = current->thread.hardirq_timer;
+ lc->softirq_timer = current->thread.softirq_timer;
}
/*
@@ -117,81 +198,67 @@ void vtime_task_switch(struct task_struct *prev)
* accounting system time in order to correctly compute
* the stolen time accounting.
*/
-void vtime_account_user(struct task_struct *tsk)
+void vtime_flush(struct task_struct *tsk)
{
- if (do_account_vtime(tsk, HARDIRQ_OFFSET))
+ struct lowcore *lc = get_lowcore();
+ u64 steal, avg_steal;
+
+ if (do_account_vtime(tsk))
virt_timer_expire();
+
+ steal = lc->steal_timer;
+ avg_steal = lc->avg_steal_timer;
+ if ((s64) steal > 0) {
+ lc->steal_timer = 0;
+ account_steal_time(cputime_to_nsecs(steal));
+ avg_steal += steal;
+ }
+ lc->avg_steal_timer = avg_steal / 2;
+}
+
+static u64 vtime_delta(void)
+{
+ struct lowcore *lc = get_lowcore();
+ u64 timer = lc->last_update_timer;
+
+ lc->last_update_timer = get_cpu_timer();
+ return timer - lc->last_update_timer;
}
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
- struct thread_info *ti = task_thread_info(tsk);
- u64 timer, system;
-
- WARN_ON_ONCE(!irqs_disabled());
+ struct lowcore *lc = get_lowcore();
+ u64 delta = vtime_delta();
- timer = S390_lowcore.last_update_timer;
- S390_lowcore.last_update_timer = get_vtimer();
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+ if (tsk->flags & PF_VCPU)
+ lc->guest_timer += delta;
+ else
+ lc->system_timer += delta;
- system = S390_lowcore.system_timer - ti->system_timer;
- S390_lowcore.steal_timer -= system;
- ti->system_timer = S390_lowcore.system_timer;
- account_system_time(tsk, 0, system, system);
-
- virt_timer_forward(system);
+ virt_timer_forward(delta);
}
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-
-void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
-void __kprobes vtime_stop_cpu(void)
+void vtime_account_softirq(struct task_struct *tsk)
{
- struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
- unsigned long long idle_time;
- unsigned long psw_mask;
-
- trace_hardirqs_on();
-
- /* Wait for external, I/O or machine check interrupt. */
- psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- idle->nohz_delay = 0;
-
- /* Call the assembler magic in entry.S */
- psw_idle(idle, psw_mask);
-
- /* Account time spent with enabled wait psw loaded as idle time. */
- idle->sequence++;
- smp_wmb();
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
- idle->idle_time += idle_time;
- idle->idle_count++;
- account_idle_time(idle_time);
- smp_wmb();
- idle->sequence++;
+ u64 delta = vtime_delta();
+
+ get_lowcore()->softirq_timer += delta;
+
+ virt_timer_forward(delta);
}
-cputime64_t s390_get_idle_time(int cpu)
+void vtime_account_hardirq(struct task_struct *tsk)
{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long long now, idle_enter, idle_exit;
- unsigned int sequence;
-
- do {
- now = get_tod_clock();
- sequence = ACCESS_ONCE(idle->sequence);
- idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
- idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (idle->sequence != sequence));
- return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
+ u64 delta = vtime_delta();
+
+ get_lowcore()->hardirq_timer += delta;
+
+ virt_timer_forward(delta);
}
/*
@@ -295,7 +362,7 @@ static void __add_vtimer(struct vtimer_list *timer, int periodic)
}
/*
- * add_virt_timer - add an oneshot virtual CPU timer
+ * add_virt_timer - add a oneshot virtual CPU timer
*/
void add_virt_timer(struct vtimer_list *timer)
{
@@ -371,31 +438,15 @@ EXPORT_SYMBOL(del_virt_timer);
/*
* Start the virtual CPU timer on the current CPU.
*/
-void __cpuinit init_cpu_vtimer(void)
+void vtime_init(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
-}
-
-static int __cpuinit s390_nohz_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- struct s390_idle_data *idle;
- long cpu = (long) hcpu;
-
- idle = &per_cpu(s390_idle, cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DYING:
- idle->nohz_delay = 0;
- default:
- break;
+ /* Setup initial MT scaling values */
+ if (smp_cpu_mtid) {
+ __this_cpu_write(mt_scaling_jiffies, jiffies);
+ __this_cpu_write(mt_scaling_mult, 1);
+ __this_cpu_write(mt_scaling_div, 1);
+ stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
}
- return NOTIFY_OK;
-}
-
-void __init vtime_init(void)
-{
- /* Enable cpu timer interrupts on the boot cpu. */
- init_cpu_vtimer();
- cpu_notifier(s390_nohz_notify, 0);
}
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+ unsigned long missed;
+ unsigned long addr;
+ pid_t pid;
+};
+
+struct wti_state {
+ /* debug data for s390dbf */
+ struct wti_debug dbg;
+ /*
+ * Represents the real-time thread responsible to
+ * acknowledge the warning-track interrupt and trigger
+ * preliminary and postliminary precautions.
+ */
+ struct task_struct *thread;
+ /*
+ * If pending is true, the real-time thread must be scheduled.
+ * If not, a wake up of that thread will remain a noop.
+ */
+ bool pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, interrupts are disabled
+ * to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* disable all I/O interrupts */
+ cr6.val &= ~0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* enable all I/O interrupts */
+ cr6.val |= 0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+ struct pt_regs *regs = get_irq_regs();
+
+ st->dbg.pid = current->pid;
+ st->dbg.addr = 0;
+ if (!user_mode(regs))
+ st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct wti_state *st = this_cpu_ptr(&wti_state);
+
+ inc_irq_stat(IRQEXT_WTI);
+ wti_irq_disable();
+ store_debug_data(st);
+ st->pending = true;
+ wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+ struct wti_debug *wdi = &st->dbg;
+ char buf[WTI_DBF_LEN];
+
+ if (wdi->addr)
+ snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+ else
+ snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+ debug_text_event(wti_dbg, 2, buf);
+ wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+ struct wti_state *st;
+ int cpu;
+
+ cpus_read_lock();
+ seq_puts(seq, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(seq, "CPU%-8d", cpu);
+ seq_putc(seq, '\n');
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ seq_printf(seq, " %10lu", st->dbg.missed);
+ }
+ seq_putc(seq, '\n');
+ cpus_read_unlock();
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ st->pending = false;
+ /*
+ * Yield CPU voluntarily to the hypervisor. Control
+ * resumes when hypervisor decides to dispatch CPU
+ * to this LPAR again.
+ */
+ if (diag49c(DIAG49C_SUBC_ACK))
+ wti_dbf_grace_period(st);
+ wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+ .store = &wti_state.thread,
+ .thread_should_run = wti_pending,
+ .thread_fn = wti_thread_fn,
+ .thread_comm = "cpuwti/%u",
+ .selfparking = false,
+};
+
+static int __init wti_init(void)
+{
+ struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct dentry *wti_dir;
+ struct wti_state *st;
+ int cpu, rc;
+
+ rc = -EOPNOTSUPP;
+ if (!sclp.has_wti)
+ goto out;
+ rc = smpboot_register_percpu_thread(&wti_threads);
+ if (WARN_ON(rc))
+ goto out;
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+ }
+ rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+ if (rc) {
+ pr_warn("Couldn't request external interrupt 0x1007\n");
+ goto out_thread;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+ rc = diag49c(DIAG49C_SUBC_REG);
+ if (rc) {
+ pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+ rc = -EOPNOTSUPP;
+ goto out_subclass;
+ }
+ wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+ debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+ wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+ if (!wti_dbg) {
+ rc = -ENOMEM;
+ goto out_debug_register;
+ }
+ rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+ if (rc)
+ goto out_debug_register;
+ goto out;
+out_debug_register:
+ debug_unregister(wti_dbg);
+out_subclass:
+ irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+ unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+ smpboot_unregister_percpu_thread(&wti_threads);
+out:
+ return rc;
+}
+late_initcall(wti_init);