summaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/Makefile14
-rw-r--r--arch/s390/kernel/abs_lowcore.c2
-rw-r--r--arch/s390/kernel/alternative.c82
-rw-r--r--arch/s390/kernel/asm-offsets.c31
-rw-r--r--arch/s390/kernel/cert_store.c1
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/cpacf.c119
-rw-r--r--arch/s390/kernel/cpcmd.c10
-rw-r--r--arch/s390/kernel/crash_dump.c121
-rw-r--r--arch/s390/kernel/debug.c341
-rw-r--r--arch/s390/kernel/diag/Makefile1
-rw-r--r--arch/s390/kernel/diag/diag.c (renamed from arch/s390/kernel/diag.c)43
-rw-r--r--arch/s390/kernel/diag/diag310.c276
-rw-r--r--arch/s390/kernel/diag/diag324.c224
-rw-r--r--arch/s390/kernel/diag/diag_ioctl.h14
-rw-r--r--arch/s390/kernel/diag/diag_misc.c63
-rw-r--r--arch/s390/kernel/dis.c20
-rw-r--r--arch/s390/kernel/dumpstack.c8
-rw-r--r--arch/s390/kernel/early.c82
-rw-r--r--arch/s390/kernel/early_printk.c16
-rw-r--r--arch/s390/kernel/earlypgm.S23
-rw-r--r--arch/s390/kernel/entry.S354
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/fpu.c2
-rw-r--r--arch/s390/kernel/ftrace.c150
-rw-r--r--arch/s390/kernel/ftrace.h2
-rw-r--r--arch/s390/kernel/head64.S8
-rw-r--r--arch/s390/kernel/hiperdispatch.c430
-rw-r--r--arch/s390/kernel/idle.c21
-rw-r--r--arch/s390/kernel/ipl.c460
-rw-r--r--arch/s390/kernel/irq.c34
-rw-r--r--arch/s390/kernel/kprobes.c92
-rw-r--r--arch/s390/kernel/kprobes.h9
-rw-r--r--arch/s390/kernel/kprobes_insn_page.S22
-rw-r--r--arch/s390/kernel/lgr.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c6
-rw-r--r--arch/s390/kernel/mcount.S28
-rw-r--r--arch/s390/kernel/module.c42
-rw-r--r--arch/s390/kernel/nmi.c32
-rw-r--r--arch/s390/kernel/nospec-branch.c20
-rw-r--r--arch/s390/kernel/nospec-sysfs.c12
-rw-r--r--arch/s390/kernel/numa.c11
-rw-r--r--arch/s390/kernel/os_info.c32
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c37
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c11
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c431
-rw-r--r--arch/s390/kernel/perf_event.c40
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c201
-rw-r--r--arch/s390/kernel/perf_pai_ext.c157
-rw-r--r--arch/s390/kernel/process.c11
-rw-r--r--arch/s390/kernel/processor.c20
-rw-r--r--arch/s390/kernel/reipl.S26
-rw-r--r--arch/s390/kernel/setup.c142
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/s390/kernel/smp.c269
-rw-r--r--arch/s390/kernel/stacktrace.c87
-rw-r--r--arch/s390/kernel/sthyi.c105
-rw-r--r--arch/s390/kernel/syscall.c31
-rw-r--r--arch/s390/kernel/syscalls/Makefile31
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl7
-rw-r--r--arch/s390/kernel/sysinfo.c1
-rw-r--r--arch/s390/kernel/text_amode31.S3
-rw-r--r--arch/s390/kernel/time.c64
-rw-r--r--arch/s390/kernel/topology.c140
-rw-r--r--arch/s390/kernel/traps.c56
-rw-r--r--arch/s390/kernel/unwind_bc.c4
-rw-r--r--arch/s390/kernel/uv.c616
-rw-r--r--arch/s390/kernel/vdso.c39
-rw-r--r--arch/s390/kernel/vdso32/Makefile16
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S2
-rw-r--r--arch/s390/kernel/vdso64/Makefile27
-rw-r--r--arch/s390/kernel/vdso64/vdso.h1
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S9
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S33
-rw-r--r--arch/s390/kernel/vdso64/vgetrandom-chacha.S181
-rw-r--r--arch/s390/kernel/vdso64/vgetrandom.c14
-rw-r--r--arch/s390/kernel/vmcore_info.c5
-rw-r--r--arch/s390/kernel/vmlinux.lds.S60
-rw-r--r--arch/s390/kernel/vtime.c90
-rw-r--r--arch/s390/kernel/wti.c215
80 files changed, 4127 insertions, 2320 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index fa029d0dc28f..db5f3a3faefb 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -11,6 +11,8 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE)
endif
@@ -34,22 +36,24 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
+obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y += diag/
extra-y += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
+obj-$(CONFIG_SYSFS) += cpacf.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
@@ -57,7 +61,6 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
obj-$(CONFIG_COMPAT) += $(compat-obj-y)
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_KPROBES) += mcount.o
obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
@@ -79,7 +82,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index f9efc54ec4b7..88f0b91d7a73 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -2,8 +2,10 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
unsigned long __bootdata_preserved(__abs_lowcore);
+int __bootdata_preserved(relocate_lowcore);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index e7bca29f9c34..8d5d0de35de0 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,75 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#include <linux/uaccess.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
-#include <asm/nospec-branch.h>
-static int __initdata_or_module alt_instr_disabled;
-
-static int __init disable_alternative_instructions(char *str)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
- alt_instr_disabled = 1;
- return 0;
-}
-
-early_param("noaltinstr", disable_alternative_instructions);
-
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- struct alt_instr *a;
u8 *instr, *replacement;
+ struct alt_instr *a;
+ bool replace;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
- instr = (u8 *)&a->instr_offset + a->instr_offset;
- replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
- if (!__test_facility(a->facility, alt_stfle_fac_list))
- continue;
-
- if (unlikely(a->instrlen % 2)) {
- WARN_ONCE(1, "cpu alternatives instructions length is "
- "odd, skipping patching\n");
+ if (!(a->ctx & ctx))
continue;
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ break;
+ case ALT_TYPE_LOWCORE:
+ replace = have_relocated_lowcore();
+ break;
+ default:
+ replace = false;
}
-
+ if (!replace)
+ continue;
+ instr = (u8 *)&a->instr_offset + a->instr_offset;
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
s390_kernel_write(instr, replacement, a->instrlen);
}
}
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- if (!alt_instr_disabled)
- __apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
- sync_core();
-}
-
-void text_poke_sync(void)
-{
- on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
- cpus_read_lock();
- text_poke_sync();
- cpus_read_unlock();
-}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fa5f6885c74a..36709112ae7a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -13,8 +13,6 @@
#include <linux/purgatory.h>
#include <linux/pgtable.h>
#include <linux/ftrace.h>
-#include <asm/idle.h>
-#include <asm/gmap.h>
#include <asm/stacktrace.h>
int main(void)
@@ -29,6 +27,7 @@ int main(void)
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@@ -64,12 +63,13 @@ int main(void)
OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+ OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
- /* idle data offsets */
- OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
- OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
- OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
+ OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+ DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user));
+ OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address);
+ DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper));
BLANK();
/* hardware defined lowcore locations 0x000 - 0x1ff */
OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
@@ -111,10 +111,9 @@ int main(void)
OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
/* software defined lowcore locations 0x200 - 0xdff*/
- OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
- OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA, lowcore, save_area);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -138,7 +137,6 @@ int main(void)
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
- OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_LAST_BREAK, lowcore, last_break);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
@@ -161,7 +159,6 @@ int main(void)
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
/* gmap/sie offsets */
- OFFSET(__GMAP_ASCE, gmap, asce);
OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
/* kexec_sha_region */
@@ -178,13 +175,9 @@ int main(void)
DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* function graph return value tracing */
- OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
- OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
- DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
- OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
- DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
+ OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
+ DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index 554447768bdd..bf983513dd33 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
#include <crypto/sha2.h>
#include <keys/user-type.h>
#include <asm/debug.h>
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1942e2a9f8db..5a86b9d1da71 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,11 +24,11 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <asm/vdso-symbols.h>
#include <asm/access-regs.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/lowcore.h>
-#include <asm/vdso.h>
#include <asm/fpu.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..4b9b34f95d72
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "cpacf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction) \
+static ssize_t name##_query_raw_read(struct file *fp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_mask_t mask; \
+ \
+ if (!cpacf_query(CPACF_##instruction, &mask)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
+} \
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ const struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static const struct bin_attribute *const cpacf_attrs[] = {
+ &bin_attr_km_query_raw,
+ &bin_attr_kmc_query_raw,
+ &bin_attr_kimd_query_raw,
+ &bin_attr_klmd_query_raw,
+ &bin_attr_kmac_query_raw,
+ &bin_attr_pckmo_query_raw,
+ &bin_attr_kmf_query_raw,
+ &bin_attr_kmctr_query_raw,
+ &bin_attr_kmo_query_raw,
+ &bin_attr_pcc_query_raw,
+ &bin_attr_prno_query_raw,
+ &bin_attr_kma_query_raw,
+ &bin_attr_kdsa_query_raw,
+ &bin_attr_km_query_auth_info_raw,
+ &bin_attr_kmc_query_auth_info_raw,
+ &bin_attr_kimd_query_auth_info_raw,
+ &bin_attr_klmd_query_auth_info_raw,
+ &bin_attr_kmac_query_auth_info_raw,
+ &bin_attr_pckmo_query_auth_info_raw,
+ &bin_attr_kmf_query_auth_info_raw,
+ &bin_attr_kmctr_query_auth_info_raw,
+ &bin_attr_kmo_query_auth_info_raw,
+ &bin_attr_pcc_query_auth_info_raw,
+ &bin_attr_prno_query_auth_info_raw,
+ &bin_attr_kma_query_auth_info_raw,
+ &bin_attr_kdsa_query_auth_info_raw,
+ NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+ .name = "cpacf",
+ .bin_attrs_new = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+ struct device *cpu_root;
+ int rc = 0;
+
+ cpu_root = bus_get_dev_root(&cpu_subsys);
+ if (cpu_root) {
+ rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+ put_device(cpu_root);
+ }
+ return rc;
+}
+device_initcall(cpacf_init);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index b210a29d3ee9..2f4174b961de 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -20,6 +20,7 @@
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
+#include <asm/asm.h>
static DEFINE_SPINLOCK(cpcmd_lock);
static char cpcmd_buf[241];
@@ -45,12 +46,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
ry.odd = *rlen;
asm volatile(
" diag %[rx],%[ry],0x8\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [ry] "+&d" (ry.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [ry] "+d" (ry.pair)
: [rx] "d" (rx.pair)
- : "cc");
- if (cc)
+ : CC_CLOBBER);
+ if (CC_TRANSFORM(cc))
*rlen += ry.odd;
else
*rlen = ry.odd;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d09ebb6f5262..276cb4c1e11b 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -63,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
struct save_area *sa;
- sa = memblock_alloc(sizeof(*sa), 8);
- if (!sa)
- return NULL;
+ sa = memblock_alloc_or_panic(sizeof(*sa), 8);
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -237,6 +235,17 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
prot);
}
+/*
+ * Return true only when in a kdump or stand-alone kdump environment.
+ * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump"
+ * environments, where this function returns false; see dump_available().
+ */
+bool is_kdump_kernel(void)
+{
+ return oldmem_data.start;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
static const char *nt_name(Elf64_Word type)
{
const char *name = "LINUX";
@@ -451,7 +460,7 @@ static void *nt_final(void *ptr)
/*
* Initialize ELF header (new kernel)
*/
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
{
memset(ehdr, 0, sizeof(*ehdr));
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -465,7 +474,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- ehdr->e_phnum = mem_chunk_cnt + 1;
+ /* Number of PT_LOAD program headers plus PT_NOTE program header */
+ ehdr->e_phnum = phdr_count + 1;
return ehdr + 1;
}
@@ -496,27 +506,77 @@ static int get_mem_chunk_cnt(void)
return cnt;
}
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
+{
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize ELF loads (new kernel)
*/
-static void loads_init(Elf64_Phdr *phdr)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
{
+ unsigned long old_identity_base = 0;
phys_addr_t start, end;
u64 idx;
+ if (os_info_has_vm)
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_filesz = end - start;
- phdr->p_type = PT_LOAD;
- phdr->p_offset = start;
- phdr->p_vaddr = (unsigned long)__va(start);
- phdr->p_paddr = start;
- phdr->p_memsz = end - start;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
}
+static bool os_info_has_vm(void)
+{
+ return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
+/*
+ * Prepare PT_LOAD type program header for kernel image region
+ */
+static void text_init(Elf64_Phdr *phdr)
+{
+ unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS);
+ unsigned long start = os_info_old_value(OS_INFO_IMAGE_START);
+ unsigned long end = os_info_old_value(OS_INFO_IMAGE_END);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = start;
+ phdr->p_filesz = end - start;
+ phdr->p_memsz = end - start;
+ phdr->p_offset = start_phys;
+ phdr->p_paddr = start_phys;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize notes (new kernel)
*/
@@ -542,7 +602,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
return ptr;
}
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
{
size_t size;
@@ -558,7 +618,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
/* nt_final */
size += sizeof(Elf64_Nhdr);
/* PT_LOADS */
- size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+ size += phdr_count * sizeof(Elf64_Phdr);
return size;
}
@@ -568,9 +628,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
*/
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
- Elf64_Phdr *phdr_notes, *phdr_loads;
+ Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+ int mem_chunk_cnt, phdr_text_cnt;
size_t alloc_size;
- int mem_chunk_cnt;
void *ptr, *hdr;
u64 hdr_off;
@@ -589,12 +649,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
}
mem_chunk_cnt = get_mem_chunk_cnt();
+ phdr_text_cnt = os_info_has_vm() ? 1 : 0;
- alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
hdr = kzalloc(alloc_size, GFP_KERNEL);
- /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ /*
+ * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
* a dump with this crash kernel will fail. Panic now to allow other
* dump mechanisms to take over.
*/
@@ -602,18 +664,25 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
panic("s390 kdump allocating elfcorehdr failed");
/* Init elf header */
- ptr = ehdr_init(hdr, mem_chunk_cnt);
+ phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
/* Init program headers */
- phdr_notes = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_loads = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ if (phdr_text_cnt) {
+ phdr_text = phdr_notes + 1;
+ phdr_loads = phdr_text + 1;
+ } else {
+ phdr_loads = phdr_notes + 1;
+ }
+ ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init kernel text program header */
+ if (phdr_text_cnt)
+ text_init(phdr_text);
/* Init loads */
+ loads_init(phdr_loads, phdr_text_cnt);
+ /* Finalize program headers */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads);
*addr = (unsigned long long) hdr;
*size = (unsigned long long) hdr_off;
BUG_ON(elfcorehdr_size > alloc_size);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 85328a0ef3b6..ce038e9205f7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/debugfs.h>
@@ -38,13 +39,13 @@
typedef struct file_private_info {
loff_t offset; /* offset of last read in file */
- int act_area; /* number of last formated area */
+ int act_area; /* number of last formatted area */
int act_page; /* act page in given area */
- int act_entry; /* last formated entry (offset */
+ int act_entry; /* last formatted entry (offset */
/* relative to beginning of last */
- /* formated page) */
+ /* formatted page) */
size_t act_entry_offset; /* up to this offset we copied */
- /* in last read the last formated */
+ /* in last read the last formatted */
/* entry to userland */
char temp_buf[2048]; /* buffer for output */
debug_info_t *debug_info_org; /* original debug information */
@@ -63,7 +64,7 @@ typedef struct {
long args[];
} debug_sprintf_entry_t;
-/* internal function prototyes */
+/* internal function prototypes */
static int debug_init(void);
static ssize_t debug_output(struct file *file, char __user *user_buf,
@@ -77,12 +78,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_prolog_pages_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
@@ -90,9 +93,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf);
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *inbuf);
+ char *out_buf, size_t out_buf_size,
+ const char *in_buf);
static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@@ -163,7 +165,6 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
@@ -351,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
for (i = 0; i < in->nr_areas; i++) {
for (j = 0; j < in->pages_per_area; j++)
memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ rc->active_pages[i] = in->active_pages[i];
+ rc->active_entries[i] = in->active_entries[i];
}
+ rc->active_area = in->active_area;
out:
spin_unlock_irqrestore(&in->lock, flags);
return rc;
@@ -381,7 +385,7 @@ static void debug_info_put(debug_info_t *db_info)
/*
* debug_format_entry:
- * - format one debug entry and return size of formated data
+ * - format one debug entry and return size of formatted data
*/
static int debug_format_entry(file_private_info_t *p_info)
{
@@ -392,8 +396,10 @@ static int debug_format_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
/* print prolog */
- if (view->prolog_proc)
- len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+ if (view->prolog_proc) {
+ len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+ sizeof(p_info->temp_buf));
+ }
goto out;
}
if (!id_snap->areas) /* this is true, if we have a prolog only view */
@@ -403,21 +409,31 @@ static int debug_format_entry(file_private_info_t *p_info)
if (act_entry->clock == 0LL)
goto out; /* empty entry */
- if (view->header_proc)
+ if (view->header_proc) {
len += view->header_proc(id_snap, view, p_info->act_area,
- act_entry, p_info->temp_buf + len);
- if (view->format_proc)
+ act_entry, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len);
+ }
+ if (view->format_proc) {
len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len,
DEBUG_DATA(act_entry));
+ }
out:
return len;
}
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists the current position is set to one after the end the return value
+ * indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
*/
-static inline int debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
@@ -425,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
- goto out;
+ return true;
}
if (!id->areas)
- return 1;
+ return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@@ -441,16 +457,93 @@ static inline int debug_next_entry(file_private_info_t *p_info)
p_info->act_page = 0;
}
if (p_info->act_area >= id->nr_areas)
- return 1;
+ return false;
}
-out:
- return 0;
+ return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+ debug_info_t *snap_id;
+
+ snap_id = p_info->debug_info_snap;
+ p_info->act_area = snap_id->active_area;
+ p_info->act_page = snap_id->active_pages[snap_id->active_area];
+ p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous entry
+ * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value
+ * indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+ debug_to_act_entry(p_info);
+ if (!id->areas)
+ return false;
+ p_info->act_entry -= id->entry_size;
+ /* switch to prev page, if we reached the beginning of the page */
+ if (p_info->act_entry < 0) {
+ /* end of previous page */
+ p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+ p_info->act_page--;
+ if (p_info->act_page < 0) {
+ /* previous area */
+ p_info->act_area--;
+ p_info->act_page = id->pages_per_area - 1;
+ }
+ if (p_info->act_area < 0)
+ p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+ }
+ /* check full circle */
+ if (id->active_area == p_info->act_area &&
+ id->active_pages[id->active_area] == p_info->act_page &&
+ id->active_entries[id->active_area] == p_info->act_entry)
+ return false;
+ return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info: Private info that is manipulated
+ * @reverse: If true go to the next entry in reverse i.e. previous
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+ if (reverse)
+ return debug_prev_entry(p_info);
+ else
+ return debug_next_entry(p_info);
}
/*
* debug_output:
* - called for user read()
- * - copies formated debug entries to the user buffer
+ * - copies formatted debug entries to the user buffer
*/
static ssize_t debug_output(struct file *file, /* file descriptor */
char __user *user_buf, /* user buffer */
@@ -486,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */
}
if (copy_size == formatted_line_residue) {
entry_offset = 0;
- if (debug_next_entry(p_info))
+ if (!debug_next_entry(p_info))
goto out;
}
}
@@ -521,15 +614,51 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
return rc; /* number of input characters */
}
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+ struct debug_view *view)
+{
+ debug_info_t *debug_info_snapshot;
+ file_private_info_t *p_info;
+
+ /*
+ * Make snapshot of current debug areas to get it consistent.
+ * To copy all the areas is only needed, if we have a view which
+ * formats the debug areas.
+ */
+ if (!view->format_proc && !view->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot)
+ return NULL;
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ return NULL;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = view;
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ debug_info_get(debug_info);
+
+ return p_info;
+}
+
/*
* debug_open:
* - called for user open()
- * - copies formated output to private_data area of the file
+ * - copies formatted output to private_data area of the file
* handle
*/
static int debug_open(struct inode *inode, struct file *file)
{
- debug_info_t *debug_info, *debug_info_snapshot;
+ debug_info_t *debug_info;
file_private_info_t *p_info;
int i, rc = 0;
@@ -547,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file)
goto out;
found:
-
- /* Make snapshot of current debug areas to get it consistent. */
- /* To copy all the areas is only needed, if we have a view which */
- /* formats the debug areas. */
-
- if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
- debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
- else
- debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
-
- if (!debug_info_snapshot) {
- rc = -ENOMEM;
- goto out;
- }
- p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
if (!p_info) {
- debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
- p_info->offset = 0;
- p_info->debug_info_snap = debug_info_snapshot;
- p_info->debug_info_org = debug_info;
- p_info->view = debug_info->views[i];
- p_info->act_area = 0;
- p_info->act_page = 0;
- p_info->act_entry = DEBUG_PROLOG_ENTRY;
- p_info->act_entry_offset = 0;
file->private_data = p_info;
- debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(p_info);
+}
+
/*
* debug_close:
* - called for user close()
@@ -593,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file)
file_private_info_t *p_info;
p_info = (file_private_info_t *) file->private_data;
- if (p_info->debug_info_snap)
- debug_info_free(p_info->debug_info_snap);
- debug_info_put(p_info->debug_info_org);
- kfree(file->private_data);
+ debug_file_private_free(p_info);
+ file->private_data = NULL;
return 0; /* success */
}
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id: Debug information to use
+ * @view: View with which to dump the debug information
+ * @buf: Buffer the textual debug data representation is written to
+ * @buf_size: Size of the buffer, including the trailing '\0' byte
+ * @reverse: Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the callers responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer not
+ * including the trailing '\0' byte. If bug_size == 0 the function returns 0.
+ * On failure an error code less than 0 is returned.
+ */
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse)
+{
+ file_private_info_t *p_info;
+ size_t size, offset = 0;
+
+ /* Need space for '\0' byte */
+ if (buf_size < 1)
+ return 0;
+ buf_size--;
+
+ p_info = debug_file_private_alloc(id, view);
+ if (!p_info)
+ return -ENOMEM;
+
+ /* There is always at least the DEBUG_PROLOG_ENTRY */
+ do {
+ size = debug_format_entry(p_info);
+ size = min(size, buf_size - offset);
+ memcpy(buf + offset, p_info->temp_buf, size);
+ offset += size;
+ if (offset >= buf_size)
+ break;
+ } while (debug_move_entry(p_info, reverse));
+ debug_file_private_free(p_info);
+ buf[offset] = '\0';
+
+ return offset;
+}
+
/* Create debugfs entries and add to internal list. */
static void _debug_register(debug_info_t *id)
{
@@ -954,7 +1113,7 @@ static int debug_active = 1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
@@ -963,7 +1122,7 @@ static int s390dbf_procactive(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
{
.procname = "debug_stoppable",
.data = &debug_stoppable,
@@ -1293,9 +1452,9 @@ static inline int debug_get_uint(char *buf)
*/
static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
- return sprintf(out_buf, "%i\n", id->pages_per_area);
+ return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
}
/*
@@ -1342,14 +1501,14 @@ out:
* prints out actual debug level
*/
static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
int rc = 0;
if (id->level == DEBUG_OFF_LEVEL)
- rc = sprintf(out_buf, "-\n");
+ rc = scnprintf(out_buf, out_buf_size, "-\n");
else
- rc = sprintf(out_buf, "%i\n", id->level);
+ rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
return rc;
}
@@ -1466,22 +1625,24 @@ out:
* prints debug data in hex/ascii format
*/
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf)
+ char *out_buf, size_t out_buf_size, const char *in_buf)
{
int i, rc = 0;
- for (i = 0; i < id->buf_size; i++)
- rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
- rc += sprintf(out_buf + rc, "| ");
+ for (i = 0; i < id->buf_size; i++) {
+ rc += scnprintf(out_buf + rc, out_buf_size - rc,
+ "%02x ", ((unsigned char *)in_buf)[i]);
+ }
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
if (isascii(c) && isprint(c))
- rc += sprintf(out_buf + rc, "%c", c);
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
else
- rc += sprintf(out_buf + rc, ".");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
}
- rc += sprintf(out_buf + rc, "\n");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
return rc;
}
@@ -1489,7 +1650,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
* prints header for debug entry
*/
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf)
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
{
unsigned long sec, usec;
unsigned long caller;
@@ -1506,22 +1668,22 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ",
- area, sec, usec, level, except_str,
- entry->cpu, (void *)caller);
+ rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ",
+ area, sec, usec, level, except_str,
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
/*
- * prints debug data sprintf-formated:
+ * prints debug data sprintf-formatted:
* debug_sprinf_event/exception calls must be used together with this view
*/
#define DEBUG_SPRINTF_MAX_ARGS 10
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *inbuf)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *inbuf)
{
debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
int num_longs, num_used_args = 0, i, rc = 0;
@@ -1534,8 +1696,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
goto out; /* bufsize of entry too small */
if (num_longs == 1) {
/* no args, we use only the string */
- strcpy(out_buf, curr_event->string);
- rc = strlen(curr_event->string);
+ rc = strscpy(out_buf, curr_event->string, out_buf_size);
+ if (rc == -E2BIG)
+ rc = out_buf_size;
goto out;
}
@@ -1547,15 +1710,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
for (i = 0; i < num_used_args; i++)
index[i] = i;
- rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
- curr_event->args[index[1]], curr_event->args[index[2]],
- curr_event->args[index[3]], curr_event->args[index[4]],
- curr_event->args[index[5]], curr_event->args[index[6]],
- curr_event->args[index[7]], curr_event->args[index[8]],
- curr_event->args[index[9]]);
+ rc = scnprintf(out_buf, out_buf_size,
+ curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
out:
return rc;
}
+EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y := diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c
index 8dee9aa0ec95..e15b8dee3228 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -16,7 +16,8 @@
#include <asm/diag.h>
#include <asm/trace/diag.h>
#include <asm/sections.h>
-#include "entry.h"
+#include <asm/asm.h>
+#include "../entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -50,8 +51,11 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
+ [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
@@ -185,6 +189,8 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
}
EXPORT_SYMBOL(diag14);
+#define DIAG204_BUSY_RC 8
+
static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
{
union register_pair rp = { .even = *subcode, .odd = size };
@@ -215,16 +221,18 @@ int diag204(unsigned long subcode, unsigned long size, void *addr)
{
if (addr) {
if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
- return -1;
+ return -EINVAL;
if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
- return -1;
+ return -EINVAL;
}
if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
diag_stat_inc(DIAG_STAT_X204);
size = __diag204(&subcode, size, addr);
- if (subcode)
- return -1;
+ if (subcode == DIAG204_BUSY_RC)
+ return -EBUSY;
+ else if (subcode)
+ return -EOPNOTSUPP;
return size;
}
EXPORT_SYMBOL(diag204);
@@ -278,12 +286,14 @@ int diag224(void *ptr)
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
- asm volatile(
- " diag %1,%2,0x224\n"
- "0: lhi %0,0x0\n"
+ asm volatile("\n"
+ " diag %[type],%[addr],0x224\n"
+ "0: lhi %[rc],0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (addr) : "memory");
+ : [rc] "+d" (rc)
+ , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+ : [type] "d" (0), [addr] "d" (addr));
return rc;
}
EXPORT_SYMBOL(diag224);
@@ -297,3 +307,18 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode)
return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
}
EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+ int cc;
+
+ diag_stat_inc(DIAG_STAT_X49C);
+ asm volatile(
+ " diag %[subcode],0,0x49c\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [subcode] "d" (subcode)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..d6a34454aa5a
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+ DIAG310_SUBC_0 = 0,
+ DIAG310_SUBC_1 = 1,
+ DIAG310_SUBC_4 = 4,
+ DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+ DIAG310_RET_SUCCESS = 0x0001,
+ DIAG310_RET_BUSY = 0x0101,
+ DIAG310_RET_OPNOTSUPP = 0x0102,
+ DIAG310_RET_SC4_INVAL = 0x0401,
+ DIAG310_RET_SC4_NODATA = 0x0402,
+ DIAG310_RET_SC5_INVAL = 0x0501,
+ DIAG310_RET_SC5_NODATA = 0x0502,
+ DIAG310_RET_SC5_ESIZE = 0x0503
+};
+
+union diag310_response {
+ u64 response;
+ struct {
+ u64 result : 32;
+ u64 : 16;
+ u64 rc : 16;
+ };
+};
+
+union diag310_req_subcode {
+ u64 subcode;
+ struct {
+ u64 : 48;
+ u64 st : 8;
+ u64 sc : 8;
+ };
+};
+
+union diag310_req_size {
+ u64 size;
+ struct {
+ u64 page_count : 32;
+ u64 : 32;
+ };
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+ diag_stat_inc(DIAG_STAT_X310);
+ asm volatile("diag %[rp],%[subcode],0x310\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+ switch (result) {
+ case DIAG310_RET_BUSY:
+ return -EBUSY;
+ case DIAG310_RET_OPNOTSUPP:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *mask = res.response;
+ return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *stride = res.result;
+ return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+ union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+ union diag310_response res;
+
+ res.response = diag310(req.subcode, 0, NULL);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ *pages = res.result;
+ return 0;
+ case DIAG310_RET_SC4_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC4_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+ union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+ union diag310_req_size req_size = { .page_count = pages };
+ union diag310_response res;
+
+ res.response = diag310(req_sc.subcode, req_size.size, buf);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ return 0;
+ case DIAG310_RET_SC5_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC5_ESIZE:
+ return -EOVERFLOW;
+ case DIAG310_RET_SC5_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_check_features(void)
+{
+ static int features_available;
+ unsigned long mask;
+ int rc;
+
+ if (READ_ONCE(features_available))
+ return 0;
+ if (!sclp.has_diag310)
+ return -EOPNOTSUPP;
+ rc = diag310_get_subcode_mask(&mask);
+ if (rc)
+ return rc;
+ if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+ return -EOPNOTSUPP;
+ WRITE_ONCE(features_available, 1);
+ return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+ static unsigned long memtop_stride;
+ unsigned long stride;
+ int rc;
+
+ stride = READ_ONCE(memtop_stride);
+ if (!stride) {
+ rc = diag310_get_memtop_stride(&stride);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_stride, stride);
+ }
+ *res = stride;
+ return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+ static unsigned long memtop_pages[DIAG310_LEVELMAX];
+ unsigned long pages;
+ int rc;
+
+ if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+ return -EINVAL;
+ pages = READ_ONCE(memtop_pages[level - 1]);
+ if (!pages) {
+ rc = diag310_get_memtop_size(&pages, level);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_pages[level - 1], pages);
+ }
+ *res = pages;
+ return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long stride;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ rc = memtop_get_stride_len(&stride);
+ if (rc)
+ return rc;
+ if (put_user(stride, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long pages, level;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, argp))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ if (put_user(pages * PAGE_SIZE, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+ struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+ unsigned long level, pages, data_size;
+ u64 address;
+ void *buf;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, &udata->nesting_lvl))
+ return -EFAULT;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ data_size = pages * PAGE_SIZE;
+ buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ if (!buf)
+ return -ENOMEM;
+ rc = diag310_store_topology_map(buf, pages, level);
+ if (rc)
+ goto out;
+ if (copy_to_user((void __user *)address, buf, data_size))
+ rc = -EFAULT;
+out:
+ vfree(buf);
+ return rc;
+}
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..7fa4c0b7eb6c
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324. diag 0x324 stores the power readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+ DIAG324_SUBC_0 = 0,
+ DIAG324_SUBC_1 = 1,
+ DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+ DIAG324_RET_SUCCESS = 0x0001,
+ DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
+ DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
+ DIAG324_RET_READING_UNAVAILABLE = 0x0105,
+};
+
+union diag324_response {
+ u64 response;
+ struct {
+ u64 installed : 32;
+ u64 : 16;
+ u64 rc : 16;
+ } sc0;
+ struct {
+ u64 format : 16;
+ u64 : 16;
+ u64 pib_len : 16;
+ u64 rc : 16;
+ } sc1;
+ struct {
+ u64 : 48;
+ u64 rc : 16;
+ } sc2;
+};
+
+union diag324_request {
+ u64 request;
+ struct {
+ u64 : 32;
+ u64 allocated : 16;
+ u64 : 12;
+ u64 sc : 4;
+ } sc2;
+};
+
+struct pib {
+ u32 : 8;
+ u32 num : 8;
+ u32 len : 16;
+ u32 : 24;
+ u32 hlen : 8;
+ u64 : 64;
+ u64 intv;
+ u8 r[];
+} __packed;
+
+struct pibdata {
+ struct pib *pib;
+ ktime_t expire;
+ u64 sequence;
+ size_t len;
+ int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr };
+
+ diag_stat_inc(DIAG_STAT_X324);
+ asm volatile("diag %[rp],%[subcode],0x324\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+ struct pibdata *data = &pibdata;
+ ktime_t timedout;
+
+ mutex_lock(&pibmutex);
+ timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);
+ if (ktime_before(ktime_get(), timedout)) {
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ goto out;
+ }
+ vfree(data->pib);
+ data->pib = NULL;
+out:
+ mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+ union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+ union diag324_response res;
+ int rc;
+
+ memset(data->pib, 0, data->len);
+ res.response = diag324(req.request, data->pib);
+ switch (res.sc2.rc) {
+ case DIAG324_RET_SUCCESS:
+ rc = 0;
+ break;
+ case DIAG324_RET_SUBC_NOTAVAIL:
+ rc = -ENOENT;
+ break;
+ case DIAG324_RET_INSUFFICIENT_SIZE:
+ rc = -EMSGSIZE;
+ break;
+ case DIAG324_RET_READING_UNAVAILABLE:
+ rc = -EBUSY;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+ struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+ struct pibdata *data = &pibdata;
+ static bool first = true;
+ u64 address;
+ int rc;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ mutex_lock(&pibmutex);
+ rc = -ENOMEM;
+ if (!data->pib)
+ data->pib = vmalloc(data->len);
+ if (!data->pib)
+ goto out;
+ if (first || ktime_after(ktime_get(), data->expire)) {
+ pib_update(data);
+ data->sequence++;
+ data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ first = false;
+ }
+ rc = data->rc;
+ if (rc != 0 && rc != -EBUSY)
+ goto out;
+ rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+ rc |= put_user(data->sequence, &udata->sequence);
+ if (rc)
+ rc = -EFAULT;
+out:
+ mutex_unlock(&pibmutex);
+ return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+ struct pibdata *data = &pibdata;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (put_user(data->len, (size_t __user *)arg))
+ return -EFAULT;
+ return 0;
+}
+
+static int __init diag324_init(void)
+{
+ union diag324_response res;
+ unsigned long installed;
+
+ if (!sclp.has_diag324)
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_0, NULL);
+ if (res.sc0.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ installed = res.response;
+ if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_1, NULL);
+ if (res.sc1.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ pibdata.len = res.sc1.pib_len;
+ return 0;
+}
+device_initcall(diag324_init);
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long rc;
+
+ switch (cmd) {
+ case DIAG324_GET_PIBLEN:
+ rc = diag324_piblen(arg);
+ break;
+ case DIAG324_GET_PIBBUF:
+ rc = diag324_pibbuf(arg);
+ break;
+ case DIAG310_GET_STRIDE:
+ rc = diag310_memtop_stride(arg);
+ break;
+ case DIAG310_GET_MEMTOPLEN:
+ rc = diag310_memtop_len(arg);
+ break;
+ case DIAG310_GET_MEMTOPBUF:
+ rc = diag310_memtop_buf(arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+ .name = "diag",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &fops,
+ .mode = 0444,
+};
+
+static int diag_init(void)
+{
+ return misc_register(&diagdev);
+}
+
+device_initcall(diag_init);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 89dc826a8d2e..94eb8168ea44 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -122,6 +122,7 @@ enum {
U8_32, /* 8 bit unsigned value starting at 32 */
U12_16, /* 12 bit unsigned value starting at 16 */
U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_20, /* 16 bit unsigned value starting at 20 */
U16_32, /* 16 bit unsigned value starting at 32 */
U32_16, /* 32 bit unsigned value starting at 16 */
VX_12, /* Vector index register starting at position 12 */
@@ -184,6 +185,7 @@ static const struct s390_operand operands[] = {
[U8_32] = { 8, 32, 0 },
[U12_16] = { 12, 16, 0 },
[U16_16] = { 16, 16, 0 },
+ [U16_20] = { 16, 20, 0 },
[U16_32] = { 16, 32, 0 },
[U32_16] = { 32, 16, 0 },
[VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +259,6 @@ static const unsigned char formats[][6] = {
[INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
[INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
- [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 },
[INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +301,17 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
[INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
[INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 },
[INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
[INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
[INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
[INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
[INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
- [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
+ [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 },
+ [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 },
[INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +459,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
if (separator)
ptr += sprintf(ptr, "%c", separator);
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%r%u", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%f%u", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%a%u", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%c%u", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%v%u", value);
else if (operand->flags & OPERAND_PCREL) {
void *pcrel = (void *)((int)value + addr);
ptr += sprintf(ptr, "%px", pcrel);
} else if (operand->flags & OPERAND_SIGNED)
- ptr += sprintf(ptr, "%i", value);
+ ptr += sprintf(ptr, "%i", (int)value);
else
ptr += sprintf(ptr, "%u", value);
if (operand->flags & OPERAND_DISP)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index d2012635b093..1ecd0580561f 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -61,28 +61,28 @@ static bool in_task_stack(unsigned long sp, struct task_struct *task,
static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_IRQ, stack);
}
static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_NODAT, stack);
}
static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_MCCK, stack);
}
static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_RESTART, stack);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c666271433fb..2fa25164df7d 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -7,6 +7,7 @@
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/sched/debug.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -48,6 +49,8 @@ decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -56,7 +59,7 @@ static void __init kasan_early_init(void)
{
#ifdef CONFIG_KASAN
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ pr_info("KernelAddressSanitizer initialized\n");
#endif
}
@@ -72,7 +75,7 @@ static void __init reset_tod_clock(void)
memset(&tod_clock_base, 0, sizeof(tod_clock_base));
tod_clock_base.tod = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+ get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
}
/*
@@ -99,7 +102,7 @@ static noinline __init void detect_machine_type(void)
/* Check current-configuration-level */
if (stsi(NULL, 0, 0, 0) <= 2) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR;
return;
}
/* Get virtual-machine cpu information. */
@@ -108,9 +111,9 @@ static noinline __init void detect_machine_type(void)
/* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_KVM;
else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
- S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_VM;
}
/* Remove leading, trailing and double whitespace. */
@@ -166,7 +169,7 @@ static __init void setup_topology(void)
if (!test_facility(11))
return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
@@ -174,27 +177,45 @@ static __init void setup_topology(void)
topology_max_mnest = max_mnest;
}
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
{
- if (!fixup_exception(regs))
- disabled_wait();
+ struct lowcore *lc = get_lowcore();
+ unsigned long ip;
+
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
+ ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+ /* Monitor Event? Might be a warning */
+ if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+ if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+ return;
+ }
+ if (fixup_exception(regs))
+ return;
+ /*
+ * Unhandled exception - system cannot continue but try to get some
+ * helpful messages to the console. Use early_printk() to print
+ * some basic information in case it is too early for printk().
+ */
+ register_early_console();
+ early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+ regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+ show_regs(regs);
+ disabled_wait();
}
static noinline __init void setup_lowcore_early(void)
{
+ struct lowcore *lc = get_lowcore();
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
psw.mask = PSW_KERNEL_BITS;
- S390_lowcore.program_new_psw = psw;
- S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
-}
-
-static noinline __init void setup_facility_list(void)
-{
- memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
- if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
- __clear_facility(82, alt_stfle_fac_list);
+ lc->program_new_psw = psw;
+ lc->preempt_count = INIT_PREEMPT_COUNT;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
static __init void detect_diag9c(void)
@@ -211,43 +232,45 @@ static __init void detect_diag9c(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C;
}
static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1;
system_ctl_set_bit(0, CR0_EDAT_BIT);
}
if (test_facility(78))
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2;
if (test_facility(3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE;
if (test_facility(50) && test_facility(73)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TE;
system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
}
if (test_facility(51))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC;
if (test_facility(129))
system_ctl_set_bit(0, CR0_VECTOR_BIT);
if (test_facility(130))
- S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_NX;
if (test_facility(133))
- S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_GS;
if (test_facility(139) && (tod_clock_base.tod >> 63)) {
/* Enabled signed clock comparator comparisons */
- S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_SCC;
clock_comparator_max = -1ULL >> 1;
system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
}
if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO;
/* the control bit is set during PCI initialization */
}
if (test_facility(194))
- S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_RDP;
+ if (test_facility(85))
+ get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN;
}
static inline void save_vector_registers(void)
@@ -291,7 +314,6 @@ void __init startup_init(void)
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
detect_machine_type();
setup_arch_string();
setup_boot_command_line();
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
.index = -1,
};
+void __init register_early_console(void)
+{
+ if (early_console)
+ return;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+}
+
static int __init setup_early_printk(char *buf)
{
if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
/* Accept only "earlyprintk" and "earlyprintk=sclp" */
if (buf && !str_has_prefix(buf, "sclp"))
return 0;
- if (!sclp.has_linemode && !sclp.has_vt220)
- return 0;
- early_console = &sclp_early_console;
- register_console(early_console);
+ register_early_console();
return 0;
}
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index c634871f0d90..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-SYM_CODE_START(early_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- lgr %r2,%r11
- brasl %r14,__do_early_pgm_check
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- lpswe __LC_RETURN_PSW
-SYM_CODE_END(early_pgm_check_handler)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a1e0fbbaa15..4cc3408c4dac 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@@ -28,53 +28,46 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
+#include <asm/lowcore.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_LOWCORE
.endm
- .macro MBEAR reg
- ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_STACK savearea
-#ifdef CONFIG_CHECK_STACK
- tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
- lghi %r14,\savearea
- jz stack_overflow
-#endif
- .endm
-
- .macro CHECK_VMAP_STACK savearea,oklabel
-#ifdef CONFIG_VMAP_STACK
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
- clg %r14,__LC_KERNEL_STACK
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_ASYNC_STACK
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_MCCK_STACK
+ clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_NODAT_STACK
+ clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_RESTART_STACK
+ clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
- lghi %r14,\savearea
+ la %r14,\savearea(\lowcore)
j stack_overflow
-#else
- j \oklabel
-#endif
.endm
/*
@@ -100,30 +93,31 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "j .+12; nop; nop", 82
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
- "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
- .macro SIEEXIT sie_control
- lg %r9,\sie_control # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
@@ -163,13 +157,14 @@ SYM_FUNC_START(__switch_to_asm)
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
aghi %r3,__TASK_pid
- mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
SYM_FUNC_END(__switch_to_asm)
@@ -179,22 +174,21 @@ SYM_FUNC_END(__switch_to_asm)
* %r2 pointer to sie control block phys
* %r3 pointer to sie control block virt
* %r4 guest register save area
+ * %r5 guest asce
*/
SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- lg %r12,__LC_CURRENT
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
- mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- oi __LC_CPU_FLAGS+7,_CIF_SIE
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
+ mvi __TI_sie(%r14),1
+ lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
@@ -212,19 +206,10 @@ SYM_FUNC_START(__sie64a)
.Lsie_skip:
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
-# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between __sie64a and .Lsie_done should not cause program
-# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
-.Lrewind_pad6:
- nopr 7
-.Lrewind_pad4:
- nopr 7
-.Lrewind_pad2:
- nopr 7
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
@@ -236,15 +221,6 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
BR_EX %r14
-.Lsie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
- j sie_exit
-
- EX_TABLE(.Lrewind_pad6,.Lsie_fault)
- EX_TABLE(.Lrewind_pad4,.Lsie_fault)
- EX_TABLE(.Lrewind_pad2,.Lsie_fault)
- EX_TABLE(sie_exit,.Lsie_fault)
SYM_FUNC_END(__sie64a)
EXPORT_SYMBOL(__sie64a)
EXPORT_SYMBOL(sie_exit)
@@ -256,14 +232,15 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
- stpt __LC_SYS_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
- STBEAR __LC_LAST_BREAK
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+ STBEAR __LC_LAST_BREAK(%r13)
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
# clear user controlled register to prevent speculative use
@@ -278,17 +255,17 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
- MBEAR %r2
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
+ MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)
@@ -299,12 +276,13 @@ SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ GET_LC %r13
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)
@@ -313,41 +291,40 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
- stpt __LC_SYS_ENTER_TIMER
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- lgr %r10,%r15
- lmg %r8,%r9,__LC_PGM_OLD_PSW
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
+ xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
j 3f # -> fault in user space
.Lpgm_skip_asce:
+#if IS_ENABLED(CONFIG_KVM)
+ lg %r11,__LC_CURRENT(%r13)
+ tm __TI_sie(%r11),0xff
+ jz 1f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
+ lghi %r10,_PIF_GUEST_FAULT
+#endif
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3: lg %r15,__LC_KERNEL_STACK
+ CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ stg %r10,__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
- stctg %c1,%c1,__PT_CR1(%r11)
-#if IS_ENABLED(CONFIG_KVM)
- ltg %r12,__LC_GMAP
- jz 5f
- clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
- jne 5f
- BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r10)
-#endif
-5: stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
+ stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -356,16 +333,17 @@ SYM_CODE_START(pgm_check_handler)
xgr %r5,%r5
xgr %r6,%r6
xgr %r7,%r7
+ xgr %r12,%r12
lgr %r2,%r11
brasl %r14,__do_pgm_check
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -374,11 +352,11 @@ SYM_CODE_START(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
- stg %r14,__LC_RETURN_PSW+8
+ stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
- LBEAR __LC_PGM_LAST_BREAK
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)
@@ -387,25 +365,26 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
- stckf __LC_INT_CLOCK
- stpt __LC_SYS_ENTER_TIMER
- STBEAR __LC_LAST_BREAK
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lmg %r8,%r9,\lc_old_psw
+ lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz 0f
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -419,84 +398,67 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- MBEAR %r11
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(\name)
.endm
+ .section .irqentry.text, "ax"
+
INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
-/*
- * Load idle PSW.
- */
-SYM_FUNC_START(psw_idle)
- stg %r14,(__SF_GPRS+8*8)(%r15)
- stg %r3,__SF_EMPTY(%r15)
- larl %r1,psw_idle_exit
- stg %r1,__SF_EMPTY+8(%r15)
- larl %r1,smp_cpu_mtid
- llgf %r1,0(%r1)
- ltgr %r1,%r1
- jz .Lpsw_idle_stcctm
- .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2)
-.Lpsw_idle_stcctm:
- oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
- BPON
- stckf __CLOCK_IDLE_ENTER(%r2)
- stpt __TIMER_IDLE_ENTER(%r2)
- lpswe __SF_EMPTY(%r15)
-SYM_INNER_LABEL(psw_idle_exit, SYM_L_GLOBAL)
- BR_EX %r14
-SYM_FUNC_END(psw_idle)
+ .section .kprobes.text, "ax"
/*
* Machine check handler routines
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
ptlb
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYS_ENTER_TIMER
- clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
jnz .Lmcck_user
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz .Lmcck_user
- # Need to compare the address instead of a CIF_SIE* flag.
+ # Need to compare the address instead of __TI_SIE flag.
# Otherwise there would be a race between setting the flag
# and entering SIE (or leaving and clearing the flag). This
# would cause machine checks targeted at the guest to be
@@ -505,18 +467,19 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+ lg %r10,__LC_PCPU
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
- lg %r15,__LC_MCCK_STACK
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -526,7 +489,6 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
- mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@@ -534,12 +496,13 @@ SYM_CODE_START(mcck_int_handler)
brasl %r14,s390_do_machine_check
lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
BPON
- stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -575,7 +538,7 @@ SYM_CODE_START(mcck_int_handler)
SYM_CODE_END(mcck_int_handler)
SYM_CODE_START(restart_int_handler)
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
@@ -583,15 +546,17 @@ SYM_CODE_START(restart_int_handler)
0: larl %r15,daton_psw
lpswe 0(%r15) # turn dat on, keep irqs off
.Ldaton:
- lg %r15,__LC_RESTART_STACK
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lgf %r3,__LC_RESTART_SOURCE
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@@ -604,16 +569,34 @@ SYM_CODE_START(restart_int_handler)
3: j 3b
SYM_CODE_END(restart_int_handler)
+ __INIT
+SYM_CODE_START(early_pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+ __FINIT
+
.section .kprobes.text, "ax"
-#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
* The synchronous or the asynchronous stack overflowed. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
SYM_CODE_START(stack_overflow)
- lg %r15,__LC_NODAT_STACK # change to panic stack
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
@@ -623,7 +606,6 @@ SYM_CODE_START(stack_overflow)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
SYM_CODE_END(stack_overflow)
-#endif
.section .data, "aw"
.balign 4
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 21969520f947..a1f28879c87e 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -41,7 +41,6 @@ void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index fa90bbdc5ef9..6f2e87920288 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags)
int mask;
if (flags & KERNEL_FPC)
- fpu_lfpc(&state->fpc);
+ fpu_lfpc_safe(&state->fpc);
if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_V0V7)
load_fp_regs_vx(state->vxrs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..63ba6306632e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,14 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/kmsan-checks.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/text-patching.h>
@@ -49,10 +50,6 @@ struct ftrace_insn {
s32 disp;
} __packed;
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
static const char *ftrace_shared_hotpatch_trampoline(const char **end)
{
const char *tstart, *tend;
@@ -72,19 +69,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
bool ftrace_need_init_nop(void)
{
- return true;
+ return !MACHINE_HAS_SEQ_INSN;
}
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
__ftrace_hotpatch_trampolines_start;
- static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+ static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
static struct ftrace_hotpatch_trampoline *trampoline;
struct ftrace_hotpatch_trampoline **next_trampoline;
struct ftrace_hotpatch_trampoline *trampolines_end;
struct ftrace_hotpatch_trampoline tmp;
struct ftrace_insn *insn;
+ struct ftrace_insn old;
const char *shared;
s32 disp;
@@ -98,7 +96,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
if (mod) {
next_trampoline = &mod->arch.next_trampoline;
trampolines_end = mod->arch.trampolines_end;
- shared = ftrace_plt;
}
#endif
@@ -106,8 +103,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return -ENOMEM;
trampoline = (*next_trampoline)++;
+ if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+ return -EFAULT;
/* Check for the compiler-generated fentry nop (brcl 0, .). */
- if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+ if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
return -EINVAL;
/* Generate the trampoline. */
@@ -143,8 +142,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
return trampoline;
}
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+ /* brasl r0,target or brcl 0,0 */
+ return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+ .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+ unsigned long target)
+{
+ struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+ struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+ struct ftrace_insn old;
+
+ if (!IS_ALIGNED(ip, 8))
+ return -EINVAL;
+ if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+ return -EFAULT;
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *)ip, &new, sizeof(new));
+ return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
u64 old;
@@ -160,6 +186,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return 0;
}
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+ else
+ return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
{
u16 old;
@@ -178,11 +213,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Expect brcl 0xf,... */
- return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+ /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, addr, 0);
+ else
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
}
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
@@ -194,6 +232,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, 0, addr);
+ else
+ return ftrace_make_trampoline_call(rec, addr);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
ftrace_func = func;
@@ -214,75 +260,19 @@ void ftrace_arch_code_modify_post_process(void)
text_poke_sync_lock();
}
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int __init ftrace_plt_init(void)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- const char *start, *end;
-
- ftrace_plt = module_alloc(PAGE_SIZE);
- if (!ftrace_plt)
- panic("cannot allocate ftrace plt\n");
+ unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
- start = ftrace_shared_hotpatch_trampoline(&end);
- memcpy(ftrace_plt, start, end - start);
- set_memory_rox((unsigned long)ftrace_plt, 1);
- return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
- unsigned long ip)
-{
if (unlikely(ftrace_graph_is_dead()))
- goto out;
+ return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
- goto out;
- ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(ra, ip, 0, (void *) sp))
- ra = (unsigned long) return_to_handler;
-out:
- return ra;
-}
-NOKPROBE_SYMBOL(prepare_ftrace_return);
-
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative on condition. To enable the ftrace graph code
- * block, we simply patch the mask field of the instruction to zero and
- * turn the instruction into a nop.
- * To disable the ftrace graph code the mask field will be patched to
- * all ones, which turns the instruction into an unconditional branch.
- */
-int ftrace_enable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0xf,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0x0,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
+ return;
+ if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
+ *parent = (unsigned long)&return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -296,10 +286,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
+ kmsan_unpoison_memory(fregs, ftrace_regs_size());
regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)ip);
if (!regs || unlikely(!p) || kprobe_disabled(p))
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
extern const char ftrace_shared_hotpatch_trampoline_br_end[];
extern const char ftrace_shared_hotpatch_trampoline_exrl[];
extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 45413b04efc5..396034b2fe67 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -18,14 +19,15 @@
__HEAD
SYM_CODE_START(startup_continue)
larl %r1,tod_clock_base
- mvc 0(16,%r1),__LC_BOOT_CLOCK
+ GET_LC %r2
+ mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2)
#
# Setup stack
#
larl %r14,init_task
- stg %r14,__LC_CURRENT
+ stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
- stg %r15,__LC_KERNEL_STACK
+ stg %r15,__LC_KERNEL_STACK(%r2)
brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..7857a7e8e56c
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "hd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There is two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's reoccuring work function reads CPU capacities to
+ * determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ * updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread
+ * performance. Comparing the throughput of;
+ * - single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yield better
+ * performance. This performance difference is roughly ~30% (can change
+ * between machine generations)
+ *
+ * Hiperdispatch tries to hint scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs are less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR (4)
+#define HD_DELAY_INTERVAL (HZ / 4)
+#define HD_STEAL_THRESHOLD 30
+#define HD_STEAL_AVG_WEIGHT 16
+
+static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores; /* Current CORE count with high capacity */
+static int hd_entitled_cores; /* Total vertical high and medium CORE count */
+static int hd_online_cores; /* Current online CORE count */
+
+static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+ if (!MACHINE_HAS_TOPOLOGY)
+ enable = 0;
+ if (hd_enabled == enable)
+ return 0;
+ hd_enabled = enable;
+ return 1;
+}
+
+void hd_reset_state(void)
+{
+ cpumask_clear(&hd_vl_coremask);
+ cpumask_clear(&hd_vmvl_cpumask);
+ hd_entitled_cores = 0;
+ hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+ const struct cpumask *siblings;
+ int polarization;
+
+ hd_online_cores++;
+ polarization = smp_cpu_get_polarization(cpu);
+ siblings = topology_sibling_cpumask(cpu);
+ switch (polarization) {
+ case POLARIZATION_VH:
+ hd_entitled_cores++;
+ break;
+ case POLARIZATION_VM:
+ hd_entitled_cores++;
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ case POLARIZATION_VL:
+ cpumask_set_cpu(cpu, &hd_vl_coremask);
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ }
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+ static ktime_t prev;
+ ktime_t now;
+
+ /*
+ * Check if hiperdispatch is active, if not set the prev to 0.
+ * This way it is possible to differentiate the first update iteration after
+ * enabling hiperdispatch.
+ */
+ if (hd_entitled_cores == 0 || hd_enabled == 0) {
+ prev = ktime_set(0, 0);
+ return;
+ }
+ now = ktime_get();
+ if (ktime_after(prev, 0)) {
+ if (hd_high_capacity_cores == hd_online_cores)
+ hd_high_time += ktime_ms_delta(now, prev);
+ else
+ hd_low_time += ktime_ms_delta(now, prev);
+ }
+ prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+ int cpu, upscaling_cores;
+ unsigned long capacity;
+
+ upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+ capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+ hd_high_capacity_cores = hd_entitled_cores;
+ for_each_cpu(cpu, &hd_vl_coremask) {
+ smp_set_core_capacity(cpu, capacity);
+ if (capacity != CPU_CAPACITY_HIGH)
+ continue;
+ hd_high_capacity_cores++;
+ upscaling_cores--;
+ if (upscaling_cores == 0)
+ capacity = CPU_CAPACITY_LOW;
+ }
+}
+
+void hd_disable_hiperdispatch(void)
+{
+ cancel_delayed_work_sync(&hd_capacity_work);
+ hd_high_capacity_cores = hd_online_cores;
+ hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ mutex_unlock(&hd_counter_mutex);
+ if (hd_enabled == 0)
+ return 0;
+ if (hd_entitled_cores == 0)
+ return 0;
+ if (hd_online_cores <= hd_entitled_cores)
+ return 0;
+ mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+ hd_update_capacities();
+ return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+ static unsigned long steal;
+
+ steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+ return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+ unsigned long time_delta, steal_delta, steal, percentage;
+ static ktime_t prev;
+ int cpus, cpu;
+ ktime_t now;
+
+ cpus = 0;
+ steal = 0;
+ percentage = 0;
+ for_each_cpu(cpu, &hd_vmvl_cpumask) {
+ steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ cpus++;
+ }
+ /*
+ * If there is no vertical medium and low CPUs steal time
+ * is 0 as vertical high CPUs shouldn't experience steal time.
+ */
+ if (cpus == 0)
+ return percentage;
+ now = ktime_get();
+ time_delta = ktime_to_ns(ktime_sub(now, prev));
+ if (steal > hd_previous_steal && hd_previous_steal != 0) {
+ steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+ percentage = steal_delta / cpus;
+ }
+ hd_previous_steal = steal;
+ prev = now;
+ return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+ unsigned long steal_percentage, new_cores;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ /*
+ * If online cores are less or equal to entitled cores hiperdispatch
+ * does not need to make any adjustments, call a topology update to
+ * disable hiperdispatch.
+ * Normally this check is handled on topology update, but during cpu
+ * unhotplug, topology and cpu mask updates are done in reverse
+ * order, causing hd_enable_hiperdispatch() to get stale data.
+ */
+ if (hd_online_cores <= hd_entitled_cores) {
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return;
+ }
+ steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+ if (steal_percentage < hd_steal_threshold)
+ new_cores = hd_online_cores;
+ else
+ new_cores = hd_entitled_cores;
+ if (hd_high_capacity_cores != new_cores) {
+ trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+ hd_high_capacity_cores = new_cores;
+ atomic64_inc(&hd_adjustments);
+ topology_schedule_update();
+ }
+ trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+ mutex_unlock(&smp_cpu_state_mutex);
+ schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int hiperdispatch;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &hiperdispatch,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ hiperdispatch = hd_enabled;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (hd_set_hiperdispatch_mode(hiperdispatch))
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return 0;
+}
+
+static const struct ctl_table hiperdispatch_ctl_table[] = {
+ {
+ .procname = "hiperdispatch",
+ .mode = 0644,
+ .proc_handler = hiperdispatch_ctl_handler,
+ },
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val > 100)
+ return -ERANGE;
+ hd_steal_threshold = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (!val)
+ return -ERANGE;
+ hd_delay_factor = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+ &dev_attr_hd_steal_threshold.attr,
+ &dev_attr_hd_delay_factor.attr,
+ NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+ .name = "hiperdispatch",
+ .attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_high_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_low_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+ *val = atomic64_read(&hd_adjustments);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+ debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+ debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+ debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+ struct device *dev;
+
+ dev = bus_get_dev_root(&cpu_subsys);
+ if (!dev)
+ return;
+ if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+ pr_warn("Unable to create hiperdispatch attribute group\n");
+ put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+ if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+ hd_set_hiperdispatch_mode(1);
+ topology_schedule_update();
+ }
+ if (!register_sysctl("s390", hiperdispatch_ctl_table))
+ pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+ hd_create_debugfs_counters();
+ hd_create_attributes();
+ return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index e7239aaf428b..39cb8d0ae348 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -24,6 +24,7 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
unsigned long idle_time;
u64 cycles_new[8];
int i;
@@ -34,13 +35,13 @@ void account_idle_time_irq(void)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
+ idle_time = lc->int_clock - idle->clock_idle_enter;
- S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
/* Account time spent with enabled wait psw loaded as idle time. */
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
@@ -57,9 +58,13 @@ void noinstr arch_cpu_idle(void)
psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
-
- /* psw_idle() returns with interrupts disabled. */
- psw_idle(idle, psw_mask);
+ set_cpu_flag(CIF_ENABLED_WAIT);
+ if (smp_cpu_mtid)
+ stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
+ idle->clock_idle_enter = get_tod_clock_fast();
+ idle->timer_idle_enter = get_cpu_timer();
+ bpon();
+ __load_psw_mask(psw_mask);
}
static ssize_t show_idle_count(struct device *dev,
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1486350a4177..69be2309cde0 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -20,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/ipl.h>
@@ -208,7 +209,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
- return scnprintf(page, PAGE_SIZE, _format, ##args); \
+ return sysfs_emit(page, _format, ##args); \
}
#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
@@ -266,7 +267,11 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
const char *buf, size_t len) \
{ \
- strscpy(_value, buf, sizeof(_value)); \
+ if (len >= sizeof(_value)) \
+ return -E2BIG; \
+ len = strscpy(_value, buf, sizeof(_value)); \
+ if ((ssize_t)len < 0) \
+ return len; \
strim(_value); \
return len; \
} \
@@ -275,6 +280,61 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t size = _ipl_block.scp_data_len; \
+ void *scp_data = _ipl_block.scp_data; \
+ \
+ return memory_read_from_buffer(buf, count, &off, \
+ scp_data, size); \
+}
+
+#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t scpdata_len = count; \
+ size_t padding; \
+ \
+ if (off) \
+ return -EINVAL; \
+ \
+ memcpy(_ipl_block.scp_data, buf, count); \
+ if (scpdata_len % 8) { \
+ padding = 8 - (scpdata_len % 8); \
+ memset(_ipl_block.scp_data + scpdata_len, \
+ 0, padding); \
+ scpdata_len += padding; \
+ } \
+ \
+ _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
+ _ipl_block.len = _ipl_bp0_len + scpdata_len; \
+ _ipl_block.scp_data_len = scpdata_len; \
+ \
+ return count; \
+}
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \
+ NULL, _size)
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \
+ sys_##_prefix##_scp_data_store, _size)
+
/*
* ipl section
*/
@@ -312,7 +372,7 @@ EXPORT_SYMBOL_GPL(ipl_info);
static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type));
}
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
@@ -320,7 +380,7 @@ static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
static ssize_t ipl_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!ipl_secure_flag);
+ return sysfs_emit(page, "%i\n", !!ipl_secure_flag);
}
static struct kobj_attribute sys_ipl_secure_attr =
@@ -329,7 +389,7 @@ static struct kobj_attribute sys_ipl_secure_attr =
static ssize_t ipl_has_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!sclp.has_sipl);
+ return sysfs_emit(page, "%i\n", !!sclp.has_sipl);
}
static struct kobj_attribute sys_ipl_has_secure_attr =
@@ -342,7 +402,7 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj,
if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
- return sprintf(page, "%s\n", parm);
+ return sysfs_emit(page, "%s\n", parm);
}
static struct kobj_attribute sys_ipl_vm_parm_attr =
@@ -353,18 +413,18 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
{
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
- ipl_block.ccw.devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
case IPL_TYPE_ECKD:
case IPL_TYPE_ECKD_DUMP:
- return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
- ipl_block.eckd.devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+ ipl_block.eckd.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno);
case IPL_TYPE_NVME:
case IPL_TYPE_NVME_DUMP:
- return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
+ return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
@@ -373,71 +433,38 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
static struct kobj_attribute sys_ipl_device_attr =
__ATTR(device, 0444, sys_ipl_device_show, NULL);
-static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
+static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
return memory_read_from_buffer(buf, count, &off, &ipl_block,
ipl_block.hdr.len);
}
-static struct bin_attribute ipl_parameter_attr =
- __BIN_ATTR(binary_parameter, 0444, ipl_parameter_read, NULL,
+static const struct bin_attribute sys_ipl_parameter_attr =
+ __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
PAGE_SIZE);
-static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = ipl_block.fcp.scp_data_len;
- void *scp_data = &ipl_block.fcp.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
-static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = ipl_block.nvme.scp_data_len;
- void *scp_data = &ipl_block.nvme.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t ipl_eckd_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = ipl_block.eckd.scp_data_len;
- void *scp_data = &ipl_block.eckd.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static struct bin_attribute ipl_scp_data_attr =
- __BIN_ATTR(scp_data, 0444, ipl_scp_data_read, NULL, PAGE_SIZE);
-
-static struct bin_attribute ipl_nvme_scp_data_attr =
- __BIN_ATTR(scp_data, 0444, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
-
-static struct bin_attribute ipl_eckd_scp_data_attr =
- __BIN_ATTR(scp_data, 0444, ipl_eckd_scp_data_read, NULL, PAGE_SIZE);
-
-static struct bin_attribute *ipl_fcp_bin_attrs[] = {
- &ipl_parameter_attr,
- &ipl_scp_data_attr,
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_fcp_scp_data_attr,
NULL,
};
-static struct bin_attribute *ipl_nvme_bin_attrs[] = {
- &ipl_parameter_attr,
- &ipl_nvme_scp_data_attr,
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
+
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_nvme_scp_data_attr,
NULL,
};
-static struct bin_attribute *ipl_eckd_bin_attrs[] = {
- &ipl_parameter_attr,
- &ipl_eckd_scp_data_attr,
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
+
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_eckd_scp_data_attr,
NULL,
};
@@ -476,12 +503,12 @@ static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \
if (!ipb->br_chr.cyl && \
!ipb->br_chr.head && \
!ipb->br_chr.record) \
- return sprintf(buf, "auto\n"); \
+ return sysfs_emit(buf, "auto\n"); \
\
- return sprintf(buf, "0x%x,0x%x,0x%x\n", \
- ipb->br_chr.cyl, \
- ipb->br_chr.head, \
- ipb->br_chr.record); \
+ return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \
+ ipb->br_chr.cyl, \
+ ipb->br_chr.head, \
+ ipb->br_chr.record); \
}
#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \
@@ -546,11 +573,11 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
char loadparm[LOADPARM_LEN + 1] = {};
if (!sclp_ipl_info.is_valid)
- return sprintf(page, "#unknown#\n");
+ return sysfs_emit(page, "#unknown#\n");
memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
strim(loadparm);
- return sprintf(page, "%s\n", loadparm);
+ return sysfs_emit(page, "%s\n", loadparm);
}
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
@@ -566,9 +593,9 @@ static struct attribute *ipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group ipl_fcp_attr_group = {
+static const struct attribute_group ipl_fcp_attr_group = {
.attrs = ipl_fcp_attrs,
- .bin_attrs = ipl_fcp_bin_attrs,
+ .bin_attrs_new = ipl_fcp_bin_attrs,
};
static struct attribute *ipl_nvme_attrs[] = {
@@ -580,9 +607,9 @@ static struct attribute *ipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group ipl_nvme_attr_group = {
+static const struct attribute_group ipl_nvme_attr_group = {
.attrs = ipl_nvme_attrs,
- .bin_attrs = ipl_nvme_bin_attrs,
+ .bin_attrs_new = ipl_nvme_bin_attrs,
};
static struct attribute *ipl_eckd_attrs[] = {
@@ -593,9 +620,9 @@ static struct attribute *ipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group ipl_eckd_attr_group = {
+static const struct attribute_group ipl_eckd_attr_group = {
.attrs = ipl_eckd_attrs,
- .bin_attrs = ipl_eckd_bin_attrs,
+ .bin_attrs_new = ipl_eckd_bin_attrs,
};
/* CCW ipl device attributes */
@@ -613,11 +640,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = {
NULL,
};
-static struct attribute_group ipl_ccw_attr_group_vm = {
+static const struct attribute_group ipl_ccw_attr_group_vm = {
.attrs = ipl_ccw_attrs_vm,
};
-static struct attribute_group ipl_ccw_attr_group_lpar = {
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
.attrs = ipl_ccw_attrs_lpar
};
@@ -628,7 +655,7 @@ static struct attribute *ipl_common_attrs[] = {
NULL,
};
-static struct attribute_group ipl_common_attr_group = {
+static const struct attribute_group ipl_common_attr_group = {
.attrs = ipl_common_attrs,
};
@@ -704,7 +731,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
- return sprintf(page, "%s\n", vmparm);
+ return sysfs_emit(page, "%s\n", vmparm);
}
static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
@@ -776,46 +803,12 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
/* FCP reipl device attributes */
-static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_fcp->fcp.scp_data_len;
- void *scp_data = reipl_block_fcp->fcp.scp_data;
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
+ reipl_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t scpdata_len = count;
- size_t padding;
-
-
- if (off)
- return -EINVAL;
-
- memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
-
- reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
- reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
- reipl_block_fcp->fcp.scp_data_len = scpdata_len;
-
- return count;
-}
-static struct bin_attribute sys_reipl_fcp_scp_data_attr =
- __BIN_ATTR(scp_data, 0644, reipl_fcp_scpdata_read,
- reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
NULL,
};
@@ -846,7 +839,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
char buf[LOADPARM_LEN + 1];
reipl_get_ascii_loadparm(buf, ipb);
- return sprintf(page, "%s\n", buf);
+ return sysfs_emit(page, "%s\n", buf);
}
static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
@@ -902,7 +895,7 @@ DEFINE_GENERIC_LOADPARM(eckd);
static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_fcp_clear);
+ return sysfs_emit(page, "%u\n", reipl_fcp_clear);
}
static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
@@ -924,9 +917,9 @@ static struct attribute *reipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group reipl_fcp_attr_group = {
+static const struct attribute_group reipl_fcp_attr_group = {
.attrs = reipl_fcp_attrs,
- .bin_attrs = reipl_fcp_bin_attrs,
+ .bin_attrs_new = reipl_fcp_bin_attrs,
};
static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -934,46 +927,12 @@ static struct kobj_attribute sys_reipl_fcp_clear_attr =
/* NVME reipl device attributes */
-static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_nvme->nvme.scp_data_len;
- void *scp_data = reipl_block_nvme->nvme.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t scpdata_len = count;
- size_t padding;
-
- if (off)
- return -EINVAL;
-
- memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
+ reipl_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
- reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
- reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
- reipl_block_nvme->nvme.scp_data_len = scpdata_len;
-
- return count;
-}
-
-static struct bin_attribute sys_reipl_nvme_scp_data_attr =
- __BIN_ATTR(scp_data, 0644, reipl_nvme_scpdata_read,
- reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
&sys_reipl_nvme_scp_data_attr,
NULL,
};
@@ -996,15 +955,15 @@ static struct attribute *reipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group reipl_nvme_attr_group = {
+static const struct attribute_group reipl_nvme_attr_group = {
.attrs = reipl_nvme_attrs,
- .bin_attrs = reipl_nvme_bin_attrs
+ .bin_attrs_new = reipl_nvme_bin_attrs
};
static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_nvme_clear);
+ return sysfs_emit(page, "%u\n", reipl_nvme_clear);
}
static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
@@ -1025,7 +984,7 @@ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_ccw_clear);
+ return sysfs_emit(page, "%u\n", reipl_ccw_clear);
}
static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
@@ -1067,46 +1026,12 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
/* ECKD reipl device attributes */
-static ssize_t reipl_eckd_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_eckd->eckd.scp_data_len;
- void *scp_data = reipl_block_eckd->eckd.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_eckd_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t scpdata_len = count;
- size_t padding;
-
- if (off)
- return -EINVAL;
-
- memcpy(reipl_block_eckd->eckd.scp_data, buf, count);
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_eckd->eckd.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
-
- reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN + scpdata_len;
- reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN + scpdata_len;
- reipl_block_eckd->eckd.scp_data_len = scpdata_len;
-
- return count;
-}
-
-static struct bin_attribute sys_reipl_eckd_scp_data_attr =
- __BIN_ATTR(scp_data, 0644, reipl_eckd_scpdata_read,
- reipl_eckd_scpdata_write, DIAG308_SCPDATA_SIZE);
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
+ reipl_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_eckd_bin_attrs[] = {
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
&sys_reipl_eckd_scp_data_attr,
NULL,
};
@@ -1123,15 +1048,15 @@ static struct attribute *reipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group reipl_eckd_attr_group = {
+static const struct attribute_group reipl_eckd_attr_group = {
.attrs = reipl_eckd_attrs,
- .bin_attrs = reipl_eckd_bin_attrs
+ .bin_attrs_new = reipl_eckd_bin_attrs
};
static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_eckd_clear);
+ return sysfs_emit(page, "%u\n", reipl_eckd_clear);
}
static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
@@ -1161,7 +1086,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj,
char nss_name[NSS_NAME_SIZE + 1] = {};
reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
- return sprintf(page, "%s\n", nss_name);
+ return sysfs_emit(page, "%s\n", nss_name);
}
static ssize_t reipl_nss_name_store(struct kobject *kobj,
@@ -1209,8 +1134,8 @@ static struct attribute_group reipl_nss_attr_group = {
void set_os_info_reipl_block(void)
{
- os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
- reipl_block_actual->hdr.len);
+ os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block_actual->hdr.len);
}
/* reipl type */
@@ -1246,7 +1171,7 @@ static int reipl_set_type(enum ipl_type type)
static ssize_t reipl_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type));
}
static ssize_t reipl_type_store(struct kobject *kobj,
@@ -1648,6 +1573,11 @@ DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
+ dump_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
+
static struct attribute *dump_fcp_attrs[] = {
&sys_dump_fcp_device_attr.attr,
&sys_dump_fcp_wwpn_attr.attr,
@@ -1657,9 +1587,15 @@ static struct attribute *dump_fcp_attrs[] = {
NULL,
};
-static struct attribute_group dump_fcp_attr_group = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
+ &sys_dump_fcp_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
+ .bin_attrs_new = dump_fcp_bin_attrs,
};
/* NVME dump device attributes */
@@ -1672,6 +1608,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
+ dump_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
+
static struct attribute *dump_nvme_attrs[] = {
&sys_dump_nvme_fid_attr.attr,
&sys_dump_nvme_nsid_attr.attr,
@@ -1680,9 +1621,15 @@ static struct attribute *dump_nvme_attrs[] = {
NULL,
};
-static struct attribute_group dump_nvme_attr_group = {
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
+ &sys_dump_nvme_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_nvme_attr_group = {
.name = IPL_NVME_STR,
.attrs = dump_nvme_attrs,
+ .bin_attrs_new = dump_nvme_bin_attrs,
};
/* ECKD dump device attributes */
@@ -1696,6 +1643,11 @@ IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
static struct kobj_attribute sys_dump_eckd_br_chr_attr =
__ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store);
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_eckd, dump_block_eckd->hdr,
+ dump_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
+
static struct attribute *dump_eckd_attrs[] = {
&sys_dump_eckd_device_attr.attr,
&sys_dump_eckd_bootprog_attr.attr,
@@ -1703,9 +1655,15 @@ static struct attribute *dump_eckd_attrs[] = {
NULL,
};
-static struct attribute_group dump_eckd_attr_group = {
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
+ &sys_dump_eckd_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_eckd_attr_group = {
.name = IPL_ECKD_STR,
.attrs = dump_eckd_attrs,
+ .bin_attrs_new = dump_eckd_bin_attrs,
};
/* CCW dump device attributes */
@@ -1734,7 +1692,7 @@ static int dump_set_type(enum dump_type type)
static ssize_t dump_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", dump_type_str(dump_type));
+ return sysfs_emit(page, "%s\n", dump_type_str(dump_type));
}
static ssize_t dump_type_store(struct kobject *kobj,
@@ -1759,6 +1717,24 @@ static ssize_t dump_type_store(struct kobject *kobj,
static struct kobj_attribute dump_type_attr =
__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+static ssize_t dump_area_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%lu\n", sclp.hsa_size);
+}
+
+static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size);
+
+static struct attribute *dump_attrs[] = {
+ &dump_type_attr.attr,
+ &dump_area_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_attr_group = {
+ .attrs = dump_attrs,
+};
+
static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
@@ -1858,9 +1834,9 @@ static int __init dump_nvme_init(void)
}
dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
- dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
- dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_NVME;
return 0;
}
@@ -1895,7 +1871,7 @@ static int __init dump_init(void)
dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
if (!dump_kset)
return -ENOMEM;
- rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group);
if (rc) {
kset_unregister(dump_kset);
return rc;
@@ -1940,7 +1916,7 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
reipl_type == IPL_TYPE_NSS ||
reipl_type == IPL_TYPE_UNKNOWN)
os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
- os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+ os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
abs_lc->ipib = __pa(reipl_block_actual);
@@ -1958,11 +1934,13 @@ static struct shutdown_action __refdata dump_reipl_action = {
* vmcmd shutdown action: Trigger vm command on shutdown.
*/
-static char vmcmd_on_reboot[128];
-static char vmcmd_on_panic[128];
-static char vmcmd_on_halt[128];
-static char vmcmd_on_poff[128];
-static char vmcmd_on_restart[128];
+#define VMCMD_MAX_SIZE 240
+
+static char vmcmd_on_reboot[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_panic[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_halt[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_poff[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_restart[VMCMD_MAX_SIZE + 1];
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
@@ -2074,7 +2052,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
static ssize_t on_reboot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name);
}
static ssize_t on_reboot_store(struct kobject *kobj,
@@ -2100,7 +2078,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
static ssize_t on_panic_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_panic_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_panic_trigger.action->name);
}
static ssize_t on_panic_store(struct kobject *kobj,
@@ -2126,7 +2104,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
static ssize_t on_restart_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_restart_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_restart_trigger.action->name);
}
static ssize_t on_restart_store(struct kobject *kobj,
@@ -2152,7 +2130,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, arg);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
@@ -2162,7 +2140,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
static ssize_t on_halt_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_halt_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_halt_trigger.action->name);
}
static ssize_t on_halt_store(struct kobject *kobj,
@@ -2188,7 +2166,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
static ssize_t on_poff_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_poff_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_poff_trigger.action->name);
}
static ssize_t on_poff_store(struct kobject *kobj,
@@ -2288,8 +2266,8 @@ static int __init vmcmd_on_reboot_setup(char *str)
{
if (!MACHINE_IS_VM)
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, 127);
- vmcmd_on_reboot[127] = 0;
+ strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE);
+ vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0;
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -2299,8 +2277,8 @@ static int __init vmcmd_on_panic_setup(char *str)
{
if (!MACHINE_IS_VM)
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, 127);
- vmcmd_on_panic[127] = 0;
+ strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE);
+ vmcmd_on_panic[VMCMD_MAX_SIZE] = 0;
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -2310,8 +2288,8 @@ static int __init vmcmd_on_halt_setup(char *str)
{
if (!MACHINE_IS_VM)
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, 127);
- vmcmd_on_halt[127] = 0;
+ strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE);
+ vmcmd_on_halt[VMCMD_MAX_SIZE] = 0;
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -2321,8 +2299,8 @@ static int __init vmcmd_on_poff_setup(char *str)
{
if (!MACHINE_IS_VM)
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, 127);
- vmcmd_on_poff[127] = 0;
+ strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE);
+ vmcmd_on_poff[VMCMD_MAX_SIZE] = 0;
on_poff_trigger.action = &vmcmd_action;
return 1;
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6f71b0ce1068..ef7be599e1f7 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -29,6 +29,8 @@
#include <asm/hw_irq.h>
#include <asm/stacktrace.h>
#include <asm/softirq_stack.h>
+#include <asm/vtime.h>
+#include <asm/asm.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -75,6 +77,7 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
@@ -99,8 +102,8 @@ static const struct irq_class irqclass_sub_desc[] = {
static void do_IRQ(struct pt_regs *regs, int irq)
{
- if (tod_after_eq(S390_lowcore.int_clock,
- S390_lowcore.clock_comparator))
+ if (tod_after_eq(get_lowcore()->int_clock,
+ get_lowcore()->clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
@@ -110,7 +113,7 @@ static int on_async_stack(void)
{
unsigned long frame = current_frame_address();
- return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+ return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
}
static void do_irq_async(struct pt_regs *regs, int irq)
@@ -118,7 +121,7 @@ static void do_irq_async(struct pt_regs *regs, int irq)
if (on_async_stack()) {
do_IRQ(regs, irq);
} else {
- call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+ call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
struct pt_regs *, regs, int, irq);
}
}
@@ -127,9 +130,13 @@ static int irq_pending(struct pt_regs *regs)
{
int cc;
- asm volatile("tpi 0\n"
- "ipm %0" : "=d" (cc) : : "cc");
- return cc >> 28;
+ asm volatile(
+ " tpi 0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
void noinstr do_io_irq(struct pt_regs *regs)
@@ -150,9 +157,10 @@ void noinstr do_io_irq(struct pt_regs *regs)
if (from_idle)
account_idle_time_irq();
+ set_cpu_flag(CIF_NOHZ_DELAY);
do {
- regs->tpi_info = S390_lowcore.tpi_info;
- if (S390_lowcore.tpi_info.adapter_IO)
+ regs->tpi_info = get_lowcore()->tpi_info;
+ if (get_lowcore()->tpi_info.adapter_IO)
do_irq_async(regs, THIN_INTERRUPT);
else
do_irq_async(regs, IO_INTERRUPT);
@@ -181,9 +189,9 @@ void noinstr do_ext_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- regs->int_code = S390_lowcore.ext_int_code_addr;
- regs->int_parm = S390_lowcore.ext_params;
- regs->int_parm_long = S390_lowcore.ext_params2;
+ regs->int_code = get_lowcore()->ext_int_code_addr;
+ regs->int_parm = get_lowcore()->ext_params;
+ regs->int_parm_long = get_lowcore()->ext_params2;
from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
@@ -250,7 +258,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
goto out;
}
- if (index < nr_irqs) {
+ if (index < irq_get_nr_irqs()) {
show_msi_interrupt(p, index);
goto out;
}
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..8b80ea57125f 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
#define pr_fmt(fmt) "kprobes: " fmt
-#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,10 +20,11 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
+#include <linux/execmem.h>
+#include <asm/text-patching.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
-#include "kprobes.h"
#include "entry.h"
DEFINE_PER_CPU(struct kprobe *, current_kprobe);
@@ -32,39 +32,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
-static int insn_page_in_use;
-
void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
return page;
}
-static void *alloc_s390_insn_page(void)
-{
- if (xchg(&insn_page_in_use, 1) == 1)
- return NULL;
- return &kprobes_insn_page;
-}
-
-static void free_s390_insn_page(void *page)
-{
- xchg(&insn_page_in_use, 0);
-}
-
-struct kprobe_insn_cache kprobe_s390_insn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
- .alloc = alloc_s390_insn_page,
- .free = free_s390_insn_page,
- .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
- .insn_size = MAX_INSN_SIZE,
-};
-
static void copy_instruction(struct kprobe *p)
{
kprobe_opcode_t insn[MAX_INSN_SIZE];
@@ -78,10 +56,10 @@ static void copy_instruction(struct kprobe *p)
if (probe_is_insn_relative_long(&insn[0])) {
/*
* For pc-relative instructions in RIL-b or RIL-c format patch
- * the RI2 displacement field. We have already made sure that
- * the insn slot for the patched instruction is within the same
- * 2GB area as the original instruction (either kernel image or
- * module area). Therefore the new displacement will always fit.
+ * the RI2 displacement field. The insn slot for the to be
+ * patched instruction is within the same 4GB area like the
+ * original instruction. Therefore the new displacement will
+ * always fit.
*/
disp = *(s32 *)&insn[1];
addr = (u64)(unsigned long)p->addr;
@@ -93,34 +71,6 @@ static void copy_instruction(struct kprobe *p)
}
NOKPROBE_SYMBOL(copy_instruction);
-static int s390_get_insn_slot(struct kprobe *p)
-{
- /*
- * Get an insn slot that is within the same 2GB area like the original
- * instruction. That way instructions with a 32bit signed displacement
- * field can be patched and executed within the insn slot.
- */
- p->ainsn.insn = NULL;
- if (is_kernel((unsigned long)p->addr))
- p->ainsn.insn = get_s390_insn_slot();
- else if (is_module_addr(p->addr))
- p->ainsn.insn = get_insn_slot();
- return p->ainsn.insn ? 0 : -ENOMEM;
-}
-NOKPROBE_SYMBOL(s390_get_insn_slot);
-
-static void s390_free_insn_slot(struct kprobe *p)
-{
- if (!p->ainsn.insn)
- return;
- if (is_kernel((unsigned long)p->addr))
- free_s390_insn_slot(p->ainsn.insn, 0);
- else
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
-}
-NOKPROBE_SYMBOL(s390_free_insn_slot);
-
/* Check if paddr is at an instruction boundary */
static bool can_probe(unsigned long paddr)
{
@@ -174,7 +124,8 @@ int arch_prepare_kprobe(struct kprobe *p)
/* Make sure the probe isn't going on a difficult instruction */
if (probe_is_prohibited_opcode(p->addr))
return -EINVAL;
- if (s390_get_insn_slot(p))
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
return -ENOMEM;
copy_instruction(p);
return 0;
@@ -202,7 +153,12 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (MACHINE_HAS_SEQ_INSN) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -210,13 +166,21 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (MACHINE_HAS_SEQ_INSN) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
void arch_remove_kprobe(struct kprobe *p)
{
- s390_free_insn_slot(p);
+ if (!p->ainsn.insn)
+ return;
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
}
NOKPROBE_SYMBOL(arch_remove_kprobe);
@@ -525,6 +489,12 @@ int __init arch_init_kprobes(void)
return 0;
}
+int __init arch_populate_kprobe_blacklist(void)
+{
+ return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+}
+
int arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
diff --git a/arch/s390/kernel/kprobes.h b/arch/s390/kernel/kprobes.h
deleted file mode 100644
index dc3ed5098ee7..000000000000
--- a/arch/s390/kernel/kprobes.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-#ifndef _ARCH_S390_KPROBES_H
-#define _ARCH_S390_KPROBES_H
-
-#include <linux/kprobes.h>
-
-DEFINE_INSN_CACHE_OPS(s390_insn);
-
-#endif
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
deleted file mode 100644
index 0fe4d725e98b..000000000000
--- a/arch/s390/kernel/kprobes_insn_page.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-
-/*
- * insn_page is a special 4k aligned dummy function for kprobes.
- * It will contain all kprobed instructions that are out-of-line executed.
- * The page must be within the kernel image to guarantee that the
- * out-of-line instructions are within 2GB distance of their original
- * location. Using a dummy function ensures that the insn_page is within
- * the text section of the kernel and mapped read-only/executable from
- * the beginning on, thus avoiding to split large mappings if the page
- * would be in the data section instead.
- */
- .section .kprobes.text, "ax"
- .balign 4096
-SYM_CODE_START(kprobes_insn_page)
- .rept 2048
- .word 0x07fe
- .endr
-SYM_CODE_END(kprobes_insn_page)
- .previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6652e54cf3db..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -166,7 +166,7 @@ static struct timer_list lgr_timer;
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+ mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3aee98efc374..8f681ccfb83a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -52,7 +52,7 @@ static void __do_machine_kdump(void *data)
purgatory = (purgatory_t)image->start;
/* store_status() saved the prefix register to lowcore */
- prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+ prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
/* Now do the reset */
s390_reset_system();
@@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
call_nodat(1, int, purgatory, int, 1);
@@ -91,7 +91,7 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
if (MACHINE_HAS_GS) {
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index ae4d4fd9afcd..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,6 +9,7 @@
#include <asm/ftrace.h>
#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
+#include <asm/march.h>
#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -88,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
SYM_CODE_END(ftrace_caller)
SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
aghik %r2,%r0,-MCOUNT_INSN_SIZE
lgrl %r4,function_trace_op
lgrl %r1,ftrace_func
@@ -103,19 +104,8 @@ SYM_CODE_START(ftrace_common)
lgr %r3,%r14
la %r5,STACK_FREGS(%r15)
BASR_EX %r14,%r1
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# The j instruction gets runtime patched to a nop instruction.
-# See ftrace_enable_ftrace_graph_caller.
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
- j .Lftrace_graph_caller_end
- lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
- lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
- brasl %r14,prepare_ftrace_return
- stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
-.Lftrace_graph_caller_end:
-#endif
lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
locgrz %r1,%r0
#else
@@ -133,14 +123,14 @@ SYM_CODE_END(ftrace_common)
SYM_FUNC_START(return_to_handler)
stmg %r2,%r5,32(%r15)
lgr %r1,%r15
- aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
+ # allocate ftrace_regs and stack frame for ftrace_return_to_handler
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
stg %r1,__SF_BACKCHAIN(%r15)
- la %r3,STACK_FRAME_OVERHEAD(%r15)
- stg %r1,__FGRAPH_RET_FP(%r3)
- stg %r2,__FGRAPH_RET_GPR2(%r3)
- lgr %r2,%r3
+ stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
brasl %r14,ftrace_return_to_handler
- aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
+ aghi %r15,STACK_FRAME_SIZE_FREGS
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..91e207b50394 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <linux/memory.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -36,47 +37,10 @@
#define PLT_ENTRY_SIZE 22
-static unsigned long get_module_load_offset(void)
-{
- static DEFINE_MUTEX(module_kaslr_mutex);
- static unsigned long module_load_offset;
-
- if (!kaslr_enabled())
- return 0;
- /*
- * Calculate the module_load_offset the first time this code
- * is called. Once calculated it stays the same until reboot.
- */
- mutex_lock(&module_kaslr_mutex);
- if (!module_load_offset)
- module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- return module_load_offset;
-}
-
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
- return p;
-}
-
#ifdef CONFIG_FUNCTION_TRACER
void module_arch_cleanup(struct module *mod)
{
- module_memfree(mod->arch.trampolines_start);
+ execmem_free(mod->arch.trampolines_start);
}
#endif
@@ -510,7 +474,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
- start = module_alloc(numpages * PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if (!start)
return -ENOMEM;
set_memory_rox((unsigned long)start, numpages);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c77382a67325..fbd218b6fc8e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -31,6 +31,7 @@
#include <asm/crw.h>
#include <asm/asm-offsets.h>
#include <asm/pai.h>
+#include <asm/vtime.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -116,6 +117,7 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
static notrace void s390_handle_damage(void)
{
+ struct lowcore *lc = get_lowcore();
union ctlreg0 cr0, cr0_new;
char message[100];
psw_t psw_save;
@@ -124,7 +126,7 @@ static notrace void s390_handle_damage(void)
smp_emergency_stop();
diag_amode31_ops.diag308_reset();
ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
- u64_to_hex(ptr, S390_lowcore.mcck_interruption_code);
+ u64_to_hex(ptr, lc->mcck_interruption_code);
/*
* Disable low address protection and make machine check new PSW a
@@ -134,17 +136,17 @@ static notrace void s390_handle_damage(void)
cr0_new = cr0;
cr0_new.lap = 0;
local_ctl_load(0, &cr0_new.reg);
- psw_save = S390_lowcore.mcck_new_psw;
- psw_bits(S390_lowcore.mcck_new_psw).io = 0;
- psw_bits(S390_lowcore.mcck_new_psw).ext = 0;
- psw_bits(S390_lowcore.mcck_new_psw).wait = 1;
+ psw_save = lc->mcck_new_psw;
+ psw_bits(lc->mcck_new_psw).io = 0;
+ psw_bits(lc->mcck_new_psw).ext = 0;
+ psw_bits(lc->mcck_new_psw).wait = 1;
sclp_emergency_printk(message);
/*
* Restore machine check new PSW and control register 0 to original
* values. This makes possible system dump analysis easier.
*/
- S390_lowcore.mcck_new_psw = psw_save;
+ lc->mcck_new_psw = psw_save;
local_ctl_load(0, &cr0.reg);
disabled_wait();
while (1);
@@ -225,7 +227,7 @@ static bool notrace nmi_registers_valid(union mci mci)
/*
* Set the clock comparator register to the next expected value.
*/
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(get_lowcore()->clock_comparator);
if (!mci.gr || !mci.fp || !mci.fc)
return false;
/*
@@ -251,7 +253,7 @@ static bool notrace nmi_registers_valid(union mci mci)
* check handling must take care of this. The host values are saved by
* KVM and are not affected.
*/
- cr2.reg = S390_lowcore.cregs_save_area[2];
+ cr2.reg = get_lowcore()->cregs_save_area[2];
if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
return false;
if (!mci.ms || !mci.pm || !mci.ia)
@@ -277,11 +279,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
sie_page = container_of(sie_block, struct sie_page, sie_block);
mcck_backup = &sie_page->mcck_info;
- mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
- mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
- mcck_backup->failing_storage_address
- = S390_lowcore.failing_storage_address;
+ mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+ mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);
@@ -301,6 +302,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
static unsigned long long last_ipd;
+ struct lowcore *lc = get_lowcore();
struct mcck_struct *mcck;
unsigned long long tmp;
irqentry_state_t irq_state;
@@ -313,7 +315,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (user_mode(regs))
update_timer_mcck();
inc_irq_stat(NMI_NMI);
- mci.val = S390_lowcore.mcck_interruption_code;
+ mci.val = lc->mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
/*
@@ -381,9 +383,9 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
}
if (mci.ed && mci.ec) {
/* External damage */
- if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ if (lc->external_damage_code & (1U << ED_STP_SYNC))
mcck->stp_queue |= stp_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ if (lc->external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
mcck_pending = 1;
}
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index d1b16d83e49a..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
static int __init nobp_setup_early(char *str)
{
bool enabled;
@@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str)
* The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
- __set_facility(82, alt_stfle_fac_list);
+ nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
return 0;
}
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
@@ -114,10 +116,10 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
type = BRASL_EXPOLINE; /* brasl instruction */
else
continue;
- thunk = instr + (*(int *)(instr + 2)) * 2;
+ thunk = instr + (long)(*(int *)(instr + 2)) * 2;
if (thunk[0] == 0xc6 && thunk[1] == 0x00)
/* exrl %r0,<target-br> */
- br = thunk + (*(int *)(thunk + 2)) * 2;
+ br = thunk + (long)(*(int *)(thunk + 2)) * 2;
else
continue;
if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..5970dd3ee7c5 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -7,17 +7,17 @@
ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
{
if (test_facility(156))
- return sprintf(buf, "Mitigation: etokens\n");
+ return sysfs_emit(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
- return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
- return sprintf(buf, "Mitigation: limited branch prediction\n");
- return sprintf(buf, "Vulnerable\n");
+ return sysfs_emit(buf, "Mitigation: execute trampolines\n");
+ if (nobp_enabled())
+ return sysfs_emit(buf, "Mitigation: limited branch prediction\n");
+ return sysfs_emit(buf, "Vulnerable\n");
}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..2fc40f97c0ad 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
#include <linux/node.h>
#include <asm/numa.h>
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
void __init numa_setup(void)
{
int nid;
@@ -24,12 +21,8 @@ void __init numa_setup(void)
nodes_clear(node_possible_map);
node_set(0, node_possible_map);
node_set_online(0);
- for (nid = 0; nid < MAX_NUMNODES; nid++) {
- NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
- if (!NODE_DATA(nid))
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(pg_data_t), 8);
- }
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index a801e6bd5341..c2a468986212 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -15,8 +15,11 @@
#include <asm/checksum.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/asm-offsets.h>
+#include <asm/sections.h>
+#include <asm/ipl.h>
/*
* OS info structure has to be page aligned
@@ -43,9 +46,9 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
}
/*
- * Add OS info entry and update checksum
+ * Add OS info data entry and update checksum
*/
-void os_info_entry_add(int nr, void *ptr, u64 size)
+void os_info_entry_add_data(int nr, void *ptr, u64 size)
{
os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
@@ -54,15 +57,36 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
}
/*
+ * Add OS info value entry and update checksum
+ */
+void os_info_entry_add_val(int nr, u64 value)
+{
+ os_info.entry[nr].val = value;
+ os_info.entry[nr].size = 0;
+ os_info.entry[nr].csum = 0;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
* Initialize OS info structure and set lowcore pointer
*/
void __init os_info_init(void)
{
struct lowcore *abs_lc;
+ BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE);
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
+ os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base);
+ os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset());
+ os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys);
+ os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap);
+ os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START);
+ os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END);
+ os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext);
+ os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end);
+ os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext));
os_info.csum = os_info_csum(&os_info);
abs_lc = get_abs_lowcore();
abs_lc->os_info = __pa(&os_info);
@@ -125,7 +149,7 @@ static void os_info_old_init(void)
if (os_info_init)
return;
- if (!oldmem_data.start)
+ if (!oldmem_data.start && !is_ipl_type_dump())
goto fail;
if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
goto fail;
@@ -157,7 +181,7 @@ fail:
}
/*
- * Return pointer to os infor entry and its size
+ * Return pointer to os info entry and its size
*/
void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 41ed6e0f0a2a..33205dd410e4 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -22,6 +22,10 @@
#include <asm/hwctrset.h>
#include <asm/debug.h>
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
+
enum cpumf_ctr_set {
CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
@@ -428,7 +432,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
case CPUMF_CTR_SET_CRYPTO:
if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
ctrset_size = 16;
- else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+ else if (cpumf_ctr_info.csvn >= 6)
ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
@@ -556,25 +560,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
+ int i;
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
- } while (ctrstart->def && auth);
+ }
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
@@ -825,7 +835,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
return validate_ctr_version(hwc->config, set);
}
-/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
* attribute::type values:
* - PERF_TYPE_HARDWARE:
* - pmu->type:
@@ -869,8 +879,8 @@ static int hw_perf_event_reset(struct perf_event *event)
u64 prev, new;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err) {
if (err != 3)
@@ -882,7 +892,7 @@ static int hw_perf_event_reset(struct perf_event *event)
*/
new = 0;
}
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
return err;
}
@@ -892,12 +902,12 @@ static void hw_perf_event_update(struct perf_event *event)
u64 prev, new, delta;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err)
return;
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
delta = (prev <= new) ? new - prev
: (-1ULL - prev) + new + 1; /* overflow */
@@ -971,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
@@ -1044,7 +1054,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
*
* When a new perf event has been added but not yet started, this can
* clear enable control and resets all counters in a set. Therefore,
- * cpumf_pmu_start() always has to reenable a counter set.
+ * cpumf_pmu_start() always has to re-enable a counter set.
*/
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
if (!atomic_read(&cpuhw->ctr_set[i]))
@@ -1688,7 +1698,6 @@ static const struct file_operations cfset_fops = {
.release = cfset_release,
.unlocked_ioctl = cfset_ioctl,
.compat_ioctl = cfset_ioctl,
- .llseek = no_llseek
};
static struct miscdevice cfset_dev = {
@@ -1854,7 +1863,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = {
/* Performance monitoring unit for event CF_DIAG. Since this event
* is also started and stopped via the perf_event_open() system call, use
* the same event enable/disable call back functions. They do not
- * have a pointer to the perf_event strcture as first parameter.
+ * have a pointer to the perf_event structure as first parameter.
*
* The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
* Reuse them and distinguish the event (always first parameter) via
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 0d64aafd158f..e4a6bfc91080 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -855,16 +855,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
}
/* Determine version specific crypto set */
- switch (ci.csvn) {
- case 1 ... 5:
+ csvn = none;
+ if (ci.csvn >= 1 && ci.csvn <= 5)
csvn = cpumcf_svn_12345_pmu_event_attr;
- break;
- case 6 ... 7:
+ else if (ci.csvn >= 6)
csvn = cpumcf_svn_67_pmu_event_attr;
- break;
- default:
- csvn = none;
- }
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 06efad5b4f93..5f60248cb468 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -24,6 +24,22 @@
#include <asm/timex.h>
#include <linux/io.h>
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
* A single table contains up to 511 pointers to sample-data-blocks.
@@ -113,17 +129,6 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
return USEC_PER_SEC * qsi->cpu_speed / rate;
}
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
- /* TOD in STCKE format */
- if (te->header.t)
- return *((unsigned long long *)&te->timestamp[1]);
-
- /* TOD in STCK format */
- return *((unsigned long long *)&te->timestamp[0]);
-}
-
/* Return pointer to trailer entry of an sample data block */
static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
{
@@ -154,12 +159,12 @@ static inline unsigned long *get_next_sdbt(unsigned long *s)
/*
* sf_disable() - Switch off sampling facility
*/
-static int sf_disable(void)
+static void sf_disable(void)
{
struct hws_lsctl_request_block sreq;
memset(&sreq, 0, sizeof(sreq));
- return lsctl(&sreq);
+ lsctl(&sreq);
}
/*
@@ -175,41 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
*/
static void free_sampling_buffer(struct sf_buffer *sfb)
{
- unsigned long *sdbt, *curr;
-
- if (!sfb->sdbt)
- return;
+ unsigned long *sdbt, *curr, *head;
sdbt = sfb->sdbt;
- curr = sdbt;
-
+ if (!sdbt)
+ return;
+ sfb->sdbt = NULL;
/* Free the SDBT after all SDBs are processed... */
- while (1) {
- if (!*curr || !sdbt)
- break;
-
- /* Process table-link entries */
+ head = sdbt;
+ curr = sdbt;
+ do {
if (is_link_entry(curr)) {
+ /* Process table-link entries */
curr = get_next_sdbt(curr);
- if (sdbt)
- free_page((unsigned long)sdbt);
-
- /* If the origin is reached, sampling buffer is freed */
- if (curr == sfb->sdbt)
- break;
- else
- sdbt = curr;
+ free_page((unsigned long)sdbt);
+ sdbt = curr;
} else {
/* Process SDB pointer */
- if (*curr) {
- free_page((unsigned long)phys_to_virt(*curr));
- curr++;
- }
+ free_page((unsigned long)phys_to_virt(*curr));
+ curr++;
}
- }
-
- debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
- (unsigned long)sfb->sdbt);
+ } while (curr != head);
memset(sfb, 0, sizeof(*sfb));
}
@@ -265,10 +256,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "%s: "
- "sampling buffer is not linked: origin %#lx"
- " tail %#lx\n", __func__,
- (unsigned long)sfb->sdbt,
+ debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+ __func__, (unsigned long)sfb->sdbt,
(unsigned long)tail);
return -EINVAL;
}
@@ -318,9 +307,6 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
*tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "%s: new buffer"
- " settings: sdbt %lu sdb %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -357,15 +343,8 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
- if (rc) {
+ if (rc)
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "%s: "
- "realloc_sampling_buffer failed with rc %i\n",
- __func__, rc);
- } else
- debug_sprintf_event(sfdbg, 4,
- "%s: tear %#lx dear %#lx\n", __func__,
- (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -377,8 +356,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
CPUM_SF_MAX_SDB = max;
memset(&si, 0, sizeof(si));
- if (!qsi(&si))
- CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+ qsi(&si);
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
}
static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -397,12 +376,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
return 0;
}
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
- struct hw_perf_event *hwc)
-{
- return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
/* Limit the number of SDBs to not exceed the maximum */
@@ -419,14 +392,13 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
{
- if (cpuhw->sfb.sdbt)
+ if (sf_buffer_available(cpuhw))
free_sampling_buffer(&cpuhw->sfb);
}
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
unsigned long n_sdb, freq;
- size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
@@ -457,7 +429,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
* to 511 SDBs).
*/
- sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
@@ -473,12 +444,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
if (sf_buffer_available(cpuhw))
return 0;
- debug_sprintf_event(sfdbg, 3,
- "%s: rate %lu f %lu sdb %lu/%lu"
- " sample_size %lu cpuhw %p\n", __func__,
- SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
- sample_size, cpuhw);
-
return alloc_sampling_buffer(&cpuhw->sfb,
sfb_pending_allocs(&cpuhw->sfb, hwc));
}
@@ -535,8 +500,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
- __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -554,13 +517,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
static void extend_sampling_buffer(struct sf_buffer *sfb,
struct hw_perf_event *hwc)
{
- unsigned long num, num_old;
- int rc;
+ unsigned long num;
num = sfb_pending_allocs(sfb, hwc);
if (!num)
return;
- num_old = sfb->num_sdb;
/* Disable the sampling facility to reset any states and also
* clear pending measurement alerts.
@@ -572,51 +533,32 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
* called by perf. Because this is a reallocation, it is fine if the
* new SDB-request cannot be satisfied immediately.
*/
- rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
- if (rc)
- debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
- __func__, rc);
-
- if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "%s: "
- "req %lu alloc %lu remaining %lu\n",
- __func__, num, sfb->num_sdb - num_old,
- sfb_pending_allocs(sfb, hwc));
+ realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
}
/* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
#define PMC_INIT 0
#define PMC_RELEASE 1
-#define PMC_FAILURE 2
static void setup_pmc_cpu(void *flags)
{
- struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
- int err = 0;
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ sf_disable();
switch (*((int *)flags)) {
case PMC_INIT:
- memset(cpusf, 0, sizeof(*cpusf));
- err = qsi(&cpusf->qsi);
- if (err)
- break;
- cpusf->flags |= PMU_F_RESERVED;
- err = sf_disable();
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ qsi(&cpuhw->qsi);
+ cpuhw->flags |= PMU_F_RESERVED;
break;
case PMC_RELEASE:
- cpusf->flags &= ~PMU_F_RESERVED;
- err = sf_disable();
- if (!err)
- deallocate_buffers(cpusf);
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ deallocate_buffers(cpuhw);
break;
}
- if (err) {
- *((int *)flags) |= PMC_FAILURE;
- pr_err("Switching off the sampling facility failed with rc %i\n", err);
- }
}
static void release_pmc_hardware(void)
@@ -627,27 +569,19 @@ static void release_pmc_hardware(void)
on_each_cpu(setup_pmc_cpu, &flags, 1);
}
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
{
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- if (flags & PMC_FAILURE) {
- release_pmc_hardware();
- return -ENODEV;
- }
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
}
static void hw_perf_event_destroy(struct perf_event *event)
{
/* Release PMC if this is the last perf event */
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- release_pmc_hardware();
+ if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -751,9 +685,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
*/
if (sample_rate_to_freq(si, rate) >
sysctl_perf_event_sample_rate) {
- debug_sprintf_event(sfdbg, 1, "%s: "
- "Sampling rate exceeds maximum "
- "perf sample rate\n", __func__);
rate = 0;
}
}
@@ -798,9 +729,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
attr->sample_period = rate;
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
- debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
- __func__, event->cpu, event->attr.sample_period,
- event->attr.freq, SAMPLE_FREQ_MODE(hwc));
return 0;
}
@@ -810,23 +738,16 @@ static int __hw_perf_event_init(struct perf_event *event)
struct hws_qsi_info_block si;
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- int cpu, err;
+ int cpu, err = 0;
/* Reserve CPU-measurement sampling facility */
- err = 0;
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
+ mutex_lock(&pmc_reserve_mutex);
+ if (!refcount_inc_not_zero(&num_events)) {
+ reserve_pmc_hardware();
+ refcount_set(&num_events, 1);
}
event->destroy = hw_perf_event_destroy;
- if (err)
- goto out;
-
/* Access per-CPU sampling information (query sampling info) */
/*
* The event->cpu value can be -1 to count on every CPU, for example,
@@ -838,9 +759,9 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
memset(&si, 0, sizeof(si));
cpuhw = NULL;
- if (event->cpu == -1)
+ if (event->cpu == -1) {
qsi(&si);
- else {
+ } else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
*/
@@ -881,13 +802,9 @@ static int __hw_perf_event_init(struct perf_event *event)
if (err)
goto out;
- /* Initialize sample data overflow accounting */
- hwc->extra_reg.reg = REG_OVERFLOW;
- OVERFLOW_REG(hwc) = 0;
-
/* Use AUX buffer. No need to allocate it by ourself */
if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
- return 0;
+ goto out;
/* Allocate the per-CPU sampling buffer using the CPU information
* from the event. If the event is not pinned to a particular
@@ -917,6 +834,7 @@ static int __hw_perf_event_init(struct perf_event *event)
if (is_default_overflow_handler(event))
event->overflow_handler = cpumsf_output_event_pid;
out:
+ mutex_unlock(&pmc_reserve_mutex);
return err;
}
@@ -979,10 +897,14 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
struct hw_perf_event *hwc;
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
- return;
-
- if (cpuhw->flags & PMU_F_ERR_MASK)
+ /*
+ * Event must be
+ * - added/started on this CPU (PMU_F_IN_USE set)
+ * - and CPU must be available (PMU_F_RESERVED set)
+ * - and not already enabled (PMU_F_ENABLED not set)
+ * - and not in error condition (PMU_F_ERR_MASK not set)
+ */
+ if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED))
return;
/* Check whether to extent the sampling buffer.
@@ -996,39 +918,27 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
* facility, but it can be fully re-enabled using sampling controls that
* have been saved in cpumsf_pmu_disable().
*/
- if (cpuhw->event) {
- hwc = &cpuhw->event->hw;
- if (!(SAMPL_DIAG_MODE(hwc))) {
- /*
- * Account number of overflow-designated
- * buffer extents
- */
- sfb_account_overflows(cpuhw, hwc);
- extend_sampling_buffer(&cpuhw->sfb, hwc);
- }
- /* Rate may be adjusted with ioctl() */
- cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+ hwc = &cpuhw->event->hw;
+ if (!(SAMPL_DIAG_MODE(hwc))) {
+ /*
+ * Account number of overflow-designated buffer extents
+ */
+ sfb_account_overflows(cpuhw, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
}
+ /* Rate may be adjusted with ioctl() */
+ cpuhw->lsctl.interval = SAMPL_RATE(hwc);
/* (Re)enable the PMU and sampling facility */
- cpuhw->flags |= PMU_F_ENABLED;
- barrier();
-
err = lsctl(&cpuhw->lsctl);
if (err) {
- cpuhw->flags &= ~PMU_F_ENABLED;
pr_err("Loading sampling controls failed: op 1 err %i\n", err);
return;
}
/* Load current program parameter */
- lpp(&S390_lowcore.lpp);
-
- debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
- "interval %#lx tear %#lx dear %#lx\n", __func__,
- cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
- cpuhw->lsctl.cd, cpuhw->lsctl.interval,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+ lpp(&get_lowcore()->lpp);
+ cpuhw->flags |= PMU_F_ENABLED;
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1055,26 +965,23 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
return;
}
- /* Save state of TEAR and DEAR register contents */
- err = qsi(&si);
- if (!err) {
- /* TEAR/DEAR values are valid only if the sampling facility is
- * enabled. Note that cpumsf_pmu_disable() might be called even
- * for a disabled sampling facility because cpumsf_pmu_enable()
- * controls the enable/disable state.
- */
- if (si.es) {
- cpuhw->lsctl.tear = si.tear;
- cpuhw->lsctl.dear = si.dear;
- }
- } else
- debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
- __func__, err);
+ /*
+ * Save state of TEAR and DEAR register contents.
+ * TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ qsi(&si);
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
cpuhw->flags &= ~PMU_F_ENABLED;
}
-/* perf_exclude_event() - Filter event
+/* perf_event_exclude() - Filter event
* @event: The perf event
* @regs: pt_regs structure
* @sde_regs: Sample-data-entry (sde) regs structure
@@ -1083,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
*
* Return non-zero if the event shall be excluded.
*/
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
struct perf_sf_sde_regs *sde_regs)
{
if (event->attr.exclude_user && user_mode(regs))
@@ -1166,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event,
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
overflow = 0;
- if (perf_exclude_event(event, &regs, sde_regs))
+ if (perf_event_exclude(event, &regs, sde_regs))
goto out;
if (perf_event_overflow(event, &data, &regs)) {
overflow = 1;
@@ -1235,11 +1142,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sprintf_event(sfdbg, 4,
- "%s: Found unknown"
- " sampling data entry: te->f %i"
- " basic.def %#4x (%p)\n", __func__,
- te->header.f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1274,8 +1176,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
unsigned long long event_overflow, sampl_overflow, num_sdb;
- union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
unsigned long *sdbt, sdb;
int done;
@@ -1284,7 +1186,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* AUX buffer is used when in diagnostic sampling mode.
* No perf events/samples are created.
*/
- if (SAMPL_DIAG_MODE(&event->hw))
+ if (SAMPL_DIAG_MODE(hwc))
return;
sdbt = (unsigned long *)TEAR_REG(hwc);
@@ -1309,13 +1211,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
sampl_overflow += te->header.overflow;
- /* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
- "overflow %llu timestamp %#llx\n",
- __func__, sdb, (unsigned long)sdbt,
- te->header.overflow,
- (te->header.f) ? trailer_timestamp(te) : 0ULL);
-
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
@@ -1326,13 +1221,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
/* Reset trailer (using compare-double-and-swap) */
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
new.f = 0;
new.a = 1;
new.overflow = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
/* Advance to next sample-data-block */
sdbt++;
@@ -1340,7 +1233,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sdbt = get_next_sdbt(sdbt);
/* Update event hardware registers */
- TEAR_REG(hwc) = (unsigned long) sdbt;
+ TEAR_REG(hwc) = (unsigned long)sdbt;
/* Stop processing sample-data if all samples of the current
* sample-data-block were flushed even if it was not full.
@@ -1362,19 +1255,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* are dropped.
* Slightly increase the interval to avoid hitting this limit.
*/
- if (event_overflow) {
+ if (event_overflow)
SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
- debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
- __func__,
- DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
- }
-
- if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "%s: "
- "overflows: sample %llu event %llu"
- " total %llu num_sdb %llu\n",
- __func__, sampl_overflow, event_overflow,
- OVERFLOW_REG(hwc), num_sdb);
}
static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
@@ -1442,9 +1324,6 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
te->header.a = 0;
-
- debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
- __func__, i, range_scan, aux->head);
}
/*
@@ -1463,7 +1342,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
- if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+ if (handle->head & ~PAGE_MASK)
return -EINVAL;
aux->head = handle->head >> PAGE_SHIFT;
@@ -1475,10 +1354,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
- debug_sprintf_event(sfdbg, 6,
- "%s: range %ld head %ld alert %ld empty %ld\n",
- __func__, range, aux->head, aux->alert_mark,
- aux->empty_mark);
if (range > aux_sdb_num_empty(aux)) {
range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
@@ -1504,12 +1379,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
- "index %ld tear %#lx dear %#lx\n", __func__,
- aux->head, aux->alert_mark, aux->empty_mark,
- head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
-
return 0;
}
@@ -1522,16 +1391,15 @@ static int aux_output_begin(struct perf_output_handle *handle,
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
unsigned long long *overflow)
{
- union hws_trailer_header old, prev, new;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
te = aux_sdb_trailer(aux, alert_index);
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
- *overflow = old.overflow;
- if (old.f) {
+ *overflow = prev.overflow;
+ if (prev.f) {
/*
* SDB is already set by hardware.
* Abort and try to set somewhere
@@ -1541,8 +1409,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
}
new.a = 1;
new.overflow = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
return true;
}
@@ -1571,14 +1438,11 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long i, range_scan, idx, idx_old;
- union hws_trailer_header old, prev, new;
+ union hws_trailer_header prev, new;
+ unsigned long i, range_scan, idx;
unsigned long long orig_overflow;
struct hws_trailer_entry *te;
- debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
- "empty %ld\n", __func__, range, aux->head,
- aux->alert_mark, aux->empty_mark);
if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
@@ -1601,31 +1465,26 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* indicator fall into this range, set it.
*/
range_scan = range - aux_sdb_num_empty(aux);
- idx_old = idx = aux->empty_mark + 1;
+ idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
- orig_overflow = old.overflow;
+ orig_overflow = prev.overflow;
new.f = 0;
new.overflow = 0;
if (idx == aux->alert_mark)
new.a = 1;
else
new.a = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
*overflow += orig_overflow;
}
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
- debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
- "empty %ld\n", __func__, range_scan, idx_old,
- idx - 1, aux->empty_mark);
return true;
}
@@ -1642,12 +1501,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long num_sdb;
aux = perf_get_aux(handle);
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Inform user space new data arrived */
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
- debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1661,7 +1520,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
num_sdb);
break;
}
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Update head and alert_mark to new position */
@@ -1681,23 +1540,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
"pages to overflow\n", aux->sfb.num_sdb);
- debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
- "overflow %lld\n", __func__,
- aux->head, range, overflow);
} else {
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "already full, try another\n",
- __func__,
- aux->head, aux->alert_mark);
}
}
-
- if (done)
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "empty %ld\n", __func__, aux->head,
- aux->alert_mark, aux->empty_mark);
}
/*
@@ -1719,8 +1566,6 @@ static void aux_buffer_free(void *data)
kfree(aux->sdbt_index);
kfree(aux->sdb_index);
kfree(aux);
-
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
@@ -1828,9 +1673,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
-
return aux;
no_sdbt:
@@ -1863,8 +1705,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
memset(&si, 0, sizeof(si));
if (event->cpu == -1) {
- if (qsi(&si))
- return -ENODEV;
+ qsi(&si);
} else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
@@ -1874,7 +1715,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
si = cpuhw->qsi;
}
- do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+ do_freq = !!SAMPL_FREQ_MODE(&event->hw);
rate = getrate(do_freq, value, &si);
if (!rate)
return -EINVAL;
@@ -1882,10 +1723,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
event->attr.sample_period = rate;
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
- debug_sprintf_event(sfdbg, 4, "%s:"
- " cpu %d value %#llx period %#llx freq %d\n",
- __func__, event->cpu, value,
- event->attr.sample_period, do_freq);
return 0;
}
@@ -1896,12 +1733,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (!(event->hw.state & PERF_HES_STOPPED))
return;
-
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
perf_pmu_disable(event->pmu);
event->hw.state = 0;
cpuhw->lsctl.cs = 1;
@@ -1926,7 +1759,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
event->hw.state |= PERF_HES_STOPPED;
if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
- hw_perf_event_update(event, 1);
+ /* CPU hotplug off removes SDBs. No samples to extract. */
+ if (cpuhw->flags & PMU_F_RESERVED)
+ hw_perf_event_update(event, 1);
event->hw.state |= PERF_HES_UPTODATE;
}
perf_pmu_enable(event->pmu);
@@ -1936,15 +1771,14 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct aux_buffer *aux;
- int err;
+ int err = 0;
if (cpuhw->flags & PMU_F_IN_USE)
return -EAGAIN;
- if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+ if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw))
return -EINVAL;
- err = 0;
perf_pmu_disable(event->pmu);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -2104,18 +1938,17 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Program alert request */
if (alert & CPU_MF_INT_SF_PRA) {
- if (cpuhw->flags & PMU_F_IN_USE)
+ if (cpuhw->flags & PMU_F_IN_USE) {
if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
hw_collect_aux(cpuhw);
else
hw_perf_event_update(cpuhw->event, 0);
- else
- WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
}
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
alert);
/* Sampling authorization change request */
@@ -2131,7 +1964,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Invalid sampling buffer entry */
if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
- pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ pr_err("A sampling buffer entry is incorrect (alert=%#x)\n",
alert);
cpuhw->flags |= PMU_F_ERR_IBE;
sf_disable();
@@ -2143,7 +1976,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
/* Ignore the notification if no events are scheduled on the PMU.
* This might be racy...
*/
- if (!atomic_read(&num_events))
+ if (!refcount_read(&num_events))
return 0;
local_irq_disable();
@@ -2205,10 +2038,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
.get = param_get_sfb_size,
};
-#define RS_INIT_FAILURE_QSI 0x0001
-#define RS_INIT_FAILURE_BSDES 0x0002
-#define RS_INIT_FAILURE_ALRT 0x0003
-#define RS_INIT_FAILURE_PERF 0x0004
+enum {
+ RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */
+ RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */
+ RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */
+};
+
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
@@ -2224,11 +2059,7 @@ static int __init init_cpum_sampling_pmu(void)
return -ENODEV;
memset(&si, 0, sizeof(si));
- if (qsi(&si)) {
- pr_cpumsf_err(RS_INIT_FAILURE_QSI);
- return -ENODEV;
- }
-
+ qsi(&si);
if (!si.as && !si.ad)
return -ENODEV;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index dfa77da2fd2e..2b9611c4718e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -57,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
return sie_block(regs)->gpsw.addr;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
return is_in_guest(regs) ? instruction_pointer_guest(regs)
: instruction_pointer(regs);
@@ -84,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
return flags;
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
/* Check if the cpum_sf PMU has created the pt_regs structure.
* In this case, perf misc flags can be easily extracted. Otherwise,
@@ -218,39 +218,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- struct stack_frame_user __user *sf;
- unsigned long ip, sp;
- bool first = true;
-
- if (is_compat_task())
- return;
- perf_callchain_store(entry, instruction_pointer(regs));
- sf = (void __user *)user_stack_pointer(regs);
- pagefault_disable();
- while (entry->nr < entry->max_stack) {
- if (__get_user(sp, &sf->back_chain))
- break;
- if (__get_user(ip, &sf->gprs[8]))
- break;
- if (ip & 0x1) {
- /*
- * If the instruction address is invalid, and this
- * is the first stack frame, assume r14 has not
- * been written to the stack yet. Otherwise exit.
- */
- if (first && !(regs->gprs[14] & 0x1))
- ip = regs->gprs[14];
- else
- break;
- }
- perf_callchain_store(entry, ip);
- /* Sanity check: ABI requires SP to be aligned 8 bytes. */
- if (!sp || sp & 0x7)
- break;
- sf = (void __user *)sp;
- first = false;
- }
- pagefault_enable();
+ arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
}
/* Perf definitions for PMU event attributes in sysfs */
@@ -260,5 +228,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+ return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id);
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 4ad472d130a3..10725f5a6f0f 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,8 +36,8 @@ struct paicrypt_map {
struct pai_userdata *save; /* Page to store no-zero counters */
unsigned int active_events; /* # of PAI crypto users */
refcount_t refcnt; /* Reference count mapped buffers */
- enum paievt_mode mode; /* Type of event */
struct perf_event *event; /* Perf event for sampling */
+ struct list_head syswide_list; /* List system-wide sampling events */
};
struct paicrypt_mapptr {
@@ -84,20 +84,16 @@ static DEFINE_MUTEX(pai_reserve_mutex);
/* Adjust usage counters and remove allocated memory when all users are
* gone.
*/
-static void paicrypt_event_destroy(struct perf_event *event)
+static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu)
{
- struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr,
- event->cpu);
+ struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
struct paicrypt_map *cpump = mp->mapptr;
- static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
- debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %u\n", __func__,
- event->attr.config, event->cpu,
- cpump->active_events, cpump->mode,
+ debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d "
+ "refcnt %u\n", __func__, event->attr.config,
+ event->cpu, cpump->active_events,
refcount_read(&cpump->refcnt));
- free_page(PAI_SAVE_AREA(event));
if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
@@ -111,6 +107,23 @@ static void paicrypt_event_destroy(struct perf_event *event)
mutex_unlock(&pai_reserve_mutex);
}
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ static_branch_dec(&pai_key);
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paicrypt_event_destroy_cpu(event, event->cpu);
+ }
+}
+
static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
@@ -156,23 +169,15 @@ static u64 paicrypt_getall(struct perf_event *event)
return sum;
}
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for crypto events
- *
- * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
+/* Check concurrent access of counting and sampling for crypto events.
* This function is called in process context and it is save to block.
* When the event initialization functions fails, no other call back will
* be invoked.
*
* Allocate the memory for the event.
*/
-static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
+static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
{
- struct perf_event_attr *a = &event->attr;
struct paicrypt_map *cpump = NULL;
struct paicrypt_mapptr *mp;
int rc;
@@ -185,7 +190,7 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
goto unlock;
/* Allocate node for this event */
- mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu);
+ mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
cpump = mp->mapptr;
if (!cpump) { /* Paicrypt_map allocated? */
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
@@ -193,25 +198,9 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
rc = -ENOMEM;
goto free_root;
}
+ INIT_LIST_HEAD(&cpump->syswide_list);
}
- if (a->sample_period) { /* Sampling requested */
- if (cpump->mode != PAI_MODE_NONE)
- rc = -EBUSY; /* ... sampling/counting active */
- } else { /* Counting requested */
- if (cpump->mode == PAI_MODE_SAMPLING)
- rc = -EBUSY; /* ... and sampling active */
- }
- /*
- * This error case triggers when there is a conflict:
- * Either sampling requested and counting already active, or visa
- * versa. Therefore the struct paicrypto_map for this CPU is
- * needed or the error could not have occurred. Only adjust root
- * node refcount.
- */
- if (rc)
- goto free_root;
-
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
@@ -235,26 +224,58 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
/* Set mode and reference count */
rc = 0;
refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING;
mp->mapptr = cpump;
- debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %u page %#lx save %p rc %d\n",
- __func__, a->sample_period, cpump->active_events,
- cpump->mode, refcount_read(&cpump->refcnt),
+ debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx "
+ "save %p rc %d\n", __func__, cpump->active_events,
+ refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
goto unlock;
free_paicrypt_map:
+ /* Undo memory allocation */
kfree(cpump);
mp->mapptr = NULL;
free_root:
paicrypt_root_free();
-
unlock:
mutex_unlock(&pai_reserve_mutex);
return rc ? ERR_PTR(rc) : cpump;
}
+static int paicrypt_event_init_all(struct perf_event *event)
+{
+ struct paicrypt_map *cpump;
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ cpump = paicrypt_busy(event, cpu);
+ if (IS_ERR(cpump)) {
+ for_each_cpu(cpu, maskptr)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ rc = PTR_ERR(cpump);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error all cpumask are freed and all events have been destroyed.
+ * Save of which CPUs data structures have been allocated for.
+ * Release them in paicrypt_event_destroy call back function
+ * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
/* Might be called on different CPU than the one the event is intended for. */
static int paicrypt_event_init(struct perf_event *event)
{
@@ -269,10 +290,7 @@ static int paicrypt_event_init(struct perf_event *event)
if (a->config < PAI_CRYPTO_BASE ||
a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
- /* Allow only CRYPTO_ALL for sampling. */
+ /* Allow only CRYPTO_ALL for sampling */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
/* Get a page to store last counter values for sampling */
@@ -284,13 +302,17 @@ static int paicrypt_event_init(struct perf_event *event)
}
}
- cpump = paicrypt_busy(event);
- if (IS_ERR(cpump)) {
+ if (event->cpu >= 0) {
+ cpump = paicrypt_busy(event, event->cpu);
+ if (IS_ERR(cpump))
+ rc = PTR_ERR(cpump);
+ } else {
+ rc = paicrypt_event_init_all(event);
+ }
+ if (rc) {
free_page(PAI_SAVE_AREA(event));
- rc = PTR_ERR(cpump);
goto out;
}
-
event->destroy = paicrypt_event_destroy;
if (a->sample_period) {
@@ -331,8 +353,14 @@ static void paicrypt_start(struct perf_event *event, int flags)
sum = paicrypt_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
}
}
@@ -344,7 +372,7 @@ static int paicrypt_add(struct perf_event *event, int flags)
if (++cpump->active_events == 1) {
ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
- WRITE_ONCE(S390_lowcore.ccd, ccd);
+ WRITE_ONCE(get_lowcore()->ccd, ccd);
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
if (flags & PERF_EF_START)
@@ -353,6 +381,7 @@ static int paicrypt_add(struct perf_event *event, int flags)
return 0;
}
+static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *);
static void paicrypt_stop(struct perf_event *event, int flags)
{
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
@@ -361,8 +390,13 @@ static void paicrypt_stop(struct perf_event *event, int flags)
if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
} else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ perf_sched_cb_dec(event->pmu);
+ list_del(PAI_SWLIST(event));
+ } else {
+ paicrypt_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
}
event->hw.state = PERF_HES_STOPPED;
}
@@ -375,7 +409,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
paicrypt_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
- WRITE_ONCE(S390_lowcore.ccd, 0);
+ WRITE_ONCE(get_lowcore()->ccd, 0);
}
}
@@ -444,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
@@ -455,23 +489,30 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
}
/* Check if there is data to be saved on schedule out of a task. */
-static int paicrypt_have_sample(void)
+static void paicrypt_have_sample(struct perf_event *event,
+ struct paicrypt_map *cpump)
{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
size_t rawsize;
- int rc = 0;
if (!event) /* No event active */
- return 0;
+ return;
rawsize = paicrypt_copy(cpump->save, cpump->page,
(unsigned long *)PAI_SAVE_AREA(event),
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
+ event->attr.exclude_user,
+ event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
- rc = paicrypt_push_sample(rawsize, cpump, event);
- return rc;
+ paicrypt_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paicrypt_have_samples(void)
+{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paicrypt_have_sample(event, cpump);
}
/* Called on schedule-in and schedule-out. No access to event structure,
@@ -480,10 +521,10 @@ static int paicrypt_have_sample(void)
static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
+ * results on schedule_out and if page was dirty, save old values.
*/
if (!sched_in)
- paicrypt_have_sample();
+ paicrypt_have_samples();
}
/* Attribute definitions for paicrypt interface. As with other CPU
@@ -527,7 +568,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
/* Performance monitoring unit for mapped counters */
static struct pmu paicrypt = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = paicrypt_event_init,
.add = paicrypt_add,
.del = paicrypt_del,
@@ -697,6 +738,22 @@ static const char * const paicrypt_ctrnames[] = {
[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
[155] = "IBM_RESERVED_155",
[156] = "IBM_RESERVED_156",
+ [157] = "KM_FULL_XTS_AES_128",
+ [158] = "KM_FULL_XTS_AES_256",
+ [159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+ [160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+ [161] = "KMAC_HMAC_SHA_224",
+ [162] = "KMAC_HMAC_SHA_256",
+ [163] = "KMAC_HMAC_SHA_384",
+ [164] = "KMAC_HMAC_SHA_512",
+ [165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+ [166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+ [167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+ [168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+ [169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+ [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+ [171] = "PCKMO_ENCRYPT_AES_XTS_128",
+ [172] = "PCKMO_ENCRYPT_AES_XTS_256",
};
static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index a6da7e0cc7a6..a8f0bad99cf0 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -47,11 +47,11 @@ struct paiext_cb { /* PAI extension 1 control block */
struct paiext_map {
unsigned long *area; /* Area for CPU to store counters */
struct pai_userdata *save; /* Area to store non-zero counters */
- enum paievt_mode mode; /* Type of event */
unsigned int active_events; /* # of PAI Extension users */
refcount_t refcnt;
struct perf_event *event; /* Perf event for sampling */
struct paiext_cb *paiext_cb; /* PAI extension control block area */
+ struct list_head syswide_list; /* List system-wide sampling events */
};
struct paiext_mapptr {
@@ -70,6 +70,8 @@ static void paiext_root_free(void)
free_percpu(paiext_root.mapptr);
paiext_root.mapptr = NULL;
}
+ debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__,
+ refcount_read(&paiext_root.refcnt));
}
/* On initialization of first event also allocate per CPU data dynamically.
@@ -115,20 +117,34 @@ static void paiext_free(struct paiext_mapptr *mp)
}
/* Release the PMU if event is the last perf event */
-static void paiext_event_destroy(struct perf_event *event)
+static void paiext_event_destroy_cpu(struct perf_event *event, int cpu)
{
- struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu);
struct paiext_map *cpump = mp->mapptr;
- free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
mutex_unlock(&paiext_reserve_mutex);
- debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
- event->cpu, mp->mapptr);
+}
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+ for_each_cpu(cpu, mask)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paiext_event_destroy_cpu(event, event->cpu);
+ }
+ debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__,
+ event->cpu);
}
/* Used to avoid races in checking concurrent access of counting and
@@ -145,19 +161,18 @@ static void paiext_event_destroy(struct perf_event *event)
*
* Allocate the memory for the event.
*/
-static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+static int paiext_alloc_cpu(struct perf_event *event, int cpu)
{
struct paiext_mapptr *mp;
struct paiext_map *cpump;
int rc;
mutex_lock(&paiext_reserve_mutex);
-
rc = paiext_root_alloc();
if (rc)
goto unlock;
- mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ mp = per_cpu_ptr(paiext_root.mapptr, cpu);
cpump = mp->mapptr;
if (!cpump) { /* Paiext_map allocated? */
rc = -ENOMEM;
@@ -185,24 +200,13 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
paiext_free(mp);
goto undo;
}
+ INIT_LIST_HEAD(&cpump->syswide_list);
refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
- : PAI_MODE_COUNTING;
+ rc = 0;
} else {
- /* Multiple invocation, check what is active.
- * Supported are multiple counter events or only one sampling
- * event concurrently at any one time.
- */
- if (cpump->mode == PAI_MODE_SAMPLING ||
- (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
- rc = -EBUSY;
- goto undo;
- }
refcount_inc(&cpump->refcnt);
}
- rc = 0;
-
undo:
if (rc) {
/* Error in allocation of event, decrement anchor. Since
@@ -217,6 +221,38 @@ unlock:
return rc;
}
+static int paiext_alloc(struct perf_event *event)
+{
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ rc = paiext_alloc_cpu(event, cpu);
+ if (rc) {
+ for_each_cpu(cpu, maskptr)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error all cpumask are freed and all events have been destroyed.
+ * Save of which CPUs data structures have been allocated for.
+ * Release them in paicrypt_event_destroy call back function
+ * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
/* The PAI extension 1 control block supports up to 128 entries. Return
* the index within PAIE1_CB given the event number. Also validate event
* number.
@@ -246,9 +282,6 @@ static int paiext_event_init(struct perf_event *event)
rc = paiext_event_valid(event);
if (rc)
return rc;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
/* Allow only event NNPA_ALL for sampling. */
if (a->sample_period && a->config != PAI_NNPA_BASE)
return -EINVAL;
@@ -262,7 +295,10 @@ static int paiext_event_init(struct perf_event *event)
return -ENOMEM;
}
- rc = paiext_alloc(a, event);
+ if (event->cpu >= 0)
+ rc = paiext_alloc_cpu(event, event->cpu);
+ else
+ rc = paiext_alloc(event);
if (rc) {
free_page(PAI_SAVE_AREA(event));
return rc;
@@ -334,8 +370,15 @@ static void paiext_start(struct perf_event *event, int flags)
sum = paiext_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+ PAIE1_CTRBLOCK_SZ);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
}
}
@@ -346,12 +389,10 @@ static int paiext_add(struct perf_event *event, int flags)
struct paiext_cb *pcb = cpump->paiext_cb;
if (++cpump->active_events == 1) {
- S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+ get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb);
pcb->acc = virt_to_phys(cpump->area) | 0x1;
/* Enable CPU instruction lookup for PAIE1 control block */
local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
}
if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
@@ -359,6 +400,7 @@ static int paiext_add(struct perf_event *event, int flags)
return 0;
}
+static void paiext_have_sample(struct perf_event *, struct paiext_map *);
static void paiext_stop(struct perf_event *event, int flags)
{
struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
@@ -367,8 +409,13 @@ static void paiext_stop(struct perf_event *event, int flags)
if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
} else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_del(PAI_SWLIST(event));
+ perf_sched_cb_dec(event->pmu);
+ } else {
+ paiext_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
}
event->hw.state = PERF_HES_STOPPED;
}
@@ -384,9 +431,7 @@ static void paiext_del(struct perf_event *event, int flags)
/* Disable CPU instruction lookup for PAIE1 control block */
local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
pcb->acc = 0;
- S390_lowcore.aicd = 0;
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
+ get_lowcore()->aicd = 0;
}
}
@@ -458,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
@@ -470,21 +515,28 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
}
/* Check if there is data to be saved on schedule out of a task. */
-static int paiext_have_sample(void)
+static void paiext_have_sample(struct perf_event *event,
+ struct paiext_map *cpump)
{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
size_t rawsize;
- int rc = 0;
if (!event)
- return 0;
+ return;
rawsize = paiext_copy(cpump->save, cpump->area,
(unsigned long *)PAI_SAVE_AREA(event));
if (rawsize) /* Incremented counters */
- rc = paiext_push_sample(rawsize, cpump, event);
- return rc;
+ paiext_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paiext_have_samples(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paiext_have_sample(event, cpump);
}
/* Called on schedule-in and schedule-out. No access to event structure,
@@ -493,10 +545,10 @@ static int paiext_have_sample(void)
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
+ * results on schedule_out and if page was dirty, save old values.
*/
if (!sched_in)
- paiext_have_sample();
+ paiext_have_samples();
}
/* Attribute definitions for pai extension1 interface. As with other CPU
@@ -542,7 +594,7 @@ static const struct attribute_group *paiext_attr_groups[] = {
/* Performance monitoring unit for mapped counters */
static struct pmu paiext = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = paiext_event_init,
.add = paiext_add,
.del = paiext_del,
@@ -583,6 +635,15 @@ static const char * const paiext_ctrnames[] = {
[25] = "NNPA_1MFRAME",
[26] = "NNPA_2GFRAME",
[27] = "NNPA_ACCESSEXCEPT",
+ [28] = "NNPA_TRANSFORM",
+ [29] = "NNPA_GELU",
+ [30] = "NNPA_MOMENTS",
+ [31] = "NNPA_LAYERNORM",
+ [32] = "NNPA_MATMUL_OP_BCAST1",
+ [33] = "NNPA_SQRT",
+ [34] = "NNPA_INVSQRT",
+ [35] = "NNPA_NORM",
+ [36] = "NNPA_REDUCE",
};
static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index dd456b475861..9637aee43c40 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -71,10 +71,10 @@ void flush_thread(void)
void arch_setup_new_exec(void)
{
- if (S390_lowcore.current_pid != current->pid) {
- S390_lowcore.current_pid = current->pid;
+ if (get_lowcore()->current_pid != current->pid) {
+ get_lowcore()->current_pid = current->pid;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
}
@@ -86,11 +86,6 @@ void arch_release_task_struct(struct task_struct *tsk)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- /*
- * Save the floating-point or vector register state of the current
- * task and set the TIF_FPU flag to lazy restore the FPU register
- * state when returning to user space.
- */
save_user_fpu_regs();
*dst = *src;
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 65c1464eea4f..5ce9a795a0fe 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -17,7 +17,8 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
+
/*
* cpu_init - initializes state that is per-CPU.
*/
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 88087a32ebc6..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@@ -20,20 +21,15 @@
# r3 = Parameter for function
#
SYM_CODE_START(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
- lghi %r1,__LC_GPREGS_SAVE_AREA
- stmg %r0,%r15,0(%r1)
- mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ GET_LC %r13
/* Control registers */
- lghi %r1,__LC_CREGS_SAVE_AREA
- stctg %c0,%c15,0(%r1)
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
- lghi %r1,__LC_AREGS_SAVE_AREA
- stam %a0,%a15,0(%r1)
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
- lghi %r1,__LC_FPREGS_SAVE_AREA
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@@ -51,21 +47,21 @@ SYM_CODE_START(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
- lghi %r1,__LC_FP_CREG_SAVE_AREA
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
- lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
- lghi %r1,__LC_PREFIX_SAVE_AREA
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
- lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
larl %r4,clkcmp
stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
- lghi %r1,__LC_PSW_SAVE_AREA
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 24ed33f044ec..d78bcfe707b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -146,30 +146,35 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
unsigned long __bootdata_preserved(max_mappable);
-unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info);
-unsigned long __bootdata_preserved(__kaslr_offset);
+struct vm_layout __bootdata_preserved(vm_layout);
+EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
-unsigned long VMALLOC_START;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
-unsigned long vmemmap_size;
+unsigned long __bootdata_preserved(vmemmap_size);
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
@@ -360,36 +365,24 @@ void *restart_stack;
unsigned long stack_alloc(void)
{
-#ifdef CONFIG_VMAP_STACK
- void *ret;
+ void *stack;
- ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
- NUMA_NO_NODE, __builtin_return_address(0));
- kmemleak_not_leak(ret);
- return (unsigned long)ret;
-#else
- return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-#endif
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ kmemleak_not_leak(stack);
+ return (unsigned long)stack;
}
void stack_free(unsigned long stack)
{
-#ifdef CONFIG_VMAP_STACK
- vfree((void *) stack);
-#else
- free_pages(stack, THREAD_SIZE_ORDER);
-#endif
+ vfree((void *)stack);
}
static unsigned long __init stack_alloc_early(void)
{
unsigned long stack;
- stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!stack) {
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- }
+ stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
return stack;
}
@@ -406,6 +399,7 @@ static void __init setup_lowcore(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
lc->external_new_psw.mask = PSW_KERNEL_BITS;
@@ -421,16 +415,16 @@ static void __init setup_lowcore(void)
lc->clock_comparator = clock_comparator_max;
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->preempt_count = S390_lowcore.preempt_count;
+ lc->machine_flags = get_lowcore()->machine_flags;
+ lc->preempt_count = get_lowcore()->preempt_count;
nmi_alloc_mcesa_early(&lc->mcesad);
- lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
- lc->exit_timer = S390_lowcore.exit_timer;
- lc->user_timer = S390_lowcore.user_timer;
- lc->system_timer = S390_lowcore.system_timer;
- lc->steal_timer = S390_lowcore.steal_timer;
- lc->last_update_timer = S390_lowcore.last_update_timer;
- lc->last_update_clock = S390_lowcore.last_update_clock;
+ lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+ lc->exit_timer = get_lowcore()->exit_timer;
+ lc->user_timer = get_lowcore()->user_timer;
+ lc->system_timer = get_lowcore()->system_timer;
+ lc->steal_timer = get_lowcore()->steal_timer;
+ lc->last_update_timer = get_lowcore()->last_update_timer;
+ lc->last_update_clock = get_lowcore()->last_update_clock;
/*
* Allocate the global restart stack which is the same for
* all CPUs in case *one* of them does a PSW restart.
@@ -439,7 +433,7 @@ static void __init setup_lowcore(void)
lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
- lc->kernel_stack = S390_lowcore.kernel_stack;
+ lc->kernel_stack = get_lowcore()->kernel_stack;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
* restart data to the absolute zero lowcore. This is necessary if
@@ -455,8 +449,8 @@ static void __init setup_lowcore(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- lc->kernel_asce = S390_lowcore.kernel_asce;
- lc->user_asce = S390_lowcore.user_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = get_lowcore()->user_asce;
system_ctlreg_init_save_area(lc);
abs_lc = get_abs_lowcore();
@@ -512,10 +506,7 @@ static void __init setup_resources(void)
bss_resource.end = __pa_symbol(__bss_stop) - 1;
for_each_mem_range(i, &start, &end) {
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*res), 8);
+ res = memblock_alloc_or_panic(sizeof(*res), 8);
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
@@ -534,10 +525,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = memblock_alloc(sizeof(*sub_res), 8);
- if (!sub_res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*sub_res), 8);
+ sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -704,7 +692,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -712,7 +700,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -734,7 +722,23 @@ static void __init memblock_add_physmem_info(void)
}
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for lowcore.
+ */
+static void __init reserve_lowcore(void)
+{
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if (absolute_pointer(__identity_base) < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
+ }
+}
+
+/*
+ * Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
@@ -765,7 +769,7 @@ static void __init relocate_amode31_section(void)
unsigned long amode31_size = __eamode31 - __samode31;
long amode31_offset, *ptr;
- amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
+ amode31_offset = AMODE31_START - (unsigned long)__samode31;
pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
/* Move original AMODE31 section to the new one */
@@ -808,9 +812,7 @@ static void __init setup_randomness(void)
{
struct sysinfo_3_2_2 *vmms;
- vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!vmms)
- panic("Failed to allocate memory for sysinfo structure\n");
+ vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
@@ -870,6 +872,23 @@ static void __init log_component_list(void)
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -888,6 +907,12 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
+
+ if (have_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
log_component_list();
@@ -915,6 +940,7 @@ void __init setup_arch(char **cmdline_p)
/* Do some memory reservations *before* memory is added to memblock */
reserve_pgtables();
+ reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
@@ -959,6 +985,7 @@ void __init setup_arch(char **cmdline_p)
if (test_facility(193))
static_branch_enable(&cpu_has_bear);
+ setup_protection_map();
/*
* Create kernel page tables.
*/
@@ -986,3 +1013,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6c2cb345402f..e48013cd832c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -30,9 +30,9 @@
#include <linux/compat.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/vdso.h>
#include "entry.h"
/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0324649aae0a..7b08399b0846 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -74,18 +74,15 @@ enum {
CPU_STATE_CONFIGURED,
};
-static DEFINE_PER_CPU(struct cpu *, cpu_device);
-
-struct pcpu {
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- unsigned long ec_clk; /* sigp timestamp for ec_xxx */
- signed char state; /* physical cpu state */
- signed char polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
-
static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -176,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
@@ -203,7 +200,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
mcck_stack = stack_alloc();
if (!lc || !nodat_stack || !async_stack || !mcck_stack)
goto out;
- memcpy(lc, &S390_lowcore, 512);
+ memcpy(lc, get_lowcore(), 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + STACK_INIT_OFFSET;
lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
@@ -232,13 +229,11 @@ out:
return -ENOMEM;
}
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
@@ -261,13 +256,14 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
- lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
lc->user_asce = s390_invalid_asce;
- lc->machine_flags = S390_lowcore.machine_flags;
+ lc->machine_flags = get_lowcore()->machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
abs_lc = get_abs_lowcore();
@@ -279,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
lc->current_task = (unsigned long)tsk;
@@ -298,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->steal_timer = 0;
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@@ -322,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void pcpu_delegate(struct pcpu *pcpu,
+static void pcpu_delegate(struct pcpu *pcpu, int cpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- lc = lowcore_ptr[pcpu - pcpu_devices];
+ lc = lowcore_ptr[cpu];
source_cpu = stap();
if (pcpu->address == source_cpu) {
@@ -379,38 +371,22 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
- pcpu_devices[0].address = stap();
+ per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- struct pcpu *pcpu;
-
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
* Call function on the ipl CPU.
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
- if (pcpu_devices[0].address == stap())
- lc = &S390_lowcore;
+ if (ipl_pcpu->address == stap())
+ lc = get_lowcore();
- pcpu_delegate(&pcpu_devices[0], func, data,
- lc->nodat_stack);
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -418,21 +394,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
- if (pcpu_running(pcpu_devices + cpu))
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@@ -444,7 +420,7 @@ void notrace smp_yield_cpu(int cpu)
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@@ -465,7 +441,7 @@ void notrace smp_emergency_stop(void)
end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
@@ -474,7 +450,7 @@ void notrace smp_emergency_stop(void)
}
while (get_tod_clock() < end) {
for_each_cpu(cpu, &cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@@ -489,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
@@ -504,8 +481,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
- while (!pcpu_stopped(pcpu_devices + cpu))
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -519,7 +497,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@@ -544,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@@ -559,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu)
*/
void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
@@ -577,7 +555,7 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
@@ -596,7 +574,7 @@ int smp_store_status(int cpu)
/*
* Collect CPU state of the previous, crashed system.
- * There are four cases:
+ * There are three cases:
* 1) standard zfcp/nvme dump
* condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
@@ -609,16 +587,16 @@ int smp_store_status(int cpu)
* with sigp stop-and-store-status. The firmware or the boot-loader
* stored the registers of the boot CPU in the absolute lowcore in the
* memory of the old system.
- * 3) kdump and the old kernel did not store the CPU state,
- * or stand-alone kdump for DASD
- * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * 3) kdump or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The kexec code or the boot-loader
* stored the registers of the boot CPU in the memory of the old system.
- * 4) kdump and the old kernel stored the CPU state
- * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
- * This case does not exist for s390 anymore, setup_arch explicitly
- * deactivates the elfcorehdr= kernel parameter
+ *
+ * Note that the legacy kdump mode where the old kernel stored the CPU states
+ * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr=
+ * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
+ * of the elfcorehdr= parameter.
*/
static bool dump_available(void)
{
@@ -633,9 +611,7 @@ void __init smp_save_dump_ipl_cpu(void)
if (!dump_available())
return;
sa = save_area_alloc(true);
- regs = memblock_alloc(512, 8);
- if (!sa || !regs)
- panic("could not allocate memory for boot CPU save area\n");
+ regs = memblock_alloc_or_panic(512, 8);
copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
memblock_free(regs, 512);
@@ -668,8 +644,6 @@ void __init smp_save_dump_secondary_cpus(void)
SIGP_CC_NOT_OPERATIONAL)
continue;
sa = save_area_alloc(false);
- if (!sa)
- panic("could not allocate memory for save area\n");
__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
save_area_add_regs(sa, page);
if (cpu_has_vx()) {
@@ -685,17 +659,36 @@ void __init smp_save_dump_secondary_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
- pcpu_devices[cpu].polarization = val;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).polarization;
+}
+
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+ per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+ int i;
+
+ cpu = smp_get_base_cpu(cpu);
+ for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+ smp_cpu_set_capacity(i, val);
}
int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].address;
+ return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -719,8 +712,6 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
}
}
-static int smp_add_present_cpu(int cpu);
-
static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
bool configured, bool early)
{
@@ -736,15 +727,16 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
else
pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
set_cpu_present(cpu, true);
- if (!early && smp_add_present_cpu(cpu) != 0)
+ if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
else
nr++;
@@ -771,7 +763,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
- core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@@ -796,10 +788,7 @@ void __init smp_detect_cpus(void)
u16 address;
/* Get CPU information */
- info = memblock_alloc(sizeof(*info), 8);
- if (!info)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*info), 8);
+ info = memblock_alloc_or_panic(sizeof(*info), 8);
smp_get_core_info(info, 1);
/* Find boot CPU type */
if (sclp.has_core_type) {
@@ -831,9 +820,6 @@ void __init smp_detect_cpus(void)
s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
-
- /* Add CPUs present at boot */
- __smp_rescan_cpus(info, true);
memblock_free(info, sizeof(*info));
}
@@ -842,15 +828,16 @@ void __init smp_detect_cpus(void)
*/
static void smp_start_secondary(void *cpuvoid)
{
+ struct lowcore *lc = get_lowcore();
int cpu = raw_smp_processor_id();
- S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long)restart_stack;
- S390_lowcore.restart_fn = (unsigned long)do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1U;
- S390_lowcore.restart_flags = 0;
- restore_access_regs(S390_lowcore.access_regs_save_area);
+ lc->last_update_clock = get_tod_clock();
+ lc->restart_stack = (unsigned long)restart_stack;
+ lc->restart_fn = (unsigned long)do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1U;
+ lc->restart_flags = 0;
+ restore_access_regs(lc->access_regs_save_area);
cpu_init();
rcutree_report_cpu_starting(cpu);
init_cpu_timer();
@@ -873,7 +860,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -891,8 +878,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
*/
system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
@@ -937,18 +924,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
+ pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@@ -973,24 +961,30 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
system_ctl_set_bit(0, 13);
+ smp_rescan_cpus(true);
}
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
+ struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
- pcpu->state = CPU_STATE_CONFIGURED;
- S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}
void __init smp_setup_processor_id(void)
{
- pcpu_devices[0].address = stap();
- S390_lowcore.cpu_nr = 0;
- S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
- S390_lowcore.spinlock_index = 0;
+ struct lowcore *lc = get_lowcore();
+
+ lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
}
/*
@@ -1010,7 +1004,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -1036,7 +1030,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@@ -1048,7 +1042,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_STANDBY;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1063,7 +1057,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_CONFIGURED;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1082,7 +1076,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -1108,35 +1102,34 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
}
-static int smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- struct device *s;
- struct cpu *c;
+ return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
+{
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
- per_cpu(cpu_device, cpu) = c;
- s = &c->dev;
- c->hotpluggable = !!cpu;
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
rc = register_cpu(c, cpu);
if (rc)
goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
rc = topology_cpu_init(c);
@@ -1145,14 +1138,14 @@ static int smp_add_present_cpu(int cpu)
return 0;
out_topology:
- sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
unregister_cpu(c);
out:
return rc;
}
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
{
struct sclp_core_info *info;
int nr;
@@ -1161,7 +1154,7 @@ int __ref smp_rescan_cpus(void)
if (!info)
return -ENOMEM;
smp_get_core_info(info, 0);
- nr = __smp_rescan_cpus(info, false);
+ nr = __smp_rescan_cpus(info, early);
kfree(info);
if (nr)
topology_schedule_update();
@@ -1178,7 +1171,7 @@ static ssize_t __ref rescan_store(struct device *dev,
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
- rc = smp_rescan_cpus();
+ rc = smp_rescan_cpus(false);
unlock_device_hotplug();
return rc ? rc : count;
}
@@ -1187,7 +1180,7 @@ static DEVICE_ATTR_WO(rescan);
static int __init s390_smp_init(void)
{
struct device *dev_root;
- int cpu, rc = 0;
+ int rc;
dev_root = bus_get_dev_root(&cpu_subsys);
if (dev_root) {
@@ -1196,17 +1189,9 @@ static int __init s390_smp_init(void)
if (rc)
return rc;
}
-
- for_each_present_cpu(cpu) {
- rc = smp_add_present_cpu(cpu);
- if (rc)
- goto out;
- }
-
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
rc = rc <= 0 ? rc : 0;
-out:
return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 94f440e38303..40edfde25f5b 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -5,6 +5,7 @@
* Copyright IBM Corp. 2006
*/
+#include <linux/perf_event.h>
#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
@@ -62,42 +63,102 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return 0;
}
-void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
- const struct pt_regs *regs)
+static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry, bool perf,
+ unsigned long ip)
+{
+#ifdef CONFIG_PERF_EVENTS
+ if (perf) {
+ if (perf_callchain_store(entry, ip))
+ return false;
+ return true;
+ }
+#endif
+ return consume_entry(cookie, ip);
+}
+
+static inline bool ip_invalid(unsigned long ip)
{
+ /*
+ * Perform some basic checks if an instruction address taken
+ * from unreliable source is invalid.
+ */
+ if (ip & 1)
+ return true;
+ if (ip < mmap_min_addr)
+ return true;
+ if (ip >= current->mm->context.asce_limit)
+ return true;
+ return false;
+}
+
+static inline bool ip_within_vdso(unsigned long ip)
+{
+ return in_range(ip, current->mm->context.vdso_base, vdso_text_size());
+}
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry,
+ const struct pt_regs *regs, bool perf)
+{
+ struct stack_frame_vdso_wrapper __user *sf_vdso;
struct stack_frame_user __user *sf;
unsigned long ip, sp;
bool first = true;
if (is_compat_task())
return;
- if (!consume_entry(cookie, instruction_pointer(regs)))
+ if (!current->mm)
+ return;
+ ip = instruction_pointer(regs);
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
return;
sf = (void __user *)user_stack_pointer(regs);
pagefault_disable();
while (1) {
if (__get_user(sp, &sf->back_chain))
break;
- if (__get_user(ip, &sf->gprs[8]))
+ /*
+ * VDSO entry code has a non-standard stack frame layout.
+ * See VDSO user wrapper code for details.
+ */
+ if (!sp && ip_within_vdso(ip)) {
+ sf_vdso = (void __user *)sf;
+ if (__get_user(ip, &sf_vdso->return_address))
+ break;
+ sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD;
+ sf = (void __user *)sp;
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ } else {
+ sf = (void __user *)sp;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ }
+ /* Sanity check: ABI requires SP to be 8 byte aligned. */
+ if (sp & 0x7)
break;
- if (ip & 0x1) {
+ if (ip_invalid(ip)) {
/*
* If the instruction address is invalid, and this
* is the first stack frame, assume r14 has not
* been written to the stack yet. Otherwise exit.
*/
- if (first && !(regs->gprs[14] & 0x1))
- ip = regs->gprs[14];
- else
+ if (!first)
+ break;
+ ip = regs->gprs[14];
+ if (ip_invalid(ip))
break;
}
- if (!consume_entry(cookie, ip))
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
break;
- /* Sanity check: ABI requires SP to be aligned 8 bytes. */
- if (!sp || sp & 0x7)
- break;
- sf = (void __user *)sp;
first = false;
}
pagefault_enable();
}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
+}
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 30bb20461db4..d40f0b983e74 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -17,6 +17,7 @@
#include <asm/ebcdic.h>
#include <asm/facility.h>
#include <asm/sthyi.h>
+#include <asm/asm.h>
#include "entry.h"
#define DED_WEIGHT 0xffff
@@ -300,33 +301,56 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
return (struct diag204_x_part_block *)&block->cpus[i];
}
-static void fill_diag(struct sthyi_sctns *sctns)
+static void *diag204_get_data(bool diag204_allow_busy)
{
- int i, r, pages;
- bool this_lpar;
+ unsigned long subcode;
void *diag204_buf;
- void *diag224_buf = NULL;
- struct diag204_x_info_blk_hdr *ti_hdr;
- struct diag204_x_part_block *part_block;
- struct diag204_x_phys_block *phys_block;
- struct lpar_cpu_inf lpar_inf = {};
-
- /* Errors are handled through the validity bits in the response. */
- pages = diag204((unsigned long)DIAG204_SUBC_RSI |
- (unsigned long)DIAG204_INFO_EXT, 0, NULL);
- if (pages <= 0)
- return;
-
+ int pages, rc;
+
+ subcode = DIAG204_SUBC_RSI;
+ subcode |= DIAG204_INFO_EXT;
+ pages = diag204(subcode, 0, NULL);
+ if (pages < 0)
+ return ERR_PTR(pages);
+ if (pages == 0)
+ return ERR_PTR(-ENODATA);
diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (!diag204_buf)
- return;
+ return ERR_PTR(-ENOMEM);
+ subcode = DIAG204_SUBC_STIB7;
+ subcode |= DIAG204_INFO_EXT;
+ if (diag204_has_bif() && diag204_allow_busy)
+ subcode |= DIAG204_BIF_BIT;
+ rc = diag204(subcode, pages, diag204_buf);
+ if (rc < 0) {
+ vfree(diag204_buf);
+ return ERR_PTR(rc);
+ }
+ return diag204_buf;
+}
- r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
- (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
- if (r < 0)
- goto out;
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+ /*
+ * Check if validity bits are set when diag204 data
+ * is gathered.
+ */
+ if (sctns->par.infpval1)
+ return true;
+ return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+ int i;
+ bool this_lpar;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
if (!diag224_buf || diag224(diag224_buf))
@@ -392,7 +416,6 @@ static void fill_diag(struct sthyi_sctns *sctns)
out:
free_page((unsigned long)diag224_buf);
- vfree(diag204_buf);
}
static int sthyi(u64 vaddr, u64 *rc)
@@ -403,30 +426,41 @@ static int sthyi(u64 vaddr, u64 *rc)
asm volatile(
".insn rre,0xB2560000,%[r1],%[r2]\n"
- "ipm %[cc]\n"
- "srl %[cc],28\n"
- : [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
: [r1] "d" (r1.pair)
- : "memory", "cc");
+ : CC_CLOBBER_LIST("memory"));
*rc = r2.odd;
- return cc;
+ return CC_TRANSFORM(cc);
}
static int fill_dst(void *dst, u64 *rc)
{
+ void *diag204_buf;
+
struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
/*
* If the facility is on, we don't want to emulate the instruction.
* We ask the hypervisor to provide the data.
*/
- if (test_facility(74))
+ if (test_facility(74)) {
+ memset(dst, 0, PAGE_SIZE);
return sthyi((u64)dst, rc);
-
+ }
+ /*
+ * When emulating, if diag204 returns BUSY don't reset dst buffer
+ * and use cached data.
+ */
+ *rc = 0;
+ diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+ if (IS_ERR(diag204_buf))
+ return PTR_ERR(diag204_buf);
+ memset(dst, 0, PAGE_SIZE);
fill_hdr(sctns);
fill_stsi(sctns);
- fill_diag(sctns);
- *rc = 0;
+ fill_diag(sctns, diag204_buf);
+ vfree(diag204_buf);
return 0;
}
@@ -445,11 +479,14 @@ static int sthyi_update_cache(u64 *rc)
{
int r;
- memset(sthyi_cache.info, 0, PAGE_SIZE);
r = fill_dst(sthyi_cache.info, rc);
- if (r)
- return r;
- sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ if (r == 0) {
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ } else if (r == -EBUSY) {
+ /* mark as expired and return 0 to keep using cached data */
+ sthyi_cache.end = jiffies - 1;
+ r = 0;
+ }
return r;
}
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..5ec28028315b 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -38,33 +38,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
@@ -151,8 +124,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
add_random_kstack_offset();
enter_from_user_mode(regs);
- regs->psw = S390_lowcore.svc_old_psw;
- regs->int_code = S390_lowcore.svc_int_code;
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
update_timer_sys();
if (static_branch_likely(&cpu_has_bear))
current->thread.last_break = regs->last_break;
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index fb85e797946d..c5d958a09ff4 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -4,15 +4,15 @@ gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
uapi := $(gen)/uapi/asm
-syscall := $(srctree)/$(src)/syscall.tbl
-systbl := $(srctree)/$(src)/syscalltbl
+syscall := $(src)/syscall.tbl
+systbl := $(src)/syscalltbl
gen-y := $(kapi)/syscall_table.h
kapi-hdrs-y := $(kapi)/unistd_nr.h
uapi-hdrs-y := $(uapi)/unistd_32.h
uapi-hdrs-y += $(uapi)/unistd_64.h
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
+targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
PHONY += kapi uapi
@@ -23,23 +23,26 @@ uapi: $(uapi-hdrs-y)
# Create output directory if not already present
$(shell mkdir -p $(uapi) $(kapi))
-filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@
-filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+quiet_cmd_sysnr = SYSNR $@
+ cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@
-filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $<
+quiet_cmd_syscalls = SYSTBL $@
+ cmd_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< > $@
syshdr_abi_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syshdr)
syshdr_abi_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syshdr)
-$(kapi)/syscall_table.h: $(syscall) FORCE
- $(call filechk,syscalls)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syscalls)
sysnr_abi_unistd_nr := common,32,64
-$(kapi)/unistd_nr.h: $(syscall) FORCE
- $(call filechk,sysnr)
+$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,sysnr)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 095bb86339a7..e9115b4d8b63 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
@@ -464,3 +464,8 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal sys_mseal
+463 common setxattrat sys_setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat sys_removexattrat
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 2be30a96696a..88055f58fbda 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
.open = stsi_open_##fc##_##s1##_##s2, \
.release = stsi_release, \
.read = stsi_read, \
- .llseek = no_llseek, \
};
static int stsi_release(struct inode *inode, struct file *file)
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index c0a70efa2426..26f2981aa09e 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -18,8 +18,7 @@
* affects a few functions that are not performance-relevant.
*/
.macro BR_EX_AMODE31_r14
- larl %r1,0f
- ex 0,0(%r1)
+ exrl 0,0f
j .
0: br %r14
.endm
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fb9f31f36628..e9f47c3a6197 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -36,7 +36,6 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/kprobes.h>
@@ -131,7 +130,7 @@ void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = clock_comparator_max;
+ get_lowcore()->clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -139,8 +138,8 @@ void clock_comparator_work(void)
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- S390_lowcore.clock_comparator = get_tod_clock() + delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return 0;
}
@@ -153,8 +152,8 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
@@ -184,8 +183,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == clock_comparator_max)
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator == clock_comparator_max)
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
static void stp_timing_alert(struct stp_irq_parm *);
@@ -255,6 +254,7 @@ static struct clocksource clocksource_tod = {
.shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+ .id = CSID_S390_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
@@ -408,12 +408,12 @@ static void clock_sync_global(long delta)
static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != clock_comparator_max) {
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator != clock_comparator_max) {
+ get_lowcore()->clock_comparator += delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
/* Adjust the last_update_clock time-stamp. */
- S390_lowcore.last_update_clock += delta;
+ get_lowcore()->last_update_clock += delta;
}
/* Single threaded workqueue used for stp sync events */
@@ -468,6 +468,12 @@ static void __init stp_reset(void)
}
}
+bool stp_enabled(void)
+{
+ return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online;
+}
+EXPORT_SYMBOL(stp_enabled);
+
static void stp_timeout(struct timer_list *unused)
{
queue_work(time_sync_wq, &stp_work);
@@ -656,12 +662,12 @@ static void stp_check_leap(void)
if (ret < 0)
pr_err("failed to set leap second flags\n");
/* arm Timer to clear leap second flags */
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
} else {
/* The day the leap second is scheduled for hasn't been reached. Retry
* in one hour.
*/
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
}
}
@@ -729,8 +735,8 @@ static ssize_t ctn_id_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%016lx\n",
- *(unsigned long *) stp_info.ctnid);
+ ret = sysfs_emit(buf, "%016lx\n",
+ *(unsigned long *)stp_info.ctnid);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -745,7 +751,7 @@ static ssize_t ctn_type_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.ctn);
+ ret = sysfs_emit(buf, "%i\n", stp_info.ctn);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -760,7 +766,7 @@ static ssize_t dst_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x2000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -775,7 +781,7 @@ static ssize_t leap_seconds_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x8000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -801,11 +807,11 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev,
return ret;
if (!stzi.lsoib.p)
- return sprintf(buf, "0,0\n");
+ return sysfs_emit(buf, "0,0\n");
- return sprintf(buf, "%lu,%d\n",
- tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
- stzi.lsoib.nlso - stzi.lsoib.also);
+ return sysfs_emit(buf, "%lu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
}
static DEVICE_ATTR_RO(leap_seconds_scheduled);
@@ -818,7 +824,7 @@ static ssize_t stratum_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -833,7 +839,7 @@ static ssize_t time_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x0800))
- ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+ ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -848,7 +854,7 @@ static ssize_t time_zone_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x4000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -863,7 +869,7 @@ static ssize_t timing_mode_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tmd);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tmd);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -878,7 +884,7 @@ static ssize_t timing_state_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tst);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tst);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -889,7 +895,7 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%i\n", stp_online);
+ return sysfs_emit(buf, "%i\n", stp_online);
}
static ssize_t online_store(struct device *dev,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 89e91b8ce842..211cc8382e4a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -24,7 +24,9 @@
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
+#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
+#include <asm/asm.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -47,6 +49,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
static DECLARE_WORK(topology_work, topology_work_fn);
@@ -144,6 +147,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
cpumask_set_cpu(cpu, &book->mask);
cpumask_set_cpu(cpu, &socket->mask);
smp_cpu_set_polarization(cpu, tl_core->pp);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
}
}
}
@@ -221,15 +225,15 @@ static void topology_update_polarization_simple(void)
static int ptf(unsigned long fc)
{
- int rc;
+ int cc;
asm volatile(
- " .insn rre,0xb9a20000,%1,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc)
- : "d" (fc) : "cc");
- return rc;
+ " .insn rre,0xb9a20000,%[fc],%[fc]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [fc] "d" (fc)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
int topology_set_cpu_management(int fc)
@@ -270,6 +274,7 @@ void update_cpu_masks(void)
topo->drawer_id = id;
}
}
+ hd_reset_state();
for_each_online_cpu(cpu) {
topo = &cpu_topology[cpu];
pkg_first = cpumask_first(&topo->core_mask);
@@ -278,8 +283,10 @@ void update_cpu_masks(void)
for_each_cpu(sibling, &topo->core_mask) {
topo_sibling = &cpu_topology[sibling];
smt_first = cpumask_first(&topo_sibling->thread_mask);
- if (sibling == smt_first)
+ if (sibling == smt_first) {
topo_package->booted_cores++;
+ hd_add_core(sibling);
+ }
}
} else {
topo->booted_cores = topo_package->booted_cores;
@@ -303,8 +310,10 @@ static void __arch_update_dedicated_flag(void *arg)
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- int rc = 0;
+ int rc, hd_status;
+ hd_status = 0;
+ rc = 0;
mutex_lock(&smp_cpu_state_mutex);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
@@ -314,22 +323,20 @@ static int __arch_update_cpu_topology(void)
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
+ if (cpu_management == 1)
+ hd_status = hd_enable_hiperdispatch();
mutex_unlock(&smp_cpu_state_mutex);
+ if (hd_status == 0)
+ hd_disable_hiperdispatch();
return rc;
}
int arch_update_cpu_topology(void)
{
- struct device *dev;
- int cpu, rc;
+ int rc;
rc = __arch_update_cpu_topology();
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
- for_each_online_cpu(cpu) {
- dev = get_cpu_device(cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- }
return rc;
}
@@ -364,7 +371,7 @@ static void set_topology_timer(void)
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+ mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}
void topology_expect_change(void)
@@ -380,7 +387,24 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int cpu_management;
+static int set_polarization(int polarization)
+{
+ int rc = 0;
+
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == polarization)
+ goto out;
+ rc = topology_set_cpu_management(polarization);
+ if (rc)
+ goto out;
+ cpu_management = polarization;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return rc;
+}
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
@@ -389,7 +413,7 @@ static ssize_t dispatching_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
+ count = sysfs_emit(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -406,19 +430,7 @@ static ssize_t dispatching_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- rc = 0;
- cpus_read_lock();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (rc)
- goto out;
- cpu_management = val;
- topology_expect_change();
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- cpus_read_unlock();
+ rc = set_polarization(val);
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
@@ -432,19 +444,19 @@ static ssize_t cpu_polarization_show(struct device *dev,
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
+ count = sysfs_emit(buf, "horizontal\n");
break;
case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
+ count = sysfs_emit(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
+ count = sysfs_emit(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
+ count = sysfs_emit(buf, "vertical:high\n");
break;
default:
- count = sprintf(buf, "unknown\n");
+ count = sysfs_emit(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
@@ -468,7 +480,7 @@ static ssize_t cpu_dedicated_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+ count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -536,14 +548,21 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = memblock_alloc(sizeof(*mask->next), 8);
- if (!mask->next)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*mask->next), 8);
+ mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
mask = mask->next;
}
}
+static int __init detect_polarization(union topology_entry *tle)
+{
+ struct topology_core *tl_core;
+
+ while (tle->nl)
+ tle = next_tle(tle);
+ tl_core = (struct topology_core *)tle;
+ return tl_core->pp != POLARIZATION_HRZ;
+}
+
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
@@ -557,12 +576,10 @@ void __init topology_init_early(void)
}
if (!MACHINE_HAS_TOPOLOGY)
goto out;
- tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!tl_info)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
+ tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
+ cpu_management = detect_polarization(info->tle);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
@@ -600,7 +617,7 @@ static int __init topology_setup(char *str)
}
early_param("topology", topology_setup);
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
@@ -630,12 +647,37 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write,
return rc;
}
-static struct ctl_table topology_ctl_table[] = {
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int polarization;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &polarization,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ polarization = cpu_management;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ return set_polarization(polarization);
+}
+
+static const struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
+ {
+ .procname = "polarization",
+ .mode = 0644,
+ .proc_handler = polarization_ctl_handler,
+ },
};
static int __init topology_init(void)
@@ -648,6 +690,8 @@ static int __init topology_init(void)
set_topology_timer();
else
topology_update_polarization_simple();
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+ set_polarization(1);
register_sysctl("s390", topology_ctl_table);
dev_root = bus_get_dev_root(&cpu_subsys);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 52578b5cecbd..24fee11b030d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -27,9 +27,11 @@
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <linux/entry-common.h>
+#include <linux/kmsan.h>
#include <asm/asm-extable.h>
#include <asm/vtime.h>
#include <asm/fpu.h>
+#include <asm/fault.h>
#include "entry.h"
static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -262,6 +264,11 @@ static void monitor_event_exception(struct pt_regs *regs)
void kernel_stack_overflow(struct pt_regs *regs)
{
+ /*
+ * Normally regs are unpoisoned by the generic entry code, but
+ * kernel_stack_overflow() is a rare case that is called bypassing it.
+ */
+ kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
printk("Kernel stack overflow.\n");
show_regs(regs);
@@ -288,15 +295,16 @@ static void __init test_monitor_call(void)
void __init trap_init(void)
{
+ struct lowcore *lc = get_lowcore();
unsigned long flags;
struct ctlreg cr0;
local_irq_save(flags);
cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
- psw_bits(S390_lowcore.external_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.program_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.svc_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.io_new_psw).mcheck = 1;
+ psw_bits(lc->external_new_psw).mcheck = 1;
+ psw_bits(lc->program_new_psw).mcheck = 1;
+ psw_bits(lc->svc_new_psw).mcheck = 1;
+ psw_bits(lc->io_new_psw).mcheck = 1;
local_ctl_load(0, &cr0);
local_irq_restore(flags);
local_mcck_enable();
@@ -307,11 +315,27 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
- unsigned int trapnr;
+ struct lowcore *lc = get_lowcore();
irqentry_state_t state;
+ unsigned int trapnr;
+ union teid teid;
+
+ teid.val = lc->trans_exc_code;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = teid.val;
- regs->int_code = S390_lowcore.pgm_int_code;
- regs->int_parm_long = S390_lowcore.trans_exc_code;
+ /*
+ * In case of a guest fault, short-circuit the fault handler and return.
+ * This way the sie64a() function will return 0; fault address and
+ * other relevant bits are saved in current->thread.gmap_teid, and
+ * the fault number in current->thread.gmap_int_code. KVM will be
+ * able to use this information to handle the fault.
+ */
+ if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
+ current->thread.gmap_teid.val = regs->int_parm_long;
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
+ return;
+ }
state = irqentry_enter(regs);
@@ -324,19 +348,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- if (S390_lowcore.pgm_code & 0x0200) {
+ if (lc->pgm_code & 0x0200) {
/* transaction abort */
- current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+ current->thread.trap_tdb = lc->pgm_tdb;
}
- if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+ if (lc->pgm_code & PGM_INT_CODE_PER) {
if (user_mode(regs)) {
struct per_event *ev = &current->thread.per_event;
set_thread_flag(TIF_PER_TRAP);
- ev->address = S390_lowcore.per_address;
- ev->cause = S390_lowcore.per_code_combined;
- ev->paid = S390_lowcore.per_access_id;
+ ev->address = lc->per_address;
+ ev->cause = lc->per_code_combined;
+ ev->paid = lc->per_access_id;
} else {
/* PER event in kernel is kprobes */
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
@@ -400,8 +424,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
[0x3b] = do_dat_exception,
[0x3c] = default_trap_handler,
[0x3d] = do_secure_storage_access,
- [0x3e] = do_non_secure_storage_access,
- [0x3f] = do_secure_storage_violation,
+ [0x3e] = default_trap_handler,
+ [0x3f] = default_trap_handler,
[0x40] = monitor_event_exception,
[0x41 ... 0x7f] = default_trap_handler,
};
@@ -412,5 +436,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
__stringify(default_trap_handler))
COND_TRAP(do_secure_storage_access);
-COND_TRAP(do_non_secure_storage_access);
-COND_TRAP(do_secure_storage_violation);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 0ece156fdd7c..cd44be2b6ce8 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -49,6 +49,8 @@ static inline bool is_final_pt_regs(struct unwind_state *state,
READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
}
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -118,6 +120,8 @@ out_stop:
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long first_frame)
{
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index fc07bc39e698..9f05df2da2f7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -2,7 +2,7 @@
/*
* Common Ultravisor functions and initialization
*
- * Copyright IBM Corp. 2019, 2020
+ * Copyright IBM Corp. 2019, 2024
*/
#define KMSG_COMPONENT "prot_virt"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -14,14 +14,14 @@
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/pagewalk.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
+EXPORT_SYMBOL(prot_virt_guest);
/*
* uv_info contains both host and guest information but it's currently only
@@ -34,7 +34,6 @@ int __bootdata_preserved(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
EXPORT_SYMBOL(uv_info);
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
@@ -109,7 +108,7 @@ EXPORT_SYMBOL_GPL(uv_pin_shared);
*
* @paddr: Absolute host address of page to be destroyed
*/
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -130,20 +129,33 @@ static int uv_destroy_page(unsigned long paddr)
}
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
*/
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_destroy_page(paddr);
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_destroy(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
/*
* Requests the Ultravisor to encrypt a guest page and make it
@@ -163,54 +175,88 @@ int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
*/
-int uv_convert_owned_from_secure(unsigned long paddr)
+int uv_convert_from_secure_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_convert_from_secure(paddr);
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
/*
- * Calculate the expected ref_count for a page that would otherwise have no
+ * Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
* the kernel, but with some slight modifications. We know that a secure
- * page can not be a huge page for example.
+ * folio can not be a large folio, for example.
*/
-static int expected_page_refs(struct page *page)
+static int expected_folio_refs(struct folio *folio)
{
int res;
- res = page_mapcount(page);
- if (PageSwapCache(page)) {
+ res = folio_mapcount(folio);
+ if (folio_test_swapcache(folio)) {
res++;
- } else if (page_mapping(page)) {
+ } else if (folio_mapping(folio)) {
res++;
- if (page_has_private(page))
+ if (folio->private)
res++;
}
return res;
}
-static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
+/**
+ * make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -E2BIG if the folio is large;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio())
+ */
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
- if (PageWriteback(page))
- return -EAGAIN;
- expected = expected_page_refs(page);
- if (!page_ref_freeze(page, expected))
+ if (folio_test_large(folio))
+ return -E2BIG;
+ if (folio_test_writeback(folio))
return -EBUSY;
- set_bit(PG_arch_1, &page->flags);
+ expected = expected_folio_refs(folio) + 1;
+ if (!folio_ref_freeze(folio, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &folio->flags);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
@@ -220,9 +266,9 @@ static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
* -EAGAIN and we let the callers deal with it.
*/
cc = __uv_call(0, (u64)uvcb);
- page_ref_unfreeze(page, expected);
+ folio_ref_unfreeze(folio, expected);
/*
- * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+ * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
* If busy or partially completed, return -EAGAIN.
*/
if (cc == UVC_CC_OK)
@@ -231,245 +277,49 @@ static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
-{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct vm_area_struct *vma;
- bool local_drain = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct page *page;
- pte_t *ptep;
- int rc;
-
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- page = pte_page(*ptep);
- rc = -EAGAIN;
- if (trylock_page(page)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(page_to_phys(page));
- rc = make_page_secure(page, uvcb);
- unlock_page(page);
- }
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- if (rc == -EAGAIN) {
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- wait_on_page_writeback(page);
- } else if (rc == -EBUSY) {
- /*
- * If we have tried a local drain and the page refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (local_drain) {
- lru_add_drain_all();
- /* We give up here, and let the caller try again */
- return -EAGAIN;
- }
- /*
- * We are here if the page refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- local_drain = true;
- /* And now we try again immediately after draining */
- goto again;
- } else if (rc == -ENXIO) {
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct vm_area_struct *vma;
- unsigned long uaddr;
- struct page *page;
- int rc;
-
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- /* we take an extra reference here */
- page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page))
- goto out;
- rc = uv_destroy_owned_page(page_to_phys(page));
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_owned_from_secure(page_to_phys(page));
- put_page(page);
-out:
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_folio_secure);
/*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the folio concurrently. Having 2
+ * parallel arch_make_folio_accessible is fine, as the UV calls will become a
+ * no-op if the folio is already exported.
*/
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* Hugepage cannot be protected, so nothing to do */
- if (PageHuge(page))
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
return 0;
/*
- * PG_arch_1 is used in 3 places:
- * 1. for kernel page tables during early boot
- * 2. for storage keys of huge pages and KVM
- * 3. As an indication that this page might be secure. This can
+ * PG_arch_1 is used in 2 places:
+ * 1. for storage keys of hugetlb folios and KVM
+ * 2. As an indication that this small folio might be secure. This can
* overindicate, e.g. we set the bit before calling
* convert_to_secure.
- * As secure pages are never huge, all 3 variants can co-exists.
+ * As secure pages are never large folios, both variants can co-exists.
*/
- if (!test_bit(PG_arch_1, &page->flags))
+ if (!test_bit(PG_arch_1, &folio->flags))
return 0;
- rc = uv_pin_shared(page_to_phys(page));
+ rc = uv_pin_shared(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
- rc = uv_convert_from_secure(page_to_phys(page));
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
return rc;
}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -612,12 +462,32 @@ static struct kobj_attribute uv_query_supp_secret_types_attr =
static ssize_t uv_query_max_secrets(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sysfs_emit(buf, "%d\n", uv_info.max_secrets);
+ return sysfs_emit(buf, "%d\n",
+ uv_info.max_assoc_secrets + uv_info.max_retr_secrets);
}
static struct kobj_attribute uv_query_max_secrets_attr =
__ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+static ssize_t uv_query_max_retr_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_retr_secrets_attr =
+ __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL);
+
+static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets);
+}
+
+static struct kobj_attribute uv_query_max_assoc_secrets_attr =
+ __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
@@ -635,34 +505,91 @@ static struct attribute *uv_query_attrs[] = {
&uv_query_supp_add_secret_pcf_attr.attr,
&uv_query_supp_secret_types_attr.attr,
&uv_query_max_secrets_attr.attr,
+ &uv_query_max_assoc_secrets_attr.attr,
+ &uv_query_max_retr_secrets_attr.attr,
NULL,
};
+static inline struct uv_cb_query_keys uv_query_keys(void)
+{
+ struct uv_cb_query_keys uvcb = {
+ .header.cmd = UVC_CMD_QUERY_KEYS,
+ .header.len = sizeof(uvcb)
+ };
+
+ uv_call(0, (uint64_t)&uvcb);
+ return uvcb;
+}
+
+static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at)
+{
+ return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n",
+ hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]);
+}
+
+static ssize_t uv_keys_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_host_key_attr =
+ __ATTR(host_key, 0444, uv_keys_host_key, NULL);
+
+static ssize_t uv_keys_backup_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_backup_host_key_attr =
+ __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL);
+
+static ssize_t uv_keys_all(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++)
+ len += emit_hash(uvcb.key_hashes + i, buf, len);
+
+ return len;
+}
+
+static struct kobj_attribute uv_keys_all_attr =
+ __ATTR(all, 0444, uv_keys_all, NULL);
+
static struct attribute_group uv_query_attr_group = {
.attrs = uv_query_attrs,
};
+static struct attribute *uv_keys_attrs[] = {
+ &uv_keys_host_key_attr.attr,
+ &uv_keys_backup_host_key_attr.attr,
+ &uv_keys_all_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_keys_attr_group = {
+ .attrs = uv_keys_attrs,
+};
+
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
- val = prot_virt_guest;
-#endif
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
- val = prot_virt_host;
-#endif
-
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@@ -678,9 +605,27 @@ static const struct attribute *uv_prot_virt_attrs[] = {
};
static struct kset *uv_query_kset;
+static struct kset *uv_keys_kset;
static struct kobject *uv_kobj;
-static int __init uv_info_init(void)
+static int __init uv_sysfs_dir_init(const struct attribute_group *grp,
+ struct kset **uv_dir_kset, const char *name)
+{
+ struct kset *kset;
+ int rc;
+
+ kset = kset_create_and_add(name, NULL, uv_kobj);
+ if (!kset)
+ return -ENOMEM;
+ *uv_dir_kset = kset;
+
+ rc = sysfs_create_group(&kset->kobj, grp);
+ if (rc)
+ kset_unregister(kset);
+ return rc;
+}
+
+static int __init uv_sysfs_init(void)
{
int rc = -ENOMEM;
@@ -695,17 +640,16 @@ static int __init uv_info_init(void)
if (rc)
goto out_kobj;
- uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
- if (!uv_query_kset) {
- rc = -ENOMEM;
+ rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query");
+ if (rc)
goto out_ind_files;
- }
- rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
- if (!rc)
- return 0;
+ /* Get installed key hashes if available, ignore any errors */
+ if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list))
+ uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys");
+
+ return 0;
- kset_unregister(uv_query_kset);
out_ind_files:
sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
out_kobj:
@@ -713,5 +657,131 @@ out_kobj:
kobject_put(uv_kobj);
return rc;
}
-device_initcall(uv_info_init);
-#endif
+device_initcall(uv_sysfs_init);
+
+/*
+ * Find the secret with the secret_id in the provided list.
+ *
+ * Context: might sleep.
+ */
+static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
+ const struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 i;
+
+ for (i = 0; i < list->total_num_secrets; i++) {
+ if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) {
+ *secret = list->secrets[i].hdr;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/*
+ * Do the actual search for `uv_get_secret_metadata`.
+ *
+ * Context: might sleep.
+ */
+static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 start_idx = 0;
+ u16 list_rc;
+ int ret;
+
+ do {
+ uv_list_secrets(list, start_idx, &list_rc, NULL);
+ if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) {
+ if (list_rc == UVC_RC_INV_CMD)
+ return -ENODEV;
+ else
+ return -EIO;
+ }
+ ret = find_secret_in_page(secret_id, list, secret);
+ if (ret == 0)
+ return ret;
+ start_idx = list->next_secret_idx;
+ } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx);
+
+ return -ENOENT;
+}
+
+/**
+ * uv_get_secret_metadata() - get secret metadata for a given secret id.
+ * @secret_id: search pattern.
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search for a secret with the given secret_id in the Ultravisor secret store.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0: - Found entry; secret->idx and secret->type are valid.
+ * * %ENOENT - No entry found.
+ * * %ENODEV: - Not supported: UV not available or command not available.
+ * * %EIO: - Other unexpected UV error.
+ */
+int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list_item_hdr *secret)
+{
+ struct uv_secret_list *buf;
+ int rc;
+
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ rc = find_secret(secret_id, buf, secret);
+ kfree(buf);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(uv_get_secret_metadata);
+
+/**
+ * uv_retrieve_secret() - get the secret value for the secret index.
+ * @secret_idx: Secret index for which the secret should be retrieved.
+ * @buf: Buffer to store retrieved secret.
+ * @buf_size: Size of the buffer. The correct buffer size is reported as part of
+ * the result from `uv_get_secret_metadata`.
+ *
+ * Calls the Retrieve Secret UVC and translates the UV return code into an errno.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0 - Entry found; buffer contains a valid secret.
+ * * %ENOENT: - No entry found or secret at the index is non-retrievable.
+ * * %ENODEV: - Not supported: UV not available or command not available.
+ * * %EINVAL: - Buffer too small for content.
+ * * %EIO: - Other unexpected UV error.
+ */
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size)
+{
+ struct uv_cb_retr_secr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_RETR_SECRET,
+ .secret_idx = secret_idx,
+ .buf_addr = (u64)buf,
+ .buf_size = buf_size,
+ };
+
+ uv_call_sched(0, (u64)&uvcb);
+
+ switch (uvcb.header.rc) {
+ case UVC_RC_EXECUTED:
+ return 0;
+ case UVC_RC_INV_CMD:
+ return -ENODEV;
+ case UVC_RC_RETR_SECR_STORE_EMPTY:
+ case UVC_RC_RETR_SECR_INV_SECRET:
+ case UVC_RC_RETR_SECR_INV_IDX:
+ return -ENOENT;
+ case UVC_RC_RETR_SECR_BUF_SMALL:
+ return -EINVAL;
+ default:
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(uv_retrieve_secret);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index a45b3a4c91db..598b512cde01 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -12,12 +12,15 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
#include <linux/random.h>
#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
#include <asm/vdso.h>
extern char vdso64_start[], vdso64_end[];
@@ -29,12 +32,6 @@ static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
@@ -210,17 +207,22 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
return addr;
}
-unsigned long vdso_size(void)
+unsigned long vdso_text_size(void)
{
- unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+ unsigned long size;
if (is_compat_task())
- size += vdso32_end - vdso32_start;
+ size = vdso32_end - vdso32_start;
else
- size += vdso64_end - vdso64_start;
+ size = vdso64_end - vdso64_start;
return PAGE_ALIGN(size);
}
+unsigned long vdso_size(void)
+{
+ return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE;
+}
+
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
unsigned long addr = VDSO_BASE;
@@ -245,8 +247,25 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
return pagelist;
}
+static void vdso_apply_alternatives(void)
+{
+ const struct elf64_shdr *alt, *shdr;
+ struct alt_instr *start, *end;
+ const struct elf64_hdr *hdr;
+
+ hdr = (struct elf64_hdr *)vdso64_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+ start = (void *)hdr + alt->sh_offset;
+ end = (void *)hdr + alt->sh_offset + alt->sh_size;
+ apply_alternatives(start, end);
+}
+
static int __init vdso_init(void)
{
+ vdso_apply_alternatives();
vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
if (IS_ENABLED(CONFIG_COMPAT))
vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index b12a274cbb47..2c5afb88d298 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = vdso_user_wrapper-32.o note-32.o
@@ -19,8 +17,10 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_32 += -m31 -s
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables
LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
@@ -32,19 +32,13 @@ obj-y += vdso32_wrapper.o
targets += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso_and_check)
# strip rule for the .so file
@@ -62,7 +56,7 @@ quiet_cmd_vdso32cc = VDSO32C $@
cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index 65b9513a5a0e..c916c4f73f76 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -16,7 +16,7 @@ SECTIONS
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ef9832726097..ad206f2068d8 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,16 +1,19 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
# Build rules
targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
@@ -24,9 +27,11 @@ KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_64 += -m64
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
+KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
ldflags-y := -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
@@ -37,12 +42,6 @@ obj-y += vdso64_wrapper.o
targets += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
@@ -50,7 +49,7 @@ quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
$(call if_changed,vdso_and_check)
# strip rule for the .so file
@@ -72,7 +71,7 @@ quiet_cmd_vdso64cc = VDSO64C $@
cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
index 34c7a2312f9d..9e5397e7b590 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 37e2a505e81d..ec42b7d9cb53 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -4,6 +4,7 @@
* library
*/
+#include <asm/vdso/vsyscall.h>
#include <asm/page.h>
#include <asm/vdso.h>
@@ -13,10 +14,11 @@ OUTPUT_ARCH(s390:64-bit)
SECTIONS
{
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+ PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -42,6 +44,10 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ . = ALIGN(8);
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
.dynamic : { *(.dynamic) } :text :dynamic
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
@@ -140,6 +146,7 @@ VERSION
__kernel_restart_syscall;
__kernel_rt_sigreturn;
__kernel_sigreturn;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 57f62596e53b..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -6,8 +6,6 @@
#include <asm/dwarf.h>
#include <asm/ptrace.h>
-#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
-
/*
* Older glibc version called vdso without allocating a stackframe. This wrapper
* is just used to allocate a stackframe. See
@@ -15,23 +13,23 @@
* for details.
*/
.macro vdso_func func
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
- aghi %r15,-WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
+ CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD
+ stg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS
+ xc __SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15)
brasl %r14,__s390_vdso_\func
- lg %r14,STACK_FRAME_OVERHEAD(%r15)
- aghi %r15,WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ lg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_RESTORE 14
+ aghi %r15,STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD
CFI_RESTORE 15
br %r14
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_func gettimeofday
@@ -40,16 +38,13 @@ vdso_func clock_gettime
vdso_func getcpu
.macro vdso_syscall func,syscall
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
svc \syscall
/* Make sure we notice when a syscall returns, which shouldn't happen */
.word 0
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0 %v0
+#define STATE1 %v1
+#define STATE2 %v2
+#define STATE3 %v3
+#define COPY0 %v4
+#define COPY1 %v5
+#define COPY2 %v6
+#define COPY3 %v7
+#define BEPERM %v19
+#define TMP0 %v20
+#define TMP1 %v21
+#define TMP2 %v22
+#define TMP3 %v23
+
+ .section .rodata
+
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+ .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+ .text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-bytes
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
+
+ /* COPY0 = "expand 32-byte k" */
+ VL COPY0,0,,%r1
+
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+ /* COPY1,COPY2 = key */
+ VLM COPY1,COPY2,0,%r3
+
+ /* COPY3 = counter || zero nonce */
+ lg %r3,0(%r4)
+ VZERO COPY3
+ VLVGG COPY3,%r3,0
+
+ lghi %r1,0
+.Lblock:
+ VLR STATE0,COPY0
+ VLR STATE1,COPY1
+ VLR STATE2,COPY2
+ VLR STATE3,COPY3
+
+ lghi %r0,10
+.Ldoubleround:
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+ VSLDB STATE1,STATE1,STATE1,4
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+ VSLDB STATE3,STATE3,STATE3,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+ VSLDB STATE1,STATE1,STATE1,12
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+ VSLDB STATE3,STATE3,STATE3,4
+ brctg %r0,.Ldoubleround
+
+ /* OUTPUT0 = STATE0 + COPY0 */
+ VAF STATE0,STATE0,COPY0
+ /* OUTPUT1 = STATE1 + COPY1 */
+ VAF STATE1,STATE1,COPY1
+ /* OUTPUT2 = STATE2 + COPY2 */
+ VAF STATE2,STATE2,COPY2
+ /* OUTPUT3 = STATE3 + COPY3 */
+ VAF STATE3,STATE3,COPY3
+
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
+
+ /* ++COPY3.COUNTER */
+ /* alsih %r3,1 */
+ .insn rilu,0xcc0a00000000,%r3,1
+ alcr %r3,%r1
+ VLVGG COPY3,%r3,0
+
+ /* OUTPUT += 64, --NBLOCKS */
+ aghi %r2,64
+ brctg %r5,.Lblock
+
+ /* COUNTER = COPY3.COUNTER */
+ stg %r3,0(%r4)
+
+ /* Zero out potentially sensitive regs */
+ VZERO STATE0
+ VZERO STATE1
+ VZERO STATE2
+ VZERO STATE3
+ VZERO COPY1
+ VZERO COPY2
+
+ /* Early exit if TMP0-TMP3 have not been used */
+ ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+ VZERO TMP0
+ VZERO TMP1
+ VZERO TMP2
+ VZERO TMP3
+
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (test_facility(129))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
index d296dfc22191..cc8933e04ff7 100644
--- a/arch/s390/kernel/vmcore_info.c
+++ b/arch/s390/kernel/vmcore_info.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/vmcore_info.h>
-#include <asm/abs_lowcore.h>
#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
#include <asm/setup.h>
void arch_crash_save_vmcoreinfo(void)
@@ -14,7 +15,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+ vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys);
abs_lc = get_abs_lowcore();
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
put_abs_lowcore(abs_lc);
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 48de296e8905..ff1ddba96352 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
- . = 0x100000;
+ . = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
@@ -52,21 +52,12 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
- *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
} :text = 0x0700
RO_DATA(PAGE_SIZE)
- .data.rel.ro : {
- *(.data.rel.ro .data.rel.ro.*)
- }
- .got : {
- __got_start = .;
- *(.got)
- __got_end = .;
- }
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -80,6 +71,15 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
+
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
.data.rel : {
*(.data.rel*)
@@ -183,39 +183,16 @@ SECTIONS
.amode31.data : {
*(.amode31.data)
}
- . = ALIGN(PAGE_SIZE);
+ . = _samode31 + AMODE31_SIZE;
_eamode31 = .;
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
- PERCPU_SECTION(0x100)
+ RUNTIME_CONST_VARIABLES
-#ifdef CONFIG_PIE_BUILD
- .dynsym ALIGN(8) : {
- __dynsym_start = .;
- *(.dynsym)
- __dynsym_end = .;
- }
- .rela.dyn ALIGN(8) : {
- __rela_dyn_start = .;
- *(.rela*)
- __rela_dyn_end = .;
- }
- .dynamic ALIGN(8) : {
- *(.dynamic)
- }
- .dynstr ALIGN(8) : {
- *(.dynstr)
- }
-#endif
- .hash ALIGN(8) : {
- *(.hash)
- }
- .gnu.hash ALIGN(8) : {
- *(.gnu.hash)
- }
+ PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
@@ -230,7 +207,6 @@ SECTIONS
* it should match struct vmlinux_info
*/
.vmlinux.info 0 (INFO) : {
- QUAD(_stext) /* default_lma */
QUAD(startup_continue) /* entry */
QUAD(__bss_start - _stext) /* image_size */
QUAD(__bss_stop - __bss_start) /* bss_size */
@@ -239,18 +215,14 @@ SECTIONS
QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
QUAD(__boot_data_preserved_end -
__boot_data_preserved_start) /* bootdata_preserved_size */
-#ifdef CONFIG_PIE_BUILD
- QUAD(__dynsym_start) /* dynsym_start */
- QUAD(__rela_dyn_start) /* rela_dyn_start */
- QUAD(__rela_dyn_end) /* rela_dyn_end */
-#else
QUAD(__got_start) /* got_start */
QUAD(__got_end) /* got_end */
-#endif
QUAD(_eamode31 - _samode31) /* amode31_size */
QUAD(init_mm)
QUAD(swapper_pg_dir)
QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
#ifdef CONFIG_KASAN
QUAD(kasan_early_shadow_page)
QUAD(kasan_early_shadow_pte)
@@ -282,12 +254,10 @@ SECTIONS
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
}
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-#ifndef CONFIG_PIE_BUILD
.rela.dyn : {
*(.rela.*) *(.rela_*)
}
ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
-#endif
/* Sections to be discarded */
DISCARDS
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 24a18e5ef6e8..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -33,24 +33,17 @@ static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
-static inline u64 get_vtimer(void)
-{
- u64 timer;
-
- asm volatile("stpt %0" : "=Q" (timer));
- return timer;
-}
-
static inline void set_vtimer(u64 expires)
{
+ struct lowcore *lc = get_lowcore();
u64 timer;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
: "=Q" (timer) : "Q" (expires));
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
+ lc->system_timer += lc->last_update_timer - timer;
+ lc->last_update_timer = expires;
}
static inline int virt_timer_forward(u64 elapsed)
@@ -125,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
static int do_account_vtime(struct task_struct *tsk)
{
u64 timer, clock, user, guest, system, hardirq, softirq;
+ struct lowcore *lc = get_lowcore();
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
+ timer = lc->last_update_timer;
+ clock = lc->last_update_clock;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" stckf %1" /* Store current tod clock value */
- : "=Q" (S390_lowcore.last_update_timer),
- "=Q" (S390_lowcore.last_update_clock)
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
: : "cc");
- clock = S390_lowcore.last_update_clock - clock;
- timer -= S390_lowcore.last_update_timer;
+ clock = lc->last_update_clock - clock;
+ timer -= lc->last_update_timer;
if (hardirq_count())
- S390_lowcore.hardirq_timer += timer;
+ lc->hardirq_timer += timer;
else
- S390_lowcore.system_timer += timer;
+ lc->system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
@@ -149,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk)
/* Calculate cputime delta */
user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(S390_lowcore.user_timer));
+ READ_ONCE(lc->user_timer));
guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(S390_lowcore.guest_timer));
+ READ_ONCE(lc->guest_timer));
system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(S390_lowcore.system_timer));
+ READ_ONCE(lc->system_timer));
hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(S390_lowcore.hardirq_timer));
+ READ_ONCE(lc->hardirq_timer));
softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(S390_lowcore.softirq_timer));
- S390_lowcore.steal_timer +=
+ READ_ONCE(lc->softirq_timer));
+ lc->steal_timer +=
clock - user - guest - system - hardirq - softirq;
/* Push account value */
@@ -184,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk)
void vtime_task_switch(struct task_struct *prev)
{
+ struct lowcore *lc = get_lowcore();
+
do_account_vtime(prev);
- prev->thread.user_timer = S390_lowcore.user_timer;
- prev->thread.guest_timer = S390_lowcore.guest_timer;
- prev->thread.system_timer = S390_lowcore.system_timer;
- prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
- prev->thread.softirq_timer = S390_lowcore.softirq_timer;
- S390_lowcore.user_timer = current->thread.user_timer;
- S390_lowcore.guest_timer = current->thread.guest_timer;
- S390_lowcore.system_timer = current->thread.system_timer;
- S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
- S390_lowcore.softirq_timer = current->thread.softirq_timer;
+ prev->thread.user_timer = lc->user_timer;
+ prev->thread.guest_timer = lc->guest_timer;
+ prev->thread.system_timer = lc->system_timer;
+ prev->thread.hardirq_timer = lc->hardirq_timer;
+ prev->thread.softirq_timer = lc->softirq_timer;
+ lc->user_timer = current->thread.user_timer;
+ lc->guest_timer = current->thread.guest_timer;
+ lc->system_timer = current->thread.system_timer;
+ lc->hardirq_timer = current->thread.hardirq_timer;
+ lc->softirq_timer = current->thread.softirq_timer;
}
/*
@@ -204,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 steal, avg_steal;
if (do_account_vtime(tsk))
virt_timer_expire();
- steal = S390_lowcore.steal_timer;
- avg_steal = S390_lowcore.avg_steal_timer;
+ steal = lc->steal_timer;
+ avg_steal = lc->avg_steal_timer;
if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
+ lc->steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
- S390_lowcore.avg_steal_timer = avg_steal / 2;
+ lc->avg_steal_timer = avg_steal / 2;
}
static u64 vtime_delta(void)
{
- u64 timer = S390_lowcore.last_update_timer;
-
- S390_lowcore.last_update_timer = get_vtimer();
+ struct lowcore *lc = get_lowcore();
+ u64 timer = lc->last_update_timer;
- return timer - S390_lowcore.last_update_timer;
+ lc->last_update_timer = get_cpu_timer();
+ return timer - lc->last_update_timer;
}
/*
@@ -234,12 +231,13 @@ static u64 vtime_delta(void)
*/
void vtime_account_kernel(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 delta = vtime_delta();
if (tsk->flags & PF_VCPU)
- S390_lowcore.guest_timer += delta;
+ lc->guest_timer += delta;
else
- S390_lowcore.system_timer += delta;
+ lc->system_timer += delta;
virt_timer_forward(delta);
}
@@ -249,7 +247,7 @@ void vtime_account_softirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.softirq_timer += delta;
+ get_lowcore()->softirq_timer += delta;
virt_timer_forward(delta);
}
@@ -258,7 +256,7 @@ void vtime_account_hardirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.hardirq_timer += delta;
+ get_lowcore()->hardirq_timer += delta;
virt_timer_forward(delta);
}
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+ unsigned long missed;
+ unsigned long addr;
+ pid_t pid;
+};
+
+struct wti_state {
+ /* debug data for s390dbf */
+ struct wti_debug dbg;
+ /*
+ * Represents the real-time thread responsible to
+ * acknowledge the warning-track interrupt and trigger
+ * preliminary and postliminary precautions.
+ */
+ struct task_struct *thread;
+ /*
+ * If pending is true, the real-time thread must be scheduled.
+ * If not, a wake up of that thread will remain a noop.
+ */
+ bool pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, interrupts are disabled
+ * to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* disable all I/O interrupts */
+ cr6.val &= ~0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* enable all I/O interrupts */
+ cr6.val |= 0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+ struct pt_regs *regs = get_irq_regs();
+
+ st->dbg.pid = current->pid;
+ st->dbg.addr = 0;
+ if (!user_mode(regs))
+ st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct wti_state *st = this_cpu_ptr(&wti_state);
+
+ inc_irq_stat(IRQEXT_WTI);
+ wti_irq_disable();
+ store_debug_data(st);
+ st->pending = true;
+ wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+ struct wti_debug *wdi = &st->dbg;
+ char buf[WTI_DBF_LEN];
+
+ if (wdi->addr)
+ snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+ else
+ snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+ debug_text_event(wti_dbg, 2, buf);
+ wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+ struct wti_state *st;
+ int cpu;
+
+ cpus_read_lock();
+ seq_puts(seq, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(seq, "CPU%-8d", cpu);
+ seq_putc(seq, '\n');
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ seq_printf(seq, " %10lu", st->dbg.missed);
+ }
+ seq_putc(seq, '\n');
+ cpus_read_unlock();
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ st->pending = false;
+ /*
+ * Yield CPU voluntarily to the hypervisor. Control
+ * resumes when hypervisor decides to dispatch CPU
+ * to this LPAR again.
+ */
+ if (diag49c(DIAG49C_SUBC_ACK))
+ wti_dbf_grace_period(st);
+ wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+ .store = &wti_state.thread,
+ .thread_should_run = wti_pending,
+ .thread_fn = wti_thread_fn,
+ .thread_comm = "cpuwti/%u",
+ .selfparking = false,
+};
+
+static int __init wti_init(void)
+{
+ struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct dentry *wti_dir;
+ struct wti_state *st;
+ int cpu, rc;
+
+ rc = -EOPNOTSUPP;
+ if (!sclp.has_wti)
+ goto out;
+ rc = smpboot_register_percpu_thread(&wti_threads);
+ if (WARN_ON(rc))
+ goto out;
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+ }
+ rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+ if (rc) {
+ pr_warn("Couldn't request external interrupt 0x1007\n");
+ goto out_thread;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+ rc = diag49c(DIAG49C_SUBC_REG);
+ if (rc) {
+ pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+ rc = -EOPNOTSUPP;
+ goto out_subclass;
+ }
+ wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+ debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+ wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+ if (!wti_dbg) {
+ rc = -ENOMEM;
+ goto out_debug_register;
+ }
+ rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+ if (rc)
+ goto out_debug_register;
+ goto out;
+out_debug_register:
+ debug_unregister(wti_dbg);
+out_subclass:
+ irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+ unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+ smpboot_unregister_percpu_thread(&wti_threads);
+out:
+ return rc;
+}
+late_initcall(wti_init);