summaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/Makefile7
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/cpacf.c119
-rw-r--r--arch/s390/kernel/debug.c1
-rw-r--r--arch/s390/kernel/diag.c17
-rw-r--r--arch/s390/kernel/dis.c20
-rw-r--r--arch/s390/kernel/early.c38
-rw-r--r--arch/s390/kernel/early_printk.c16
-rw-r--r--arch/s390/kernel/earlypgm.S23
-rw-r--r--arch/s390/kernel/entry.S38
-rw-r--r--arch/s390/kernel/ftrace.c106
-rw-r--r--arch/s390/kernel/ftrace.h2
-rw-r--r--arch/s390/kernel/hiperdispatch.c430
-rw-r--r--arch/s390/kernel/irq.c1
-rw-r--r--arch/s390/kernel/kprobes.c15
-rw-r--r--arch/s390/kernel/mcount.S5
-rw-r--r--arch/s390/kernel/numa.c3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c5
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c309
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c16
-rw-r--r--arch/s390/kernel/perf_pai_ext.c9
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/s390/kernel/smp.c21
-rw-r--r--arch/s390/kernel/stacktrace.c19
-rw-r--r--arch/s390/kernel/sysinfo.c1
-rw-r--r--arch/s390/kernel/topology.c76
-rw-r--r--arch/s390/kernel/uv.c23
-rw-r--r--arch/s390/kernel/vdso.c26
-rw-r--r--arch/s390/kernel/vdso64/Makefile9
-rw-r--r--arch/s390/kernel/vdso64/vdso.h1
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S7
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S14
-rw-r--r--arch/s390/kernel/vdso64/vgetrandom-chacha.S181
-rw-r--r--arch/s390/kernel/vdso64/vgetrandom.c14
-rw-r--r--arch/s390/kernel/vmlinux.lds.S3
-rw-r--r--arch/s390/kernel/wti.c215
37 files changed, 1387 insertions, 410 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index e47a4be54ff8..48caae8c7e10 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -36,22 +36,23 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
extra-y += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
+obj-$(CONFIG_SYSFS) += cpacf.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ffa0dd2dbaac..5529248d84fb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -112,8 +112,7 @@ int main(void)
OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
/* software defined lowcore locations 0x200 - 0xdff*/
- OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
- OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA, lowcore, save_area);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1942e2a9f8db..5a86b9d1da71 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,11 +24,11 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <asm/vdso-symbols.h>
#include <asm/access-regs.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/lowcore.h>
-#include <asm/vdso.h>
#include <asm/fpu.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..c8575dbc890d
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "cpacf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction) \
+static ssize_t name##_query_raw_read(struct file *fp, \
+ struct kobject *kobj, \
+ struct bin_attribute *attr, \
+ char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_mask_t mask; \
+ \
+ if (!cpacf_query(CPACF_##instruction, &mask)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
+} \
+static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static struct bin_attribute *cpacf_attrs[] = {
+ &bin_attr_km_query_raw,
+ &bin_attr_kmc_query_raw,
+ &bin_attr_kimd_query_raw,
+ &bin_attr_klmd_query_raw,
+ &bin_attr_kmac_query_raw,
+ &bin_attr_pckmo_query_raw,
+ &bin_attr_kmf_query_raw,
+ &bin_attr_kmctr_query_raw,
+ &bin_attr_kmo_query_raw,
+ &bin_attr_pcc_query_raw,
+ &bin_attr_prno_query_raw,
+ &bin_attr_kma_query_raw,
+ &bin_attr_kdsa_query_raw,
+ &bin_attr_km_query_auth_info_raw,
+ &bin_attr_kmc_query_auth_info_raw,
+ &bin_attr_kimd_query_auth_info_raw,
+ &bin_attr_klmd_query_auth_info_raw,
+ &bin_attr_kmac_query_auth_info_raw,
+ &bin_attr_pckmo_query_auth_info_raw,
+ &bin_attr_kmf_query_auth_info_raw,
+ &bin_attr_kmctr_query_auth_info_raw,
+ &bin_attr_kmo_query_auth_info_raw,
+ &bin_attr_pcc_query_auth_info_raw,
+ &bin_attr_prno_query_auth_info_raw,
+ &bin_attr_kma_query_auth_info_raw,
+ &bin_attr_kdsa_query_auth_info_raw,
+ NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+ .name = "cpacf",
+ .bin_attrs = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+ struct device *cpu_root;
+ int rc = 0;
+
+ cpu_root = bus_get_dev_root(&cpu_subsys);
+ if (cpu_root) {
+ rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+ put_device(cpu_root);
+ }
+ return rc;
+}
+device_initcall(cpacf_init);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index bce50ca75ea7..e62bea9ab21e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -163,7 +163,6 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ac7b8c8e3133..007e1795670e 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -52,6 +52,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
@@ -303,3 +304,19 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode)
return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
}
EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+ int rc;
+
+ diag_stat_inc(DIAG_STAT_X49C);
+ asm volatile(
+ " diag %[subcode],0,0x49c\n"
+ " ipm %[rc]\n"
+ " srl %[rc],28\n"
+ : [rc] "=d" (rc)
+ : [subcode] "d" (subcode)
+ : "cc");
+ return rc;
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 89dc826a8d2e..94eb8168ea44 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -122,6 +122,7 @@ enum {
U8_32, /* 8 bit unsigned value starting at 32 */
U12_16, /* 12 bit unsigned value starting at 16 */
U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_20, /* 16 bit unsigned value starting at 20 */
U16_32, /* 16 bit unsigned value starting at 32 */
U32_16, /* 32 bit unsigned value starting at 16 */
VX_12, /* Vector index register starting at position 12 */
@@ -184,6 +185,7 @@ static const struct s390_operand operands[] = {
[U8_32] = { 8, 32, 0 },
[U12_16] = { 12, 16, 0 },
[U16_16] = { 16, 16, 0 },
+ [U16_20] = { 16, 20, 0 },
[U16_32] = { 16, 32, 0 },
[U32_16] = { 32, 16, 0 },
[VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +259,6 @@ static const unsigned char formats[][6] = {
[INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
[INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
- [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 },
[INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +301,17 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
[INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
[INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 },
[INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
[INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
[INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
[INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
[INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
- [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
+ [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 },
+ [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 },
[INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +459,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
if (separator)
ptr += sprintf(ptr, "%c", separator);
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%r%u", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%f%u", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%a%u", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%c%u", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%v%u", value);
else if (operand->flags & OPERAND_PCREL) {
void *pcrel = (void *)((int)value + addr);
ptr += sprintf(ptr, "%px", pcrel);
} else if (operand->flags & OPERAND_SIGNED)
- ptr += sprintf(ptr, "%i", value);
+ ptr += sprintf(ptr, "%i", (int)value);
else
ptr += sprintf(ptr, "%u", value);
if (operand->flags & OPERAND_DISP)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 14d324865e33..62f8f5a750a3 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -7,6 +7,7 @@
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/sched/debug.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -175,20 +176,45 @@ static __init void setup_topology(void)
topology_max_mnest = max_mnest;
}
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
{
- if (!fixup_exception(regs))
- disabled_wait();
+ struct lowcore *lc = get_lowcore();
+ unsigned long ip;
+
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
+ ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+ /* Monitor Event? Might be a warning */
+ if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+ if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+ return;
+ }
+ if (fixup_exception(regs))
+ return;
+ /*
+ * Unhandled exception - system cannot continue but try to get some
+ * helpful messages to the console. Use early_printk() to print
+ * some basic information in case it is too early for printk().
+ */
+ register_early_console();
+ early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+ regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+ show_regs(regs);
+ disabled_wait();
}
static noinline __init void setup_lowcore_early(void)
{
+ struct lowcore *lc = get_lowcore();
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
psw.mask = PSW_KERNEL_BITS;
- get_lowcore()->program_new_psw = psw;
- get_lowcore()->preempt_count = INIT_PREEMPT_COUNT;
+ lc->program_new_psw = psw;
+ lc->preempt_count = INIT_PREEMPT_COUNT;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
static __init void detect_diag9c(void)
@@ -242,6 +268,8 @@ static __init void detect_machine_facilities(void)
}
if (test_facility(194))
get_lowcore()->machine_flags |= MACHINE_FLAG_RDP;
+ if (test_facility(85))
+ get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN;
}
static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
.index = -1,
};
+void __init register_early_console(void)
+{
+ if (early_console)
+ return;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+}
+
static int __init setup_early_printk(char *buf)
{
if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
/* Accept only "earlyprintk" and "earlyprintk=sclp" */
if (buf && !str_has_prefix(buf, "sclp"))
return 0;
- if (!sclp.has_linemode && !sclp.has_vt220)
- return 0;
- early_console = &sclp_early_console;
- register_console(early_console);
+ register_early_console();
return 0;
}
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index c634871f0d90..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-SYM_CODE_START(early_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- lgr %r2,%r11
- brasl %r14,__do_early_pgm_check
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- lpswe __LC_RETURN_PSW
-SYM_CODE_END(early_pgm_check_handler)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 749410cfdbc0..d6d5317f768e 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -42,7 +42,7 @@ _LPP_OFFSET = __LC_LPP
.macro LPSWEY address, lpswe
ALTERNATIVE_2 "b \lpswe;nopr", \
- ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
ALT_LOWCORE
.endm
@@ -264,7 +264,7 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
- STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
GET_LC %r13
stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
@@ -287,7 +287,7 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13)
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
@@ -323,7 +323,7 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
- STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
GET_LC %r13
stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
@@ -338,16 +338,16 @@ SYM_CODE_START(pgm_check_handler)
jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13
+2: CHECK_STACK __LC_SAVE_AREA,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f
+ CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
@@ -398,7 +398,7 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
- STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
GET_LC %r13
stckf __LC_INT_CLOCK(%r13)
stpt __LC_SYS_ENTER_TIMER(%r13)
@@ -414,7 +414,7 @@ SYM_CODE_START(\name)
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13
+0: CHECK_STACK __LC_SAVE_AREA,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
@@ -432,7 +432,7 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
@@ -599,6 +599,24 @@ SYM_CODE_START(restart_int_handler)
3: j 3b
SYM_CODE_END(restart_int_handler)
+ __INIT
+SYM_CODE_START(early_pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+ __FINIT
+
.section .kprobes.text, "ax"
#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0bd6adc40a34..0b6e62d1d8b8 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -50,10 +50,6 @@ struct ftrace_insn {
s32 disp;
} __packed;
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
static const char *ftrace_shared_hotpatch_trampoline(const char **end)
{
const char *tstart, *tend;
@@ -73,19 +69,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
bool ftrace_need_init_nop(void)
{
- return true;
+ return !MACHINE_HAS_SEQ_INSN;
}
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
__ftrace_hotpatch_trampolines_start;
- static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+ static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
static struct ftrace_hotpatch_trampoline *trampoline;
struct ftrace_hotpatch_trampoline **next_trampoline;
struct ftrace_hotpatch_trampoline *trampolines_end;
struct ftrace_hotpatch_trampoline tmp;
struct ftrace_insn *insn;
+ struct ftrace_insn old;
const char *shared;
s32 disp;
@@ -99,7 +96,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
if (mod) {
next_trampoline = &mod->arch.next_trampoline;
trampolines_end = mod->arch.trampolines_end;
- shared = ftrace_plt;
}
#endif
@@ -107,8 +103,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return -ENOMEM;
trampoline = (*next_trampoline)++;
+ if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+ return -EFAULT;
/* Check for the compiler-generated fentry nop (brcl 0, .). */
- if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+ if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
return -EINVAL;
/* Generate the trampoline. */
@@ -144,8 +142,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
return trampoline;
}
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+ /* brasl r0,target or brcl 0,0 */
+ return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+ .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+ unsigned long target)
+{
+ struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+ struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+ struct ftrace_insn old;
+
+ if (!IS_ALIGNED(ip, 8))
+ return -EINVAL;
+ if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+ return -EFAULT;
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *)ip, &new, sizeof(new));
+ return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
u64 old;
@@ -161,6 +186,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return 0;
}
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+ else
+ return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
{
u16 old;
@@ -179,11 +213,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Expect brcl 0xf,... */
- return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+ /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, addr, 0);
+ else
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
}
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
@@ -195,6 +232,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ if (MACHINE_HAS_SEQ_INSN)
+ return ftrace_patch_branch_insn(rec->ip, 0, addr);
+ else
+ return ftrace_make_trampoline_call(rec, addr);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
ftrace_func = func;
@@ -215,25 +260,6 @@ void ftrace_arch_code_modify_post_process(void)
text_poke_sync_lock();
}
-#ifdef CONFIG_MODULES
-
-static int __init ftrace_plt_init(void)
-{
- const char *start, *end;
-
- ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
- if (!ftrace_plt)
- panic("cannot allocate ftrace plt\n");
-
- start = ftrace_shared_hotpatch_trampoline(&end);
- memcpy(ftrace_plt, start, end - start);
- set_memory_rox((unsigned long)ftrace_plt, 1);
- return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* Hook the return address and push it in the stack of return addresses
@@ -264,26 +290,14 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
*/
int ftrace_enable_ftrace_graph_caller(void)
{
- int rc;
-
/* Expect brc 0xf,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
+ return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
}
int ftrace_disable_ftrace_graph_caller(void)
{
- int rc;
-
/* Expect brc 0x0,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
+ return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
extern const char ftrace_shared_hotpatch_trampoline_br_end[];
extern const char ftrace_shared_hotpatch_trampoline_exrl[];
extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..2a99a216ab62
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "hd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There is two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's reoccuring work function reads CPU capacities to
+ * determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ * updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread
+ * performance. Comparing the throughput of;
+ * - single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yield better
+ * performance. This performance difference is roughly ~30% (can change
+ * between machine generations)
+ *
+ * Hiperdispatch tries to hint scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs are less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR (4)
+#define HD_DELAY_INTERVAL (HZ / 4)
+#define HD_STEAL_THRESHOLD 30
+#define HD_STEAL_AVG_WEIGHT 16
+
+static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores; /* Current CORE count with high capacity */
+static int hd_entitled_cores; /* Total vertical high and medium CORE count */
+static int hd_online_cores; /* Current online CORE count */
+
+static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+ if (!MACHINE_HAS_TOPOLOGY)
+ enable = 0;
+ if (hd_enabled == enable)
+ return 0;
+ hd_enabled = enable;
+ return 1;
+}
+
+void hd_reset_state(void)
+{
+ cpumask_clear(&hd_vl_coremask);
+ cpumask_clear(&hd_vmvl_cpumask);
+ hd_entitled_cores = 0;
+ hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+ const struct cpumask *siblings;
+ int polarization;
+
+ hd_online_cores++;
+ polarization = smp_cpu_get_polarization(cpu);
+ siblings = topology_sibling_cpumask(cpu);
+ switch (polarization) {
+ case POLARIZATION_VH:
+ hd_entitled_cores++;
+ break;
+ case POLARIZATION_VM:
+ hd_entitled_cores++;
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ case POLARIZATION_VL:
+ cpumask_set_cpu(cpu, &hd_vl_coremask);
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ }
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+ static ktime_t prev;
+ ktime_t now;
+
+ /*
+ * Check if hiperdispatch is active, if not set the prev to 0.
+ * This way it is possible to differentiate the first update iteration after
+ * enabling hiperdispatch.
+ */
+ if (hd_entitled_cores == 0 || hd_enabled == 0) {
+ prev = ktime_set(0, 0);
+ return;
+ }
+ now = ktime_get();
+ if (ktime_after(prev, 0)) {
+ if (hd_high_capacity_cores == hd_online_cores)
+ hd_high_time += ktime_ms_delta(now, prev);
+ else
+ hd_low_time += ktime_ms_delta(now, prev);
+ }
+ prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+ int cpu, upscaling_cores;
+ unsigned long capacity;
+
+ upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+ capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+ hd_high_capacity_cores = hd_entitled_cores;
+ for_each_cpu(cpu, &hd_vl_coremask) {
+ smp_set_core_capacity(cpu, capacity);
+ if (capacity != CPU_CAPACITY_HIGH)
+ continue;
+ hd_high_capacity_cores++;
+ upscaling_cores--;
+ if (upscaling_cores == 0)
+ capacity = CPU_CAPACITY_LOW;
+ }
+}
+
+void hd_disable_hiperdispatch(void)
+{
+ cancel_delayed_work_sync(&hd_capacity_work);
+ hd_high_capacity_cores = hd_online_cores;
+ hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ mutex_unlock(&hd_counter_mutex);
+ if (hd_enabled == 0)
+ return 0;
+ if (hd_entitled_cores == 0)
+ return 0;
+ if (hd_online_cores <= hd_entitled_cores)
+ return 0;
+ mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+ hd_update_capacities();
+ return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+ static unsigned long steal;
+
+ steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+ return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+ unsigned long time_delta, steal_delta, steal, percentage;
+ static ktime_t prev;
+ int cpus, cpu;
+ ktime_t now;
+
+ cpus = 0;
+ steal = 0;
+ percentage = 0;
+ for_each_cpu(cpu, &hd_vmvl_cpumask) {
+ steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ cpus++;
+ }
+ /*
+ * If there is no vertical medium and low CPUs steal time
+ * is 0 as vertical high CPUs shouldn't experience steal time.
+ */
+ if (cpus == 0)
+ return percentage;
+ now = ktime_get();
+ time_delta = ktime_to_ns(ktime_sub(now, prev));
+ if (steal > hd_previous_steal && hd_previous_steal != 0) {
+ steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+ percentage = steal_delta / cpus;
+ }
+ hd_previous_steal = steal;
+ prev = now;
+ return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+ unsigned long steal_percentage, new_cores;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ /*
+ * If online cores are less or equal to entitled cores hiperdispatch
+ * does not need to make any adjustments, call a topology update to
+ * disable hiperdispatch.
+ * Normally this check is handled on topology update, but during cpu
+ * unhotplug, topology and cpu mask updates are done in reverse
+ * order, causing hd_enable_hiperdispatch() to get stale data.
+ */
+ if (hd_online_cores <= hd_entitled_cores) {
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return;
+ }
+ steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+ if (steal_percentage < hd_steal_threshold)
+ new_cores = hd_online_cores;
+ else
+ new_cores = hd_entitled_cores;
+ if (hd_high_capacity_cores != new_cores) {
+ trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+ hd_high_capacity_cores = new_cores;
+ atomic64_inc(&hd_adjustments);
+ topology_schedule_update();
+ }
+ trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+ mutex_unlock(&smp_cpu_state_mutex);
+ schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int hiperdispatch;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &hiperdispatch,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ hiperdispatch = hd_enabled;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (hd_set_hiperdispatch_mode(hiperdispatch))
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return 0;
+}
+
+static struct ctl_table hiperdispatch_ctl_table[] = {
+ {
+ .procname = "hiperdispatch",
+ .mode = 0644,
+ .proc_handler = hiperdispatch_ctl_handler,
+ },
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val > 100)
+ return -ERANGE;
+ hd_steal_threshold = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (!val)
+ return -ERANGE;
+ hd_delay_factor = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+ &dev_attr_hd_steal_threshold.attr,
+ &dev_attr_hd_delay_factor.attr,
+ NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+ .name = "hiperdispatch",
+ .attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_high_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_low_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+ *val = atomic64_read(&hd_adjustments);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+ debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+ debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+ debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+ struct device *dev;
+
+ dev = bus_get_dev_root(&cpu_subsys);
+ if (!dev)
+ return;
+ if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+ pr_warn("Unable to create hiperdispatch attribute group\n");
+ put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+ if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+ hd_set_hiperdispatch_mode(1);
+ topology_schedule_update();
+ }
+ if (!register_sysctl("s390", hiperdispatch_ctl_table))
+ pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+ hd_create_debugfs_counters();
+ hd_create_attributes();
+ return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 1af5a08d72ab..2639a3d12736 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -76,6 +76,7 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 05c83505e979..6295faf0987d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -21,6 +21,7 @@
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/execmem.h>
+#include <asm/text-patching.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -152,7 +153,12 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (MACHINE_HAS_SEQ_INSN) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -160,7 +166,12 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (MACHINE_HAS_SEQ_INSN) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index ae4d4fd9afcd..7e267ef63a7f 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,6 +9,7 @@
#include <asm/ftrace.h>
#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
+#include <asm/march.h>
#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -88,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
SYM_CODE_END(ftrace_caller)
SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
aghik %r2,%r0,-MCOUNT_INSN_SIZE
lgrl %r4,function_trace_op
lgrl %r1,ftrace_func
@@ -115,7 +116,7 @@ SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
.Lftrace_graph_caller_end:
#endif
lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
locgrz %r1,%r0
#else
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..ddc1448ea2e1 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
#include <linux/node.h>
#include <asm/numa.h>
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
void __init numa_setup(void)
{
int nid;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 6968be98af11..e2e0aa463fbd 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -22,6 +22,10 @@
#include <asm/hwctrset.h>
#include <asm/debug.h>
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
+
enum cpumf_ctr_set {
CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
@@ -1694,7 +1698,6 @@ static const struct file_operations cfset_fops = {
.release = cfset_release,
.unlocked_ioctl = cfset_ioctl,
.compat_ioctl = cfset_ioctl,
- .llseek = no_llseek
};
static struct miscdevice cfset_dev = {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 736c1d9632dd..5b765e3ccf0c 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -24,6 +24,22 @@
#include <asm/timex.h>
#include <linux/io.h>
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
* A single table contains up to 511 pointers to sample-data-blocks.
@@ -113,17 +129,6 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
return USEC_PER_SEC * qsi->cpu_speed / rate;
}
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
- /* TOD in STCKE format */
- if (te->header.t)
- return *((unsigned long long *)&te->timestamp[1]);
-
- /* TOD in STCK format */
- return *((unsigned long long *)&te->timestamp[0]);
-}
-
/* Return pointer to trailer entry of an sample data block */
static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
{
@@ -154,12 +159,12 @@ static inline unsigned long *get_next_sdbt(unsigned long *s)
/*
* sf_disable() - Switch off sampling facility
*/
-static int sf_disable(void)
+static void sf_disable(void)
{
struct hws_lsctl_request_block sreq;
memset(&sreq, 0, sizeof(sreq));
- return lsctl(&sreq);
+ lsctl(&sreq);
}
/*
@@ -208,8 +213,6 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
}
}
- debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
- (unsigned long)sfb->sdbt);
memset(sfb, 0, sizeof(*sfb));
}
@@ -265,10 +268,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "%s: "
- "sampling buffer is not linked: origin %#lx"
- " tail %#lx\n", __func__,
- (unsigned long)sfb->sdbt,
+ debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+ __func__, (unsigned long)sfb->sdbt,
(unsigned long)tail);
return -EINVAL;
}
@@ -318,9 +319,6 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
*tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "%s: new buffer"
- " settings: sdbt %lu sdb %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -357,15 +355,8 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
- if (rc) {
+ if (rc)
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "%s: "
- "realloc_sampling_buffer failed with rc %i\n",
- __func__, rc);
- } else
- debug_sprintf_event(sfdbg, 4,
- "%s: tear %#lx dear %#lx\n", __func__,
- (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -377,8 +368,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
CPUM_SF_MAX_SDB = max;
memset(&si, 0, sizeof(si));
- if (!qsi(&si))
- CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+ qsi(&si);
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
}
static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -397,12 +388,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
return 0;
}
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
- struct hw_perf_event *hwc)
-{
- return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
/* Limit the number of SDBs to not exceed the maximum */
@@ -426,7 +411,6 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
unsigned long n_sdb, freq;
- size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
@@ -457,7 +441,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
* to 511 SDBs).
*/
- sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
@@ -473,12 +456,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
if (sf_buffer_available(cpuhw))
return 0;
- debug_sprintf_event(sfdbg, 3,
- "%s: rate %lu f %lu sdb %lu/%lu"
- " sample_size %lu cpuhw %p\n", __func__,
- SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
- sample_size, cpuhw);
-
return alloc_sampling_buffer(&cpuhw->sfb,
sfb_pending_allocs(&cpuhw->sfb, hwc));
}
@@ -535,8 +512,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
- __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -554,13 +529,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
static void extend_sampling_buffer(struct sf_buffer *sfb,
struct hw_perf_event *hwc)
{
- unsigned long num, num_old;
- int rc;
+ unsigned long num;
num = sfb_pending_allocs(sfb, hwc);
if (!num)
return;
- num_old = sfb->num_sdb;
/* Disable the sampling facility to reset any states and also
* clear pending measurement alerts.
@@ -572,51 +545,33 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
* called by perf. Because this is a reallocation, it is fine if the
* new SDB-request cannot be satisfied immediately.
*/
- rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
- if (rc)
- debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
- __func__, rc);
-
- if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "%s: "
- "req %lu alloc %lu remaining %lu\n",
- __func__, num, sfb->num_sdb - num_old,
- sfb_pending_allocs(sfb, hwc));
+ realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
}
/* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
#define PMC_INIT 0
#define PMC_RELEASE 1
-#define PMC_FAILURE 2
static void setup_pmc_cpu(void *flags)
{
- struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
- int err = 0;
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
switch (*((int *)flags)) {
case PMC_INIT:
- memset(cpusf, 0, sizeof(*cpusf));
- err = qsi(&cpusf->qsi);
- if (err)
- break;
- cpusf->flags |= PMU_F_RESERVED;
- err = sf_disable();
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ qsi(&cpuhw->qsi);
+ cpuhw->flags |= PMU_F_RESERVED;
+ sf_disable();
break;
case PMC_RELEASE:
- cpusf->flags &= ~PMU_F_RESERVED;
- err = sf_disable();
- if (!err)
- deallocate_buffers(cpusf);
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ sf_disable();
+ deallocate_buffers(cpuhw);
break;
}
- if (err) {
- *((int *)flags) |= PMC_FAILURE;
- pr_err("Switching off the sampling facility failed with rc %i\n", err);
- }
}
static void release_pmc_hardware(void)
@@ -627,27 +582,19 @@ static void release_pmc_hardware(void)
on_each_cpu(setup_pmc_cpu, &flags, 1);
}
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
{
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- if (flags & PMC_FAILURE) {
- release_pmc_hardware();
- return -ENODEV;
- }
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
}
static void hw_perf_event_destroy(struct perf_event *event)
{
/* Release PMC if this is the last perf event */
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- release_pmc_hardware();
+ if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -751,9 +698,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
*/
if (sample_rate_to_freq(si, rate) >
sysctl_perf_event_sample_rate) {
- debug_sprintf_event(sfdbg, 1, "%s: "
- "Sampling rate exceeds maximum "
- "perf sample rate\n", __func__);
rate = 0;
}
}
@@ -798,9 +742,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
attr->sample_period = rate;
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
- debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
- __func__, event->cpu, event->attr.sample_period,
- event->attr.freq, SAMPLE_FREQ_MODE(hwc));
return 0;
}
@@ -810,23 +751,17 @@ static int __hw_perf_event_init(struct perf_event *event)
struct hws_qsi_info_block si;
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- int cpu, err;
+ int cpu, err = 0;
/* Reserve CPU-measurement sampling facility */
- err = 0;
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
+ mutex_lock(&pmc_reserve_mutex);
+ if (!refcount_inc_not_zero(&num_events)) {
+ reserve_pmc_hardware();
+ refcount_set(&num_events, 1);
}
+ mutex_unlock(&pmc_reserve_mutex);
event->destroy = hw_perf_event_destroy;
- if (err)
- goto out;
-
/* Access per-CPU sampling information (query sampling info) */
/*
* The event->cpu value can be -1 to count on every CPU, for example,
@@ -838,9 +773,9 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
memset(&si, 0, sizeof(si));
cpuhw = NULL;
- if (event->cpu == -1)
+ if (event->cpu == -1) {
qsi(&si);
- else {
+ } else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
*/
@@ -881,10 +816,6 @@ static int __hw_perf_event_init(struct perf_event *event)
if (err)
goto out;
- /* Initialize sample data overflow accounting */
- hwc->extra_reg.reg = REG_OVERFLOW;
- OVERFLOW_REG(hwc) = 0;
-
/* Use AUX buffer. No need to allocate it by ourself */
if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
return 0;
@@ -1007,7 +938,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
extend_sampling_buffer(&cpuhw->sfb, hwc);
}
/* Rate may be adjusted with ioctl() */
- cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+ cpuhw->lsctl.interval = SAMPL_RATE(hwc);
}
/* (Re)enable the PMU and sampling facility */
@@ -1023,12 +954,6 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
/* Load current program parameter */
lpp(&get_lowcore()->lpp);
-
- debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
- "interval %#lx tear %#lx dear %#lx\n", __func__,
- cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
- cpuhw->lsctl.cd, cpuhw->lsctl.interval,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1055,21 +980,18 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
return;
}
- /* Save state of TEAR and DEAR register contents */
- err = qsi(&si);
- if (!err) {
- /* TEAR/DEAR values are valid only if the sampling facility is
- * enabled. Note that cpumsf_pmu_disable() might be called even
- * for a disabled sampling facility because cpumsf_pmu_enable()
- * controls the enable/disable state.
- */
- if (si.es) {
- cpuhw->lsctl.tear = si.tear;
- cpuhw->lsctl.dear = si.dear;
- }
- } else
- debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
- __func__, err);
+ /*
+ * Save state of TEAR and DEAR register contents.
+ * TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ qsi(&si);
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
cpuhw->flags &= ~PMU_F_ENABLED;
}
@@ -1235,11 +1157,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sprintf_event(sfdbg, 4,
- "%s: Found unknown"
- " sampling data entry: te->f %i"
- " basic.def %#4x (%p)\n", __func__,
- te->header.f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1284,7 +1201,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* AUX buffer is used when in diagnostic sampling mode.
* No perf events/samples are created.
*/
- if (SAMPL_DIAG_MODE(&event->hw))
+ if (SAMPL_DIAG_MODE(hwc))
return;
sdbt = (unsigned long *)TEAR_REG(hwc);
@@ -1309,13 +1226,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
sampl_overflow += te->header.overflow;
- /* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
- "overflow %llu timestamp %#llx\n",
- __func__, sdb, (unsigned long)sdbt,
- te->header.overflow,
- (te->header.f) ? trailer_timestamp(te) : 0ULL);
-
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
@@ -1340,7 +1250,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sdbt = get_next_sdbt(sdbt);
/* Update event hardware registers */
- TEAR_REG(hwc) = (unsigned long) sdbt;
+ TEAR_REG(hwc) = (unsigned long)sdbt;
/* Stop processing sample-data if all samples of the current
* sample-data-block were flushed even if it was not full.
@@ -1362,19 +1272,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* are dropped.
* Slightly increase the interval to avoid hitting this limit.
*/
- if (event_overflow) {
+ if (event_overflow)
SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
- debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
- __func__,
- DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
- }
-
- if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "%s: "
- "overflows: sample %llu event %llu"
- " total %llu num_sdb %llu\n",
- __func__, sampl_overflow, event_overflow,
- OVERFLOW_REG(hwc), num_sdb);
}
static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
@@ -1442,9 +1341,6 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
te->header.a = 0;
-
- debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
- __func__, i, range_scan, aux->head);
}
/*
@@ -1463,7 +1359,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
- if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+ if (handle->head & ~PAGE_MASK)
return -EINVAL;
aux->head = handle->head >> PAGE_SHIFT;
@@ -1475,10 +1371,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
- debug_sprintf_event(sfdbg, 6,
- "%s: range %ld head %ld alert %ld empty %ld\n",
- __func__, range, aux->head, aux->alert_mark,
- aux->empty_mark);
if (range > aux_sdb_num_empty(aux)) {
range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
@@ -1504,12 +1396,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
- "index %ld tear %#lx dear %#lx\n", __func__,
- aux->head, aux->alert_mark, aux->empty_mark,
- head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
-
return 0;
}
@@ -1571,14 +1457,11 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long i, range_scan, idx, idx_old;
union hws_trailer_header old, prev, new;
+ unsigned long i, range_scan, idx;
unsigned long long orig_overflow;
struct hws_trailer_entry *te;
- debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
- "empty %ld\n", __func__, range, aux->head,
- aux->alert_mark, aux->empty_mark);
if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
@@ -1601,7 +1484,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* indicator fall into this range, set it.
*/
range_scan = range - aux_sdb_num_empty(aux);
- idx_old = idx = aux->empty_mark + 1;
+ idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
@@ -1623,9 +1506,6 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
- debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
- "empty %ld\n", __func__, range_scan, idx_old,
- idx - 1, aux->empty_mark);
return true;
}
@@ -1642,12 +1522,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long num_sdb;
aux = perf_get_aux(handle);
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Inform user space new data arrived */
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
- debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1661,7 +1541,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
num_sdb);
break;
}
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Update head and alert_mark to new position */
@@ -1681,23 +1561,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
"pages to overflow\n", aux->sfb.num_sdb);
- debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
- "overflow %lld\n", __func__,
- aux->head, range, overflow);
} else {
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "already full, try another\n",
- __func__,
- aux->head, aux->alert_mark);
}
}
-
- if (done)
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "empty %ld\n", __func__, aux->head,
- aux->alert_mark, aux->empty_mark);
}
/*
@@ -1719,8 +1587,6 @@ static void aux_buffer_free(void *data)
kfree(aux->sdbt_index);
kfree(aux->sdb_index);
kfree(aux);
-
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
@@ -1828,9 +1694,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
-
return aux;
no_sdbt:
@@ -1863,8 +1726,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
memset(&si, 0, sizeof(si));
if (event->cpu == -1) {
- if (qsi(&si))
- return -ENODEV;
+ qsi(&si);
} else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
@@ -1874,7 +1736,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
si = cpuhw->qsi;
}
- do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+ do_freq = !!SAMPL_FREQ_MODE(&event->hw);
rate = getrate(do_freq, value, &si);
if (!rate)
return -EINVAL;
@@ -1882,10 +1744,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
event->attr.sample_period = rate;
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
- debug_sprintf_event(sfdbg, 4, "%s:"
- " cpu %d value %#llx period %#llx freq %d\n",
- __func__, event->cpu, value,
- event->attr.sample_period, do_freq);
return 0;
}
@@ -1896,12 +1754,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (!(event->hw.state & PERF_HES_STOPPED))
return;
-
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
perf_pmu_disable(event->pmu);
event->hw.state = 0;
cpuhw->lsctl.cs = 1;
@@ -1936,7 +1790,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct aux_buffer *aux;
- int err;
+ int err = 0;
if (cpuhw->flags & PMU_F_IN_USE)
return -EAGAIN;
@@ -1944,7 +1798,6 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
return -EINVAL;
- err = 0;
perf_pmu_disable(event->pmu);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -2115,7 +1968,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
alert);
/* Sampling authorization change request */
@@ -2143,7 +1996,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
/* Ignore the notification if no events are scheduled on the PMU.
* This might be racy...
*/
- if (!atomic_read(&num_events))
+ if (!refcount_read(&num_events))
return 0;
local_irq_disable();
@@ -2205,10 +2058,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
.get = param_get_sfb_size,
};
-#define RS_INIT_FAILURE_QSI 0x0001
-#define RS_INIT_FAILURE_BSDES 0x0002
-#define RS_INIT_FAILURE_ALRT 0x0003
-#define RS_INIT_FAILURE_PERF 0x0004
+enum {
+ RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */
+ RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */
+ RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */
+};
+
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
@@ -2224,11 +2079,7 @@ static int __init init_cpum_sampling_pmu(void)
return -ENODEV;
memset(&si, 0, sizeof(si));
- if (qsi(&si)) {
- pr_cpumsf_err(RS_INIT_FAILURE_QSI);
- return -ENODEV;
- }
-
+ qsi(&si);
if (!si.as && !si.ad)
return -ENODEV;
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 2f5a20e300f6..fa7325454266 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -738,6 +738,22 @@ static const char * const paicrypt_ctrnames[] = {
[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
[155] = "IBM_RESERVED_155",
[156] = "IBM_RESERVED_156",
+ [157] = "KM_FULL_XTS_AES_128",
+ [158] = "KM_FULL_XTS_AES_256",
+ [159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+ [160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+ [161] = "KMAC_HMAC_SHA_224",
+ [162] = "KMAC_HMAC_SHA_256",
+ [163] = "KMAC_HMAC_SHA_384",
+ [164] = "KMAC_HMAC_SHA_512",
+ [165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+ [166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+ [167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+ [168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+ [169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+ [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+ [171] = "PCKMO_ENCRYPT_AES_XTS_128",
+ [172] = "PCKMO_ENCRYPT_AES_XTS_256",
};
static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 6295531b39a2..7f462bef1fc0 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -635,6 +635,15 @@ static const char * const paiext_ctrnames[] = {
[25] = "NNPA_1MFRAME",
[26] = "NNPA_2GFRAME",
[27] = "NNPA_ACCESSEXCEPT",
+ [28] = "NNPA_TRANSFORM",
+ [29] = "NNPA_GELU",
+ [30] = "NNPA_MOMENTS",
+ [31] = "NNPA_LAYERNORM",
+ [32] = "NNPA_MATMUL_OP_BCAST1",
+ [33] = "NNPA_SQRT",
+ [34] = "NNPA_INVSQRT",
+ [35] = "NNPA_NORM",
+ [36] = "NNPA_REDUCE",
};
static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6c2cb345402f..e48013cd832c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -30,9 +30,9 @@
#include <linux/compat.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/vdso.h>
#include "entry.h"
/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index fbba37ec53cf..4df56fdb2488 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -671,6 +671,25 @@ int smp_cpu_get_polarization(int cpu)
return per_cpu(pcpu_devices, cpu).polarization;
}
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+ per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+ int i;
+
+ cpu = smp_get_base_cpu(cpu);
+ for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+ smp_cpu_set_capacity(i, val);
+}
+
int smp_cpu_get_cpu_address(int cpu)
{
return per_cpu(pcpu_devices, cpu).address;
@@ -719,6 +738,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
else
pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
set_cpu_present(cpu, true);
if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
@@ -961,6 +981,7 @@ void __init smp_prepare_boot_cpu(void)
ipl_pcpu->state = CPU_STATE_CONFIGURED;
lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}
void __init smp_setup_processor_id(void)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 640363b2a105..9f59837d159e 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -162,22 +162,3 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
{
arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
}
-
-unsigned long return_address(unsigned int n)
-{
- struct unwind_state state;
- unsigned long addr;
-
- /* Increment to skip current stack entry */
- n++;
-
- unwind_for_each_frame(&state, NULL, NULL, 0) {
- addr = unwind_get_return_address(&state);
- if (!addr)
- break;
- if (!n--)
- return addr;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 2be30a96696a..88055f58fbda 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
.open = stsi_open_##fc##_##s1##_##s2, \
.release = stsi_release, \
.read = stsi_read, \
- .llseek = no_llseek, \
};
static int stsi_release(struct inode *inode, struct file *file)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 22029ecae1c5..813e5da9a973 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -24,6 +24,7 @@
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
+#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
#define PTF_HORIZONTAL (0UL)
@@ -47,6 +48,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
static DECLARE_WORK(topology_work, topology_work_fn);
@@ -144,6 +146,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
cpumask_set_cpu(cpu, &book->mask);
cpumask_set_cpu(cpu, &socket->mask);
smp_cpu_set_polarization(cpu, tl_core->pp);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
}
}
}
@@ -270,6 +273,7 @@ void update_cpu_masks(void)
topo->drawer_id = id;
}
}
+ hd_reset_state();
for_each_online_cpu(cpu) {
topo = &cpu_topology[cpu];
pkg_first = cpumask_first(&topo->core_mask);
@@ -278,8 +282,10 @@ void update_cpu_masks(void)
for_each_cpu(sibling, &topo->core_mask) {
topo_sibling = &cpu_topology[sibling];
smt_first = cpumask_first(&topo_sibling->thread_mask);
- if (sibling == smt_first)
+ if (sibling == smt_first) {
topo_package->booted_cores++;
+ hd_add_core(sibling);
+ }
}
} else {
topo->booted_cores = topo_package->booted_cores;
@@ -303,8 +309,10 @@ static void __arch_update_dedicated_flag(void *arg)
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- int rc = 0;
+ int rc, hd_status;
+ hd_status = 0;
+ rc = 0;
mutex_lock(&smp_cpu_state_mutex);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
@@ -314,7 +322,11 @@ static int __arch_update_cpu_topology(void)
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
+ if (cpu_management == 1)
+ hd_status = hd_enable_hiperdispatch();
mutex_unlock(&smp_cpu_state_mutex);
+ if (hd_status == 0)
+ hd_disable_hiperdispatch();
return rc;
}
@@ -374,7 +386,24 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int cpu_management;
+static int set_polarization(int polarization)
+{
+ int rc = 0;
+
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == polarization)
+ goto out;
+ rc = topology_set_cpu_management(polarization);
+ if (rc)
+ goto out;
+ cpu_management = polarization;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return rc;
+}
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
@@ -400,19 +429,7 @@ static ssize_t dispatching_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- rc = 0;
- cpus_read_lock();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (rc)
- goto out;
- cpu_management = val;
- topology_expect_change();
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- cpus_read_unlock();
+ rc = set_polarization(val);
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
@@ -624,12 +641,37 @@ static int topology_ctl_handler(const struct ctl_table *ctl, int write,
return rc;
}
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int polarization;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &polarization,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ polarization = cpu_management;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ return set_polarization(polarization);
+}
+
static struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
+ {
+ .procname = "polarization",
+ .mode = 0644,
+ .proc_handler = polarization_ctl_handler,
+ },
};
static int __init topology_init(void)
@@ -642,6 +684,8 @@ static int __init topology_init(void)
set_topology_timer();
else
topology_update_polarization_simple();
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+ set_polarization(1);
register_sysctl("s390", topology_ctl_table);
dev_root = bus_get_dev_root(&cpu_subsys);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 36db065c7cf7..9646f773208a 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/pagewalk.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
@@ -462,9 +463,9 @@ EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
struct vm_area_struct *vma;
+ struct folio_walk fw;
unsigned long uaddr;
struct folio *folio;
- struct page *page;
int rc;
rc = -EFAULT;
@@ -483,11 +484,15 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
goto out;
rc = 0;
- /* we take an extra reference here */
- page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page))
+ folio = folio_walk_start(&fw, vma, uaddr, 0);
+ if (!folio)
goto out;
- folio = page_folio(page);
+ /*
+ * See gmap_make_secure(): large folios cannot be secure. Small
+ * folio implies FW_LEVEL_PTE.
+ */
+ if (folio_test_large(folio) || !pte_write(fw.pte))
+ goto out_walk_end;
rc = uv_destroy_folio(folio);
/*
* Fault handlers can race; it is possible that two CPUs will fault
@@ -500,7 +505,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
*/
if (rc)
rc = uv_convert_from_secure_folio(folio);
- folio_put(folio);
+out_walk_end:
+ folio_walk_end(&fw, vma);
out:
mmap_read_unlock(gmap->mm);
return rc;
@@ -548,11 +554,6 @@ int arch_make_folio_accessible(struct folio *folio)
}
EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-int arch_make_page_accessible(struct page *page)
-{
- return arch_make_folio_accessible(page_folio(page));
-}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 2f967ac2b8e3..598b512cde01 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -12,12 +12,15 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
#include <linux/random.h>
#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
#include <asm/vdso.h>
extern char vdso64_start[], vdso64_end[];
@@ -29,12 +32,6 @@ static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
@@ -250,8 +247,25 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
return pagelist;
}
+static void vdso_apply_alternatives(void)
+{
+ const struct elf64_shdr *alt, *shdr;
+ struct alt_instr *start, *end;
+ const struct elf64_hdr *hdr;
+
+ hdr = (struct elf64_hdr *)vdso64_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+ start = (void *)hdr + alt->sh_offset;
+ end = (void *)hdr + alt->sh_offset + alt->sh_size;
+ apply_alternatives(start, end);
+}
+
static int __init vdso_init(void)
{
+ vdso_apply_alternatives();
vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
if (IS_ENABLED(CONFIG_COMPAT))
vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ba19c0ca7c87..37bb4b761229 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -3,12 +3,17 @@
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
+obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
# Build rules
targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
index 34c7a2312f9d..9e5397e7b590 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 37e2a505e81d..753040a4b5ab 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -4,6 +4,7 @@
* library
*/
+#include <asm/vdso/vsyscall.h>
#include <asm/page.h>
#include <asm/vdso.h>
@@ -13,6 +14,7 @@ OUTPUT_ARCH(s390:64-bit)
SECTIONS
{
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+ PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
@@ -42,6 +44,10 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ . = ALIGN(8);
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
.dynamic : { *(.dynamic) } :text :dynamic
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
@@ -140,6 +146,7 @@ VERSION
__kernel_restart_syscall;
__kernel_rt_sigreturn;
__kernel_sigreturn;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index e26e68675c08..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -13,10 +13,7 @@
* for details.
*/
.macro vdso_func func
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
@@ -32,7 +29,7 @@ __kernel_\func:
CFI_RESTORE 15
br %r14
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_func gettimeofday
@@ -41,16 +38,13 @@ vdso_func clock_gettime
vdso_func getcpu
.macro vdso_syscall func,syscall
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
svc \syscall
/* Make sure we notice when a syscall returns, which shouldn't happen */
.word 0
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0 %v0
+#define STATE1 %v1
+#define STATE2 %v2
+#define STATE3 %v3
+#define COPY0 %v4
+#define COPY1 %v5
+#define COPY2 %v6
+#define COPY3 %v7
+#define BEPERM %v19
+#define TMP0 %v20
+#define TMP1 %v21
+#define TMP2 %v22
+#define TMP3 %v23
+
+ .section .rodata
+
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+ .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+ .text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-bytes
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
+
+ /* COPY0 = "expand 32-byte k" */
+ VL COPY0,0,,%r1
+
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+ /* COPY1,COPY2 = key */
+ VLM COPY1,COPY2,0,%r3
+
+ /* COPY3 = counter || zero nonce */
+ lg %r3,0(%r4)
+ VZERO COPY3
+ VLVGG COPY3,%r3,0
+
+ lghi %r1,0
+.Lblock:
+ VLR STATE0,COPY0
+ VLR STATE1,COPY1
+ VLR STATE2,COPY2
+ VLR STATE3,COPY3
+
+ lghi %r0,10
+.Ldoubleround:
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+ VSLDB STATE1,STATE1,STATE1,4
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+ VSLDB STATE3,STATE3,STATE3,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+ VSLDB STATE1,STATE1,STATE1,12
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+ VSLDB STATE3,STATE3,STATE3,4
+ brctg %r0,.Ldoubleround
+
+ /* OUTPUT0 = STATE0 + COPY0 */
+ VAF STATE0,STATE0,COPY0
+ /* OUTPUT1 = STATE1 + COPY1 */
+ VAF STATE1,STATE1,COPY1
+ /* OUTPUT2 = STATE2 + COPY2 */
+ VAF STATE2,STATE2,COPY2
+ /* OUTPUT3 = STATE3 + COPY3 */
+ VAF STATE3,STATE3,COPY3
+
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
+
+ /* ++COPY3.COUNTER */
+ /* alsih %r3,1 */
+ .insn rilu,0xcc0a00000000,%r3,1
+ alcr %r3,%r1
+ VLVGG COPY3,%r3,0
+
+ /* OUTPUT += 64, --NBLOCKS */
+ aghi %r2,64
+ brctg %r5,.Lblock
+
+ /* COUNTER = COPY3.COUNTER */
+ stg %r3,0(%r4)
+
+ /* Zero out potentially sensitive regs */
+ VZERO STATE0
+ VZERO STATE1
+ VZERO STATE2
+ VZERO STATE3
+ VZERO COPY1
+ VZERO COPY2
+
+ /* Early exit if TMP0-TMP3 have not been used */
+ ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+ VZERO TMP0
+ VZERO TMP1
+ VZERO TMP2
+ VZERO TMP3
+
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (test_facility(129))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index ae5d0a9d6911..377b9aaf8c92 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -191,8 +191,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
- RUNTIME_CONST(shift, d_hash_shift)
- RUNTIME_CONST(ptr, dentry_hashtable)
+ RUNTIME_CONST_VARIABLES
PERCPU_SECTION(0x100)
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+ unsigned long missed;
+ unsigned long addr;
+ pid_t pid;
+};
+
+struct wti_state {
+ /* debug data for s390dbf */
+ struct wti_debug dbg;
+ /*
+ * Represents the real-time thread responsible to
+ * acknowledge the warning-track interrupt and trigger
+ * preliminary and postliminary precautions.
+ */
+ struct task_struct *thread;
+ /*
+ * If pending is true, the real-time thread must be scheduled.
+ * If not, a wake up of that thread will remain a noop.
+ */
+ bool pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, interrupts are disabled
+ * to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* disable all I/O interrupts */
+ cr6.val &= ~0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* enable all I/O interrupts */
+ cr6.val |= 0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+ struct pt_regs *regs = get_irq_regs();
+
+ st->dbg.pid = current->pid;
+ st->dbg.addr = 0;
+ if (!user_mode(regs))
+ st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct wti_state *st = this_cpu_ptr(&wti_state);
+
+ inc_irq_stat(IRQEXT_WTI);
+ wti_irq_disable();
+ store_debug_data(st);
+ st->pending = true;
+ wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+ struct wti_debug *wdi = &st->dbg;
+ char buf[WTI_DBF_LEN];
+
+ if (wdi->addr)
+ snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+ else
+ snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+ debug_text_event(wti_dbg, 2, buf);
+ wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+ struct wti_state *st;
+ int cpu;
+
+ cpus_read_lock();
+ seq_puts(seq, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(seq, "CPU%-8d", cpu);
+ seq_putc(seq, '\n');
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ seq_printf(seq, " %10lu", st->dbg.missed);
+ }
+ seq_putc(seq, '\n');
+ cpus_read_unlock();
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ st->pending = false;
+ /*
+ * Yield CPU voluntarily to the hypervisor. Control
+ * resumes when hypervisor decides to dispatch CPU
+ * to this LPAR again.
+ */
+ if (diag49c(DIAG49C_SUBC_ACK))
+ wti_dbf_grace_period(st);
+ wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+ .store = &wti_state.thread,
+ .thread_should_run = wti_pending,
+ .thread_fn = wti_thread_fn,
+ .thread_comm = "cpuwti/%u",
+ .selfparking = false,
+};
+
+static int __init wti_init(void)
+{
+ struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct dentry *wti_dir;
+ struct wti_state *st;
+ int cpu, rc;
+
+ rc = -EOPNOTSUPP;
+ if (!sclp.has_wti)
+ goto out;
+ rc = smpboot_register_percpu_thread(&wti_threads);
+ if (WARN_ON(rc))
+ goto out;
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+ }
+ rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+ if (rc) {
+ pr_warn("Couldn't request external interrupt 0x1007\n");
+ goto out_thread;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+ rc = diag49c(DIAG49C_SUBC_REG);
+ if (rc) {
+ pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+ rc = -EOPNOTSUPP;
+ goto out_subclass;
+ }
+ wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+ debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+ wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+ if (!wti_dbg) {
+ rc = -ENOMEM;
+ goto out_debug_register;
+ }
+ rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+ if (rc)
+ goto out_debug_register;
+ goto out;
+out_debug_register:
+ debug_unregister(wti_dbg);
+out_subclass:
+ irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+ unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+ smpboot_unregister_percpu_thread(&wti_threads);
+out:
+ return rc;
+}
+late_initcall(wti_init);