37 files changed, 1387 insertions, 410 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index e47a4be54ff8..48caae8c7e10 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -36,22 +36,23 @@ CFLAGS_stacktrace.o	+= -fno-optimize-sibling-calls
 CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
 
-obj-y	:= head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y	:= head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
 obj-y	+= sysinfo.o lgr.o os_info.o ctlreg.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
 obj-y	+= entry.o reipl.o kdebugfs.o alternative.o
 obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o
+obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
 
 extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
 
+obj-$(CONFIG_SYSFS)		+= cpacf.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o hiperdispatch.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ffa0dd2dbaac..5529248d84fb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -112,8 +112,7 @@ int main(void)
 	OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
 	OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
 	/* software defined lowcore locations 0x200 - 0xdff*/
-	OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
-	OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+	OFFSET(__LC_SAVE_AREA, lowcore, save_area);
 	OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
 	OFFSET(__LC_PCPU, lowcore, pcpu);
 	OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1942e2a9f8db..5a86b9d1da71 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,11 +24,11 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
+#include <asm/vdso-symbols.h>
 #include <asm/access-regs.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
 #include <asm/lowcore.h>
-#include <asm/vdso.h>
 #include <asm/fpu.h>
 #include "compat_linux.h"
 #include "compat_ptrace.h"
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..c8575dbc890d
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "cpacf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction)						\
+static ssize_t name##_query_raw_read(struct file *fp,				\
+				     struct kobject *kobj,			\
+				     struct bin_attribute *attr,		\
+				     char *buf, loff_t offs,			\
+				     size_t count)				\
+{										\
+	cpacf_mask_t mask;							\
+										\
+	if (!cpacf_query(CPACF_##instruction, &mask))				\
+		return -EOPNOTSUPP;						\
+	return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask));	\
+}										\
+static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction)				\
+static ssize_t name##_query_auth_info_raw_read(			\
+	struct file *fp, struct kobject *kobj,			\
+	struct bin_attribute *attr, char *buf, loff_t offs,	\
+	size_t count)						\
+{								\
+	cpacf_qai_t qai;					\
+								\
+	if (!cpacf_qai(CPACF_##instruction, &qai))		\
+		return -EOPNOTSUPP;				\
+	return memory_read_from_buffer(buf, count, &offs, &qai, \
+					sizeof(qai));		\
+}								\
+static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static struct bin_attribute *cpacf_attrs[] = {
+	&bin_attr_km_query_raw,
+	&bin_attr_kmc_query_raw,
+	&bin_attr_kimd_query_raw,
+	&bin_attr_klmd_query_raw,
+	&bin_attr_kmac_query_raw,
+	&bin_attr_pckmo_query_raw,
+	&bin_attr_kmf_query_raw,
+	&bin_attr_kmctr_query_raw,
+	&bin_attr_kmo_query_raw,
+	&bin_attr_pcc_query_raw,
+	&bin_attr_prno_query_raw,
+	&bin_attr_kma_query_raw,
+	&bin_attr_kdsa_query_raw,
+	&bin_attr_km_query_auth_info_raw,
+	&bin_attr_kmc_query_auth_info_raw,
+	&bin_attr_kimd_query_auth_info_raw,
+	&bin_attr_klmd_query_auth_info_raw,
+	&bin_attr_kmac_query_auth_info_raw,
+	&bin_attr_pckmo_query_auth_info_raw,
+	&bin_attr_kmf_query_auth_info_raw,
+	&bin_attr_kmctr_query_auth_info_raw,
+	&bin_attr_kmo_query_auth_info_raw,
+	&bin_attr_pcc_query_auth_info_raw,
+	&bin_attr_prno_query_auth_info_raw,
+	&bin_attr_kma_query_auth_info_raw,
+	&bin_attr_kdsa_query_auth_info_raw,
+	NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+	.name = "cpacf",
+	.bin_attrs = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+	struct device *cpu_root;
+	int rc = 0;
+
+	cpu_root = bus_get_dev_root(&cpu_subsys);
+	if (cpu_root) {
+		rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+		put_device(cpu_root);
+	}
+	return rc;
+}
+device_initcall(cpacf_init);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index bce50ca75ea7..e62bea9ab21e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -163,7 +163,6 @@ static const struct file_operations debug_file_ops = {
 	.write	 = debug_input,
 	.open	 = debug_open,
 	.release = debug_close,
-	.llseek  = no_llseek,
 };
 
 static struct dentry *debug_debugfs_root_entry;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ac7b8c8e3133..007e1795670e 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -52,6 +52,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
 	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
 	[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
 	[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+	[DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
 	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
 };
 
@@ -303,3 +304,19 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode)
 	return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
 }
 EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+	int rc;
+
+	diag_stat_inc(DIAG_STAT_X49C);
+	asm volatile(
+		"	diag	%[subcode],0,0x49c\n"
+		"	ipm	%[rc]\n"
+		"	srl	%[rc],28\n"
+		: [rc] "=d" (rc)
+		: [subcode] "d" (subcode)
+		: "cc");
+	return rc;
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 89dc826a8d2e..94eb8168ea44 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -122,6 +122,7 @@ enum {
 	U8_32,	/* 8 bit unsigned value starting at 32 */
 	U12_16, /* 12 bit unsigned value starting at 16 */
 	U16_16, /* 16 bit unsigned value starting at 16 */
+	U16_20, /* 16 bit unsigned value starting at 20 */
 	U16_32, /* 16 bit unsigned value starting at 32 */
 	U32_16, /* 32 bit unsigned value starting at 16 */
 	VX_12,	/* Vector index register starting at position 12 */
@@ -184,6 +185,7 @@ static const struct s390_operand operands[] = {
 	[U8_32]	 = {  8, 32, 0 },
 	[U12_16] = { 12, 16, 0 },
 	[U16_16] = { 16, 16, 0 },
+	[U16_20] = { 16, 20, 0 },
 	[U16_32] = { 16, 32, 0 },
 	[U32_16] = { 32, 16, 0 },
 	[VX_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +259,6 @@ static const unsigned char formats[][6] = {
 	[INSTR_RSL_R0RD]     = { D_20, L4_8, B_16, 0, 0, 0 },
 	[INSTR_RSY_AARD]     = { A_8, A_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_CCRD]     = { C_8, C_12, D20_20, B_16, 0, 0 },
-	[INSTR_RSY_RDRU]     = { R_8, D20_20, B_16, U4_12, 0, 0 },
 	[INSTR_RSY_RRRD]     = { R_8, R_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_RURD]     = { R_8, U4_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_RURD2]    = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +301,17 @@ static const unsigned char formats[][6] = {
 	[INSTR_VRI_V0UU2]    = { V_8, U16_16, U4_32, 0, 0, 0 },
 	[INSTR_VRI_V0UUU]    = { V_8, U8_16, U8_24, U4_32, 0, 0 },
 	[INSTR_VRI_VR0UU]    = { V_8, R_12, U8_28, U4_24, 0, 0 },
+	[INSTR_VRI_VV0UU]    = { V_8, V_12, U8_28, U4_24, 0, 0 },
 	[INSTR_VRI_VVUU]     = { V_8, V_12, U16_16, U4_32, 0, 0 },
 	[INSTR_VRI_VVUUU]    = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
 	[INSTR_VRI_VVUUU2]   = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
 	[INSTR_VRI_VVV0U]    = { V_8, V_12, V_16, U8_24, 0, 0 },
 	[INSTR_VRI_VVV0UU]   = { V_8, V_12, V_16, U8_24, U4_32, 0 },
 	[INSTR_VRI_VVV0UU2]  = { V_8, V_12, V_16, U8_28, U4_24, 0 },
-	[INSTR_VRR_0V]	     = { V_12, 0, 0, 0, 0, 0 },
+	[INSTR_VRI_VVV0UV]   = { V_8, V_12, V_16, V_32, U8_24, 0 },
+	[INSTR_VRR_0V0U]     = { V_12, U16_20, 0, 0, 0, 0 },
 	[INSTR_VRR_0VV0U]    = { V_12, V_16, U4_24, 0, 0, 0 },
+	[INSTR_VRR_0VVU]     = { V_12, V_16, U16_20, 0, 0, 0 },
 	[INSTR_VRR_RV0UU]    = { R_8, V_12, U4_24, U4_28, 0, 0 },
 	[INSTR_VRR_VRR]	     = { V_8, R_12, R_16, 0, 0, 0 },
 	[INSTR_VRR_VV]	     = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +459,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 			if (separator)
 				ptr += sprintf(ptr, "%c", separator);
 			if (operand->flags & OPERAND_GPR)
-				ptr += sprintf(ptr, "%%r%i", value);
+				ptr += sprintf(ptr, "%%r%u", value);
 			else if (operand->flags & OPERAND_FPR)
-				ptr += sprintf(ptr, "%%f%i", value);
+				ptr += sprintf(ptr, "%%f%u", value);
 			else if (operand->flags & OPERAND_AR)
-				ptr += sprintf(ptr, "%%a%i", value);
+				ptr += sprintf(ptr, "%%a%u", value);
 			else if (operand->flags & OPERAND_CR)
-				ptr += sprintf(ptr, "%%c%i", value);
+				ptr += sprintf(ptr, "%%c%u", value);
 			else if (operand->flags & OPERAND_VR)
-				ptr += sprintf(ptr, "%%v%i", value);
+				ptr += sprintf(ptr, "%%v%u", value);
 			else if (operand->flags & OPERAND_PCREL) {
 				void *pcrel = (void *)((int)value + addr);
 
 				ptr += sprintf(ptr, "%px", pcrel);
 			} else if (operand->flags & OPERAND_SIGNED)
-				ptr += sprintf(ptr, "%i", value);
+				ptr += sprintf(ptr, "%i", (int)value);
 			else
 				ptr += sprintf(ptr, "%u", value);
 			if (operand->flags & OPERAND_DISP)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 14d324865e33..62f8f5a750a3 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -7,6 +7,7 @@
 #define KMSG_COMPONENT "setup"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/sched/debug.h>
 #include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -175,20 +176,45 @@ static __init void setup_topology(void)
 	topology_max_mnest = max_mnest;
 }
 
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
 {
-	if (!fixup_exception(regs))
-		disabled_wait();
+	struct lowcore *lc = get_lowcore();
+	unsigned long ip;
+
+	regs->int_code = lc->pgm_int_code;
+	regs->int_parm_long = lc->trans_exc_code;
+	ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+	/* Monitor Event? Might be a warning */
+	if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+		if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+			return;
+	}
+	if (fixup_exception(regs))
+		return;
+	/*
+	 * Unhandled exception - system cannot continue but try to get some
+	 * helpful messages to the console. Use early_printk() to print
+	 * some basic information in case it is too early for printk().
+	 */
+	register_early_console();
+	early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+		     regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+	show_regs(regs);
+	disabled_wait();
 }
 
 static noinline __init void setup_lowcore_early(void)
 {
+	struct lowcore *lc = get_lowcore();
 	psw_t psw;
 
 	psw.addr = (unsigned long)early_pgm_check_handler;
 	psw.mask = PSW_KERNEL_BITS;
-	get_lowcore()->program_new_psw = psw;
-	get_lowcore()->preempt_count = INIT_PREEMPT_COUNT;
+	lc->program_new_psw = psw;
+	lc->preempt_count = INIT_PREEMPT_COUNT;
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 }
 
 static __init void detect_diag9c(void)
@@ -242,6 +268,8 @@ static __init void detect_machine_facilities(void)
 	}
 	if (test_facility(194))
 		get_lowcore()->machine_flags |= MACHINE_FLAG_RDP;
+	if (test_facility(85))
+		get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN;
 }
 
 static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
 #include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <asm/setup.h>
 #include <asm/sclp.h>
 
 static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
 	.index = -1,
 };
 
+void __init register_early_console(void)
+{
+	if (early_console)
+		return;
+	if (!sclp.has_linemode && !sclp.has_vt220)
+		return;
+	early_console = &sclp_early_console;
+	register_console(early_console);
+}
+
 static int __init setup_early_printk(char *buf)
 {
 	if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
 	/* Accept only "earlyprintk" and "earlyprintk=sclp" */
 	if (buf && !str_has_prefix(buf, "sclp"))
 		return 0;
-	if (!sclp.has_linemode && !sclp.has_vt220)
-		return 0;
-	early_console = &sclp_early_console;
-	register_console(early_console);
+	register_early_console();
 	return 0;
 }
 early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index c634871f0d90..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *    Copyright IBM Corp. 2006, 2007
- *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-SYM_CODE_START(early_pgm_check_handler)
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-	lgr	%r2,%r11
-	brasl	%r14,__do_early_pgm_check
-	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
-	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	lpswe	__LC_RETURN_PSW
-SYM_CODE_END(early_pgm_check_handler)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 749410cfdbc0..d6d5317f768e 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -42,7 +42,7 @@ _LPP_OFFSET	= __LC_LPP
 
 	.macro LPSWEY address, lpswe
 	ALTERNATIVE_2 "b \lpswe;nopr", \
-		".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193),		\
+		".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193),		\
 		__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0),	\
 		ALT_LOWCORE
 	.endm
@@ -264,7 +264,7 @@ EXPORT_SYMBOL(sie_exit)
  */
 
 SYM_CODE_START(system_call)
-	STMG_LC	%r8,%r15,__LC_SAVE_AREA_SYNC
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
 	GET_LC	%r13
 	stpt	__LC_SYS_ENTER_TIMER(%r13)
 	BPOFF
@@ -287,7 +287,7 @@ SYM_CODE_START(system_call)
 	xgr	%r10,%r10
 	xgr	%r11,%r11
 	la	%r2,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13)
+	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
 	MBEAR	%r2,%r13
 	lgr	%r3,%r14
 	brasl	%r14,__do_syscall
@@ -323,7 +323,7 @@ SYM_CODE_END(ret_from_fork)
  */
 
 SYM_CODE_START(pgm_check_handler)
-	STMG_LC	%r8,%r15,__LC_SAVE_AREA_SYNC
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
 	GET_LC	%r13
 	stpt	__LC_SYS_ENTER_TIMER(%r13)
 	BPOFF
@@ -338,16 +338,16 @@ SYM_CODE_START(pgm_check_handler)
 	jnz	2f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3(%r13),0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-2:	CHECK_STACK __LC_SAVE_AREA_SYNC,%r13
+2:	CHECK_STACK __LC_SAVE_AREA,%r13
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	# CHECK_VMAP_STACK branches to stack_overflow or 4f
-	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f
+	CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
 3:	lg	%r15,__LC_KERNEL_STACK(%r13)
 4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
 	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
 	stctg	%c1,%c1,__PT_CR1(%r11)
 #if IS_ENABLED(CONFIG_KVM)
@@ -398,7 +398,7 @@ SYM_CODE_END(pgm_check_handler)
  */
 .macro INT_HANDLER name,lc_old_psw,handler
 SYM_CODE_START(\name)
-	STMG_LC	%r8,%r15,__LC_SAVE_AREA_ASYNC
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
 	GET_LC	%r13
 	stckf	__LC_INT_CLOCK(%r13)
 	stpt	__LC_SYS_ENTER_TIMER(%r13)
@@ -414,7 +414,7 @@ SYM_CODE_START(\name)
 	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 	SIEEXIT __SF_SIE_CONTROL(%r15),%r13
 #endif
-0:	CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13
+0:	CHECK_STACK __LC_SAVE_AREA,%r13
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
 1:	lctlg	%c1,%c1,__LC_KERNEL_ASCE(%r13)
@@ -432,7 +432,7 @@ SYM_CODE_START(\name)
 	xgr	%r7,%r7
 	xgr	%r10,%r10
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
 	MBEAR	%r11,%r13
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	lgr	%r2,%r11		# pass pointer to pt_regs
@@ -599,6 +599,24 @@ SYM_CODE_START(restart_int_handler)
 3:	j	3b
 SYM_CODE_END(restart_int_handler)
 
+	__INIT
+SYM_CODE_START(early_pgm_check_handler)
+	STMG_LC %r8,%r15,__LC_SAVE_AREA
+	GET_LC	%r13
+	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+	lgr	%r2,%r11
+	brasl	%r14,__do_early_pgm_check
+	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+	__FINIT
+
 	.section .kprobes.text, "ax"
 
 #if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0bd6adc40a34..0b6e62d1d8b8 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -50,10 +50,6 @@ struct ftrace_insn {
 	s32 disp;
 } __packed;
 
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
 static const char *ftrace_shared_hotpatch_trampoline(const char **end)
 {
 	const char *tstart, *tend;
@@ -73,19 +69,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
 
 bool ftrace_need_init_nop(void)
 {
-	return true;
+	return !MACHINE_HAS_SEQ_INSN;
 }
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 {
 	static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
 		__ftrace_hotpatch_trampolines_start;
-	static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+	static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
 	static struct ftrace_hotpatch_trampoline *trampoline;
 	struct ftrace_hotpatch_trampoline **next_trampoline;
 	struct ftrace_hotpatch_trampoline *trampolines_end;
 	struct ftrace_hotpatch_trampoline tmp;
 	struct ftrace_insn *insn;
+	struct ftrace_insn old;
 	const char *shared;
 	s32 disp;
 
@@ -99,7 +96,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 	if (mod) {
 		next_trampoline = &mod->arch.next_trampoline;
 		trampolines_end = mod->arch.trampolines_end;
-		shared = ftrace_plt;
 	}
 #endif
 
@@ -107,8 +103,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 		return -ENOMEM;
 	trampoline = (*next_trampoline)++;
 
+	if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+		return -EFAULT;
 	/* Check for the compiler-generated fentry nop (brcl 0, .). */
-	if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+	if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
 		return -EINVAL;
 
 	/* Generate the trampoline. */
@@ -144,8 +142,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
 	return trampoline;
 }
 
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
-		       unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+	/* target set: brasl r0,target (disp in halfwords); else nop brcl 0,0 */
+	return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+				     .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+				    unsigned long target)
+{
+	struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+	struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+	struct ftrace_insn old;
+
+	if (!IS_ALIGNED(ip, 8))
+		return -EINVAL;
+	if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+		return -EFAULT;
+	/* Verify that the to be replaced code matches what we expect. */
+	if (memcmp(&orig, &old, sizeof(old)))
+		return -EINVAL;
+	s390_kernel_write((void *)ip, &new, sizeof(new));
+	return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+					 unsigned long old_addr,
+					 unsigned long addr)
 {
 	struct ftrace_hotpatch_trampoline *trampoline;
 	u64 old;
@@ -161,6 +186,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	return 0;
 }
 
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	if (MACHINE_HAS_SEQ_INSN)
+		return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+	else
+		return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
 static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
 {
 	u16 old;
@@ -179,11 +213,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
-	/* Expect brcl 0xf,... */
-	return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+	/* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */
+	if (MACHINE_HAS_SEQ_INSN)
+		return ftrace_patch_branch_insn(rec->ip, addr, 0);
+	else
+		return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
 }
 
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	struct ftrace_hotpatch_trampoline *trampoline;
 
@@ -195,6 +232,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
 }
 
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	if (MACHINE_HAS_SEQ_INSN)
+		return ftrace_patch_branch_insn(rec->ip, 0, addr);
+	else
+		return ftrace_make_trampoline_call(rec, addr);
+}
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	ftrace_func = func;
@@ -215,25 +260,6 @@ void ftrace_arch_code_modify_post_process(void)
 	text_poke_sync_lock();
 }
 
-#ifdef CONFIG_MODULES
-
-static int __init ftrace_plt_init(void)
-{
-	const char *start, *end;
-
-	ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
-	if (!ftrace_plt)
-		panic("cannot allocate ftrace plt\n");
-
-	start = ftrace_shared_hotpatch_trampoline(&end);
-	memcpy(ftrace_plt, start, end - start);
-	set_memory_rox((unsigned long)ftrace_plt, 1);
-	return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * Hook the return address and push it in the stack of return addresses
@@ -264,26 +290,14 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
  */
 int ftrace_enable_ftrace_graph_caller(void)
 {
-	int rc;
-
 	/* Expect brc 0xf,... */
-	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
-	if (rc)
-		return rc;
-	text_poke_sync_lock();
-	return 0;
+	return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	int rc;
-
 	/* Expect brc 0x0,... */
-	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
-	if (rc)
-		return rc;
-	text_poke_sync_lock();
-	return 0;
+	return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
 }
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
 extern const char ftrace_shared_hotpatch_trampoline_br_end[];
 extern const char ftrace_shared_hotpatch_trampoline_exrl[];
 extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
 
 #endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..2a99a216ab62
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "hd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There are two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's recurring work function reads CPU capacities to
+ *    determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ *    updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread
+ * performance. Comparing the throughput of:
+ * - single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yields better
+ * performance. This performance difference is roughly 30% (can change
+ * between machine generations).
+ *
+ * Hiperdispatch tries to hint the scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs is less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR			(4)
+#define HD_DELAY_INTERVAL		(HZ / 4)
+#define HD_STEAL_THRESHOLD		30
+#define HD_STEAL_AVG_WEIGHT		16
+
+static cpumask_t hd_vl_coremask;	/* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask;	/* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores;	/* Current CORE count with high capacity */
+static int hd_entitled_cores;		/* Total vertical high and medium CORE count */
+static int hd_online_cores;		/* Current online CORE count */
+
+static unsigned long hd_previous_steal;	/* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time;	/* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time;	/* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments;	/* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+	if (!MACHINE_HAS_TOPOLOGY)
+		enable = 0;
+	if (hd_enabled == enable)
+		return 0;
+	hd_enabled = enable;
+	return 1;
+}
+
+void hd_reset_state(void)
+{
+	cpumask_clear(&hd_vl_coremask);
+	cpumask_clear(&hd_vmvl_cpumask);
+	hd_entitled_cores = 0;
+	hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+	const struct cpumask *siblings;
+	int polarization;
+
+	hd_online_cores++;
+	polarization = smp_cpu_get_polarization(cpu);
+	siblings = topology_sibling_cpumask(cpu);
+	switch (polarization) {
+	case POLARIZATION_VH:
+		hd_entitled_cores++;
+		break;
+	case POLARIZATION_VM:
+		hd_entitled_cores++;
+		cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+		break;
+	case POLARIZATION_VL:
+		cpumask_set_cpu(cpu, &hd_vl_coremask);
+		cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+		break;
+	}
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+	static ktime_t prev;
+	ktime_t now;
+
+	/*
+	 * Check if hiperdispatch is active, if not set the prev to 0.
+	 * This way it is possible to differentiate the first update iteration after
+	 * enabling hiperdispatch.
+	 */
+	if (hd_entitled_cores == 0 || hd_enabled == 0) {
+		prev = ktime_set(0, 0);
+		return;
+	}
+	now = ktime_get();
+	if (ktime_after(prev, 0)) {
+		if (hd_high_capacity_cores == hd_online_cores)
+			hd_high_time += ktime_ms_delta(now, prev);
+		else
+			hd_low_time += ktime_ms_delta(now, prev);
+	}
+	prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+	int cpu, upscaling_cores;
+	unsigned long capacity;
+
+	upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+	capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+	hd_high_capacity_cores = hd_entitled_cores;
+	for_each_cpu(cpu, &hd_vl_coremask) {
+		smp_set_core_capacity(cpu, capacity);
+		if (capacity != CPU_CAPACITY_HIGH)
+			continue;
+		hd_high_capacity_cores++;
+		upscaling_cores--;
+		if (upscaling_cores == 0)
+			capacity = CPU_CAPACITY_LOW;
+	}
+}
+
+void hd_disable_hiperdispatch(void)
+{
+	cancel_delayed_work_sync(&hd_capacity_work);
+	hd_high_capacity_cores = hd_online_cores;
+	hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	mutex_unlock(&hd_counter_mutex);
+	if (hd_enabled == 0)
+		return 0;
+	if (hd_entitled_cores == 0)
+		return 0;
+	if (hd_online_cores <= hd_entitled_cores)
+		return 0;
+	mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+	hd_update_capacities();
+	return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+	static unsigned long steal;
+
+	steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+	return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+	unsigned long time_delta, steal_delta, steal, percentage;
+	static ktime_t prev;
+	int cpus, cpu;
+	ktime_t now;
+
+	cpus = 0;
+	steal = 0;
+	percentage = 0;
+	for_each_cpu(cpu, &hd_vmvl_cpumask) {
+		steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+		cpus++;
+	}
+	/*
+	 * If there are no vertical medium and low CPUs, steal time
+	 * is 0 as vertical high CPUs shouldn't experience steal time.
+	 */
+	if (cpus == 0)
+		return percentage;
+	now = ktime_get();
+	time_delta = ktime_to_ns(ktime_sub(now, prev));
+	if (steal > hd_previous_steal && hd_previous_steal != 0) {
+		steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+		percentage = steal_delta / cpus;
+	}
+	hd_previous_steal = steal;
+	prev = now;
+	return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+	unsigned long steal_percentage, new_cores;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	/*
+	 * If online cores are less than or equal to entitled cores,
+	 * hiperdispatch does not need to make any adjustments; call a
+	 * topology update to disable hiperdispatch.
+	 * Normally this check is handled on topology update, but during cpu
+	 * unhotplug, topology and cpu mask updates are done in reverse
+	 * order, causing hd_enable_hiperdispatch() to get stale data.
+	 */
+	if (hd_online_cores <= hd_entitled_cores) {
+		topology_schedule_update();
+		mutex_unlock(&smp_cpu_state_mutex);
+		return;
+	}
+	steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+	if (steal_percentage < hd_steal_threshold)
+		new_cores = hd_online_cores;
+	else
+		new_cores = hd_entitled_cores;
+	if (hd_high_capacity_cores != new_cores) {
+		trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+		hd_high_capacity_cores = new_cores;
+		atomic64_inc(&hd_adjustments);
+		topology_schedule_update();
+	}
+	trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+	mutex_unlock(&smp_cpu_state_mutex);
+	schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+				     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int hiperdispatch;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &hiperdispatch,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	hiperdispatch = hd_enabled;
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+	mutex_lock(&smp_cpu_state_mutex);
+	if (hd_set_hiperdispatch_mode(hiperdispatch))
+		topology_schedule_update();
+	mutex_unlock(&smp_cpu_state_mutex);
+	return 0;
+}
+
+static struct ctl_table hiperdispatch_ctl_table[] = {
+	{
+		.procname	= "hiperdispatch",
+		.mode		= 0644,
+		.proc_handler	= hiperdispatch_ctl_handler,
+	},
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf,
+					size_t count)
+{
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (val > 100)
+		return -ERANGE;
+	hd_steal_threshold = val;
+	return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf,
+				     size_t count)
+{
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (!val)
+		return -ERANGE;
+	hd_delay_factor = val;
+	return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+	&dev_attr_hd_steal_threshold.attr,
+	&dev_attr_hd_delay_factor.attr,
+	NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+	.name  = "hiperdispatch",
+	.attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	*val = hd_high_time;
+	mutex_unlock(&hd_counter_mutex);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	*val = hd_low_time;
+	mutex_unlock(&hd_counter_mutex);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+	*val = atomic64_read(&hd_adjustments);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+	debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+	debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+	debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+	struct device *dev;
+
+	dev = bus_get_dev_root(&cpu_subsys);
+	if (!dev)
+		return;
+	if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+		pr_warn("Unable to create hiperdispatch attribute group\n");
+	put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+	if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+		hd_set_hiperdispatch_mode(1);
+		topology_schedule_update();
+	}
+	if (!register_sysctl("s390", hiperdispatch_ctl_table))
+		pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+	hd_create_debugfs_counters();
+	hd_create_attributes();
+	return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 1af5a08d72ab..2639a3d12736 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -76,6 +76,7 @@ static const struct irq_class irqclass_sub_desc[] = {
 	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
 	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
 	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+	{.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
 	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
 	{.irq = IRQIO_DAS,  .name = "DAS", .desc = "[I/O] DASD"},
 	{.irq = IRQIO_C15,  .name = "C15", .desc = "[I/O] 3215"},
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 05c83505e979..6295faf0987d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -21,6 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/ftrace.h>
 #include <linux/execmem.h>
+#include <asm/text-patching.h>
 #include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/dis.h>
@@ -152,7 +153,12 @@ void arch_arm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
 
-	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	if (MACHINE_HAS_SEQ_INSN) {
+		swap_instruction(&args);
+		text_poke_sync();
+	} else {
+		stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	}
 }
 NOKPROBE_SYMBOL(arch_arm_kprobe);
 
@@ -160,7 +166,12 @@ void arch_disarm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
 
-	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	if (MACHINE_HAS_SEQ_INSN) {
+		swap_instruction(&args);
+		text_poke_sync();
+	} else {
+		stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	}
 }
 NOKPROBE_SYMBOL(arch_disarm_kprobe);
 
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index ae4d4fd9afcd..7e267ef63a7f 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,6 +9,7 @@
 #include <asm/ftrace.h>
 #include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
+#include <asm/march.h>
 
 #define STACK_FRAME_SIZE_PTREGS		(STACK_FRAME_OVERHEAD + __PT_SIZE)
 #define STACK_PTREGS			(STACK_FRAME_OVERHEAD)
@@ -88,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
 SYM_CODE_END(ftrace_caller)
 
 SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
 	aghik	%r2,%r0,-MCOUNT_INSN_SIZE
 	lgrl	%r4,function_trace_op
 	lgrl	%r1,ftrace_func
@@ -115,7 +116,7 @@ SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
 .Lftrace_graph_caller_end:
 #endif
 	lg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
 	ltg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 	locgrz	%r1,%r0
 #else
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..ddc1448ea2e1 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
 #include <linux/node.h>
 #include <asm/numa.h>
 
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
 void __init numa_setup(void)
 {
 	int nid;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 6968be98af11..e2e0aa463fbd 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -22,6 +22,10 @@
 #include <asm/hwctrset.h>
 #include <asm/debug.h>
 
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG		0xBC000UL /* Event: Counter sets */
+
 enum cpumf_ctr_set {
 	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
 	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
@@ -1694,7 +1698,6 @@ static const struct file_operations cfset_fops = {
 	.release = cfset_release,
 	.unlocked_ioctl	= cfset_ioctl,
 	.compat_ioctl = cfset_ioctl,
-	.llseek = no_llseek
 };
 
 static struct miscdevice cfset_dev = {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 736c1d9632dd..5b765e3ccf0c 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -24,6 +24,22 @@
 #include <asm/timex.h>
 #include <linux/io.h>
 
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE		0x0008	  /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
 /* Minimum number of sample-data-block-tables:
  * At least one table is required for the sampling buffer structure.
  * A single table contains up to 511 pointers to sample-data-blocks.
@@ -113,17 +129,6 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
 	return USEC_PER_SEC * qsi->cpu_speed / rate;
 }
 
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
-	/* TOD in STCKE format */
-	if (te->header.t)
-		return *((unsigned long long *)&te->timestamp[1]);
-
-	/* TOD in STCK format */
-	return *((unsigned long long *)&te->timestamp[0]);
-}
-
 /* Return pointer to trailer entry of an sample data block */
 static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
 {
@@ -154,12 +159,12 @@ static inline unsigned long *get_next_sdbt(unsigned long *s)
 /*
  * sf_disable() - Switch off sampling facility
  */
-static int sf_disable(void)
+static void sf_disable(void)
 {
 	struct hws_lsctl_request_block sreq;
 
 	memset(&sreq, 0, sizeof(sreq));
-	return lsctl(&sreq);
+	lsctl(&sreq);
 }
 
 /*
@@ -208,8 +213,6 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 		}
 	}
 
-	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
-			    (unsigned long)sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
@@ -265,10 +268,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	 * the sampling buffer origin.
 	 */
 	if (sfb->sdbt != get_next_sdbt(tail)) {
-		debug_sprintf_event(sfdbg, 3, "%s: "
-				    "sampling buffer is not linked: origin %#lx"
-				    " tail %#lx\n", __func__,
-				    (unsigned long)sfb->sdbt,
+		debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+				    __func__, (unsigned long)sfb->sdbt,
 				    (unsigned long)tail);
 		return -EINVAL;
 	}
@@ -318,9 +319,6 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	*tail = virt_to_phys(sfb->sdbt) + 1;
 	sfb->tail = tail;
 
-	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
-			    " settings: sdbt %lu sdb %lu\n", __func__,
-			    sfb->num_sdbt, sfb->num_sdb);
 	return rc;
 }
 
@@ -357,15 +355,8 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 
 	/* Allocate requested number of sample-data-blocks */
 	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
-	if (rc) {
+	if (rc)
 		free_sampling_buffer(sfb);
-		debug_sprintf_event(sfdbg, 4, "%s: "
-			"realloc_sampling_buffer failed with rc %i\n",
-			__func__, rc);
-	} else
-		debug_sprintf_event(sfdbg, 4,
-			"%s: tear %#lx dear %#lx\n", __func__,
-			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
 	return rc;
 }
 
@@ -377,8 +368,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
 	CPUM_SF_MAX_SDB = max;
 
 	memset(&si, 0, sizeof(si));
-	if (!qsi(&si))
-		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+	qsi(&si);
+	CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
 }
 
 static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -397,12 +388,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
 	return 0;
 }
 
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
-				   struct hw_perf_event *hwc)
-{
-	return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
 {
 	/* Limit the number of SDBs to not exceed the maximum */
@@ -426,7 +411,6 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
 	unsigned long n_sdb, freq;
-	size_t sample_size;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
@@ -457,7 +441,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
 	 *	 to 511 SDBs).
 	 */
-	sample_size = sizeof(struct hws_basic_entry);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
 	n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
 
@@ -473,12 +456,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 	if (sf_buffer_available(cpuhw))
 		return 0;
 
-	debug_sprintf_event(sfdbg, 3,
-			    "%s: rate %lu f %lu sdb %lu/%lu"
-			    " sample_size %lu cpuhw %p\n", __func__,
-			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
-			    sample_size, cpuhw);
-
 	return alloc_sampling_buffer(&cpuhw->sfb,
 				     sfb_pending_allocs(&cpuhw->sfb, hwc));
 }
@@ -535,8 +512,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 	if (num)
 		sfb_account_allocs(num, hwc);
 
-	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
-			    __func__, OVERFLOW_REG(hwc), ratio, num);
 	OVERFLOW_REG(hwc) = 0;
 }
 
@@ -554,13 +529,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 static void extend_sampling_buffer(struct sf_buffer *sfb,
 				   struct hw_perf_event *hwc)
 {
-	unsigned long num, num_old;
-	int rc;
+	unsigned long num;
 
 	num = sfb_pending_allocs(sfb, hwc);
 	if (!num)
 		return;
-	num_old = sfb->num_sdb;
 
 	/* Disable the sampling facility to reset any states and also
 	 * clear pending measurement alerts.
@@ -572,51 +545,33 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
 	 * called by perf.  Because this is a reallocation, it is fine if the
 	 * new SDB-request cannot be satisfied immediately.
 	 */
-	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
-	if (rc)
-		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
-				    __func__, rc);
-
-	if (sfb_has_pending_allocs(sfb, hwc))
-		debug_sprintf_event(sfdbg, 5, "%s: "
-				    "req %lu alloc %lu remaining %lu\n",
-				    __func__, num, sfb->num_sdb - num_old,
-				    sfb_pending_allocs(sfb, hwc));
+	realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
 }
 
 /* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
 /* Used to avoid races in calling reserve/release_cpumf_hardware */
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #define PMC_INIT      0
 #define PMC_RELEASE   1
-#define PMC_FAILURE   2
 static void setup_pmc_cpu(void *flags)
 {
-	struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
-	int err = 0;
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 
 	switch (*((int *)flags)) {
 	case PMC_INIT:
-		memset(cpusf, 0, sizeof(*cpusf));
-		err = qsi(&cpusf->qsi);
-		if (err)
-			break;
-		cpusf->flags |= PMU_F_RESERVED;
-		err = sf_disable();
+		memset(cpuhw, 0, sizeof(*cpuhw));
+		qsi(&cpuhw->qsi);
+		cpuhw->flags |= PMU_F_RESERVED;
+		sf_disable();
 		break;
 	case PMC_RELEASE:
-		cpusf->flags &= ~PMU_F_RESERVED;
-		err = sf_disable();
-		if (!err)
-			deallocate_buffers(cpusf);
+		cpuhw->flags &= ~PMU_F_RESERVED;
+		sf_disable();
+		deallocate_buffers(cpuhw);
 		break;
 	}
-	if (err) {
-		*((int *)flags) |= PMC_FAILURE;
-		pr_err("Switching off the sampling facility failed with rc %i\n", err);
-	}
 }
 
 static void release_pmc_hardware(void)
@@ -627,27 +582,19 @@ static void release_pmc_hardware(void)
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
 }
 
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
 
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
-	if (flags & PMC_FAILURE) {
-		release_pmc_hardware();
-		return -ENODEV;
-	}
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
-	return 0;
 }
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	/* Release PMC if this is the last perf event */
-	if (!atomic_add_unless(&num_events, -1, 1)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_dec_return(&num_events) == 0)
-			release_pmc_hardware();
+	if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -751,9 +698,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
 		 */
 		if (sample_rate_to_freq(si, rate) >
 		    sysctl_perf_event_sample_rate) {
-			debug_sprintf_event(sfdbg, 1, "%s: "
-					    "Sampling rate exceeds maximum "
-					    "perf sample rate\n", __func__);
 			rate = 0;
 		}
 	}
@@ -798,9 +742,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
 	attr->sample_period = rate;
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
-	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
-			    __func__, event->cpu, event->attr.sample_period,
-			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
 	return 0;
 }
 
@@ -810,23 +751,17 @@ static int __hw_perf_event_init(struct perf_event *event)
 	struct hws_qsi_info_block si;
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
-	int cpu, err;
+	int cpu, err = 0;
 
 	/* Reserve CPU-measurement sampling facility */
-	err = 0;
-	if (!atomic_inc_not_zero(&num_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
-			err = -EBUSY;
-		else
-			atomic_inc(&num_events);
-		mutex_unlock(&pmc_reserve_mutex);
+	mutex_lock(&pmc_reserve_mutex);
+	if (!refcount_inc_not_zero(&num_events)) {
+		reserve_pmc_hardware();
+		refcount_set(&num_events, 1);
 	}
+	mutex_unlock(&pmc_reserve_mutex);
 	event->destroy = hw_perf_event_destroy;
 
-	if (err)
-		goto out;
-
 	/* Access per-CPU sampling information (query sampling info) */
 	/*
 	 * The event->cpu value can be -1 to count on every CPU, for example,
@@ -838,9 +773,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	memset(&si, 0, sizeof(si));
 	cpuhw = NULL;
-	if (event->cpu == -1)
+	if (event->cpu == -1) {
 		qsi(&si);
-	else {
+	} else {
 		/* Event is pinned to a particular CPU, retrieve the per-CPU
 		 * sampling structure for accessing the CPU-specific QSI.
 		 */
@@ -881,10 +816,6 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (err)
 		goto out;
 
-	/* Initialize sample data overflow accounting */
-	hwc->extra_reg.reg = REG_OVERFLOW;
-	OVERFLOW_REG(hwc) = 0;
-
 	/* Use AUX buffer. No need to allocate it by ourself */
 	if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
 		return 0;
@@ -1007,7 +938,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 			extend_sampling_buffer(&cpuhw->sfb, hwc);
 		}
 		/* Rate may be adjusted with ioctl() */
-		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+		cpuhw->lsctl.interval = SAMPL_RATE(hwc);
 	}
 
 	/* (Re)enable the PMU and sampling facility */
@@ -1023,12 +954,6 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 
 	/* Load current program parameter */
 	lpp(&get_lowcore()->lpp);
-
-	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
-			    "interval %#lx tear %#lx dear %#lx\n", __func__,
-			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
-			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
-			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 }
 
 static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1055,21 +980,18 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 		return;
 	}
 
-	/* Save state of TEAR and DEAR register contents */
-	err = qsi(&si);
-	if (!err) {
-		/* TEAR/DEAR values are valid only if the sampling facility is
-		 * enabled.  Note that cpumsf_pmu_disable() might be called even
-		 * for a disabled sampling facility because cpumsf_pmu_enable()
-		 * controls the enable/disable state.
-		 */
-		if (si.es) {
-			cpuhw->lsctl.tear = si.tear;
-			cpuhw->lsctl.dear = si.dear;
-		}
-	} else
-		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
-				    __func__, err);
+	/*
+	 * Save state of TEAR and DEAR register contents.
+	 * TEAR/DEAR values are valid only if the sampling facility is
+	 * enabled.  Note that cpumsf_pmu_disable() might be called even
+	 * for a disabled sampling facility because cpumsf_pmu_enable()
+	 * controls the enable/disable state.
+	 */
+	qsi(&si);
+	if (si.es) {
+		cpuhw->lsctl.tear = si.tear;
+		cpuhw->lsctl.dear = si.dear;
+	}
 
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
@@ -1235,11 +1157,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 				/* Count discarded samples */
 				*overflow += 1;
 		} else {
-			debug_sprintf_event(sfdbg, 4,
-					    "%s: Found unknown"
-					    " sampling data entry: te->f %i"
-					    " basic.def %#4x (%p)\n", __func__,
-					    te->header.f, sample->def, sample);
 			/* Sample slot is not yet written or other record.
 			 *
 			 * This condition can occur if the buffer was reused
@@ -1284,7 +1201,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	 * AUX buffer is used when in diagnostic sampling mode.
 	 * No perf events/samples are created.
 	 */
-	if (SAMPL_DIAG_MODE(&event->hw))
+	if (SAMPL_DIAG_MODE(hwc))
 		return;
 
 	sdbt = (unsigned long *)TEAR_REG(hwc);
@@ -1309,13 +1226,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			 */
 			sampl_overflow += te->header.overflow;
 
-		/* Timestamps are valid for full sample-data-blocks only */
-		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
-				    "overflow %llu timestamp %#llx\n",
-				    __func__, sdb, (unsigned long)sdbt,
-				    te->header.overflow,
-				    (te->header.f) ? trailer_timestamp(te) : 0ULL);
-
 		/* Collect all samples from a single sample-data-block and
 		 * flag if an (perf) event overflow happened.  If so, the PMU
 		 * is stopped and remaining samples will be discarded.
@@ -1340,7 +1250,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			sdbt = get_next_sdbt(sdbt);
 
 		/* Update event hardware registers */
-		TEAR_REG(hwc) = (unsigned long) sdbt;
+		TEAR_REG(hwc) = (unsigned long)sdbt;
 
 		/* Stop processing sample-data if all samples of the current
 		 * sample-data-block were flushed even if it was not full.
@@ -1362,19 +1272,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	 * are dropped.
 	 * Slightly increase the interval to avoid hitting this limit.
 	 */
-	if (event_overflow) {
+	if (event_overflow)
 		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
-		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
-				    __func__,
-				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
-	}
-
-	if (sampl_overflow || event_overflow)
-		debug_sprintf_event(sfdbg, 4, "%s: "
-				    "overflows: sample %llu event %llu"
-				    " total %llu num_sdb %llu\n",
-				    __func__, sampl_overflow, event_overflow,
-				    OVERFLOW_REG(hwc), num_sdb);
 }
 
 static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
@@ -1442,9 +1341,6 @@ static void aux_output_end(struct perf_output_handle *handle)
 	/* Remove alert indicators in the buffer */
 	te = aux_sdb_trailer(aux, aux->alert_mark);
 	te->header.a = 0;
-
-	debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
-			    __func__, i, range_scan, aux->head);
 }
 
 /*
@@ -1463,7 +1359,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	unsigned long range, i, range_scan, idx, head, base, offset;
 	struct hws_trailer_entry *te;
 
-	if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+	if (handle->head & ~PAGE_MASK)
 		return -EINVAL;
 
 	aux->head = handle->head >> PAGE_SHIFT;
@@ -1475,10 +1371,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	 * SDBs between aux->head and aux->empty_mark are already ready
 	 * for new data. range_scan is num of SDBs not within them.
 	 */
-	debug_sprintf_event(sfdbg, 6,
-			    "%s: range %ld head %ld alert %ld empty %ld\n",
-			    __func__, range, aux->head, aux->alert_mark,
-			    aux->empty_mark);
 	if (range > aux_sdb_num_empty(aux)) {
 		range_scan = range - aux_sdb_num_empty(aux);
 		idx = aux->empty_mark + 1;
@@ -1504,12 +1396,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
 	cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
 
-	debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
-			    "index %ld tear %#lx dear %#lx\n", __func__,
-			    aux->head, aux->alert_mark, aux->empty_mark,
-			    head / CPUM_SF_SDB_PER_TABLE,
-			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
-
 	return 0;
 }
 
@@ -1571,14 +1457,11 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 			     unsigned long long *overflow)
 {
-	unsigned long i, range_scan, idx, idx_old;
 	union hws_trailer_header old, prev, new;
+	unsigned long i, range_scan, idx;
 	unsigned long long orig_overflow;
 	struct hws_trailer_entry *te;
 
-	debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
-			    "empty %ld\n", __func__, range, aux->head,
-			    aux->alert_mark, aux->empty_mark);
 	if (range <= aux_sdb_num_empty(aux))
 		/*
 		 * No need to scan. All SDBs in range are marked as empty.
@@ -1601,7 +1484,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 	 * indicator fall into this range, set it.
 	 */
 	range_scan = range - aux_sdb_num_empty(aux);
-	idx_old = idx = aux->empty_mark + 1;
+	idx = aux->empty_mark + 1;
 	for (i = 0; i < range_scan; i++, idx++) {
 		te = aux_sdb_trailer(aux, idx);
 		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
@@ -1623,9 +1506,6 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 	/* Update empty_mark to new position */
 	aux->empty_mark = aux->head + range - 1;
 
-	debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
-			    "empty %ld\n", __func__, range_scan, idx_old,
-			    idx - 1, aux->empty_mark);
 	return true;
 }
 
@@ -1642,12 +1522,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 	unsigned long num_sdb;
 
 	aux = perf_get_aux(handle);
-	if (WARN_ON_ONCE(!aux))
+	if (!aux)
 		return;
 
 	/* Inform user space new data arrived */
 	size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
-	debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+	debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
 			    size >> PAGE_SHIFT);
 	perf_aux_output_end(handle, size);
 
@@ -1661,7 +1541,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 				num_sdb);
 			break;
 		}
-		if (WARN_ON_ONCE(!aux))
+		if (!aux)
 			return;
 
 		/* Update head and alert_mark to new position */
@@ -1681,23 +1561,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			perf_aux_output_end(&cpuhw->handle, size);
 			pr_err("Sample data caused the AUX buffer with %lu "
 			       "pages to overflow\n", aux->sfb.num_sdb);
-			debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
-					    "overflow %lld\n", __func__,
-					    aux->head, range, overflow);
 		} else {
 			size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
-			debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
-					    "already full, try another\n",
-					    __func__,
-					    aux->head, aux->alert_mark);
 		}
 	}
-
-	if (done)
-		debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
-				    "empty %ld\n", __func__, aux->head,
-				    aux->alert_mark, aux->empty_mark);
 }
 
 /*
@@ -1719,8 +1587,6 @@ static void aux_buffer_free(void *data)
 	kfree(aux->sdbt_index);
 	kfree(aux->sdb_index);
 	kfree(aux);
-
-	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
 }
 
 static void aux_sdb_init(unsigned long sdb)
@@ -1828,9 +1694,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	 */
 	aux->empty_mark = sfb->num_sdb - 1;
 
-	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
-			    sfb->num_sdbt, sfb->num_sdb);
-
 	return aux;
 
 no_sdbt:
@@ -1863,8 +1726,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 
 	memset(&si, 0, sizeof(si));
 	if (event->cpu == -1) {
-		if (qsi(&si))
-			return -ENODEV;
+		qsi(&si);
 	} else {
 		/* Event is pinned to a particular CPU, retrieve the per-CPU
 		 * sampling structure for accessing the CPU-specific QSI.
@@ -1874,7 +1736,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 		si = cpuhw->qsi;
 	}
 
-	do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+	do_freq = !!SAMPL_FREQ_MODE(&event->hw);
 	rate = getrate(do_freq, value, &si);
 	if (!rate)
 		return -EINVAL;
@@ -1882,10 +1744,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 	event->attr.sample_period = rate;
 	SAMPL_RATE(&event->hw) = rate;
 	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
-	debug_sprintf_event(sfdbg, 4, "%s:"
-			    " cpu %d value %#llx period %#llx freq %d\n",
-			    __func__, event->cpu, value,
-			    event->attr.sample_period, do_freq);
 	return 0;
 }
 
@@ -1896,12 +1754,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
 {
 	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 
-	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+	if (!(event->hw.state & PERF_HES_STOPPED))
 		return;
-
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
 	perf_pmu_disable(event->pmu);
 	event->hw.state = 0;
 	cpuhw->lsctl.cs = 1;
@@ -1936,7 +1790,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 	struct aux_buffer *aux;
-	int err;
+	int err = 0;
 
 	if (cpuhw->flags & PMU_F_IN_USE)
 		return -EAGAIN;
@@ -1944,7 +1798,6 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
 		return -EINVAL;
 
-	err = 0;
 	perf_pmu_disable(event->pmu);
 
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -2115,7 +1968,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 
 	/* Report measurement alerts only for non-PRA codes */
 	if (alert != CPU_MF_INT_SF_PRA)
-		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+		debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
 				    alert);
 
 	/* Sampling authorization change request */
@@ -2143,7 +1996,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
 	/* Ignore the notification if no events are scheduled on the PMU.
 	 * This might be racy...
 	 */
-	if (!atomic_read(&num_events))
+	if (!refcount_read(&num_events))
 		return 0;
 
 	local_irq_disable();
@@ -2205,10 +2058,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
 	.get = param_get_sfb_size,
 };
 
-#define RS_INIT_FAILURE_QSI	  0x0001
-#define RS_INIT_FAILURE_BSDES	  0x0002
-#define RS_INIT_FAILURE_ALRT	  0x0003
-#define RS_INIT_FAILURE_PERF	  0x0004
+enum {
+	RS_INIT_FAILURE_BSDES	= 2,	/* Bad basic sampling size */
+	RS_INIT_FAILURE_ALRT	= 3,	/* IRQ registration failure */
+	RS_INIT_FAILURE_PERF	= 4	/* PMU registration failure */
+};
+
 static void __init pr_cpumsf_err(unsigned int reason)
 {
 	pr_err("Sampling facility support for perf is not available: "
@@ -2224,11 +2079,7 @@ static int __init init_cpum_sampling_pmu(void)
 		return -ENODEV;
 
 	memset(&si, 0, sizeof(si));
-	if (qsi(&si)) {
-		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
-		return -ENODEV;
-	}
-
+	qsi(&si);
 	if (!si.as && !si.ad)
 		return -ENODEV;
 
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 2f5a20e300f6..fa7325454266 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -738,6 +738,22 @@ static const char * const paicrypt_ctrnames[] = {
 	[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
 	[155] = "IBM_RESERVED_155",
 	[156] = "IBM_RESERVED_156",
+	[157] = "KM_FULL_XTS_AES_128",
+	[158] = "KM_FULL_XTS_AES_256",
+	[159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+	[160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+	[161] = "KMAC_HMAC_SHA_224",
+	[162] = "KMAC_HMAC_SHA_256",
+	[163] = "KMAC_HMAC_SHA_384",
+	[164] = "KMAC_HMAC_SHA_512",
+	[165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+	[166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+	[167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+	[168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+	[169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+	[170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+	[171] = "PCKMO_ENCRYPT_AES_XTS_128",
+	[172] = "PCKMO_ENCRYPT_AES_XTS_256",
 };
 
 static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 6295531b39a2..7f462bef1fc0 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -635,6 +635,15 @@ static const char * const paiext_ctrnames[] = {
 	[25] = "NNPA_1MFRAME",
 	[26] = "NNPA_2GFRAME",
 	[27] = "NNPA_ACCESSEXCEPT",
+	[28] = "NNPA_TRANSFORM",
+	[29] = "NNPA_GELU",
+	[30] = "NNPA_MOMENTS",
+	[31] = "NNPA_LAYERNORM",
+	[32] = "NNPA_MATMUL_OP_BCAST1",
+	[33] = "NNPA_SQRT",
+	[34] = "NNPA_INVSQRT",
+	[35] = "NNPA_NORM",
+	[36] = "NNPA_REDUCE",
 };
 
 static void __init attr_event_free(struct attribute **attrs, int num)
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6c2cb345402f..e48013cd832c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -30,9 +30,9 @@
 #include <linux/compat.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
 #include <asm/access-regs.h>
 #include <asm/lowcore.h>
-#include <asm/vdso.h>
 #include "entry.h"
 
 /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index fbba37ec53cf..4df56fdb2488 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -671,6 +671,32 @@ int smp_cpu_get_polarization(int cpu)
 	return per_cpu(pcpu_devices, cpu).polarization;
 }
 
+/* Store the capacity value @val in the per-CPU pcpu_devices entry of @cpu. */
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+	per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+/* Return the capacity value stored in the pcpu_devices entry of @cpu. */
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+	return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+/*
+ * Set the capacity of all CPUs of the core that @cpu belongs to: starting
+ * at smp_get_base_cpu(@cpu), up to smp_cpu_mtid + 1 consecutive CPU numbers
+ * are set to @val, stopping before nr_cpu_ids.
+ */
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+	int i;
+
+	cpu = smp_get_base_cpu(cpu);
+	for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+		smp_cpu_set_capacity(i, val);
+}
+
 int smp_cpu_get_cpu_address(int cpu)
 {
 	return per_cpu(pcpu_devices, cpu).address;
@@ -719,6 +745,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
 		else
 			pcpu->state = CPU_STATE_STANDBY;
 		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+		smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
 		set_cpu_present(cpu, true);
 		if (!early && arch_register_cpu(cpu))
 			set_cpu_present(cpu, false);
@@ -961,6 +988,7 @@ void __init smp_prepare_boot_cpu(void)
 	ipl_pcpu->state = CPU_STATE_CONFIGURED;
 	lc->pcpu = (unsigned long)ipl_pcpu;
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+	smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
 }
 
 void __init smp_setup_processor_id(void)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 640363b2a105..9f59837d159e 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -162,22 +162,3 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
 {
 	arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
 }
-
-unsigned long return_address(unsigned int n)
-{
-	struct unwind_state state;
-	unsigned long addr;
-
-	/* Increment to skip current stack entry */
-	n++;
-
-	unwind_for_each_frame(&state, NULL, NULL, 0) {
-		addr = unwind_get_return_address(&state);
-		if (!addr)
-			break;
-		if (!n--)
-			return addr;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 2be30a96696a..88055f58fbda 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -498,7 +498,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = {       \
 	.open		= stsi_open_##fc##_##s1##_##s2,			       \
 	.release	= stsi_release,					       \
 	.read		= stsi_read,					       \
-	.llseek		= no_llseek,					       \
 };
 
 static int stsi_release(struct inode *inode, struct file *file)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 22029ecae1c5..813e5da9a973 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -24,6 +24,7 @@
 #include <linux/mm.h>
 #include <linux/nodemask.h>
 #include <linux/node.h>
+#include <asm/hiperdispatch.h>
 #include <asm/sysinfo.h>
 
 #define PTF_HORIZONTAL	(0UL)
@@ -47,6 +48,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
 static void set_topology_timer(void);
 static void topology_work_fn(struct work_struct *work);
 static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
 
 static DECLARE_WORK(topology_work, topology_work_fn);
 
@@ -144,6 +146,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
 			cpumask_set_cpu(cpu, &book->mask);
 			cpumask_set_cpu(cpu, &socket->mask);
 			smp_cpu_set_polarization(cpu, tl_core->pp);
+			smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
 		}
 	}
 }
@@ -270,6 +273,7 @@ void update_cpu_masks(void)
 			topo->drawer_id = id;
 		}
 	}
+	hd_reset_state();
 	for_each_online_cpu(cpu) {
 		topo = &cpu_topology[cpu];
 		pkg_first = cpumask_first(&topo->core_mask);
@@ -278,8 +282,10 @@ void update_cpu_masks(void)
 			for_each_cpu(sibling, &topo->core_mask) {
 				topo_sibling = &cpu_topology[sibling];
 				smt_first = cpumask_first(&topo_sibling->thread_mask);
-				if (sibling == smt_first)
+				if (sibling == smt_first) {
 					topo_package->booted_cores++;
+					hd_add_core(sibling);
+				}
 			}
 		} else {
 			topo->booted_cores = topo_package->booted_cores;
@@ -303,8 +309,10 @@ static void __arch_update_dedicated_flag(void *arg)
 static int __arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
-	int rc = 0;
+	int rc, hd_status;
 
+	hd_status = 0;
+	rc = 0;
 	mutex_lock(&smp_cpu_state_mutex);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
@@ -314,7 +322,11 @@ static int __arch_update_cpu_topology(void)
 	update_cpu_masks();
 	if (!MACHINE_HAS_TOPOLOGY)
 		topology_update_polarization_simple();
+	if (cpu_management == 1)
+		hd_status = hd_enable_hiperdispatch();
 	mutex_unlock(&smp_cpu_state_mutex);
+	if (hd_status == 0)
+		hd_disable_hiperdispatch();
 	return rc;
 }
 
@@ -374,7 +386,35 @@ void topology_expect_change(void)
 	set_topology_timer();
 }
 
-static int cpu_management;
+/*
+ * Switch the CPU management mode to @polarization.
+ *
+ * Runs under cpus_read_lock() and smp_cpu_state_mutex. If the requested
+ * value already equals cpu_management, nothing is done. Otherwise
+ * topology_set_cpu_management() is called and, only if it succeeds,
+ * cpu_management is updated and topology_expect_change() is invoked.
+ *
+ * Returns 0 on success or if no change was needed, otherwise the error
+ * returned by topology_set_cpu_management().
+ */
+static int set_polarization(int polarization)
+{
+	int rc = 0;
+
+	cpus_read_lock();
+	mutex_lock(&smp_cpu_state_mutex);
+	if (cpu_management == polarization)
+		goto out;
+	rc = topology_set_cpu_management(polarization);
+	if (rc)
+		goto out;
+	cpu_management = polarization;
+	topology_expect_change();
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	cpus_read_unlock();
+	return rc;
+}
 
 static ssize_t dispatching_show(struct device *dev,
 				struct device_attribute *attr,
@@ -400,19 +440,7 @@ static ssize_t dispatching_store(struct device *dev,
 		return -EINVAL;
 	if (val != 0 && val != 1)
 		return -EINVAL;
-	rc = 0;
-	cpus_read_lock();
-	mutex_lock(&smp_cpu_state_mutex);
-	if (cpu_management == val)
-		goto out;
-	rc = topology_set_cpu_management(val);
-	if (rc)
-		goto out;
-	cpu_management = val;
-	topology_expect_change();
-out:
-	mutex_unlock(&smp_cpu_state_mutex);
-	cpus_read_unlock();
+	rc = set_polarization(val);
 	return rc ? rc : count;
 }
 static DEVICE_ATTR_RW(dispatching);
@@ -624,12 +652,45 @@ static int topology_ctl_handler(const struct ctl_table *ctl, int write,
 	return rc;
 }
 
+/*
+ * Handler for the "polarization" sysctl entry.
+ *
+ * A temporary ctl_table backed by a local copy of cpu_management and
+ * limited to the range SYSCTL_ZERO..SYSCTL_ONE is handed to
+ * proc_douintvec_minmax(). Reads stop there; on a successful write the
+ * parsed value is applied through set_polarization().
+ */
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+				    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int polarization;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &polarization,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	polarization = cpu_management;
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+	return set_polarization(polarization);
+}
+
 static struct ctl_table topology_ctl_table[] = {
 	{
 		.procname	= "topology",
 		.mode		= 0644,
 		.proc_handler	= topology_ctl_handler,
 	},
+	{
+		.procname	= "polarization",
+		.mode		= 0644,
+		.proc_handler	= polarization_ctl_handler,
+	},
 };
 
 static int __init topology_init(void)
@@ -642,6 +703,8 @@ static int __init topology_init(void)
 		set_topology_timer();
 	else
 		topology_update_polarization_simple();
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+		set_polarization(1);
 	register_sysctl("s390", topology_ctl_table);
 
 	dev_root = bus_get_dev_root(&cpu_subsys);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 36db065c7cf7..9646f773208a 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -14,6 +14,7 @@
 #include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/pagewalk.h>
 #include <asm/facility.h>
 #include <asm/sections.h>
 #include <asm/uv.h>
@@ -462,9 +463,9 @@ EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
 int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 {
 	struct vm_area_struct *vma;
+	struct folio_walk fw;
 	unsigned long uaddr;
 	struct folio *folio;
-	struct page *page;
 	int rc;
 
 	rc = -EFAULT;
@@ -483,11 +484,15 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 		goto out;
 
 	rc = 0;
-	/* we take an extra reference here */
-	page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
-	if (IS_ERR_OR_NULL(page))
+	folio = folio_walk_start(&fw, vma, uaddr, 0);
+	if (!folio)
 		goto out;
-	folio = page_folio(page);
+	/*
+	 * See gmap_make_secure(): large folios cannot be secure. Small
+	 * folio implies FW_LEVEL_PTE.
+	 */
+	if (folio_test_large(folio) || !pte_write(fw.pte))
+		goto out_walk_end;
 	rc = uv_destroy_folio(folio);
 	/*
 	 * Fault handlers can race; it is possible that two CPUs will fault
@@ -500,7 +505,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 	 */
 	if (rc)
 		rc = uv_convert_from_secure_folio(folio);
-	folio_put(folio);
+out_walk_end:
+	folio_walk_end(&fw, vma);
 out:
 	mmap_read_unlock(gmap->mm);
 	return rc;
@@ -548,11 +554,6 @@ int arch_make_folio_accessible(struct folio *folio)
 }
 EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
 
-int arch_make_page_accessible(struct page *page)
-{
-	return arch_make_folio_accessible(page_folio(page));
-}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
 static ssize_t uv_query_facilities(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *buf)
 {
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 2f967ac2b8e3..598b512cde01 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -12,12 +12,15 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/time_namespace.h>
 #include <linux/random.h>
 #include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
 #include <asm/vdso.h>
 
 extern char vdso64_start[], vdso64_end[];
@@ -29,12 +32,6 @@ static union vdso_data_store vdso_data_store __page_aligned_data;
 
 struct vdso_data *vdso_data = vdso_data_store.data;
 
-enum vvar_pages {
-	VVAR_DATA_PAGE_OFFSET,
-	VVAR_TIMENS_PAGE_OFFSET,
-	VVAR_NR_PAGES,
-};
-
 #ifdef CONFIG_TIME_NS
 struct vdso_data *arch_get_vdso_data(void *vvar_page)
 {
@@ -250,8 +247,33 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
 	return pagelist;
 }
 
+/*
+ * Apply alternative instruction patching to the 64-bit vDSO image.
+ *
+ * The image at vdso64_start is read as an ELF64 file: the section header
+ * table is located via e_shoff and searched for ".altinstructions". If
+ * that section exists, its contents are passed to apply_alternatives() as
+ * an array of struct alt_instr; otherwise nothing is done.
+ */
+static void vdso_apply_alternatives(void)
+{
+	const struct elf64_shdr *alt, *shdr;
+	struct alt_instr *start, *end;
+	const struct elf64_hdr *hdr;
+
+	hdr = (struct elf64_hdr *)vdso64_start;
+	shdr = (void *)hdr + hdr->e_shoff;
+	alt = find_section(hdr, shdr, ".altinstructions");
+	if (!alt)
+		return;
+	start = (void *)hdr + alt->sh_offset;
+	end = (void *)hdr + alt->sh_offset + alt->sh_size;
+	apply_alternatives(start, end);
+}
+
 static int __init vdso_init(void)
 {
+	vdso_apply_alternatives();
 	vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
 	if (IS_ENABLED(CONFIG_COMPAT))
 		vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ba19c0ca7c87..37bb4b761229 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -3,12 +3,17 @@
 
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
+obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
 VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
 CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
 CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
 
+ifneq ($(c-getrandom-y),)
+	CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
 # Build rules
 
 targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
index 34c7a2312f9d..9e5397e7b590 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
 int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
 int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
 int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
 
 #endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 37e2a505e81d..753040a4b5ab 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -4,6 +4,7 @@
  * library
  */
 
+#include <asm/vdso/vsyscall.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
 
@@ -13,6 +14,7 @@ OUTPUT_ARCH(s390:64-bit)
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+	PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
 #ifdef CONFIG_TIME_NS
 	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
 #endif
@@ -42,6 +44,10 @@ SECTIONS
 	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
 	.rodata1	: { *(.rodata1) }
 
+	. = ALIGN(8);
+	.altinstructions	: { *(.altinstructions) }
+	.altinstr_replacement	: { *(.altinstr_replacement) }
+
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
 	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
@@ -140,6 +146,7 @@ VERSION
 		__kernel_restart_syscall;
 		__kernel_rt_sigreturn;
 		__kernel_sigreturn;
+		__kernel_getrandom;
 	local: *;
 	};
 }
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index e26e68675c08..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -13,10 +13,7 @@
  * for details.
  */
 .macro vdso_func func
-	.globl __kernel_\func
-	.type  __kernel_\func,@function
-	__ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
 	CFI_STARTPROC
 	aghi	%r15,-STACK_FRAME_VDSO_OVERHEAD
 	CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
@@ -32,7 +29,7 @@ __kernel_\func:
 	CFI_RESTORE 15
 	br	%r14
 	CFI_ENDPROC
-	.size	__kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
 .endm
 
 vdso_func gettimeofday
@@ -41,16 +38,13 @@ vdso_func clock_gettime
 vdso_func getcpu
 
 .macro vdso_syscall func,syscall
-	.globl __kernel_\func
-	.type  __kernel_\func,@function
-	__ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
 	CFI_STARTPROC
 	svc	\syscall
 	/* Make sure we notice when a syscall returns, which shouldn't happen */
 	.word	0
 	CFI_ENDPROC
-	.size	__kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
 .endm
 
 vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0	%v0
+#define STATE1	%v1
+#define STATE2	%v2
+#define STATE3	%v3
+#define COPY0	%v4
+#define COPY1	%v5
+#define COPY2	%v6
+#define COPY3	%v7
+#define BEPERM	%v19
+#define TMP0	%v20
+#define TMP1	%v21
+#define TMP2	%v22
+#define TMP3	%v23
+
+	.section .rodata
+
+	.balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+	.text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and an 8-byte
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ *				       const uint8_t *key,
+ *				       uint32_t *counter,
+ *				       size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+	CFI_STARTPROC
+	larl	%r1,chacha20_constants
+
+	/* COPY0 = "expand 32-byte k" */
+	VL	COPY0,0,,%r1
+
+	/* BEPERM = byte selectors for VPERM */
+	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+	/* COPY1,COPY2 = key */
+	VLM	COPY1,COPY2,0,%r3
+
+	/* COPY3 = counter || zero nonce  */
+	lg	%r3,0(%r4)
+	VZERO	COPY3
+	VLVGG	COPY3,%r3,0
+
+	lghi	%r1,0
+.Lblock:
+	VLR	STATE0,COPY0
+	VLR	STATE1,COPY1
+	VLR	STATE2,COPY2
+	VLR	STATE3,COPY3
+
+	lghi	%r0,10
+.Ldoubleround:
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,16
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,8
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,7
+
+	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+	VSLDB	STATE1,STATE1,STATE1,4
+	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+	VSLDB	STATE2,STATE2,STATE2,8
+	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+	VSLDB	STATE3,STATE3,STATE3,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,16
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,8
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,7
+
+	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+	VSLDB	STATE1,STATE1,STATE1,12
+	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+	VSLDB	STATE2,STATE2,STATE2,8
+	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+	VSLDB	STATE3,STATE3,STATE3,4
+	brctg	%r0,.Ldoubleround
+
+	/* OUTPUT0 = STATE0 + COPY0 */
+	VAF	STATE0,STATE0,COPY0
+	/* OUTPUT1 = STATE1 + COPY1 */
+	VAF	STATE1,STATE1,COPY1
+	/* OUTPUT2 = STATE2 + COPY2 */
+	VAF	STATE2,STATE2,COPY2
+	/* OUTPUT3 = STATE3 + COPY3 */
+	VAF	STATE3,STATE3,COPY3
+
+	ALTERNATIVE							\
+		__stringify(						\
+		/* Convert STATE to little endian and store to OUTPUT */\
+		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
+		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
+		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
+		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
+		VSTM	TMP0,TMP3,0,%r2),				\
+		__stringify(						\
+		/* 32 bit wise little endian store to OUTPUT */		\
+		VSTBRF	STATE0,0,,%r2;					\
+		VSTBRF	STATE1,16,,%r2;					\
+		VSTBRF	STATE2,32,,%r2;					\
+		VSTBRF	STATE3,48,,%r2;					\
+		brcl	0,0),						\
+		ALT_FACILITY(148)
+
+	/* ++COPY3.COUNTER */
+	/* alsih %r3,1 */
+	.insn	rilu,0xcc0a00000000,%r3,1
+	alcr	%r3,%r1
+	VLVGG	COPY3,%r3,0
+
+	/* OUTPUT += 64, --NBLOCKS */
+	aghi	%r2,64
+	brctg	%r5,.Lblock
+
+	/* COUNTER = COPY3.COUNTER */
+	stg	%r3,0(%r4)
+
+	/* Zero out potentially sensitive regs */
+	VZERO	STATE0
+	VZERO	STATE1
+	VZERO	STATE2
+	VZERO	STATE3
+	VZERO	COPY1
+	VZERO	COPY2
+
+	/* Early exit if TMP0-TMP3 have not been used */
+	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+	VZERO	TMP0
+	VZERO	TMP1
+	VZERO	TMP2
+	VZERO	TMP3
+
+	br	%r14
+	CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+/*
+ * vDSO entry point for getrandom().
+ *
+ * If facility 129 is installed, the request is handed to
+ * __cvdso_getrandom(). Otherwise a call with opaque_len == ~0UL and
+ * buffer, len and flags all zero returns -ENOSYS, and every other call
+ * is forwarded to getrandom_syscall().
+ *
+ * NOTE(review): facility 129 is presumably the vector facility required
+ * by the vector code in vgetrandom-chacha.S - confirm.
+ */
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+	if (test_facility(129))
+		return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+	if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+		return -ENOSYS;
+	return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index ae5d0a9d6911..377b9aaf8c92 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -191,8 +191,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(0x100)
 
-	RUNTIME_CONST(shift, d_hash_shift)
-	RUNTIME_CONST(ptr, dentry_hashtable)
+	RUNTIME_CONST_VARIABLES
 
 	PERCPU_SECTION(0x100)
 
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+	unsigned long	missed;	/* bumped whenever the DIAG 49C acknowledge returns nonzero */
+	unsigned long	addr;	/* interrupted kernel PSW address, 0 if user mode was interrupted */
+	pid_t		pid;	/* pid of current at the time the interrupt arrived */
+};
+
+struct wti_state {
+	/* debug data for s390dbf and the debugfs statistics */
+	struct wti_debug	dbg;
+	/*
+	 * Real-time thread that acknowledges the warning-track
+	 * interrupt through DIAG 49C and re-enables I/O interrupts
+	 * afterwards.
+	 */
+	struct task_struct	*thread;
+	/*
+	 * Set by the interrupt handler, cleared by the thread. While
+	 * it is false, a wake up of that thread will remain a noop.
+	 */
+	bool			pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;	/* s390dbf handle; stays NULL until wti_init() registers it */
+
+/*
+ * During a warning-track grace period, I/O interrupts are masked
+ * in CR6 to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, I/O interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+	unsigned long irq_flags;
+	struct ctlreg ctl;
+
+	local_irq_save(irq_flags);
+	local_ctl_store(6, &ctl);
+	/* clear the 0xff000000 mask in CR6: all I/O interrupts off */
+	ctl.val = ctl.val & ~0xff000000UL;
+	local_ctl_load(6, &ctl);
+	local_irq_restore(irq_flags);
+}
+
+static void wti_irq_enable(void)
+{
+	unsigned long irq_flags;
+	struct ctlreg ctl;
+
+	local_irq_save(irq_flags);
+	local_ctl_store(6, &ctl);
+	/* set the 0xff000000 mask in CR6: all I/O interrupts on */
+	ctl.val = ctl.val | 0xff000000UL;
+	local_ctl_load(6, &ctl);
+	local_irq_restore(irq_flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+	struct pt_regs *irq_regs = get_irq_regs();
+	struct wti_debug *dbg = &st->dbg;
+
+	/* a user space PSW address is not recorded: addr is 0 in that case */
+	dbg->addr = user_mode(irq_regs) ? 0 : irq_regs->psw.addr;
+	dbg->pid = current->pid;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+			  unsigned int param32, unsigned long param64)
+{
+	struct wti_state *st = this_cpu_ptr(&wti_state);	/* state of the interrupted CPU */
+
+	inc_irq_stat(IRQEXT_WTI);
+	wti_irq_disable();		/* undone by wti_thread_fn() */
+	store_debug_data(st);
+	st->pending = true;		/* lets wti_pending() report work for the thread */
+	wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+	const struct wti_state *state = per_cpu_ptr(&wti_state, cpu);
+
+	return state->pending;	/* nonzero: an interrupt on this CPU awaits its acknowledgment */
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+	struct wti_debug *dbg = &st->dbg;
+	char text[WTI_DBF_LEN];
+
+	if (!dbg->addr)
+		snprintf(text, sizeof(text), "%d <user>", dbg->pid);
+	else
+		snprintf(text, sizeof(text), "%d %pS", dbg->pid, (void *)dbg->addr);
+	debug_text_event(wti_dbg, 2, text);
+	dbg->missed++;	/* reported per CPU by wti_show() */
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+	int cpu;
+
+	/* both rows are emitted under the CPU hotplug read lock */
+	cpus_read_lock();
+	/* header row: one "CPUn" label per online CPU */
+	seq_puts(seq, "       ");
+	for_each_online_cpu(cpu)
+		seq_printf(seq, "CPU%-8d", cpu);
+	seq_putc(seq, '\n');
+	/* value row: the missed counter of each online CPU */
+	for_each_online_cpu(cpu)
+		seq_printf(seq, " %10lu", per_cpu_ptr(&wti_state, cpu)->dbg.missed);
+	seq_putc(seq, '\n');
+	cpus_read_unlock();
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+	struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+	st->pending = false;	/* consume the request raised by wti_interrupt() */
+	/*
+	 * Yield the CPU voluntarily to the hypervisor. Control
+	 * resumes when the hypervisor decides to dispatch the CPU
+	 * to this LPAR again.
+	 */
+	if (diag49c(DIAG49C_SUBC_ACK))
+		wti_dbf_grace_period(st);	/* nonzero rc: log it and count it as missed */
+	wti_irq_enable();	/* undo wti_irq_disable() done in wti_interrupt() */
+}
+
+static struct smp_hotplug_thread wti_threads = {
+	.store			= &wti_state.thread,	/* task pointer used by wti_interrupt()/wti_init() */
+	.thread_should_run	= wti_pending,		/* true while an interrupt is unacknowledged */
+	.thread_fn		= wti_thread_fn,	/* acknowledges the interrupt via DIAG 49C */
+	.thread_comm		= "cpuwti/%u",		/* presumably %u is the CPU number - confirm */
+	.selfparking		= false,
+};
+
+/* Set up warning-track handling: RT threads, ext. interrupt, DIAG 49C, debugfs and s390dbf. */
+static int __init wti_init(void)
+{
+	struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct dentry *wti_dir;
+	struct wti_state *st;
+	int cpu, rc;
+
+	if (!sclp.has_wti)
+		return -EOPNOTSUPP;
+	rc = smpboot_register_percpu_thread(&wti_threads);
+	if (WARN_ON(rc))
+		goto out;
+	for_each_online_cpu(cpu) {
+		st = per_cpu_ptr(&wti_state, cpu);
+		sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+	}
+	rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+	if (rc) {
+		pr_warn("Couldn't request external interrupt 0x1007\n");
+		goto out_thread;
+	}
+	irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+	rc = diag49c(DIAG49C_SUBC_REG);
+	if (rc) {
+		pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+		rc = -EOPNOTSUPP;	/* report any DIAG 49C failure as "not supported" */
+		goto out_subclass;
+	}
+	wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+	debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+	wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+	if (!wti_dbg) {
+		rc = -ENOMEM;
+		goto out_debug_register;
+	}
+	rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+	if (!rc)
+		goto out;	/* success: skip the unwind path below */
+out_debug_register:
+	debug_unregister(wti_dbg);
+	debugfs_remove(wti_dir);	/* takes the "stat" file down with the directory */
+out_subclass:
+	irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+	unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+	smpboot_unregister_percpu_thread(&wti_threads);
+out:
+	return rc;
+}
+late_initcall(wti_init);