Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/Makefile           |   4
-rw-r--r--  arch/s390/mm/cmm.c              |   6
-rw-r--r--  arch/s390/mm/dump_pagetables.c  |  54
-rw-r--r--  arch/s390/mm/extable.c          |  47
-rw-r--r--  arch/s390/mm/extmem.c           |  27
-rw-r--r--  arch/s390/mm/fault.c            |  37
-rw-r--r--  arch/s390/mm/gmap.c             | 232
-rw-r--r--  arch/s390/mm/gmap_helpers.c     | 221
-rw-r--r--  arch/s390/mm/hugetlbpage.c      |   7
-rw-r--r--  arch/s390/mm/init.c             |  40
-rw-r--r--  arch/s390/mm/mmap.c             |   9
-rw-r--r--  arch/s390/mm/pageattr.c         |   9
-rw-r--r--  arch/s390/mm/pfault.c           |   5
-rw-r--r--  arch/s390/mm/pgalloc.c          |  46
-rw-r--r--  arch/s390/mm/pgtable.c          |  89
-rw-r--r--  arch/s390/mm/vmem.c             |   9
16 files changed, 458 insertions, 384 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..bd0401cc7ca5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o + +obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 39f44b6256e0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,7 +201,7 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); @@ -424,7 +424,7 @@ out_smsg: #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index fa54f3bc0c8d..ac604b176660 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufeature.h> #include <linux/set_memory.h> #include <linux/ptdump.h> #include <linux/seq_file.h> @@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) * in which case we have two lpswe instructions in lowcore that need * to be executable. 
*/ - if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear())) return; WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), "s390/mm: Found insecure W+X mapping at address %pS\n", @@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, @@ -167,7 +206,7 @@ bool ptdump_check_wx(void) }, }; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) { @@ -176,7 +215,7 @@ bool ptdump_check_wx(void) return false; } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", - (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + (nospec_uses_trampoline() || !cpu_has_bear()) ? "unexpected " : ""); return true; @@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index a046be1715cf..7498e858c401 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -73,6 +73,49 @@ static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_reg return true; } +struct insn_ssf { + u64 opc1 : 8; + u64 r3 : 4; + u64 opc2 : 4; + u64 b1 : 4; + u64 d1 : 12; + u64 b2 : 4; + u64 d2 : 12; +} __packed; + +static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex, + bool from, struct pt_regs *regs) +{ + unsigned long uaddr, remainder; + struct insn_ssf *insn; + + /* + * If the faulting user space access crossed a page boundary retry by + * limiting the access to the first page (adjust length accordingly). + * Then the mvcos instruction will either complete with condition code + * zero, or generate another fault where the user space access did not + * cross a page boundary. + * If the faulting user space access did not cross a page boundary set + * length to zero and retry. 
In this case no user space access will + * happen, and the mvcos instruction will complete with condition code + * zero. + * In both cases the instruction will complete with condition code + * zero (copying finished), and the register which contains the + * length, indicates the number of bytes copied. + */ + regs->psw.addr = extable_fixup(ex); + insn = (struct insn_ssf *)regs->psw.addr; + if (from) + uaddr = regs->gprs[insn->b2] + insn->d2; + else + uaddr = regs->gprs[insn->b1] + insn->d1; + remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1)); + if (regs->gprs[insn->r3] <= remainder) + remainder = 0; + regs->gprs[insn->r3] = remainder; + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -95,6 +138,10 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_zeropad(ex, regs); case EX_TYPE_FPC: return ex_handler_fpc(ex, regs); + case EX_TYPE_UA_MVCOS_TO: + return ex_handler_ua_mvcos(ex, false, regs); + case EX_TYPE_UA_MVCOS_FROM: + return ex_handler_ua_mvcos(ex, true, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 4692136c0af1..f7da53e212f5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/refcount.h> #include <linux/pgtable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/page.h> #include <asm/ebcdic.h> @@ -255,7 +256,7 @@ segment_type (char* name) int rc; struct dcss_segment seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; dcss_mkname(name, seg.dcss_name); @@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, struct dcss_segment *seg; int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; mutex_lock(&dcss_lock); @@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared) return rc; } +static void __dcss_diag_purge_on_cpu_0(void *data) +{ + struct dcss_segment *seg = (struct dcss_segment *)data; + unsigned long dummy; + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); +} + /* * Decrease the use count of a DCSS segment and remove * it from the address space if nobody is using it @@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared) void segment_unload(char *name) { - unsigned long dummy; struct dcss_segment *seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); @@ -555,7 +563,14 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + /* + * Workaround for z/VM issue, where calling the DCSS unload diag on + * a non-IPL CPU would cause bogus sclp maximum memory detection on + * next IPL. + * IPL CPU 0 cannot be set offline, so the dcss_diag() call can + * directly be scheduled to that CPU. 
+ */ + smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -572,7 +587,7 @@ segment_save(char *name) char cmd2[80]; int i, response; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9b681f74dccc..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -11,11 +11,11 @@ #include <linux/kernel_stat.h> #include <linux/mmu_context.h> +#include <linux/cpufeature.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> -#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -40,22 +40,11 @@ #include <asm/ptrace.h> #include <asm/fault.h> #include <asm/diag.h> -#include <asm/gmap.h> #include <asm/irq.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -static DEFINE_STATIC_KEY_FALSE(have_store_indication); - -static int __init fault_init(void) -{ - if (test_facility(75)) - static_branch_enable(&have_store_indication); - return 0; -} -early_initcall(fault_init); - /* * Find out which address space caused the exception. */ @@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs) { union teid teid = { .val = regs->int_parm_long }; - if (static_branch_likely(&have_store_indication)) + if (test_facility(75)) return teid.fsi == TEID_FSI_STORE; return false; } @@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs) int show_unhandled_signals = 1; +static const struct ctl_table s390_fault_sysctl_table[] = { + { + .procname = "userprocess_debug", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_fault_sysctls(void) +{ + register_sysctl_init("kernel", s390_fault_sysctl_table); + return 0; +} +arch_initcall(init_s390_fault_sysctls); + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* @@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs) */ return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && teid.b56)) { + if (unlikely(cpu_has_nx() && teid.b56)) { regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); return handle_fault_error_nolock(regs, SEGV_ACCERR); } @@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 94d927785800..012a4366a2ad 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -8,6 +8,7 @@ * Janosch Frank <frankja@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/pagewalk.h> #include <linux/swap.h> @@ -20,9 +21,10 @@ #include <linux/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> +#include <asm/machine.h> +#include <asm/gmap_helpers.h> #include <asm/gmap.h> #include <asm/page.h> -#include <asm/tlb.h> /* * The address is saved 
in a radix tree directly; NULL would be ambiguous, @@ -135,7 +137,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -618,63 +620,20 @@ EXPORT_SYMBOL(__gmap_link); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - struct vm_area_struct *vma; unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + + mmap_assert_locked(gmap->mm); /* Find the vm address for the guest address */ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; - - vma = vma_lookup(gmap->mm, vmaddr); - if (!vma || is_vm_hugetlb_page(vma)) - return; - - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) { - ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); - } + gmap_helper_zap_one_page(gmap->mm, vmaddr); } } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - mmap_read_lock(gmap->mm); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - if (!vma) - continue; - /* - * We do not discard pages that are backed by - * hugetlbfs, so we don't have to refault them. - */ - if (is_vm_hugetlb_page(vma)) - continue; - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range_single(vma, vmaddr, size, NULL); - } - mmap_read_unlock(gmap->mm); -} -EXPORT_SYMBOL_GPL(gmap_discard); - static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); @@ -2025,10 +1984,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2103,10 +2062,10 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } @@ -2136,10 +2095,10 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2258,9 +2217,6 @@ int s390_enable_sie(void) /* Do we have pgstes? 
if yes, we are done */ if (mm_has_pgste(mm)) return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ @@ -2270,138 +2226,6 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); -static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - unsigned long *found_addr = walk->private; - - /* Return 1 of the page is a zeropage. */ - if (is_zero_pfn(pte_pfn(*pte))) { - /* - * Shared zeropage in e.g., a FS DAX mapping? We cannot do the - * right thing and likely don't care: FAULT_FLAG_UNSHARE - * currently only works in COW mappings, which is also where - * mm_forbids_zeropage() is checked. - */ - if (!is_cow_mapping(walk->vma->vm_flags)) - return -EFAULT; - - *found_addr = addr; - return 1; - } - return 0; -} - -static const struct mm_walk_ops find_zeropage_ops = { - .pte_entry = find_zeropage_pte_entry, - .walk_lock = PGWALK_WRLOCK, -}; - -/* - * Unshare all shared zeropages, replacing them by anonymous pages. Note that - * we cannot simply zap all shared zeropages, because this could later - * trigger unexpected userfaultfd missing events. - * - * This must be called after mm->context.allow_cow_sharing was - * set to 0, to avoid future mappings of shared zeropages. - * - * mm contracts with s390, that even if mm were to remove a page table, - * and racing with walk_page_range_vma() calling pte_offset_map_lock() - * would fail, it will never insert a page table containing empty zero - * pages once mm_forbids_zeropage(mm) i.e. - * mm->context.allow_cow_sharing is set to 0. - */ -static int __s390_unshare_zeropages(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - unsigned long addr; - vm_fault_t fault; - int rc; - - for_each_vma(vmi, vma) { - /* - * We could only look at COW mappings, but it's more future - * proof to catch unexpected zeropages in other mappings and - * fail. - */ - if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) - continue; - addr = vma->vm_start; - -retry: - rc = walk_page_range_vma(vma, addr, vma->vm_end, - &find_zeropage_ops, &addr); - if (rc < 0) - return rc; - else if (!rc) - continue; - - /* addr was updated by find_zeropage_pte_entry() */ - fault = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, - NULL); - if (fault & VM_FAULT_OOM) - return -ENOMEM; - /* - * See break_ksm(): even after handle_mm_fault() returned 0, we - * must start the lookup from the current address, because - * handle_mm_fault() may back out if there's any difficulty. - * - * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but - * maybe they could trigger in the future on concurrent - * truncation. In that case, the shared zeropage would be gone - * and we can simply retry and make progress. - */ - cond_resched(); - goto retry; - } - - return 0; -} - -static int __s390_disable_cow_sharing(struct mm_struct *mm) -{ - int rc; - - if (!mm->context.allow_cow_sharing) - return 0; - - mm->context.allow_cow_sharing = 0; - - /* Replace all shared zeropages by anonymous pages. */ - rc = __s390_unshare_zeropages(mm); - /* - * Make sure to disable KSM (if enabled for the whole process or - * individual VMAs). Note that nothing currently hinders user space - * from re-enabling it. 
- */ - if (!rc) - rc = ksm_disable(mm); - if (rc) - mm->context.allow_cow_sharing = 1; - return rc; -} - -/* - * Disable most COW-sharing of memory pages for the whole process: - * (1) Disable KSM and unmerge/unshare any KSM pages. - * (2) Disallow shared zeropages and unshare any zerpages that are mapped. - * - * Not that we currently don't bother with COW-shared pages that are shared - * with parent/child processes due to fork(). - */ -int s390_disable_cow_sharing(void) -{ - int rc; - - mmap_write_lock(current->mm); - rc = __s390_disable_cow_sharing(current->mm); - mmap_write_unlock(current->mm); - return rc; -} -EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); - /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2469,7 +2293,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = __s390_disable_cow_sharing(mm); + rc = gmap_helper_disable_cow_sharing(); if (rc) { mm->context.uses_skeys = 0; goto out_up; @@ -2626,31 +2450,3 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); - -/** - * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split - * @mm: the mm containing the folio to work on - * @folio: the folio - * @split: whether to split a large folio - * - * Context: Must be called while holding an extra reference to the folio; - * the mm lock should not be held. - */ -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) -{ - int rc; - - lockdep_assert_not_held(&mm->mmap_lock); - folio_wait_writeback(folio); - lru_add_drain_all(); - if (split) { - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - - if (rc != -EBUSY) - return rc; - } - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c new file mode 100644 index 000000000000..a45d417ad951 --- /dev/null +++ b/arch/s390/mm/gmap_helpers.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2025 + */ +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pagewalk.h> +#include <linux/ksm.h> +#include <asm/gmap_helpers.h> + +/** + * ptep_zap_swap_entry() - discard a swap entry. + * @mm: the mm + * @entry: the swap entry that needs to be zapped + * + * Discards the given swap entry. If the swap entry was an actual swap + * entry (and not a migration entry, for example), the actual swapped + * page is also discarded from swap. + */ +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) + dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + free_swap_and_cache(entry); +} + +/** + * gmap_helper_zap_one_page() - discard a page if it was swapped. + * @mm: the mm + * @vmaddr: the userspace virtual address that needs to be discarded + * + * If the given address maps to a swap entry, discard it. + * + * Context: needs to be called while holding the mmap lock. 
+ */ +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) +{ + struct vm_area_struct *vma; + spinlock_t *ptl; + pte_t *ptep; + + mmap_assert_locked(mm); + + /* Find the vm address for the guest address */ + vma = vma_lookup(mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + + /* Get pointer to the page table entry */ + ptep = get_locked_pte(mm, vmaddr, &ptl); + if (unlikely(!ptep)) + return; + if (pte_swap(*ptep)) + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_unmap_unlock(ptep, ptl); +} +EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); + +/** + * gmap_helper_discard() - discard user pages in the given range + * @mm: the mm + * @vmaddr: starting userspace address + * @end: end address (first address outside the range) + * + * All userpace pages in the range [@vamddr, @end) are discarded and unmapped. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end) +{ + struct vm_area_struct *vma; + + mmap_assert_locked(mm); + + while (vmaddr < end) { + vma = find_vma_intersection(mm, vmaddr, end); + if (!vma) + return; + if (!is_vm_hugetlb_page(vma)) + zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL); + vmaddr = vma->vm_end; + } +} +EXPORT_SYMBOL_GPL(gmap_helper_discard); + +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + unsigned long *found_addr = walk->private; + + /* Return 1 of the page is a zeropage. */ + if (is_zero_pfn(pte_pfn(*pte))) { + /* + * Shared zeropage in e.g., a FS DAX mapping? We cannot do the + * right thing and likely don't care: FAULT_FLAG_UNSHARE + * currently only works in COW mappings, which is also where + * mm_forbids_zeropage() is checked. + */ + if (!is_cow_mapping(walk->vma->vm_flags)) + return -EFAULT; + + *found_addr = addr; + return 1; + } + return 0; +} + +static const struct mm_walk_ops find_zeropage_ops = { + .pte_entry = find_zeropage_pte_entry, + .walk_lock = PGWALK_WRLOCK, +}; + +/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages + * @mm: the mm whose zero pages are to be unshared + * + * Unshare all shared zeropages, replacing them by anonymous pages. Note that + * we cannot simply zap all shared zeropages, because this could later + * trigger unexpected userfaultfd missing events. + * + * This must be called after mm->context.allow_cow_sharing was + * set to 0, to avoid future mappings of shared zeropages. + * + * mm contracts with s390, that even if mm were to remove a page table, + * and racing with walk_page_range_vma() calling pte_offset_map_lock() + * would fail, it will never insert a page table containing empty zero + * pages once mm_forbids_zeropage(mm) i.e. + * mm->context.allow_cow_sharing is set to 0. + */ +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + unsigned long addr; + vm_fault_t fault; + int rc; + + for_each_vma(vmi, vma) { + /* + * We could only look at COW mappings, but it's more future + * proof to catch unexpected zeropages in other mappings and + * fail. 
+ */ + if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) + continue; + addr = vma->vm_start; + +retry: + rc = walk_page_range_vma(vma, addr, vma->vm_end, + &find_zeropage_ops, &addr); + if (rc < 0) + return rc; + else if (!rc) + continue; + + /* addr was updated by find_zeropage_pte_entry() */ + fault = handle_mm_fault(vma, addr, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, + NULL); + if (fault & VM_FAULT_OOM) + return -ENOMEM; + /* + * See break_ksm(): even after handle_mm_fault() returned 0, we + * must start the lookup from the current address, because + * handle_mm_fault() may back out if there's any difficulty. + * + * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but + * maybe they could trigger in the future on concurrent + * truncation. In that case, the shared zeropage would be gone + * and we can simply retry and make progress. + */ + cond_resched(); + goto retry; + } + + return 0; +} + +/** + * gmap_helper_disable_cow_sharing() - disable all COW sharing + * + * Disable most COW-sharing of memory pages for the whole process: + * (1) Disable KSM and unmerge/unshare any KSM pages. + * (2) Disallow shared zeropages and unshare any zerpages that are mapped. + * + * Not that we currently don't bother with COW-shared pages that are shared + * with parent/child processes due to fork(). + */ +int gmap_helper_disable_cow_sharing(void) +{ + struct mm_struct *mm = current->mm; + int rc; + + mmap_assert_write_locked(mm); + + if (!mm->context.allow_cow_sharing) + return 0; + + mm->context.allow_cow_sharing = 0; + + /* Replace all shared zeropages by anonymous pages. */ + rc = __gmap_helper_unshare_zeropages(mm); + /* + * Make sure to disable KSM (if enabled for the whole process or + * individual VMAs). Note that nothing currently hinders user space + * from re-enabling it. 
+ */ + if (!rc) + rc = ksm_disable(mm); + if (rc) + mm->context.allow_cow_sharing = 1; + return rc; +} +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 2e568f175cd4..e88c02c9e642 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -9,12 +9,13 @@ #define KMSG_COMPONENT "hugetlb" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <asm/pgalloc.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/security.h> +#include <asm/pgalloc.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, bool __init arch_hugetlb_valid_size(unsigned long size) { - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + if (cpu_has_edat1() && size == PMD_SIZE) return true; - else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + else if (cpu_has_edat2() && size == PUD_SIZE) return true; else return false; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f2298f7a3f21..074bf4fb4ce2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -8,6 +8,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/cpufeature.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -39,7 +40,6 @@ #include <asm/kfence.h> #include <asm/dma.h> #include <asm/abs_lowcore.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sclp.h> @@ -73,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -83,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -117,7 +106,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - if (MACHINE_HAS_NX) + if (cpu_has_nx()) system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); @@ -165,19 +154,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - kfence_split_mapping(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ } @@ -239,16 +222,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, @@ -285,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL))) return -EINVAL; VM_BUG_ON(!mhp_range_allowed(start, size, true)); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 76f376876e0d..40a526d28184 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd, * Top of mmap area (just below the process stack). * Leave at least a ~128 MB hole. */ - gap_min = SZ_128M; - gap_max = (STACK_TOP / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5); return PAGE_ALIGN(STACK_TOP - gap - rnd); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index eae97fb61712..348e759840e7 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 
2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/hugetlb.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> @@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT1) { + if (cpu_has_edat1()) { /* set storage keys for a 1MB frame */ size = 1UL << 20; boundary = (start + size) & ~(size - 1); @@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long *table, mask; mask = 0; - if (MACHINE_HAS_EDAT2) { + if (cpu_has_edat2()) { switch (dtt) { case CRDTE_DTT_REGION3: mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); @@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { cspg(old, *old, new); } else { csp((unsigned int *)old + 1, *old, new); @@ -373,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags unsigned long end; int rc; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1aac13bb8f53..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -56,7 +57,7 @@ int __pfault_init(void) if (pfault_disable) return rc; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],%[rc],0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) @@ -78,7 +79,7 @@ void __pfault_fini(void) if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 30387a6e98ff..b449fd2605b0 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -12,35 +12,8 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> #include <asm/pgalloc.h> -#include <asm/gmap.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PGSTE - -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static const struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl("vm", page_table_sysctl) ? 
0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#endif /* CONFIG_PGSTE */ - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); @@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; + struct ctlreg asce; /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - get_lowcore()->user_asce.val = mm->context.asce; - local_ctl_load(7, &get_lowcore()->user_asce); + asce.val = mm->context.asce; + get_lowcore()->user_asce = asce; + local_ctl_load(7, &asce); + if (!test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &asce); } __tlb_flush_local(); } @@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) unsigned long *pgd = NULL, *p4d = NULL, *__pgd; unsigned long asce_limit = mm->context.asce_limit; + mmap_assert_write_locked(mm); + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(asce_limit < _REGION2_SIZE); @@ -100,13 +79,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) spin_lock_bh(&mm->page_table_lock); - /* - * This routine gets called with mmap_lock lock held and there is - * no reason to optimize for the case of otherwise. However, if - * that would ever change, the below check will let us know. - */ - VM_BUG_ON(asce_limit != mm->context.asce_limit); - if (p4d) { __pgd = (unsigned long *) mm->pgd; p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); @@ -170,7 +142,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f05e62e037c2..7df70cd8f739 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -19,10 +20,10 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) { @@ -34,22 +35,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - /* - * mio_wb_bit_mask may be set on a different CPU, but it is only set - * once at init and only read afterwards. 
- */ - return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -69,7 +60,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -94,7 +85,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) ptep_ipte_local(mm, addr, ptep, nodat); else @@ -173,10 +164,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); #endif return pgste; @@ -210,7 +201,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) if ((pte_val(entry) & _PAGE_PRESENT) && (pte_val(entry) & _PAGE_WRITE) && !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { + if (!machine_has_esop()) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. 
@@ -220,7 +211,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; + pgste = set_pgste_bit(pgste, PGSTE_UC_BIT); } #endif set_pte(ptep, entry); @@ -236,7 +227,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm, bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); if (bits) { - pgste_val(pgste) ^= bits; + pgste = __pgste(pgste_val(pgste) ^ bits); ptep_notify(mm, addr, ptep, bits); } #endif @@ -374,7 +365,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -386,12 +377,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm, static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); @@ -411,7 +402,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pmdp_idte_local(mm, addr, pmdp); else @@ -505,7 +496,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy); static inline void pudp_idte_local(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -515,10 +506,10 @@ static inline void pudp_idte_local(struct mm_struct *mm, static inline void pudp_idte_global(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else /* @@ -537,7 +528,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, if (pud_val(old) & _REGION_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pudp_idte_local(mm, addr, pudp); else @@ -609,7 +600,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, /* the mm_has_pgste() check is done in set_pte_at() */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO); pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); @@ -622,7 +613,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; + pgste = set_pgste_bit(pgste, PGSTE_IN_BIT); pgste_set_unlock(ptep, pgste); 
preempt_enable(); } @@ -667,7 +658,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } - pgste_val(pgste) |= bit; + pgste = set_pgste_bit(pgste, bit); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); return 0; @@ -687,7 +678,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, if (!(pte_val(spte) & _PAGE_INVALID) && !((pte_val(spte) & _PAGE_PROTECT) && !(pte_val(pte) & _PAGE_PROTECT))) { - pgste_val(spgste) |= PGSTE_VSIE_BIT; + spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT); tpgste = pgste_get_lock(tptep); tpte = __pte((pte_val(spte) & PAGE_MASK) | (pte_val(pte) & _PAGE_PROTECT)); @@ -745,7 +736,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -758,8 +749,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); @@ -780,13 +771,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT); pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE)) pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); @@ -842,11 +833,11 @@ again: if (!ptep) goto again; new = old = pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); + new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); keyul = (unsigned long) key; - pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48); + new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long bits, skey; @@ -857,12 +848,12 @@ again: /* Set storage key ACC and FP */ page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; + new = set_pgste_bit(new, bits << 52); } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -950,19 +941,19 @@ again: goto again; new = old = 
pgste_get_lock(ptep); /* Reset guest reference bit only */ - pgste_val(new) &= ~PGSTE_GR_BIT; + new = clear_pgste_bit(new, PGSTE_GR_BIT); if (!(pte_val(*ptep) & _PAGE_INVALID)) { paddr = pte_val(*ptep) & PAGE_MASK; cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ - pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT); } /* Reflect guest's logical view, not physical */ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; /* Changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -1126,7 +1117,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, if (res) pgstev |= _PGSTE_GPS_ZERO; - pgste_val(pgste) = pgstev; + pgste = __pgste(pgstev); pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return res; @@ -1159,8 +1150,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva, return -EFAULT; new = pgste_get_lock(ptep); - pgste_val(new) &= ~bits; - pgste_val(new) |= value & bits; + new = clear_pgste_bit(new, bits); + new = set_pgste_bit(new, value & bits); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8ead999e340b..448dd6ed1069 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -249,12 +250,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && direct && + cpu_has_edat1() && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; - } else if (!direct && MACHINE_HAS_EDAT1) { + } else if (!direct && cpu_has_edat1()) { void *new_page; /* @@ -335,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && direct && + cpu_has_edat2() && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -659,7 +660,7 @@ void __init vmem_map_init(void) * prefix page is used to return to the previous context with * an LPSWE instruction and therefore must be executable. */ - if (!static_key_enabled(&cpu_has_bear)) + if (!cpu_has_bear()) set_memory_x(0, 1); if (debug_pagealloc_enabled()) __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); |