Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/Makefile           |   4
-rw-r--r--  arch/s390/mm/cmm.c              |   6
-rw-r--r--  arch/s390/mm/dump_pagetables.c  |  54
-rw-r--r--  arch/s390/mm/extable.c          |  47
-rw-r--r--  arch/s390/mm/extmem.c           |  27
-rw-r--r--  arch/s390/mm/fault.c            |  37
-rw-r--r--  arch/s390/mm/gmap.c             | 232
-rw-r--r--  arch/s390/mm/gmap_helpers.c     | 221
-rw-r--r--  arch/s390/mm/hugetlbpage.c      |   7
-rw-r--r--  arch/s390/mm/init.c             |  40
-rw-r--r--  arch/s390/mm/mmap.c             |   9
-rw-r--r--  arch/s390/mm/pageattr.c         |   9
-rw-r--r--  arch/s390/mm/pfault.c           |   5
-rw-r--r--  arch/s390/mm/pgalloc.c          |  46
-rw-r--r--  arch/s390/mm/pgtable.c          |  89
-rw-r--r--  arch/s390/mm/vmem.c             |   9
16 files changed, 458 insertions, 384 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..bd0401cc7ca5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o + +obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 39f44b6256e0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,7 +201,7 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); @@ -424,7 +424,7 @@ out_smsg: #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index fa54f3bc0c8d..ac604b176660 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufeature.h> #include <linux/set_memory.h> #include <linux/ptdump.h> #include <linux/seq_file.h> @@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) * in which case we have two lpswe instructions in lowcore that need * to be executable. 
*/ - if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear())) return; WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), "s390/mm: Found insecure W+X mapping at address %pS\n", @@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, @@ -167,7 +206,7 @@ bool ptdump_check_wx(void) }, }; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) { @@ -176,7 +215,7 @@ bool ptdump_check_wx(void) return false; } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", - (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + (nospec_uses_trampoline() || !cpu_has_bear()) ? "unexpected " : ""); return true; @@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index a046be1715cf..7498e858c401 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -73,6 +73,49 @@ static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_reg return true; } +struct insn_ssf { + u64 opc1 : 8; + u64 r3 : 4; + u64 opc2 : 4; + u64 b1 : 4; + u64 d1 : 12; + u64 b2 : 4; + u64 d2 : 12; +} __packed; + +static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex, + bool from, struct pt_regs *regs) +{ + unsigned long uaddr, remainder; + struct insn_ssf *insn; + + /* + * If the faulting user space access crossed a page boundary retry by + * limiting the access to the first page (adjust length accordingly). + * Then the mvcos instruction will either complete with condition code + * zero, or generate another fault where the user space access did not + * cross a page boundary. + * If the faulting user space access did not cross a page boundary set + * length to zero and retry. 
In this case no user space access will + * happen, and the mvcos instruction will complete with condition code + * zero. + * In both cases the instruction will complete with condition code + * zero (copying finished), and the register which contains the + * length, indicates the number of bytes copied. + */ + regs->psw.addr = extable_fixup(ex); + insn = (struct insn_ssf *)regs->psw.addr; + if (from) + uaddr = regs->gprs[insn->b2] + insn->d2; + else + uaddr = regs->gprs[insn->b1] + insn->d1; + remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1)); + if (regs->gprs[insn->r3] <= remainder) + remainder = 0; + regs->gprs[insn->r3] = remainder; + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -95,6 +138,10 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_zeropad(ex, regs); case EX_TYPE_FPC: return ex_handler_fpc(ex, regs); + case EX_TYPE_UA_MVCOS_TO: + return ex_handler_ua_mvcos(ex, false, regs); + case EX_TYPE_UA_MVCOS_FROM: + return ex_handler_ua_mvcos(ex, true, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 4692136c0af1..f7da53e212f5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/refcount.h> #include <linux/pgtable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/page.h> #include <asm/ebcdic.h> @@ -255,7 +256,7 @@ segment_type (char* name) int rc; struct dcss_segment seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; dcss_mkname(name, seg.dcss_name); @@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, struct dcss_segment *seg; int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; mutex_lock(&dcss_lock); @@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared) return rc; } +static void __dcss_diag_purge_on_cpu_0(void *data) +{ + struct dcss_segment *seg = (struct dcss_segment *)data; + unsigned long dummy; + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); +} + /* * Decrease the use count of a DCSS segment and remove * it from the address space if nobody is using it @@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared) void segment_unload(char *name) { - unsigned long dummy; struct dcss_segment *seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); @@ -555,7 +563,14 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + /* + * Workaround for z/VM issue, where calling the DCSS unload diag on + * a non-IPL CPU would cause bogus sclp maximum memory detection on + * next IPL. + * IPL CPU 0 cannot be set offline, so the dcss_diag() call can + * directly be scheduled to that CPU. 
+ */ + smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -572,7 +587,7 @@ segment_save(char *name) char cmd2[80]; int i, response; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9b681f74dccc..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -11,11 +11,11 @@ #include <linux/kernel_stat.h> #include <linux/mmu_context.h> +#include <linux/cpufeature.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> -#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -40,22 +40,11 @@ #include <asm/ptrace.h> #include <asm/fault.h> #include <asm/diag.h> -#include <asm/gmap.h> #include <asm/irq.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -static DEFINE_STATIC_KEY_FALSE(have_store_indication); - -static int __init fault_init(void) -{ - if (test_facility(75)) - static_branch_enable(&have_store_indication); - return 0; -} -early_initcall(fault_init); - /* * Find out which address space caused the exception. */ @@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs) { union teid teid = { .val = regs->int_parm_long }; - if (static_branch_likely(&have_store_indication)) + if (test_facility(75)) return teid.fsi == TEID_FSI_STORE; return false; } @@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs) int show_unhandled_signals = 1; +static const struct ctl_table s390_fault_sysctl_table[] = { + { + .procname = "userprocess_debug", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_fault_sysctls(void) +{ + register_sysctl_init("kernel", s390_fault_sysctl_table); + return 0; +} +arch_initcall(init_s390_fault_sysctls); + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* @@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs) */ return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && teid.b56)) { + if (unlikely(cpu_has_nx() && teid.b56)) { regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); return handle_fault_error_nolock(regs, SEGV_ACCERR); } @@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 94d927785800..012a4366a2ad 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -8,6 +8,7 @@ * Janosch Frank <frankja@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/pagewalk.h> #include <linux/swap.h> @@ -20,9 +21,10 @@ #include <linux/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> +#include <asm/machine.h> +#include <asm/gmap_helpers.h> #include <asm/gmap.h> #include <asm/page.h> -#include <asm/tlb.h> /* * The address is saved 
in a radix tree directly; NULL would be ambiguous, @@ -135,7 +137,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -618,63 +620,20 @@ EXPORT_SYMBOL(__gmap_link); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - struct vm_area_struct *vma; unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + + mmap_assert_locked(gmap->mm); /* Find the vm address for the guest address */ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; - - vma = vma_lookup(gmap->mm, vmaddr); - if (!vma || is_vm_hugetlb_page(vma)) - return; - - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) { - ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); - } + gmap_helper_zap_one_page(gmap->mm, vmaddr); } } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - mmap_read_lock(gmap->mm); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - if (!vma) - continue; - /* - * We do not discard pages that are backed by - * hugetlbfs, so we don't have to refault them. - */ - if (is_vm_hugetlb_page(vma)) - continue; - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range_single(vma, vmaddr, size, NULL); - } - mmap_read_unlock(gmap->mm); -} -EXPORT_SYMBOL_GPL(gmap_discard); - static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); @@ -2025,10 +1984,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2103,10 +2062,10 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } @@ -2136,10 +2095,10 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2258,9 +2217,6 @@ int s390_enable_sie(void) /* Do we have pgstes? 
if yes, we are done */ if (mm_has_pgste(mm)) return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ @@ -2270,138 +2226,6 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); -static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - unsigned long *found_addr = walk->private; - - /* Return 1 of the page is a zeropage. */ - if (is_zero_pfn(pte_pfn(*pte))) { - /* - * Shared zeropage in e.g., a FS DAX mapping? We cannot do the - * right thing and likely don't care: FAULT_FLAG_UNSHARE - * currently only works in COW mappings, which is also where - * mm_forbids_zeropage() is checked. - */ - if (!is_cow_mapping(walk->vma->vm_flags)) - return -EFAULT; - - *found_addr = addr; - return 1; - } - return 0; -} - -static const struct mm_walk_ops find_zeropage_ops = { - .pte_entry = find_zeropage_pte_entry, - .walk_lock = PGWALK_WRLOCK, -}; - -/* - * Unshare all shared zeropages, replacing them by anonymous pages. Note that - * we cannot simply zap all shared zeropages, because this could later - * trigger unexpected userfaultfd missing events. - * - * This must be called after mm->context.allow_cow_sharing was - * set to 0, to avoid future mappings of shared zeropages. - * - * mm contracts with s390, that even if mm were to remove a page table, - * and racing with walk_page_range_vma() calling pte_offset_map_lock() - * would fail, it will never insert a page table containing empty zero - * pages once mm_forbids_zeropage(mm) i.e. - * mm->context.allow_cow_sharing is set to 0. - */ -static int __s390_unshare_zeropages(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - unsigned long addr; - vm_fault_t fault; - int rc; - - for_each_vma(vmi, vma) { - /* - * We could only look at COW mappings, but it's more future - * proof to catch unexpected zeropages in other mappings and - * fail. - */ - if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) - continue; - addr = vma->vm_start; - -retry: - rc = walk_page_range_vma(vma, addr, vma->vm_end, - &find_zeropage_ops, &addr); - if (rc < 0) - return rc; - else if (!rc) - continue; - - /* addr was updated by find_zeropage_pte_entry() */ - fault = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, - NULL); - if (fault & VM_FAULT_OOM) - return -ENOMEM; - /* - * See break_ksm(): even after handle_mm_fault() returned 0, we - * must start the lookup from the current address, because - * handle_mm_fault() may back out if there's any difficulty. - * - * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but - * maybe they could trigger in the future on concurrent - * truncation. In that case, the shared zeropage would be gone - * and we can simply retry and make progress. - */ - cond_resched(); - goto retry; - } - - return 0; -} - -static int __s390_disable_cow_sharing(struct mm_struct *mm) -{ - int rc; - - if (!mm->context.allow_cow_sharing) - return 0; - - mm->context.allow_cow_sharing = 0; - - /* Replace all shared zeropages by anonymous pages. */ - rc = __s390_unshare_zeropages(mm); - /* - * Make sure to disable KSM (if enabled for the whole process or - * individual VMAs). Note that nothing currently hinders user space - * from re-enabling it. 
- */ - if (!rc) - rc = ksm_disable(mm); - if (rc) - mm->context.allow_cow_sharing = 1; - return rc; -} - -/* - * Disable most COW-sharing of memory pages for the whole process: - * (1) Disable KSM and unmerge/unshare any KSM pages. - * (2) Disallow shared zeropages and unshare any zerpages that are mapped. - * - * Not that we currently don't bother with COW-shared pages that are shared - * with parent/child processes due to fork(). - */ -int s390_disable_cow_sharing(void) -{ - int rc; - - mmap_write_lock(current->mm); - rc = __s390_disable_cow_sharing(current->mm); - mmap_write_unlock(current->mm); - return rc; -} -EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); - /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2469,7 +2293,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = __s390_disable_cow_sharing(mm); + rc = gmap_helper_disable_cow_sharing(); if (rc) { mm->context.uses_skeys = 0; goto out_up; @@ -2626,31 +2450,3 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); - -/** - * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split - * @mm: the mm containing the folio to work on - * @folio: the folio - * @split: whether to split a large folio - * - * Context: Must be called while holding an extra reference to the folio; - * the mm lock should not be held. - */ -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) -{ - int rc; - - lockdep_assert_not_held(&mm->mmap_lock); - folio_wait_writeback(folio); - lru_add_drain_all(); - if (split) { - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - - if (rc != -EBUSY) - return rc; - } - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c new file mode 100644 index 000000000000..a45d417ad951 --- /dev/null +++ b/arch/s390/mm/gmap_helpers.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2025 + */ +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pagewalk.h> +#include <linux/ksm.h> +#include <asm/gmap_helpers.h> + +/** + * ptep_zap_swap_entry() - discard a swap entry. + * @mm: the mm + * @entry: the swap entry that needs to be zapped + * + * Discards the given swap entry. If the swap entry was an actual swap + * entry (and not a migration entry, for example), the actual swapped + * page is also discarded from swap. + */ +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) + dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + free_swap_and_cache(entry); +} + +/** + * gmap_helper_zap_one_page() - discard a page if it was swapped. + * @mm: the mm + * @vmaddr: the userspace virtual address that needs to be discarded + * + * If the given address maps to a swap entry, discard it. + * + * Context: needs to be called while holding the mmap lock. 
+ */ +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) +{ + struct vm_area_struct *vma; + spinlock_t *ptl; + pte_t *ptep; + + mmap_assert_locked(mm); + + /* Find the vm address for the guest address */ + vma = vma_lookup(mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + + /* Get pointer to the page table entry */ + ptep = get_locked_pte(mm, vmaddr, &ptl); + if (unlikely(!ptep)) + return; + if (pte_swap(*ptep)) + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_unmap_unlock(ptep, ptl); +} +EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); + +/** + * gmap_helper_discard() - discard user pages in the given range + * @mm: the mm + * @vmaddr: starting userspace address + * @end: end address (first address outside the range) + * + * All userpace pages in the range [@vamddr, @end) are discarded and unmapped. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end) +{ + struct vm_area_struct *vma; + + mmap_assert_locked(mm); + + while (vmaddr < end) { + vma = find_vma_intersection(mm, vmaddr, end); + if (!vma) + return; + if (!is_vm_hugetlb_page(vma)) + zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL); + vmaddr = vma->vm_end; + } +} +EXPORT_SYMBOL_GPL(gmap_helper_discard); + +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + unsigned long *found_addr = walk->private; + + /* Return 1 of the page is a zeropage. */ + if (is_zero_pfn(pte_pfn(*pte))) { + /* + * Shared zeropage in e.g., a FS DAX mapping? We cannot do the + * right thing and likely don't care: FAULT_FLAG_UNSHARE + * currently only works in COW mappings, which is also where + * mm_forbids_zeropage() is checked. + */ + if (!is_cow_mapping(walk->vma->vm_flags)) + return -EFAULT; + + *found_addr = addr; + return 1; + } + return 0; +} + +static const struct mm_walk_ops find_zeropage_ops = { + .pte_entry = find_zeropage_pte_entry, + .walk_lock = PGWALK_WRLOCK, +}; + +/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages + * @mm: the mm whose zero pages are to be unshared + * + * Unshare all shared zeropages, replacing them by anonymous pages. Note that + * we cannot simply zap all shared zeropages, because this could later + * trigger unexpected userfaultfd missing events. + * + * This must be called after mm->context.allow_cow_sharing was + * set to 0, to avoid future mappings of shared zeropages. + * + * mm contracts with s390, that even if mm were to remove a page table, + * and racing with walk_page_range_vma() calling pte_offset_map_lock() + * would fail, it will never insert a page table containing empty zero + * pages once mm_forbids_zeropage(mm) i.e. + * mm->context.allow_cow_sharing is set to 0. + */ +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + unsigned long addr; + vm_fault_t fault; + int rc; + + for_each_vma(vmi, vma) { + /* + * We could only look at COW mappings, but it's more future + * proof to catch unexpected zeropages in other mappings and + * fail. 
+ */ + if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) + continue; + addr = vma->vm_start; + +retry: + rc = walk_page_range_vma(vma, addr, vma->vm_end, + &find_zeropage_ops, &addr); + if (rc < 0) + return rc; + else if (!rc) + continue; + + /* addr was updated by find_zeropage_pte_entry() */ + fault = handle_mm_fault(vma, addr, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, + NULL); + if (fault & VM_FAULT_OOM) + return -ENOMEM; + /* + * See break_ksm(): even after handle_mm_fault() returned 0, we + * must start the lookup from the current address, because + * handle_mm_fault() may back out if there's any difficulty. + * + * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but + * maybe they could trigger in the future on concurrent + * truncation. In that case, the shared zeropage would be gone + * and we can simply retry and make progress. + */ + cond_resched(); + goto retry; + } + + return 0; +} + +/** + * gmap_helper_disable_cow_sharing() - disable all COW sharing + * + * Disable most COW-sharing of memory pages for the whole process: + * (1) Disable KSM and unmerge/unshare any KSM pages. + * (2) Disallow shared zeropages and unshare any zerpages that are mapped. + * + * Not that we currently don't bother with COW-shared pages that are shared + * with parent/child processes due to fork(). + */ +int gmap_helper_disable_cow_sharing(void) +{ + struct mm_struct *mm = current->mm; + int rc; + + mmap_assert_write_locked(mm); + + if (!mm->context.allow_cow_sharing) + return 0; + + mm->context.allow_cow_sharing = 0; + + /* Replace all shared zeropages by anonymous pages. */ + rc = __gmap_helper_unshare_zeropages(mm); + /* + * Make sure to disable KSM (if enabled for the whole process or + * individual VMAs). Note that nothing currently hinders user space + * from re-enabling it. 
+ */ + if (!rc) + rc = ksm_disable(mm); + if (rc) + mm->context.allow_cow_sharing = 1; + return rc; +} +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 2e568f175cd4..e88c02c9e642 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -9,12 +9,13 @@ #define KMSG_COMPONENT "hugetlb" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <asm/pgalloc.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/security.h> +#include <asm/pgalloc.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, bool __init arch_hugetlb_valid_size(unsigned long size) { - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + if (cpu_has_edat1() && size == PMD_SIZE) return true; - else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + else if (cpu_has_edat2() && size == PUD_SIZE) return true; else return false; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f2298f7a3f21..074bf4fb4ce2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -8,6 +8,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/cpufeature.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -39,7 +40,6 @@ #include <asm/kfence.h> #include <asm/dma.h> #include <asm/abs_lowcore.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sclp.h> @@ -73,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -83,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -117,7 +106,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - if (MACHINE_HAS_NX) + if (cpu_has_nx()) system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); @@ -165,19 +154,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - kfence_split_mapping(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ } @@ -239,16 +222,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, @@ -285,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL))) return -EINVAL; VM_BUG_ON(!mhp_range_allowed(start, size, true)); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 76f376876e0d..40a526d28184 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd, * Top of mmap area (just below the process stack). * Leave at least a ~128 MB hole. */ - gap_min = SZ_128M; - gap_max = (STACK_TOP / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5); return PAGE_ALIGN(STACK_TOP - gap - rnd); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index eae97fb61712..348e759840e7 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 
2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/hugetlb.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> @@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT1) { + if (cpu_has_edat1()) { /* set storage keys for a 1MB frame */ size = 1UL << 20; boundary = (start + size) & ~(size - 1); @@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long *table, mask; mask = 0; - if (MACHINE_HAS_EDAT2) { + if (cpu_has_edat2()) { switch (dtt) { case CRDTE_DTT_REGION3: mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); @@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { cspg(old, *old, new); } else { csp((unsigned int *)old + 1, *old, new); @@ -373,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags unsigned long end; int rc; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1aac13bb8f53..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -56,7 +57,7 @@ int __pfault_init(void) if (pfault_disable) return rc; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],%[rc],0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) @@ -78,7 +79,7 @@ void __pfault_fini(void) if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 30387a6e98ff..b449fd2605b0 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -12,35 +12,8 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> #include <asm/pgalloc.h> -#include <asm/gmap.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PGSTE - -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static const struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl("vm", page_table_sysctl) ? 
0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#endif /* CONFIG_PGSTE */ - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); @@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; + struct ctlreg asce; /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - get_lowcore()->user_asce.val = mm->context.asce; - local_ctl_load(7, &get_lowcore()->user_asce); + asce.val = mm->context.asce; + get_lowcore()->user_asce = asce; + local_ctl_load(7, &asce); + if (!test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &asce); } __tlb_flush_local(); } @@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) unsigned long *pgd = NULL, *p4d = NULL, *__pgd; unsigned long asce_limit = mm->context.asce_limit; + mmap_assert_write_locked(mm); + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(asce_limit < _REGION2_SIZE); @@ -100,13 +79,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) spin_lock_bh(&mm->page_table_lock); - /* - * This routine gets called with mmap_lock lock held and there is - * no reason to optimize for the case of otherwise. However, if - * that would ever change, the below check will let us know. - */ - VM_BUG_ON(asce_limit != mm->context.asce_limit); - if (p4d) { __pgd = (unsigned long *) mm->pgd; p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); @@ -170,7 +142,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f05e62e037c2..7df70cd8f739 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -19,10 +20,10 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) { @@ -34,22 +35,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - /* - * mio_wb_bit_mask may be set on a different CPU, but it is only set - * once at init and only read afterwards. 
- */ - return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -69,7 +60,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -94,7 +85,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) ptep_ipte_local(mm, addr, ptep, nodat); else @@ -173,10 +164,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); #endif return pgste; @@ -210,7 +201,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) if ((pte_val(entry) & _PAGE_PRESENT) && (pte_val(entry) & _PAGE_WRITE) && !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { + if (!machine_has_esop()) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. 
@@ -220,7 +211,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; + pgste = set_pgste_bit(pgste, PGSTE_UC_BIT); } #endif set_pte(ptep, entry); @@ -236,7 +227,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm, bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); if (bits) { - pgste_val(pgste) ^= bits; + pgste = __pgste(pgste_val(pgste) ^ bits); ptep_notify(mm, addr, ptep, bits); } #endif @@ -374,7 +365,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -386,12 +377,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm, static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); @@ -411,7 +402,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pmdp_idte_local(mm, addr, pmdp); else @@ -505,7 +496,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy); static inline void pudp_idte_local(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -515,10 +506,10 @@ static inline void pudp_idte_local(struct mm_struct *mm, static inline void pudp_idte_global(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else /* @@ -537,7 +528,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, if (pud_val(old) & _REGION_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pudp_idte_local(mm, addr, pudp); else @@ -609,7 +600,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, /* the mm_has_pgste() check is done in set_pte_at() */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO); pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); @@ -622,7 +613,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; + pgste = set_pgste_bit(pgste, PGSTE_IN_BIT); pgste_set_unlock(ptep, pgste); 
preempt_enable(); } @@ -667,7 +658,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } - pgste_val(pgste) |= bit; + pgste = set_pgste_bit(pgste, bit); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); return 0; @@ -687,7 +678,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, if (!(pte_val(spte) & _PAGE_INVALID) && !((pte_val(spte) & _PAGE_PROTECT) && !(pte_val(pte) & _PAGE_PROTECT))) { - pgste_val(spgste) |= PGSTE_VSIE_BIT; + spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT); tpgste = pgste_get_lock(tptep); tpte = __pte((pte_val(spte) & PAGE_MASK) | (pte_val(pte) & _PAGE_PROTECT)); @@ -745,7 +736,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -758,8 +749,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); @@ -780,13 +771,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT); pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE)) pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); @@ -842,11 +833,11 @@ again: if (!ptep) goto again; new = old = pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); + new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); keyul = (unsigned long) key; - pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48); + new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long bits, skey; @@ -857,12 +848,12 @@ again: /* Set storage key ACC and FP */ page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; + new = set_pgste_bit(new, bits << 52); } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -950,19 +941,19 @@ again: goto again; new = old = 
pgste_get_lock(ptep); /* Reset guest reference bit only */ - pgste_val(new) &= ~PGSTE_GR_BIT; + new = clear_pgste_bit(new, PGSTE_GR_BIT); if (!(pte_val(*ptep) & _PAGE_INVALID)) { paddr = pte_val(*ptep) & PAGE_MASK; cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ - pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT); } /* Reflect guest's logical view, not physical */ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; /* Changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -1126,7 +1117,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, if (res) pgstev |= _PGSTE_GPS_ZERO; - pgste_val(pgste) = pgstev; + pgste = __pgste(pgstev); pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return res; @@ -1159,8 +1150,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva, return -EFAULT; new = pgste_get_lock(ptep); - pgste_val(new) &= ~bits; - pgste_val(new) |= value & bits; + new = clear_pgste_bit(new, bits); + new = set_pgste_bit(new, value & bits); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8ead999e340b..448dd6ed1069 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -249,12 +250,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && direct && + cpu_has_edat1() && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; - } else if (!direct && MACHINE_HAS_EDAT1) { + } else if (!direct && cpu_has_edat1()) { void *new_page; /* @@ -335,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && direct && + cpu_has_edat2() && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -659,7 +660,7 @@ void __init vmem_map_init(void) * prefix page is used to return to the previous context with * an LPSWE instruction and therefore must be executable. */ - if (!static_key_enabled(&cpu_has_bear)) + if (!cpu_has_bear()) set_memory_x(0, 1); if (debug_pagealloc_enabled()) __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); |