Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/dump_pagetables.c |  23
-rw-r--r--  arch/s390/mm/fault.c           |   2
-rw-r--r--  arch/s390/mm/gmap.c            |  48
-rw-r--r--  arch/s390/mm/gup.c             |  35
-rw-r--r--  arch/s390/mm/hugetlbpage.c     |  33
-rw-r--r--  arch/s390/mm/init.c            |  39
-rw-r--r--  arch/s390/mm/mmap.c            |  90
-rw-r--r--  arch/s390/mm/page-states.c     |   3
-rw-r--r--  arch/s390/mm/pageattr.c        |  41
-rw-r--r--  arch/s390/mm/pgalloc.c         |  61
-rw-r--r--  arch/s390/mm/pgtable.c         | 159
-rw-r--r--  arch/s390/mm/vmem.c            |  45
12 files changed, 402 insertions(+), 177 deletions(-)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 1b553d847140..049c3c455b32 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 }
 
 static void walk_pud_level(struct seq_file *m, struct pg_state *st,
-                           pgd_t *pgd, unsigned long addr)
+                           p4d_t *p4d, unsigned long addr)
 {
         unsigned int prot;
         pud_t *pud;
@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 
         for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
                 st->current_address = addr;
-                pud = pud_offset(pgd, addr);
+                pud = pud_offset(p4d, addr);
                 if (!pud_none(*pud))
                         if (pud_large(*pud)) {
                                 prot = pud_val(*pud) &
@@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
         }
 }
 
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
+                           pgd_t *pgd, unsigned long addr)
+{
+        p4d_t *p4d;
+        int i;
+
+        for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
+                st->current_address = addr;
+                p4d = p4d_offset(pgd, addr);
+                if (!p4d_none(*p4d))
+                        walk_pud_level(m, st, p4d, addr);
+                else
+                        note_page(m, st, _PAGE_INVALID, 2);
+                addr += P4D_SIZE;
+        }
+}
+
 static void walk_pgd_level(struct seq_file *m)
 {
         unsigned long addr = 0;
@@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
                 st.current_address = addr;
                 pgd = pgd_offset_k(addr);
                 if (!pgd_none(*pgd))
-                        walk_pud_level(m, &st, pgd, addr);
+                        walk_p4d_level(m, &st, pgd, addr);
                 else
                         note_page(m, &st, _PAGE_INVALID, 1);
                 addr += PGDIR_SIZE;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 5845d3028ffc..14f25798b001 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -130,7 +130,7 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long asce, unsigned long address)
 {
-        unsigned long *table = __va(asce & PAGE_MASK);
+        unsigned long *table = __va(asce & _ASCE_ORIGIN);
 
         pr_alert("AS:%016lx ", asce);
         switch (asce & _ASCE_TYPE_MASK) {
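The two files above show the pattern repeated throughout this series: every page-table walker gains one p4d step between the pgd and pud levels. A rough, self-contained sketch of that pattern (not code from this patch; handle_pud() is a hypothetical callback):

    static void walk_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
    {
            p4d_t *p4d = p4d_offset(pgd, addr);     /* locate the p4d entry */
            unsigned long next;

            do {
                    next = p4d_addr_end(addr, end); /* clamp to this p4d's span */
                    if (!p4d_none(*p4d))
                            handle_pud(p4d, addr, next); /* descend one level */
                    p4d++;
                    addr = next;
            } while (addr != end);
    }

On configurations where the p4d level is folded, p4d_offset() simply hands back the pgd entry, so the extra step is effectively free.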
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..4fb3d3cdb370 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
         struct radix_tree_iter iter;
         unsigned long indices[16];
         unsigned long index;
-        void **slot;
+        void __rcu **slot;
         int i, nr;
 
         /* A radix tree is freed by deleting all of its entries */
@@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
         struct radix_tree_iter iter;
         unsigned long indices[16];
         unsigned long index;
-        void **slot;
+        void __rcu **slot;
         int i, nr;
 
         /* A radix tree is freed by deleting all of its entries */
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
         if ((from | to | len) & (PMD_SIZE - 1))
                 return -EINVAL;
         if (len == 0 || from + len < from || to + len < to ||
-            from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+            from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
                 return -EINVAL;
 
         flush = 0;
@@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
         unsigned long *table;
         spinlock_t *ptl;
         pgd_t *pgd;
+        p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         int rc;
@@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
         mm = gmap->mm;
         pgd = pgd_offset(mm, vmaddr);
         VM_BUG_ON(pgd_none(*pgd));
-        pud = pud_offset(pgd, vmaddr);
+        p4d = p4d_offset(pgd, vmaddr);
+        VM_BUG_ON(p4d_none(*p4d));
+        pud = pud_offset(p4d, vmaddr);
         VM_BUG_ON(pud_none(*pud));
         /* large puds cannot yet be handled */
         if (pud_large(*pud))
@@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
 static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
                                     struct gmap_rmap *rmap)
 {
-        void **slot;
+        void __rcu **slot;
 
         BUG_ON(!gmap_is_shadow(sg));
         slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
@@ -2004,20 +2007,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
  * Called with sg->parent->shadow_lock.
  */
 static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
-                               unsigned long offset, pte_t *pte)
+                               unsigned long gaddr, pte_t *pte)
 {
         struct gmap_rmap *rmap, *rnext, *head;
-        unsigned long gaddr, start, end, bits, raddr;
-        unsigned long *table;
+        unsigned long start, end, bits, raddr;
 
         BUG_ON(!gmap_is_shadow(sg));
-        spin_lock(&sg->parent->guest_table_lock);
-        table = radix_tree_lookup(&sg->parent->host_to_guest,
-                                  vmaddr >> PMD_SHIFT);
-        gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
-        spin_unlock(&sg->parent->guest_table_lock);
-        if (!table)
-                return;
 
         spin_lock(&sg->guest_table_lock);
         if (sg->removed) {
@@ -2076,7 +2071,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
                  pte_t *pte, unsigned long bits)
 {
-        unsigned long offset, gaddr;
+        unsigned long offset, gaddr = 0;
         unsigned long *table;
         struct gmap *gmap, *sg, *next;
 
@@ -2084,22 +2079,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
         offset = offset * (4096 / sizeof(pte_t));
         rcu_read_lock();
         list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
-                if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
-                        spin_lock(&gmap->shadow_lock);
-                        list_for_each_entry_safe(sg, next,
-                                                 &gmap->children, list)
-                                gmap_shadow_notify(sg, vmaddr, offset, pte);
-                        spin_unlock(&gmap->shadow_lock);
-                }
-                if (!(bits & PGSTE_IN_BIT))
-                        continue;
                 spin_lock(&gmap->guest_table_lock);
                 table = radix_tree_lookup(&gmap->host_to_guest,
                                           vmaddr >> PMD_SHIFT);
                 if (table)
                         gaddr = __gmap_segment_gaddr(table) + offset;
                 spin_unlock(&gmap->guest_table_lock);
-                if (table)
+                if (!table)
+                        continue;
+
+                if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+                        spin_lock(&gmap->shadow_lock);
+                        list_for_each_entry_safe(sg, next,
+                                                 &gmap->children, list)
+                                gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+                        spin_unlock(&gmap->shadow_lock);
+                }
+                if (bits & PGSTE_IN_BIT)
                         gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
         }
         rcu_read_unlock();
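The ptep_notify() rework above resolves the guest address once, up front, and only then walks the shadow gmap children; gmap_shadow_notify() no longer repeats the radix-tree lookup itself. A condensed sketch of that lookup step, as a hypothetical helper (locking mirrors the patch):

    static bool gmap_resolve_gaddr(struct gmap *gmap, unsigned long vmaddr,
                                   unsigned long offset, unsigned long *gaddr)
    {
            unsigned long *table;

            spin_lock(&gmap->guest_table_lock);
            table = radix_tree_lookup(&gmap->host_to_guest,
                                      vmaddr >> PMD_SHIFT);
            if (table)
                    *gaddr = __gmap_segment_gaddr(table) + offset;
            spin_unlock(&gmap->guest_table_lock);
            return table != NULL;   /* no entry: nothing to notify */
    }

Initializing gaddr to 0 in ptep_notify() keeps the variable well defined on the paths where the lookup fails and the loop continues.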
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..8ecc25e760fa 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
         return 1;
 }
 
-static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
                 unsigned long end, int write, struct page **pages, int *nr)
 {
         unsigned long next;
         pud_t *pudp, pud;
 
-        pudp = (pud_t *) pgdp;
-        if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
-                pudp = (pud_t *) pgd_deref(pgd);
+        pudp = (pud_t *) p4dp;
+        if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+                pudp = (pud_t *) p4d_deref(p4d);
         pudp += pud_index(addr);
         do {
                 pud = *pudp;
@@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
         return 1;
 }
 
+static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        unsigned long next;
+        p4d_t *p4dp, p4d;
+
+        p4dp = (p4d_t *) pgdp;
+        if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+                p4dp = (p4d_t *) pgd_deref(pgd);
+        p4dp += p4d_index(addr);
+        do {
+                p4d = *p4dp;
+                barrier();
+                next = p4d_addr_end(addr, end);
+                if (p4d_none(p4d))
+                        return 0;
+                if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
+                        return 0;
+        } while (p4dp++, addr = next, addr != end);
+
+        return 1;
+}
+
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
@@ -211,7 +234,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
         addr = start;
         len = (unsigned long) nr_pages << PAGE_SHIFT;
         end = start + len;
-        if ((end <= start) || (end > TASK_SIZE))
+        if ((end <= start) || (end > mm->context.asce_limit))
                 return 0;
         /*
          * local_irq_save() doesn't prevent pagetable teardown, but does
@@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none(pgd))
                         break;
-                if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+                if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
                         break;
         } while (pgdp++, addr = next, addr != end);
         local_irq_restore(flags);
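With gup_p4d_range() inserted between the pgd loop and gup_pud_range(), the fast-GUP entry point is unchanged for callers; only the upper bound moves from TASK_SIZE to the per-process asce_limit. A hypothetical caller, for illustration only:

    /* Pin one writable user page without taking mmap_sem. */
    static int pin_one_page(unsigned long uaddr, struct page **page)
    {
            int nr = __get_user_pages_fast(uaddr, 1, 1, page);

            return (nr == 1) ? 0 : -EFAULT; /* caller must put_page() later */
    }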
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9b4050caa4e9..44a8e6f0391e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -162,33 +162,42 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                         unsigned long addr, unsigned long sz)
 {
         pgd_t *pgdp;
+        p4d_t *p4dp;
         pud_t *pudp;
         pmd_t *pmdp = NULL;
 
         pgdp = pgd_offset(mm, addr);
-        pudp = pud_alloc(mm, pgdp, addr);
-        if (pudp) {
-                if (sz == PUD_SIZE)
-                        return (pte_t *) pudp;
-                else if (sz == PMD_SIZE)
-                        pmdp = pmd_alloc(mm, pudp, addr);
+        p4dp = p4d_alloc(mm, pgdp, addr);
+        if (p4dp) {
+                pudp = pud_alloc(mm, p4dp, addr);
+                if (pudp) {
+                        if (sz == PUD_SIZE)
+                                return (pte_t *) pudp;
+                        else if (sz == PMD_SIZE)
+                                pmdp = pmd_alloc(mm, pudp, addr);
+                }
         }
         return (pte_t *) pmdp;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                       unsigned long addr, unsigned long sz)
 {
         pgd_t *pgdp;
+        p4d_t *p4dp;
         pud_t *pudp;
         pmd_t *pmdp = NULL;
 
         pgdp = pgd_offset(mm, addr);
         if (pgd_present(*pgdp)) {
-                pudp = pud_offset(pgdp, addr);
-                if (pud_present(*pudp)) {
-                        if (pud_large(*pudp))
-                                return (pte_t *) pudp;
-                        pmdp = pmd_offset(pudp, addr);
+                p4dp = p4d_offset(pgdp, addr);
+                if (p4d_present(*p4dp)) {
+                        pudp = pud_offset(p4dp, addr);
+                        if (pud_present(*pudp)) {
+                                if (pud_large(*pudp))
+                                        return (pte_t *) pudp;
+                                pmdp = pmd_offset(pudp, addr);
+                        }
                 }
         }
         return (pte_t *) pmdp;
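huge_pte_offset() now also takes the mapping size, which callers typically derive from the hstate; on s390, PMD_SIZE selects a 1 MB segment entry and PUD_SIZE a 2 GB region entry. A sketch of a caller under that assumption (the wrapper name is hypothetical):

    static pte_t *lookup_huge_pte(struct mm_struct *mm, struct hstate *h,
                                  unsigned long addr)
    {
            /* sz tells the walk at which level a huge entry may terminate */
            return huge_pte_offset(mm, addr, huge_page_size(h));
    }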
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ee5066718b21..8111694ce55a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -39,6 +39,7 @@
 #include <asm/sections.h>
 #include <asm/ctl_reg.h>
 #include <asm/sclp.h>
+#include <asm/set_memory.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
 
@@ -80,6 +81,7 @@ void __init paging_init(void)
 {
         unsigned long max_zone_pfns[MAX_NR_ZONES];
         unsigned long pgd_type, asce_bits;
+        psw_t psw;
 
         init_mm.pgd = swapper_pg_dir;
         if (VMALLOC_END > (1UL << 42)) {
@@ -99,7 +101,10 @@ void __init paging_init(void)
         __ctl_load(S390_lowcore.kernel_asce, 1, 1);
         __ctl_load(S390_lowcore.kernel_asce, 7, 7);
         __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-        __arch_local_irq_stosm(0x04);
+        psw.mask = __extract_psw();
+        psw_bits(psw).dat = 1;
+        psw_bits(psw).as = PSW_BITS_AS_HOME;
+        __load_psw_mask(psw.mask);
 
         sparse_memory_present_with_active_regions(MAX_NUMNODES);
         sparse_init();
@@ -161,43 +166,17 @@ unsigned long memory_block_size_bytes(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
-        unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
         unsigned long start_pfn = PFN_DOWN(start);
         unsigned long size_pages = PFN_DOWN(size);
-        pg_data_t *pgdat = NODE_DATA(nid);
-        struct zone *zone;
-        int rc, i;
+        int rc;
 
         rc = vmem_add_mapping(start, size);
         if (rc)
                 return rc;
-        for (i = 0; i < MAX_NR_ZONES; i++) {
-                zone = pgdat->node_zones + i;
-                if (zone_idx(zone) != ZONE_MOVABLE) {
-                        /* Add range within existing zone limits, if possible */
-                        zone_start_pfn = zone->zone_start_pfn;
-                        zone_end_pfn = zone->zone_start_pfn +
-                                       zone->spanned_pages;
-                } else {
-                        /* Add remaining range to ZONE_MOVABLE */
-                        zone_start_pfn = start_pfn;
-                        zone_end_pfn = start_pfn + size_pages;
-                }
-                if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-                        continue;
-                nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-                           zone_end_pfn - start_pfn : size_pages;
-                rc = __add_pages(nid, zone, start_pfn, nr_pages);
-                if (rc)
-                        break;
-                start_pfn += nr_pages;
-                size_pages -= nr_pages;
-                if (!size_pages)
-                        break;
-        }
+        rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
         if (rc)
                 vmem_remove_mapping(start, size);
         return rc;
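Two independent cleanups sit in init.c: paging_init() now enables DAT by building the complete PSW mask explicitly instead of OR-ing 0x04 (the DAT bit) into the system mask with STOSM, and arch_add_memory() drops its private zone-selection loop because __add_pages() now picks the zone in common code. The hotplug path reduces to roughly the following (a sketch, error unwinding as in the patch):

    rc = vmem_add_mapping(start, size);         /* create the identity mapping */
    if (!rc) {
            rc = __add_pages(nid, PFN_DOWN(start), PFN_DOWN(size),
                             want_memblock);    /* zone chosen by common code */
            if (rc)
                    vmem_remove_mapping(start, size);
    }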
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..2e10d2b8ad35 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
         struct mm_struct *mm = current->mm;
         struct vm_area_struct *vma;
         struct vm_unmapped_area_info info;
+        int rc;
 
         if (len > TASK_SIZE - mmap_min_addr)
                 return -ENOMEM;
 
         if (flags & MAP_FIXED)
-                return addr;
+                goto check_asce_limit;
 
         if (addr) {
                 addr = PAGE_ALIGN(addr);
                 vma = find_vma(mm, addr);
                 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                    (!vma || addr + len <= vma->vm_start))
-                        return addr;
+                    (!vma || addr + len <= vm_start_gap(vma)))
+                        goto check_asce_limit;
         }
 
         info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
         else
                 info.align_mask = 0;
         info.align_offset = pgoff << PAGE_SHIFT;
-        return vm_unmapped_area(&info);
+        addr = vm_unmapped_area(&info);
+        if (addr & ~PAGE_MASK)
+                return addr;
+
+check_asce_limit:
+        if (addr + len > current->mm->context.asce_limit) {
+                rc = crst_table_upgrade(mm, addr + len);
+                if (rc)
+                        return (unsigned long) rc;
+        }
+
+        return addr;
 }
 
 unsigned long
@@ -125,21 +137,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         struct mm_struct *mm = current->mm;
         unsigned long addr = addr0;
         struct vm_unmapped_area_info info;
+        int rc;
 
         /* requested length too big for entire address space */
         if (len > TASK_SIZE - mmap_min_addr)
                 return -ENOMEM;
 
         if (flags & MAP_FIXED)
-                return addr;
+                goto check_asce_limit;
 
         /* requesting a specific address */
         if (addr) {
                 addr = PAGE_ALIGN(addr);
                 vma = find_vma(mm, addr);
                 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                    (!vma || addr + len <= vma->vm_start))
-                        return addr;
+                    (!vma || addr + len <= vm_start_gap(vma)))
+                        goto check_asce_limit;
         }
 
         info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                 info.low_limit = TASK_UNMAPPED_BASE;
                 info.high_limit = TASK_SIZE;
                 addr = vm_unmapped_area(&info);
+                if (addr & ~PAGE_MASK)
+                        return addr;
         }
 
-        return addr;
-}
-
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
-{
-        if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
-                return 0;
-        if (!(flags & MAP_FIXED))
-                addr = 0;
-        if ((addr + len) >= TASK_SIZE)
-                return crst_table_upgrade(current->mm);
-        return 0;
-}
-
-static unsigned long
-s390_get_unmapped_area(struct file *filp, unsigned long addr,
-                unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-        struct mm_struct *mm = current->mm;
-        unsigned long area;
-        int rc;
-
-        area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
-        if (!(area & ~PAGE_MASK))
-                return area;
-        if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
-                /* Upgrade the page table to 4 levels and retry. */
-                rc = crst_table_upgrade(mm);
+check_asce_limit:
+        if (addr + len > current->mm->context.asce_limit) {
+                rc = crst_table_upgrade(mm, addr + len);
                 if (rc)
                         return (unsigned long) rc;
-                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
         }
-        return area;
-}
 
-static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
-                const unsigned long len, const unsigned long pgoff,
-                const unsigned long flags)
-{
-        struct mm_struct *mm = current->mm;
-        unsigned long area;
-        int rc;
-
-        area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
-        if (!(area & ~PAGE_MASK))
-                return area;
-        if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
-                /* Upgrade the page table to 4 levels and retry. */
-                rc = crst_table_upgrade(mm);
-                if (rc)
-                        return (unsigned long) rc;
-                area = arch_get_unmapped_area_topdown(filp, addr, len,
-                                                      pgoff, flags);
-        }
-        return area;
+        return addr;
 }
+
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
          */
         if (mmap_is_legacy()) {
                 mm->mmap_base = mmap_base_legacy(random_factor);
-                mm->get_unmapped_area = s390_get_unmapped_area;
+                mm->get_unmapped_area = arch_get_unmapped_area;
         } else {
                 mm->mmap_base = mmap_base(random_factor);
-                mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
         }
 }
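Both allocators now funnel through the same tail check: if the chosen range ends above the current ASCE limit, the page table is upgraded in place instead of failing with -ENOMEM and retrying the whole search. The shared pattern, as a stand-alone sketch (the function form is illustrative; the patch uses a label):

    static unsigned long check_asce_limit(struct mm_struct *mm,
                                          unsigned long addr, unsigned long len)
    {
            int rc;

            if (addr + len > mm->context.asce_limit) {
                    rc = crst_table_upgrade(mm, addr + len);
                    if (rc)
                            return (unsigned long) rc;  /* negative errno */
            }
            return addr;
    }

This is what lets arch_get_unmapped_area() and its topdown variant be installed directly as mm->get_unmapped_area, retiring the s390_* wrappers and s390_mmap_check().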
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 
-#define ESSA_SET_STABLE         1
-#define ESSA_SET_UNUSED         2
+#include <asm/page-states.h>
 
 static int cmma_flag = 1;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..180481589246 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,6 +8,7 @@
 #include <asm/facility.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/set_memory.h>
 
 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
 {
@@ -94,7 +95,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
                         new = pte_wrprotect(new);
                 else if (flags & SET_MEMORY_RW)
                         new = pte_mkwrite(pte_mkdirty(new));
-                if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+                if (flags & SET_MEMORY_NX)
                         pte_val(new) |= _PAGE_NOEXEC;
                 else if (flags & SET_MEMORY_X)
                         pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +145,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
                 new = pmd_wrprotect(new);
         else if (flags & SET_MEMORY_RW)
                 new = pmd_mkwrite(pmd_mkdirty(new));
-        if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+        if (flags & SET_MEMORY_NX)
                 pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
         else if (flags & SET_MEMORY_X)
                 pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,21 +222,21 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
                 new = pud_wrprotect(new);
         else if (flags & SET_MEMORY_RW)
                 new = pud_mkwrite(pud_mkdirty(new));
-        if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+        if (flags & SET_MEMORY_NX)
                 pud_val(new) |= _REGION_ENTRY_NOEXEC;
         else if (flags & SET_MEMORY_X)
                 pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
         pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
 }
 
-static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
                           unsigned long flags)
 {
         unsigned long next;
         pud_t *pudp;
         int rc = 0;
 
-        pudp = pud_offset(pgd, addr);
+        pudp = pud_offset(p4d, addr);
         do {
                 if (pud_none(*pudp))
                         return -EINVAL;
@@ -258,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
         return rc;
 }
 
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+                          unsigned long flags)
+{
+        unsigned long next;
+        p4d_t *p4dp;
+        int rc = 0;
+
+        p4dp = p4d_offset(pgd, addr);
+        do {
+                if (p4d_none(*p4dp))
+                        return -EINVAL;
+                next = p4d_addr_end(addr, end);
+                rc = walk_pud_level(p4dp, addr, next, flags);
+                p4dp++;
+                addr = next;
+                cond_resched();
+        } while (addr < end && !rc);
+        return rc;
+}
+
 static DEFINE_MUTEX(cpa_mutex);
 
 static int change_page_attr(unsigned long addr, unsigned long end,
@@ -277,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
                 if (pgd_none(*pgdp))
                         break;
                 next = pgd_addr_end(addr, end);
-                rc = walk_pud_level(pgdp, addr, next, flags);
+                rc = walk_p4d_level(pgdp, addr, next, flags);
                 if (rc)
                         break;
                 cond_resched();
@@ -288,6 +309,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 
 int __set_memory(unsigned long addr, int numpages, unsigned long flags)
 {
+        if (!MACHINE_HAS_NX)
+                flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+        if (!flags)
+                return 0;
         addr &= PAGE_MASK;
         return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
 }
@@ -314,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
         unsigned long address;
         int nr, i, j;
         pgd_t *pgd;
+        p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
@@ -321,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
         for (i = 0; i < numpages;) {
                 address = page_to_phys(page + i);
                 pgd = pgd_offset_k(address);
-                pud = pud_offset(pgd, address);
+                p4d = p4d_offset(pgd, address);
+                pud = pud_offset(p4d, address);
                 pmd = pmd_offset(pud, address);
                 pte = pte_offset_kernel(pmd, address);
                 nr = (unsigned long)pte >> ilog2(sizeof(long));
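Hoisting the MACHINE_HAS_NX test into __set_memory() lets the per-level helpers assume NX is available, and a request that only toggles NX/X degenerates to a no-op on machines without the facility. Typical use goes through the wrappers declared in the new asm/set_memory.h header; a sketch with a hypothetical address:

    unsigned long addr = (unsigned long) &some_kernel_object; /* hypothetical */

    set_memory_ro(addr, 1);   /* write-protect one page */
    set_memory_nx(addr, 1);   /* silently does nothing if !MACHINE_HAS_NX */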
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..18918e394ce4 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -76,30 +76,46 @@ static void __crst_table_upgrade(void *arg)
         __tlb_flush_local();
 }
 
-int crst_table_upgrade(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
         unsigned long *table, *pgd;
+        int rc, notify;
 
-        /* upgrade should only happen from 3 to 4 levels */
-        BUG_ON(mm->context.asce_limit != (1UL << 42));
-
-        table = crst_table_alloc(mm);
-        if (!table)
+        /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+        BUG_ON(mm->context.asce_limit < (1UL << 42));
+        if (end >= TASK_SIZE_MAX)
                 return -ENOMEM;
-
-        spin_lock_bh(&mm->page_table_lock);
-        pgd = (unsigned long *) mm->pgd;
-        crst_table_init(table, _REGION2_ENTRY_EMPTY);
-        pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
-        mm->pgd = (pgd_t *) table;
-        mm->context.asce_limit = 1UL << 53;
-        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                           _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-        mm->task_size = mm->context.asce_limit;
-        spin_unlock_bh(&mm->page_table_lock);
-
-        on_each_cpu(__crst_table_upgrade, mm, 0);
-        return 0;
+        rc = 0;
+        notify = 0;
+        while (mm->context.asce_limit < end) {
+                table = crst_table_alloc(mm);
+                if (!table) {
+                        rc = -ENOMEM;
+                        break;
+                }
+                spin_lock_bh(&mm->page_table_lock);
+                pgd = (unsigned long *) mm->pgd;
+                if (mm->context.asce_limit == (1UL << 42)) {
+                        crst_table_init(table, _REGION2_ENTRY_EMPTY);
+                        p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
+                        mm->pgd = (pgd_t *) table;
+                        mm->context.asce_limit = 1UL << 53;
+                        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                                _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+                } else {
+                        crst_table_init(table, _REGION1_ENTRY_EMPTY);
+                        pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
+                        mm->pgd = (pgd_t *) table;
+                        mm->context.asce_limit = -PAGE_SIZE;
+                        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                                _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+                }
+                notify = 1;
+                spin_unlock_bh(&mm->page_table_lock);
+        }
+        if (notify)
+                on_each_cpu(__crst_table_upgrade, mm, 0);
+        return rc;
 }
 
 void crst_table_downgrade(struct mm_struct *mm)
@@ -119,7 +135,6 @@ void crst_table_downgrade(struct mm_struct *mm)
         mm->context.asce_limit = 1UL << 31;
         mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                            _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-        mm->task_size = mm->context.asce_limit;
         crst_table_free(mm, (unsigned long *) pgd);
 
         if (current->active_mm == mm)
@@ -144,7 +159,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
         struct page *page;
         unsigned long *table;
 
-        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+        page = alloc_page(GFP_KERNEL);
         if (page) {
                 table = (unsigned long *) page_to_phys(page);
                 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -276,7 +291,7 @@ static void __tlb_remove_table(void *_table)
         struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 
         switch (mask) {
         case 0:         /* pmd, pud, or p4d */
                 free_pages((unsigned long) table, 2);
                 break;
         case 1:         /* lower 2K of a 4K page table */
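The constants in crst_table_upgrade() encode the reach of the address space at each page-table depth; the loop stacks one region table per iteration until the requested end fits:

    /*
     * 3 levels: asce_limit = 1UL << 42   ->   4 TB  (region-third table)
     * 4 levels: asce_limit = 1UL << 53   ->   8 PB  (region-second table)
     * 5 levels: asce_limit = -PAGE_SIZE  ->  ~16 EB (region-first table)
     */
    rc = crst_table_upgrade(mm, addr + len);    /* as called from mmap.c */

A request ending at or above TASK_SIZE_MAX is rejected before any allocation, and the IPI to reload the ASCE is issued only once, after the final level has been added.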
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..d4d409ba206b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/page-states.h>
 
 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
                                       unsigned long addr, pte_t *ptep)
@@ -609,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
         spinlock_t *ptl;
         pgd_t *pgd;
+        p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         pgste_t pgste;
@@ -617,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
         bool dirty;
 
         pgd = pgd_offset(mm, addr);
-        pud = pud_alloc(mm, pgd, addr);
+        p4d = p4d_alloc(mm, pgd, addr);
+        if (!p4d)
+                return false;
+        pud = pud_alloc(mm, p4d, addr);
         if (!pud)
                 return false;
         pmd = pmd_alloc(mm, pud, addr);
@@ -787,4 +792,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
         return 0;
 }
 EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ *         or < 0 in case of error. -EINVAL is returned for invalid values
+ *         of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+                       unsigned long *oldpte, unsigned long *oldpgste)
+{
+        unsigned long pgstev;
+        spinlock_t *ptl;
+        pgste_t pgste;
+        pte_t *ptep;
+        int res = 0;
+
+        WARN_ON_ONCE(orc > ESSA_MAX);
+        if (unlikely(orc > ESSA_MAX))
+                return -EINVAL;
+        ptep = get_locked_pte(mm, hva, &ptl);
+        if (unlikely(!ptep))
+                return -EFAULT;
+        pgste = pgste_get_lock(ptep);
+        pgstev = pgste_val(pgste);
+        if (oldpte)
+                *oldpte = pte_val(*ptep);
+        if (oldpgste)
+                *oldpgste = pgstev;
+
+        switch (orc) {
+        case ESSA_GET_STATE:
+                break;
+        case ESSA_SET_STABLE:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev |= _PGSTE_GPS_USAGE_STABLE;
+                break;
+        case ESSA_SET_UNUSED:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+                if (pte_val(*ptep) & _PAGE_INVALID)
+                        res = 1;
+                break;
+        case ESSA_SET_VOLATILE:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+                if (pte_val(*ptep) & _PAGE_INVALID)
+                        res = 1;
+                break;
+        case ESSA_SET_POT_VOLATILE:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+                        pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+                        break;
+                }
+                if (pgstev & _PGSTE_GPS_ZERO) {
+                        pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+                        break;
+                }
+                if (!(pgstev & PGSTE_GC_BIT)) {
+                        pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+                        res = 1;
+                        break;
+                }
+                break;
+        case ESSA_SET_STABLE_RESIDENT:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev |= _PGSTE_GPS_USAGE_STABLE;
+                /*
+                 * Since the resident state can go away any time after this
+                 * call, we will not make this page resident. We can revisit
+                 * this decision if a guest will ever start using this.
+                 */
+                break;
+        case ESSA_SET_STABLE_IF_RESIDENT:
+                if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+                        pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                        pgstev |= _PGSTE_GPS_USAGE_STABLE;
+                }
+                break;
+        default:
+                /* we should never get here! */
+                break;
+        }
+        /* If we are discarding a page, set it to logical zero */
+        if (res)
+                pgstev |= _PGSTE_GPS_ZERO;
+
+        pgste_val(pgste) = pgstev;
+        pgste_set_unlock(ptep, pgste);
+        pte_unmap_unlock(ptep, ptl);
+        return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ *         will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+                   unsigned long bits, unsigned long value)
+{
+        spinlock_t *ptl;
+        pgste_t new;
+        pte_t *ptep;
+
+        ptep = get_locked_pte(mm, hva, &ptl);
+        if (unlikely(!ptep))
+                return -EFAULT;
+        new = pgste_get_lock(ptep);
+
+        pgste_val(new) &= ~bits;
+        pgste_val(new) |= value & bits;
+
+        pgste_set_unlock(ptep, new);
+        pte_unmap_unlock(ptep, ptl);
+        return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+        spinlock_t *ptl;
+        pte_t *ptep;
+
+        ptep = get_locked_pte(mm, hva, &ptl);
+        if (unlikely(!ptep))
+                return -EFAULT;
+        *pgstep = pgste_val(pgste_get(ptep));
+        pte_unmap_unlock(ptep, ptl);
+        return 0;
+}
+EXPORT_SYMBOL(get_pgste);
 #endif
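The three new exports give KVM a locked, race-free view of the PGSTEs. A hypothetical caller handling a guest ESSA intercept might look like this (error handling trimmed; the orc values such as ESSA_SET_STABLE come from asm/page-states.h):

    static int handle_essa_stable(struct mm_struct *mm, unsigned long hva)
    {
            unsigned long old_pte, old_pgste;
            int cbrl;

            cbrl = pgste_perform_essa(mm, hva, ESSA_SET_STABLE,
                                      &old_pte, &old_pgste);
            if (cbrl < 0)
                    return cbrl;        /* -EINVAL or -EFAULT */
            /* cbrl == 1 would mean: add the page to the CBRL */
            return 0;
    }

set_pgste_bits() and get_pgste() follow the same get_locked_pte() discipline, so callers never observe a PGSTE without the PTE lock held.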
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 60d38993f232..d8398962a723 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -17,6 +17,7 @@
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/set_memory.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -37,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
         return (void *) memblock_alloc(size, size);
 }
 
+static inline p4d_t *vmem_p4d_alloc(void)
+{
+        p4d_t *p4d = NULL;
+
+        p4d = vmem_alloc_pages(2);
+        if (!p4d)
+                return NULL;
+        clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
+        return p4d;
+}
+
 static inline pud_t *vmem_pud_alloc(void)
 {
         pud_t *pud = NULL;
@@ -84,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
         unsigned long end = start + size;
         unsigned long address = start;
         pgd_t *pg_dir;
+        p4d_t *p4_dir;
         pud_t *pu_dir;
         pmd_t *pm_dir;
         pte_t *pt_dir;
@@ -101,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
         while (address < end) {
                 pg_dir = pgd_offset_k(address);
                 if (pgd_none(*pg_dir)) {
+                        p4_dir = vmem_p4d_alloc();
+                        if (!p4_dir)
+                                goto out;
+                        pgd_populate(&init_mm, pg_dir, p4_dir);
+                }
+                p4_dir = p4d_offset(pg_dir, address);
+                if (p4d_none(*p4_dir)) {
                         pu_dir = vmem_pud_alloc();
                         if (!pu_dir)
                                 goto out;
-                        pgd_populate(&init_mm, pg_dir, pu_dir);
+                        p4d_populate(&init_mm, p4_dir, pu_dir);
                 }
-                pu_dir = pud_offset(pg_dir, address);
+                pu_dir = pud_offset(p4_dir, address);
                 if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
                     !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
                      !debug_pagealloc_enabled()) {
@@ -160,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
         unsigned long end = start + size;
         unsigned long address = start;
         pgd_t *pg_dir;
+        p4d_t *p4_dir;
         pud_t *pu_dir;
         pmd_t *pm_dir;
         pte_t *pt_dir;
@@ -171,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
                         address += PGDIR_SIZE;
                         continue;
                 }
-                pu_dir = pud_offset(pg_dir, address);
+                p4_dir = p4d_offset(pg_dir, address);
+                if (p4d_none(*p4_dir)) {
+                        address += P4D_SIZE;
+                        continue;
+                }
+                pu_dir = pud_offset(p4_dir, address);
                 if (pud_none(*pu_dir)) {
                         address += PUD_SIZE;
                         continue;
                 }
@@ -212,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
         unsigned long pgt_prot, sgt_prot;
         unsigned long address = start;
         pgd_t *pg_dir;
+        p4d_t *p4_dir;
         pud_t *pu_dir;
         pmd_t *pm_dir;
         pte_t *pt_dir;
@@ -226,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
         for (address = start; address < end;) {
                 pg_dir = pgd_offset_k(address);
                 if (pgd_none(*pg_dir)) {
+                        p4_dir = vmem_p4d_alloc();
+                        if (!p4_dir)
+                                goto out;
+                        pgd_populate(&init_mm, pg_dir, p4_dir);
+                }
+
+                p4_dir = p4d_offset(pg_dir, address);
+                if (p4d_none(*p4_dir)) {
                         pu_dir = vmem_pud_alloc();
                         if (!pu_dir)
                                 goto out;
-                        pgd_populate(&init_mm, pg_dir, pu_dir);
+                        p4d_populate(&init_mm, p4_dir, pu_dir);
                 }
-                pu_dir = pud_offset(pg_dir, address);
+                pu_dir = pud_offset(p4_dir, address);
                 if (pud_none(*pu_dir)) {
                         pm_dir = vmem_pmd_alloc();
                         if (!pm_dir)