Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/dump_pagetables.c   23
-rw-r--r--  arch/s390/mm/fault.c              2
-rw-r--r--  arch/s390/mm/gmap.c              48
-rw-r--r--  arch/s390/mm/gup.c               35
-rw-r--r--  arch/s390/mm/hugetlbpage.c       33
-rw-r--r--  arch/s390/mm/init.c              39
-rw-r--r--  arch/s390/mm/mmap.c              90
-rw-r--r--  arch/s390/mm/page-states.c        3
-rw-r--r--  arch/s390/mm/pageattr.c          41
-rw-r--r--  arch/s390/mm/pgalloc.c           61
-rw-r--r--  arch/s390/mm/pgtable.c          159
-rw-r--r--  arch/s390/mm/vmem.c              45
12 files changed, 402 insertions(+), 177 deletions(-)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 1b553d847140..049c3c455b32 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
}
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
- pgd_t *pgd, unsigned long addr)
+ p4d_t *p4d, unsigned long addr)
{
unsigned int prot;
pud_t *pud;
@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
if (!pud_none(*pud))
if (pud_large(*pud)) {
prot = pud_val(*pud) &
@@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
}
}
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
+ pgd_t *pgd, unsigned long addr)
+{
+ p4d_t *p4d;
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
+ st->current_address = addr;
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d))
+ walk_pud_level(m, st, p4d, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2);
+ addr += P4D_SIZE;
+ }
+}
+
static void walk_pgd_level(struct seq_file *m)
{
unsigned long addr = 0;
@@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
st.current_address = addr;
pgd = pgd_offset_k(addr);
if (!pgd_none(*pgd))
- walk_pud_level(m, &st, pgd, addr);
+ walk_p4d_level(m, &st, pgd, addr);
else
note_page(m, &st, _PAGE_INVALID, 1);
addr += PGDIR_SIZE;
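
With walk_p4d_level() in place the dumper descends pgd -> p4d -> pud -> pmd -> pte, one walker per translation level. As a stand-alone illustration (not part of the patch), the per-level index extraction can be modeled in user space; the shifts are s390's (region-1 53, region-2 42, region-3 31, segment 20, page 12), and the sample address is made up:

#include <stdio.h>

/* s390 table geometry: region and segment tables have 2048 entries
 * (11-bit index), page tables have 256 entries (8-bit index). */
#define PGDIR_SHIFT 53
#define P4D_SHIFT   42
#define PUD_SHIFT   31
#define PMD_SHIFT   20
#define PAGE_SHIFT  12
#define CRST_MASK   0x7ffUL
#define PTE_MASK    0xffUL

int main(void)
{
	unsigned long addr = 0x0000f00dbeef1000UL;	/* arbitrary */

	printf("pgd %lu p4d %lu pud %lu pmd %lu pte %lu\n",
	       (addr >> PGDIR_SHIFT) & CRST_MASK,
	       (addr >> P4D_SHIFT) & CRST_MASK,
	       (addr >> PUD_SHIFT) & CRST_MASK,
	       (addr >> PMD_SHIFT) & CRST_MASK,
	       (addr >> PAGE_SHIFT) & PTE_MASK);
	return 0;
}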
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 5845d3028ffc..14f25798b001 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -130,7 +130,7 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long asce, unsigned long address)
{
- unsigned long *table = __va(asce & PAGE_MASK);
+ unsigned long *table = __va(asce & _ASCE_ORIGIN);
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
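
The table origin sits in bits 0-51 of the ASCE; the low 12 bits carry the designation-type and table-length controls that the switch above inspects. PAGE_MASK happens to yield the same number, but _ASCE_ORIGIN is the mask that states the intent. A compilable sketch of the decomposition, with constants as in the s390 headers and a made-up ASCE value:

#include <stdio.h>

#define _ASCE_ORIGIN       (~0xfffUL)	/* table origin, bits 0-51 */
#define _ASCE_TYPE_MASK    0x0cUL	/* designation type */
#define _ASCE_TYPE_REGION1 0x0cUL
#define _ASCE_TYPE_REGION2 0x08UL
#define _ASCE_TYPE_REGION3 0x04UL
#define _ASCE_TYPE_SEGMENT 0x00UL
#define _ASCE_TABLE_LENGTH 0x03UL

int main(void)
{
	unsigned long asce = 0x12345000UL | _ASCE_TYPE_REGION3 |
			     _ASCE_TABLE_LENGTH;

	printf("origin %016lx type %lx\n",
	       asce & _ASCE_ORIGIN, asce & _ASCE_TYPE_MASK);
	return 0;
}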
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..4fb3d3cdb370 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
- void **slot;
+ void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
@@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
- void **slot;
+ void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
if (len == 0 || from + len < from || to + len < to ||
- from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+ from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
return -EINVAL;
flush = 0;
@@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
unsigned long *table;
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int rc;
@@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
VM_BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, vmaddr);
+ p4d = p4d_offset(pgd, vmaddr);
+ VM_BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, vmaddr);
VM_BUG_ON(pud_none(*pud));
/* large puds cannot yet be handled */
if (pud_large(*pud))
@@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
struct gmap_rmap *rmap)
{
- void **slot;
+ void __rcu **slot;
BUG_ON(!gmap_is_shadow(sg));
slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
@@ -2004,20 +2007,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long offset, pte_t *pte)
+ unsigned long gaddr, pte_t *pte)
{
struct gmap_rmap *rmap, *rnext, *head;
- unsigned long gaddr, start, end, bits, raddr;
- unsigned long *table;
+ unsigned long start, end, bits, raddr;
BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->parent->guest_table_lock);
- table = radix_tree_lookup(&sg->parent->host_to_guest,
- vmaddr >> PMD_SHIFT);
- gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
- spin_unlock(&sg->parent->guest_table_lock);
- if (!table)
- return;
spin_lock(&sg->guest_table_lock);
if (sg->removed) {
@@ -2076,7 +2071,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
- unsigned long offset, gaddr;
+ unsigned long offset, gaddr = 0;
unsigned long *table;
struct gmap *gmap, *sg, *next;
@@ -2084,22 +2079,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
offset = offset * (4096 / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
- if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
- spin_lock(&gmap->shadow_lock);
- list_for_each_entry_safe(sg, next,
- &gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, offset, pte);
- spin_unlock(&gmap->shadow_lock);
- }
- if (!(bits & PGSTE_IN_BIT))
- continue;
spin_lock(&gmap->guest_table_lock);
table = radix_tree_lookup(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT);
if (table)
gaddr = __gmap_segment_gaddr(table) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (table)
+ if (!table)
+ continue;
+
+ if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+ spin_lock(&gmap->shadow_lock);
+ list_for_each_entry_safe(sg, next,
+ &gmap->children, list)
+ gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ spin_unlock(&gmap->shadow_lock);
+ }
+ if (bits & PGSTE_IN_BIT)
gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
}
rcu_read_unlock();
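
ptep_notify() now does the host_to_guest lookup exactly once per gmap, skips gmaps that do not map the page, and passes the resolved gaddr into gmap_shadow_notify(), which previously repeated the radix-tree lookup for every shadow. A toy model of that hoisted-lookup shape, with stand-in types in place of the gmap structures:

#include <stdio.h>

struct shadow { int id; struct shadow *next; };

/* Stand-in for the host_to_guest lookup; returns -1 if unmapped. */
static long lookup_gaddr(unsigned long vmaddr)
{
	return (long)(vmaddr & ~0xfffUL);
}

static void shadow_notify(struct shadow *sg, unsigned long gaddr)
{
	printf("shadow %d: invalidate %lx\n", sg->id, gaddr);
}

int main(void)
{
	struct shadow s2 = { 2, NULL }, s1 = { 1, &s2 };
	long gaddr = lookup_gaddr(0x7f00d000UL);	/* resolved once */

	if (gaddr >= 0)					/* gates all work */
		for (struct shadow *sg = &s1; sg; sg = sg->next)
			shadow_notify(sg, (unsigned long) gaddr);
	return 0;
}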
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..8ecc25e760fa 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
return 1;
}
-static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp, pud;
- pudp = (pud_t *) pgdp;
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pudp = (pud_t *) pgd_deref(pgd);
+ pudp = (pud_t *) p4dp;
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pudp = (pud_t *) p4d_deref(p4d);
pudp += pud_index(addr);
do {
pud = *pudp;
@@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
return 1;
}
+static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp, p4d;
+
+ p4dp = (p4d_t *) pgdp;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ p4dp = (p4d_t *) pgd_deref(pgd);
+ p4dp += p4d_index(addr);
+ do {
+ p4d = *p4dp;
+ barrier();
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
@@ -211,7 +234,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if ((end <= start) || (end > TASK_SIZE))
+ if ((end <= start) || (end > mm->context.asce_limit))
return 0;
/*
* local_irq_save() doesn't prevent pagetable teardown, but does
@@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
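
gup_p4d_range() mirrors gup_pud_range() one level up. s390 folds unused upper levels at run time: a pgd entry is dereferenced as a separate p4d table only when it is a region-1 entry, otherwise the pgd slot itself is reinterpreted as the p4d entry. The fast path also bounds the walk by mm->context.asce_limit, since no tables exist above the current limit. A minimal model of the run-time folding, with toy types, the real _REGION_ENTRY_TYPE_* values, and the origin dereference reduced to a mask:

#include <stdio.h>

#define _REGION_ENTRY_TYPE_MASK 0x0cUL
#define _REGION_ENTRY_TYPE_R1   0x0cUL

typedef struct { unsigned long val; } pgd_t;
typedef struct { unsigned long val; } p4d_t;

static p4d_t *p4d_table(pgd_t *pgdp)
{
	if ((pgdp->val & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
		return (p4d_t *)(pgdp->val & ~0xfffUL);	/* separate table */
	return (p4d_t *) pgdp;				/* folded level */
}

int main(void)
{
	pgd_t region2 = { 0x08UL };	/* region-2 entry: p4d is folded */

	printf("folded: %d\n", p4d_table(&region2) == (p4d_t *) &region2);
	return 0;
}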
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9b4050caa4e9..44a8e6f0391e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -162,33 +162,42 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
- pudp = pud_alloc(mm, pgdp, addr);
- if (pudp) {
- if (sz == PUD_SIZE)
- return (pte_t *) pudp;
- else if (sz == PMD_SIZE)
- pmdp = pmd_alloc(mm, pudp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (p4dp) {
+ pudp = pud_alloc(mm, p4dp, addr);
+ if (pudp) {
+ if (sz == PUD_SIZE)
+ return (pte_t *) pudp;
+ else if (sz == PMD_SIZE)
+ pmdp = pmd_alloc(mm, pudp, addr);
+ }
}
return (pte_t *) pmdp;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
if (pgd_present(*pgdp)) {
- pudp = pud_offset(pgdp, addr);
- if (pud_present(*pudp)) {
- if (pud_large(*pudp))
- return (pte_t *) pudp;
- pmdp = pmd_offset(pudp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ if (p4d_present(*p4dp)) {
+ pudp = pud_offset(p4dp, addr);
+ if (pud_present(*pudp)) {
+ if (pud_large(*pudp))
+ return (pte_t *) pudp;
+ pmdp = pmd_offset(pudp, addr);
+ }
}
}
return (pte_t *) pmdp;
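
huge_pte_alloc() now runs through p4d_alloc() and keeps dispatching on the requested size: a PUD_SIZE request stops at the pud slot (a 2 GiB page), a PMD_SIZE request allocates down to the pmd (a 1 MiB page); huge_pte_offset() gains the sz argument only to match the generic prototype. A trivial model of the size dispatch:

#include <stdio.h>

#define PMD_SIZE (1UL << 20)	/* s390 segment: 1 MiB */
#define PUD_SIZE (1UL << 31)	/* s390 region 3: 2 GiB */

static const char *stop_level(unsigned long sz)
{
	if (sz == PUD_SIZE)
		return "pud slot (2 GiB page)";
	if (sz == PMD_SIZE)
		return "pmd slot (1 MiB page)";
	return "unsupported size";
}

int main(void)
{
	printf("%s\n%s\n", stop_level(PMD_SIZE), stop_level(PUD_SIZE));
	return 0;
}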
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ee5066718b21..8111694ce55a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -39,6 +39,7 @@
#include <asm/sections.h>
#include <asm/ctl_reg.h>
#include <asm/sclp.h>
+#include <asm/set_memory.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@@ -80,6 +81,7 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long pgd_type, asce_bits;
+ psw_t psw;
init_mm.pgd = swapper_pg_dir;
if (VMALLOC_END > (1UL << 42)) {
@@ -99,7 +101,10 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
- __arch_local_irq_stosm(0x04);
+ psw.mask = __extract_psw();
+ psw_bits(psw).dat = 1;
+ psw_bits(psw).as = PSW_BITS_AS_HOME;
+ __load_psw_mask(psw.mask);
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
@@ -161,43 +166,17 @@ unsigned long memory_block_size_bytes(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
- unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
- pg_data_t *pgdat = NODE_DATA(nid);
- struct zone *zone;
- int rc, i;
+ int rc;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- zone = pgdat->node_zones + i;
- if (zone_idx(zone) != ZONE_MOVABLE) {
- /* Add range within existing zone limits, if possible */
- zone_start_pfn = zone->zone_start_pfn;
- zone_end_pfn = zone->zone_start_pfn +
- zone->spanned_pages;
- } else {
- /* Add remaining range to ZONE_MOVABLE */
- zone_start_pfn = start_pfn;
- zone_end_pfn = start_pfn + size_pages;
- }
- if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
- continue;
- nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
- zone_end_pfn - start_pfn : size_pages;
- rc = __add_pages(nid, zone, start_pfn, nr_pages);
- if (rc)
- break;
- start_pfn += nr_pages;
- size_pages -= nr_pages;
- if (!size_pages)
- break;
- }
+ rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
if (rc)
vmem_remove_mapping(start, size);
return rc;
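
The paging_init() change replaces the opaque __arch_local_irq_stosm(0x04) with an explicitly constructed PSW mask: DAT on, address space set to home. A sketch using the real PSW mask constants and a made-up starting mask:

#include <stdio.h>

#define PSW_MASK_DAT 0x0400000000000000UL	/* bit 5: DAT on */
#define PSW_MASK_ASC 0x0000c00000000000UL	/* bits 16-17: addr space */
#define PSW_ASC_HOME 0x0000c00000000000UL	/* AS = 11b: home space */

int main(void)
{
	unsigned long mask = 0x0000000180000000UL;	/* made up */

	mask |= PSW_MASK_DAT;
	mask = (mask & ~PSW_MASK_ASC) | PSW_ASC_HOME;
	printf("psw mask %016lx\n", mask);
	return 0;
}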
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..2e10d2b8ad35 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
+ int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
- return addr;
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
}
info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
else
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
- return vm_unmapped_area(&info);
+ addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+
+ return addr;
}
unsigned long
@@ -125,21 +137,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
- return addr;
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
}
- return addr;
-}
-
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
-{
- if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
- return 0;
- if (!(flags & MAP_FIXED))
- addr = 0;
- if ((addr + len) >= TASK_SIZE)
- return crst_table_upgrade(current->mm);
- return 0;
-}
-
-static unsigned long
-s390_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
-
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm);
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
+ rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
}
- return area;
-}
-static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
-
- area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm);
- if (rc)
- return (unsigned long) rc;
- area = arch_get_unmapped_area_topdown(filp, addr, len,
- pgoff, flags);
- }
- return area;
+ return addr;
}
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
if (mmap_is_legacy()) {
mm->mmap_base = mmap_base_legacy(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
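
Both allocators now funnel into check_asce_limit: MAP_FIXED and hinted placements are no longer exempt, and any range ending above the current ASCE limit upgrades the page tables on demand, replacing the old s390_get_unmapped_area*() retry-on-ENOMEM wrappers. A toy model of the flow, with a single upgrade step and error handling reduced to an int:

#include <stdio.h>

static unsigned long asce_limit = 1UL << 42;	/* 3-level start */

static int crst_table_upgrade(unsigned long end)
{
	if (end > asce_limit)
		asce_limit = 1UL << 53;		/* add a table level */
	return 0;
}

static long get_area(unsigned long addr, unsigned long len)
{
	if (addr + len > asce_limit) {
		int rc = crst_table_upgrade(addr + len);

		if (rc)
			return rc;		/* e.g. -ENOMEM */
	}
	return (long) addr;
}

int main(void)
{
	long addr = get_area(1UL << 43, 4096);

	printf("addr %lx, limit now %lx\n", (unsigned long) addr, asce_limit);
	return 0;
}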
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
-#define ESSA_SET_STABLE 1
-#define ESSA_SET_UNUSED 2
+#include <asm/page-states.h>
static int cmma_flag = 1;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..180481589246 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,6 +8,7 @@
#include <asm/facility.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/set_memory.h>
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
@@ -94,7 +95,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite(pte_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pte_val(new) |= _PAGE_NOEXEC;
else if (flags & SET_MEMORY_X)
pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +145,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite(pmd_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,21 +222,21 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
new = pud_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pud_val(new) |= _REGION_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
-static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
pud_t *pudp;
int rc = 0;
- pudp = pud_offset(pgd, addr);
+ pudp = pud_offset(p4d, addr);
do {
if (pud_none(*pudp))
return -EINVAL;
@@ -258,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
return rc;
}
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+ int rc = 0;
+
+ p4dp = p4d_offset(pgd, addr);
+ do {
+ if (p4d_none(*p4dp))
+ return -EINVAL;
+ next = p4d_addr_end(addr, end);
+ rc = walk_pud_level(p4dp, addr, next, flags);
+ p4dp++;
+ addr = next;
+ cond_resched();
+ } while (addr < end && !rc);
+ return rc;
+}
+
static DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
@@ -277,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
if (pgd_none(*pgdp))
break;
next = pgd_addr_end(addr, end);
- rc = walk_pud_level(pgdp, addr, next, flags);
+ rc = walk_p4d_level(pgdp, addr, next, flags);
if (rc)
break;
cond_resched();
@@ -288,6 +309,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
int __set_memory(unsigned long addr, int numpages, unsigned long flags)
{
+ if (!MACHINE_HAS_NX)
+ flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+ if (!flags)
+ return 0;
addr &= PAGE_MASK;
return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
}
@@ -314,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
unsigned long address;
int nr, i, j;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -321,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
pgd = pgd_offset_k(address);
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
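
The MACHINE_HAS_NX test moves out of the per-level walkers into __set_memory(): on machines without NX the SET_MEMORY_NX and SET_MEMORY_X bits are stripped once, and if no flags remain the page-table walk is skipped altogether. A compilable model with the flag values from asm/set_memory.h:

#include <stdio.h>

#define SET_MEMORY_RO (1UL << 0)
#define SET_MEMORY_RW (1UL << 1)
#define SET_MEMORY_NX (1UL << 2)
#define SET_MEMORY_X  (1UL << 3)

static int machine_has_nx;	/* stand-in for MACHINE_HAS_NX */

static int set_memory(unsigned long flags)
{
	if (!machine_has_nx)
		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
	if (!flags)
		return 0;	/* nothing left: skip the walk */
	printf("walk page tables, flags %lx\n", flags);
	return 0;
}

int main(void)
{
	set_memory(SET_MEMORY_NX);			/* silent no-op */
	set_memory(SET_MEMORY_RO | SET_MEMORY_NX);	/* RO still applied */
	return 0;
}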
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..18918e394ce4 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -76,30 +76,46 @@ static void __crst_table_upgrade(void *arg)
__tlb_flush_local();
}
-int crst_table_upgrade(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
unsigned long *table, *pgd;
+ int rc, notify;
- /* upgrade should only happen from 3 to 4 levels */
- BUG_ON(mm->context.asce_limit != (1UL << 42));
-
- table = crst_table_alloc(mm);
- if (!table)
+ /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+ BUG_ON(mm->context.asce_limit < (1UL << 42));
+ if (end >= TASK_SIZE_MAX)
return -ENOMEM;
-
- spin_lock_bh(&mm->page_table_lock);
- pgd = (unsigned long *) mm->pgd;
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm->task_size = mm->context.asce_limit;
- spin_unlock_bh(&mm->page_table_lock);
-
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
+ rc = 0;
+ notify = 0;
+ while (mm->context.asce_limit < end) {
+ table = crst_table_alloc(mm);
+ if (!table) {
+ rc = -ENOMEM;
+ break;
+ }
+ spin_lock_bh(&mm->page_table_lock);
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit == (1UL << 42)) {
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ } else {
+ crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = -PAGE_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ }
+ notify = 1;
+ spin_unlock_bh(&mm->page_table_lock);
+ }
+ if (notify)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return rc;
}
void crst_table_downgrade(struct mm_struct *mm)
@@ -119,7 +135,6 @@ void crst_table_downgrade(struct mm_struct *mm)
mm->context.asce_limit = 1UL << 31;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- mm->task_size = mm->context.asce_limit;
crst_table_free(mm, (unsigned long *) pgd);
if (current->active_mm == mm)
@@ -144,7 +159,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
struct page *page;
unsigned long *table;
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ page = alloc_page(GFP_KERNEL);
if (page) {
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -276,7 +291,7 @@ static void __tlb_remove_table(void *_table)
struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
switch (mask) {
- case 0: /* pmd or pud */
+ case 0: /* pmd, pud, or p4d */
free_pages((unsigned long) table, 2);
break;
case 1: /* lower 2K of a 4K page table */
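
crst_table_upgrade() now loops, adding one table level per pass: 4 TiB (1UL << 42) to 8 PiB (1UL << 53) to the 5-level limit. That limit is spelled -PAGE_SIZE because 1UL << 64 does not fit in 64 bits; it saturates at the highest page-aligned address, which is also why the function rejects end >= TASK_SIZE_MAX up front. A stand-alone model of the stepping:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long limit = 1UL << 42;
	unsigned long end = -PAGE_SIZE - 1;	/* just below TASK_SIZE_MAX */

	while (limit < end) {
		limit = (limit == (1UL << 42)) ? 1UL << 53 : -PAGE_SIZE;
		printf("limit -> %016lx\n", limit);
	}
	return 0;
}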
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..d4d409ba206b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
@@ -609,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgste_t pgste;
@@ -617,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
bool dirty;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return false;
+ pud = pud_alloc(mm, p4d, addr);
if (!pud)
return false;
pmd = pmd_alloc(mm, pud, addr);
@@ -787,4 +792,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ * or < 0 in case of error. -EINVAL is returned for invalid values
+ * of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste)
+{
+ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+ int res = 0;
+
+ WARN_ON_ONCE(orc > ESSA_MAX);
+ if (unlikely(orc > ESSA_MAX))
+ return -EINVAL;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ if (oldpte)
+ *oldpte = pte_val(*ptep);
+ if (oldpgste)
+ *oldpgste = pgstev;
+
+ switch (orc) {
+ case ESSA_GET_STATE:
+ break;
+ case ESSA_SET_STABLE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ break;
+ case ESSA_SET_UNUSED:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_POT_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+ break;
+ }
+ if (pgstev & _PGSTE_GPS_ZERO) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ break;
+ }
+ if (!(pgstev & PGSTE_GC_BIT)) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ res = 1;
+ break;
+ }
+ break;
+ case ESSA_SET_STABLE_RESIDENT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ /*
+ * Since the resident state can go away any time after this
+ * call, we will not make this page resident. We can revisit
+ * this decision if a guest will ever start using this.
+ */
+ break;
+ case ESSA_SET_STABLE_IF_RESIDENT:
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ }
+ break;
+ default:
+ /* we should never get here! */
+ break;
+ }
+ /* If we are discarding a page, set it to logical zero */
+ if (res)
+ pgstev |= _PGSTE_GPS_ZERO;
+
+ pgste_val(pgste) = pgstev;
+ pgste_set_unlock(ptep, pgste);
+ pte_unmap_unlock(ptep, ptl);
+ return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ * will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ unsigned long bits, unsigned long value)
+{
+ spinlock_t *ptl;
+ pgste_t new;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ new = pgste_get_lock(ptep);
+
+ pgste_val(new) &= ~bits;
+ pgste_val(new) |= value & bits;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ *pgstep = pgste_val(pgste_get(ptep));
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(get_pgste);
#endif
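
set_pgste_bits() applies a classic masked update: bits named in the mask are taken from the new value, all other bits are preserved. The same rule, stand-alone:

#include <stdio.h>

static unsigned long masked_set(unsigned long old, unsigned long bits,
				unsigned long value)
{
	return (old & ~bits) | (value & bits);
}

int main(void)
{
	unsigned long pgste = 0x0000aa00UL;

	pgste = masked_set(pgste, 0x0000ff00UL, 0x00005500UL);
	printf("pgste %08lx\n", pgste);		/* 00005500 */
	return 0;
}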
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 60d38993f232..d8398962a723 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -17,6 +17,7 @@
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/set_memory.h>
static DEFINE_MUTEX(vmem_mutex);
@@ -37,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
+static inline p4d_t *vmem_p4d_alloc(void)
+{
+ p4d_t *p4d = NULL;
+
+ p4d = vmem_alloc_pages(2);
+ if (!p4d)
+ return NULL;
+ clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
+ return p4d;
+}
+
static inline pud_t *vmem_pud_alloc(void)
{
pud_t *pud = NULL;
@@ -84,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -101,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_p4d_alloc();
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pu_dir);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
}
- pu_dir = pud_offset(pg_dir, address);
+ pu_dir = pud_offset(p4_dir, address);
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
!debug_pagealloc_enabled()) {
@@ -160,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -171,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
address += PGDIR_SIZE;
continue;
}
- pu_dir = pud_offset(pg_dir, address);
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ address += P4D_SIZE;
+ continue;
+ }
+ pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
address += PUD_SIZE;
continue;
@@ -212,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
unsigned long pgt_prot, sgt_prot;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -226,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_p4d_alloc();
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pu_dir);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
}
- pu_dir = pud_offset(pg_dir, address);
+ pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)