diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/mm/extable.c | 81 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/s390/mm/gup.c | 48 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 114 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 2 |
7 files changed, 247 insertions, 12 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index d98fe9004a52..0f5536b0c1a1 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o gup.o + page-states.o gup.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 000000000000..4d1ee88864e8 --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,81 @@ +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *mid; + unsigned long addr; + + while (first <= last) { + mid = ((last - first) >> 1) + first; + addr = extable_insn(mid); + if (addr < value) + first = mid + 1; + else if (addr > value) + last = mid - 1; + else + return mid; + } + return NULL; +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + * + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* This compare is only valid after normalization. */ + return x->insn - y->insn; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + struct exception_table_entry *p; + int i; + + /* Normalize entries to being relative to the start of the section */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn += i; + sort(start, finish - start, sizeof(*start), cmp_ex, NULL); + /* Denormalize all entries */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn -= i; +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /* Trim the beginning */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /* Trim the end */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6c013f544146..04ad4001a289 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -111,7 +111,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) if (trans_exc_code == 2) /* Access via secondary space, set_fs setting decides */ return current->thread.mm_segment.ar4; - if (addressing_mode == HOME_SPACE_MODE) + if (s390_user_mode == HOME_SPACE_MODE) /* User space if the access has been done via home space. */ return trans_exc_code == 3; /* @@ -163,7 +163,7 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; return; } @@ -367,6 +367,7 @@ retry: /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; } @@ -628,9 +629,8 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, struct thread_struct *thread, *next; struct task_struct *tsk; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DEAD: - case CPU_DEAD_FROZEN: spin_lock_irq(&pfault_lock); list_for_each_entry_safe(thread, next, &pfault_list, list) { thread->pfault_wait = 0; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 65cb06e2af4e..60acb93a4680 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, pmd = *pmdp; barrier(); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush() has to serialize with + * smp_call_function() against our disabled IRQs, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (unlikely(pmd_huge(pmd))) { if (!gup_huge_pmd(pmdp, pmd, addr, next, @@ -154,6 +163,43 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, return 1; } +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6adbc082618a..81e596c65dee 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,7 +42,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -static unsigned long setup_zero_pages(void) +static unsigned long __init setup_zero_pages(void) { struct cpuid cpu_id; unsigned int order; @@ -212,7 +212,7 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { free_init_pages("initrd memory", start, end); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 18df31d1f2c9..c8188a18af05 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -609,8 +609,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) */ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) { - struct page *page; - unsigned long *table; + unsigned long *uninitialized_var(table); + struct page *uninitialized_var(page); unsigned int mask, bit; if (mm_has_pgste(mm)) @@ -787,6 +787,30 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_table_flush(tlb); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void thp_split_vma(struct vm_area_struct *vma) +{ + unsigned long addr; + struct page *page; + + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + page = follow_page(vma, addr, FOLL_SPLIT); + } +} + +void thp_split_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma = mm->mmap; + + while (vma != NULL) { + thp_split_vma(vma); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + vma = vma->vm_next; + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * switch on pgstes for its userspace process (for kvm) */ @@ -796,7 +820,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have switched amode? If no, we cannot do sie */ - if (addressing_mode == HOME_SPACE_MODE) + if (s390_user_mode == HOME_SPACE_MODE) return -EINVAL; /* Do we have pgstes? if yes, we are done */ @@ -824,6 +848,12 @@ int s390_enable_sie(void) if (!mm) return -ENOMEM; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + mm->def_flags |= VM_NOHUGEPAGE; +#endif + /* Now lets check again if something happened */ task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || @@ -866,3 +896,81 @@ bool kernel_page_present(struct page *page) return cc == 0; } #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + /* No need to flush TLB + * On s390 reference bits are in storage key and never in TLB */ + return pmdp_test_and_clear_young(vma, address, pmdp); +} + +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_same(*pmdp, entry)) + return 0; + pmdp_invalidate(vma, address, pmdp); + set_pmd_at(vma->vm_mm, address, pmdp, entry); + return 1; +} + +static void pmdp_splitting_flush_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, + (unsigned long *) pmdp)) { + /* need to serialize against gup-fast (IRQ disabled) */ + smp_call_function(pmdp_splitting_flush_sync, NULL, 1); + } +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!mm->pmd_huge_pte) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) mm->pmd_huge_pte); + mm->pmd_huge_pte = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +{ + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + pgtable = mm->pmd_huge_pte; + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + mm->pmd_huge_pte = NULL; + else { + mm->pmd_huge_pte = (pgtable_t) lh->next; + list_del(lh); + } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep++; + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + return pgtable; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6f896e75ab49..c22abf900c9e 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -107,7 +107,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); pm_dir = pmd_offset(pu_dir, address); -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && (address + HPAGE_SIZE <= start + size) && (address >= HPAGE_SIZE)) { |