diff options
Diffstat (limited to 'arch/s390/mm/hugetlbpage.c')
| -rw-r--r-- | arch/s390/mm/hugetlbpage.c | 289 |
1 files changed, 207 insertions, 82 deletions
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 121089d57802..d42e61c7594e 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,132 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#define pr_fmt(fmt) "hugetlb: " fmt + +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/security.h> +#include <asm/pgalloc.h> +/* + * If the bit selected by single-bit bitmask "a" is set within "x", move + * it to the position indicated by single-bit bitmask "b". + */ +#define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) +static inline unsigned long __pte_to_rste(pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - unsigned long mask; - - if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } + swp_entry_t arch_entry; + unsigned long rste; - pmd_val(*pmdp) = pte_val(pteval); + /* + * Convert encoding pte bits pmd / pud bits + * lIR.uswrdy.p dy..R...I...wr + * empty 010.000000.0 -> 00..0...1...00 + * prot-none, clean, old 111.000000.1 -> 00..1...1...00 + * prot-none, clean, young 111.000001.1 -> 01..1...1...00 + * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 + * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 + * read-only, clean, old 111.000100.1 -> 00..1...1...01 + * read-only, clean, young 101.000101.1 -> 01..1...0...01 + * read-only, dirty, old 111.000110.1 -> 10..1...1...01 + * read-only, dirty, young 101.000111.1 -> 11..1...0...01 + * read-write, clean, old 111.001100.1 -> 00..1...1...11 + * read-write, clean, young 101.001101.1 -> 01..1...0...11 + * read-write, dirty, old 110.001110.1 -> 10..0...1...11 + * read-write, dirty, young 100.001111.1 -> 11..0...0...11 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large + */ + if (pte_present(pte)) { + rste = pte_val(pte) & PAGE_MASK; + rste |= _SEGMENT_ENTRY_PRESENT; + rste |= move_set_bit(pte_val(pte), _PAGE_READ, + _SEGMENT_ENTRY_READ); + rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, + _SEGMENT_ENTRY_WRITE); + rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, + _SEGMENT_ENTRY_INVALID); + rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, + _SEGMENT_ENTRY_PROTECT); + rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, + _SEGMENT_ENTRY_DIRTY); + rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, + _SEGMENT_ENTRY_YOUNG); +#ifdef CONFIG_MEM_SOFT_DIRTY + rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, + _SEGMENT_ENTRY_SOFT_DIRTY); +#endif + rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, + _SEGMENT_ENTRY_NOEXEC); + } else if (!pte_none(pte)) { + /* swap pte */ + arch_entry = __pte_to_swp_entry(pte); + rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)); + } else + rste = _SEGMENT_ENTRY_EMPTY; + return rste; } -int arch_prepare_hugepage(struct page *page) +static inline pte_t __rste_to_pte(unsigned long rste) { - unsigned long addr = page_to_phys(page); + swp_entry_t arch_entry; + unsigned long pteval; + int present, none; pte_t pte; - pte_t *ptep; - int i; - if (MACHINE_HAS_HPAGE) - return 0; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + present = pud_present(__pud(rste)); + none = pud_none(__pud(rste)); + } else { + present = pmd_present(__pmd(rste)); + none = pmd_none(__pmd(rste)); + } - ptep = (pte_t *) pte_alloc_one(&init_mm, addr); - if (!ptep) - return -ENOMEM; + /* + * Convert encoding pmd / pud bits pte bits + * dy..R...I...wr lIR.uswrdy.p + * empty 00..0...1...00 -> 010.000000.0 + * prot-none, clean, old 00..1...1...00 -> 111.000000.1 + * prot-none, clean, young 01..1...1...00 -> 111.000001.1 + * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 + * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 + * read-only, clean, old 00..1...1...01 -> 111.000100.1 + * read-only, clean, young 01..1...0...01 -> 101.000101.1 + * read-only, dirty, old 10..1...1...01 -> 111.000110.1 + * read-only, dirty, young 11..1...0...01 -> 101.000111.1 + * read-write, clean, old 00..1...1...11 -> 111.001100.1 + * read-write, clean, young 01..1...0...11 -> 101.001101.1 + * read-write, dirty, old 10..0...1...11 -> 110.001110.1 + * read-write, dirty, young 11..0...0...11 -> 100.001111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large + */ + if (present) { + pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; + pteval |= _PAGE_LARGE | _PAGE_PRESENT; + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG); +#ifdef CONFIG_MEM_SOFT_DIRTY + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); +#endif + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); + } else if (!none) { + /* swap rste */ + arch_entry = __rste_to_swp_entry(rste); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + pteval = pte_val(pte); + } else + pteval = _PAGE_INVALID; + return __pte(pteval); +} + +static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) +{ + struct folio *folio; + unsigned long size, paddr; + + if (!mm_uses_skeys(mm) || + rste & _SEGMENT_ENTRY_INVALID) + return; - pte_val(pte) = addr; - for (i = 0; i < PTRS_PER_PTE; i++) { - set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); - pte_val(pte) += PAGE_SIZE; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + folio = page_folio(pud_page(__pud(rste))); + size = PUD_SIZE; + paddr = rste & PUD_MASK; + } else { + folio = page_folio(pmd_page(__pmd(rste))); + size = PMD_SIZE; + paddr = rste & PMD_MASK; } - page[1].index = (unsigned long) ptep; - return 0; + + if (!test_and_set_bit(PG_arch_1, &folio->flags.f)) + __storage_key_init_range(paddr, paddr + size); } -void arch_release_hugepage(struct page *page) +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - pte_t *ptep; + unsigned long rste; - if (MACHINE_HAS_HPAGE) - return; + rste = __pte_to_rste(pte); - ptep = (pte_t *) page[1].index; - if (!ptep) - return; - clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, - PTRS_PER_PTE * sizeof(pte_t)); - page_table_free(&init_mm, (unsigned long *) ptep); - page[1].index = 0; + /* Set correct table type for 2G hugepages */ + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + if (likely(pte_present(pte))) + rste |= _REGION3_ENTRY_LARGE; + rste |= _REGION_ENTRY_TYPE_R3; + } else if (likely(pte_present(pte))) + rste |= _SEGMENT_ENTRY_LARGE; + + clear_huge_pte_skeys(mm, rste); + set_pte(ptep, __pte(rste)); +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, pte); +} + +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + return __rste_to_pte(pte_val(*ptep)); +} + +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(mm, addr, ptep); + pmd_t *pmdp = (pmd_t *) ptep; + pud_t *pudp = (pud_t *) ptep; + + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); + else + pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pte; } -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); - pudp = pud_alloc(mm, pgdp, addr); - if (pudp) - pmdp = pmd_alloc(mm, pudp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (p4dp) { + pudp = pud_alloc(mm, p4dp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } + } return (pte_t *) pmdp; } -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_offset(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { - pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) - pmdp = pmd_offset(pudp, addr); + p4dp = p4d_offset(pgdp, addr); + if (p4d_present(*p4dp)) { + pudp = pud_offset(p4dp, addr); + if (sz == PUD_SIZE) + return (pte_t *)pudp; + if (pud_present(*pudp)) + pmdp = pmd_offset(pudp, addr); + } } return (pte_t *) pmdp; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +bool __init arch_hugetlb_valid_size(unsigned long size) { - return 0; -} - -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - -int pmd_huge(pmd_t pmd) -{ - if (!MACHINE_HAS_HPAGE) - return 0; - - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); -} - -int pud_huge(pud_t pud) -{ - return 0; -} - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmdp, int write) -{ - struct page *page; - - if (!MACHINE_HAS_HPAGE) - return NULL; - - page = pmd_page(*pmdp); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; + if (cpu_has_edat1() && size == PMD_SIZE) + return true; + else if (cpu_has_edat2() && size == PUD_SIZE) + return true; + else + return false; } |
