From 5d3a551c28c6669dc43be40d8fafafbc2ec8f42b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Oct 2012 16:29:32 -0700 Subject: mm: hugetlb: add arch hook for clearing page flags before entering pool The core page allocator ensures that page flags are zeroed when freeing pages via free_pages_check. A number of architectures (ARM, PPC, MIPS) rely on this property to treat new pages as dirty with respect to the data cache and perform the appropriate flushing before mapping the pages into userspace. This can lead to cache synchronisation problems when using hugepages, since the allocator keeps its own pool of pages above the usual page allocator and does not reset the page flags when freeing a page into the pool. This patch adds a new architecture hook, arch_clear_hugepage_flags, so that architectures which rely on the page flags being in a particular state for fresh allocations can adjust the flags accordingly when a page is freed into the pool. Signed-off-by: Will Deacon Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 2d6e6e380564..fc322421b1cc 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file, } #define hugetlb_prefault_arch_hook(mm) do { } while (0) +#define arch_clear_hugepage_flags(page) do { } while (0) int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -- cgit From 75077afbec1ac89178c1542b23a70d0f960b0aaf Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 8 Oct 2012 16:30:15 -0700 Subject: thp, s390: thp splitting backend for s390 This patch is part of the architecture backend for thp on s390. It provides the functions related to thp splitting, including serialization against gup. Unlike other archs, pmdp_splitting_flush() cannot use a tlb flushing operation to serialize against gup on s390, because that wouldn't be stopped by the disabled IRQs. So instead, smp_call_function() is called with an empty function, which will have the expected effect. Signed-off-by: Gerald Schaefer Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Hugh Dickins Cc: Hillf Danton Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6bd7d7483017..c3f0775e71f1 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -347,6 +347,8 @@ extern struct page *vmemmap; #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ +#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Page status table bits for virtualization */ #define RCP_ACC_BITS 0xf000000000000000UL @@ -506,6 +508,10 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; } +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + static inline int pte_none(pte_t pte) { return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); @@ -1159,6 +1165,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * 31 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. -- cgit From 9501d09fa3c4ca18971083dfb0c9aa1afc85f19c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 8 Oct 2012 16:30:18 -0700 Subject: thp, s390: thp pagetable pre-allocation for s390 This patch is part of the architecture backend for thp on s390. It provides the pagetable pre-allocation functions pgtable_trans_huge_deposit() and pgtable_trans_huge_withdraw(). Unlike other archs, s390 has no struct page * as pgtable_t, but rather a pointer to the page table. So instead of saving the pagetable pre- allocation list info inside the struct page, it is being saved within the pagetable itself. Signed-off-by: Gerald Schaefer Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Hugh Dickins Cc: Hillf Danton Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c3f0775e71f1..2c8f00d31cfe 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1166,6 +1166,12 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_unmap(pte) do { } while (0) #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); + static inline int pmd_trans_splitting(pmd_t pmd) { return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; -- cgit From 1ae1c1d09f220ded48ee9a7d91a65e94f95c4af1 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 8 Oct 2012 16:30:24 -0700 Subject: thp, s390: architecture backend for thp on s390 This implements the architecture backend for transparent hugepages on s390. Signed-off-by: Gerald Schaefer Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Hugh Dickins Cc: Hillf Danton Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/hugetlb.h | 18 +--- arch/s390/include/asm/pgtable.h | 190 ++++++++++++++++++++++++++++++++++++++++ arch/s390/include/asm/setup.h | 5 +- arch/s390/include/asm/tlb.h | 1 + 4 files changed, 196 insertions(+), 18 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index fc322421b1cc..593753ee07f3 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -78,23 +78,6 @@ static inline void __pmd_csp(pmd_t *pmdp) " csp %1,%3" : "=m" (*pmdp) : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; -} - -static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) -{ - unsigned long sto = (unsigned long) pmdp - - pmd_index(address) * sizeof(pmd_t); - - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { - asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,0" - : "=m" (*pmdp) - : "m" (*pmdp), "a" (sto), - "a" ((address & HPAGE_MASK)) - ); - } - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; } static inline void huge_ptep_invalidate(struct mm_struct *mm, @@ -106,6 +89,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, __pmd_idte(address, pmdp); else __pmd_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; } static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2c8f00d31cfe..ed14fc2db6e0 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -350,6 +350,10 @@ extern struct page *vmemmap; #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ #define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) +/* Set of bits not changed in pmd_modify */ +#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ + | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) + /* Page status table bits for virtualization */ #define RCP_ACC_BITS 0xf000000000000000UL #define RCP_FP_BIT 0x0800000000000000UL @@ -512,6 +516,26 @@ static inline int pmd_bad(pmd_t pmd) extern void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMD_WRITE +static inline int pmd_write(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; +} + +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} + static inline int pte_none(pte_t pte) { return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); @@ -1165,6 +1189,22 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) +static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto = (unsigned long) pmdp - + pmd_index(address) * sizeof(pmd_t); + + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), + "a" ((address & HPAGE_MASK)) + : "cc" + ); + } +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); @@ -1176,6 +1216,156 @@ static inline int pmd_trans_splitting(pmd_t pmd) { return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; } + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + *pmdp = entry; +} + +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + unsigned long pgprot_pmd = 0; + + if (pgprot_val(pgprot) & _PAGE_INVALID) { + if (pgprot_val(pgprot) & _PAGE_SWT) + pgprot_pmd |= _HPAGE_TYPE_NONE; + pgprot_pmd |= _SEGMENT_ENTRY_INV; + } + if (pgprot_val(pgprot) & _PAGE_RO) + pgprot_pmd |= _SEGMENT_ENTRY_RO; + return pgprot_pmd; +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmd_val(pmd) &= _SEGMENT_CHG_MASK; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + return pmd; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; + return pmd; +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_RO; + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + /* No dirty bit in the segment table entry. */ + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + /* No referenced bit in the segment table entry. */ + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + /* No referenced bit in the segment table entry. */ + return pmd; +} + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; + long tmp, rc; + int counter; + + rc = 0; + if (MACHINE_HAS_RRBM) { + counter = PTRS_PER_PTE >> 6; + asm volatile( + "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ + " ogr %1,%0\n" + " la %3,0(%4,%3)\n" + " brct %2,0b\n" + : "=&d" (tmp), "+&d" (rc), "+d" (counter), + "+a" (pmd_addr) + : "a" (64 * 4096UL) : "cc"); + rc = !!rc; + } else { + counter = PTRS_PER_PTE; + asm volatile( + "0: rrbe 0,%2\n" + " la %2,0(%3,%2)\n" + " brc 12,1f\n" + " lhi %0,1\n" + "1: brct %1,0b\n" + : "+d" (rc), "+d" (counter), "+a" (pmd_addr) + : "a" (4096UL) : "cc"); + } + return rc; +} + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + __pmd_idte(address, pmdp); + pmd_clear(pmdp); + return pmd; +} + +#define __HAVE_ARCH_PMDP_CLEAR_FLUSH +static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + return pmdp_get_and_clear(vma->vm_mm, address, pmdp); +} + +#define __HAVE_ARCH_PMDP_INVALIDATE +static inline void pmdp_invalidate(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + __pmd_idte(address, pmdp); +} + +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) +{ + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return __pmd; +} + +#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; +} + +static inline int has_transparent_hugepage(void) +{ + return MACHINE_HAS_HPAGE ? 1 : 0; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + if (pmd_trans_huge(pmd)) + return pmd_val(pmd) >> HPAGE_SHIFT; + else + return pmd_val(pmd) >> PAGE_SHIFT; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 87b47ca954f1..8cfd731a18d8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -81,6 +81,7 @@ extern unsigned int s390_user_mode; #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) +#define MACHINE_FLAG_RRBM (1UL << 16) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -99,7 +100,8 @@ extern unsigned int s390_user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) -#define MACHINE_HAS_TE (0) +#define MACHINE_HAS_TE (0) +#define MACHINE_HAS_RRBM (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -112,6 +114,7 @@ extern unsigned int s390_user_mode; #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) +#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) #endif /* CONFIG_64BIT */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 06e5acbc84bd..b75d7d686684 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -137,6 +137,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0) #define tlb_migrate_finish(mm) do { } while (0) #endif /* _S390_TLB_H */ -- cgit From b113da65785d5f3f9ff1451ec0fe43d6d76da25b Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 8 Oct 2012 16:34:25 -0700 Subject: mm: Add and use update_mmu_cache_pmd() in transparent huge page code. The transparent huge page code passes a PMD pointer in as the third argument of update_mmu_cache(), which expects a PTE pointer. This never got noticed because X86 implements update_mmu_cache() as a macro and thus we don't get any type checking, and X86 is the only architecture which supports transparent huge pages currently. Before other architectures can support transparent huge pages properly we need to add a new interface which will take a PMD pointer as the third argument rather than a PTE pointer. [akpm@linux-foundation.org: implement update_mm_cache_pmd() for s390] Signed-off-by: David S. Miller Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ed14fc2db6e0..979fe3dc0788 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -42,6 +42,7 @@ extern void fault_init(void); * tables contain all the necessary information. */ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0) /* * ZERO_PAGE is a global shared page that is always zero; used -- cgit