summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Kravetz <mike.kravetz@oracle.com>2020-08-11 18:31:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-12 10:57:56 -0700
commit34ae204f18519f0920bd50a644abd6fefc8dbfcf (patch)
tree60a3093bfd2f412b5f4eabfb0d630662326e4b27
parent15568299b7d9988063afce60731df605ab236e2a (diff)
hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem in at least read mode. This is because the explicit locking in huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring the code, the call to huge_pte_alloc in the else block at the beginning of hugetlb_fault was missed. Unfortunately, that else clause is exercised when there is no page table entry. This will likely lead to a call to huge_pmd_share. If huge_pmd_share thinks pmd sharing is possible, it will traverse the mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is modifying the tree, bad things such as addressing exceptions or worse could happen. Simply remove the else clause. It should have been removed previously. The code following the else will call huge_pte_alloc with the appropriate locking. To prevent this type of issue in the future, add routines to assert that i_mmap_rwsem is held, and call these routines in huge pmd sharing routines. Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Suggested-by: Matthew Wilcox <willy@infradead.org> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Hugh Dickins <hughd@google.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Kirill A.Shutemov" <kirill.shutemov@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Prakash Sangappa <prakash.sangappa@oracle.com> Cc: <stable@vger.kernel.org> Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--mm/hugetlb.c15
-rw-r--r--mm/rmap.c2
4 files changed, 23 insertions, 12 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 011af396aa17..7c69dd7c6160 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping)
up_read(&mapping->i_mmap_rwsem);
}
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+ lockdep_assert_held(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+ lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+}
+
/*
* Might pages of this file be mapped into userspace?
*/
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 50650d0d01b9..a520bf26e5d8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -203,8 +204,9 @@ static inline struct address_space *hugetlb_page_mapping_lock_write(
return NULL;
}
-static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
- pte_t *ptep)
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dffafb5bf2ed..8a18f1234e80 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3967,7 +3967,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
@@ -4554,10 +4554,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
- } else {
- ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
}
/*
@@ -5034,7 +5030,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
pages++;
spin_unlock(ptl);
shared_pmd = true;
@@ -5415,12 +5411,14 @@ out:
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
p4d_t *p4d = p4d_offset(pgd, *addr);
pud_t *pud = pud_offset(p4d, *addr);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
@@ -5438,7 +5436,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 5fe2dedce1fc..6cce9ef06753 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* do this outside rmap routines.
*/
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly