diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/huge_memory.c | 87 | ||||
| -rw-r--r-- | mm/z3fold.c | 9 | ||||
| -rw-r--r-- | mm/zsmalloc.c | 2 |
3 files changed, 67 insertions, 31 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fef4cf210cc7..f3c4f9d22821 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, deactivate_page(page); if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) { - orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, - tlb->fullmm); + pmdp_invalidate(vma, addr, pmd); orig_pmd = pmd_mkold(orig_pmd); orig_pmd = pmd_mkclean(orig_pmd); @@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mk_savedwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + /* + * In case prot_numa, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and don't clear it + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mk_savedwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } diff --git a/mm/z3fold.c b/mm/z3fold.c index f9492bccfd79..54f63c4a809a 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr) spin_lock(&zhdr->page_lock); } +/* Try to lock a z3fold page */ +static inline int z3fold_page_trylock(struct z3fold_header *zhdr) +{ + return spin_trylock(&zhdr->page_lock); +} + /* Unlock a z3fold page */ static inline void z3fold_page_unlock(struct z3fold_header *zhdr) { @@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_lock(&pool->lock); zhdr = list_first_entry_or_null(&pool->unbuddied[i], struct z3fold_header, buddy); - if (!zhdr) { + if (!zhdr || !z3fold_page_trylock(zhdr)) { spin_unlock(&pool->lock); continue; } @@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_unlock(&pool->lock); page = virt_to_page(zhdr); - z3fold_page_lock(zhdr); if (zhdr->first_chunks == 0) { if (zhdr->middle_chunks != 0 && chunks >= zhdr->start_middle) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7ee9c34dbd6..d41edd28298b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -276,7 +276,7 @@ struct zs_pool { struct zspage { struct { unsigned int fullness:FULLNESS_BITS; - unsigned int class:CLASS_BITS; + unsigned int class:CLASS_BITS + 1; unsigned int isolated:ISOLATED_BITS; unsigned int magic:MAGIC_VAL_BITS; }; |
