Diffstat (limited to 'fs/dax.c')
-rw-r--r--   fs/dax.c   172
1 file changed, 106 insertions(+), 66 deletions(-)
@@ -35,6 +35,9 @@
 #include <linux/iomap.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs_dax.h>
+
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -691,8 +694,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 				      pgoff_t index, unsigned long pfn)
 {
 	struct vm_area_struct *vma;
-	pte_t *ptep;
-	pte_t pte;
+	pte_t pte, *ptep = NULL;
+	pmd_t *pmdp = NULL;
 	spinlock_t *ptl;
 	bool changed;
 
@@ -707,21 +710,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 		address = pgoff_address(index, vma);
 		changed = false;
-		if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+		if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
 			continue;
-		if (pfn != pte_pfn(*ptep))
-			goto unlock;
-		if (!pte_dirty(*ptep) && !pte_write(*ptep))
-			goto unlock;
 
-		flush_cache_page(vma, address, pfn);
-		pte = ptep_clear_flush(vma, address, ptep);
-		pte = pte_wrprotect(pte);
-		pte = pte_mkclean(pte);
-		set_pte_at(vma->vm_mm, address, ptep, pte);
-		changed = true;
-unlock:
-		pte_unmap_unlock(ptep, ptl);
+		if (pmdp) {
+#ifdef CONFIG_FS_DAX_PMD
+			pmd_t pmd;
+
+			if (pfn != pmd_pfn(*pmdp))
+				goto unlock_pmd;
+			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+				goto unlock_pmd;
+
+			flush_cache_page(vma, address, pfn);
+			pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+			pmd = pmd_wrprotect(pmd);
+			pmd = pmd_mkclean(pmd);
+			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+			changed = true;
+unlock_pmd:
+			spin_unlock(ptl);
+#endif
+		} else {
+			if (pfn != pte_pfn(*ptep))
+				goto unlock_pte;
+			if (!pte_dirty(*ptep) && !pte_write(*ptep))
+				goto unlock_pte;
+
+			flush_cache_page(vma, address, pfn);
+			pte = ptep_clear_flush(vma, address, ptep);
+			pte = pte_wrprotect(pte);
+			pte = pte_mkclean(pte);
+			set_pte_at(vma->vm_mm, address, ptep, pte);
+			changed = true;
+unlock_pte:
+			pte_unmap_unlock(ptep, ptl);
+		}
 
 		if (changed)
 			mmu_notifier_invalidate_page(vma->vm_mm, address);
@@ -969,7 +993,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
-#ifdef CONFIG_FS_IOMAP
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
 	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1011,6 +1034,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct blk_dax_ctl dax = { 0 };
 		ssize_t map_len;
 
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
 		dax.sector = dax_iomap_sector(iomap, pos);
 		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
 		map_len = dax_map_atomic(iomap->bdev, &dax);
@@ -1054,15 +1082,19 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
  */
 ssize_t
 dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = mapping->host;
 	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
 	unsigned flags = 0;
 
-	if (iov_iter_rw(iter) == WRITE)
+	if (iov_iter_rw(iter) == WRITE) {
+		lockdep_assert_held_exclusive(&inode->i_rwsem);
 		flags |= IOMAP_WRITE;
+	} else {
+		lockdep_assert_held(&inode->i_rwsem);
+	}
 
 	while (iov_iter_count(iter)) {
 		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
@@ -1098,7 +1130,7 @@ static int dax_fault_return(int error)
  * necessary locking for the page fault to proceed successfully.
  */
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			struct iomap_ops *ops)
+			const struct iomap_ops *ops)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1224,21 +1256,21 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
  */
 #define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
 
-static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
-		struct vm_fault *vmf, unsigned long address,
-		struct iomap *iomap, loff_t pos, bool write, void **entryp)
+static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
+		loff_t pos, void **entryp)
 {
-	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	struct block_device *bdev = iomap->bdev;
+	struct inode *inode = mapping->host;
 	struct blk_dax_ctl dax = {
 		.sector = dax_iomap_sector(iomap, pos),
 		.size = PMD_SIZE,
 	};
 	long length = dax_map_atomic(bdev, &dax);
-	void *ret;
+	void *ret = NULL;
 
 	if (length < 0) /* dax_map_atomic() failed */
-		return VM_FAULT_FALLBACK;
+		goto fallback;
 	if (length < PMD_SIZE)
 		goto unmap_fallback;
 	if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
@@ -1251,67 +1283,86 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
 	ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
 			RADIX_DAX_PMD);
 	if (IS_ERR(ret))
-		return VM_FAULT_FALLBACK;
+		goto fallback;
 	*entryp = ret;
 
-	return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+	trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
+	return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+			dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
 
  unmap_fallback:
 	dax_unmap_atomic(bdev, &dax);
+fallback:
+	trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
+			dax.pfn, ret);
 	return VM_FAULT_FALLBACK;
 }
 
-static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
-		struct vm_fault *vmf, unsigned long address,
-		struct iomap *iomap, void **entryp)
+static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+		void **entryp)
 {
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	unsigned long pmd_addr = address & PMD_MASK;
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+	unsigned long pmd_addr = vmf->address & PMD_MASK;
+	struct inode *inode = mapping->host;
 	struct page *zero_page;
+	void *ret = NULL;
 	spinlock_t *ptl;
 	pmd_t pmd_entry;
-	void *ret;
 
-	zero_page = mm_get_huge_zero_page(vma->vm_mm);
+	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
 
 	if (unlikely(!zero_page))
-		return VM_FAULT_FALLBACK;
+		goto fallback;
 
 	ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
 			RADIX_DAX_PMD | RADIX_DAX_HZP);
 	if (IS_ERR(ret))
-		return VM_FAULT_FALLBACK;
+		goto fallback;
 	*entryp = ret;
 
-	ptl = pmd_lock(vma->vm_mm, pmd);
-	if (!pmd_none(*pmd)) {
+	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+	if (!pmd_none(*(vmf->pmd))) {
 		spin_unlock(ptl);
-		return VM_FAULT_FALLBACK;
+		goto fallback;
 	}
 
-	pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
 	pmd_entry = pmd_mkhuge(pmd_entry);
-	set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
 	spin_unlock(ptl);
+	trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
 	return VM_FAULT_NOPAGE;
+
+fallback:
+	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
+	return VM_FAULT_FALLBACK;
 }
 
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct address_space *mapping = vma->vm_file->f_mapping;
-	unsigned long pmd_addr = address & PMD_MASK;
-	bool write = flags & FAULT_FLAG_WRITE;
+	unsigned long pmd_addr = vmf->address & PMD_MASK;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
 	struct inode *inode = mapping->host;
 	int result = VM_FAULT_FALLBACK;
 	struct iomap iomap = { 0 };
 	pgoff_t max_pgoff, pgoff;
-	struct vm_fault vmf;
 	void *entry;
 	loff_t pos;
 	int error;
 
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
+	 * this is a reliable test.
+	 */
+	pgoff = linear_page_index(vma, pmd_addr);
+	max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+
 	/* Fall back to PTEs if we're going to COW */
 	if (write && !(vma->vm_flags & VM_SHARED))
 		goto fallback;
@@ -1322,16 +1373,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
 		goto fallback;
 
-	/*
-	 * Check whether offset isn't beyond end of file now. Caller is
-	 * supposed to hold locks serializing us with truncate / punch hole so
-	 * this is a reliable test.
-	 */
-	pgoff = linear_page_index(vma, pmd_addr);
-	max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-
-	if (pgoff > max_pgoff)
-		return VM_FAULT_SIGBUS;
+	if (pgoff > max_pgoff) {
+		result = VM_FAULT_SIGBUS;
+		goto out;
+	}
 
 	/* If the PMD would extend beyond the file size */
 	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
@@ -1360,21 +1405,15 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if (IS_ERR(entry))
 		goto finish_iomap;
 
-	vmf.pgoff = pgoff;
-	vmf.flags = flags;
-	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
-
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
-		result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
-				&iomap, pos, write, &entry);
+		result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
 		break;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
 			goto unlock_entry;
-		result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
-				&entry);
+		result = dax_pmd_load_hole(vmf, &iomap, &entry);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -1400,11 +1439,12 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	}
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
-		split_huge_pmd(vma, pmd, address);
+		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		count_vm_event(THP_FAULT_FALLBACK);
 	}
+out:
+	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
 #endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */
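
Note: the new "#define CREATE_TRACE_POINTS" plus "#include <trace/events/fs_dax.h>" pair is the standard way for exactly one .c file to instantiate tracepoints declared in a trace header. A minimal sketch of what such a header looks like follows; the event name mirrors trace_dax_pmd_fault_done() as called in the diff, but the field list is illustrative, not the exact upstream definition.

#undef TRACE_SYSTEM
#define TRACE_SYSTEM fs_dax

#if !defined(_TRACE_FS_DAX_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FS_DAX_H

#include <linux/tracepoint.h>

TRACE_EVENT(dax_pmd_fault_done,
	/* prototype of trace_dax_pmd_fault_done() as used in the diff */
	TP_PROTO(struct inode *inode, struct vm_fault *vmf,
		 pgoff_t max_pgoff, int result),
	TP_ARGS(inode, vmf, max_pgoff, result),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long, max_pgoff)
		__field(int, result)
	),
	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->max_pgoff = max_pgoff;
		__entry->result = result;
	),
	TP_printk("ino %#lx max_pgoff %#lx result %d",
		  __entry->ino, __entry->max_pgoff, __entry->result)
);

#endif /* _TRACE_FS_DAX_H */

/* This part must be outside the include guard */
#include <trace/define_trace.h>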
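Note: dax_iomap_rw() now asserts the caller's locking with lockdep: writers must hold inode->i_rwsem exclusive, readers may hold it shared. A minimal sketch of a conforming filesystem write path is below; myfs_dax_write_iter() and myfs_iomap_ops are hypothetical names, and a real caller would also handle O_APPEND, file_remove_privs(), timestamp updates, and so on.

static ssize_t myfs_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);	/* takes i_rwsem exclusive, satisfying the assert */
	ret = dax_iomap_rw(iocb, from, &myfs_iomap_ops);
	inode_unlock(inode);
	return ret;
}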
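Note: dax_iomap_pmd_fault() now takes only a struct vm_fault, since the vma, faulting address, flags, and pmd pointer all travel inside it. A sketch of how a filesystem's PMD fault hook might forward to the new entry point, assuming a hook that already receives struct vm_fault (as in the vm_fault conversion from the same kernel series); myfs_dax_pmd_fault() and myfs_iomap_ops are hypothetical:

static int myfs_dax_pmd_fault(struct vm_fault *vmf)
{
	/* all fault state (vma, address, pmd, flags) is carried in vmf */
	return dax_iomap_pmd_fault(vmf, &myfs_iomap_ops);
}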