summaryrefslogtreecommitdiff
path: root/fs/dax.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dax.c')
-rw-r--r--fs/dax.c172
1 files changed, 106 insertions, 66 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 5c74f60d0a50..3f1181563fb1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -35,6 +35,9 @@
#include <linux/iomap.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs_dax.h>
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -691,8 +694,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pgoff_t index, unsigned long pfn)
{
struct vm_area_struct *vma;
- pte_t *ptep;
- pte_t pte;
+ pte_t pte, *ptep = NULL;
+ pmd_t *pmdp = NULL;
spinlock_t *ptl;
bool changed;
@@ -707,21 +710,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
address = pgoff_address(index, vma);
changed = false;
- if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+ if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
continue;
- if (pfn != pte_pfn(*ptep))
- goto unlock;
- if (!pte_dirty(*ptep) && !pte_write(*ptep))
- goto unlock;
- flush_cache_page(vma, address, pfn);
- pte = ptep_clear_flush(vma, address, ptep);
- pte = pte_wrprotect(pte);
- pte = pte_mkclean(pte);
- set_pte_at(vma->vm_mm, address, ptep, pte);
- changed = true;
-unlock:
- pte_unmap_unlock(ptep, ptl);
+ if (pmdp) {
+#ifdef CONFIG_FS_DAX_PMD
+ pmd_t pmd;
+
+ if (pfn != pmd_pfn(*pmdp))
+ goto unlock_pmd;
+ if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+ goto unlock_pmd;
+
+ flush_cache_page(vma, address, pfn);
+ pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_mkclean(pmd);
+ set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+ changed = true;
+unlock_pmd:
+ spin_unlock(ptl);
+#endif
+ } else {
+ if (pfn != pte_pfn(*ptep))
+ goto unlock_pte;
+ if (!pte_dirty(*ptep) && !pte_write(*ptep))
+ goto unlock_pte;
+
+ flush_cache_page(vma, address, pfn);
+ pte = ptep_clear_flush(vma, address, ptep);
+ pte = pte_wrprotect(pte);
+ pte = pte_mkclean(pte);
+ set_pte_at(vma->vm_mm, address, ptep, pte);
+ changed = true;
+unlock_pte:
+ pte_unmap_unlock(ptep, ptl);
+ }
if (changed)
mmu_notifier_invalidate_page(vma->vm_mm, address);
@@ -969,7 +993,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
-#ifdef CONFIG_FS_IOMAP
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1011,6 +1034,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct blk_dax_ctl dax = { 0 };
ssize_t map_len;
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+
dax.sector = dax_iomap_sector(iomap, pos);
dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
map_len = dax_map_atomic(iomap->bdev, &dax);
@@ -1054,15 +1082,19 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
*/
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
- if (iov_iter_rw(iter) == WRITE)
+ if (iov_iter_rw(iter) == WRITE) {
+ lockdep_assert_held_exclusive(&inode->i_rwsem);
flags |= IOMAP_WRITE;
+ } else {
+ lockdep_assert_held(&inode->i_rwsem);
+ }
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
@@ -1098,7 +1130,7 @@ static int dax_fault_return(int error)
* necessary locking for the page fault to proceed successfully.
*/
int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
@@ -1224,21 +1256,21 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
*/
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
-static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, loff_t pos, bool write, void **entryp)
+static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
+ loff_t pos, void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct block_device *bdev = iomap->bdev;
+ struct inode *inode = mapping->host;
struct blk_dax_ctl dax = {
.sector = dax_iomap_sector(iomap, pos),
.size = PMD_SIZE,
};
long length = dax_map_atomic(bdev, &dax);
- void *ret;
+ void *ret = NULL;
if (length < 0) /* dax_map_atomic() failed */
- return VM_FAULT_FALLBACK;
+ goto fallback;
if (length < PMD_SIZE)
goto unmap_fallback;
if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
@@ -1251,67 +1283,86 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
RADIX_DAX_PMD);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+ trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+ dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
unmap_fallback:
dax_unmap_atomic(bdev, &dax);
+fallback:
+ trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
+ dax.pfn, ret);
return VM_FAULT_FALLBACK;
}
-static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, void **entryp)
+static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+ void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct inode *inode = mapping->host;
struct page *zero_page;
+ void *ret = NULL;
spinlock_t *ptl;
pmd_t pmd_entry;
- void *ret;
- zero_page = mm_get_huge_zero_page(vma->vm_mm);
+ zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
if (unlikely(!zero_page))
- return VM_FAULT_FALLBACK;
+ goto fallback;
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
RADIX_DAX_PMD | RADIX_DAX_HZP);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- ptl = pmd_lock(vma->vm_mm, pmd);
- if (!pmd_none(*pmd)) {
+ ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+ if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
- return VM_FAULT_FALLBACK;
+ goto fallback;
}
- pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+ pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
- set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+ set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
return VM_FAULT_NOPAGE;
+
+fallback:
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
+ return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
{
+ struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
- bool write = flags & FAULT_FLAG_WRITE;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
int result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 };
pgoff_t max_pgoff, pgoff;
- struct vm_fault vmf;
void *entry;
loff_t pos;
int error;
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is
+ * supposed to hold locks serializing us with truncate / punch hole so
+ * this is a reliable test.
+ */
+ pgoff = linear_page_index(vma, pmd_addr);
+ max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+ trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
@@ -1322,16 +1373,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- /*
- * Check whether offset isn't beyond end of file now. Caller is
- * supposed to hold locks serializing us with truncate / punch hole so
- * this is a reliable test.
- */
- pgoff = linear_page_index(vma, pmd_addr);
- max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-
- if (pgoff > max_pgoff)
- return VM_FAULT_SIGBUS;
+ if (pgoff > max_pgoff) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
/* If the PMD would extend beyond the file size */
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
@@ -1360,21 +1405,15 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (IS_ERR(entry))
goto finish_iomap;
- vmf.pgoff = pgoff;
- vmf.flags = flags;
- vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
-
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
- &iomap, pos, write, &entry);
+ result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
goto unlock_entry;
- result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
- &entry);
+ result = dax_pmd_load_hole(vmf, &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1400,11 +1439,12 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
fallback:
if (result == VM_FAULT_FALLBACK) {
- split_huge_pmd(vma, pmd, address);
+ split_huge_pmd(vma, vmf->pmd, vmf->address);
count_vm_event(THP_FAULT_FALLBACK);
}
+out:
+ trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
return result;
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
#endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */