From 31a6f1a6e5a4a26040b67d8fa4256539b36f5893 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:32 +0100 Subject: dax: Simplify arguments of dax_insert_mapping() dax_insert_mapping() has lots of arguments, and a lot of them are actually duplicated by the vm_fault structure that is passed as well. Change the function to take the same arguments as dax_pmd_insert_mapping(). Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index f001d8c72a06..0bc42ac294ca 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -820,23 +820,30 @@ out: } EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); -static int dax_insert_mapping(struct address_space *mapping, - struct block_device *bdev, struct dax_device *dax_dev, - sector_t sector, size_t size, void *entry, - struct vm_area_struct *vma, struct vm_fault *vmf) +static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) +{ + return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); +} + +static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos, void *entry) { + const sector_t sector = dax_iomap_sector(iomap, pos); + struct vm_area_struct *vma = vmf->vma; + struct address_space *mapping = vma->vm_file->f_mapping; unsigned long vaddr = vmf->address; void *ret, *kaddr; pgoff_t pgoff; int id, rc; pfn_t pfn; - rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); + rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff); if (rc) return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); + rc = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), + &kaddr, &pfn); if (rc < 0) { dax_read_unlock(id); return rc; @@ -936,11 +943,6 @@ int __dax_zero_page_range(struct block_device *bdev, } EXPORT_SYMBOL_GPL(__dax_zero_page_range); -static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) -{ - return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); -} - static loff_t dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap) @@ -1087,7 +1089,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; - sector_t sector; struct iomap iomap = { 0 }; unsigned flags = IOMAP_FAULT; int error, major = 0; @@ -1140,9 +1141,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, goto error_finish_iomap; } - sector = dax_iomap_sector(&iomap, pos); - if (vmf->cow_page) { + sector_t sector = dax_iomap_sector(&iomap, pos); + switch (iomap.type) { case IOMAP_HOLE: case IOMAP_UNWRITTEN: @@ -1175,8 +1176,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } - error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, - sector, PAGE_SIZE, entry, vmf->vma, vmf); + error = dax_insert_mapping(vmf, &iomap, pos, entry); /* -EBUSY is fine, somebody else faulted on the same PTE */ if (error == -EBUSY) error = 0; -- cgit From 5e161e4066d3ebeaff95a4b979b42f8bf00494d5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:33 +0100 Subject: dax: Factor out getting of pfn out of iomap Factor out the code that gets a pfn out of an iomap; it is shared between the PTE and PMD fault paths.
Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 83 +++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 40 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 0bc42ac294ca..116eef8d6c69 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -825,30 +825,53 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); } -static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, - loff_t pos, void *entry) +static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, + pfn_t *pfnp) { const sector_t sector = dax_iomap_sector(iomap, pos); - struct vm_area_struct *vma = vmf->vma; - struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long vaddr = vmf->address; - void *ret, *kaddr; pgoff_t pgoff; + void *kaddr; int id, rc; - pfn_t pfn; + long length; - rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff); + rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff); if (rc) return rc; - id = dax_read_lock(); - rc = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), - &kaddr, &pfn); - if (rc < 0) { - dax_read_unlock(id); - return rc; + length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), + &kaddr, pfnp); + if (length < 0) { + rc = length; + goto out; } + rc = -EINVAL; + if (PFN_PHYS(length) < size) + goto out; + if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1)) + goto out; + /* For larger pages we need devmap */ + if (length > 1 && !pfn_t_devmap(*pfnp)) + goto out; + rc = 0; +out: dax_read_unlock(id); + return rc; +} + +static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos, void *entry) +{ + const sector_t sector = dax_iomap_sector(iomap, pos); + struct vm_area_struct *vma = vmf->vma; + struct address_space *mapping = vma->vm_file->f_mapping; + unsigned long vaddr = vmf->address; + void *ret; + int rc; + pfn_t pfn; + + rc = dax_iomap_pfn(iomap, pos, PAGE_SIZE, &pfn); + if (rc < 0) + return rc; ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0); if (IS_ERR(ret)) @@ -1223,46 +1246,26 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; const sector_t sector = dax_iomap_sector(iomap, pos); - struct dax_device *dax_dev = iomap->dax_dev; - struct block_device *bdev = iomap->bdev; struct inode *inode = mapping->host; - const size_t size = PMD_SIZE; - void *ret = NULL, *kaddr; - long length = 0; - pgoff_t pgoff; + void *ret = NULL; pfn_t pfn = {}; - int id; + int rc; - if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0) + rc = dax_iomap_pfn(iomap, pos, PMD_SIZE, &pfn); + if (rc < 0) goto fallback; - id = dax_read_lock(); - length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); - if (length < 0) - goto unlock_fallback; - length = PFN_PHYS(length); - - if (length < size) - goto unlock_fallback; - if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) - goto unlock_fallback; - if (!pfn_t_devmap(pfn)) - goto unlock_fallback; - dax_read_unlock(id); - ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, RADIX_DAX_PMD); if (IS_ERR(ret)) goto fallback; - trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret); + trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, ret); return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn, vmf->flags & FAULT_FLAG_WRITE); -unlock_fallback: - 
dax_read_unlock(id); fallback: - trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret); + trace_dax_pmd_insert_mapping_fallback(inode, vmf, PMD_SIZE, pfn, ret); return VM_FAULT_FALLBACK; } -- cgit From a0987ad5c576626fdb40547f5ac99b4f90608dda Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:34 +0100 Subject: dax: Create local variable for VMA in dax_iomap_pte_fault() There are already two users and more are coming. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 116eef8d6c69..c09465884bbe 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1108,7 +1108,8 @@ static int dax_fault_return(int error) static int dax_iomap_pte_fault(struct vm_fault *vmf, const struct iomap_ops *ops) { - struct address_space *mapping = vmf->vma->vm_file->f_mapping; + struct vm_area_struct *vma = vmf->vma; + struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; @@ -1196,7 +1197,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); - count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_insert_mapping(vmf, &iomap, pos, entry); -- cgit From d2c43ef13327478b299db0f58193495367c6d4ae Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:35 +0100 Subject: dax: Create local variable for vmf->flags & FAULT_FLAG_WRITE test There are already two users and more are coming. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index c09465884bbe..5ea71381dba0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1116,6 +1116,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, struct iomap iomap = { 0 }; unsigned flags = IOMAP_FAULT; int error, major = 0; + bool write = vmf->flags & FAULT_FLAG_WRITE; int vmf_ret = 0; void *entry; @@ -1130,7 +1131,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, goto out; } - if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) + if (write && !vmf->cow_page) flags |= IOMAP_WRITE; entry = grab_mapping_entry(mapping, vmf->pgoff, 0); @@ -1207,7 +1208,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: - if (!(vmf->flags & FAULT_FLAG_WRITE)) { + if (!write) { vmf_ret = dax_load_hole(mapping, entry, vmf); goto finish_iomap; } -- cgit From 1b5a1cb21e0cdfb001050c76dc31039cdece1a63 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:36 +0100 Subject: dax: Inline dax_insert_mapping() into the callsite dax_insert_mapping() has only one callsite and we will need to further fine tune what it does for synchronous faults. Just inline it into the callsite so that we don't have to pass awkward bools around. 
Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 5ea71381dba0..5b20c6456926 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -858,32 +858,6 @@ out: return rc; } -static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, - loff_t pos, void *entry) -{ - const sector_t sector = dax_iomap_sector(iomap, pos); - struct vm_area_struct *vma = vmf->vma; - struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long vaddr = vmf->address; - void *ret; - int rc; - pfn_t pfn; - - rc = dax_iomap_pfn(iomap, pos, PAGE_SIZE, &pfn); - if (rc < 0) - return rc; - - ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - - trace_dax_insert_mapping(mapping->host, vmf, ret); - if (vmf->flags & FAULT_FLAG_WRITE) - return vm_insert_mixed_mkwrite(vma, vaddr, pfn); - else - return vm_insert_mixed(vma, vaddr, pfn); -} - /* * The user has performed a load from a hole in the file. Allocating a new * page in the file would cause excessive storage usage for workloads with @@ -1119,6 +1093,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, bool write = vmf->flags & FAULT_FLAG_WRITE; int vmf_ret = 0; void *entry; + pfn_t pfn; trace_dax_pte_fault(inode, vmf, vmf_ret); /* @@ -1201,7 +1176,24 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } - error = dax_insert_mapping(vmf, &iomap, pos, entry); + error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn); + if (error < 0) + goto error_finish_iomap; + + entry = dax_insert_mapping_entry(mapping, vmf, entry, + dax_iomap_sector(&iomap, pos), + 0); + if (IS_ERR(entry)) { + error = PTR_ERR(entry); + goto error_finish_iomap; + } + + trace_dax_insert_mapping(inode, vmf, entry); + if (write) + error = vm_insert_mixed_mkwrite(vma, vaddr, pfn); + else + error = vm_insert_mixed(vma, vaddr, pfn); + /* -EBUSY is fine, somebody else faulted on the same PTE */ if (error == -EBUSY) error = 0; -- cgit From 302a5e312b3a106fec49ba08da3f6545c9b7ee18 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:37 +0100 Subject: dax: Inline dax_pmd_insert_mapping() into the callsite dax_pmd_insert_mapping() has only one callsite and we will need to further fine tune what it does for synchronous faults. Just inline it into the callsite so that we don't have to pass awkward bools around. 
Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 5b20c6456926..675fab8ec41f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1235,33 +1235,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, } #ifdef CONFIG_FS_DAX_PMD -static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, - loff_t pos, void *entry) -{ - struct address_space *mapping = vmf->vma->vm_file->f_mapping; - const sector_t sector = dax_iomap_sector(iomap, pos); - struct inode *inode = mapping->host; - void *ret = NULL; - pfn_t pfn = {}; - int rc; - - rc = dax_iomap_pfn(iomap, pos, PMD_SIZE, &pfn); - if (rc < 0) - goto fallback; - - ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, - RADIX_DAX_PMD); - if (IS_ERR(ret)) - goto fallback; - - trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, ret); - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, - pfn, vmf->flags & FAULT_FLAG_WRITE); - -fallback: - trace_dax_pmd_insert_mapping_fallback(inode, vmf, PMD_SIZE, pfn, ret); - return VM_FAULT_FALLBACK; -} +/* + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up + * more often than one might expect in the below functions. + */ +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, void *entry) @@ -1317,6 +1295,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, void *entry; loff_t pos; int error; + pfn_t pfn; /* * Check whether offset isn't beyond end of file now. Caller is @@ -1394,7 +1373,19 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, switch (iomap.type) { case IOMAP_MAPPED: - result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry); + error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); + if (error < 0) + goto finish_iomap; + + entry = dax_insert_mapping_entry(mapping, vmf, entry, + dax_iomap_sector(&iomap, pos), + RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto finish_iomap; + + trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); + result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, + write); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: -- cgit From cec04e8c825eaa5a4bb1a3ce0d3784628e965196 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:38 +0100 Subject: dax: Fix comment describing dax_iomap_fault() Add missing argument description. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 675fab8ec41f..5214ed9ba508 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1435,7 +1435,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, /** * dax_iomap_fault - handle a page fault on a DAX file * @vmf: The description of the fault - * @ops: iomap ops passed from the file system + * @pe_size: Size of the page to fault in + * @ops: Iomap ops passed from the file system * * When a page fault occurs, filesystems may call this helper in * their fault handler for DAX files. 
dax_iomap_fault() assumes the caller -- cgit From 9a0dd42251439de635088b97533109a935864a84 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:39 +0100 Subject: dax: Allow dax_iomap_fault() to return pfn For synchronous page fault dax_iomap_fault() will need to return PFN which will then need to be inserted into page tables after fsync() completes. Add necessary parameter to dax_iomap_fault(). Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 5214ed9ba508..5ddf15161390 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1079,7 +1079,7 @@ static int dax_fault_return(int error) return VM_FAULT_SIGBUS; } -static int dax_iomap_pte_fault(struct vm_fault *vmf, +static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; @@ -1280,7 +1280,7 @@ fallback: return VM_FAULT_FALLBACK; } -static int dax_iomap_pmd_fault(struct vm_fault *vmf, +static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; @@ -1425,7 +1425,7 @@ out: return result; } #else -static int dax_iomap_pmd_fault(struct vm_fault *vmf, +static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { return VM_FAULT_FALLBACK; @@ -1436,6 +1436,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, * dax_iomap_fault - handle a page fault on a DAX file * @vmf: The description of the fault * @pe_size: Size of the page to fault in + * @pfnp: PFN to insert for synchronous faults if fsync is required * @ops: Iomap ops passed from the file system * * When a page fault occurs, filesystems may call this helper in @@ -1444,13 +1445,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, * successfully. */ int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - const struct iomap_ops *ops) + pfn_t *pfnp, const struct iomap_ops *ops) { switch (pe_size) { case PE_SIZE_PTE: - return dax_iomap_pte_fault(vmf, ops); + return dax_iomap_pte_fault(vmf, pfnp, ops); case PE_SIZE_PMD: - return dax_iomap_pmd_fault(vmf, ops); + return dax_iomap_pmd_fault(vmf, pfnp, ops); default: return VM_FAULT_FALLBACK; } -- cgit From f5b7b74876cff5664ea8b2ef7f002c54cd6e7c90 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:40 +0100 Subject: dax: Allow tuning whether dax_insert_mapping_entry() dirties entry Currently we dirty radix tree entry whenever dax_insert_mapping_entry() gets called for a write fault. With synchronous page faults we would like to insert clean radix tree entry and dirty it only once we call fdatasync() and update page tables to save some unnecessary cache flushing. Add 'dirty' argument to dax_insert_mapping_entry() for that. 
Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 5ddf15161390..efc210ff6665 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -526,13 +526,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, static void *dax_insert_mapping_entry(struct address_space *mapping, struct vm_fault *vmf, void *entry, sector_t sector, - unsigned long flags) + unsigned long flags, bool dirty) { struct radix_tree_root *page_tree = &mapping->page_tree; void *new_entry; pgoff_t index = vmf->pgoff; - if (vmf->flags & FAULT_FLAG_WRITE) + if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { @@ -569,7 +569,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, entry = new_entry; } - if (vmf->flags & FAULT_FLAG_WRITE) + if (dirty) radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); spin_unlock_irq(&mapping->tree_lock); @@ -881,7 +881,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, } entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, - RADIX_DAX_ZERO_PAGE); + RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(entry2)) { ret = VM_FAULT_SIGBUS; goto out; @@ -1182,7 +1182,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_mapping_entry(mapping, vmf, entry, dax_iomap_sector(&iomap, pos), - 0); + 0, write); if (IS_ERR(entry)) { error = PTR_ERR(entry); goto error_finish_iomap; @@ -1258,7 +1258,7 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, goto fallback; ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, - RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE); + RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(ret)) goto fallback; @@ -1379,7 +1379,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_mapping_entry(mapping, vmf, entry, dax_iomap_sector(&iomap, pos), - RADIX_DAX_PMD); + RADIX_DAX_PMD, write); if (IS_ERR(entry)) goto finish_iomap; -- cgit From caa51d26f85c248f1c4f43a870ad3ef84bf9eb8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:42 +0100 Subject: dax, iomap: Add support for synchronous faults Add a flag to the iomap interface informing the caller that the inode needs fdatasync(2) for the returned extent to become persistent, and use it in the DAX fault code so that we don't map such extents into page tables immediately. Instead we propagate the information that fdatasync(2) is necessary from dax_iomap_fault() with a new VM_FAULT_NEEDDSYNC flag. The filesystem fault handler is then responsible for calling fdatasync(2) and inserting the pfn into the page tables.
Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index efc210ff6665..bb9ff907738c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1091,6 +1091,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, unsigned flags = IOMAP_FAULT; int error, major = 0; bool write = vmf->flags & FAULT_FLAG_WRITE; + bool sync; int vmf_ret = 0; void *entry; pfn_t pfn; @@ -1169,6 +1170,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; } + sync = (vma->vm_flags & VM_SYNC) && (iomap.flags & IOMAP_F_DIRTY); + switch (iomap.type) { case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { @@ -1182,12 +1185,27 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_mapping_entry(mapping, vmf, entry, dax_iomap_sector(&iomap, pos), - 0, write); + 0, write && !sync); if (IS_ERR(entry)) { error = PTR_ERR(entry); goto error_finish_iomap; } + /* + * If we are doing synchronous page fault and inode needs fsync, + * we can insert PTE into page tables only after that happens. + * Skip insertion for now and return the pfn so that caller can + * insert it after fsync is done. + */ + if (sync) { + if (WARN_ON_ONCE(!pfnp)) { + error = -EIO; + goto error_finish_iomap; + } + *pfnp = pfn; + vmf_ret = VM_FAULT_NEEDDSYNC | major; + goto finish_iomap; + } trace_dax_insert_mapping(inode, vmf, entry); if (write) error = vm_insert_mixed_mkwrite(vma, vaddr, pfn); @@ -1287,6 +1305,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, struct address_space *mapping = vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; bool write = vmf->flags & FAULT_FLAG_WRITE; + bool sync; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; struct inode *inode = mapping->host; int result = VM_FAULT_FALLBACK; @@ -1371,6 +1390,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; + sync = (vma->vm_flags & VM_SYNC) && (iomap.flags & IOMAP_F_DIRTY); + switch (iomap.type) { case IOMAP_MAPPED: error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); @@ -1379,10 +1400,24 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_mapping_entry(mapping, vmf, entry, dax_iomap_sector(&iomap, pos), - RADIX_DAX_PMD, write); + RADIX_DAX_PMD, write && !sync); if (IS_ERR(entry)) goto finish_iomap; + /* + * If we are doing synchronous page fault and inode needs fsync, + * we can insert PMD into page tables only after that happens. + * Skip insertion for now and return the pfn so that caller can + * insert it after fsync is done. + */ + if (sync) { + if (WARN_ON_ONCE(!pfnp)) + goto finish_iomap; + *pfnp = pfn; + result = VM_FAULT_NEEDDSYNC; + goto finish_iomap; + } + trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, write); -- cgit From 71eab6dfd91eabf06fe782a590c07e8a0795f5ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:43 +0100 Subject: dax: Implement dax_finish_sync_fault() Implement a function that filesystems can call to finish handling of synchronous page faults. It takes care of syncing the appropriate file range and inserting the page table entry.
Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index bb9ff907738c..78233c716757 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1492,3 +1492,86 @@ int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, } } EXPORT_SYMBOL_GPL(dax_iomap_fault); + +/** + * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables + * @vmf: The description of the fault + * @pe_size: Size of entry to be inserted + * @pfn: PFN to insert + * + * This function inserts writeable PTE or PMD entry into page tables for mmaped + * DAX file. It takes care of marking corresponding radix tree entry as dirty + * as well. + */ +static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, + enum page_entry_size pe_size, + pfn_t pfn) +{ + struct address_space *mapping = vmf->vma->vm_file->f_mapping; + void *entry, **slot; + pgoff_t index = vmf->pgoff; + int vmf_ret, error; + + spin_lock_irq(&mapping->tree_lock); + entry = get_unlocked_mapping_entry(mapping, index, &slot); + /* Did we race with someone splitting entry or so? */ + if (!entry || + (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || + (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { + put_unlocked_mapping_entry(mapping, index, entry); + spin_unlock_irq(&mapping->tree_lock); + trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, + VM_FAULT_NOPAGE); + return VM_FAULT_NOPAGE; + } + radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); + entry = lock_slot(mapping, slot); + spin_unlock_irq(&mapping->tree_lock); + switch (pe_size) { + case PE_SIZE_PTE: + error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); + vmf_ret = dax_fault_return(error); + break; +#ifdef CONFIG_FS_DAX_PMD + case PE_SIZE_PMD: + vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, + pfn, true); + break; +#endif + default: + vmf_ret = VM_FAULT_FALLBACK; + } + put_locked_mapping_entry(mapping, index); + trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret); + return vmf_ret; +} + +/** + * dax_finish_sync_fault - finish synchronous page fault + * @vmf: The description of the fault + * @pe_size: Size of entry to be inserted + * @pfn: PFN to insert + * + * This function ensures that the file range touched by the page fault is + * stored persistently on the media and handles inserting of appropriate page + * table entry. + */ +int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + pfn_t pfn) +{ + int err; + loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; + size_t len = 0; + + if (pe_size == PE_SIZE_PTE) + len = PAGE_SIZE; + else if (pe_size == PE_SIZE_PMD) + len = PMD_SIZE; + else + WARN_ON_ONCE(1); + err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1); + if (err) + return VM_FAULT_SIGBUS; + return dax_insert_pfn_mkwrite(vmf, pe_size, pfn); +} +EXPORT_SYMBOL_GPL(dax_finish_sync_fault); -- cgit From aaa422c4c3f6ee958ea9d6c9260ac40f90a3f4e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Nov 2017 16:38:44 -0800 Subject: fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core While reviewing whether MAP_SYNC should strengthen its current guarantee of syncing writes from the initiating process to also include third-party readers observing dirty metadata, Dave pointed out that the check of IOMAP_WRITE is misplaced. 
The policy of what to do with IOMAP_F_DIRTY should be separated from the generic filesystem mechanism of reporting dirty metadata. Move this policy to the fs-dax core to simplify the per-filesystem iomap handlers, and further centralize code that implements the MAP_SYNC policy. This otherwise should not change behavior; it just makes it easier to change behavior in the future. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Cc: Ross Zwisler Reported-by: Dave Chinner Signed-off-by: Dan Williams --- fs/dax.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 78233c716757..27ba300660ff 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1079,6 +1079,17 @@ static int dax_fault_return(int error) return VM_FAULT_SIGBUS; } +/* + * MAP_SYNC on a dax mapping guarantees dirty metadata is + * flushed on write-faults (non-cow), but not read-faults. + */ +static bool dax_fault_is_synchronous(unsigned long flags, + struct vm_area_struct *vma, struct iomap *iomap) +{ + return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) + && (iomap->flags & IOMAP_F_DIRTY); +} + static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { @@ -1170,7 +1181,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; } - sync = (vma->vm_flags & VM_SYNC) && (iomap.flags & IOMAP_F_DIRTY); + sync = dax_fault_is_synchronous(flags, vma, &iomap); switch (iomap.type) { case IOMAP_MAPPED: @@ -1390,7 +1401,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; - sync = (vma->vm_flags & VM_SYNC) && (iomap.flags & IOMAP_F_DIRTY); + sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap); switch (iomap.type) { case IOMAP_MAPPED: -- cgit From 957ac8c421ad8b5eef9b17fe98e146d8311a541e Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 14 Nov 2017 20:37:27 -0500 Subject: dax: fix PMD faults on zero-length files PMD faults on a zero-length file on a file system mounted with -o dax will not generate SIGBUS as expected. fd = open(...O_TRUNC); addr = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); *addr = 'a'; The problem is this code in dax_iomap_pmd_fault: max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; If the inode size is zero, we end up with a max_pgoff that is way larger than 0. :) Fix it by using DIV_ROUND_UP, as is done elsewhere in the kernel. I tested this with some simple test code that ensured that SIGBUS was received where expected. Cc: Fixes: 642261ac995e ("dax: add struct iomap based DAX PMD support") Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams --- fs/dax.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 27ba300660ff..f757cd0e2d07 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1333,7 +1333,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * this is a reliable test.
*/ pgoff = linear_page_index(vma, pmd_addr); - max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; + max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); @@ -1357,13 +1357,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if ((pmd_addr + PMD_SIZE) > vma->vm_end) goto fallback; - if (pgoff > max_pgoff) { + if (pgoff >= max_pgoff) { result = VM_FAULT_SIGBUS; goto out; } /* If the PMD would extend beyond the file size */ - if ((pgoff | PG_PMD_COLOUR) > max_pgoff) + if ((pgoff | PG_PMD_COLOUR) >= max_pgoff) goto fallback; /* -- cgit
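Taken together, the series above leaves each filesystem responsible for wiring the two halves of a synchronous fault: call dax_iomap_fault() and, when it returns VM_FAULT_NEEDDSYNC, call dax_finish_sync_fault(). The following is a minimal sketch of that pattern, modelled on the ext4/xfs handlers merged alongside this series rather than code from any patch above; the handler name and example_iomap_ops are hypothetical placeholders, and a real handler also brackets the fault with sb_start_pagefault()/sb_end_pagefault() and the appropriate locking:

static int example_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	pfn_t pfn;
	int result;

	if (write)
		file_update_time(vmf->vma->vm_file);

	/* pfn is only filled in when VM_FAULT_NEEDDSYNC is returned */
	result = dax_iomap_fault(vmf, pe_size, &pfn, &example_iomap_ops);

	/*
	 * A MAP_SYNC write fault over an extent with dirty metadata
	 * (IOMAP_F_DIRTY) comes back without a page table entry
	 * installed; sync the file range and insert the PFN now.
	 */
	if (write && (result & VM_FAULT_NEEDDSYNC))
		result = dax_finish_sync_fault(vmf, pe_size, pfn);

	return result;
}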
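From userspace, the observable effect of the series is the MAP_SYNC mmap flag: once a write fault on such a mapping completes, the extent and the metadata needed to reach it are durable, so persisting subsequent stores requires only CPU cache flushes, not fsync()/fdatasync(). A minimal sketch, assuming a DAX-capable filesystem (e.g. mounted with -o dax) and a hypothetical file path; MAP_SYNC and MAP_SHARED_VALIDATE come from glibc 2.28+ or <linux/mman.h>:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/pmem/data", O_CREAT | O_RDWR, 0644);

	if (fd < 0 || ftruncate(fd, 4096) != 0) {
		perror("open/ftruncate");
		return 1;
	}

	/* MAP_SYNC is only accepted together with MAP_SHARED_VALIDATE */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");	/* EOPNOTSUPP if MAP_SYNC is unsupported */
		return 1;
	}

	/*
	 * The write fault taken by this store goes through the
	 * dax_iomap_fault()/dax_finish_sync_fault() path above, so the
	 * block is allocated and its metadata made durable before the
	 * PTE is installed; the data itself then only needs a CPU cache
	 * flush to persist.
	 */
	strcpy(p, "hello, persistent world");

	munmap(p, 4096);
	close(fd);
	return 0;
}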