summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- fs/xfs/xfs_file.c | 123
1 files changed, 43 insertions, 80 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 309e26c9dddb..299aee4b7b0b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -44,23 +44,10 @@
#include <linux/falloc.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
+#include <linux/mman.h>
static const struct vm_operations_struct xfs_file_vm_ops;
-/*
- * Clear the specified ranges to zero through either the pagecache or DAX.
- * Holes and unwritten extents will be left as-is as they already are zeroed.
- */
-int
-xfs_zero_range(
- struct xfs_inode *ip,
- xfs_off_t pos,
- xfs_off_t count,
- bool *did_zero)
-{
- return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
-}
-
int
xfs_update_prealloc_flags(
struct xfs_inode *ip,
@@ -121,7 +108,7 @@ xfs_dir_fsync(
if (!lsn)
return 0;
- return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
@@ -181,7 +168,7 @@ xfs_file_fsync(
}
if (lsn) {
- error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+ error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
ip->i_itemp->ili_fsync_fields = 0;
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -237,11 +224,13 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
return -EAGAIN;
+ } else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
+
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -259,9 +248,10 @@ xfs_file_buffered_aio_read(
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
return -EAGAIN;
+ } else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
ret = generic_file_read_iter(iocb, to);
@@ -297,31 +287,6 @@ xfs_file_read_iter(
}
/*
- * Zero any on disk space between the current EOF and the new, larger EOF.
- *
- * This handles the normal case of zeroing the remainder of the last block in
- * the file and the unusual case of zeroing blocks out beyond the size of the
- * file. This second case only happens with fixed size extents and when the
- * system crashes before the inode size was updated but after blocks were
- * allocated.
- *
- * Expects the iolock to be held exclusive, and will take the ilock internally.
- */
-int /* error (positive) */
-xfs_zero_eof(
- struct xfs_inode *ip,
- xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize, /* current inode size */
- bool *did_zeroing)
-{
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT(offset > isize);
-
- trace_xfs_zero_eof(ip, isize, offset - isize);
- return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
-}
-
-/*
* Common pre-write limit and setup checks.
*
* Called with the iolock held either shared or exclusive according to
@@ -340,6 +305,7 @@ xfs_file_aio_write_checks(
ssize_t error = 0;
size_t count = iov_iter_count(from);
bool drained_dio = false;
+ loff_t isize;
restart:
error = generic_write_checks(iocb, from);
@@ -376,7 +342,8 @@ restart:
* and hence be able to correctly determine if we need to run zeroing.
*/
spin_lock(&ip->i_flags_lock);
- if (iocb->ki_pos > i_size_read(inode)) {
+ isize = i_size_read(inode);
+ if (iocb->ki_pos > isize) {
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
@@ -397,7 +364,10 @@ restart:
drained_dio = true;
goto restart;
}
- error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
+
+ trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+ error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
+ NULL, &xfs_iomap_ops);
if (error)
return error;
} else
@@ -552,9 +522,10 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- if (!xfs_ilock_nowait(ip, iolock)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, iolock))
return -EAGAIN;
+ } else {
xfs_ilock(ip, iolock);
}
@@ -606,9 +577,10 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- if (!xfs_ilock_nowait(ip, iolock)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, iolock))
return -EAGAIN;
+ } else {
xfs_ilock(ip, iolock);
}
@@ -764,7 +736,7 @@ xfs_file_fallocate(
enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL;
loff_t new_size = 0;
- bool do_file_insert = 0;
+ bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -825,7 +797,7 @@ xfs_file_fallocate(
error = -EINVAL;
goto out_unlock;
}
- do_file_insert = 1;
+ do_file_insert = true;
} else {
flags |= XFS_PREALLOC_SET;
@@ -979,7 +951,7 @@ xfs_file_readdir(
* point we can change the ->readdir prototype to include the
* buffer size. For now we use the current glibc buffer size.
*/
- bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+ bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);
return xfs_readdir(NULL, ip, ctx, bufsize);
}
@@ -1040,7 +1012,11 @@ __xfs_filemap_fault(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
+ pfn_t pfn;
+
+ ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops);
+ if (ret & VM_FAULT_NEEDDSYNC)
+ ret = dax_finish_sync_fault(vmf, pe_size, pfn);
} else {
if (write_fault)
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
@@ -1085,37 +1061,16 @@ xfs_filemap_page_mkwrite(
}
/*
- * pfn_mkwrite was originally inteneded to ensure we capture time stamp
- * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
- * to ensure we serialise the fault barrier in place.
+ * pfn_mkwrite was originally intended to ensure we capture time stamp updates
+ * on write faults. In reality, it needs to serialise against truncate and
+ * prepare memory for writing so handle it as a standard write fault.
*/
static int
xfs_filemap_pfn_mkwrite(
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct xfs_inode *ip = XFS_I(inode);
- int ret = VM_FAULT_NOPAGE;
- loff_t size;
-
- trace_xfs_filemap_pfn_mkwrite(ip);
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
-
- /* check if the faulting page hasn't raced with truncate */
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- ret = VM_FAULT_SIGBUS;
- else if (IS_DAX(inode))
- ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
- sb_end_pagefault(inode->i_sb);
- return ret;
-
+ return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
static const struct vm_operations_struct xfs_file_vm_ops = {
@@ -1131,6 +1086,13 @@ xfs_file_mmap(
struct file *filp,
struct vm_area_struct *vma)
{
+ /*
+ * We don't support synchronous mappings for non-DAX files. At least
+ * until someone comes up with a sensible use case.
+ */
+ if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
+ return -EOPNOTSUPP;
+
file_accessed(filp);
vma->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(file_inode(filp)))
@@ -1149,6 +1111,7 @@ const struct file_operations xfs_file_operations = {
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.mmap = xfs_file_mmap,
+ .mmap_supported_flags = MAP_SYNC,
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,