summaryrefslogtreecommitdiff
path: root/fs/ext2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext2/file.c')
-rw-r--r--fs/ext2/file.c306
1 files changed, 269 insertions, 37 deletions
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a5b3a5db3120..76bddce462fc 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext2/file.c
*
@@ -20,10 +21,121 @@
#include <linux/time.h>
#include <linux/pagemap.h>
+#include <linux/dax.h>
#include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
+#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
+#include "trace.h"
+
+#ifdef CONFIG_FS_DAX
+/*
+ * DAX read path: perform the read through dax_iomap_rw() using
+ * ext2_iomap_ops, with the inode lock held shared for the duration.
+ *
+ * A zero-length request returns 0 immediately, before file_accessed() is
+ * reached.  Otherwise file_accessed() is called after the lock has been
+ * dropped and the dax_iomap_rw() result is returned unchanged.
+ */
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ ssize_t ret;
+
+ if (!iov_iter_count(to))
+ return 0; /* skip atime */
+
+ inode_lock_shared(inode);
+ ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
+ inode_unlock_shared(inode);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+
+/*
+ * DAX write path.
+ *
+ * With the inode lock held exclusively, generic_write_checks(),
+ * file_remove_privs() and file_update_time() are called in that order; a
+ * result <= 0 from the first, or a non-zero result from either of the
+ * others, skips the write and is returned as-is.  After a dax_iomap_rw()
+ * that wrote data, i_size is raised to iocb->ki_pos if the write went past
+ * it, and the inode is marked dirty.
+ *
+ * generic_write_sync() is called only when ret > 0, and only after the
+ * inode lock has been released; its result becomes the return value.
+ */
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
+ ret = file_update_time(file);
+ if (ret)
+ goto out_unlock;
+
+ ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
+ if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+ i_size_write(inode, iocb->ki_pos);
+ mark_inode_dirty(inode);
+ }
+
+out_unlock:
+ inode_unlock(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+
+/*
+ * The lock ordering for ext2 DAX fault paths is:
+ *
+ * mmap_lock (MM)
+ * sb_start_pagefault (vfs, freeze)
+ * address_space->invalidate_lock
+ * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
+ * ext2_inode_info->truncate_mutex
+ *
+ * The default page_lock and i_size verification done by non-DAX fault paths
+ * is sufficient because ext2 doesn't support hole punching.
+ *
+ * ext2_dax_fault() treats a fault as a write only when FAULT_FLAG_WRITE is
+ * set and the mapping is VM_SHARED.  Only in that case is the fault
+ * bracketed by sb_start_pagefault()/sb_end_pagefault() and the file time
+ * updated; the return value of file_update_time() is not checked here.
+ * The fault itself is resolved by dax_iomap_fault() with the mapping's
+ * invalidate lock held shared, and its result is returned unchanged.
+ */
+static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ vm_fault_t ret;
+ bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
+ (vmf->vma->vm_flags & VM_SHARED);
+
+ if (write) {
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vmf->vma->vm_file);
+ }
+ filemap_invalidate_lock_shared(inode->i_mapping);
+
+ ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);
+
+ filemap_invalidate_unlock_shared(inode->i_mapping);
+ if (write)
+ sb_end_pagefault(inode->i_sb);
+ return ret;
+}
+
+static const struct vm_operations_struct ext2_dax_vm_ops = {
+ .fault = ext2_dax_fault,
+ /*
+ * .huge_fault is not supported for DAX because allocation in ext2
+ * cannot be reliably aligned to huge page sizes and so pmd faults
+ * will always fail and fall back to regular faults.
+ */
+ .page_mkwrite = ext2_dax_fault,
+ .pfn_mkwrite = ext2_dax_fault,
+};
+
+/*
+ * mmap_prepare hook.  Non-DAX inodes are handed straight to
+ * generic_file_mmap_prepare().  For DAX inodes, file_accessed() is called,
+ * ext2_dax_vm_ops is installed in the descriptor and 0 is returned.
+ */
+static int ext2_file_mmap_prepare(struct vm_area_desc *desc)
+{
+ struct file *file = desc->file;
+
+ if (!IS_DAX(file_inode(file)))
+ return generic_file_mmap_prepare(desc);
+
+ file_accessed(file);
+ desc->vm_ops = &ext2_dax_vm_ops;
+ return 0;
+}
+#else
+#define ext2_file_mmap_prepare generic_file_mmap_prepare
+#endif
/*
* Called when filp is released. This happens when all file descriptors
@@ -44,64 +156,184 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
- struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
- ret = generic_file_fsync(file, start, end, datasync);
- if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
+ ret = generic_buffers_fsync(file, start, end, datasync);
+ if (ret == -EIO)
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
"detected IO error when writing metadata buffers");
- ret = -EIO;
+ return ret;
+}
+
+/*
+ * Direct I/O read: calls iomap_dio_rw() with ext2_iomap_ops while holding
+ * the inode lock shared, and returns its result unchanged.  Trace events
+ * are emitted before the lock is taken and after it is released.
+ */
+static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ trace_ext2_dio_read_begin(iocb, to, 0);
+ inode_lock_shared(inode);
+ ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
+ inode_unlock_shared(inode);
+ trace_ext2_dio_read_end(iocb, to, ret);
+
+ return ret;
+}
+
+/*
+ * ->end_io callback for direct I/O writes (installed through
+ * ext2_dio_write_ops).
+ *
+ * When @error is non-zero nothing is updated.  Otherwise i_size is raised
+ * to iocb->ki_pos + @size if that lies beyond the current i_size, and the
+ * inode is marked dirty.  @error is always returned unchanged; @flags is
+ * not used.
+ */
+static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ loff_t pos = iocb->ki_pos;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (error)
+ goto out;
+
+ /*
+ * If we are extending the file, we have to update i_size here before
+ * page cache gets invalidated in iomap_dio_rw(). This prevents racing
+ * buffered reads from zeroing out too much from page cache pages.
+ * Note that all extending writes always happen synchronously with
+ * inode lock held by ext2_dio_write_iter(). So it is safe to update
+ * inode size here for extending file writes.
+ */
+ pos += size;
+ if (pos > i_size_read(inode)) {
+ i_size_write(inode, pos);
+ mark_inode_dirty(inode);
+ }
+out:
+ trace_ext2_dio_write_endio(iocb, size, error);
+ return error;
+}
+
+/* Completion hooks handed to iomap_dio_rw() by ext2_dio_write_iter(). */
+static const struct iomap_dio_ops ext2_dio_write_ops = {
+ .end_io = ext2_dio_write_end_io,
+};
+
+/*
+ * Direct I/O write path, with fallback to buffered I/O.
+ *
+ * Under the exclusive inode lock: run generic_write_checks() and
+ * kiocb_modified(), then issue iomap_dio_rw() with ext2_dio_write_ops.
+ * IOMAP_DIO_FORCE_WAIT is set when the write ends beyond i_size or is not
+ * aligned to the filesystem block size.  -ENOTBLK from iomap_dio_rw() is
+ * turned into 0 so that the remaining bytes are written with
+ * generic_perform_write(); the same happens after a short direct write.
+ * The buffered range is then handed to filemap_write_and_wait_range() and,
+ * if that returns 0, to invalidate_mapping_pages().
+ *
+ * Returns the total number of bytes written (direct plus buffered), or a
+ * negative error.
+ *
+ * NOTE(review): the return value of generic_write_sync() in the fallback
+ * branch is discarded, unlike in ext2_dax_write_iter() - confirm that a
+ * failed sync is meant to go unreported here.
+ * NOTE(review): offset and count are sampled before generic_write_checks();
+ * if that call can change ki_pos or the iterator length, the range given to
+ * ext2_write_failed() would be stale - confirm.
+ */
+static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+ unsigned int flags = 0;
+ unsigned long blocksize = inode->i_sb->s_blocksize;
+ loff_t offset = iocb->ki_pos;
+ loff_t count = iov_iter_count(from);
+ ssize_t status = 0;
+
+ trace_ext2_dio_write_begin(iocb, from, 0);
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
+
+ ret = kiocb_modified(iocb);
+ if (ret)
+ goto out_unlock;
+
+ /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
+ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
+ (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
+ flags |= IOMAP_DIO_FORCE_WAIT;
+
+ ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
+ flags, NULL, 0);
+
+ /* ENOTBLK is magic return value for fallback to buffered-io */
+ if (ret == -ENOTBLK)
+ ret = 0;
+
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ ext2_write_failed(inode->i_mapping, offset + count);
+
+ /* handle case for partial write and for fallback to buffered write */
+ if (ret >= 0 && iov_iter_count(from)) {
+ loff_t pos, endbyte;
+ int ret2;
+
+ iocb->ki_flags &= ~IOCB_DIRECT;
+ pos = iocb->ki_pos;
+ status = generic_perform_write(iocb, from);
+ if (unlikely(status < 0)) {
+ ret = status;
+ goto out_unlock;
+ }
+
+ ret += status;
+ endbyte = pos + status - 1;
+ ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
+ endbyte);
+ if (!ret2)
+ invalidate_mapping_pages(inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
+ if (ret > 0)
+ generic_write_sync(iocb, ret);
}
+
+out_unlock:
+ inode_unlock(inode);
+ if (status)
+ trace_ext2_dio_write_buff_end(iocb, from, status);
+ trace_ext2_dio_write_end(iocb, from, ret);
return ret;
}
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
-const struct file_operations ext2_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
- .unlocked_ioctl = ext2_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext2_compat_ioctl,
+/*
+ * ->read_iter entry point.  With CONFIG_FS_DAX, DAX inodes are served by
+ * ext2_dax_read_iter(); otherwise IOCB_DIRECT requests go to
+ * ext2_dio_read_iter() and everything else to generic_file_read_iter().
+ */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ return ext2_dax_read_iter(iocb, to);
#endif
- .mmap = generic_file_mmap,
- .open = dquot_file_open,
- .release = ext2_release_file,
- .fsync = ext2_fsync,
- .splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
-};
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext2_dio_read_iter(iocb, to);
+
+ return generic_file_read_iter(iocb, to);
+}
-#ifdef CONFIG_EXT2_FS_XIP
-const struct file_operations ext2_xip_file_operations = {
+/*
+ * ->write_iter entry point.  With CONFIG_FS_DAX, DAX inodes are served by
+ * ext2_dax_write_iter(); otherwise IOCB_DIRECT requests go to
+ * ext2_dio_write_iter() and everything else to generic_file_write_iter().
+ */
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ return ext2_dax_write_iter(iocb, from);
+#endif
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext2_dio_write_iter(iocb, from);
+
+ return generic_file_write_iter(iocb, from);
+}
+
+/*
+ * ->open hook: sets FMODE_CAN_ODIRECT on the file, then delegates to
+ * dquot_file_open() and returns its result.
+ */
+static int ext2_file_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode |= FMODE_CAN_ODIRECT;
+ return dquot_file_open(inode, filp);
+}
+
+/*
+ * File operations table.  Read, write, mmap and open are routed through
+ * the ext2_file_* wrappers in this file, which select between the DAX,
+ * direct I/O and generic paths.
+ */
+const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = xip_file_read,
- .write = xip_file_write,
+ .read_iter = ext2_file_read_iter,
+ .write_iter = ext2_file_write_iter,
.unlocked_ioctl = ext2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext2_compat_ioctl,
#endif
- .mmap = xip_file_mmap,
- .open = dquot_file_open,
+ .mmap_prepare = ext2_file_mmap_prepare,
+ .open = ext2_file_open,
.release = ext2_release_file,
.fsync = ext2_fsync,
+ .get_unmapped_area = thp_get_unmapped_area,
+ .splice_read = filemap_splice_read,
+ .splice_write = iter_file_splice_write,
};
-#endif
+/*
+ * Inode operations table: xattr listing, attribute get/set, ACL get/set,
+ * fiemap and fileattr get/set handlers.
+ */
const struct inode_operations ext2_file_inode_operations = {
-#ifdef CONFIG_EXT2_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
.listxattr = ext2_listxattr,
- .removexattr = generic_removexattr,
-#endif
+ .getattr = ext2_getattr,
.setattr = ext2_setattr,
- .get_acl = ext2_get_acl,
+ .get_inode_acl = ext2_get_acl,
+ .set_acl = ext2_set_acl,
.fiemap = ext2_fiemap,
+ .fileattr_get = ext2_fileattr_get,
+ .fileattr_set = ext2_fileattr_set,
};