Diffstat (limited to 'fs/ext4/file.c')
 fs/ext4/file.c | 405 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 218 insertions(+), 187 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 7ac0a81bd371..7a8b30932189 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (!iov_iter_count(to))
@@ -147,6 +147,17 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return generic_file_read_iter(iocb, to);
}
+static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *inode = file_inode(in);
+
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
+ return -EIO;
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
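The new ->splice_read() wrapper is needed because filemap_splice_read() copies straight from the page cache instead of going through ->read_iter(), so the forced-shutdown check from ext4_file_read_iter() has to be repeated here. A minimal userspace sketch of the path this guards (mount point and file name are illustrative):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int pfd[2];
		int fd = open("/mnt/ext4/data", O_RDONLY);

		if (fd < 0 || pipe(pfd) < 0)
			return 1;
		/* After EXT4_IOC_SHUTDOWN this should fail with EIO instead
		 * of serving stale page-cache contents. */
		ssize_t n = splice(fd, NULL, pfd[1], NULL, 4096, 0);
		printf("splice returned %zd\n", n);
		return n < 0;
	}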
/*
* Called when an inode is released. Note that this is different
* from ext4_file_open: open gets called at every open, but release
@@ -163,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
(atomic_read(&inode->i_writecount) == 1) &&
!EXT4_I(inode)->i_reserved_data_blocks) {
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode, 0);
+ ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
@@ -202,8 +213,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
return false;
}
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Is IO overwriting allocated or initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode,
+ loff_t pos, loff_t len, bool *unwritten)
{
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
@@ -217,12 +229,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
blklen = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err != blklen)
+ return false;
/*
* 'err==len' means that all of the blocks have been preallocated,
- * regardless of whether they have been initialized or not. To exclude
- * unwritten extents, we need to check m_flags.
+ * regardless of whether they have been initialized or not. We need to
+ * check m_flags to distinguish the unwritten extents.
*/
- return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+ *unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
+ return true;
}
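The helper now reports two facts instead of one: the return value says whether every block in the range is already allocated, and *unwritten says whether the mapping still needs an unwritten-to-written conversion. A condensed sketch of how a caller consumes this contract (mirroring ext4_dio_write_checks() below):

	bool unwritten = false;

	if (!ext4_overwrite_io(inode, pos, len, &unwritten)) {
		/* Not a pure overwrite: take i_rwsem exclusively. */
	} else if (unwritten) {
		/*
		 * Overwrite of unwritten extents: shared i_rwsem suffices for
		 * aligned IO because the extent split/conversion is
		 * serialized by i_data_sem instead.
		 */
	} else {
		/* Overwrite of initialized blocks: classic shared-lock path. */
	}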
static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
@@ -281,95 +296,48 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
if (ret <= 0)
goto out;
- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
out:
inode_unlock(inode);
- if (likely(ret > 0)) {
- iocb->ki_pos += ret;
- ret = generic_write_sync(iocb, ret);
- }
-
- return ret;
+ if (unlikely(ret <= 0))
+ return ret;
+ return generic_write_sync(iocb, ret);
}
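The dropped iocb->ki_pos update is not lost behavior: since the filemap conversion, generic_perform_write() advances ki_pos itself on success, so the tail only needs to separate the error case from handing the byte count to generic_write_sync().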
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
- ssize_t written, size_t count)
+ ssize_t written, ssize_t count)
{
handle_t *handle;
- bool truncate = false;
- u8 blkbits = inode->i_blkbits;
- ext4_lblk_t written_blk, end_blk;
- int ret;
-
- /*
- * Note that EXT4_I(inode)->i_disksize can get extended up to
- * inode->i_size while the I/O was running due to writeback of delalloc
- * blocks. But, the code in ext4_iomap_alloc() is careful to use
- * zeroed/unwritten extents if this is possible; thus we won't leave
- * uninitialized blocks in a file even if we didn't succeed in writing
- * as much as we intended.
- */
- WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
- if (offset + count <= EXT4_I(inode)->i_disksize) {
- /*
- * We need to ensure that the inode is removed from the orphan
- * list if it has been added prematurely, due to writeback of
- * delalloc blocks.
- */
- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-
- if (IS_ERR(handle)) {
- ext4_orphan_del(NULL, inode);
- return PTR_ERR(handle);
- }
-
- ext4_orphan_del(handle, inode);
- ext4_journal_stop(handle);
- }
-
- return written;
- }
-
- if (written < 0)
- goto truncate;
+ lockdep_assert_held_write(&inode->i_rwsem);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
- if (IS_ERR(handle)) {
- written = PTR_ERR(handle);
- goto truncate;
- }
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
if (ext4_update_inode_size(inode, offset + written)) {
- ret = ext4_mark_inode_dirty(handle, inode);
+ int ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret)) {
- written = ret;
ext4_journal_stop(handle);
- goto truncate;
+ return ret;
}
}
- /*
- * We may need to truncate allocated but not written blocks beyond EOF.
- */
- written_blk = ALIGN(offset + written, 1 << blkbits);
- end_blk = ALIGN(offset + count, 1 << blkbits);
- if (written_blk < end_blk && ext4_can_truncate(inode))
- truncate = true;
-
- /*
- * Remove the inode from the orphan list if it has been extended and
- * everything went OK.
- */
- if (!truncate && inode->i_nlink)
+ if ((written == count) && inode->i_nlink)
ext4_orphan_del(handle, inode);
ext4_journal_stop(handle);
- if (truncate) {
-truncate:
+ return written;
+}
+
+/*
+ * Clean up the inode after a DIO or DAX extending write has completed and
+ * the inode size has been updated using ext4_handle_inode_extension().
+ */
+static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
+{
+ lockdep_assert_held_write(&inode->i_rwsem);
+ if (need_trunc) {
ext4_truncate_failed_write(inode);
/*
* If the truncate operation failed early, then the inode may
@@ -378,9 +346,29 @@ truncate:
*/
if (inode->i_nlink)
ext4_orphan_del(NULL, inode);
+ return;
}
+ /*
+ * If i_disksize got extended either due to writeback of delalloc
+ * blocks or an extending truncate while the DIO was running, we could
+ * fail to clean up the orphan list in ext4_handle_inode_extension().
+ * Do it now.
+ */
+ if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) {
+ handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

- return written;
+ if (IS_ERR(handle)) {
+ /*
+ * The write has successfully completed. Not much to
+ * do with the error here, so just clean up the orphan
+ * list and hope for the best.
+ */
+ ext4_orphan_del(NULL, inode);
+ return;
+ }
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+ }
}
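Together the two helpers split the old tail of ext4_handle_inode_extension() into "account the extension" (journalled, runs from the end_io handler) and "fix up afterwards" (truncate or orphan removal, runs once the write has returned). A condensed sketch of the resulting call pattern for an extending DIO write (see ext4_dio_write_iter() below):

	/* Before issuing IO: protect the extension with an orphan entry. */
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (!IS_ERR(handle)) {
		ret = ext4_orphan_add(handle, inode);
		ext4_journal_stop(handle);
	}

	ret = iomap_dio_rw(iocb, from, ops, &ext4_dio_write_ops, dio_flags,
			   NULL, 0);
	/* The end_io handler has already run ext4_handle_inode_extension(). */
	ext4_inode_extension_cleanup(inode, ret < 0);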
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
@@ -389,31 +377,29 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(iocb->ki_filp);
- if (error)
- return error;
- if (size && flags & IOMAP_DIO_UNWRITTEN) {
+ if (!error && size && (flags & IOMAP_DIO_UNWRITTEN) &&
+ (iocb->ki_flags & IOCB_ATOMIC))
+ error = ext4_convert_unwritten_extents_atomic(NULL, inode, pos,
+ size);
+ else if (!error && size && flags & IOMAP_DIO_UNWRITTEN)
error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
- if (error < 0)
- return error;
- }
+ if (error)
+ return error;
/*
- * If we are extending the file, we have to update i_size here before
- * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
- * buffered reads could zero out too much from page cache pages. Update
- * of on-disk size will happen later in ext4_dio_write_iter() where
- * we have enough information to also perform orphan list handling etc.
- * Note that we perform all extending writes synchronously under
- * i_rwsem held exclusively so i_size update is safe here in that case.
- * If the write was not extending, we cannot see pos > i_size here
- * because operations reducing i_size like truncate wait for all
- * outstanding DIO before updating i_size.
+ * Note that EXT4_I(inode)->i_disksize can get extended up to
+ * inode->i_size while the I/O was running due to writeback of delalloc
+ * blocks. But the code in ext4_iomap_alloc() is careful to use
+ * zeroed/unwritten extents if this is possible; thus we won't leave
+ * uninitialized blocks in a file even if we didn't succeed in writing
+ * as much as we intended. Also we can race with truncate or write
+ * expanding the file so we have to be a bit careful here.
*/
- pos += size;
- if (pos > i_size_read(inode))
- i_size_write(inode, pos);
-
- return 0;
+ if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) &&
+ pos + size <= i_size_read(inode))
+ return 0;
+ error = ext4_handle_inode_extension(inode, pos, size, size);
+ return error < 0 ? error : 0;
}
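A concrete instance of the fast path above: with i_size = i_disksize = 2 MiB, a 4 KiB DIO completing at pos = 1 MiB gives pos + size = 1 MiB + 4 KiB, within both sizes, so the handler returns 0 without starting a journal handle. A write whose end crosses 2 MiB fails the checks and goes through ext4_handle_inode_extension(), which journals the size update and, if the whole range was written, drops the orphan entry.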
static const struct iomap_dio_ops ext4_dio_write_ops = {
@@ -431,17 +417,23 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
* - For extending writes case we don't take the shared lock, since it requires
* updating inode i_disksize and/or orphan handling with exclusive lock.
*
- * - shared locking will only be true mostly with overwrites. Otherwise we will
- * switch to exclusive i_rwsem lock.
+ * - Shared locking is used mostly for overwrites, of both initialized and
+ * unwritten blocks. For overwrites of unwritten blocks, the extent split
+ * is protected by i_data_sem in ext4_inode_info, so the exclusive i_rwsem
+ * lock can be released as well.
+ *
+ * - Otherwise we will switch to exclusive i_rwsem lock.
*/
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
- bool *ilock_shared, bool *extend)
+ bool *ilock_shared, bool *extend,
+ bool *unwritten, int *dio_flags)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t offset;
size_t count;
ssize_t ret;
+ bool overwrite, unaligned_io;
restart:
ret = ext4_generic_write_checks(iocb, from);
@@ -450,16 +442,25 @@ restart:
offset = iocb->ki_pos;
count = ret;
- if (ext4_extending_io(inode, offset, count))
- *extend = true;
+
+ unaligned_io = ext4_unaligned_io(inode, from, offset);
+ *extend = ext4_extending_io(inode, offset, count);
+ overwrite = ext4_overwrite_io(inode, offset, count, unwritten);
+
/*
- * Determine whether the IO operation will overwrite allocated
- * and initialized blocks.
- * We need exclusive i_rwsem for changing security info
- * in file_modified().
+ * Determine whether we need to upgrade to an exclusive lock. This is
+ * required to change security info in file_modified(), for extending
+ * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
+ * extents (as partial block zeroing may be required).
+ *
+ * Note that unaligned writes are allowed under shared lock so long as
+ * they are pure overwrites. Otherwise, concurrent unaligned writes risk
+ * data corruption due to partial block zeroing in the dio layer, and so
+ * the I/O must occur exclusively.
*/
- if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
- !ext4_overwrite_io(inode, offset, count))) {
+ if (*ilock_shared &&
+ ((!IS_NOSEC(inode) || *extend || !overwrite ||
+ (unaligned_io && *unwritten)))) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
@@ -470,6 +471,23 @@ restart:
goto restart;
}
+ /*
+ * Now that locking is settled, determine dio flags and exclusivity
+ * requirements. We don't use DIO_OVERWRITE_ONLY because the checks above
+ * already enforce overwrite semantics. The inode lock is already held
+ * exclusive if the write is non-overwrite or extending, so drain all
+ * outstanding dio and set the force wait dio flag.
+ */
+ if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (unaligned_io && (!overwrite || *unwritten))
+ inode_dio_wait(inode);
+ *dio_flags = IOMAP_DIO_FORCE_WAIT;
+ }
+
ret = file_modified(file);
if (ret < 0)
goto out;
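My reading of the combined conditions above, assuming IS_NOSEC(inode) holds (otherwise file_modified() forces the exclusive lock regardless):

	write type                               i_rwsem    extra serialization
	overwrite, written blocks (any align)    shared     none
	overwrite, unwritten, aligned            shared     i_data_sem guards the split
	overwrite, unwritten, unaligned          exclusive  inode_dio_wait() + IOMAP_DIO_FORCE_WAIT
	extending write                          exclusive  IOMAP_DIO_FORCE_WAIT
	non-overwrite, unaligned                 exclusive  inode_dio_wait() + IOMAP_DIO_FORCE_WAIT
	non-overwrite, aligned, non-extending    exclusive  none

Every exclusive row that is unaligned or extending bails out with -EAGAIN under IOCB_NOWAIT instead of blocking.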
@@ -491,18 +509,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
- bool extend = false, unaligned_io = false;
+ bool extend = false, unwritten = false;
bool ilock_shared = true;
+ int dio_flags = 0;
/*
- * We initially start with shared inode lock unless it is
- * unaligned IO which needs exclusive lock anyways.
- */
- if (ext4_unaligned_io(inode, from, offset)) {
- unaligned_io = true;
- ilock_shared = false;
- }
- /*
* Quick check here without any i_rwsem lock to see if it is extending
* IO. A more reliable check is done in ext4_dio_write_checks() with
* proper locking in place.
@@ -534,38 +545,23 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from);
}
- ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
- if (ret <= 0)
- return ret;
-
- /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
- if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
- ret = -EAGAIN;
- goto out;
- }
/*
- * Make sure inline data cannot be created anymore since we are going
- * to allocate blocks for DIO. We know the inode does not have any
- * inline data now because ext4_dio_supported() checked for that.
+ * Prevent inline data from being created since we are going to allocate
+ * blocks for DIO. We know the inode does not currently have inline data
+ * because ext4_should_use_dio() checked for it, but we have to clear
+ * the state flag before the write checks because a lock cycle could
+ * introduce races with other writers.
*/
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend,
+ &unwritten, &dio_flags);
+ if (ret <= 0)
+ return ret;
+
offset = iocb->ki_pos;
count = ret;
- /*
- * Unaligned direct IO must be serialized among each other as zeroing
- * of partial blocks of two competing unaligned IOs can result in data
- * corruption.
- *
- * So we make sure we don't allow any unaligned IO in flight.
- * For IOs where we need not wait (like unaligned non-AIO DIO),
- * below inode_dio_wait() may anyway become a no-op, since we start
- * with exclusive lock.
- */
- if (unaligned_io)
- inode_dio_wait(inode);
-
if (extend) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
@@ -574,24 +570,27 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
ret = ext4_orphan_add(handle, inode);
- if (ret) {
- ext4_journal_stop(handle);
- goto out;
- }
-
ext4_journal_stop(handle);
+ if (ret)
+ goto out;
}
- if (ilock_shared)
+ if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
- (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
- NULL, 0);
+ dio_flags, NULL, 0);
if (ret == -ENOTBLK)
ret = 0;
-
- if (extend)
- ret = ext4_handle_inode_extension(inode, offset, ret, count);
+ if (extend) {
+ /*
+ * We always perform an extending DIO write synchronously, so by
+ * now the IO has completed and ext4_handle_inode_extension()
+ * was called. Clean up the inode in case of an error or a race
+ * with writeback of delalloc blocks.
+ */
+ WARN_ON_ONCE(ret == -EIOCBQUEUED);
+ ext4_inode_extension_cleanup(inode, ret < 0);
+ }
out:
if (ilock_shared)
@@ -603,6 +602,13 @@ out:
ssize_t err;
loff_t endbyte;
+ /*
+ * There is no support for atomic writes on buffered IO yet;
+ * we should never fall back to buffered IO for DIO atomic
+ * writes.
+ */
+ WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC);
+
offset = iocb->ki_pos;
err = ext4_buffered_write_iter(iocb, from);
if (err < 0)
@@ -672,8 +678,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
- if (extend)
+ if (extend) {
ret = ext4_handle_inode_extension(inode, offset, ret, count);
+ ext4_inode_extension_cleanup(inode, ret < (ssize_t)count);
+ }
out:
inode_unlock(inode);
if (ret > 0)
@@ -685,15 +693,30 @@ out:
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
+ int ret;
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
- return -EIO;
+ ret = ext4_emergency_state(inode->i_sb);
+ if (unlikely(ret))
+ return ret;
#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
#endif
+
+ if (iocb->ki_flags & IOCB_ATOMIC) {
+ size_t len = iov_iter_count(from);
+
+ if (len < EXT4_SB(inode->i_sb)->s_awu_min ||
+ len > EXT4_SB(inode->i_sb)->s_awu_max)
+ return -EINVAL;
+
+ ret = generic_atomic_write_valid(iocb, from);
+ if (ret)
+ return ret;
+ }
+
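From userspace, the bounds check above means an RWF_ATOMIC write is accepted only when its length lies within [s_awu_min, s_awu_max] as advertised via statx(). An illustrative caller (not from the patch; requires an O_DIRECT fd since buffered atomic writes are rejected):

	#define _GNU_SOURCE
	#include <sys/uio.h>

	#ifndef RWF_ATOMIC		/* uapi flag, newer than some libcs */
	#define RWF_ATOMIC 0x00000040
	#endif

	static ssize_t atomic_pwrite(int fd, const void *buf, size_t len,
				     off_t off)
	{
		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };

		/* Fails with EINVAL if len is outside the atomic write unit
		 * min/max reported by statx(STATX_WRITE_ATOMIC). */
		return pwritev2(fd, &iov, 1, off, RWF_ATOMIC);
	}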
if (iocb->ki_flags & IOCB_DIRECT)
return ext4_dio_write_iter(iocb, from);
else
@@ -701,8 +724,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
#ifdef CONFIG_FS_DAX
-static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
- enum page_entry_size pe_size)
+static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
int error = 0;
vm_fault_t result;
@@ -718,14 +740,14 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
* read-only.
*
* We check for VM_SHARED rather than vmf->cow_page since the latter is
- * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
+ * unset for order != 0 (i.e. only in do_cow_fault); for
* other sizes, dax_iomap_fault will handle splitting / fallback so that
* we eventually come back with a COW page.
*/
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- pfn_t pfn;
+ unsigned long pfn;
if (write) {
sb_start_pagefault(sb);
@@ -742,7 +764,7 @@ retry:
} else {
filemap_invalidate_lock_shared(mapping);
}
- result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
+ result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
if (write) {
ext4_journal_stop(handle);
@@ -751,7 +773,7 @@ retry:
goto retry;
/* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
- result = dax_finish_sync_fault(vmf, pe_size, pfn);
+ result = dax_finish_sync_fault(vmf, order, pfn);
filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
} else {
@@ -763,7 +785,7 @@ retry:
static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
- return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
+ return ext4_dax_huge_fault(vmf, 0);
}
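The pe_size enum is gone; fault handlers now take the folio order directly and dax_iomap_fault() keys its PTE/PMD/PUD behavior off that value. For reference, my sketch of the old-to-new mapping (PMD/PUD orders assume 4 KiB pages):

	/*
	 *	PE_SIZE_PTE -> 0
	 *	PE_SIZE_PMD -> PMD_ORDER	(9:  2 MiB)
	 *	PE_SIZE_PUD -> PUD_ORDER	(18: 1 GiB)
	 */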
static const struct vm_operations_struct ext4_dax_vm_ops = {
@@ -782,28 +804,33 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
.page_mkwrite = ext4_page_mkwrite,
};
-static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int ext4_file_mmap_prepare(struct vm_area_desc *desc)
{
+ int ret;
+ struct file *file = desc->file;
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct dax_device *dax_dev = sbi->s_daxdev;
+ struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
- if (unlikely(ext4_forced_shutdown(sbi)))
- return -EIO;
+ if (file->f_mode & FMODE_WRITE)
+ ret = ext4_emergency_state(inode->i_sb);
+ else
+ ret = ext4_forced_shutdown(inode->i_sb) ? -EIO : 0;
+ if (unlikely(ret))
+ return ret;
/*
* We don't support synchronous mappings for non-DAX files and
* for DAX files if underneath dax_device is not synchronous.
*/
- if (!daxdev_mapping_supported(vma, dax_dev))
+ if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), dax_dev))
return -EOPNOTSUPP;
file_accessed(file);
if (IS_DAX(file_inode(file))) {
- vma->vm_ops = &ext4_dax_vm_ops;
- vma->vm_flags |= VM_HUGEPAGE;
+ desc->vm_ops = &ext4_dax_vm_ops;
+ desc->vm_flags |= VM_HUGEPAGE;
} else {
- vma->vm_ops = &ext4_file_vm_ops;
+ desc->vm_ops = &ext4_file_vm_ops;
}
return 0;
}
@@ -820,7 +847,8 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
return 0;
- if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
+ if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+ !sb_start_intwrite_trylock(sb))
return 0;
ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
@@ -848,8 +876,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (err)
goto out_journal;
lock_buffer(sbi->s_sbh);
- strncpy(sbi->s_es->s_last_mounted, cp,
- sizeof(sbi->s_es->s_last_mounted));
+ strtomem_pad(sbi->s_es->s_last_mounted, cp, 0);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
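strtomem_pad() fits here because s_last_mounted is a fixed-size, not necessarily NUL-terminated on-disk char array: the helper copies at most sizeof(dest) bytes of the string, fills the remainder with the pad byte, and refuses to build when dest is not an array of known size, which sidesteps strncpy()'s truncation pitfalls. A hedged sketch of the semantics:

	char last_mounted[64];

	/* Copies min(strlen(cp), sizeof(last_mounted)) bytes and zero-pads
	 * the rest; no NUL terminator is added if cp fills the array. */
	strtomem_pad(last_mounted, cp, 0);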
@@ -864,8 +891,12 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
- return -EIO;
+ if (filp->f_mode & FMODE_WRITE)
+ ret = ext4_emergency_state(inode->i_sb);
+ else
+ ret = ext4_forced_shutdown(inode->i_sb) ? -EIO : 0;
+ if (unlikely(ret))
+ return ret;
ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
if (ret)
@@ -889,7 +920,10 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ if (ext4_inode_can_atomic_write(inode))
+ filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+
+ filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
@@ -901,12 +935,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- loff_t maxbytes;
-
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
- else
- maxbytes = inode->i_sb->s_maxbytes;
+ loff_t maxbytes = ext4_get_maxbytes(inode);
switch (whence) {
default:
@@ -940,15 +969,17 @@ const struct file_operations ext4_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
- .mmap = ext4_file_mmap,
- .mmap_supported_flags = MAP_SYNC,
+ .mmap_prepare = ext4_file_mmap_prepare,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
.get_unmapped_area = thp_get_unmapped_area,
- .splice_read = generic_file_splice_read,
+ .splice_read = ext4_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
+ .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
+ FOP_DIO_PARALLEL_WRITE |
+ FOP_DONTCACHE,
};
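The per-open mode bits migrate as well: FMODE_BUF_RASYNC, previously set in ->open(), becomes the static FOP_BUFFER_RASYNC, and .mmap_supported_flags = MAP_SYNC becomes FOP_MMAP_SYNC, so capabilities that never vary per open now live in one fop_flags mask. Condensed from the deletions above, the old shape was:

	/* in ext4_file_open() */
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	const struct file_operations ext4_file_operations = {
		.mmap			= ext4_file_mmap,
		.mmap_supported_flags	= MAP_SYNC,
		/* ... */
	};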
const struct inode_operations ext4_file_inode_operations = {