diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 131 |
1 files changed, 114 insertions, 17 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 32825dee81d4..4879e93c91d3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3295,7 +3295,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, } /* - * For ext4 extent files, ext4 will do direct-io write to holes, + * Handling of direct IO writes. + * + * For ext4 extent files, ext4 will do direct-io write even to holes, * preallocated extents, and those write extend the file, no need to * fall back to buffered IO. * @@ -3313,21 +3315,37 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. * */ -static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); ssize_t ret; size_t count = iov_iter_count(iter); int overwrite = 0; get_block_t *get_block_func = NULL; int dio_flags = 0; loff_t final_size = offset + count; + int orphan = 0; + handle_t *handle; - /* Use the old path for reads and writes beyond i_size. */ - if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(iocb, iter, offset); + if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext4_orphan_add(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out; + } + orphan = 1; + ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); + } BUG_ON(iocb->private == NULL); @@ -3336,8 +3354,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * conversion. This also disallows race between truncate() and * overwrite DIO as i_dio_count needs to be incremented under i_mutex. */ - if (iov_iter_rw(iter) == WRITE) - inode_dio_begin(inode); + inode_dio_begin(inode); /* If we do a overwrite dio, i_mutex locking can be released */ overwrite = *((int *)iocb->private); @@ -3346,7 +3363,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, inode_unlock(inode); /* - * We could direct write to holes and fallocate. + * For extent mapped files we could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as unwritten to prevent * parallel buffered read to expose the stale data before DIO complete @@ -3368,7 +3385,11 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, iocb->private = NULL; if (overwrite) get_block_func = ext4_dio_get_block_overwrite; - else if (is_sync_kiocb(iocb)) { + else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { + get_block_func = ext4_dio_get_block; + dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; + } else if (is_sync_kiocb(iocb)) { get_block_func = ext4_dio_get_block_unwritten_sync; dio_flags = DIO_LOCKING; } else { @@ -3378,10 +3399,11 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, #ifdef CONFIG_EXT4_FS_ENCRYPTION BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif - if (IS_DAX(inode)) + if (IS_DAX(inode)) { + dio_flags &= ~DIO_SKIP_HOLES; ret = dax_do_io(iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); - else + } else ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block_func, @@ -3401,12 +3423,87 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } - if (iov_iter_rw(iter) == WRITE) - inode_dio_end(inode); + inode_dio_end(inode); /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) inode_lock(inode); + if (ret < 0 && final_size > inode->i_size) + ext4_truncate_failed_write(inode); + + /* Handle extending of i_size after direct IO write */ + if (orphan) { + int err; + + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... */ + ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + goto out; + } + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + if (ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) +{ + int unlocked = 0; + struct inode *inode = iocb->ki_filp->f_mapping->host; + ssize_t ret; + + if (ext4_should_dioread_nolock(inode)) { + /* + * Nolock dioread optimization may be dynamically disabled + * via ext4_inode_block_unlocked_dio(). Check inode's state + * while holding extra i_dio_count ref. + */ + inode_dio_begin(inode); + smp_mb(); + if (unlikely(ext4_test_inode_state(inode, + EXT4_STATE_DIOREAD_LOCK))) + inode_dio_end(inode); + else + unlocked = 1; + } + if (IS_DAX(inode)) { + ret = dax_do_io(iocb, inode, iter, offset, ext4_dio_get_block, + NULL, unlocked ? 0 : DIO_LOCKING); + } else { + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, ext4_dio_get_block, + NULL, NULL, + unlocked ? 0 : DIO_LOCKING); + } + if (unlocked) + inode_dio_end(inode); return ret; } @@ -3434,10 +3531,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return 0; trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(iocb, iter, offset); + if (iov_iter_rw(iter) == READ) + ret = ext4_direct_IO_read(iocb, iter, offset); else - ret = ext4_ind_direct_IO(iocb, iter, offset); + ret = ext4_direct_IO_write(iocb, iter, offset); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } |