summaryrefslogtreecommitdiff
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c290
1 files changed, 209 insertions, 81 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bf596467c234..03c2253005f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -101,8 +101,8 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
return provided == calculated;
}
-static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
- struct ext4_inode_info *ei)
+void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+ struct ext4_inode_info *ei)
{
__u32 csum;
@@ -514,7 +514,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
return -EFSCORRUPTED;
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -729,6 +730,8 @@ out_sem:
if (ret)
return ret;
}
+ ext4_fc_track_range(inode, map->m_lblk,
+ map->m_lblk + map->m_len - 1);
}
if (retval < 0)
@@ -825,7 +828,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
- J_ASSERT(handle != NULL || create == 0);
+ J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
@@ -841,7 +845,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
J_ASSERT(create != 0);
- J_ASSERT(handle != NULL);
+ J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || (handle != NULL));
/*
* Now that we do not always journal data, we should
@@ -878,18 +883,20 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int map_flags)
{
struct buffer_head *bh;
+ int ret;
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
if (!bh || ext4_buffer_uptodate(bh))
return bh;
- ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return bh;
- put_bh(bh);
- return ERR_PTR(-EIO);
+
+ ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true);
+ if (ret) {
+ put_bh(bh);
+ return ERR_PTR(ret);
+ }
+ return bh;
}
/* Read a contiguous batch of blocks. */
@@ -910,8 +917,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
for (i = 0; i < bh_count; i++)
/* Note that NULL bhs[i] is valid because of holes. */
if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
- ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
- &bhs[i]);
+ ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false);
if (!wait)
return 0;
@@ -1081,7 +1087,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(REQ_OP_READ, 0, 1, &bh);
+ ext4_read_bh_lock(bh, 0, false);
wait[nr_wait++] = bh;
}
}
@@ -1912,6 +1918,9 @@ static int __ext4_journalled_writepage(struct page *page,
}
if (ret == 0)
ret = err;
+ err = ext4_jbd2_inode_add_write(handle, inode, 0, len);
+ if (ret == 0)
+ ret = err;
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
err = ext4_journal_stop(handle);
if (!ret)
@@ -2254,7 +2263,7 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
err = PTR_ERR(io_end_vec);
goto out;
}
- io_end_vec->offset = mpd->map.m_lblk << blkbits;
+ io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits;
}
*map_bh = true;
goto out;
@@ -2785,7 +2794,7 @@ retry:
* ext4_journal_stop() can wait for transaction commit
* to finish which may depend on writeback of pages to
* complete or on page lock to be released. In that
- * case, we have to wait until after after we have
+ * case, we have to wait until after we have
* submitted all the IO, released page locks we hold,
* and dropped io_end reference (for extent conversion
* to be able to complete) before stopping the handle.
@@ -3296,9 +3305,14 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
{
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- if (journal)
- return !jbd2_transaction_committed(journal,
- EXT4_I(inode)->i_datasync_tid);
+ if (journal) {
+ if (jbd2_transaction_committed(journal,
+ EXT4_I(inode)->i_datasync_tid))
+ return true;
+ return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) >=
+ EXT4_I(inode)->i_fc_committed_subtid;
+ }
+
/* Any metadata buffers to write? */
if (!list_empty(&inode->i_mapping->private_list))
return true;
@@ -3436,14 +3450,26 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
- if (flags & IOMAP_WRITE)
+ if (flags & IOMAP_WRITE) {
+ /*
+ * We check here if the blocks are already allocated, then we
+ * don't need to start a journal txn and we can directly return
+ * the mapping information. This could boost performance
+ * especially in multi-threaded overwrite requests.
+ */
+ if (offset + length <= i_size_read(inode)) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
+ goto out;
+ }
ret = ext4_iomap_alloc(inode, &map, flags);
- else
+ } else {
ret = ext4_map_blocks(NULL, inode, &map, 0);
+ }
if (ret < 0)
return ret;
-
+out:
ext4_set_iomap(inode, iomap, &map, offset, length);
return 0;
@@ -3601,6 +3627,13 @@ static int ext4_set_page_dirty(struct page *page)
return __set_page_dirty_buffers(page);
}
+static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
+ struct file *file, sector_t *span)
+{
+ return iomap_swapfile_activate(sis, file, span,
+ &ext4_iomap_report_ops);
+}
+
static const struct address_space_operations ext4_aops = {
.readpage = ext4_readpage,
.readahead = ext4_readahead,
@@ -3616,6 +3649,7 @@ static const struct address_space_operations ext4_aops = {
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
+ .swap_activate = ext4_iomap_swap_activate,
};
static const struct address_space_operations ext4_journalled_aops = {
@@ -3632,6 +3666,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.direct_IO = noop_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
+ .swap_activate = ext4_iomap_swap_activate,
};
static const struct address_space_operations ext4_da_aops = {
@@ -3649,6 +3684,7 @@ static const struct address_space_operations ext4_da_aops = {
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
+ .swap_activate = ext4_iomap_swap_activate,
};
static const struct address_space_operations ext4_dax_aops = {
@@ -3657,6 +3693,7 @@ static const struct address_space_operations ext4_dax_aops = {
.set_page_dirty = noop_set_page_dirty,
.bmap = ext4_bmap,
.invalidatepage = noop_invalidatepage,
+ .swap_activate = ext4_iomap_swap_activate,
};
void ext4_set_aops(struct inode *inode)
@@ -3730,11 +3767,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
- err = -EIO;
- ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- wait_on_buffer(bh);
- /* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
+ err = ext4_read_bh_lock(bh, 0, true);
+ if (err)
goto unlock;
if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
/* We expect the key to be set. */
@@ -4073,6 +4107,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
up_write(&EXT4_I(inode)->i_data_sem);
}
+ ext4_fc_track_range(inode, first_block, stop_block);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
@@ -4252,22 +4287,22 @@ out_trace:
* data in memory that is needed to recreate the on-disk version of this
* inode.
*/
-static int __ext4_get_inode_loc(struct inode *inode,
- struct ext4_iloc *iloc, int in_mem)
+static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
+ struct ext4_iloc *iloc, int in_mem,
+ ext4_fsblk_t *ret_block)
{
struct ext4_group_desc *gdp;
struct buffer_head *bh;
- struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
- if (inode->i_ino < EXT4_ROOT_INO ||
- inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
+ if (ino < EXT4_ROOT_INO ||
+ ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
return -EFSCORRUPTED;
- iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+ iloc->block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
if (!gdp)
return -EIO;
@@ -4276,7 +4311,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
* Figure out the offset within the block group inode table
*/
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
- inode_offset = ((inode->i_ino - 1) %
+ inode_offset = ((ino - 1) %
EXT4_INODES_PER_GROUP(sb));
block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
@@ -4289,16 +4324,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
- /*
- * If the buffer has the write error flag, we have failed
- * to write out another inode in the same block. In this
- * case, we don't have to read the block because we may
- * read the old inode data successfully.
- */
- if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
- set_buffer_uptodate(bh);
-
- if (buffer_uptodate(bh)) {
+ if (ext4_buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
goto has_buffer;
@@ -4369,7 +4395,7 @@ make_io:
if (end > table)
end = table;
while (b <= end)
- sb_breadahead_unmovable(sb, b++);
+ ext4_sb_breadahead_unmovable(sb, b++);
}
/*
@@ -4377,16 +4403,14 @@ make_io:
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
- trace_ext4_load_inode(inode);
- get_bh(bh);
- bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ trace_ext4_load_inode(sb, ino);
+ ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
simulate_eio:
- ext4_error_inode_block(inode, block, EIO,
- "unable to read itable block");
+ if (ret_block)
+ *ret_block = block;
brelse(bh);
return -EIO;
}
@@ -4396,11 +4420,43 @@ has_buffer:
return 0;
}
+static int __ext4_get_inode_loc_noinmem(struct inode *inode,
+ struct ext4_iloc *iloc)
+{
+ ext4_fsblk_t err_blk;
+ int ret;
+
+ ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0,
+ &err_blk);
+
+ if (ret == -EIO)
+ ext4_error_inode_block(inode, err_blk, EIO,
+ "unable to read itable block");
+
+ return ret;
+}
+
int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
{
+ ext4_fsblk_t err_blk;
+ int ret;
+
/* We have all inode data except xattrs in memory here. */
- return __ext4_get_inode_loc(inode, iloc,
- !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
+ ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc,
+ !ext4_test_inode_state(inode, EXT4_STATE_XATTR), &err_blk);
+
+ if (ret == -EIO)
+ ext4_error_inode_block(inode, err_blk, EIO,
+ "unable to read itable block");
+
+ return ret;
+}
+
+
+int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
+ struct ext4_iloc *iloc)
+{
+ return __ext4_get_inode_loc(sb, ino, iloc, 0, NULL);
}
static bool ext4_should_enable_dax(struct inode *inode)
@@ -4566,7 +4622,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ei = EXT4_I(inode);
iloc.bh = NULL;
- ret = __ext4_get_inode_loc(inode, &iloc, 0);
+ ret = __ext4_get_inode_loc_noinmem(inode, &iloc);
if (ret < 0)
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
@@ -4612,10 +4668,11 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
sizeof(gen));
}
- if (!ext4_inode_csum_verify(inode, raw_inode, ei) ||
- ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) {
- ext4_error_inode_err(inode, function, line, 0, EFSBADCRC,
- "iget: checksum invalid");
+ if ((!ext4_inode_csum_verify(inode, raw_inode, ei) ||
+ ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) &&
+ (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))) {
+ ext4_error_inode_err(inode, function, line, 0,
+ EFSBADCRC, "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
@@ -4703,6 +4760,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
for (block = 0; block < EXT4_N_BLOCKS; block++)
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
+ ext4_fc_init_inode(&ei->vfs_inode);
/*
* Set transaction id's of transactions that have to be committed
@@ -4768,9 +4826,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode;
} else if (!ext4_has_inline_data(inode)) {
/* validate the block references in the inode */
- if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- (S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode))) {
+ if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ (S_ISLNK(inode->i_mode) &&
+ !ext4_inode_is_fast_symlink(inode)))) {
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_check_inode(inode);
else
@@ -4971,6 +5030,12 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+ err = ext4_inode_blocks_set(handle, raw_inode, ei);
+ if (err) {
+ spin_unlock(&ei->i_raw_lock);
+ goto out_brelse;
+ }
+
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
@@ -5004,11 +5069,6 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
- err = ext4_inode_blocks_set(handle, raw_inode, ei);
- if (err) {
- spin_unlock(&ei->i_raw_lock);
- goto out_brelse;
- }
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
@@ -5149,12 +5209,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
- err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
} else {
struct ext4_iloc iloc;
- err = __ext4_get_inode_loc(inode, &iloc, 0);
+ err = __ext4_get_inode_loc_noinmem(inode, &iloc);
if (err)
return err;
/*
@@ -5278,6 +5338,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
}
+ ext4_fc_start_update(inode);
if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
(ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
handle_t *handle;
@@ -5301,6 +5362,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error) {
ext4_journal_stop(handle);
+ ext4_fc_stop_update(inode);
return error;
}
/* Update corresponding info in inode so that everything is in
@@ -5323,11 +5385,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (attr->ia_size > sbi->s_bitmap_maxbytes)
+ if (attr->ia_size > sbi->s_bitmap_maxbytes) {
+ ext4_fc_stop_update(inode);
return -EFBIG;
+ }
}
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode)) {
+ ext4_fc_stop_update(inode);
return -EINVAL;
+ }
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
@@ -5351,7 +5417,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
rc = ext4_break_layouts(inode);
if (rc) {
up_write(&EXT4_I(inode)->i_mmap_sem);
- return rc;
+ goto err_out;
}
if (attr->ia_size != inode->i_size) {
@@ -5372,6 +5438,21 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_mtime = current_time(inode);
inode->i_ctime = inode->i_mtime;
}
+
+ if (shrink)
+ ext4_fc_track_range(inode,
+ (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
+ inode->i_sb->s_blocksize_bits,
+ (oldsize > 0 ? oldsize - 1 : 0) >>
+ inode->i_sb->s_blocksize_bits);
+ else
+ ext4_fc_track_range(
+ inode,
+ (oldsize > 0 ? oldsize - 1 : oldsize) >>
+ inode->i_sb->s_blocksize_bits,
+ (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
+ inode->i_sb->s_blocksize_bits);
+
down_write(&EXT4_I(inode)->i_data_sem);
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
@@ -5430,9 +5511,11 @@ out_mmap_sem:
rc = posix_acl_chmod(inode, inode->i_mode);
err_out:
- ext4_std_error(inode->i_sb, error);
+ if (error)
+ ext4_std_error(inode->i_sb, error);
if (!error)
error = rc;
+ ext4_fc_stop_update(inode);
return error;
}
@@ -5614,6 +5697,8 @@ int ext4_mark_iloc_dirty(handle_t *handle,
put_bh(iloc->bh);
return -EIO;
}
+ ext4_fc_track_inode(inode);
+
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
@@ -5937,6 +6022,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (IS_ERR(handle))
return PTR_ERR(handle);
+ ext4_fc_mark_ineligible(inode->i_sb,
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
err = ext4_mark_inode_dirty(handle, inode);
ext4_handle_sync(handle);
ext4_journal_stop(handle);
@@ -5977,9 +6064,17 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
if (err)
goto out_ret;
+ /*
+ * On data journalling we skip straight to the transaction handle:
+ * there's no delalloc; page truncated will be checked later; the
+ * early return w/ all buffers mapped (calculates size/len) can't
+ * be used; and there's no dioread_nolock, so only ext4_get_block.
+ */
+ if (ext4_should_journal_data(inode))
+ goto retry_alloc;
+
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
- !ext4_should_journal_data(inode) &&
!ext4_nonda_switch(inode->i_sb)) {
do {
err = block_page_mkwrite(vma, vmf,
@@ -6005,6 +6100,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
/*
* Return if we have all the buffers mapped. This avoids the need to do
* journal_start/journal_stop which can block and take a long time
+ *
+ * This cannot be done for data journalling, as we have to add the
+ * inode to the transaction's list to writeprotect pages on commit.
*/
if (page_has_buffers(page)) {
if (!ext4_walk_page_buffers(NULL, page_buffers(page),
@@ -6029,16 +6127,42 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
- err = block_page_mkwrite(vma, vmf, get_block);
- if (!err && ext4_should_journal_data(inode)) {
- if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
- PAGE_SIZE, NULL, do_journal_get_write_access)) {
- unlock_page(page);
+ /*
+ * Data journalling can't use block_page_mkwrite() because it
+ * will set_buffer_dirty() before do_journal_get_write_access()
+ * thus might hit warning messages for dirty metadata buffers.
+ */
+ if (!ext4_should_journal_data(inode)) {
+ err = block_page_mkwrite(vma, vmf, get_block);
+ } else {
+ lock_page(page);
+ size = i_size_read(inode);
+ /* Page got truncated from under us? */
+ if (page->mapping != mapping || page_offset(page) > size) {
+ ret = VM_FAULT_NOPAGE;
+ goto out_error;
+ }
+
+ if (page->index == size >> PAGE_SHIFT)
+ len = size & ~PAGE_MASK;
+ else
+ len = PAGE_SIZE;
+
+ err = __block_write_begin(page, 0, len, ext4_get_block);
+ if (!err) {
ret = VM_FAULT_SIGBUS;
- ext4_journal_stop(handle);
- goto out;
+ if (ext4_walk_page_buffers(handle, page_buffers(page),
+ 0, len, NULL, do_journal_get_write_access))
+ goto out_error;
+ if (ext4_walk_page_buffers(handle, page_buffers(page),
+ 0, len, NULL, write_end_fn))
+ goto out_error;
+ if (ext4_jbd2_inode_add_write(handle, inode, 0, len))
+ goto out_error;
+ ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+ } else {
+ unlock_page(page);
}
- ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -6049,6 +6173,10 @@ out:
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
+out_error:
+ unlock_page(page);
+ ext4_journal_stop(handle);
+ goto out;
}
vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)