From 6db3c1575a750fd417a70e0178bdf6efa0dd5037 Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Wed, 17 Jul 2024 18:22:20 +0100 Subject: ext4: fix fast commit inode enqueueing during a full journal commit When a full journal commit is on-going, any fast commit has to be enqueued into a different queue: FC_Q_STAGING instead of FC_Q_MAIN. This enqueueing is done only once, i.e. if an inode is already queued in a previous fast commit entry it won't be enqueued again. However, if a full commit starts _after_ the inode is enqueued into FC_Q_MAIN, the next fast commit needs to be done into FC_Q_STAGING. And this is not being done in function ext4_fc_track_template(). This patch fixes the issue by re-enqueuing an inode into the STAGING queue during the fast commit clean-up callback when doing a full commit. However, to prevent a race with a fast-commit, the clean-up callback has to be called with the journal locked. This bug was found using fstest generic/047. This test creates several 32k bytes files, sync'ing each of them after it's creation, and then shutting down the filesystem. Some data may be loss in this operation; for example a file may have it's size truncated to zero. Suggested-by: Jan Kara Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240717172220.14201-1-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/ext4/fast_commit.c') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 3926a05eceee..df71fd5b1fed 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1288,8 +1288,21 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) list_del_init(&iter->i_fc_list); ext4_clear_inode_state(&iter->vfs_inode, EXT4_STATE_FC_COMMITTING); - if (tid_geq(tid, iter->i_sync_tid)) + if (tid_geq(tid, iter->i_sync_tid)) { ext4_fc_reset_inode(&iter->vfs_inode); + } else if (full) { + /* + * We are called after a full commit, inode has been + * modified while the commit was running. Re-enqueue + * the inode into STAGING, which will then be splice + * back into MAIN. This cannot happen during + * fastcommit because the journal is locked all the + * time in that case (and tid doesn't increase so + * tid check above isn't reliable). + */ + list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list, + &sbi->s_fc_q[FC_Q_STAGING]); + } /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ smp_mb(); #if (BITS_PER_LONG < 64) -- cgit From ebc4b2c1ac92fc0f8bf3f5a9c285a871d5084a6b Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Wed, 24 Jul 2024 17:11:18 +0100 Subject: ext4: fix incorrect tid assumption in ext4_fc_mark_ineligible() Function jbd2_journal_shrink_checkpoint_list() assumes that '0' is not a valid value for transaction IDs, which is incorrect. Furthermore, the sbi->s_fc_ineligible_tid handling also makes the same assumption by being initialised to '0'. Fortunately, the sb flag EXT4_MF_FC_INELIGIBLE can be used to check whether sbi->s_fc_ineligible_tid has been previously set instead of comparing it with '0'. Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240724161119.13448-5-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/ext4/fast_commit.c') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index df71fd5b1fed..326c16a4e51e 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -339,22 +339,29 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl { struct ext4_sb_info *sbi = EXT4_SB(sb); tid_t tid; + bool has_transaction = true; + bool is_ineligible; if (ext4_fc_disabled(sb)) return; - ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); if (handle && !IS_ERR(handle)) tid = handle->h_transaction->t_tid; else { read_lock(&sbi->s_journal->j_state_lock); - tid = sbi->s_journal->j_running_transaction ? - sbi->s_journal->j_running_transaction->t_tid : 0; + if (sbi->s_journal->j_running_transaction) + tid = sbi->s_journal->j_running_transaction->t_tid; + else + has_transaction = false; read_unlock(&sbi->s_journal->j_state_lock); } spin_lock(&sbi->s_fc_lock); - if (tid_gt(tid, sbi->s_fc_ineligible_tid)) + is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + if (has_transaction && + (!is_ineligible || + (is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid)))) sbi->s_fc_ineligible_tid = tid; + ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); spin_unlock(&sbi->s_fc_lock); WARN_ON(reason >= EXT4_FC_REASON_MAX); sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; -- cgit From f7d1331f16a869c76a5102caebb58e840e1d509c Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Aug 2024 10:35:34 +0800 Subject: ext4: get rid of ppath in ext4_ext_insert_extent() The use of path and ppath is now very confusing, so to make the code more readable, pass path between functions uniformly, and get rid of ppath. To get rid of the ppath in ext4_ext_insert_extent(), the following is done here: * Free the extents path when an error is encountered. * Its caller needs to update ppath if it uses ppath. * Free path when npath is used, free npath when it is not used. * The got_allocated_blocks label in ext4_ext_map_blocks() does not update err now, so err is updated to 0 if the err returned by ext4_ext_search_right() is greater than 0 and is about to enter got_allocated_blocks. No functional changes. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Tested-by: Ojaswin Mujoo Link: https://patch.msgid.link/20240822023545.1994557-15-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4/fast_commit.c') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 326c16a4e51e..afcb2e5cdac7 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1803,12 +1803,12 @@ static int ext4_fc_replay_add_range(struct super_block *sb, if (ext4_ext_is_unwritten(ex)) ext4_ext_mark_unwritten(&newex); down_write(&EXT4_I(inode)->i_data_sem); - ret = ext4_ext_insert_extent( - NULL, inode, &path, &newex, 0); + path = ext4_ext_insert_extent(NULL, inode, + path, &newex, 0); up_write((&EXT4_I(inode)->i_data_sem)); - ext4_free_ext_path(path); - if (ret) + if (IS_ERR(path)) goto out; + ext4_free_ext_path(path); goto next; } -- cgit From 2352e3e461926b59f01c1e39fbb0494891cff997 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Aug 2024 10:35:44 +0800 Subject: ext4: make some fast commit functions reuse extents path The ext4_find_extent() can update the extent path so that it does not have to allocate and free the path repeatedly, thus reducing the consumption of memory allocation and freeing in the following functions: ext4_ext_clear_bb ext4_ext_replay_set_iblocks ext4_fc_replay_add_range ext4_fc_set_bitmaps_and_counters No functional changes. Note that ext4_find_extent() does not support error pointers, so in this case set path to NULL first. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Tested-by: Ojaswin Mujoo Link: https://patch.msgid.link/20240822023545.1994557-25-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/ext4/fast_commit.c') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index afcb2e5cdac7..eaa5f5b51f50 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1792,7 +1792,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, if (ret == 0) { /* Range is not mapped */ - path = ext4_find_extent(inode, cur, NULL, 0); + path = ext4_find_extent(inode, cur, path, 0); if (IS_ERR(path)) goto out; memset(&newex, 0, sizeof(newex)); @@ -1808,7 +1808,6 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); if (IS_ERR(path)) goto out; - ext4_free_ext_path(path); goto next; } @@ -1856,6 +1855,7 @@ next: ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); out: + ext4_free_ext_path(path); iput(inode); return 0; } @@ -1956,12 +1956,13 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) break; if (ret > 0) { - path = ext4_find_extent(inode, map.m_lblk, NULL, 0); + path = ext4_find_extent(inode, map.m_lblk, path, 0); if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, true); - ext4_free_ext_path(path); + } else { + path = NULL; } cur += ret; ext4_mb_mark_bb(inode->i_sb, map.m_pblk, @@ -1972,6 +1973,8 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) } iput(inode); } + + ext4_free_ext_path(path); } /* -- cgit