summaryrefslogtreecommitdiff
path: root/fs/jbd2/commit.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r--fs/jbd2/commit.c345
1 files changed, 187 insertions, 158 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3c1c31321d9b..7203d2d2624d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* linux/fs/jbd2/commit.c
*
@@ -5,10 +6,6 @@
*
* Copyright 1998 Red Hat corp --- All Rights Reserved
*
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
* Journal commit routines for the generic filesystem journaling code;
* part of the ext2fs journaling system.
*/
@@ -60,32 +57,30 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under j_list_lock. The caller provided us with a ref against the
+ * buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
- struct page *page;
+ struct folio *folio;
if (buffer_dirty(bh))
goto nope;
if (atomic_read(&bh->b_count) != 1)
goto nope;
- page = bh->b_page;
- if (!page)
- goto nope;
- if (page->mapping)
+ folio = bh->b_folio;
+ if (folio->mapping)
goto nope;
/* OK, it's a truncated page */
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto nope;
- get_page(page);
+ folio_get(folio);
__brelse(bh);
- try_to_free_buffers(page);
- unlock_page(page);
- put_page(page);
+ try_to_free_buffers(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return;
nope:
@@ -104,7 +99,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
h->h_chksum_type = 0;
h->h_chksum_size = 0;
h->h_chksum[0] = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+ csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize);
h->h_chksum[0] = cpu_to_be32(csum);
}
@@ -123,8 +118,8 @@ static int journal_submit_commit_record(journal_t *journal,
{
struct commit_header *tmp;
struct buffer_head *bh;
- int ret;
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+ blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
*cbh = NULL;
@@ -137,6 +132,7 @@ static int journal_submit_commit_record(journal_t *journal,
return 1;
tmp = (struct commit_header *)bh->b_data;
+ ktime_get_coarse_real_ts64(&now);
tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
@@ -155,13 +151,11 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_has_feature_async_commit(journal))
- ret = submit_bh(REQ_OP_WRITE,
- REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh);
- else
- ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+ write_flags |= REQ_PREFLUSH | REQ_FUA;
+ submit_bh(write_flags, bh);
*cbh = bh;
- return ret;
+ return 0;
}
/*
@@ -183,25 +177,28 @@ static int journal_wait_on_commit_record(journal_t *journal,
return ret;
}
-/*
- * write the filemap data using writepage() address_space_operations.
- * We don't do block allocation here even for delalloc. We don't
- * use writepages() because with dealyed allocation we may be doing
- * block allocation in writepages().
- */
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+/* Send all the data buffers related to an inode */
+int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
- int ret;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = mapping->nrpages * 2,
- .range_start = 0,
- .range_end = i_size_read(mapping->host),
- };
-
- ret = generic_writepages(mapping, &wbc);
- return ret;
+ if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+ return 0;
+
+ trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
+ return journal->j_submit_inode_data_buffers(jinode);
+
+}
+EXPORT_SYMBOL(jbd2_submit_inode_data);
+
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
+{
+ if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
+ !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+ return 0;
+ return filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
+ jinode->i_dirty_end);
}
+EXPORT_SYMBOL(jbd2_wait_inode_data);
/*
* Submit all the data buffers of inode associated with the transaction to
@@ -216,25 +213,20 @@ static int journal_submit_data_buffers(journal_t *journal,
{
struct jbd2_inode *jinode;
int err, ret = 0;
- struct address_space *mapping;
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
- mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- /*
- * submit the inode data buffers. We use writepage
- * instead of writepages. Because writepages can do
- * block allocation with delalloc. We need to write
- * only allocated blocks here.
- */
+ /* submit the inode data buffers. */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping);
- if (!ret)
- ret = err;
+ if (journal->j_submit_inode_data_buffers) {
+ err = journal->j_submit_inode_data_buffers(jinode);
+ if (!ret)
+ ret = err;
+ }
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
@@ -245,6 +237,15 @@ static int journal_submit_data_buffers(journal_t *journal,
return ret;
}
+int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+
+ return filemap_fdatawait_range_keep_errors(mapping,
+ jinode->i_dirty_start,
+ jinode->i_dirty_end);
+}
+
/*
* Wait for data submitted for writeout, refile inodes to proper
* transaction if needed.
@@ -263,10 +264,13 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_keep_errors(
- jinode->i_vfs_inode->i_mapping);
- if (!ret)
- ret = err;
+ /* wait for the inode data buffers writeout. */
+ if (journal->j_finish_inode_data_buffers) {
+ err = journal->j_finish_inode_data_buffers(jinode);
+ if (!ret)
+ ret = err;
+ }
+ cond_resched();
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb();
@@ -284,6 +288,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
@@ -293,14 +299,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
- struct page *page = bh->b_page;
char *addr;
__u32 checksum;
- addr = kmap_atomic(page);
- checksum = crc32_be(crc32_sum,
- (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
- kunmap_atomic(addr);
+ addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
+ checksum = crc32_be(crc32_sum, addr, bh->b_size);
+ kunmap_local(addr);
return checksum;
}
@@ -317,7 +321,6 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence)
{
journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
- struct page *page = bh->b_page;
__u8 *addr;
__u32 csum32;
__be32 seq;
@@ -326,11 +329,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
return;
seq = cpu_to_be32(sequence);
- addr = kmap_atomic(page);
- csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
- csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
- bh->b_size);
- kunmap_atomic(addr);
+ addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
+ csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+ csum32 = jbd2_chksum(csum32, addr, bh->b_size);
+ kunmap_local(addr);
if (jbd2_has_feature_csum3(j))
tag3->t_checksum = cpu_to_be32(csum32);
@@ -351,7 +353,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
struct buffer_head *descriptor;
struct buffer_head **wbuf = journal->j_wbuf;
int bufs;
- int flags;
+ int escape;
int err;
unsigned long long blocknr;
ktime_t start_time;
@@ -384,7 +386,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
- jbd_debug(3, "super block updated\n");
+ jbd2_debug(3, "super block updated\n");
mutex_lock_io(&journal->j_checkpoint_mutex);
/*
* We hold j_checkpoint_mutex so tail cannot change under us.
@@ -394,23 +396,46 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
- journal->j_tail,
- REQ_SYNC);
+ journal->j_tail, 0);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
- jbd_debug(3, "superblock not updated\n");
+ jbd2_debug(3, "superblock not updated\n");
}
J_ASSERT(journal->j_running_transaction != NULL);
J_ASSERT(journal->j_committing_transaction == NULL);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
+ while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&journal->j_fc_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ write_unlock(&journal->j_state_lock);
+ schedule();
+ write_lock(&journal->j_state_lock);
+ finish_wait(&journal->j_fc_wait, &wait);
+ /*
+ * TODO: by blocking fast commits here, we are increasing
+ * fsync() latency slightly. Strictly speaking, we don't need
+ * to block fast commits until the transaction enters T_FLUSH
+ * state. So an optimization is possible where we block new fast
+ * commits here and wait for existing ones to complete
+ * just before we enter T_FLUSH. That way, the existing fast
+ * commits and this full commit can proceed parallely.
+ */
+ }
+ write_unlock(&journal->j_state_lock);
+
commit_transaction = journal->j_running_transaction;
trace_jbd2_start_commit(journal, commit_transaction);
- jbd_debug(1, "JBD2: starting commit of transaction %d\n",
+ jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
commit_transaction->t_tid);
write_lock(&journal->j_state_lock);
+ journal->j_fc_off = 0;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
commit_transaction->t_state = T_LOCKED;
@@ -425,22 +450,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.run.rs_locked);
- spin_lock(&commit_transaction->t_handle_lock);
- while (atomic_read(&commit_transaction->t_updates)) {
- DEFINE_WAIT(wait);
+ // waits for any t_updates to finish
+ jbd2_journal_wait_updates(journal);
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
- if (atomic_read(&commit_transaction->t_updates)) {
- spin_unlock(&commit_transaction->t_handle_lock);
- write_unlock(&journal->j_state_lock);
- schedule();
- write_lock(&journal->j_state_lock);
- spin_lock(&commit_transaction->t_handle_lock);
- }
- finish_wait(&journal->j_wait_updates, &wait);
- }
- spin_unlock(&commit_transaction->t_handle_lock);
+ commit_transaction->t_state = T_SWITCH;
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
@@ -460,6 +473,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
* buffer are perfectly permissible.
+ * We use journal->j_state_lock here to serialize processing of
+ * t_reserved_list with eviction of buffers from journal_unmap_buffer().
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
@@ -471,24 +486,25 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (jh->b_committed_data) {
struct buffer_head *bh = jh2bh(jh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
jbd2_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
jbd2_journal_refile_buffer(journal, jh);
}
+ write_unlock(&journal->j_state_lock);
/*
* Now try to drop any written-back buffers from the journal's
* checkpoint lists. We do this *before* commit because it potentially
* frees some memory
*/
spin_lock(&journal->j_list_lock);
- __jbd2_journal_clean_checkpoint_list(journal, false);
+ __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
spin_unlock(&journal->j_list_lock);
- jbd_debug(3, "JBD2: commit phase 1\n");
+ jbd2_debug(3, "JBD2: commit phase 1\n");
/*
* Clear revoked flag to reflect there is no revoked buffers
@@ -501,6 +517,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_switch_revoke_table(journal);
+ write_lock(&journal->j_state_lock);
/*
* Reserved credits cannot be claimed anymore, free them
*/
@@ -517,10 +534,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal->j_running_transaction = NULL;
start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head;
- wake_up(&journal->j_wait_transaction_locked);
+ wake_up_all(&journal->j_wait_transaction_locked);
write_unlock(&journal->j_state_lock);
- jbd_debug(3, "JBD2: commit phase 2a\n");
+ jbd2_debug(3, "JBD2: commit phase 2a\n");
/*
* Now start flushing things to disk, in the order they appear
@@ -533,7 +550,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
blk_start_plug(&plug);
jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
- jbd_debug(3, "JBD2: commit phase 2b\n");
+ jbd2_debug(3, "JBD2: commit phase 2b\n");
/*
* Way to go: we have now written out all of the data for a
@@ -548,14 +565,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_logging = jiffies;
stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
stats.run.rs_logging);
- stats.run.rs_blocks =
- atomic_read(&commit_transaction->t_outstanding_credits);
+ stats.run.rs_blocks = commit_transaction->t_nr_buffers;
stats.run.rs_blocks_logged = 0;
J_ASSERT(commit_transaction->t_nr_buffers <=
atomic_read(&commit_transaction->t_outstanding_credits));
- err = 0;
bufs = 0;
descriptor = NULL;
while (commit_transaction->t_buffers) {
@@ -590,7 +605,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (!descriptor) {
J_ASSERT (bufs == 0);
- jbd_debug(4, "JBD2: get descriptor\n");
+ jbd2_debug(4, "JBD2: get descriptor\n");
descriptor = jbd2_journal_get_descriptor_buffer(
commit_transaction,
@@ -600,7 +615,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
continue;
}
- jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
+ jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)descriptor->b_blocknr,
descriptor->b_data);
tagp = &descriptor->b_data[sizeof(journal_header_t)];
@@ -630,8 +645,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/*
* start_this_handle() uses t_outstanding_credits to determine
- * the free space in the log, but this counter is changed
- * by jbd2_journal_next_log_block() also.
+ * the free space in the log.
*/
atomic_dec(&commit_transaction->t_outstanding_credits);
@@ -646,19 +660,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
JBUFFER_TRACE(jh, "ph3: write metadata");
- flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+ escape = jbd2_journal_write_metadata_buffer(commit_transaction,
jh, &wbuf[bufs], blocknr);
- if (flags < 0) {
- jbd2_journal_abort(journal, flags);
- continue;
- }
jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
/* Record the new block's tag in the current descriptor
buffer */
tag_flag = 0;
- if (flags & 1)
+ if (escape)
tag_flag |= JBD2_FLAG_ESCAPE;
if (!first_tag)
tag_flag |= JBD2_FLAG_SAME_UUID;
@@ -686,18 +696,21 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_buffers == NULL ||
space_left < tag_bytes + 16 + csum_size) {
- jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
+ jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
/* Write an end-of-descriptor marker before
submitting the IOs. "tag" still points to
the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
-
- jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
+ if (descriptor)
+ jbd2_descriptor_block_csum_set(journal,
+ descriptor);
+
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
+
/*
* Compute checksum.
*/
@@ -710,10 +723,10 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+ submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
+ bh);
}
cond_resched();
- stats.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
@@ -725,10 +738,8 @@ start_journal_io:
err = journal_finish_inode_data_buffers(journal, commit_transaction);
if (err) {
printk(KERN_WARNING
- "JBD2: Detected IO errors while flushing file data "
- "on %s\n", journal->j_devname);
- if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
- jbd2_journal_abort(journal, err);
+ "JBD2: Detected IO errors %d while flushing file data on %s\n",
+ err, journal->j_devname);
err = 0;
}
@@ -749,29 +760,29 @@ start_journal_io:
if (first_block < journal->j_tail)
freed += journal->j_last - journal->j_first;
/* Update tail only if we free significant amount of space */
- if (freed < journal->j_maxlen / 4)
+ if (freed < journal->j_max_transaction_buffers)
update_tail = 0;
}
J_ASSERT(commit_transaction->t_state == T_COMMIT);
commit_transaction->t_state = T_COMMIT_DFLUSH;
write_unlock(&journal->j_state_lock);
- /*
+ /*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
- * the commit record
+ * the commit record and update the journal tail sequence.
*/
- if (commit_transaction->t_need_data_flush &&
+ if ((commit_transaction->t_need_data_flush || update_tail) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
+ blkdev_issue_flush(journal->j_fs_dev);
/* Done it all: now write the commit record asynchronously. */
if (jbd2_has_feature_async_commit(journal)) {
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
}
blk_finish_plug(&plug);
@@ -787,7 +798,7 @@ start_journal_io:
so we incur less scheduling load.
*/
- jbd_debug(3, "JBD2: commit phase 3\n");
+ jbd2_debug(3, "JBD2: commit phase 3\n");
while (!list_empty(&io_bufs)) {
struct buffer_head *bh = list_entry(io_bufs.prev,
@@ -800,6 +811,7 @@ start_journal_io:
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
jbd2_unfile_log_bh(bh);
+ stats.run.rs_blocks_logged++;
/*
* The list contains temporary buffer heads created by
@@ -829,7 +841,7 @@ start_journal_io:
J_ASSERT (commit_transaction->t_shadow_list == NULL);
- jbd_debug(3, "JBD2: commit phase 4\n");
+ jbd2_debug(3, "JBD2: commit phase 4\n");
/* Here we wait for the revoke record and descriptor record buffers */
while (!list_empty(&log_bufs)) {
@@ -845,6 +857,7 @@ start_journal_io:
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
clear_buffer_jwrite(bh);
jbd2_unfile_log_bh(bh);
+ stats.run.rs_blocks_logged++;
__brelse(bh); /* One for getblk */
/* AKPM: bforget here */
}
@@ -852,7 +865,7 @@ start_journal_io:
if (err)
jbd2_journal_abort(journal, err);
- jbd_debug(3, "JBD2: commit phase 5\n");
+ jbd2_debug(3, "JBD2: commit phase 5\n");
write_lock(&journal->j_state_lock);
J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -862,18 +875,22 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
}
if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
+ stats.run.rs_blocks_logged++;
if (jbd2_has_feature_async_commit(journal) &&
journal->j_flags & JBD2_BARRIER) {
- blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
+ blkdev_issue_flush(journal->j_dev);
}
if (err)
jbd2_journal_abort(journal, err);
+ WARN_ON_ONCE(
+ atomic_read(&commit_transaction->t_outstanding_credits) < 0);
+
/*
* Now disk caches for filesystem device are flushed so we are safe to
* erase checkpointed transactions from the log by updating journal
@@ -887,7 +904,7 @@ start_journal_io:
transaction can be removed from any checkpoint list it was on
before. */
- jbd_debug(3, "JBD2: commit phase 6\n");
+ jbd2_debug(3, "JBD2: commit phase 6\n");
J_ASSERT(list_empty(&commit_transaction->t_inode_list));
J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -904,6 +921,7 @@ restart_loop:
transaction_t *cp_transaction;
struct buffer_head *bh;
int try_to_free = 0;
+ bool drop_ref;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
@@ -913,7 +931,7 @@ restart_loop:
* done with it.
*/
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
/*
@@ -958,29 +976,34 @@ restart_loop:
* it. */
/*
- * A buffer which has been freed while still being journaled by
- * a previous transaction.
- */
- if (buffer_freed(bh)) {
+ * A buffer which has been freed while still being journaled
+ * by a previous transaction, refile the buffer to BJ_Forget of
+ * the running transaction. If the just committed transaction
+ * contains "add to orphan" operation, we can completely
+ * invalidate the buffer now. We are rather through in that
+ * since the buffer may be still accessible when blocksize <
+ * pagesize and it is attached to the last partial page.
+ */
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
+ struct address_space *mapping;
+
+ clear_buffer_freed(bh);
+ clear_buffer_jbddirty(bh);
+
/*
- * If the running transaction is the one containing
- * "add to orphan" operation (b_next_transaction !=
- * NULL), we have to wait for that transaction to
- * commit before we can really get rid of the buffer.
- * So just clear b_modified to not confuse transaction
- * credit accounting and refile the buffer to
- * BJ_Forget of the running transaction. If the just
- * committed transaction contains "add to orphan"
- * operation, we can completely invalidate the buffer
- * now. We are rather through in that since the
- * buffer may be still accessible when blocksize <
- * pagesize and it is attached to the last partial
- * page.
+ * Block device buffers need to stay mapped all the
+ * time, so it is enough to clear buffer_jbddirty and
+ * buffer_freed bits. For the file mapping buffers (i.e.
+ * journalled data) we need to unmap buffer and clear
+ * more bits. We also need to be careful about the check
+ * because the data page mapping can get cleared under
+ * our hands. Note that if mapping == NULL, we don't
+ * need to make buffer unmapped because the page is
+ * already detached from the mapping and buffers cannot
+ * get reused.
*/
- jh->b_modified = 0;
- if (!jh->b_next_transaction) {
- clear_buffer_freed(bh);
- clear_buffer_jbddirty(bh);
+ mapping = READ_ONCE(bh->b_folio->mapping);
+ if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_req(bh);
@@ -1008,8 +1031,10 @@ restart_loop:
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile buffer");
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ drop_ref = __jbd2_journal_refile_buffer(jh);
+ spin_unlock(&jh->b_state_lock);
+ if (drop_ref)
+ jbd2_journal_put_journal_head(jh);
if (try_to_free)
release_buffer_page(bh); /* Drops bh reference */
else
@@ -1056,7 +1081,7 @@ restart_loop:
/* Done with this transaction! */
- jbd_debug(3, "JBD2: commit phase 7\n");
+ jbd2_debug(3, "JBD2: commit phase 7\n");
J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
@@ -1076,7 +1101,7 @@ restart_loop:
commit_transaction->t_state = T_COMMIT_CALLBACK;
J_ASSERT(commit_transaction == journal->j_committing_transaction);
- journal->j_commit_sequence = commit_transaction->t_tid;
+ WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
journal->j_committing_transaction = NULL;
commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
@@ -1094,23 +1119,27 @@ restart_loop:
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
+ if (journal->j_fc_cleanup_callback)
+ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
trace_jbd2_end_commit(journal, commit_transaction);
- jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+ jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
- if (commit_transaction->t_checkpoint_list == NULL &&
- commit_transaction->t_checkpoint_io_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
}
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_done_commit);
+ wake_up(&journal->j_fc_wait);
/*
* Calculate overall stats