diff options
Diffstat (limited to 'fs/jbd2/commit.c')
| -rw-r--r-- | fs/jbd2/commit.c | 345 |
1 files changed, 187 insertions, 158 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3c1c31321d9b..7203d2d2624d 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/commit.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal commit routines for the generic filesystem journaling code; * part of the ext2fs journaling system. */ @@ -60,32 +57,30 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) * So here, we have a buffer which has just come off the forget list. Look to * see if we can strip all buffers from the backing page. * - * Called under lock_journal(), and possibly under journal_datalist_lock. The - * caller provided us with a ref against the buffer, and we drop that here. + * Called under j_list_lock. The caller provided us with a ref against the + * buffer, and we drop that here. */ static void release_buffer_page(struct buffer_head *bh) { - struct page *page; + struct folio *folio; if (buffer_dirty(bh)) goto nope; if (atomic_read(&bh->b_count) != 1) goto nope; - page = bh->b_page; - if (!page) - goto nope; - if (page->mapping) + folio = bh->b_folio; + if (folio->mapping) goto nope; /* OK, it's a truncated page */ - if (!trylock_page(page)) + if (!folio_trylock(folio)) goto nope; - get_page(page); + folio_get(folio); __brelse(bh); - try_to_free_buffers(page); - unlock_page(page); - put_page(page); + try_to_free_buffers(folio); + folio_unlock(folio); + folio_put(folio); return; nope: @@ -104,7 +99,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) h->h_chksum_type = 0; h->h_chksum_size = 0; h->h_chksum[0] = 0; - csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); + csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize); h->h_chksum[0] = cpu_to_be32(csum); } @@ -123,8 +118,8 @@ static int journal_submit_commit_record(journal_t *journal, { struct commit_header *tmp; struct buffer_head *bh; - int ret; - struct timespec64 now = current_kernel_time64(); + struct timespec64 now; + blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS; *cbh = NULL; @@ -137,6 +132,7 @@ static int journal_submit_commit_record(journal_t *journal, return 1; tmp = (struct commit_header *)bh->b_data; + ktime_get_coarse_real_ts64(&now); tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); @@ -155,13 +151,11 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); - else - ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + write_flags |= REQ_PREFLUSH | REQ_FUA; + submit_bh(write_flags, bh); *cbh = bh; - return ret; + return 0; } /* @@ -183,25 +177,28 @@ static int journal_wait_on_commit_record(journal_t *journal, return ret; } -/* - * write the filemap data using writepage() address_space_operations. - * We don't do block allocation here even for delalloc. We don't - * use writepages() because with dealyed allocation we may be doing - * block allocation in writepages(). - */ -static int journal_submit_inode_data_buffers(struct address_space *mapping) +/* Send all the data buffers related to an inode */ +int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode) { - int ret; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = mapping->nrpages * 2, - .range_start = 0, - .range_end = i_size_read(mapping->host), - }; - - ret = generic_writepages(mapping, &wbc); - return ret; + if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) + return 0; + + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); + return journal->j_submit_inode_data_buffers(jinode); + +} +EXPORT_SYMBOL(jbd2_submit_inode_data); + +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) +{ + if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || + !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) + return 0; + return filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, + jinode->i_dirty_end); } +EXPORT_SYMBOL(jbd2_wait_inode_data); /* * Submit all the data buffers of inode associated with the transaction to @@ -216,25 +213,20 @@ static int journal_submit_data_buffers(journal_t *journal, { struct jbd2_inode *jinode; int err, ret = 0; - struct address_space *mapping; spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { if (!(jinode->i_flags & JI_WRITE_DATA)) continue; - mapping = jinode->i_vfs_inode->i_mapping; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - /* - * submit the inode data buffers. We use writepage - * instead of writepages. Because writepages can do - * block allocation with delalloc. We need to write - * only allocated blocks here. - */ + /* submit the inode data buffers. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping); - if (!ret) - ret = err; + if (journal->j_submit_inode_data_buffers) { + err = journal->j_submit_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); jinode->i_flags &= ~JI_COMMIT_RUNNING; @@ -245,6 +237,15 @@ static int journal_submit_data_buffers(journal_t *journal, return ret; } +int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; + + return filemap_fdatawait_range_keep_errors(mapping, + jinode->i_dirty_start, + jinode->i_dirty_end); +} + /* * Wait for data submitted for writeout, refile inodes to proper * transaction if needed. @@ -263,10 +264,13 @@ static int journal_finish_inode_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_keep_errors( - jinode->i_vfs_inode->i_mapping); - if (!ret) - ret = err; + /* wait for the inode data buffers writeout. */ + if (journal->j_finish_inode_data_buffers) { + err = journal->j_finish_inode_data_buffers(jinode); + if (!ret) + ret = err; + } + cond_resched(); spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); @@ -284,6 +288,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, &jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; } } spin_unlock(&journal->j_list_lock); @@ -293,14 +299,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal, static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) { - struct page *page = bh->b_page; char *addr; __u32 checksum; - addr = kmap_atomic(page); - checksum = crc32_be(crc32_sum, - (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); - kunmap_atomic(addr); + addr = kmap_local_folio(bh->b_folio, bh_offset(bh)); + checksum = crc32_be(crc32_sum, addr, bh->b_size); + kunmap_local(addr); return checksum; } @@ -317,7 +321,6 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; - struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; @@ -326,11 +329,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, return; seq = cpu_to_be32(sequence); - addr = kmap_atomic(page); - csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); - csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), - bh->b_size); - kunmap_atomic(addr); + addr = kmap_local_folio(bh->b_folio, bh_offset(bh)); + csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); + csum32 = jbd2_chksum(csum32, addr, bh->b_size); + kunmap_local(addr); if (jbd2_has_feature_csum3(j)) tag3->t_checksum = cpu_to_be32(csum32); @@ -351,7 +353,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) struct buffer_head *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; - int flags; + int escape; int err; unsigned long long blocknr; ktime_t start_time; @@ -384,7 +386,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { - jbd_debug(3, "super block updated\n"); + jbd2_debug(3, "super block updated\n"); mutex_lock_io(&journal->j_checkpoint_mutex); /* * We hold j_checkpoint_mutex so tail cannot change under us. @@ -394,23 +396,46 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, - REQ_SYNC); + journal->j_tail, 0); mutex_unlock(&journal->j_checkpoint_mutex); } else { - jbd_debug(3, "superblock not updated\n"); + jbd2_debug(3, "superblock not updated\n"); } J_ASSERT(journal->j_running_transaction != NULL); J_ASSERT(journal->j_committing_transaction == NULL); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + write_lock(&journal->j_state_lock); + finish_wait(&journal->j_fc_wait, &wait); + /* + * TODO: by blocking fast commits here, we are increasing + * fsync() latency slightly. Strictly speaking, we don't need + * to block fast commits until the transaction enters T_FLUSH + * state. So an optimization is possible where we block new fast + * commits here and wait for existing ones to complete + * just before we enter T_FLUSH. That way, the existing fast + * commits and this full commit can proceed parallely. + */ + } + write_unlock(&journal->j_state_lock); + commit_transaction = journal->j_running_transaction; trace_jbd2_start_commit(journal, commit_transaction); - jbd_debug(1, "JBD2: starting commit of transaction %d\n", + jbd2_debug(1, "JBD2: starting commit of transaction %d\n", commit_transaction->t_tid); write_lock(&journal->j_state_lock); + journal->j_fc_off = 0; J_ASSERT(commit_transaction->t_state == T_RUNNING); commit_transaction->t_state = T_LOCKED; @@ -425,22 +450,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start, stats.run.rs_locked); - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + // waits for any t_updates to finish + jbd2_journal_wait_updates(journal); - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); - } - finish_wait(&journal->j_wait_updates, &wait); - } - spin_unlock(&commit_transaction->t_handle_lock); + commit_transaction->t_state = T_SWITCH; J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= journal->j_max_transaction_buffers); @@ -460,6 +473,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) * has reserved. This is consistent with the existing behaviour * that multiple jbd2_journal_get_write_access() calls to the same * buffer are perfectly permissible. + * We use journal->j_state_lock here to serialize processing of + * t_reserved_list with eviction of buffers from journal_unmap_buffer(). */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; @@ -471,24 +486,25 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (jh->b_committed_data) { struct buffer_head *bh = jh2bh(jh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); } jbd2_journal_refile_buffer(journal, jh); } + write_unlock(&journal->j_state_lock); /* * Now try to drop any written-back buffers from the journal's * checkpoint lists. We do this *before* commit because it potentially * frees some memory */ spin_lock(&journal->j_list_lock); - __jbd2_journal_clean_checkpoint_list(journal, false); + __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP); spin_unlock(&journal->j_list_lock); - jbd_debug(3, "JBD2: commit phase 1\n"); + jbd2_debug(3, "JBD2: commit phase 1\n"); /* * Clear revoked flag to reflect there is no revoked buffers @@ -501,6 +517,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + write_lock(&journal->j_state_lock); /* * Reserved credits cannot be claimed anymore, free them */ @@ -517,10 +534,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) journal->j_running_transaction = NULL; start_time = ktime_get(); commit_transaction->t_log_start = journal->j_head; - wake_up(&journal->j_wait_transaction_locked); + wake_up_all(&journal->j_wait_transaction_locked); write_unlock(&journal->j_state_lock); - jbd_debug(3, "JBD2: commit phase 2a\n"); + jbd2_debug(3, "JBD2: commit phase 2a\n"); /* * Now start flushing things to disk, in the order they appear @@ -533,7 +550,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) blk_start_plug(&plug); jbd2_journal_write_revoke_records(commit_transaction, &log_bufs); - jbd_debug(3, "JBD2: commit phase 2b\n"); + jbd2_debug(3, "JBD2: commit phase 2b\n"); /* * Way to go: we have now written out all of the data for a @@ -548,14 +565,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.run.rs_logging = jiffies; stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, stats.run.rs_logging); - stats.run.rs_blocks = - atomic_read(&commit_transaction->t_outstanding_credits); + stats.run.rs_blocks = commit_transaction->t_nr_buffers; stats.run.rs_blocks_logged = 0; J_ASSERT(commit_transaction->t_nr_buffers <= atomic_read(&commit_transaction->t_outstanding_credits)); - err = 0; bufs = 0; descriptor = NULL; while (commit_transaction->t_buffers) { @@ -590,7 +605,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (!descriptor) { J_ASSERT (bufs == 0); - jbd_debug(4, "JBD2: get descriptor\n"); + jbd2_debug(4, "JBD2: get descriptor\n"); descriptor = jbd2_journal_get_descriptor_buffer( commit_transaction, @@ -600,7 +615,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) continue; } - jbd_debug(4, "JBD2: got buffer %llu (%p)\n", + jbd2_debug(4, "JBD2: got buffer %llu (%p)\n", (unsigned long long)descriptor->b_blocknr, descriptor->b_data); tagp = &descriptor->b_data[sizeof(journal_header_t)]; @@ -630,8 +645,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* * start_this_handle() uses t_outstanding_credits to determine - * the free space in the log, but this counter is changed - * by jbd2_journal_next_log_block() also. + * the free space in the log. */ atomic_dec(&commit_transaction->t_outstanding_credits); @@ -646,19 +660,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ set_bit(BH_JWrite, &jh2bh(jh)->b_state); JBUFFER_TRACE(jh, "ph3: write metadata"); - flags = jbd2_journal_write_metadata_buffer(commit_transaction, + escape = jbd2_journal_write_metadata_buffer(commit_transaction, jh, &wbuf[bufs], blocknr); - if (flags < 0) { - jbd2_journal_abort(journal, flags); - continue; - } jbd2_file_log_bh(&io_bufs, wbuf[bufs]); /* Record the new block's tag in the current descriptor buffer */ tag_flag = 0; - if (flags & 1) + if (escape) tag_flag |= JBD2_FLAG_ESCAPE; if (!first_tag) tag_flag |= JBD2_FLAG_SAME_UUID; @@ -686,18 +696,21 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_buffers == NULL || space_left < tag_bytes + 16 + csum_size) { - jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); + jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs); /* Write an end-of-descriptor marker before submitting the IOs. "tag" still points to the last tag we set up. */ tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); - - jbd2_descriptor_block_csum_set(journal, descriptor); start_journal_io: + if (descriptor) + jbd2_descriptor_block_csum_set(journal, + descriptor); + for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; + /* * Compute checksum. */ @@ -710,10 +723,10 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS, + bh); } cond_resched(); - stats.run.rs_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -725,10 +738,8 @@ start_journal_io: err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING - "JBD2: Detected IO errors while flushing file data " - "on %s\n", journal->j_devname); - if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) - jbd2_journal_abort(journal, err); + "JBD2: Detected IO errors %d while flushing file data on %s\n", + err, journal->j_devname); err = 0; } @@ -749,29 +760,29 @@ start_journal_io: if (first_block < journal->j_tail) freed += journal->j_last - journal->j_first; /* Update tail only if we free significant amount of space */ - if (freed < journal->j_maxlen / 4) + if (freed < journal->j_max_transaction_buffers) update_tail = 0; } J_ASSERT(commit_transaction->t_state == T_COMMIT); commit_transaction->t_state = T_COMMIT_DFLUSH; write_unlock(&journal->j_state_lock); - /* + /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue - * the commit record + * the commit record and update the journal tail sequence. */ - if (commit_transaction->t_need_data_flush && + if ((commit_transaction->t_need_data_flush || update_tail) && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); + blkdev_issue_flush(journal->j_fs_dev); /* Done it all: now write the commit record asynchronously. */ if (jbd2_has_feature_async_commit(journal)) { err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -787,7 +798,7 @@ start_journal_io: so we incur less scheduling load. */ - jbd_debug(3, "JBD2: commit phase 3\n"); + jbd2_debug(3, "JBD2: commit phase 3\n"); while (!list_empty(&io_bufs)) { struct buffer_head *bh = list_entry(io_bufs.prev, @@ -800,6 +811,7 @@ start_journal_io: if (unlikely(!buffer_uptodate(bh))) err = -EIO; jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; /* * The list contains temporary buffer heads created by @@ -829,7 +841,7 @@ start_journal_io: J_ASSERT (commit_transaction->t_shadow_list == NULL); - jbd_debug(3, "JBD2: commit phase 4\n"); + jbd2_debug(3, "JBD2: commit phase 4\n"); /* Here we wait for the revoke record and descriptor record buffers */ while (!list_empty(&log_bufs)) { @@ -845,6 +857,7 @@ start_journal_io: BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -852,7 +865,7 @@ start_journal_io: if (err) jbd2_journal_abort(journal, err); - jbd_debug(3, "JBD2: commit phase 5\n"); + jbd2_debug(3, "JBD2: commit phase 5\n"); write_lock(&journal->j_state_lock); J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); commit_transaction->t_state = T_COMMIT_JFLUSH; @@ -862,18 +875,22 @@ start_journal_io: err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); + stats.run.rs_blocks_logged++; if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { - blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); + blkdev_issue_flush(journal->j_dev); } if (err) jbd2_journal_abort(journal, err); + WARN_ON_ONCE( + atomic_read(&commit_transaction->t_outstanding_credits) < 0); + /* * Now disk caches for filesystem device are flushed so we are safe to * erase checkpointed transactions from the log by updating journal @@ -887,7 +904,7 @@ start_journal_io: transaction can be removed from any checkpoint list it was on before. */ - jbd_debug(3, "JBD2: commit phase 6\n"); + jbd2_debug(3, "JBD2: commit phase 6\n"); J_ASSERT(list_empty(&commit_transaction->t_inode_list)); J_ASSERT(commit_transaction->t_buffers == NULL); @@ -904,6 +921,7 @@ restart_loop: transaction_t *cp_transaction; struct buffer_head *bh; int try_to_free = 0; + bool drop_ref; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); @@ -913,7 +931,7 @@ restart_loop: * done with it. */ get_bh(bh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); /* @@ -958,29 +976,34 @@ restart_loop: * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { + * A buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather through in that + * since the buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial page. + */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits. We also need to be careful about the check + * because the data page mapping can get cleared under + * our hands. Note that if mapping == NULL, we don't + * need to make buffer unmapped because the page is + * already detached from the mapping and buffers cannot + * get reused. */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + mapping = READ_ONCE(bh->b_folio->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_req(bh); @@ -1008,8 +1031,10 @@ restart_loop: try_to_free = 1; } JBUFFER_TRACE(jh, "refile or unfile buffer"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); + drop_ref = __jbd2_journal_refile_buffer(jh); + spin_unlock(&jh->b_state_lock); + if (drop_ref) + jbd2_journal_put_journal_head(jh); if (try_to_free) release_buffer_page(bh); /* Drops bh reference */ else @@ -1056,7 +1081,7 @@ restart_loop: /* Done with this transaction! */ - jbd_debug(3, "JBD2: commit phase 7\n"); + jbd2_debug(3, "JBD2: commit phase 7\n"); J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); @@ -1076,7 +1101,7 @@ restart_loop: commit_transaction->t_state = T_COMMIT_CALLBACK; J_ASSERT(commit_transaction == journal->j_committing_transaction); - journal->j_commit_sequence = commit_transaction->t_tid; + WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid); journal->j_committing_transaction = NULL; commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); @@ -1094,23 +1119,27 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid); trace_jbd2_end_commit(journal, commit_transaction); - jbd_debug(1, "JBD2: commit %d complete, head %d\n", + jbd2_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING; + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; /* Check if the transaction can be dropped now that we are finished */ - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { + if (commit_transaction->t_checkpoint_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); jbd2_journal_free_transaction(commit_transaction); } spin_unlock(&journal->j_list_lock); write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); + wake_up(&journal->j_fc_wait); /* * Calculate overall stats |
