Diffstat (limited to 'fs/jbd2/transaction.c')
 fs/jbd2/transaction.c | 409 ++++++++++++++++++++++----------------------
 1 file changed, 203 insertions(+), 206 deletions(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6a3caedd2285..dca4b5d8aaaa 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -63,28 +63,6 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
 }
 
 /*
- * Base amount of descriptor blocks we reserve for each transaction.
- */
-static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
-{
-	int tag_space = journal->j_blocksize - sizeof(journal_header_t);
-	int tags_per_block;
-
-	/* Subtract UUID */
-	tag_space -= 16;
-	if (jbd2_journal_has_csum_v2or3(journal))
-		tag_space -= sizeof(struct jbd2_journal_block_tail);
-	/* Commit code leaves a slack space of 16 bytes at the end of block */
-	tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
-	/*
-	 * Revoke descriptors are accounted separately so we need to reserve
-	 * space for commit block and normal transaction descriptor blocks.
-	 */
-	return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
-				tags_per_block);
-}
-
-/*
  * jbd2_get_transaction: obtain a new transaction_t object.
  *
  * Simply initialise a new transaction. Initialize it in
@@ -107,15 +85,13 @@ static void jbd2_get_transaction(journal_t *journal,
 	transaction->t_start_time = ktime_get();
 	transaction->t_tid = journal->j_transaction_sequence++;
 	transaction->t_expires = jiffies + journal->j_commit_interval;
-	spin_lock_init(&transaction->t_handle_lock);
 	atomic_set(&transaction->t_updates, 0);
 	atomic_set(&transaction->t_outstanding_credits,
-		   jbd2_descriptor_blocks_per_trans(journal) +
+		   journal->j_transaction_overhead_buffers +
 		   atomic_read(&journal->j_reserved_credits));
 	atomic_set(&transaction->t_outstanding_revokes, 0);
 	atomic_set(&transaction->t_handle_count, 0);
 	INIT_LIST_HEAD(&transaction->t_inode_list);
-	INIT_LIST_HEAD(&transaction->t_private_list);
 
 	/* Set up the commit timer for the new transaction. */
 	journal->j_commit_timer.expires =
 		round_jiffies_up(transaction->t_expires);
@@ -137,28 +113,21 @@ static void jbd2_get_transaction(journal_t *journal,
  */
 
 /*
- * Update transaction's maximum wait time, if debugging is enabled.
- *
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock. But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability. So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here with use of atomic compare exchange.
+ * Note that there could be multiple threads trying to do this simultaneously
+ * hence using cmpxchg to avoid any use of locks in this case.
  */
 static inline void update_t_max_wait(transaction_t *transaction,
 				     unsigned long ts)
 {
-#ifdef CONFIG_JBD2_DEBUG
-	if (jbd2_journal_enable_debug &&
-	    time_after(transaction->t_start, ts)) {
-		ts = jbd2_time_diff(ts, transaction->t_start);
-		spin_lock(&transaction->t_handle_lock);
-		if (ts > transaction->t_max_wait)
-			transaction->t_max_wait = ts;
-		spin_unlock(&transaction->t_handle_lock);
+	unsigned long oldts, newts;
+
+	if (time_after(transaction->t_start, ts)) {
+		newts = jbd2_time_diff(ts, transaction->t_start);
+		oldts = READ_ONCE(transaction->t_max_wait);
+		while (oldts < newts)
+			oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
 	}
-#endif
 }
 
 /*
@@ -173,7 +142,7 @@ static void wait_transaction_locked(journal_t *journal)
 	int need_to_start;
 	tid_t tid = journal->j_running_transaction->t_tid;
 
-	prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+	prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
 			TASK_UNINTERRUPTIBLE);
 	need_to_start = !tid_geq(journal->j_commit_request, tid);
 	read_unlock(&journal->j_state_lock);
@@ -199,7 +168,7 @@ static void wait_transaction_switching(journal_t *journal)
 		read_unlock(&journal->j_state_lock);
 		return;
 	}
-	prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+	prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
 			TASK_UNINTERRUPTIBLE);
 	read_unlock(&journal->j_state_lock);
 	/*
@@ -218,6 +187,13 @@ static void sub_reserved_credits(journal_t *journal, int blocks)
 	wake_up(&journal->j_wait_reserved);
 }
 
+/* Maximum number of blocks for user transaction payload */
+static int jbd2_max_user_trans_buffers(journal_t *journal)
+{
+	return journal->j_max_transaction_buffers -
+				journal->j_transaction_overhead_buffers;
+}
+
 /*
  * Wait until we can add credits for handle to the running transaction. Called
  * with j_state_lock held for reading. Returns 0 if handle joined the running
@@ -267,12 +243,12 @@ __must_hold(&journal->j_state_lock)
 	 * big to fit this handle? Wait until reserved credits are freed.
 	 */
 	if (atomic_read(&journal->j_reserved_credits) + total >
-	    journal->j_max_transaction_buffers) {
+	    jbd2_max_user_trans_buffers(journal)) {
 		read_unlock(&journal->j_state_lock);
 		jbd2_might_wait_for_commit(journal);
 		wait_event(journal->j_wait_reserved,
 			   atomic_read(&journal->j_reserved_credits) + total <=
-			   journal->j_max_transaction_buffers);
+			   jbd2_max_user_trans_buffers(journal));
 		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
@@ -312,14 +288,14 @@ __must_hold(&journal->j_state_lock)
 	needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
 	/* We allow at most half of a transaction to be reserved */
-	if (needed > journal->j_max_transaction_buffers / 2) {
+	if (needed > jbd2_max_user_trans_buffers(journal) / 2) {
 		sub_reserved_credits(journal, rsv_blocks);
 		atomic_sub(total, &t->t_outstanding_credits);
 		read_unlock(&journal->j_state_lock);
 		jbd2_might_wait_for_commit(journal);
 		wait_event(journal->j_wait_reserved,
 			 atomic_read(&journal->j_reserved_credits) + rsv_blocks
-			 <= journal->j_max_transaction_buffers / 2);
+			 <= jbd2_max_user_trans_buffers(journal) / 2);
 		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
@@ -349,12 +325,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	 * size and limit the number of total credits to not exceed maximum
 	 * transaction size per operation.
 	 */
-	if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
-	    (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
+	if (rsv_blocks > jbd2_max_user_trans_buffers(journal) / 2 ||
+	    rsv_blocks + blocks > jbd2_max_user_trans_buffers(journal)) {
 		printk(KERN_ERR "JBD2: %s wants too many credits "
 		       "credits:%d rsv_credits:%d max:%d\n",
 		       current->comm, blocks, rsv_blocks,
-		       journal->j_max_transaction_buffers);
+		       jbd2_max_user_trans_buffers(journal));
 		WARN_ON(1);
 		return -ENOSPC;
 	}
@@ -378,7 +354,7 @@ alloc_transaction:
 			return -ENOMEM;
 	}
 
-	jbd_debug(3, "New handle %p going live.\n", handle);
+	jbd2_debug(3, "New handle %p going live.\n", handle);
 
 	/*
 	 * We need to hold j_state_lock until t_updates has been incremented,
@@ -449,7 +425,7 @@ repeat:
 	}
 
 	/* OK, account for the buffers that this operation expects to
-	 * use and add the handle to the running transaction. 
+	 * use and add the handle to the running transaction.
 	 */
 	update_t_max_wait(transaction, ts);
 	handle->h_transaction = transaction;
@@ -458,14 +434,14 @@ repeat:
 	handle->h_start_jiffies = jiffies;
 	atomic_inc(&transaction->t_updates);
 	atomic_inc(&transaction->t_handle_count);
-	jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
+	jbd2_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
 		  handle, blocks,
 		  atomic_read(&transaction->t_outstanding_credits),
 		  jbd2_log_space_left(journal));
 	read_unlock(&journal->j_state_lock);
 	current->journal_info = handle;
 
-	rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
+	rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
 	jbd2_journal_free_transaction(new_transaction);
 	/*
 	 * Ensure that no allocations done while the transaction is open are
@@ -679,7 +655,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 
 	/* Don't extend a locked-down transaction! */
 	if (transaction->t_state != T_RUNNING) {
-		jbd_debug(3, "denied handle %p %d blocks: "
+		jbd2_debug(3, "denied handle %p %d blocks: "
 			  "transaction not running\n", handle, nblocks);
 		goto error_out;
 	}
@@ -690,15 +666,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 			DIV_ROUND_UP(
 				handle->h_revoke_credits_requested,
 				journal->j_revoke_records_per_block);
-	spin_lock(&transaction->t_handle_lock);
 	wanted = atomic_add_return(nblocks,
 				   &transaction->t_outstanding_credits);
 
 	if (wanted > journal->j_max_transaction_buffers) {
-		jbd_debug(3, "denied handle %p %d blocks: "
+		jbd2_debug(3, "denied handle %p %d blocks: "
 			  "transaction too large\n", handle, nblocks);
 		atomic_sub(nblocks, &transaction->t_outstanding_credits);
-		goto unlock;
+		goto error_out;
 	}
 
 	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -713,9 +688,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 	handle->h_revoke_credits_requested += revoke_records;
 	result = 0;
 
-	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
-	spin_unlock(&transaction->t_handle_lock);
+	jbd2_debug(3, "extended handle %p by %d\n", handle, nblocks);
 error_out:
 	read_unlock(&journal->j_state_lock);
 	return result;
@@ -803,7 +776,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
 	 * First unlink the handle from its current transaction, and start the
 	 * commit on that.
 	 */
-	jbd_debug(2, "restarting handle %p\n", handle);
+	jbd2_debug(2, "restarting handle %p\n", handle);
 	stop_this_handle(handle);
 	handle->h_transaction = NULL;
 
@@ -836,6 +809,43 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 }
 EXPORT_SYMBOL(jbd2_journal_restart);
 
+/*
+ * Waits for any outstanding t_updates to finish.
+ * This is called with write j_state_lock held.
+ */
+void jbd2_journal_wait_updates(journal_t *journal)
+{
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		/*
+		 * Note that the running transaction can get freed under us if
+		 * this transaction is getting committed in
+		 * jbd2_journal_commit_transaction() ->
+		 * jbd2_journal_free_transaction(). This can only happen when we
+		 * release j_state_lock -> schedule() -> acquire j_state_lock.
+		 * Hence we should retrieve the new j_running_transaction value
+		 * every time (after the j_state_lock release-acquire cycle),
+		 * else it may lead to use-after-free of the old freed
+		 * transaction.
+		 */
+		transaction_t *transaction = journal->j_running_transaction;
+
+		if (!transaction)
+			break;
+
+		prepare_to_wait(&journal->j_wait_updates, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (!atomic_read(&transaction->t_updates)) {
+			finish_wait(&journal->j_wait_updates, &wait);
+			break;
+		}
+		write_unlock(&journal->j_state_lock);
+		schedule();
+		finish_wait(&journal->j_wait_updates, &wait);
+		write_lock(&journal->j_state_lock);
+	}
+}
+
 /**
  * jbd2_journal_lock_updates () - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
@@ -848,8 +858,6 @@ EXPORT_SYMBOL(jbd2_journal_restart);
  */
 void jbd2_journal_lock_updates(journal_t *journal)
 {
-	DEFINE_WAIT(wait);
-
 	jbd2_might_wait_for_commit(journal);
 
 	write_lock(&journal->j_state_lock);
@@ -863,27 +871,9 @@ void jbd2_journal_lock_updates(journal_t *journal)
 		write_lock(&journal->j_state_lock);
 	}
 
-	/* Wait until there are no running updates */
-	while (1) {
-		transaction_t *transaction = journal->j_running_transaction;
-
-		if (!transaction)
-			break;
+	/* Wait until there are no running t_updates */
+	jbd2_journal_wait_updates(journal);
 
-		spin_lock(&transaction->t_handle_lock);
-		prepare_to_wait(&journal->j_wait_updates, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (!atomic_read(&transaction->t_updates)) {
-			spin_unlock(&transaction->t_handle_lock);
-			finish_wait(&journal->j_wait_updates, &wait);
-			break;
-		}
-		spin_unlock(&transaction->t_handle_lock);
-		write_unlock(&journal->j_state_lock);
-		schedule();
-		finish_wait(&journal->j_wait_updates, &wait);
-		write_lock(&journal->j_state_lock);
-	}
 	write_unlock(&journal->j_state_lock);
 
 	/*
@@ -911,7 +901,7 @@ void jbd2_journal_unlock_updates (journal_t *journal)
 	write_lock(&journal->j_state_lock);
 	--journal->j_barrier_count;
 	write_unlock(&journal->j_state_lock);
-	wake_up(&journal->j_wait_transaction_locked);
+	wake_up_all(&journal->j_wait_transaction_locked);
 }
 
 static void warn_dirty_buffer(struct buffer_head *bh)
@@ -926,19 +916,15 @@ static void warn_dirty_buffer(struct buffer_head *bh)
 
 /* Call t_frozen trigger and copy buffer data into jh->b_frozen_data.
  */
 static void jbd2_freeze_jh_data(struct journal_head *jh)
 {
-	struct page *page;
-	int offset;
 	char *source;
 	struct buffer_head *bh = jh2bh(jh);
 
 	J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
-	page = bh->b_page;
-	offset = offset_in_page(bh->b_data);
-	source = kmap_atomic(page);
+	source = kmap_local_folio(bh->b_folio, bh_offset(bh));
 	/* Fire data frozen trigger just before we copy the data */
-	jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
-	memcpy(jh->b_frozen_data, source + offset, bh->b_size);
-	kunmap_atomic(source);
+	jbd2_buffer_frozen_trigger(jh, source, jh->b_triggers);
+	memcpy(jh->b_frozen_data, source, bh->b_size);
+	kunmap_local(source);
 
 	/*
 	 * Now that the frozen data is saved off, we need to store any matching
@@ -970,7 +956,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 
 	journal = transaction->t_journal;
 
-	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
+	jbd2_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
 
 	JBUFFER_TRACE(jh, "entry");
 repeat:
@@ -1001,36 +987,28 @@ repeat:
 	 * ie. locked but not dirty) or tune2fs (which may actually have
 	 * the buffer dirtied, ugh.)  */
 
-	if (buffer_dirty(bh)) {
-		/*
-		 * First question: is this buffer already part of the current
-		 * transaction or the existing committing transaction?
-		 */
-		if (jh->b_transaction) {
-			J_ASSERT_JH(jh,
-				jh->b_transaction == transaction ||
-				jh->b_transaction ==
-					journal->j_committing_transaction);
-			if (jh->b_next_transaction)
-				J_ASSERT_JH(jh, jh->b_next_transaction ==
-							transaction);
-			warn_dirty_buffer(bh);
-		}
+	if (buffer_dirty(bh) && jh->b_transaction) {
+		warn_dirty_buffer(bh);
 		/*
-		 * In any case we need to clean the dirty flag and we must
-		 * do it under the buffer lock to be sure we don't race
-		 * with running write-out.
+		 * We need to clean the dirty flag and we must do it under the
+		 * buffer lock to be sure we don't race with running write-out.
 		 */
 		JBUFFER_TRACE(jh, "Journalling dirty buffer");
 		clear_buffer_dirty(bh);
+		/*
+		 * The buffer is going to be added to BJ_Reserved list now and
+		 * nothing guarantees jbd2_journal_dirty_metadata() will be
+		 * ever called for it. So we need to set jbddirty bit here to
+		 * make sure the buffer is dirtied and written out when the
+		 * journaling machinery is done with it.
+		 */
 		set_buffer_jbddirty(bh);
 	}
 
-	unlock_buffer(bh);
-
 	error = -EROFS;
 	if (is_handle_aborted(handle)) {
 		spin_unlock(&jh->b_state_lock);
+		unlock_buffer(bh);
 		goto out;
 	}
 	error = 0;
@@ -1040,8 +1018,10 @@ repeat:
 	 * b_next_transaction points to it
 	 */
 	if (jh->b_transaction == transaction ||
-	    jh->b_next_transaction == transaction)
+	    jh->b_next_transaction == transaction) {
+		unlock_buffer(bh);
 		goto done;
+	}
 
 	/*
 	 * this is the first time this transaction is touching this buffer,
@@ -1065,10 +1045,24 @@ repeat:
 		 */
 		smp_wmb();
 		spin_lock(&journal->j_list_lock);
+		if (test_clear_buffer_dirty(bh)) {
+			/*
+			 * Execute buffer dirty clearing and jh->b_transaction
+			 * assignment under journal->j_list_lock locked to
+			 * prevent bh being removed from checkpoint list if
+			 * the buffer is in an intermediate state (not dirty
+			 * and jh->b_transaction is NULL).
+			 */
+			JBUFFER_TRACE(jh, "Journalling dirty buffer");
+			set_buffer_jbddirty(bh);
+		}
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 		spin_unlock(&journal->j_list_lock);
+		unlock_buffer(bh);
 		goto done;
 	}
+	unlock_buffer(bh);
+
 	/*
 	 * If there is already a copy-out version of this buffer, then we don't
 	 * need to make another one
@@ -1218,11 +1212,26 @@ out:
 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
 	struct journal_head *jh;
+	journal_t *journal;
 	int rc;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
 
+	journal = handle->h_transaction->t_journal;
+	rc = jbd2_check_fs_dev_write_error(journal);
+	if (rc) {
+		/*
+		 * If the fs dev has writeback errors, it may have failed
+		 * to async write out metadata buffers in the background.
+		 * In this case, we could read old data from disk and write
+		 * it out again, which may lead to on-disk filesystem
+		 * inconsistency. Aborting the journal can avoid this.
+		 */
+		jbd2_journal_abort(journal, rc);
+		return -EIO;
+	}
+
 	if (jbd2_write_access_granted(handle, bh, false))
 		return 0;
 
@@ -1262,7 +1271,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 	int err;
 
-	jbd_debug(5, "journal_head %p\n", jh);
+	jbd2_debug(5, "journal_head %p\n", jh);
 	err = -EROFS;
 	if (is_handle_aborted(handle))
 		goto out;
@@ -1276,14 +1285,23 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	 * committing transaction's lists, but it HAS to be in Forget state in
 	 * that case: the transaction must have deleted the buffer for it to be
 	 * reused here.
+	 * In the case of file system data inconsistency, for example, if the
+	 * block bitmap of a referenced block is not set, it can lead to the
+	 * situation where a block being committed is allocated and used again.
+	 * As a result, the following condition will not be satisfied, so here
+	 * we directly trigger a JBD abort instead of immediately invoking a
+	 * BUG_ON.
 	 */
 	spin_lock(&jh->b_state_lock);
-	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
-		jh->b_transaction == NULL ||
-		(jh->b_transaction == journal->j_committing_transaction &&
-			  jh->b_jlist == BJ_Forget)));
+	if (!(jh->b_transaction == transaction || jh->b_transaction == NULL ||
+	      (jh->b_transaction == journal->j_committing_transaction &&
+	       jh->b_jlist == BJ_Forget)) || jh->b_next_transaction != NULL) {
+		err = -EROFS;
+		spin_unlock(&jh->b_state_lock);
+		jbd2_journal_abort(journal, err);
+		goto out;
+	}
 
-	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
@@ -1477,8 +1495,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	struct journal_head *jh;
 	int ret = 0;
 
-	if (is_handle_aborted(handle))
-		return -EROFS;
 	if (!buffer_jbd(bh))
 		return -EUCLEAN;
 
@@ -1487,7 +1503,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	 * of the running transaction.
 	 */
 	jh = bh2jh(bh);
-	jbd_debug(5, "journal_head %p\n", jh);
+	jbd2_debug(5, "journal_head %p\n", jh);
 	JBUFFER_TRACE(jh, "entry");
 
 	/*
@@ -1503,7 +1519,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 			jh->b_next_transaction == transaction);
 		spin_unlock(&jh->b_state_lock);
 	}
-	if (jh->b_modified == 1) {
+	if (data_race(jh->b_modified == 1)) {
 		/* If it's in our transaction it must be in BJ_Metadata list.
 		 */
 		if (data_race(jh->b_transaction == transaction &&
 			      jh->b_jlist != BJ_Metadata)) {
@@ -1522,9 +1538,22 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		goto out;
 	}
 
-	journal = transaction->t_journal;
 	spin_lock(&jh->b_state_lock);
+	if (is_handle_aborted(handle)) {
+		/*
+		 * Check journal aborting with @jh->b_state_lock locked,
+		 * since 'jh->b_transaction' could be replaced with
+		 * 'jh->b_next_transaction' during old transaction
+		 * committing if journal aborted, which may fail
+		 * assertion on 'jh->b_frozen_data == NULL'.
+		 */
+		ret = -EROFS;
+		goto out_unlock_bh;
+	}
+
+	journal = transaction->t_journal;
+
 	if (jh->b_modified == 0) {
 		/*
 		 * This buffer's got modified and becoming part
@@ -1640,6 +1669,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 	int drop_reserve = 0;
 	int err = 0;
 	int was_modified = 0;
+	int wait_for_writeback = 0;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
@@ -1757,25 +1787,28 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 		 * Otherwise, if the buffer has been written to disk,
 		 * it is safe to remove the checkpoint and drop it.
 		 */
-		if (!buffer_dirty(bh)) {
-			__jbd2_journal_remove_checkpoint(jh);
+		if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
 			spin_unlock(&journal->j_list_lock);
 			goto drop;
 		}
 
 		/*
-		 * The buffer is still not written to disk, we should
-		 * attach this buffer to current transaction so that the
-		 * buffer can be checkpointed only after the current
-		 * transaction commits.
+		 * The buffer has not yet been written to disk. We should
+		 * either clear the buffer or ensure that the ongoing I/O
+		 * is completed, and attach this buffer to current
+		 * transaction so that the buffer can be checkpointed only
+		 * after the current transaction commits.
 		 */
 		clear_buffer_dirty(bh);
+		wait_for_writeback = 1;
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
 		spin_unlock(&journal->j_list_lock);
 	}
 drop:
 	__brelse(bh);
 	spin_unlock(&jh->b_state_lock);
+	if (wait_for_writeback)
+		wait_on_buffer(bh);
 	jbd2_journal_put_journal_head(jh);
 	if (drop_reserve) {
 		/* no need to reserve log space for this block -bzzz */
@@ -1809,7 +1842,7 @@ int jbd2_journal_stop(handle_t *handle)
 	pid_t pid;
 
 	if (--handle->h_ref > 0) {
-		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+		jbd2_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
 			  handle->h_ref);
 		if (is_handle_aborted(handle))
 			return -EIO;
@@ -1829,7 +1862,7 @@ int jbd2_journal_stop(handle_t *handle)
 	if (is_handle_aborted(handle))
 		err = -EIO;
 
-	jbd_debug(4, "Handle %p going down\n", handle);
+	jbd2_debug(4, "Handle %p going down\n", handle);
 	trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
 				tid, handle->h_type, handle->h_line_no,
 				jiffies - handle->h_start_jiffies,
@@ -1907,7 +1940,7 @@ int jbd2_journal_stop(handle_t *handle)
 		 * completes the commit thread, it just doesn't write
 		 * anything to disk.
 		 */
-		jbd_debug(2, "transaction too old, requesting commit for "
+		jbd2_debug(2, "transaction too old, requesting commit for "
 			  "handle %p\n", handle);
 		/* This is non-blocking */
 		jbd2_log_start_commit(journal, tid);
@@ -2058,54 +2091,10 @@ static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
 	jh->b_transaction = NULL;
 }
 
-void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
-{
-	struct buffer_head *bh = jh2bh(jh);
-
-	/* Get reference so that buffer cannot be freed before we unlock it */
-	get_bh(bh);
-	spin_lock(&jh->b_state_lock);
-	spin_lock(&journal->j_list_lock);
-	__jbd2_journal_unfile_buffer(jh);
-	spin_unlock(&journal->j_list_lock);
-	spin_unlock(&jh->b_state_lock);
-	jbd2_journal_put_journal_head(jh);
-	__brelse(bh);
-}
-
-/*
- * Called from jbd2_journal_try_to_free_buffers().
- *
- * Called under jh->b_state_lock
- */
-static void
-__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
-{
-	struct journal_head *jh;
-
-	jh = bh2jh(bh);
-
-	if (buffer_locked(bh) || buffer_dirty(bh))
-		goto out;
-
-	if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
-		goto out;
-
-	spin_lock(&journal->j_list_lock);
-	if (jh->b_cp_transaction != NULL) {
-		/* written-back checkpointed metadata buffer */
-		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__jbd2_journal_remove_checkpoint(jh);
-	}
-	spin_unlock(&journal->j_list_lock);
-out:
-	return;
-}
-
 /**
  * jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
- * @page: to try and free
+ * @folio: Folio to detach data from.
  *
  * For all the buffers on this page,
  * if they are fully written out ordered data, move them onto BUF_CLEAN
@@ -2134,17 +2123,17 @@ out:
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
  *
- * Return 0 on failure, 1 on success
+ * Return false on failure, true on success
  */
-int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
+bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
-	int ret = 0;
+	bool ret = false;
 
-	J_ASSERT(PageLocked(page));
+	J_ASSERT(folio_test_locked(folio));
 
-	head = page_buffers(page);
+	head = folio_buffers(folio);
 	bh = head;
 	do {
 		struct journal_head *jh;
@@ -2159,14 +2148,20 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
 			continue;
 
 		spin_lock(&jh->b_state_lock);
-		__journal_try_to_free_buffer(journal, bh);
+		if (!jh->b_transaction && !jh->b_next_transaction) {
+			spin_lock(&journal->j_list_lock);
+			/* Remove written-back checkpointed metadata buffer */
+			if (jh->b_cp_transaction != NULL)
+				jbd2_journal_try_remove_checkpoint(jh);
+			spin_unlock(&journal->j_list_lock);
+		}
 		spin_unlock(&jh->b_state_lock);
 		jbd2_journal_put_journal_head(jh);
 		if (buffer_jbd(bh))
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);
 
-	ret = try_to_free_buffers(page);
+	ret = try_to_free_buffers(folio);
busy:
 	return ret;
 }
@@ -2194,7 +2189,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 	/*
 	 * We don't want to write the buffer anymore, clear the
 	 * bit so that we don't confuse checks in
-	 * __journal_file_buffer
+	 * __jbd2_journal_file_buffer
 	 */
 	clear_buffer_dirty(bh);
 	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
@@ -2208,14 +2203,14 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 }
 
 /*
- * jbd2_journal_invalidatepage
+ * jbd2_journal_invalidate_folio
  *
  * This code is tricky.  It has a number of cases to deal with.
  *
 * There are two invariants which this code relies on:
 *
- * i_size must be updated on disk before we start calling invalidatepage on the
- * data.
+ * i_size must be updated on disk before we start calling invalidate_folio
+ * on the data.
 *
 * This is done in ext3 by defining an ext3_setattr method which
 * updates i_size before truncate gets going.  By maintaining this
@@ -2360,6 +2355,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 		spin_unlock(&jh->b_state_lock);
 		write_unlock(&journal->j_state_lock);
 		jbd2_journal_put_journal_head(jh);
+		/* Already zapped buffer? Nothing to do... */
+		if (!bh->b_bdev)
+			return 0;
 		return -EBUSY;
 	}
 	/*
@@ -2417,9 +2415,9 @@ zap_buffer_unlocked:
 }
 
 /**
- * jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidate_folio()
  * @journal: journal to use for flush...
- * @page:    page to flush
+ * @folio:   folio to flush
  * @offset:  start of the range to invalidate
 * @length:  length of the range to invalidate
 *
 * Reap page buffers containing data after in the specified range in page.
 * Can return -EBUSY if buffers are part of the committing transaction and
 * the page is straddling i_size. Caller then has to wait for current commit
 * and try again.
  */
-int jbd2_journal_invalidatepage(journal_t *journal,
-				struct page *page,
-				unsigned int offset,
-				unsigned int length)
+int jbd2_journal_invalidate_folio(journal_t *journal, struct folio *folio,
+				size_t offset, size_t length)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int stop = offset + length;
 	unsigned int curr_off = 0;
-	int partial_page = (offset || length < PAGE_SIZE);
+	int partial_page = (offset || length < folio_size(folio));
 	int may_free = 1;
 	int ret = 0;
 
-	if (!PageLocked(page))
+	if (!folio_test_locked(folio))
 		BUG();
-	if (!page_has_buffers(page))
+	head = folio_buffers(folio);
+	if (!head)
 		return 0;
 
-	BUG_ON(stop > PAGE_SIZE || stop < length);
+	BUG_ON(stop > folio_size(folio) || stop < length);
 
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be
 	 * cautious in our locking. */
 
-	head = bh = page_buffers(page);
+	bh = head;
 	do {
 		unsigned int next_off = curr_off + bh->b_size;
 		next = bh->b_this_page;
@@ -2474,8 +2471,8 @@ int jbd2_journal_invalidatepage(journal_t *journal,
 	} while (bh != head);
 
 	if (!partial_page) {
-		if (may_free && try_to_free_buffers(page))
-			J_ASSERT(!page_has_buffers(page));
+		if (may_free && try_to_free_buffers(folio))
+			J_ASSERT(!folio_buffers(folio));
 	}
 	return 0;
 }
@@ -2654,7 +2651,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 		return -EROFS;
 	journal = transaction->t_journal;
 
-	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+	jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
 			transaction->t_tid);
 
 	spin_lock(&journal->j_list_lock);
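
The update_t_max_wait() hunk above trades the old t_handle_lock spinlock for a lock-free maximum update: each contending thread re-reads the current maximum and retries cmpxchg() until either its own value is stored or another thread has already stored a larger one. Below is a minimal userspace sketch of the same pattern, using C11 atomics in place of the kernel's READ_ONCE()/cmpxchg(); the names and demo values are illustrative only, not part of the patch.

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative stand-in for transaction->t_max_wait. */
static _Atomic unsigned long t_max_wait;

/*
 * Lock-free "update maximum": retry the compare-exchange until either
 * our value is stored or a larger one already is. A failed CAS
 * refreshes oldts with the value that beat us, so the loop re-checks.
 */
static void update_max_wait(unsigned long newts)
{
	unsigned long oldts = atomic_load(&t_max_wait);

	while (oldts < newts &&
	       !atomic_compare_exchange_weak(&t_max_wait, &oldts, newts))
		;	/* retry with the refreshed oldts */
}

int main(void)
{
	update_max_wait(10);
	update_max_wait(7);	/* a smaller value never overwrites the max */
	update_max_wait(42);
	printf("t_max_wait = %lu\n", atomic_load(&t_max_wait));	/* 42 */
	return 0;
}

The loop terminates as soon as the stored maximum is at least newts, which preserves the monotonic-maximum guarantee the removed spin_lock(&transaction->t_handle_lock) section provided, without serializing start_this_handle() callers.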

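On the accounting side, the patch replaces the per-transaction jbd2_descriptor_blocks_per_trans() computation with a j_transaction_overhead_buffers value cached on the journal (presumably computed once at journal load, outside this file's diff), and the new jbd2_max_user_trans_buffers() subtracts that overhead so the credit limits apply to the user payload only. A standalone sketch of the underlying arithmetic, under assumed constants (a 12-byte journal_header_t, a 16-byte UUID, 16 bytes of commit-code slack; tag_bytes and csum_tail_bytes stand in for journal_tag_bytes() and the optional struct jbd2_journal_block_tail):

#include <stdio.h>

#define JOURNAL_HEADER_BYTES	12	/* assumed sizeof(journal_header_t) */
#define UUID_BYTES		16
#define SLACK_BYTES		16	/* commit code leaves 16 bytes of slack */

/*
 * One commit block plus enough descriptor blocks to hold one tag per
 * journalled buffer, mirroring the removed
 * jbd2_descriptor_blocks_per_trans().
 */
static int descriptor_blocks_per_trans(int blocksize, int tag_bytes,
				       int csum_tail_bytes, int max_buffers)
{
	int tag_space = blocksize - JOURNAL_HEADER_BYTES - UUID_BYTES -
			csum_tail_bytes - SLACK_BYTES;
	int tags_per_block = tag_space / tag_bytes;

	return 1 + (max_buffers + tags_per_block - 1) / tags_per_block;
}

int main(void)
{
	int max_buffers = 1024;	/* j_max_transaction_buffers */
	int overhead = descriptor_blocks_per_trans(4096, 16, 0, max_buffers);

	/* jbd2_max_user_trans_buffers(): payload = max - overhead */
	printf("overhead %d blocks, user payload %d blocks\n",
	       overhead, max_buffers - overhead);
	return 0;
}

With 4 KiB journal blocks and 16-byte tags this gives 253 tags per descriptor block, so a 1024-block transaction pays 6 blocks of overhead (five descriptor blocks plus the commit block) and leaves 1018 blocks for user payload; caching that figure avoids redoing the division on every jbd2_get_transaction() call.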