Diffstat (limited to 'fs/jbd2')
-rw-r--r--  fs/jbd2/commit.c   | 106
-rw-r--r--  fs/jbd2/journal.c  | 245
-rw-r--r--  fs/jbd2/recovery.c | 135
3 files changed, 434 insertions(+), 52 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6d2da8ad0e6f..fa688e163a80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -187,20 +187,48 @@ static int journal_wait_on_commit_record(journal_t *journal,
* use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
-static int journal_submit_inode_data_buffers(struct address_space *mapping,
- loff_t dirty_start, loff_t dirty_end)
+int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
- int ret;
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
- .range_start = dirty_start,
- .range_end = dirty_end,
+ .range_start = jinode->i_dirty_start,
+ .range_end = jinode->i_dirty_end,
};
- ret = generic_writepages(mapping, &wbc);
- return ret;
+ /*
+ * Submit the inode's data buffers. We use writepage
+ * instead of writepages because writepages can do
+ * block allocation with delalloc, and we need to write
+ * only already-allocated blocks here.
+ */
+ return generic_writepages(mapping, &wbc);
+}
+
+/* Send all the data buffers related to an inode */
+int jbd2_submit_inode_data(struct jbd2_inode *jinode)
+{
+
+ if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+ return 0;
+
+ trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
+ return jbd2_journal_submit_inode_data_buffers(jinode);
+
+}
+EXPORT_SYMBOL(jbd2_submit_inode_data);
+
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
+{
+ if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
+ !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+ return 0;
+ return filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
+ jinode->i_dirty_end);
}
+EXPORT_SYMBOL(jbd2_wait_inode_data);
/*
* Submit all the data buffers of inode associated with the transaction to
@@ -215,29 +243,20 @@ static int journal_submit_data_buffers(journal_t *journal,
{
struct jbd2_inode *jinode;
int err, ret = 0;
- struct address_space *mapping;
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- loff_t dirty_start = jinode->i_dirty_start;
- loff_t dirty_end = jinode->i_dirty_end;
-
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
- mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- /*
- * submit the inode data buffers. We use writepage
- * instead of writepages. Because writepages can do
- * block allocation with delalloc. We need to write
- * only allocated blocks here.
- */
+ /* submit the inode data buffers. */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping, dirty_start,
- dirty_end);
- if (!ret)
- ret = err;
+ if (journal->j_submit_inode_data_buffers) {
+ err = journal->j_submit_inode_data_buffers(jinode);
+ if (!ret)
+ ret = err;
+ }
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
@@ -248,6 +267,15 @@ static int journal_submit_data_buffers(journal_t *journal,
return ret;
}
+int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+
+ return filemap_fdatawait_range_keep_errors(mapping,
+ jinode->i_dirty_start,
+ jinode->i_dirty_end);
+}
+
/*
* Wait for data submitted for writeout, refile inodes to proper
* transaction if needed.
@@ -262,18 +290,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- loff_t dirty_start = jinode->i_dirty_start;
- loff_t dirty_end = jinode->i_dirty_end;
-
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_range_keep_errors(
- jinode->i_vfs_inode->i_mapping, dirty_start,
- dirty_end);
- if (!ret)
- ret = err;
+ /* wait for the inode data buffers writeout. */
+ if (journal->j_finish_inode_data_buffers) {
+ err = journal->j_finish_inode_data_buffers(jinode);
+ if (!ret)
+ ret = err;
+ }
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb();
@@ -413,6 +439,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(journal->j_running_transaction != NULL);
J_ASSERT(journal->j_committing_transaction == NULL);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
+ while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&journal->j_fc_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ write_unlock(&journal->j_state_lock);
+ schedule();
+ write_lock(&journal->j_state_lock);
+ finish_wait(&journal->j_fc_wait, &wait);
+ }
+ write_unlock(&journal->j_state_lock);
+
commit_transaction = journal->j_running_transaction;
trace_jbd2_start_commit(journal, commit_transaction);
@@ -420,6 +460,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_tid);
write_lock(&journal->j_state_lock);
+ journal->j_fc_off = 0;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
commit_transaction->t_state = T_LOCKED;
@@ -1119,12 +1160,16 @@ restart_loop:
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
+ if (journal->j_fc_cleanup_callback)
+ journal->j_fc_cleanup_callback(journal, 1);
trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
@@ -1136,6 +1181,7 @@ restart_loop:
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_done_commit);
+ wake_up(&journal->j_fc_wait);
/*
* Calculate overall stats
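The commit path above no longer calls the writeback helpers directly: it goes through the journal's j_submit_inode_data_buffers and j_finish_inode_data_buffers hooks (the fields live in include/linux/jbd2.h, outside this diff), and the two exported jbd2_journal_*_inode_data_buffers functions are the stock implementations. As a minimal sketch, a client filesystem that needs no special handling could wire the hooks to those defaults; the function name below is hypothetical, and ext4's real callbacks do additional work:

#include <linux/jbd2.h>

/* Illustrative only: point both hooks at the stock jbd2 helpers. */
static void example_set_inode_data_callbacks(journal_t *journal)
{
	journal->j_submit_inode_data_buffers =
		jbd2_journal_submit_inode_data_buffers;
	journal->j_finish_inode_data_buffers =
		jbd2_journal_finish_inode_data_buffers;
}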
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 17fdc482f554..0c7c42bd530f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -91,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
+EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
+EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -157,7 +159,9 @@ static void commit_timeout(struct timer_list *t)
*
* 1) COMMIT: Every so often we need to commit the current state of the
* filesystem to disk. The journal thread is responsible for writing
- * all of the metadata buffers to disk.
+ * all of the metadata buffers to disk. If a fast commit is ongoing,
+ * the journal thread waits until it is done and then continues
+ * from there.
*
* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
* of the data in that part of the log has been rewritten elsewhere on
@@ -714,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
return err;
}
+/*
+ * Start a fast commit. If there's an ongoing fast or full commit, wait for
+ * it to complete. Returns 0 if a new fast commit was started. Returns
+ * -EALREADY if a fast commit is not needed, either because there's already a
+ * commit going on or because this tid has already been committed. Returns
+ * -EINVAL if no jbd2 commit has yet been performed.
+ */
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
+{
+ /*
+ * Fast commits only allowed if at least one full commit has
+ * been processed.
+ */
+ if (!journal->j_stats.ts_tid)
+ return -EINVAL;
+
+ if (tid <= journal->j_commit_sequence)
+ return -EALREADY;
+
+ write_lock(&journal->j_state_lock);
+ if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&journal->j_fc_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ write_unlock(&journal->j_state_lock);
+ schedule();
+ finish_wait(&journal->j_fc_wait, &wait);
+ return -EALREADY;
+ }
+ journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
+ write_unlock(&journal->j_state_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_begin_commit);
+
+/*
+ * Stop a fast commit. If fallback is set, this function starts a full commit
+ * of TID tid before any other fast commit can start.
+ */
+static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
+{
+ if (journal->j_fc_cleanup_callback)
+ journal->j_fc_cleanup_callback(journal, 0);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
+ if (fallback)
+ journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
+ write_unlock(&journal->j_state_lock);
+ wake_up(&journal->j_fc_wait);
+ if (fallback)
+ return jbd2_complete_transaction(journal, tid);
+ return 0;
+}
+
+int jbd2_fc_end_commit(journal_t *journal)
+{
+ return __jbd2_fc_end_commit(journal, 0, false);
+}
+EXPORT_SYMBOL(jbd2_fc_end_commit);
+
+int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+{
+ return __jbd2_fc_end_commit(journal, tid, true);
+}
+EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
+
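Taken together, jbd2_fc_begin_commit(), jbd2_fc_end_commit() and jbd2_fc_end_commit_fallback() define the protocol a filesystem's fast commit path is expected to follow. A minimal sketch of that calling convention, assuming the filesystem supplies its own block-writing step; example_write_fc_blocks() and example_fast_commit() are hypothetical names, and the real ext4 code handles the error cases in more detail:

#include <linux/jbd2.h>

static int example_write_fc_blocks(journal_t *journal, tid_t tid); /* hypothetical, sketched further below */

/* Illustrative only: begin a fast commit, fall back to a full commit on failure. */
static int example_fast_commit(journal_t *journal, tid_t tid)
{
	int ret;

	ret = jbd2_fc_begin_commit(journal, tid);
	if (ret)
		/* Fast commit not possible or not needed: do a full commit instead. */
		return jbd2_complete_transaction(journal, tid);

	ret = example_write_fc_blocks(journal, tid);
	if (ret)
		/* Writing the fast commit area failed: force a full commit of this tid. */
		return jbd2_fc_end_commit_fallback(journal, tid);

	return jbd2_fc_end_commit(journal);
}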
/* Return 1 when transaction with given tid has already committed. */
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
{
@@ -782,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
return jbd2_journal_bmap(journal, blocknr, retp);
}
+/* Map one fast commit buffer for use by the file system */
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
+{
+ unsigned long long pblock;
+ unsigned long blocknr;
+ int ret = 0;
+ struct buffer_head *bh;
+ int fc_off;
+
+ *bh_out = NULL;
+ write_lock(&journal->j_state_lock);
+
+ if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
+ fc_off = journal->j_fc_off;
+ blocknr = journal->j_fc_first + fc_off;
+ journal->j_fc_off++;
+ } else {
+ ret = -EINVAL;
+ }
+ write_unlock(&journal->j_state_lock);
+
+ if (ret)
+ return ret;
+
+ ret = jbd2_journal_bmap(journal, blocknr, &pblock);
+ if (ret)
+ return ret;
+
+ bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
+ if (!bh)
+ return -ENOMEM;
+
+ lock_buffer(bh);
+
+ clear_buffer_uptodate(bh);
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+ journal->j_fc_wbuf[fc_off] = bh;
+
+ *bh_out = bh;
+
+ return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_get_buf);
+
+/*
+ * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
+ * for completion.
+ */
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
+{
+ struct buffer_head *bh;
+ int i, j_fc_off;
+
+ read_lock(&journal->j_state_lock);
+ j_fc_off = journal->j_fc_off;
+ read_unlock(&journal->j_state_lock);
+
+ /*
+ * Wait in reverse order to minimize chances of us being woken up before
+ * all IOs have completed
+ */
+ for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
+ bh = journal->j_fc_wbuf[i];
+ wait_on_buffer(bh);
+ if (unlikely(!buffer_uptodate(bh)))
+ return -EIO;
+ put_bh(bh);
+ journal->j_fc_wbuf[i] = NULL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_wait_bufs);
+
+/*
+ * Release fast commit buffers that were allocated by jbd2_fc_get_buf
+ * but never handed back through jbd2_fc_wait_bufs.
+ */
+int jbd2_fc_release_bufs(journal_t *journal)
+{
+ struct buffer_head *bh;
+ int i, j_fc_off;
+
+ read_lock(&journal->j_state_lock);
+ j_fc_off = journal->j_fc_off;
+ read_unlock(&journal->j_state_lock);
+
+ /*
+ * Release in reverse order; stop at the first slot that no longer
+ * holds a buffer.
+ */
+ for (i = j_fc_off - 1; i >= 0; i--) {
+ bh = journal->j_fc_wbuf[i];
+ if (!bh)
+ break;
+ put_bh(bh);
+ journal->j_fc_wbuf[i] = NULL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_release_bufs);
+
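jbd2_fc_get_buf(), jbd2_fc_wait_bufs() and jbd2_fc_release_bufs() are the whole interface a filesystem uses for the fast commit area itself. A minimal sketch of the expected pattern (this is the hypothetical example_write_fc_blocks() referenced earlier); the payload written into each block is entirely filesystem specific, and write_dirty_buffer() from <linux/buffer_head.h> stands in for the real submission path:

#include <linux/jbd2.h>
#include <linux/buffer_head.h>

/* Illustrative only: map, fill, submit and wait on a batch of fast commit blocks. */
static int example_write_fc_blocks(journal_t *journal, tid_t tid)
{
	struct buffer_head *bh;
	int i, ret, nr_blks = 4;	/* batch size chosen arbitrarily */

	for (i = 0; i < nr_blks; i++) {
		ret = jbd2_fc_get_buf(journal, &bh);
		if (ret)
			goto release;
		memset(bh->b_data, 0, journal->j_blocksize);
		/* ...encode the filesystem's fast commit payload for @tid here... */
		write_dirty_buffer(bh, REQ_SYNC);
	}

	/* Wait for everything submitted in this batch. */
	return jbd2_fc_wait_bufs(journal, nr_blks);

release:
	/* Drop references to any blocks that were mapped but not consumed. */
	jbd2_fc_release_bufs(journal);
	return ret;
}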
/*
* Conversion of logical to physical block numbers for the journal
*
@@ -1140,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
init_waitqueue_head(&journal->j_wait_commit);
init_waitqueue_head(&journal->j_wait_updates);
init_waitqueue_head(&journal->j_wait_reserved);
+ init_waitqueue_head(&journal->j_fc_wait);
mutex_init(&journal->j_abort_mutex);
mutex_init(&journal->j_barrier);
mutex_init(&journal->j_checkpoint_mutex);
@@ -1179,6 +1357,14 @@ static journal_t *journal_init_common(struct block_device *bdev,
if (!journal->j_wbuf)
goto err_cleanup;
+ if (journal->j_fc_wbufsize > 0) {
+ journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
+ sizeof(struct buffer_head *),
+ GFP_KERNEL);
+ if (!journal->j_fc_wbuf)
+ goto err_cleanup;
+ }
+
bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
pr_err("%s: Cannot get buffer for journal superblock\n",
@@ -1192,11 +1378,23 @@ static journal_t *journal_init_common(struct block_device *bdev,
err_cleanup:
kfree(journal->j_wbuf);
+ kfree(journal->j_fc_wbuf);
jbd2_journal_destroy_revoke(journal);
kfree(journal);
return NULL;
}
+int jbd2_fc_init(journal_t *journal, int num_fc_blks)
+{
+ journal->j_fc_wbufsize = num_fc_blks;
+ journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
+ sizeof(struct buffer_head *), GFP_KERNEL);
+ if (!journal->j_fc_wbuf)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(jbd2_fc_init);
+
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
*
* Create a journal structure assigned some fixed set of disk blocks to
@@ -1314,11 +1512,20 @@ static int journal_reset(journal_t *journal)
}
journal->j_first = first;
- journal->j_last = last;
- journal->j_head = first;
- journal->j_tail = first;
- journal->j_free = last - first;
+ if (jbd2_has_feature_fast_commit(journal) &&
+ journal->j_fc_wbufsize > 0) {
+ journal->j_fc_last = last;
+ journal->j_last = last - journal->j_fc_wbufsize;
+ journal->j_fc_first = journal->j_last + 1;
+ journal->j_fc_off = 0;
+ } else {
+ journal->j_last = last;
+ }
+
+ journal->j_head = journal->j_first;
+ journal->j_tail = journal->j_first;
+ journal->j_free = journal->j_last - journal->j_first;
journal->j_tail_sequence = journal->j_transaction_sequence;
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
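To make the split concrete (the numbers are purely illustrative): with j_first = 1, a 32768-block journal and j_fc_wbufsize = 256, a fast commit capable journal ends up with j_last = 32512, j_fc_first = 32513, j_fc_last = 32768 and j_free = 32511, i.e. the last 256 blocks are carved out for fast commits and the regular log shrinks by the same amount. load_superblock() below applies the same arithmetic when an existing journal is read back in.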
@@ -1464,6 +1671,7 @@ out:
static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
+ bool had_fast_commit = false;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
lock_buffer(journal->j_sb_buffer);
@@ -1477,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0);
+ if (jbd2_has_feature_fast_commit(journal)) {
+ /*
+ * When the journal is clean, there is no need to write the fast commit
+ * flag and make the file system incompatible with older kernels.
+ */
+ jbd2_clear_feature_fast_commit(journal);
+ had_fast_commit = true;
+ }
jbd2_write_superblock(journal, write_op);
+ if (had_fast_commit)
+ jbd2_set_feature_fast_commit(journal);
+
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_FLUSHED;
@@ -1663,9 +1882,18 @@ static int load_superblock(journal_t *journal)
journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
journal->j_tail = be32_to_cpu(sb->s_start);
journal->j_first = be32_to_cpu(sb->s_first);
- journal->j_last = be32_to_cpu(sb->s_maxlen);
journal->j_errno = be32_to_cpu(sb->s_errno);
+ if (jbd2_has_feature_fast_commit(journal) &&
+ journal->j_fc_wbufsize > 0) {
+ journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
+ journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+ journal->j_fc_first = journal->j_last + 1;
+ journal->j_fc_off = 0;
+ } else {
+ journal->j_last = be32_to_cpu(sb->s_maxlen);
+ }
+
return 0;
}
@@ -1726,6 +1954,9 @@ int jbd2_journal_load(journal_t *journal)
*/
journal->j_flags &= ~JBD2_ABORT;
+ if (journal->j_fc_wbufsize > 0)
+ jbd2_journal_set_features(journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */
@@ -1809,6 +2040,8 @@ int jbd2_journal_destroy(journal_t *journal)
jbd2_journal_destroy_revoke(journal);
if (journal->j_chksum_driver)
crypto_free_shash(journal->j_chksum_driver);
+ if (journal->j_fc_wbufsize > 0)
+ kfree(journal->j_fc_wbuf);
kfree(journal->j_wbuf);
kfree(journal);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index faa97d748474..eb2606133cd8 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -35,7 +35,6 @@ struct recovery_info
int nr_revoke_hits;
};
-enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass);
static int scan_revoke_records(journal_t *, struct buffer_head *,
@@ -225,10 +224,51 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
/* Make sure we wrap around the log correctly! */
#define wrap(journal, var) \
do { \
- if (var >= (journal)->j_last) \
- var -= ((journal)->j_last - (journal)->j_first); \
+ unsigned long _wrap_last = \
+ jbd2_has_feature_fast_commit(journal) ? \
+ (journal)->j_fc_last : (journal)->j_last; \
+ \
+ if (var >= _wrap_last) \
+ var -= (_wrap_last - (journal)->j_first); \
} while (0)
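A quick worked example of the new wrap() behaviour (illustrative numbers): on a fast commit capable journal with j_first = 1 and j_fc_last = 32768, a scan cursor that reaches block 32768 wraps to 32768 - (32768 - 1) = 1, i.e. the wrap point moves from j_last to the end of the fast commit area when the feature is enabled.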
+static int fc_do_one_pass(journal_t *journal,
+ struct recovery_info *info, enum passtype pass)
+{
+ unsigned int expected_commit_id = info->end_transaction;
+ unsigned long next_fc_block;
+ struct buffer_head *bh;
+ int err = 0;
+
+ next_fc_block = journal->j_fc_first;
+ if (!journal->j_fc_replay_callback)
+ return 0;
+
+ while (next_fc_block <= journal->j_fc_last) {
+ jbd_debug(3, "Fast commit replay: next block %ld",
+ next_fc_block);
+ err = jread(&bh, journal, next_fc_block);
+ if (err) {
+ jbd_debug(3, "Fast commit replay: read error");
+ break;
+ }
+
+ jbd_debug(3, "Processing fast commit blk with seq %d");
+ err = journal->j_fc_replay_callback(journal, bh, pass,
+ next_fc_block - journal->j_fc_first,
+ expected_commit_id);
+ next_fc_block++;
+ if (err < 0 || err == JBD2_FC_REPLAY_STOP)
+ break;
+ err = 0;
+ }
+
+ if (err)
+ jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
+
+ return err;
+}
+
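fc_do_one_pass() hands each block of the fast commit area to the filesystem through j_fc_replay_callback, both during PASS_SCAN and during PASS_REPLAY. A minimal sketch of the callback shape it expects, assuming the JBD2_FC_REPLAY_* return codes from include/linux/jbd2.h; the body is entirely filesystem specific (ext4 supplies the real implementation) and the function name is hypothetical:

/* Illustrative only: skeleton of a fast commit replay callback. */
static int example_fc_replay(journal_t *journal, struct buffer_head *bh,
			     enum passtype pass, int off,
			     tid_t expected_commit_id)
{
	if (pass == PASS_REPLAY) {
		/* ...decode bh->b_data and re-apply the logged operations... */
	}
	/* Ask for the next block; JBD2_FC_REPLAY_STOP would end the loop above. */
	return JBD2_FC_REPLAY_CONTINUE;
}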
/**
* jbd2_journal_recover - recovers a on-disk journal
* @journal: the journal to recover
@@ -428,6 +468,8 @@ static int do_one_pass(journal_t *journal,
__u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0;
int block_error = 0;
+ bool need_check_commit_time = false;
+ __u64 last_trans_commit_time = 0, commit_time;
/*
* First thing is to establish what we expect to find in the log
@@ -470,7 +512,9 @@ static int do_one_pass(journal_t *journal,
break;
jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
- next_commit_ID, next_log_block, journal->j_last);
+ next_commit_ID, next_log_block,
+ jbd2_has_feature_fast_commit(journal) ?
+ journal->j_fc_last : journal->j_last);
/* Skip over each chunk of the transaction looking
* either the next descriptor block or the final commit
@@ -520,12 +564,21 @@ static int do_one_pass(journal_t *journal,
if (descr_csum_size > 0 &&
!jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) {
- printk(KERN_ERR "JBD2: Invalid checksum "
- "recovering block %lu in log\n",
- next_log_block);
- err = -EFSBADCRC;
- brelse(bh);
- goto failed;
+ /*
+ * PASS_SCAN can see stale blocks due to lazy
+ * journal init. Don't error out on those yet.
+ */
+ if (pass != PASS_SCAN) {
+ pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
+ next_log_block);
+ err = -EFSBADCRC;
+ brelse(bh);
+ goto failed;
+ }
+ need_check_commit_time = true;
+ jbd_debug(1,
+ "invalid descriptor block found in %lu\n",
+ next_log_block);
}
/* If it is a valid descriptor block, replay it
@@ -535,6 +588,7 @@ static int do_one_pass(journal_t *journal,
if (pass != PASS_REPLAY) {
if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal) &&
+ !need_check_commit_time &&
!info->end_transaction) {
if (calc_chksums(journal, bh,
&next_log_block,
@@ -683,11 +737,41 @@ static int do_one_pass(journal_t *journal,
* mentioned conditions. Hence assume
* "Interrupted Commit".)
*/
+ commit_time = be64_to_cpu(
+ ((struct commit_header *)bh->b_data)->h_commit_sec);
+ /*
+ * If need_check_commit_time is set, we are in PASS_SCAN
+ * and a checksum verification failed earlier. If
+ * commit_time is still increasing, this is the same
+ * journal (real corruption); otherwise the block is a
+ * stale leftover, so just end recovery here.
+ */
+ if (need_check_commit_time) {
+ if (commit_time >= last_trans_commit_time) {
+ pr_err("JBD2: Invalid checksum found in transaction %u\n",
+ next_commit_ID);
+ err = -EFSBADCRC;
+ brelse(bh);
+ goto failed;
+ }
+ ignore_crc_mismatch:
+ /*
+ * The block likely does not belong to the same
+ * journal; just end this recovery with success.
+ */
+ jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
+ next_commit_ID);
+ err = 0;
+ brelse(bh);
+ goto done;
+ }
- /* Found an expected commit block: if checksums
- * are present verify them in PASS_SCAN; else not
+ /*
+ * Found an expected commit block: if checksums
+ * are present, verify them in PASS_SCAN; else not
* much to do other than move on to the next sequence
- * number. */
+ * number.
+ */
if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal)) {
struct commit_header *cbh =
@@ -719,6 +803,8 @@ static int do_one_pass(journal_t *journal,
!jbd2_commit_block_csum_verify(journal,
bh->b_data)) {
chksum_error:
+ if (commit_time < last_trans_commit_time)
+ goto ignore_crc_mismatch;
info->end_transaction = next_commit_ID;
if (!jbd2_has_feature_async_commit(journal)) {
@@ -728,11 +814,24 @@ static int do_one_pass(journal_t *journal,
break;
}
}
+ if (pass == PASS_SCAN)
+ last_trans_commit_time = commit_time;
brelse(bh);
next_commit_ID++;
continue;
case JBD2_REVOKE_BLOCK:
+ /*
+ * Check the revoke block checksum in PASS_SCAN; if it
+ * fails, check the commit block time later.
+ */
+ if (pass == PASS_SCAN &&
+ !jbd2_descriptor_block_csum_verify(journal,
+ bh->b_data)) {
+ jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
+ next_log_block);
+ need_check_commit_time = true;
+ }
/* If we aren't in the REVOKE pass, then we can
* just skip over this block. */
if (pass != PASS_REVOKE) {
@@ -777,6 +876,13 @@ static int do_one_pass(journal_t *journal,
success = -EIO;
}
}
+
+ if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
+ err = fc_do_one_pass(journal, info, pass);
+ if (err)
+ success = err;
+ }
+
if (block_error && success == 0)
success = -EIO;
return success;
@@ -800,9 +906,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t);
rcount = be32_to_cpu(header->r_count);
- if (!jbd2_descriptor_block_csum_verify(journal, header))
- return -EFSBADCRC;
-
if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_block_tail);
if (rcount > journal->j_blocksize - csum_size)
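For a concrete reading of the PASS_SCAN heuristic added above (the timestamps are hypothetical): suppose the last commit block that verified cleanly carried h_commit_sec = 1000 and a later descriptor or revoke block then fails its checksum. If the next commit block reports h_commit_sec = 1100, time is still moving forward, so the bad block belongs to this journal and recovery fails with -EFSBADCRC; if it reports h_commit_sec = 900, the block is treated as stale data left over from an earlier journal (for example after lazy journal initialisation) and recovery simply ends, successfully, at the last fully verified transaction.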