summaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/Kconfig2
-rw-r--r--fs/jbd2/checkpoint.c45
-rw-r--r--fs/jbd2/commit.c37
-rw-r--r--fs/jbd2/journal.c305
-rw-r--r--fs/jbd2/recovery.c414
-rw-r--r--fs/jbd2/revoke.c38
-rw-r--r--fs/jbd2/transaction.c71
7 files changed, 453 insertions, 459 deletions
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 4ad2c67f93f1..9c19e1512101 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -2,8 +2,6 @@
config JBD2
tristate
select CRC32
- select CRYPTO
- select CRYPTO_CRC32C
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c97e64c4784..b3971e91e8eb 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -79,17 +79,23 @@ __releases(&journal->j_state_lock)
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
tid_t tid = 0;
+ bool has_transaction = false;
- if (journal->j_committing_transaction)
+ if (journal->j_committing_transaction) {
tid = journal->j_committing_transaction->t_tid;
+ has_transaction = true;
+ }
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
- } else if (jbd2_cleanup_journal_tail(journal) == 0) {
- /* We were able to recover space; yay! */
+ } else if (jbd2_cleanup_journal_tail(journal) <= 0) {
+ /*
+ * We were able to recover space or the
+ * journal was aborted due to an error.
+ */
;
- } else if (tid) {
+ } else if (has_transaction) {
/*
* jbd2_journal_commit_transaction() may want
* to take the checkpoint_mutex if JBD2_FLUSHED
@@ -337,8 +343,6 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
-enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
-
/*
* journal_shrink_one_cp_list
*
@@ -350,7 +354,7 @@ enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- enum shrink_type type,
+ enum jbd2_shrink_type type,
bool *released)
{
struct journal_head *last_jh;
@@ -367,12 +371,12 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
- if (type == SHRINK_DESTROY) {
+ if (type == JBD2_SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0) {
- if (type == SHRINK_BUSY_SKIP)
+ if (type == JBD2_SHRINK_BUSY_SKIP)
continue;
break;
}
@@ -409,6 +413,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid_t tid = 0;
unsigned long nr_freed = 0;
unsigned long freed;
+ bool first_set = false;
again:
spin_lock(&journal->j_list_lock);
@@ -428,8 +433,10 @@ again:
else
transaction = journal->j_checkpoint_transactions;
- if (!first_tid)
+ if (!first_set) {
first_tid = transaction->t_tid;
+ first_set = true;
+ }
last_transaction = journal->j_checkpoint_transactions->t_cpprev;
next_transaction = transaction;
last_tid = last_transaction->t_tid;
@@ -439,7 +446,7 @@ again:
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- SHRINK_BUSY_SKIP, &released);
+ JBD2_SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
@@ -459,7 +466,7 @@ again:
spin_unlock(&journal->j_list_lock);
cond_resched();
- if (*nr_to_scan && next_tid)
+ if (*nr_to_scan && journal->j_shrink_transaction)
goto again;
out:
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
@@ -472,21 +479,25 @@ out:
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
- * If 'destroy' is set, release all buffers unconditionally.
+ * If 'type' is JBD2_SHRINK_DESTROY, release all buffers unconditionally. If
+ * 'type' is JBD2_SHRINK_BUSY_STOP, will stop release buffers if encounters a
+ * busy buffer. To avoid wasting CPU cycles scanning the buffer list in some
+ * cases, don't pass JBD2_SHRINK_BUSY_SKIP 'type' for this function.
*
* Called with j_list_lock held.
*/
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal,
+ enum jbd2_shrink_type type)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- enum shrink_type type;
bool released;
+ WARN_ON_ONCE(type == JBD2_SHRINK_BUSY_SKIP);
+
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
- type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
@@ -527,7 +538,7 @@ void jbd2_journal_destroy_checkpoint(journal_t *journal)
spin_unlock(&journal->j_list_lock);
break;
}
- __jbd2_journal_clean_checkpoint_list(journal, true);
+ __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_DESTROY);
spin_unlock(&journal->j_list_lock);
cond_resched();
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5e122586e06e..7203d2d2624d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -57,8 +57,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under j_list_lock. The caller provided us with a ref against the
+ * buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
@@ -99,7 +99,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
h->h_chksum_type = 0;
h->h_chksum_size = 0;
h->h_chksum[0] = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+ csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize);
h->h_chksum[0] = cpu_to_be32(csum);
}
@@ -330,8 +330,8 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
seq = cpu_to_be32(sequence);
addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
- csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
- csum32 = jbd2_chksum(j, csum32, addr, bh->b_size);
+ csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+ csum32 = jbd2_chksum(csum32, addr, bh->b_size);
kunmap_local(addr);
if (jbd2_has_feature_csum3(j))
@@ -353,7 +353,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
struct buffer_head *descriptor;
struct buffer_head **wbuf = journal->j_wbuf;
int bufs;
- int flags;
+ int escape;
int err;
unsigned long long blocknr;
ktime_t start_time;
@@ -501,7 +501,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
- __jbd2_journal_clean_checkpoint_list(journal, false);
+ __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
spin_unlock(&journal->j_list_lock);
jbd2_debug(3, "JBD2: commit phase 1\n");
@@ -571,7 +571,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_nr_buffers <=
atomic_read(&commit_transaction->t_outstanding_credits));
- err = 0;
bufs = 0;
descriptor = NULL;
while (commit_transaction->t_buffers) {
@@ -661,19 +660,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
JBUFFER_TRACE(jh, "ph3: write metadata");
- flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+ escape = jbd2_journal_write_metadata_buffer(commit_transaction,
jh, &wbuf[bufs], blocknr);
- if (flags < 0) {
- jbd2_journal_abort(journal, flags);
- continue;
- }
jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
/* Record the new block's tag in the current descriptor
buffer */
tag_flag = 0;
- if (flags & 1)
+ if (escape)
tag_flag |= JBD2_FLAG_ESCAPE;
if (!first_tag)
tag_flag |= JBD2_FLAG_SAME_UUID;
@@ -743,10 +738,8 @@ start_journal_io:
err = journal_finish_inode_data_buffers(journal, commit_transaction);
if (err) {
printk(KERN_WARNING
- "JBD2: Detected IO errors while flushing file data "
- "on %s\n", journal->j_devname);
- if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
- jbd2_journal_abort(journal, err);
+ "JBD2: Detected IO errors %d while flushing file data on %s\n",
+ err, journal->j_devname);
err = 0;
}
@@ -767,7 +760,7 @@ start_journal_io:
if (first_block < journal->j_tail)
freed += journal->j_last - journal->j_first;
/* Update tail only if we free significant amount of space */
- if (freed < jbd2_journal_get_max_txn_bufs(journal))
+ if (freed < journal->j_max_transaction_buffers)
update_tail = 0;
}
J_ASSERT(commit_transaction->t_state == T_COMMIT);
@@ -777,9 +770,9 @@ start_journal_io:
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
- * the commit record
+ * the commit record and update the journal tail sequence.
*/
- if (commit_transaction->t_need_data_flush &&
+ if ((commit_transaction->t_need_data_flush || update_tail) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(journal->j_fs_dev);
@@ -1108,7 +1101,7 @@ restart_loop:
commit_transaction->t_state = T_COMMIT_CALLBACK;
J_ASSERT(commit_transaction == journal->j_committing_transaction);
- journal->j_commit_sequence = commit_transaction->t_tid;
+ WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
journal->j_committing_transaction = NULL;
commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b6c114c11b97..6d5e76848733 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -83,7 +83,7 @@ EXPORT_SYMBOL(jbd2_log_wait_commit);
EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
-EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
+EXPORT_SYMBOL(jbd2_journal_blocks_per_folio);
EXPORT_SYMBOL(jbd2_journal_invalidate_folio);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
@@ -115,14 +115,14 @@ void __jbd2_debug(int level, const char *file, const char *func,
#endif
/* Checksumming functions */
-static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
+static __be32 jbd2_superblock_csum(journal_superblock_t *sb)
{
__u32 csum;
__be32 old_csum;
old_csum = sb->s_checksum;
sb->s_checksum = 0;
- csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
+ csum = jbd2_chksum(~0, (char *)sb, sizeof(journal_superblock_t));
sb->s_checksum = old_csum;
return cpu_to_be32(csum);
@@ -197,7 +197,7 @@ loop:
if (journal->j_commit_sequence != journal->j_commit_request) {
jbd2_debug(1, "OK, requests differ\n");
write_unlock(&journal->j_state_lock);
- del_timer_sync(&journal->j_commit_timer);
+ timer_delete_sync(&journal->j_commit_timer);
jbd2_journal_commit_transaction(journal);
write_lock(&journal->j_state_lock);
goto loop;
@@ -220,19 +220,12 @@ loop:
* so we don't sleep
*/
DEFINE_WAIT(wait);
- int should_sleep = 1;
prepare_to_wait(&journal->j_wait_commit, &wait,
TASK_INTERRUPTIBLE);
- if (journal->j_commit_sequence != journal->j_commit_request)
- should_sleep = 0;
transaction = journal->j_running_transaction;
- if (transaction && time_after_eq(jiffies,
- transaction->t_expires))
- should_sleep = 0;
- if (journal->j_flags & JBD2_UNMOUNT)
- should_sleep = 0;
- if (should_sleep) {
+ if (transaction == NULL ||
+ time_before(jiffies, transaction->t_expires)) {
write_unlock(&journal->j_state_lock);
schedule();
write_lock(&journal->j_state_lock);
@@ -253,7 +246,7 @@ loop:
goto loop;
end_loop:
- del_timer_sync(&journal->j_commit_timer);
+ timer_delete_sync(&journal->j_commit_timer);
journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit);
jbd2_debug(1, "Journal thread exiting.\n");
@@ -288,6 +281,16 @@ static void journal_kill_thread(journal_t *journal)
write_unlock(&journal->j_state_lock);
}
+static inline bool jbd2_data_needs_escaping(char *data)
+{
+ return *((__be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER);
+}
+
+static inline void jbd2_data_do_escape(char *data)
+{
+ *((unsigned int *)data) = 0;
+}
+
/*
* jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
*
@@ -315,12 +318,8 @@ static void journal_kill_thread(journal_t *journal)
*
*
* Return value:
- * <0: Error
- * >=0: Finished OK
- *
- * On success:
- * Bit 0 set == escape performed on the data
- * Bit 1 set == buffer copy-out performed (kfree the data after IO)
+ * =0: Finished OK without escape
+ * =1: Finished OK with escape
*/
int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
@@ -328,10 +327,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct buffer_head **bh_out,
sector_t blocknr)
{
- int need_copy_out = 0;
- int done_copy_out = 0;
int do_escape = 0;
- char *mapped_data;
struct buffer_head *new_bh;
struct folio *new_folio;
unsigned int new_offset;
@@ -355,83 +351,63 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
atomic_set(&new_bh->b_count, 1);
spin_lock(&jh_in->b_state_lock);
-repeat:
/*
* If a new transaction has already done a buffer copy-out, then
* we use that version of the data for the commit.
*/
if (jh_in->b_frozen_data) {
- done_copy_out = 1;
new_folio = virt_to_folio(jh_in->b_frozen_data);
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
+ do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data);
+ if (do_escape)
+ jbd2_data_do_escape(jh_in->b_frozen_data);
} else {
- new_folio = jh2bh(jh_in)->b_folio;
- new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data);
- }
+ char *tmp;
+ char *mapped_data;
- mapped_data = kmap_local_folio(new_folio, new_offset);
- /*
- * Fire data frozen trigger if data already wasn't frozen. Do this
- * before checking for escaping, as the trigger may modify the magic
- * offset. If a copy-out happens afterwards, it will have the correct
- * data in the buffer.
- */
- if (!done_copy_out)
+ new_folio = bh_in->b_folio;
+ new_offset = offset_in_folio(new_folio, bh_in->b_data);
+ mapped_data = kmap_local_folio(new_folio, new_offset);
+ /*
+ * Fire data frozen trigger if data already wasn't frozen. Do
+ * this before checking for escaping, as the trigger may modify
+ * the magic offset. If a copy-out happens afterwards, it will
+ * have the correct data in the buffer.
+ */
jbd2_buffer_frozen_trigger(jh_in, mapped_data,
jh_in->b_triggers);
-
- /*
- * Check for escaping
- */
- if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) {
- need_copy_out = 1;
- do_escape = 1;
- }
- kunmap_local(mapped_data);
-
- /*
- * Do we need to do a data copy?
- */
- if (need_copy_out && !done_copy_out) {
- char *tmp;
+ do_escape = jbd2_data_needs_escaping(mapped_data);
+ kunmap_local(mapped_data);
+ /*
+ * Do we need to do a data copy?
+ */
+ if (!do_escape)
+ goto escape_done;
spin_unlock(&jh_in->b_state_lock);
- tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
- if (!tmp) {
- brelse(new_bh);
- return -ENOMEM;
- }
+ tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS | __GFP_NOFAIL);
spin_lock(&jh_in->b_state_lock);
if (jh_in->b_frozen_data) {
jbd2_free(tmp, bh_in->b_size);
- goto repeat;
+ goto copy_done;
}
jh_in->b_frozen_data = tmp;
memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
-
- new_folio = virt_to_folio(tmp);
- new_offset = offset_in_folio(new_folio, tmp);
- done_copy_out = 1;
-
/*
* This isn't strictly necessary, as we're using frozen
* data for the escaping, but it keeps consistency with
* b_frozen_data usage.
*/
jh_in->b_frozen_triggers = jh_in->b_triggers;
- }
- /*
- * Did we need to do an escaping? Now we've done all the
- * copying, we can finally do so.
- */
- if (do_escape) {
- mapped_data = kmap_local_folio(new_folio, new_offset);
- *((unsigned int *)mapped_data) = 0;
- kunmap_local(mapped_data);
+copy_done:
+ new_folio = virt_to_folio(jh_in->b_frozen_data);
+ new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
+ jbd2_data_do_escape(jh_in->b_frozen_data);
}
+escape_done:
folio_set_bh(new_bh, new_folio, new_offset);
new_bh->b_size = bh_in->b_size;
new_bh->b_bdev = journal->j_dev;
@@ -454,7 +430,7 @@ repeat:
set_buffer_shadow(bh_in);
spin_unlock(&jh_in->b_state_lock);
- return do_escape | (done_copy_out << 1);
+ return do_escape;
}
/*
@@ -627,7 +603,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
{
int ret = 0;
- transaction_t *commit_trans;
+ transaction_t *commit_trans, *running_trans;
if (!(journal->j_flags & JBD2_BARRIER))
return 0;
@@ -637,6 +613,16 @@ int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
goto out;
commit_trans = journal->j_committing_transaction;
if (!commit_trans || commit_trans->t_tid != tid) {
+ running_trans = journal->j_running_transaction;
+ /*
+ * The query transaction hasn't started committing,
+ * it must still be running.
+ */
+ if (WARN_ON_ONCE(!running_trans ||
+ running_trans->t_tid != tid))
+ goto out;
+
+ running_trans->t_need_data_flush = 1;
ret = 1;
goto out;
}
@@ -724,7 +710,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
return -EINVAL;
write_lock(&journal->j_state_lock);
- if (tid <= journal->j_commit_sequence) {
+ if (tid_geq(journal->j_commit_sequence, tid)) {
write_unlock(&journal->j_state_lock);
return -EALREADY;
}
@@ -742,7 +728,6 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
}
journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
write_unlock(&journal->j_state_lock);
- jbd2_journal_lock_updates(journal);
return 0;
}
@@ -754,7 +739,6 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
*/
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
- jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock);
@@ -789,17 +773,7 @@ EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
/* Return 1 when transaction with given tid has already committed. */
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
{
- int ret = 1;
-
- read_lock(&journal->j_state_lock);
- if (journal->j_running_transaction &&
- journal->j_running_transaction->t_tid == tid)
- ret = 0;
- if (journal->j_committing_transaction &&
- journal->j_committing_transaction->t_tid == tid)
- ret = 0;
- read_unlock(&journal->j_state_lock);
- return ret;
+ return tid_geq(READ_ONCE(journal->j_commit_sequence), tid);
}
EXPORT_SYMBOL(jbd2_transaction_committed);
@@ -865,17 +839,12 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
*bh_out = NULL;
- if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
- fc_off = journal->j_fc_off;
- blocknr = journal->j_fc_first + fc_off;
- journal->j_fc_off++;
- } else {
- ret = -EINVAL;
- }
-
- if (ret)
- return ret;
+ if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last)
+ return -EINVAL;
+ fc_off = journal->j_fc_off;
+ blocknr = journal->j_fc_first + fc_off;
+ journal->j_fc_off++;
ret = jbd2_journal_bmap(journal, blocknr, &pblock);
if (ret)
return ret;
@@ -884,7 +853,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
if (!bh)
return -ENOMEM;
-
journal->j_fc_wbuf[fc_off] = bh;
*bh_out = bh;
@@ -927,7 +895,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
}
EXPORT_SYMBOL(jbd2_fc_wait_bufs);
-int jbd2_fc_release_bufs(journal_t *journal)
+void jbd2_fc_release_bufs(journal_t *journal)
{
struct buffer_head *bh;
int i, j_fc_off;
@@ -941,8 +909,6 @@ int jbd2_fc_release_bufs(journal_t *journal)
put_bh(bh);
journal->j_fc_wbuf[i] = NULL;
}
-
- return 0;
}
EXPORT_SYMBOL(jbd2_fc_release_bufs);
@@ -989,7 +955,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
* descriptor blocks we do need to generate bona fide buffers.
*
* After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
- * the buffer's contents they really should run flush_dcache_page(bh->b_page).
+ * the buffer's contents they really should run flush_dcache_folio(bh->b_folio).
* But we don't bother doing that, so there will be coherency problems with
* mmaps of blockdevs which hold live JBD-controlled filesystems.
*/
@@ -1034,7 +1000,7 @@ void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_block_tail));
tail->t_checksum = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+ csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize);
tail->t_checksum = cpu_to_be32(csum);
}
@@ -1403,7 +1369,7 @@ static int journal_check_superblock(journal_t *journal)
return err;
}
- if (jbd2_journal_has_csum_v2or3_feature(journal) &&
+ if (jbd2_journal_has_csum_v2or3(journal) &&
jbd2_has_feature_checksum(journal)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
@@ -1411,22 +1377,14 @@ static int journal_check_superblock(journal_t *journal)
return err;
}
- /* Load the checksum driver */
- if (jbd2_journal_has_csum_v2or3_feature(journal)) {
+ if (jbd2_journal_has_csum_v2or3(journal)) {
if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
return err;
}
- journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
- err = PTR_ERR(journal->j_chksum_driver);
- journal->j_chksum_driver = NULL;
- return err;
- }
/* Check superblock checksum */
- if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
+ if (sb->s_checksum != jbd2_superblock_csum(sb)) {
printk(KERN_ERR "JBD2: journal checksum error\n");
err = -EFSBADCRC;
return err;
@@ -1451,6 +1409,48 @@ static int journal_revoke_records_per_block(journal_t *journal)
return space / record_size;
}
+static int jbd2_journal_get_max_txn_bufs(journal_t *journal)
+{
+ return (journal->j_total_len - journal->j_fc_wbufsize) / 3;
+}
+
+/*
+ * Base amount of descriptor blocks we reserve for each transaction.
+ */
+static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
+{
+ int tag_space = journal->j_blocksize - sizeof(journal_header_t);
+ int tags_per_block;
+
+ /* Subtract UUID */
+ tag_space -= 16;
+ if (jbd2_journal_has_csum_v2or3(journal))
+ tag_space -= sizeof(struct jbd2_journal_block_tail);
+ /* Commit code leaves a slack space of 16 bytes at the end of block */
+ tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
+ /*
+ * Revoke descriptors are accounted separately so we need to reserve
+ * space for commit block and normal transaction descriptor blocks.
+ */
+ return 1 + DIV_ROUND_UP(jbd2_journal_get_max_txn_bufs(journal),
+ tags_per_block);
+}
+
+/*
+ * Initialize number of blocks each transaction reserves for its bookkeeping
+ * and maximum number of blocks a transaction can use. This needs to be called
+ * after the journal size and the fastcommit area size are initialized.
+ */
+static void jbd2_journal_init_transaction_limits(journal_t *journal)
+{
+ journal->j_revoke_records_per_block =
+ journal_revoke_records_per_block(journal);
+ journal->j_transaction_overhead_buffers =
+ jbd2_descriptor_blocks_per_trans(journal);
+ journal->j_max_transaction_buffers =
+ jbd2_journal_get_max_txn_bufs(journal);
+}
+
/*
* Load the on-disk journal superblock and read the key fields into the
* journal_t.
@@ -1490,10 +1490,10 @@ static int journal_load_superblock(journal_t *journal)
journal->j_total_len = be32_to_cpu(sb->s_maxlen);
/* Precompute checksum seed for all metadata */
if (jbd2_journal_has_csum_v2or3(journal))
- journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+ journal->j_csum_seed = jbd2_chksum(~0, sb->s_uuid,
sizeof(sb->s_uuid));
- journal->j_revoke_records_per_block =
- journal_revoke_records_per_block(journal);
+ /* After journal features are set, we can compute transaction limits */
+ jbd2_journal_init_transaction_limits(journal);
if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
@@ -1512,9 +1512,10 @@ static int journal_load_superblock(journal_t *journal)
* destroy journal_t structures, and to initialise and read existing
* journal blocks from disk. */
-/* First: create and setup a journal_t object in memory. We initialise
- * very few fields yet: that has to wait until we have created the
- * journal structures from from scratch, or loaded them from disk. */
+/* The journal_init_common() function creates and fills a journal_t object
+ * in memory. It calls journal_load_superblock() to load the on-disk journal
+ * superblock and initialize the journal_t object.
+ */
static journal_t *journal_init_common(struct block_device *bdev,
struct block_device *fs_dev,
@@ -1599,7 +1600,6 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
- journal->j_shrinker->batch = journal->j_max_transaction_buffers;
journal->j_shrinker->private_data = journal;
shrinker_register(journal->j_shrinker);
@@ -1608,8 +1608,6 @@ static journal_t *journal_init_common(struct block_device *bdev,
err_cleanup:
percpu_counter_destroy(&journal->j_checkpoint_jh_count);
- if (journal->j_chksum_driver)
- crypto_free_shash(journal->j_chksum_driver);
kfree(journal->j_wbuf);
jbd2_journal_destroy_revoke(journal);
journal_fail_superblock(journal);
@@ -1743,8 +1741,6 @@ static int journal_reset(journal_t *journal)
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
journal->j_commit_request = journal->j_commit_sequence;
- journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
-
/*
* Now that journal recovery is done, turn fast commits off here. This
* way, if fast commit was enabled before the crash but if now FS has
@@ -1823,7 +1819,7 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
set_buffer_uptodate(bh);
}
if (jbd2_journal_has_csum_v2or3(journal))
- sb->s_checksum = jbd2_superblock_csum(journal, sb);
+ sb->s_checksum = jbd2_superblock_csum(sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
submit_bh(REQ_OP_WRITE | write_flags, bh);
@@ -1881,7 +1877,6 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
- WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
@@ -1929,7 +1924,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
if (had_fast_commit)
jbd2_set_feature_fast_commit(journal);
- /* Log is no longer empty */
+ /* Log is empty */
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
@@ -1977,17 +1972,15 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
return err;
}
- if (block_start == ~0ULL) {
- block_start = phys_block;
- block_stop = block_start - 1;
- }
+ if (block_start == ~0ULL)
+ block_stop = block_start = phys_block;
/*
* last block not contiguous with current block,
* process last contiguous region and return to this block on
* next loop
*/
- if (phys_block != block_stop + 1) {
+ if (phys_block != block_stop) {
block--;
} else {
block_stop++;
@@ -2006,11 +1999,10 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
*/
byte_start = block_start * journal->j_blocksize;
byte_stop = block_stop * journal->j_blocksize;
- byte_count = (block_stop - block_start + 1) *
- journal->j_blocksize;
+ byte_count = (block_stop - block_start) * journal->j_blocksize;
- truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
- byte_start, byte_stop);
+ truncate_inode_pages_range(journal->j_dev->bd_mapping,
+ byte_start, byte_stop - 1);
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
err = blkdev_issue_discard(journal->j_dev,
@@ -2025,7 +2017,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
}
if (unlikely(err != 0)) {
- pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
+ pr_err("JBD2: (error %d) unable to wipe journal at physical blocks [%llu, %llu)",
err, block_start, block_stop);
return err;
}
@@ -2193,8 +2185,6 @@ int jbd2_journal_destroy(journal_t *journal)
iput(journal->j_inode);
if (journal->j_revoke)
jbd2_journal_destroy_revoke(journal);
- if (journal->j_chksum_driver)
- crypto_free_shash(journal->j_chksum_driver);
kfree(journal->j_fc_wbuf);
kfree(journal->j_wbuf);
kfree(journal);
@@ -2285,8 +2275,6 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
journal->j_fc_first = journal->j_last + 1;
journal->j_fc_off = 0;
journal->j_free = journal->j_last - journal->j_first;
- journal->j_max_transaction_buffers =
- jbd2_journal_get_max_txn_bufs(journal);
return 0;
}
@@ -2341,27 +2329,15 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
}
}
- /* Load the checksum driver if necessary */
- if ((journal->j_chksum_driver == NULL) &&
- INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
- journal->j_chksum_driver = NULL;
- return 0;
- }
- /* Precompute checksum seed for all metadata */
- journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
- sizeof(sb->s_uuid));
- }
-
lock_buffer(journal->j_sb_buffer);
- /* If enabling v3 checksums, update superblock */
+ /* If enabling v3 checksums, update superblock and precompute seed */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
+ journal->j_csum_seed = jbd2_chksum(~0, sb->s_uuid,
+ sizeof(sb->s_uuid));
}
/* If enabling v1 checksums, downgrade superblock */
@@ -2374,8 +2350,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
unlock_buffer(journal->j_sb_buffer);
- journal->j_revoke_records_per_block =
- journal_revoke_records_per_block(journal);
+ jbd2_journal_init_transaction_limits(journal);
return 1;
#undef COMPAT_FEATURE_ON
@@ -2406,8 +2381,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
sb->s_feature_compat &= ~cpu_to_be32(compat);
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
- journal->j_revoke_records_per_block =
- journal_revoke_records_per_block(journal);
+ jbd2_journal_init_transaction_limits(journal);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
@@ -2681,9 +2655,10 @@ void jbd2_journal_ack_err(journal_t *journal)
write_unlock(&journal->j_state_lock);
}
-int jbd2_journal_blocks_per_page(struct inode *inode)
+int jbd2_journal_blocks_per_folio(struct inode *inode)
{
- return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
+ return 1 << (PAGE_SHIFT + mapping_max_folio_order(inode->i_mapping) -
+ inode->i_sb->s_blocksize_bits);
}
/*
@@ -2855,8 +2830,7 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
- if (ret)
- spin_lock_init(&ret->b_state_lock);
+ spin_lock_init(&ret->b_state_lock);
return ret;
}
@@ -3181,6 +3155,7 @@ static void __exit journal_exit(void)
jbd2_journal_destroy_caches();
}
+MODULE_DESCRIPTION("Generic filesystem journal-writing module");
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 1f7664984d6e..cac8c2cd4a92 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -19,6 +19,7 @@
#include <linux/errno.h>
#include <linux/crc32.h>
#include <linux/blkdev.h>
+#include <linux/string_choices.h>
#endif
/*
@@ -38,7 +39,7 @@ struct recovery_info
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass);
-static int scan_revoke_records(journal_t *, struct buffer_head *,
+static int scan_revoke_records(journal_t *, enum passtype, struct buffer_head *,
tid_t, struct recovery_info *);
#ifdef __KERNEL__
@@ -64,9 +65,8 @@ static void journal_brelse_array(struct buffer_head *b[], int n)
*/
#define MAXBUF 8
-static int do_readahead(journal_t *journal, unsigned int start)
+static void do_readahead(journal_t *journal, unsigned int start)
{
- int err;
unsigned int max, nbufs, next;
unsigned long long blocknr;
struct buffer_head *bh;
@@ -84,7 +84,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
nbufs = 0;
for (next = start; next < max; next++) {
- err = jbd2_journal_bmap(journal, next, &blocknr);
+ int err = jbd2_journal_bmap(journal, next, &blocknr);
if (err) {
printk(KERN_ERR "JBD2: bad block at offset %u\n",
@@ -93,10 +93,8 @@ static int do_readahead(journal_t *journal, unsigned int start)
}
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
- if (!bh) {
- err = -ENOMEM;
+ if (!bh)
goto failed;
- }
if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
bufs[nbufs++] = bh;
@@ -111,12 +109,10 @@ static int do_readahead(journal_t *journal, unsigned int start)
if (nbufs)
bh_readahead_batch(nbufs, bufs, 0);
- err = 0;
failed:
if (nbufs)
journal_brelse_array(bufs, nbufs);
- return err;
}
#endif /* __KERNEL__ */
@@ -189,7 +185,7 @@ static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
j->j_blocksize - sizeof(struct jbd2_journal_block_tail));
provided = tail->t_checksum;
tail->t_checksum = 0;
- calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+ calculated = jbd2_chksum(j->j_csum_seed, buf, j->j_blocksize);
tail->t_checksum = provided;
return provided == cpu_to_be32(calculated);
@@ -286,19 +282,20 @@ static int fc_do_one_pass(journal_t *journal,
int jbd2_journal_recover(journal_t *journal)
{
int err, err2;
- journal_superblock_t * sb;
-
struct recovery_info info;
memset(&info, 0, sizeof(info));
- sb = journal->j_superblock;
/*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
- * unmounted.
+ * unmounted. We use its in-memory version j_tail here because
+ * jbd2_journal_wipe() could have updated it without updating journal
+ * superblock.
*/
- if (!sb->s_start) {
+ if (!journal->j_tail) {
+ journal_superblock_t *sb = journal->j_superblock;
+
jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
@@ -326,6 +323,12 @@ int jbd2_journal_recover(journal_t *journal)
journal->j_transaction_sequence, journal->j_head);
jbd2_journal_clear_revoke(journal);
+ /* Free revoke table allocated for replay */
+ if (journal->j_revoke != journal->j_revoke_table[0] &&
+ journal->j_revoke != journal->j_revoke_table[1]) {
+ jbd2_journal_destroy_revoke_table(journal->j_revoke);
+ journal->j_revoke = journal->j_revoke_table[1];
+ }
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
@@ -374,7 +377,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
be32_to_cpu(journal->j_superblock->s_sequence);
jbd2_debug(1,
"JBD2: ignoring %d transaction%s from the journal.\n",
- dropped, (dropped == 1) ? "" : "s");
+ dropped, str_plural(dropped));
#endif
journal->j_transaction_sequence = ++info.end_transaction;
journal->j_head = info.head_block;
@@ -437,12 +440,33 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
h = buf;
provided = h->h_chksum[0];
h->h_chksum[0] = 0;
- calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+ calculated = jbd2_chksum(j->j_csum_seed, buf, j->j_blocksize);
h->h_chksum[0] = provided;
return provided == cpu_to_be32(calculated);
}
+static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
+{
+ struct commit_header *h;
+ __be32 provided;
+ __u32 calculated;
+ void *tmpbuf;
+
+ tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
+ if (!tmpbuf)
+ return false;
+
+ memcpy(tmpbuf, buf, sizeof(struct commit_header));
+ h = tmpbuf;
+ provided = h->h_chksum[0];
+ h->h_chksum[0] = 0;
+ calculated = jbd2_chksum(j->j_csum_seed, tmpbuf, j->j_blocksize);
+ kfree(tmpbuf);
+
+ return provided == cpu_to_be32(calculated);
+}
+
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
journal_block_tag3_t *tag3,
void *buf, __u32 sequence)
@@ -454,8 +478,8 @@ static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
return 1;
seq = cpu_to_be32(sequence);
- csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
- csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
+ csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+ csum32 = jbd2_chksum(csum32, buf, j->j_blocksize);
if (jbd2_has_feature_csum3(j))
return tag3->t_checksum == cpu_to_be32(csum32);
@@ -463,6 +487,104 @@ static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
return tag->t_checksum == cpu_to_be16(csum32);
}
+static __always_inline int jbd2_do_replay(journal_t *journal,
+ struct recovery_info *info,
+ struct buffer_head *bh,
+ unsigned long *next_log_block,
+ unsigned int next_commit_ID)
+{
+ char *tagp;
+ int flags;
+ int ret = 0;
+ int tag_bytes = journal_tag_bytes(journal);
+ int descr_csum_size = 0;
+ unsigned long io_block;
+ journal_block_tag_t tag;
+ struct buffer_head *obh;
+ struct buffer_head *nbh;
+
+ if (jbd2_journal_has_csum_v2or3(journal))
+ descr_csum_size = sizeof(struct jbd2_journal_block_tail);
+
+ tagp = &bh->b_data[sizeof(journal_header_t)];
+ while (tagp - bh->b_data + tag_bytes <=
+ journal->j_blocksize - descr_csum_size) {
+ int err;
+
+ memcpy(&tag, tagp, sizeof(tag));
+ flags = be16_to_cpu(tag.t_flags);
+
+ io_block = (*next_log_block)++;
+ wrap(journal, *next_log_block);
+ err = jread(&obh, journal, io_block);
+ if (err) {
+ /* Recover what we can, but report failure at the end. */
+ ret = err;
+ pr_err("JBD2: IO error %d recovering block %lu in log\n",
+ err, io_block);
+ } else {
+ unsigned long long blocknr;
+
+ J_ASSERT(obh != NULL);
+ blocknr = read_tag_block(journal, &tag);
+
+ /* If the block has been revoked, then we're all done here. */
+ if (jbd2_journal_test_revoke(journal, blocknr,
+ next_commit_ID)) {
+ brelse(obh);
+ ++info->nr_revoke_hits;
+ goto skip_write;
+ }
+
+ /* Look for block corruption */
+ if (!jbd2_block_tag_csum_verify(journal, &tag,
+ (journal_block_tag3_t *)tagp,
+ obh->b_data, next_commit_ID)) {
+ brelse(obh);
+ ret = -EFSBADCRC;
+ pr_err("JBD2: Invalid checksum recovering data block %llu in journal block %lu\n",
+ blocknr, io_block);
+ goto skip_write;
+ }
+
+ /* Find a buffer for the new data being restored */
+ nbh = __getblk(journal->j_fs_dev, blocknr,
+ journal->j_blocksize);
+ if (nbh == NULL) {
+ pr_err("JBD2: Out of memory during recovery.\n");
+ brelse(obh);
+ return -ENOMEM;
+ }
+
+ lock_buffer(nbh);
+ memcpy(nbh->b_data, obh->b_data, journal->j_blocksize);
+ if (flags & JBD2_FLAG_ESCAPE) {
+ *((__be32 *)nbh->b_data) =
+ cpu_to_be32(JBD2_MAGIC_NUMBER);
+ }
+
+ BUFFER_TRACE(nbh, "marking dirty");
+ set_buffer_uptodate(nbh);
+ mark_buffer_dirty(nbh);
+ BUFFER_TRACE(nbh, "marking uptodate");
+ ++info->nr_replays;
+ unlock_buffer(nbh);
+ brelse(obh);
+ brelse(nbh);
+ }
+
+skip_write:
+ tagp += tag_bytes;
+ if (!(flags & JBD2_FLAG_SAME_UUID))
+ tagp += 16;
+
+ if (flags & JBD2_FLAG_LAST_TAG)
+ break;
+ }
+
+ return ret;
+}
+
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass)
{
@@ -471,13 +593,10 @@ static int do_one_pass(journal_t *journal,
int err, success = 0;
journal_superblock_t * sb;
journal_header_t * tmp;
- struct buffer_head * bh;
+ struct buffer_head *bh = NULL;
unsigned int sequence;
int blocktype;
- int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
- int descr_csum_size = 0;
- int block_error = 0;
bool need_check_commit_time = false;
__u64 last_trans_commit_time = 0, commit_time;
@@ -495,6 +614,31 @@ static int do_one_pass(journal_t *journal,
first_commit_ID = next_commit_ID;
if (pass == PASS_SCAN)
info->start_transaction = first_commit_ID;
+ else if (pass == PASS_REVOKE) {
+ /*
+ * Would the default revoke table have too long hash chains
+ * during replay?
+ */
+ if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) {
+ unsigned int hash_size;
+
+ /*
+ * Aim for average chain length of 8, limit at 1M
+ * entries to avoid problems with malicious
+ * filesystems.
+ */
+ hash_size = min(roundup_pow_of_two(info->nr_revokes / 8),
+ 1U << 20);
+ journal->j_revoke =
+ jbd2_journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke) {
+ printk(KERN_ERR
+ "JBD2: failed to allocate revoke table for replay with %u entries. "
+ "Journal replay may be slow.\n", hash_size);
+ journal->j_revoke = journal->j_revoke_table[1];
+ }
+ }
+ }
jbd2_debug(1, "Starting recovery pass %d\n", pass);
@@ -506,12 +650,6 @@ static int do_one_pass(journal_t *journal,
*/
while (1) {
- int flags;
- char * tagp;
- journal_block_tag_t tag;
- struct buffer_head * obh;
- struct buffer_head * nbh;
-
cond_resched();
/* If we already know where to stop the log traversal,
@@ -530,6 +668,8 @@ static int do_one_pass(journal_t *journal,
* record. */
jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
+ brelse(bh);
+ bh = NULL;
err = jread(&bh, journal, next_log_block);
if (err)
goto failed;
@@ -545,20 +685,16 @@ static int do_one_pass(journal_t *journal,
tmp = (journal_header_t *)bh->b_data;
- if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
- brelse(bh);
+ if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER))
break;
- }
blocktype = be32_to_cpu(tmp->h_blocktype);
sequence = be32_to_cpu(tmp->h_sequence);
jbd2_debug(3, "Found magic %d, sequence %d\n",
blocktype, sequence);
- if (sequence != next_commit_ID) {
- brelse(bh);
+ if (sequence != next_commit_ID)
break;
- }
/* OK, we have a valid descriptor block which matches
* all of the sequence number checks. What are we going
@@ -567,11 +703,7 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK:
/* Verify checksum first */
- if (jbd2_journal_has_csum_v2or3(journal))
- descr_csum_size =
- sizeof(struct jbd2_journal_block_tail);
- if (descr_csum_size > 0 &&
- !jbd2_descriptor_block_csum_verify(journal,
+ if (!jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) {
/*
* PASS_SCAN can see stale blocks due to lazy
@@ -581,7 +713,6 @@ static int do_one_pass(journal_t *journal,
pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
next_log_block);
err = -EFSBADCRC;
- brelse(bh);
goto failed;
}
need_check_commit_time = true;
@@ -597,125 +728,39 @@ static int do_one_pass(journal_t *journal,
if (pass != PASS_REPLAY) {
if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal) &&
- !need_check_commit_time &&
!info->end_transaction) {
if (calc_chksums(journal, bh,
&next_log_block,
- &crc32_sum)) {
- put_bh(bh);
+ &crc32_sum))
break;
- }
- put_bh(bh);
continue;
}
next_log_block += count_tags(journal, bh);
wrap(journal, next_log_block);
- put_bh(bh);
continue;
}
- /* A descriptor block: we can now write all of
- * the data blocks. Yay, useful work is finally
- * getting done here! */
-
- tagp = &bh->b_data[sizeof(journal_header_t)];
- while ((tagp - bh->b_data + tag_bytes)
- <= journal->j_blocksize - descr_csum_size) {
- unsigned long io_block;
-
- memcpy(&tag, tagp, sizeof(tag));
- flags = be16_to_cpu(tag.t_flags);
-
- io_block = next_log_block++;
- wrap(journal, next_log_block);
- err = jread(&obh, journal, io_block);
- if (err) {
- /* Recover what we can, but
- * report failure at the end. */
- success = err;
- printk(KERN_ERR
- "JBD2: IO error %d recovering "
- "block %lu in log\n",
- err, io_block);
- } else {
- unsigned long long blocknr;
-
- J_ASSERT(obh != NULL);
- blocknr = read_tag_block(journal,
- &tag);
-
- /* If the block has been
- * revoked, then we're all done
- * here. */
- if (jbd2_journal_test_revoke
- (journal, blocknr,
- next_commit_ID)) {
- brelse(obh);
- ++info->nr_revoke_hits;
- goto skip_write;
- }
-
- /* Look for block corruption */
- if (!jbd2_block_tag_csum_verify(
- journal, &tag, (journal_block_tag3_t *)tagp,
- obh->b_data, be32_to_cpu(tmp->h_sequence))) {
- brelse(obh);
- success = -EFSBADCRC;
- printk(KERN_ERR "JBD2: Invalid "
- "checksum recovering "
- "data block %llu in "
- "journal block %lu\n",
- blocknr, io_block);
- block_error = 1;
- goto skip_write;
- }
-
- /* Find a buffer for the new
- * data being restored */
- nbh = __getblk(journal->j_fs_dev,
- blocknr,
- journal->j_blocksize);
- if (nbh == NULL) {
- printk(KERN_ERR
- "JBD2: Out of memory "
- "during recovery.\n");
- err = -ENOMEM;
- brelse(bh);
- brelse(obh);
- goto failed;
- }
-
- lock_buffer(nbh);
- memcpy(nbh->b_data, obh->b_data,
- journal->j_blocksize);
- if (flags & JBD2_FLAG_ESCAPE) {
- *((__be32 *)nbh->b_data) =
- cpu_to_be32(JBD2_MAGIC_NUMBER);
- }
-
- BUFFER_TRACE(nbh, "marking dirty");
- set_buffer_uptodate(nbh);
- mark_buffer_dirty(nbh);
- BUFFER_TRACE(nbh, "marking uptodate");
- ++info->nr_replays;
- unlock_buffer(nbh);
- brelse(obh);
- brelse(nbh);
- }
-
- skip_write:
- tagp += tag_bytes;
- if (!(flags & JBD2_FLAG_SAME_UUID))
- tagp += 16;
-
- if (flags & JBD2_FLAG_LAST_TAG)
- break;
+ /*
+ * A descriptor block: we can now write all of the
+ * data blocks. Yay, useful work is finally getting
+ * done here!
+ */
+ err = jbd2_do_replay(journal, info, bh, &next_log_block,
+ next_commit_ID);
+ if (err) {
+ if (err == -ENOMEM)
+ goto failed;
+ success = err;
}
- brelse(bh);
continue;
case JBD2_COMMIT_BLOCK:
+ if (pass != PASS_SCAN) {
+ next_commit_ID++;
+ continue;
+ }
+
/* How to differentiate between interrupted commit
* and journal corruption ?
*
@@ -760,7 +805,6 @@ static int do_one_pass(journal_t *journal,
pr_err("JBD2: Invalid checksum found in transaction %u\n",
next_commit_ID);
err = -EFSBADCRC;
- brelse(bh);
goto failed;
}
ignore_crc_mismatch:
@@ -770,7 +814,6 @@ static int do_one_pass(journal_t *journal,
*/
jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
next_commit_ID);
- brelse(bh);
goto done;
}
@@ -780,8 +823,7 @@ static int do_one_pass(journal_t *journal,
* much to do other than move on to the next sequence
* number.
*/
- if (pass == PASS_SCAN &&
- jbd2_has_feature_checksum(journal)) {
+ if (jbd2_has_feature_checksum(journal)) {
struct commit_header *cbh =
(struct commit_header *)bh->b_data;
unsigned found_chksum =
@@ -790,7 +832,6 @@ static int do_one_pass(journal_t *journal,
if (info->end_transaction) {
journal->j_failed_commit =
info->end_transaction;
- brelse(bh);
break;
}
@@ -806,33 +847,45 @@ static int do_one_pass(journal_t *journal,
goto chksum_error;
crc32_sum = ~0;
+ goto chksum_ok;
}
- if (pass == PASS_SCAN &&
- !jbd2_commit_block_csum_verify(journal,
- bh->b_data)) {
- chksum_error:
- if (commit_time < last_trans_commit_time)
- goto ignore_crc_mismatch;
- info->end_transaction = next_commit_ID;
- info->head_block = head_block;
-
- if (!jbd2_has_feature_async_commit(journal)) {
- journal->j_failed_commit =
- next_commit_ID;
- brelse(bh);
- break;
- }
+
+ if (jbd2_commit_block_csum_verify(journal, bh->b_data))
+ goto chksum_ok;
+
+ if (jbd2_commit_block_csum_verify_partial(journal,
+ bh->b_data)) {
+ pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
+ next_commit_ID, next_log_block);
+ goto chksum_ok;
}
- if (pass == PASS_SCAN) {
- last_trans_commit_time = commit_time;
- head_block = next_log_block;
+
+chksum_error:
+ if (commit_time < last_trans_commit_time)
+ goto ignore_crc_mismatch;
+ info->end_transaction = next_commit_ID;
+ info->head_block = head_block;
+
+ if (!jbd2_has_feature_async_commit(journal)) {
+ journal->j_failed_commit = next_commit_ID;
+ break;
}
- brelse(bh);
+
+chksum_ok:
+ last_trans_commit_time = commit_time;
+ head_block = next_log_block;
next_commit_ID++;
continue;
case JBD2_REVOKE_BLOCK:
/*
+ * If we aren't in the SCAN or REVOKE pass, then we can
+ * just skip over this block.
+ */
+ if (pass != PASS_REVOKE && pass != PASS_SCAN)
+ continue;
+
+ /*
* Check revoke block crc in pass_scan, if csum verify
* failed, check commit block time later.
*/
@@ -843,16 +896,9 @@ static int do_one_pass(journal_t *journal,
next_log_block);
need_check_commit_time = true;
}
- /* If we aren't in the REVOKE pass, then we can
- * just skip over this block. */
- if (pass != PASS_REVOKE) {
- brelse(bh);
- continue;
- }
- err = scan_revoke_records(journal, bh,
+ err = scan_revoke_records(journal, pass, bh,
next_commit_ID, info);
- brelse(bh);
if (err)
goto failed;
continue;
@@ -860,12 +906,12 @@ static int do_one_pass(journal_t *journal,
default:
jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
blocktype);
- brelse(bh);
goto done;
}
}
done:
+ brelse(bh);
/*
* We broke out of the log scan loop: either we came to the
* known end of the log or we found an unexpected block in the
@@ -896,18 +942,18 @@ static int do_one_pass(journal_t *journal,
success = err;
}
- if (block_error && success == 0)
- success = -EIO;
return success;
failed:
+ brelse(bh);
return err;
}
/* Scan a revoke record, marking all blocks mentioned as revoked. */
-static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
- tid_t sequence, struct recovery_info *info)
+static int scan_revoke_records(journal_t *journal, enum passtype pass,
+ struct buffer_head *bh, tid_t sequence,
+ struct recovery_info *info)
{
jbd2_journal_revoke_header_t *header;
int offset, max;
@@ -928,6 +974,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
if (jbd2_has_feature_64bit(journal))
record_len = 8;
+ if (pass == PASS_SCAN) {
+ info->nr_revokes += (max - offset) / record_len;
+ return 0;
+ }
+
while (offset + record_len <= max) {
unsigned long long blocknr;
int err;
@@ -940,7 +991,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
err = jbd2_journal_set_revoke(journal, blocknr, sequence);
if (err)
return err;
- ++info->nr_revokes;
}
return 0;
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..1467f6790747 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -215,7 +215,7 @@ int __init jbd2_journal_init_revoke_table_cache(void)
return 0;
}
-static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
+struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
{
int shift = 0;
int tmp = hash_size;
@@ -231,7 +231,7 @@ static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
table->hash_size = hash_size;
table->hash_shift = shift;
table->hash_table =
- kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL);
+ kvmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL);
if (!table->hash_table) {
kmem_cache_free(jbd2_revoke_table_cache, table);
table = NULL;
@@ -245,7 +245,7 @@ out:
return table;
}
-static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
+void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
{
int i;
struct list_head *hash_list;
@@ -255,7 +255,7 @@ static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
J_ASSERT(list_empty(hash_list));
}
- kfree(table->hash_table);
+ kvfree(table->hash_table);
kmem_cache_free(jbd2_revoke_table_cache, table);
}
@@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
bh = bh_in;
if (!bh) {
- bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
+ bh = __find_get_block_nonatomic(bdev, blocknr,
+ journal->j_blocksize);
if (bh)
BUFFER_TRACE(bh, "found on hash");
}
@@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
/* If there is a different buffer_head lying around in
* memory anywhere... */
- bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
+ bh2 = __find_get_block_nonatomic(bdev, blocknr,
+ journal->j_blocksize);
if (bh2) {
/* ... and it has RevokeValid status... */
if (bh2 != bh && buffer_revokevalid(bh2))
@@ -420,12 +422,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
* do not trust the Revoked bit on buffers unless RevokeValid is also
* set.
*/
-int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
{
struct jbd2_revoke_record_s *record;
journal_t *journal = handle->h_transaction->t_journal;
int need_cancel;
- int did_revoke = 0; /* akpm: debug */
struct buffer_head *bh = jh2bh(jh);
jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
@@ -450,7 +451,6 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
list_del(&record->hash);
spin_unlock(&journal->j_revoke_lock);
kmem_cache_free(jbd2_revoke_record_cache, record);
- did_revoke = 1;
}
}
@@ -466,18 +466,18 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
* state machine will get very upset later on. */
if (need_cancel) {
struct buffer_head *bh2;
- bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
+ bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr,
+ bh->b_size);
if (bh2) {
if (bh2 != bh)
clear_buffer_revoked(bh2);
__brelse(bh2);
}
}
- return did_revoke;
}
/*
- * journal_clear_revoked_flag clears revoked flag of buffers in
+ * jbd2_clear_buffer_revoked_flags clears revoked flag of buffers in
* revoke table to reflect there is no revoked buffers in the next
* transaction which is going to be started.
*/
@@ -495,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal)
struct jbd2_revoke_record_s *record;
struct buffer_head *bh;
record = (struct jbd2_revoke_record_s *)list_entry;
- bh = __find_get_block(journal->j_fs_dev,
- record->blocknr,
- journal->j_blocksize);
+ bh = __find_get_block_nonatomic(journal->j_fs_dev,
+ record->blocknr,
+ journal->j_blocksize);
if (bh) {
clear_buffer_revoked(bh);
__brelse(bh);
@@ -506,9 +506,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal)
}
}
-/* journal_switch_revoke table select j_revoke for next transaction
- * we do not want to suspend any processing until all revokes are
- * written -bzzz
+/* jbd2_journal_switch_revoke_table table select j_revoke for next
+ * transaction we do not want to suspend any processing until all
+ * revokes are written -bzzz
*/
void jbd2_journal_switch_revoke_table(journal_t *journal)
{
@@ -654,7 +654,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor);
- write_dirty_buffer(descriptor, REQ_SYNC);
+ write_dirty_buffer(descriptor, JBD2_JOURNAL_REQ_FLAGS);
}
#endif
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cb0b8d6fc0c6..c7867139af69 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -63,28 +63,6 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
}
/*
- * Base amount of descriptor blocks we reserve for each transaction.
- */
-static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
-{
- int tag_space = journal->j_blocksize - sizeof(journal_header_t);
- int tags_per_block;
-
- /* Subtract UUID */
- tag_space -= 16;
- if (jbd2_journal_has_csum_v2or3(journal))
- tag_space -= sizeof(struct jbd2_journal_block_tail);
- /* Commit code leaves a slack space of 16 bytes at the end of block */
- tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
- /*
- * Revoke descriptors are accounted separately so we need to reserve
- * space for commit block and normal transaction descriptor blocks.
- */
- return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
- tags_per_block);
-}
-
-/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
* Simply initialise a new transaction. Initialize it in
@@ -109,12 +87,11 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_expires = jiffies + journal->j_commit_interval;
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
- jbd2_descriptor_blocks_per_trans(journal) +
+ journal->j_transaction_overhead_buffers +
atomic_read(&journal->j_reserved_credits));
atomic_set(&transaction->t_outstanding_revokes, 0);
atomic_set(&transaction->t_handle_count, 0);
INIT_LIST_HEAD(&transaction->t_inode_list);
- INIT_LIST_HEAD(&transaction->t_private_list);
/* Set up the commit timer for the new transaction. */
journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
@@ -136,12 +113,9 @@ static void jbd2_get_transaction(journal_t *journal,
*/
/*
- * Update transaction's maximum wait time, if debugging is enabled.
- *
* t_max_wait is carefully updated here with use of atomic compare exchange.
* Note that there could be multiplre threads trying to do this simultaneously
* hence using cmpxchg to avoid any use of locks in this case.
- * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug.
*/
static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts)
@@ -213,6 +187,13 @@ static void sub_reserved_credits(journal_t *journal, int blocks)
wake_up(&journal->j_wait_reserved);
}
+/* Maximum number of blocks for user transaction payload */
+static int jbd2_max_user_trans_buffers(journal_t *journal)
+{
+ return journal->j_max_transaction_buffers -
+ journal->j_transaction_overhead_buffers;
+}
+
/*
* Wait until we can add credits for handle to the running transaction. Called
* with j_state_lock held for reading. Returns 0 if handle joined the running
@@ -262,12 +243,12 @@ __must_hold(&journal->j_state_lock)
* big to fit this handle? Wait until reserved credits are freed.
*/
if (atomic_read(&journal->j_reserved_credits) + total >
- journal->j_max_transaction_buffers) {
+ jbd2_max_user_trans_buffers(journal)) {
read_unlock(&journal->j_state_lock);
jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + total <=
- journal->j_max_transaction_buffers);
+ jbd2_max_user_trans_buffers(journal));
__acquire(&journal->j_state_lock); /* fake out sparse */
return 1;
}
@@ -307,14 +288,14 @@ __must_hold(&journal->j_state_lock)
needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
/* We allow at most half of a transaction to be reserved */
- if (needed > journal->j_max_transaction_buffers / 2) {
+ if (needed > jbd2_max_user_trans_buffers(journal) / 2) {
sub_reserved_credits(journal, rsv_blocks);
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + rsv_blocks
- <= journal->j_max_transaction_buffers / 2);
+ <= jbd2_max_user_trans_buffers(journal) / 2);
__acquire(&journal->j_state_lock); /* fake out sparse */
return 1;
}
@@ -344,12 +325,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
* size and limit the number of total credits to not exceed maximum
* transaction size per operation.
*/
- if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
- (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
+ if (rsv_blocks > jbd2_max_user_trans_buffers(journal) / 2 ||
+ rsv_blocks + blocks > jbd2_max_user_trans_buffers(journal)) {
printk(KERN_ERR "JBD2: %s wants too many credits "
"credits:%d rsv_credits:%d max:%d\n",
current->comm, blocks, rsv_blocks,
- journal->j_max_transaction_buffers);
+ jbd2_max_user_trans_buffers(journal));
WARN_ON(1);
return -ENOSPC;
}
@@ -1528,7 +1509,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction == transaction);
spin_unlock(&jh->b_state_lock);
}
- if (jh->b_modified == 1) {
+ if (data_race(jh->b_modified == 1)) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (data_race(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)) {
@@ -1547,7 +1528,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out;
}
- journal = transaction->t_journal;
spin_lock(&jh->b_state_lock);
if (is_handle_aborted(handle)) {
@@ -1562,6 +1542,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out_unlock_bh;
}
+ journal = transaction->t_journal;
+
if (jh->b_modified == 0) {
/*
* This buffer's got modified and becoming part
@@ -2094,21 +2076,6 @@ static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
jh->b_transaction = NULL;
}
-void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
-{
- struct buffer_head *bh = jh2bh(jh);
-
- /* Get reference so that buffer cannot be freed before we unlock it */
- get_bh(bh);
- spin_lock(&jh->b_state_lock);
- spin_lock(&journal->j_list_lock);
- __jbd2_journal_unfile_buffer(jh);
- spin_unlock(&journal->j_list_lock);
- spin_unlock(&jh->b_state_lock);
- jbd2_journal_put_journal_head(jh);
- __brelse(bh);
-}
-
/**
* jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
@@ -2207,7 +2174,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
- * __journal_file_buffer
+ * __jbd2_journal_file_buffer
*/
clear_buffer_dirty(bh);
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);