summaryrefslogtreecommitdiff
path: root/include/linux/jbd2.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/jbd2.h')
-rw-r--r--include/linux/jbd2.h1121
1 files changed, 773 insertions, 348 deletions
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d5b50a19463c..f5eaf76198f3 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* linux/include/linux/jbd2.h
*
@@ -5,10 +6,6 @@
*
* Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
*
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
* Definitions for transaction data structures for the buffer cache
* filesystem journaling support.
*/
@@ -29,7 +26,9 @@
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/slab.h>
-#include <crypto/hash.h>
+#include <linux/bit_spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/crc32c.h>
#endif
#define journal_oom_retry 1
@@ -55,20 +54,20 @@
* CONFIG_JBD2_DEBUG is on.
*/
#define JBD2_EXPENSIVE_CHECKING
-extern ushort jbd2_journal_enable_debug;
void __jbd2_debug(int level, const char *file, const char *func,
unsigned int line, const char *fmt, ...);
-#define jbd_debug(n, fmt, a...) \
+#define jbd2_debug(n, fmt, a...) \
__jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)
#else
-#define jbd_debug(n, fmt, a...) /**/
+#define jbd2_debug(n, fmt, a...) no_printk(fmt, ##a)
#endif
extern void *jbd2_alloc(size_t size, gfp_t flags);
extern void jbd2_free(void *ptr, size_t size);
#define JBD2_MIN_JOURNAL_BLOCKS 1024
+#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256
#ifdef __KERNEL__
@@ -159,7 +158,11 @@ typedef struct journal_header_s
* journal_block_tag (in the descriptor). The other h_chksum* fields are
* not used.
*
- * Checksum v1 and v2 are mutually exclusive features.
+ * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
+ * journal_block_tag3_t to store a full 32-bit checksum. Everything else
+ * is the same as v2.
+ *
+ * Checksum v1, v2, and v3 are mutually exclusive features.
*/
struct commit_header {
__be32 h_magic;
@@ -179,6 +182,14 @@ struct commit_header {
* raw struct shouldn't be used for pointer math or sizeof() - use
* journal_tag_bytes(journal) instead to compute this.
*/
+typedef struct journal_block_tag3_s
+{
+ __be32 t_blocknr; /* The on-disk block number */
+ __be32 t_flags; /* See below */
+ __be32 t_blocknr_high; /* most-significant high 32bits. */
+ __be32 t_checksum; /* crc32c(uuid+seq+block) */
+} journal_block_tag3_t;
+
typedef struct journal_block_tag_s
{
__be32 t_blocknr; /* The on-disk block number */
@@ -187,10 +198,7 @@ typedef struct journal_block_tag_s
__be32 t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t;
-#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
-#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
-
-/* Tail of descriptor block, for checksumming */
+/* Tail of descriptor or revoke block, for checksumming */
struct jbd2_journal_block_tail {
__be32 t_checksum; /* crc32c(uuid+descr_block) */
};
@@ -205,11 +213,6 @@ typedef struct jbd2_journal_revoke_header_s
__be32 r_count; /* Count of bytes used in the block */
} jbd2_journal_revoke_header_t;
-/* Tail of revoke block, for checksumming */
-struct jbd2_journal_revoke_tail {
- __be32 r_checksum; /* crc32c(uuid+revoke_block) */
-};
-
/* Definitions for the journal tag flags word: */
#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */
#define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */
@@ -260,7 +263,12 @@ typedef struct journal_superblock_s
/* 0x0050 */
__u8 s_checksum_type; /* checksum type */
__u8 s_padding2[3];
- __u32 s_padding[42];
+/* 0x0054 */
+ __be32 s_num_fc_blks; /* Number of fast commit blocks */
+ __be32 s_head; /* blocknr of head of log, only uptodate
+ * while the filesystem is clean */
+/* 0x005C */
+ __u32 s_padding[40];
__be32 s_checksum; /* crc32c(superblock) */
/* 0x0100 */
@@ -268,22 +276,16 @@ typedef struct journal_superblock_s
/* 0x0400 */
} journal_superblock_t;
-#define JBD2_HAS_COMPAT_FEATURE(j,mask) \
- ((j)->j_format_version >= 2 && \
- ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask))))
-#define JBD2_HAS_RO_COMPAT_FEATURE(j,mask) \
- ((j)->j_format_version >= 2 && \
- ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask))))
-#define JBD2_HAS_INCOMPAT_FEATURE(j,mask) \
- ((j)->j_format_version >= 2 && \
- ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
-
-#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
+#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008
+#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010
+#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020
+
+/* See "journal feature predicate functions" below */
/* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
@@ -291,7 +293,9 @@ typedef struct journal_superblock_s
#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
JBD2_FEATURE_INCOMPAT_64BIT | \
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
- JBD2_FEATURE_INCOMPAT_CSUM_V2)
+ JBD2_FEATURE_INCOMPAT_CSUM_V2 | \
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 | \
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
#ifdef __KERNEL__
@@ -306,7 +310,6 @@ enum jbd_state_bits {
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
- BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Shadow, /* IO on shadow buffer is running */
BH_Verified, /* Metadata block has been verified ok */
@@ -325,7 +328,25 @@ BUFFER_FNS(Freed, freed)
BUFFER_FNS(Shadow, shadow)
BUFFER_FNS(Verified, verified)
-#include <linux/jbd_common.h>
+static inline struct buffer_head *jh2bh(struct journal_head *jh)
+{
+ return jh->b_bh;
+}
+
+static inline struct journal_head *bh2jh(struct buffer_head *bh)
+{
+ return bh->b_private;
+}
+
+static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+{
+ bit_spin_lock(BH_JournalHead, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+{
+ bit_spin_unlock(BH_JournalHead, &bh->b_state);
+}
#define J_ASSERT(assert) BUG_ON(!(assert))
@@ -355,48 +376,97 @@ BUFFER_FNS(Verified, verified)
/* Flags in jbd_inode->i_flags */
#define __JI_COMMIT_RUNNING 0
-/* Commit of the inode data in progress. We use this flag to protect us from
+#define __JI_WRITE_DATA 1
+#define __JI_WAIT_DATA 2
+
+/*
+ * Commit of the inode data in progress. We use this flag to protect us from
* concurrent deletion of inode. We cannot use reference to inode for this
* since we cannot afford doing last iput() on behalf of kjournald
*/
#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+/* Write allocated dirty buffers in this inode before commit */
+#define JI_WRITE_DATA (1 << __JI_WRITE_DATA)
+/* Wait for outstanding data writes for this inode before commit */
+#define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
/**
- * struct jbd_inode is the structure linking inodes in ordered mode
- * present in a transaction so that we can sync them during commit.
+ * struct jbd2_inode - The jbd_inode type is the structure linking inodes in
+ * ordered mode present in a transaction so that we can sync them during commit.
*/
struct jbd2_inode {
- /* Which transaction does this inode belong to? Either the running
- * transaction or the committing one. [j_list_lock] */
+ /**
+ * @i_transaction:
+ *
+ * Which transaction does this inode belong to? Either the running
+ * transaction or the committing one. [j_list_lock]
+ */
transaction_t *i_transaction;
- /* Pointer to the running transaction modifying inode's data in case
- * there is already a committing transaction touching it. [j_list_lock] */
+ /**
+ * @i_next_transaction:
+ *
+ * Pointer to the running transaction modifying inode's data in case
+ * there is already a committing transaction touching it. [j_list_lock]
+ */
transaction_t *i_next_transaction;
- /* List of inodes in the i_transaction [j_list_lock] */
+ /**
+ * @i_list: List of inodes in the i_transaction [j_list_lock]
+ */
struct list_head i_list;
- /* VFS inode this inode belongs to [constant during the lifetime
- * of the structure] */
+ /**
+ * @i_vfs_inode:
+ *
+ * VFS inode this inode belongs to [constant for lifetime of structure]
+ */
struct inode *i_vfs_inode;
- /* Flags of inode [j_list_lock] */
+ /**
+ * @i_flags: Flags of inode [j_list_lock]
+ */
unsigned long i_flags;
+
+ /**
+ * @i_dirty_start:
+ *
+ * Offset in bytes where the dirty range for this inode starts.
+ * [j_list_lock]
+ */
+ loff_t i_dirty_start;
+
+ /**
+ * @i_dirty_end:
+ *
+ * Inclusive offset in bytes where the dirty range for this inode
+ * ends. [j_list_lock]
+ */
+ loff_t i_dirty_end;
};
struct jbd2_revoke_table_s;
/**
- * struct handle_s - The handle_s type is the concrete type associated with
- * handle_t.
+ * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete
+ * type associated with handle_t.
* @h_transaction: Which compound transaction is this update a part of?
- * @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
- * @h_ref: Reference count on this handle
- * @h_err: Field for caller's use to track errors through large fs operations
- * @h_sync: flag for sync-on-close
- * @h_jdata: flag to force data journaling
- * @h_aborted: flag indicating fatal error on handle
+ * @h_journal: Which journal handle belongs to - used iff h_reserved set.
+ * @h_rsv_handle: Handle reserved for finishing the logical operation.
+ * @h_total_credits: Number of remaining buffers we are allowed to add to
+ * journal. These are dirty buffers and revoke descriptor blocks.
+ * @h_revoke_credits: Number of remaining revoke records available for handle
+ * @h_ref: Reference count on this handle.
+ * @h_err: Field for caller's use to track errors through large fs operations.
+ * @h_sync: Flag for sync-on-close.
+ * @h_reserved: Flag for handle for reserved credits.
+ * @h_aborted: Flag indicating fatal error on handle.
+ * @h_type: For handle statistics.
+ * @h_line_no: For handle statistics.
+ * @h_start_jiffies: Handle Start time.
+ * @h_requested_credits: Holds @h_total_credits after handle is started.
+ * @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started.
+ * @saved_alloc_context: Saved context while transaction is open.
**/
/* Docbook can't yet cope with the bit fields, but will leave the documentation
@@ -406,39 +476,29 @@ struct jbd2_revoke_table_s;
struct jbd2_journal_handle
{
union {
- /* Which compound transaction is this update a part of? */
transaction_t *h_transaction;
/* Which journal handle belongs to - used iff h_reserved set */
journal_t *h_journal;
};
- /* Handle reserved for finishing the logical operation */
handle_t *h_rsv_handle;
-
- /* Number of remaining buffers we are allowed to dirty: */
- int h_buffer_credits;
-
- /* Reference count on this handle */
+ int h_total_credits;
+ int h_revoke_credits;
+ int h_revoke_credits_requested;
int h_ref;
-
- /* Field for caller's use to track errors through large fs */
- /* operations */
int h_err;
/* Flags [no locking] */
- unsigned int h_sync: 1; /* sync-on-close */
- unsigned int h_jdata: 1; /* force data journaling */
- unsigned int h_reserved: 1; /* handle with reserved credits */
- unsigned int h_aborted: 1; /* fatal error on handle */
- unsigned int h_type: 8; /* for handle statistics */
- unsigned int h_line_no: 16; /* for handle statistics */
+ unsigned int h_sync: 1;
+ unsigned int h_reserved: 1;
+ unsigned int h_aborted: 1;
+ unsigned int h_type: 8;
+ unsigned int h_line_no: 16;
unsigned long h_start_jiffies;
unsigned int h_requested_credits;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map h_lockdep_map;
-#endif
+ unsigned int saved_alloc_context;
};
@@ -466,6 +526,7 @@ struct transaction_chp_stats_s {
* The transaction keeps track of all of the buffers modified by a
* running transaction, and all of the buffers committed but not yet
* flushed to home for finished transactions.
+ * (Locking Documentation improved by LockDoc)
*/
/*
@@ -475,15 +536,12 @@ struct transaction_chp_stats_s {
* ->jbd_lock_bh_journal_head() (This is "innermost")
*
* j_state_lock
- * ->jbd_lock_bh_state()
+ * ->b_state_lock
*
- * jbd_lock_bh_state()
+ * b_state_lock
* ->j_list_lock
*
* j_state_lock
- * ->t_handle_lock
- *
- * j_state_lock
* ->j_list_lock (journal_unmap_buffer)
*
*/
@@ -507,6 +565,7 @@ struct transaction_s
enum {
T_RUNNING,
T_LOCKED,
+ T_SWITCH,
T_FLUSH,
T_COMMIT,
T_COMMIT_DFLUSH,
@@ -520,18 +579,22 @@ struct transaction_s
*/
unsigned long t_log_start;
- /* Number of buffers on the t_buffers list [j_list_lock] */
+ /*
+ * Number of buffers on the t_buffers list [j_list_lock, no locks
+ * needed for jbd2 thread]
+ */
int t_nr_buffers;
/*
* Doubly-linked circular list of all buffers reserved but not yet
- * modified by this transaction [j_list_lock]
+ * modified by this transaction [j_list_lock, no locks needed fo
+ * jbd2 thread]
*/
struct journal_head *t_reserved_list;
/*
* Doubly-linked circular list of all metadata buffers owned by this
- * transaction [j_list_lock]
+ * transaction [j_list_lock, no locks needed for jbd2 thread]
*/
struct journal_head *t_buffers;
@@ -549,30 +612,23 @@ struct transaction_s
struct journal_head *t_checkpoint_list;
/*
- * Doubly-linked circular list of all buffers submitted for IO while
- * checkpointing. [j_list_lock]
- */
- struct journal_head *t_checkpoint_io_list;
-
- /*
- * Doubly-linked circular list of metadata buffers being shadowed by log
- * IO. The IO buffers on the iobuf list and the shadow buffers on this
- * list match each other one for one at all times. [j_list_lock]
+ * Doubly-linked circular list of metadata buffers being
+ * shadowed by log IO. The IO buffers on the iobuf list and
+ * the shadow buffers on this list match each other one for
+ * one at all times. [j_list_lock, no locks needed for jbd2
+ * thread]
*/
struct journal_head *t_shadow_list;
/*
- * List of inodes whose data we've modified in data=ordered mode.
+ * List of inodes associated with the transaction; e.g., ext4 uses
+ * this to track inodes in data=ordered and data=journal mode that
+ * need special handling on transaction commit; also used by ocfs2.
* [j_list_lock]
*/
struct list_head t_inode_list;
/*
- * Protects info related to handles
- */
- spinlock_t t_handle_lock;
-
- /*
* Longest time some handle had to wait for running transaction
*/
unsigned long t_max_wait;
@@ -583,28 +639,41 @@ struct transaction_s
unsigned long t_start;
/*
- * When commit was requested
+ * When commit was requested [j_state_lock]
*/
unsigned long t_requested;
/*
- * Checkpointing stats [j_checkpoint_sem]
+ * Checkpointing stats [j_list_lock]
*/
struct transaction_chp_stats_s t_chp_stats;
/*
* Number of outstanding updates running on this transaction
- * [t_handle_lock]
+ * [none]
*/
atomic_t t_updates;
/*
- * Number of buffers reserved for use by all handles in this transaction
- * handle but not yet modified. [t_handle_lock]
+ * Number of blocks reserved for this transaction in the journal.
+ * This is including all credits reserved when starting transaction
+ * handles as well as all journal descriptor blocks needed for this
+ * transaction. [none]
*/
atomic_t t_outstanding_credits;
/*
+ * Number of revoke records for this transaction added by already
+ * stopped handles. [none]
+ */
+ atomic_t t_outstanding_revokes;
+
+ /*
+ * How many handles used this transaction? [none]
+ */
+ atomic_t t_handle_count;
+
+ /*
* Forward and backward links for the circular list of all transactions
* awaiting checkpoint. [j_list_lock]
*/
@@ -622,11 +691,6 @@ struct transaction_s
ktime_t t_start_time;
/*
- * How many handles used this transaction? [t_handle_lock]
- */
- atomic_t t_handle_count;
-
- /*
* This transaction is being forced and some process is
* waiting for it to finish.
*/
@@ -634,12 +698,6 @@ struct transaction_s
/* Disk flush needs to be sent to fs partition [no locking] */
int t_need_data_flush;
-
- /*
- * For use by the filesystem to store fs-specific data
- * structures associated with the transaction
- */
- struct list_head t_private_list;
};
struct transaction_run_stats_s {
@@ -672,233 +730,326 @@ jbd2_time_diff(unsigned long start, unsigned long end)
#define JBD2_NR_BATCH 64
+enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
+
+#define JBD2_FC_REPLAY_STOP 0
+#define JBD2_FC_REPLAY_CONTINUE 1
+
/**
* struct journal_s - The journal_s type is the concrete type associated with
* journal_t.
- * @j_flags: General journaling state flags
- * @j_errno: Is there an outstanding uncleared error on the journal (from a
- * prior abort)?
- * @j_sb_buffer: First part of superblock buffer
- * @j_superblock: Second part of superblock buffer
- * @j_format_version: Version of the superblock format
- * @j_state_lock: Protect the various scalars in the journal
- * @j_barrier_count: Number of processes waiting to create a barrier lock
- * @j_barrier: The barrier lock itself
- * @j_running_transaction: The current running transaction..
- * @j_committing_transaction: the transaction we are pushing to disk
- * @j_checkpoint_transactions: a linked circular list of all transactions
- * waiting for checkpointing
- * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction
- * to start committing, or for a barrier lock to be released
- * @j_wait_done_commit: Wait queue for waiting for commit to complete
- * @j_wait_commit: Wait queue to trigger commit
- * @j_wait_updates: Wait queue to wait for updates to complete
- * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop
- * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints
- * @j_head: Journal head - identifies the first unused block in the journal
- * @j_tail: Journal tail - identifies the oldest still-used block in the
- * journal.
- * @j_free: Journal free - how many free blocks are there in the journal?
- * @j_first: The block number of the first usable block
- * @j_last: The block number one beyond the last usable block
- * @j_dev: Device where we store the journal
- * @j_blocksize: blocksize for the location where we store the journal.
- * @j_blk_offset: starting block offset for into the device where we store the
- * journal
- * @j_fs_dev: Device which holds the client fs. For internal journal this will
- * be equal to j_dev
- * @j_reserved_credits: Number of buffers reserved from the running transaction
- * @j_maxlen: Total maximum capacity of the journal region on disk.
- * @j_list_lock: Protects the buffer lists and internal buffer state.
- * @j_inode: Optional inode where we store the journal. If present, all journal
- * block numbers are mapped into this inode via bmap().
- * @j_tail_sequence: Sequence number of the oldest transaction in the log
- * @j_transaction_sequence: Sequence number of the next transaction to grant
- * @j_commit_sequence: Sequence number of the most recently committed
- * transaction
- * @j_commit_request: Sequence number of the most recent transaction wanting
- * commit
- * @j_uuid: Uuid of client object.
- * @j_task: Pointer to the current commit thread for this journal
- * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a
- * single compound commit transaction
- * @j_commit_interval: What is the maximum transaction lifetime before we begin
- * a commit?
- * @j_commit_timer: The timer used to wakeup the commit thread
- * @j_revoke_lock: Protect the revoke table
- * @j_revoke: The revoke table - maintains the list of revoked blocks in the
- * current transaction.
- * @j_revoke_table: alternate revoke tables for j_revoke
- * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction
- * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
- * number that will fit in j_blocksize
- * @j_last_sync_writer: most recent pid which did a synchronous write
- * @j_history: Buffer storing the transactions statistics history
- * @j_history_max: Maximum number of transactions in the statistics history
- * @j_history_cur: Current number of transactions in the statistics history
- * @j_history_lock: Protect the transactions statistics history
- * @j_proc_entry: procfs entry for the jbd statistics directory
- * @j_stats: Overall statistics
- * @j_private: An opaque pointer to fs-private information.
*/
-
struct journal_s
{
- /* General journaling state flags [j_state_lock] */
+ /**
+ * @j_flags: General journaling state flags [j_state_lock,
+ * no lock for quick racy checks]
+ */
unsigned long j_flags;
- /*
+ /**
+ * @j_errno:
+ *
* Is there an outstanding uncleared error on the journal (from a prior
* abort)? [j_state_lock]
*/
int j_errno;
- /* The superblock buffer */
+ /**
+ * @j_abort_mutex: Lock the whole aborting procedure.
+ */
+ struct mutex j_abort_mutex;
+
+ /**
+ * @j_sb_buffer: The first part of the superblock buffer.
+ */
struct buffer_head *j_sb_buffer;
- journal_superblock_t *j_superblock;
- /* Version of the superblock format */
- int j_format_version;
+ /**
+ * @j_superblock: The second part of the superblock buffer.
+ */
+ journal_superblock_t *j_superblock;
- /*
- * Protect the various scalars in the journal
+ /**
+ * @j_state_lock: Protect the various scalars in the journal.
*/
rwlock_t j_state_lock;
- /*
- * Number of processes waiting to create a barrier lock [j_state_lock]
+ /**
+ * @j_barrier_count:
+ *
+ * Number of processes waiting to create a barrier lock [j_state_lock,
+ * no lock for quick racy checks]
*/
int j_barrier_count;
- /* The barrier lock itself */
+ /**
+ * @j_barrier: The barrier lock itself.
+ */
struct mutex j_barrier;
- /*
+ /**
+ * @j_running_transaction:
+ *
* Transactions: The current running transaction...
- * [j_state_lock] [caller holding open handle]
+ * [j_state_lock, no lock for quick racy checks] [caller holding
+ * open handle]
*/
transaction_t *j_running_transaction;
- /*
+ /**
+ * @j_committing_transaction:
+ *
* the transaction we are pushing to disk
* [j_state_lock] [caller holding open handle]
*/
transaction_t *j_committing_transaction;
- /*
+ /**
+ * @j_checkpoint_transactions:
+ *
* ... and a linked circular list of all transactions waiting for
* checkpointing. [j_list_lock]
*/
transaction_t *j_checkpoint_transactions;
- /*
+ /**
+ * @j_wait_transaction_locked:
+ *
* Wait queue for waiting for a locked transaction to start committing,
- * or for a barrier lock to be released
+ * or for a barrier lock to be released.
*/
wait_queue_head_t j_wait_transaction_locked;
- /* Wait queue for waiting for commit to complete */
+ /**
+ * @j_wait_done_commit: Wait queue for waiting for commit to complete.
+ */
wait_queue_head_t j_wait_done_commit;
- /* Wait queue to trigger commit */
+ /**
+ * @j_wait_commit: Wait queue to trigger commit.
+ */
wait_queue_head_t j_wait_commit;
- /* Wait queue to wait for updates to complete */
+ /**
+ * @j_wait_updates: Wait queue to wait for updates to complete.
+ */
wait_queue_head_t j_wait_updates;
- /* Wait queue to wait for reserved buffer credits to drop */
+ /**
+ * @j_wait_reserved:
+ *
+ * Wait queue to wait for reserved buffer credits to drop.
+ */
wait_queue_head_t j_wait_reserved;
- /* Semaphore for locking against concurrent checkpoints */
+ /**
+ * @j_fc_wait:
+ *
+ * Wait queue to wait for completion of async fast commits.
+ */
+ wait_queue_head_t j_fc_wait;
+
+ /**
+ * @j_checkpoint_mutex:
+ *
+ * Semaphore for locking against concurrent checkpoints.
+ */
struct mutex j_checkpoint_mutex;
- /*
+ /**
+ * @j_chkpt_bhs:
+ *
* List of buffer heads used by the checkpoint routine. This
* was moved from jbd2_log_do_checkpoint() to reduce stack
* usage. Access to this array is controlled by the
- * j_checkpoint_mutex. [j_checkpoint_mutex]
+ * @j_checkpoint_mutex. [j_checkpoint_mutex]
*/
struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH];
-
- /*
+
+ /**
+ * @j_shrinker:
+ *
+ * Journal head shrinker, reclaim buffer's journal head which
+ * has been written back.
+ */
+ struct shrinker *j_shrinker;
+
+ /**
+ * @j_checkpoint_jh_count:
+ *
+ * Number of journal buffers on the checkpoint list. [j_list_lock]
+ */
+ struct percpu_counter j_checkpoint_jh_count;
+
+ /**
+ * @j_shrink_transaction:
+ *
+ * Record next transaction will shrink on the checkpoint list.
+ * [j_list_lock]
+ */
+ transaction_t *j_shrink_transaction;
+
+ /**
+ * @j_head:
+ *
* Journal head: identifies the first unused block in the journal.
* [j_state_lock]
*/
unsigned long j_head;
- /*
+ /**
+ * @j_tail:
+ *
* Journal tail: identifies the oldest still-used block in the journal.
* [j_state_lock]
*/
unsigned long j_tail;
- /*
+ /**
+ * @j_free:
+ *
* Journal free: how many free blocks are there in the journal?
* [j_state_lock]
*/
unsigned long j_free;
- /*
- * Journal start and end: the block numbers of the first usable block
- * and one beyond the last usable block in the journal. [j_state_lock]
+ /**
+ * @j_first:
+ *
+ * The block number of the first usable block in the journal
+ * [j_state_lock].
*/
unsigned long j_first;
+
+ /**
+ * @j_last:
+ *
+ * The block number one beyond the last usable block in the journal
+ * [j_state_lock].
+ */
unsigned long j_last;
- /*
- * Device, blocksize and starting block offset for the location where we
- * store the journal.
+ /**
+ * @j_fc_first:
+ *
+ * The block number of the first fast commit block in the journal
+ * [j_state_lock].
+ */
+ unsigned long j_fc_first;
+
+ /**
+ * @j_fc_off:
+ *
+ * Number of fast commit blocks currently allocated. Accessed only
+ * during fast commit. Currently only process can do fast commit, so
+ * this field is not protected by any lock.
+ */
+ unsigned long j_fc_off;
+
+ /**
+ * @j_fc_last:
+ *
+ * The block number one beyond the last fast commit block in the journal
+ * [j_state_lock].
+ */
+ unsigned long j_fc_last;
+
+ /**
+ * @j_dev: Device where we store the journal.
*/
struct block_device *j_dev;
+
+ /**
+ * @j_blocksize: Block size for the location where we store the journal.
+ */
int j_blocksize;
+
+ /**
+ * @j_blk_offset:
+ *
+ * Starting block offset into the device where we store the journal.
+ */
unsigned long long j_blk_offset;
+
+ /**
+ * @j_devname: Journal device name.
+ */
char j_devname[BDEVNAME_SIZE+24];
- /*
+ /**
+ * @j_fs_dev:
+ *
* Device which holds the client fs. For internal journal this will be
* equal to j_dev.
*/
struct block_device *j_fs_dev;
- /* Total maximum capacity of the journal region on disk. */
- unsigned int j_maxlen;
+ /**
+ * @j_fs_dev_wb_err:
+ *
+ * Records the errseq of the client fs's backing block device.
+ */
+ errseq_t j_fs_dev_wb_err;
+
+ /**
+ * @j_total_len: Total maximum capacity of the journal region on disk.
+ */
+ unsigned int j_total_len;
- /* Number of buffers reserved from the running transaction */
+ /**
+ * @j_reserved_credits:
+ *
+ * Number of buffers reserved from the running transaction.
+ */
atomic_t j_reserved_credits;
- /*
- * Protects the buffer lists and internal buffer state.
+ /**
+ * @j_list_lock: Protects the buffer lists and internal buffer state.
*/
spinlock_t j_list_lock;
- /* Optional inode where we store the journal. If present, all */
- /* journal block numbers are mapped into this inode via */
- /* bmap(). */
+ /**
+ * @j_inode:
+ *
+ * Optional inode where we store the journal. If present, all
+ * journal block numbers are mapped into this inode via bmap().
+ */
struct inode *j_inode;
- /*
+ /**
+ * @j_tail_sequence:
+ *
* Sequence number of the oldest transaction in the log [j_state_lock]
*/
tid_t j_tail_sequence;
- /*
+ /**
+ * @j_transaction_sequence:
+ *
* Sequence number of the next transaction to grant [j_state_lock]
*/
tid_t j_transaction_sequence;
- /*
+ /**
+ * @j_commit_sequence:
+ *
* Sequence number of the most recently committed transaction
- * [j_state_lock].
+ * [j_state_lock, no lock for quick racy checks]
*/
tid_t j_commit_sequence;
- /*
+ /**
+ * @j_commit_request:
+ *
* Sequence number of the most recent transaction wanting commit
- * [j_state_lock]
+ * [j_state_lock, no lock for quick racy checks]
*/
tid_t j_commit_request;
- /*
+ /**
+ * @j_uuid:
+ *
* Journal uuid: identifies the object (filesystem, LVM volume etc)
* backed by this journal. This will eventually be replaced by an array
* of uuids, allowing us to index multiple devices within a single
@@ -906,84 +1057,326 @@ struct journal_s
*/
__u8 j_uuid[16];
- /* Pointer to the current commit thread for this journal */
+ /**
+ * @j_task: Pointer to the current commit thread for this journal.
+ */
struct task_struct *j_task;
- /*
+ /**
+ * @j_max_transaction_buffers:
+ *
* Maximum number of metadata buffers to allow in a single compound
- * commit transaction
+ * commit transaction.
*/
int j_max_transaction_buffers;
- /*
+ /**
+ * @j_revoke_records_per_block:
+ *
+ * Number of revoke records that fit in one descriptor block.
+ */
+ int j_revoke_records_per_block;
+
+ /**
+ * @j_transaction_overhead_buffers:
+ *
+ * Number of blocks each transaction needs for its own bookkeeping
+ */
+ int j_transaction_overhead_buffers;
+
+ /**
+ * @j_commit_interval:
+ *
* What is the maximum transaction lifetime before we begin a commit?
*/
unsigned long j_commit_interval;
- /* The timer used to wakeup the commit thread: */
+ /**
+ * @j_commit_timer: The timer used to wakeup the commit thread.
+ */
struct timer_list j_commit_timer;
- /*
- * The revoke table: maintains the list of revoked blocks in the
- * current transaction. [j_revoke_lock]
+ /**
+ * @j_revoke_lock: Protect the revoke table.
*/
spinlock_t j_revoke_lock;
+
+ /**
+ * @j_revoke:
+ *
+ * The revoke table - maintains the list of revoked blocks in the
+ * current transaction.
+ */
struct jbd2_revoke_table_s *j_revoke;
+
+ /**
+ * @j_revoke_table: Alternate revoke tables for j_revoke.
+ */
struct jbd2_revoke_table_s *j_revoke_table[2];
- /*
- * array of bhs for jbd2_journal_commit_transaction
+ /**
+ * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
*/
struct buffer_head **j_wbuf;
+
+ /**
+ * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only
+ * during a fast commit. Currently only process can do fast commit, so
+ * this field is not protected by any lock.
+ */
+ struct buffer_head **j_fc_wbuf;
+
+ /**
+ * @j_wbufsize:
+ *
+ * Size of @j_wbuf array.
+ */
int j_wbufsize;
- /*
- * this is the pid of hte last person to run a synchronous operation
- * through the journal
+ /**
+ * @j_fc_wbufsize:
+ *
+ * Size of @j_fc_wbuf array.
+ */
+ int j_fc_wbufsize;
+
+ /**
+ * @j_last_sync_writer:
+ *
+ * The pid of the last person to run a synchronous operation
+ * through the journal.
*/
pid_t j_last_sync_writer;
- /*
- * the average amount of time in nanoseconds it takes to commit a
+ /**
+ * @j_average_commit_time:
+ *
+ * The average amount of time in nanoseconds it takes to commit a
* transaction to disk. [j_state_lock]
*/
u64 j_average_commit_time;
- /*
- * minimum and maximum times that we should wait for
- * additional filesystem operations to get batched into a
- * synchronous handle in microseconds
+ /**
+ * @j_min_batch_time:
+ *
+ * Minimum time that we should wait for additional filesystem operations
+ * to get batched into a synchronous handle in microseconds.
*/
u32 j_min_batch_time;
+
+ /**
+ * @j_max_batch_time:
+ *
+ * Maximum time that we should wait for additional filesystem operations
+ * to get batched into a synchronous handle in microseconds.
+ */
u32 j_max_batch_time;
- /* This function is called when a transaction is closed */
+ /**
+ * @j_commit_callback:
+ *
+ * This function is called when a transaction is closed.
+ */
void (*j_commit_callback)(journal_t *,
transaction_t *);
+ /**
+ * @j_submit_inode_data_buffers:
+ *
+ * This function is called for all inodes associated with the
+ * committing transaction marked with JI_WRITE_DATA flag
+ * before we start to write out the transaction to the journal.
+ */
+ int (*j_submit_inode_data_buffers)
+ (struct jbd2_inode *);
+
+ /**
+ * @j_finish_inode_data_buffers:
+ *
+ * This function is called for all inodes associated with the
+ * committing transaction marked with JI_WAIT_DATA flag
+ * after we have written the transaction to the journal
+ * but before we write out the commit block.
+ */
+ int (*j_finish_inode_data_buffers)
+ (struct jbd2_inode *);
+
/*
* Journal statistics
*/
+
+ /**
+ * @j_history_lock: Protect the transactions statistics history.
+ */
spinlock_t j_history_lock;
+
+ /**
+ * @j_proc_entry: procfs entry for the jbd statistics directory.
+ */
struct proc_dir_entry *j_proc_entry;
+
+ /**
+ * @j_stats: Overall statistics.
+ */
struct transaction_stats_s j_stats;
- /* Failed journal commit ID */
+ /**
+ * @j_failed_commit: Failed journal commit ID.
+ */
unsigned int j_failed_commit;
- /*
+ /**
+ * @j_private:
+ *
* An opaque pointer to fs-private information. ext3 puts its
- * superblock pointer here
+ * superblock pointer here.
*/
void *j_private;
- /* Reference to checksum algorithm driver via cryptoapi */
- struct crypto_shash *j_chksum_driver;
-
- /* Precomputed journal UUID checksum for seeding other checksums */
+ /**
+ * @j_csum_seed:
+ *
+ * Precomputed journal UUID checksum for seeding other checksums.
+ */
__u32 j_csum_seed;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /**
+ * @j_trans_commit_map:
+ *
+ * Lockdep entity to track transaction commit dependencies. Handles
+ * hold this "lock" for read, when we wait for commit, we acquire the
+ * "lock" for writing. This matches the properties of jbd2 journalling
+ * where the running transaction has to wait for all handles to be
+ * dropped to commit that transaction and also acquiring a handle may
+ * require transaction commit to finish.
+ */
+ struct lockdep_map j_trans_commit_map;
+#endif
+ /**
+ * @jbd2_trans_commit_key:
+ *
+ * "struct lock_class_key" for @j_trans_commit_map
+ */
+ struct lock_class_key jbd2_trans_commit_key;
+
+ /**
+ * @j_fc_cleanup_callback:
+ *
+ * Clean-up after fast commit or full commit. JBD2 calls this function
+ * after every commit operation.
+ */
+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
+
+ /**
+ * @j_fc_replay_callback:
+ *
+ * File-system specific function that performs replay of a fast
+ * commit. JBD2 calls this function for each fast commit block found in
+ * the journal. This function should return JBD2_FC_REPLAY_CONTINUE
+ * to indicate that the block was processed correctly and more fast
+ * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP
+ * indicates the end of replay (no more blocks remaining). A negative
+ * return value indicates error.
+ */
+ int (*j_fc_replay_callback)(struct journal_s *journal,
+ struct buffer_head *bh,
+ enum passtype pass, int off,
+ tid_t expected_commit_id);
+
+ /**
+ * @j_bmap:
+ *
+ * Bmap function that should be used instead of the generic
+ * VFS bmap function.
+ */
+ int (*j_bmap)(struct journal_s *journal, sector_t *block);
};
+#define jbd2_might_wait_for_commit(j) \
+ do { \
+ rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \
+ rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \
+ } while (0)
+
+/*
+ * We can support any known requested features iff the
+ * superblock is not in version 1. Otherwise we fail to support any
+ * extended sb features.
+ */
+static inline bool jbd2_format_support_feature(journal_t *j)
+{
+ return j->j_superblock->s_header.h_blocktype !=
+ cpu_to_be32(JBD2_SUPERBLOCK_V1);
+}
+
+/* journal feature predicate functions */
+#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \
+static inline bool jbd2_has_feature_##name(journal_t *j) \
+{ \
+ return (jbd2_format_support_feature(j) && \
+ ((j)->j_superblock->s_feature_compat & \
+ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \
+} \
+static inline void jbd2_set_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_compat |= \
+ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \
+} \
+static inline void jbd2_clear_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_compat &= \
+ ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \
+}
+
+#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \
+static inline bool jbd2_has_feature_##name(journal_t *j) \
+{ \
+ return (jbd2_format_support_feature(j) && \
+ ((j)->j_superblock->s_feature_ro_compat & \
+ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \
+} \
+static inline void jbd2_set_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_ro_compat |= \
+ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \
+} \
+static inline void jbd2_clear_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_ro_compat &= \
+ ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \
+}
+
+#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \
+static inline bool jbd2_has_feature_##name(journal_t *j) \
+{ \
+ return (jbd2_format_support_feature(j) && \
+ ((j)->j_superblock->s_feature_incompat & \
+ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \
+} \
+static inline void jbd2_set_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_incompat |= \
+ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \
+} \
+static inline void jbd2_clear_feature_##name(journal_t *j) \
+{ \
+ (j)->j_superblock->s_feature_incompat &= \
+ ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \
+}
+
+JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM)
+
+JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE)
+JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT)
+JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT)
+JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2)
+JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
+JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
+
+/* Journal high priority write IO operation flags */
+#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE)
+
/*
* Journal flag definitions
*/
@@ -993,9 +1386,15 @@ struct journal_s
#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */
#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */
#define JBD2_BARRIER 0x020 /* Use IDE barriers */
-#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
- * data write error in ordered
- * mode */
+#define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on
+ * clean and empty filesystem
+ * logging area */
+#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
+#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
+#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001
+#define JBD2_JOURNAL_FLUSH_ZEROOUT 0x0002
+#define JBD2_JOURNAL_FLUSH_VALID (JBD2_JOURNAL_FLUSH_DISCARD | \
+ JBD2_JOURNAL_FLUSH_ZEROOUT)
/*
* Function declarations for the journaling transaction and buffer
@@ -1003,13 +1402,10 @@ struct journal_s
*/
/* Filing buffers */
-extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
-extern void __jbd2_journal_refile_buffer(struct journal_head *);
+extern bool __jbd2_journal_refile_buffer(struct journal_head *);
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_free_buffer(struct journal_head *bh);
extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_clean_data_list(transaction_t *transaction);
static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
{
list_add_tail(&bh->b_assoc_buffers, head);
@@ -1020,19 +1416,25 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
}
/* Log buffer allocation */
-struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
+struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int);
+void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block);
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
/* Commit management */
extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+enum jbd2_shrink_type {JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP, JBD2_SHRINK_BUSY_SKIP};
+
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, enum jbd2_shrink_type type);
+unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
+void jbd2_journal_destroy_checkpoint(journal_t *journal);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
@@ -1071,12 +1473,9 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct buffer_head **bh_out,
sector_t blocknr);
-/* Transaction locking */
-extern void __wait_on_journal (journal_t *);
-
/* Transaction cache support */
extern void jbd2_journal_destroy_transaction_cache(void);
-extern int jbd2_journal_init_transaction_cache(void);
+extern int __init jbd2_journal_init_transaction_cache(void);
extern void jbd2_journal_free_transaction(transaction_t *);
/*
@@ -1103,14 +1502,16 @@ static inline handle_t *journal_current_handle(void)
extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
- gfp_t gfp_mask, unsigned int type,
- unsigned int line_no);
+ int revoke_records, gfp_t gfp_mask,
+ unsigned int type, unsigned int line_no);
extern int jbd2_journal_restart(handle_t *, int nblocks);
-extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
+extern int jbd2__journal_restart(handle_t *, int nblocks,
+ int revoke_records, gfp_t gfp_mask);
extern int jbd2_journal_start_reserved(handle_t *handle,
unsigned int type, unsigned int line_no);
extern void jbd2_journal_free_reserved(handle_t *handle);
-extern int jbd2_journal_extend (handle_t *, int nblocks);
+extern int jbd2_journal_extend(handle_t *handle, int nblocks,
+ int revoke_records);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
@@ -1118,15 +1519,16 @@ void jbd2_journal_set_triggers(struct buffer_head *,
struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
-extern void journal_sync_buffer (struct buffer_head *);
-extern int jbd2_journal_invalidatepage(journal_t *,
- struct page *, unsigned int, unsigned int);
-extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
+int jbd2_journal_invalidate_folio(journal_t *, struct folio *,
+ size_t offset, size_t length);
+bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio);
extern int jbd2_journal_stop(handle_t *);
-extern int jbd2_journal_flush (journal_t *);
+extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
extern void jbd2_journal_lock_updates (journal_t *);
extern void jbd2_journal_unlock_updates (journal_t *);
+void jbd2_journal_wait_updates(journal_t *);
+
extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
unsigned long long start, int len, int bsize);
@@ -1146,9 +1548,8 @@ extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *);
-extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
- unsigned long, int);
-extern void __jbd2_journal_abort_hard (journal_t *);
+extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
+ unsigned long, blk_opf_t);
extern void jbd2_journal_abort (journal_t *, int);
extern int jbd2_journal_errno (journal_t *);
extern void jbd2_journal_ack_err (journal_t *);
@@ -1156,7 +1557,14 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
-extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *inode, loff_t start_byte,
+ loff_t length);
+extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
+ struct jbd2_inode *inode, loff_t start_byte,
+ loff_t length);
+extern int jbd2_journal_finish_inode_data_buffers(
+ struct jbd2_inode *jinode);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
@@ -1174,10 +1582,13 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
*/
extern struct kmem_cache *jbd2_handle_cache;
-static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
-{
- return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags);
-}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
+#define jbd2_alloc_handle(_gfp_flags) \
+ ((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags))
static inline void jbd2_free_handle(handle_t *handle)
{
@@ -1190,10 +1601,13 @@ static inline void jbd2_free_handle(handle_t *handle)
*/
extern struct kmem_cache *jbd2_inode_cache;
-static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags)
-{
- return kmem_cache_alloc(jbd2_inode_cache, gfp_flags);
-}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
+#define jbd2_alloc_inode(_gfp_flags) \
+ ((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags))
static inline void jbd2_free_inode(struct jbd2_inode *jinode)
{
@@ -1203,16 +1617,18 @@ static inline void jbd2_free_inode(struct jbd2_inode *jinode)
/* Primary revoke support */
#define JOURNAL_REVOKE_DEFAULT_HASH 256
extern int jbd2_journal_init_revoke(journal_t *, int);
-extern void jbd2_journal_destroy_revoke_caches(void);
-extern int jbd2_journal_init_revoke_caches(void);
+extern void jbd2_journal_destroy_revoke_record_cache(void);
+extern void jbd2_journal_destroy_revoke_table_cache(void);
+extern int __init jbd2_journal_init_revoke_record_cache(void);
+extern int __init jbd2_journal_init_revoke_table_cache(void);
+struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size);
+void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table);
extern void jbd2_journal_destroy_revoke(journal_t *);
extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *);
-extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
-extern void jbd2_journal_write_revoke_records(journal_t *journal,
- transaction_t *transaction,
- struct list_head *log_bufs,
- int write_op);
+extern void jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
+extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
+ struct list_head *log_bufs);
/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
@@ -1229,9 +1645,9 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
*/
int jbd2_log_start_commit(journal_t *journal, tid_t tid);
-int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
+int jbd2_transaction_committed(journal_t *journal, tid_t tid);
int jbd2_complete_transaction(journal_t *journal, tid_t tid);
int jbd2_log_do_checkpoint(journal_t *journal);
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
@@ -1240,14 +1656,15 @@ void __jbd2_log_wait_for_space(journal_t *journal);
extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
extern int jbd2_cleanup_journal_tail(journal_t *);
-/* Debugging code only: */
-
-#define jbd_ENOSYS() \
-do { \
- printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \
- current->state = TASK_UNINTERRUPTIBLE; \
- schedule(); \
-} while (1)
+/* Fast commit related APIs */
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
+int jbd2_fc_end_commit(journal_t *journal);
+int jbd2_fc_end_commit_fallback(journal_t *journal);
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
+int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
+void jbd2_fc_release_bufs(journal_t *journal);
/*
* is_journal_abort
@@ -1276,6 +1693,25 @@ static inline void jbd2_journal_abort_handle(handle_t *handle)
handle->h_aborted = 1;
}
+static inline void jbd2_init_fs_dev_write_error(journal_t *journal)
+{
+ struct address_space *mapping = journal->j_fs_dev->bd_mapping;
+
+ /*
+ * Save the original wb_err value of client fs's bdev mapping which
+ * could be used to detect the client fs's metadata async write error.
+ */
+ errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err);
+}
+
+static inline int jbd2_check_fs_dev_write_error(journal_t *journal)
+{
+ struct address_space *mapping = journal->j_fs_dev->bd_mapping;
+
+ return errseq_check(&mapping->wb_err,
+ READ_ONCE(journal->j_fs_dev_wb_err));
+}
+
#endif /* __KERNEL__ */
/* Comparison functions for transaction IDs: perform comparisons using
@@ -1293,23 +1729,20 @@ static inline int tid_geq(tid_t x, tid_t y)
return (difference >= 0);
}
-extern int jbd2_journal_blocks_per_page(struct inode *inode);
+extern int jbd2_journal_blocks_per_folio(struct inode *inode);
extern size_t journal_tag_bytes(journal_t *journal);
-/*
- * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
- * transaction control blocks.
- */
-#define JBD2_CONTROL_BLOCKS_SHIFT 5
+static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
+{
+ return jbd2_has_feature_csum2(journal) ||
+ jbd2_has_feature_csum3(journal);
+}
-/*
- * Return the minimum number of blocks which must be free in the journal
- * before a new transaction may be started. Must be called under j_state_lock.
- */
-static inline int jbd2_space_needed(journal_t *journal)
+static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
{
- int nblocks = journal->j_max_transaction_buffers;
- return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT);
+ int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks);
+
+ return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS;
}
/*
@@ -1318,16 +1751,13 @@ static inline int jbd2_space_needed(journal_t *journal)
static inline unsigned long jbd2_log_space_left(journal_t *journal)
{
/* Allow for rounding errors */
- unsigned long free = journal->j_free - 32;
+ long free = journal->j_free - 32;
if (journal->j_committing_transaction) {
- unsigned long committing = atomic_read(&journal->
- j_committing_transaction->t_outstanding_credits);
-
- /* Transaction + control blocks */
- free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT);
+ free -= atomic_read(&journal->
+ j_committing_transaction->t_outstanding_credits);
}
- return free;
+ return max_t(long, free, 0);
}
/*
@@ -1342,31 +1772,9 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
#define BJ_Reserved 4 /* Buffer is reserved for access by journal */
#define BJ_Types 5
-extern int jbd_blocks_per_page(struct inode *inode);
-
-/* JBD uses a CRC32 checksum */
-#define JBD_MAX_CHECKSUM_SIZE 4
-
-static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
- const void *address, unsigned int length)
+static inline u32 jbd2_chksum(u32 crc, const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[JBD_MAX_CHECKSUM_SIZE];
- } desc;
- int err;
-
- BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
- JBD_MAX_CHECKSUM_SIZE);
-
- desc.shash.tfm = journal->j_chksum_driver;
- desc.shash.flags = 0;
- *(u32 *)desc.ctx = crc;
-
- err = crypto_shash_update(&desc.shash, address, length);
- BUG_ON(err);
-
- return *(u32 *)desc.ctx;
+ return crc32c(crc, address, length);
}
/* Return most recent uncommitted transaction */
@@ -1382,6 +1790,20 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
return tid;
}
+static inline int jbd2_handle_buffer_credits(handle_t *handle)
+{
+ journal_t *journal;
+
+ if (!handle->h_reserved)
+ journal = handle->h_transaction->t_journal;
+ else
+ journal = handle->h_journal;
+
+ return handle->h_total_credits -
+ DIV_ROUND_UP(handle->h_revoke_credits_requested,
+ journal->j_revoke_records_per_block);
+}
+
#ifdef __KERNEL__
#define buffer_trace_init(bh) do {} while (0)
@@ -1393,4 +1815,7 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
#endif /* __KERNEL__ */
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+
#endif /* _LINUX_JBD2_H */