summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/jbd2.h124
-rw-r--r--include/trace/events/ext4.h228
2 files changed, 344 insertions, 8 deletions
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 08f904943ab2..fb3d71ad6eea 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -289,6 +289,7 @@ typedef struct journal_superblock_s
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008
#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010
+#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020
/* See "journal feature predicate functions" below */
@@ -299,7 +300,8 @@ typedef struct journal_superblock_s
JBD2_FEATURE_INCOMPAT_64BIT | \
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
JBD2_FEATURE_INCOMPAT_CSUM_V2 | \
- JBD2_FEATURE_INCOMPAT_CSUM_V3)
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 | \
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
#ifdef __KERNEL__
@@ -452,8 +454,8 @@ struct jbd2_inode {
struct jbd2_revoke_table_s;
/**
- * struct handle_s - The handle_s type is the concrete type associated with
- * handle_t.
+ * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete
+ * type associated with handle_t.
* @h_transaction: Which compound transaction is this update a part of?
* @h_journal: Which journal handle belongs to - used iff h_reserved set.
* @h_rsv_handle: Handle reserved for finishing the logical operation.
@@ -629,7 +631,9 @@ struct transaction_s
struct journal_head *t_shadow_list;
/*
- * List of inodes whose data we've modified in data=ordered mode.
+ * List of inodes associated with the transaction; e.g., ext4 uses
+ * this to track inodes in data=ordered and data=journal mode that
+ * need special handling on transaction commit; also used by ocfs2.
* [j_list_lock]
*/
struct list_head t_inode_list;
@@ -747,6 +751,11 @@ jbd2_time_diff(unsigned long start, unsigned long end)
#define JBD2_NR_BATCH 64
+enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
+
+#define JBD2_FC_REPLAY_STOP 0
+#define JBD2_FC_REPLAY_CONTINUE 1
+
/**
* struct journal_s - The journal_s type is the concrete type associated with
* journal_t.
@@ -858,6 +867,13 @@ struct journal_s
wait_queue_head_t j_wait_reserved;
/**
+ * @j_fc_wait:
+ *
+ * Wait queue to wait for completion of async fast commits.
+ */
+ wait_queue_head_t j_fc_wait;
+
+ /**
* @j_checkpoint_mutex:
*
* Semaphore for locking against concurrent checkpoints.
@@ -915,6 +931,30 @@ struct journal_s
unsigned long j_last;
/**
+ * @j_fc_first:
+ *
+ * The block number of the first fast commit block in the journal
+ * [j_state_lock].
+ */
+ unsigned long j_fc_first;
+
+ /**
+ * @j_fc_off:
+ *
+ * Number of fast commit blocks currently allocated.
+ * [j_state_lock].
+ */
+ unsigned long j_fc_off;
+
+ /**
+ * @j_fc_last:
+ *
+ * The block number one beyond the last fast commit block in the journal
+ * [j_state_lock].
+ */
+ unsigned long j_fc_last;
+
+ /**
* @j_dev: Device where we store the journal.
*/
struct block_device *j_dev;
@@ -1065,6 +1105,12 @@ struct journal_s
struct buffer_head **j_wbuf;
/**
+ * @j_fc_wbuf: Array of fast commit bhs for
+ * jbd2_journal_commit_transaction.
+ */
+ struct buffer_head **j_fc_wbuf;
+
+ /**
* @j_wbufsize:
*
* Size of @j_wbuf array.
@@ -1072,6 +1118,13 @@ struct journal_s
int j_wbufsize;
/**
+ * @j_fc_wbufsize:
+ *
+ * Size of @j_fc_wbuf array.
+ */
+ int j_fc_wbufsize;
+
+ /**
* @j_last_sync_writer:
*
* The pid of the last person to run a synchronous operation
@@ -1111,6 +1164,27 @@ struct journal_s
void (*j_commit_callback)(journal_t *,
transaction_t *);
+ /**
+ * @j_submit_inode_data_buffers:
+ *
+ * This function is called for all inodes associated with the
+ * committing transaction marked with JI_WRITE_DATA flag
+ * before we start to write out the transaction to the journal.
+ */
+ int (*j_submit_inode_data_buffers)
+ (struct jbd2_inode *);
+
+ /**
+ * @j_finish_inode_data_buffers:
+ *
+ * This function is called for all inodes associated with the
+ * committing transaction marked with JI_WAIT_DATA flag
+ * after we have written the transaction to the journal
+ * but before we write out the commit block.
+ */
+ int (*j_finish_inode_data_buffers)
+ (struct jbd2_inode *);
+
/*
* Journal statistics
*/
@@ -1170,6 +1244,30 @@ struct journal_s
*/
struct lockdep_map j_trans_commit_map;
#endif
+
+ /**
+ * @j_fc_cleanup_callback:
+ *
+ * Clean-up after fast commit or full commit. JBD2 calls this function
+ * after every commit operation.
+ */
+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+
+ /*
+ * @j_fc_replay_callback:
+ *
+ * File-system specific function that performs replay of a fast
+ * commit. JBD2 calls this function for each fast commit block found in
+ * the journal. This function should return JBD2_FC_REPLAY_CONTINUE
+ * to indicate that the block was processed correctly and more fast
+ * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP
+ * indicates the end of replay (no more blocks remaining). A negative
+ * return value indicates error.
+ */
+ int (*j_fc_replay_callback)(struct journal_s *journal,
+ struct buffer_head *bh,
+ enum passtype pass, int off,
+ tid_t expected_commit_id);
};
#define jbd2_might_wait_for_commit(j) \
@@ -1240,6 +1338,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT)
JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT)
JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2)
JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
+JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
/*
* Journal flag definitions
@@ -1253,6 +1352,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
* data write error in ordered
* mode */
+#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
+#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
/*
* Function declarations for the journaling transaction and buffer
@@ -1421,6 +1522,10 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle,
extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
struct jbd2_inode *inode, loff_t start_byte,
loff_t length);
+extern int jbd2_journal_submit_inode_data_buffers(
+ struct jbd2_inode *jinode);
+extern int jbd2_journal_finish_inode_data_buffers(
+ struct jbd2_inode *jinode);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
@@ -1505,6 +1610,17 @@ void __jbd2_log_wait_for_space(journal_t *journal);
extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
extern int jbd2_cleanup_journal_tail(journal_t *);
+/* Fast commit related APIs */
+int jbd2_fc_init(journal_t *journal, int num_fc_blks);
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
+int jbd2_fc_end_commit(journal_t *journal);
+int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
+int jbd2_submit_inode_data(struct jbd2_inode *jinode);
+int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
+int jbd2_fc_release_bufs(journal_t *journal);
+
/*
* is_journal_abort
*
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 4c8b99ec8606..b14314fcf732 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -95,6 +95,16 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+#define show_fc_reason(reason) \
+ __print_symbolic(reason, \
+ { EXT4_FC_REASON_XATTR, "XATTR"}, \
+ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \
+ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \
+ { EXT4_FC_REASON_MEM, "NO_MEM"}, \
+ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \
+ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \
+ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \
+ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"})
TRACE_EVENT(ext4_other_inode_update_time,
TP_PROTO(struct inode *inode, ino_t orig_ino),
@@ -1766,9 +1776,9 @@ TRACE_EVENT(ext4_ext_load_extent,
);
TRACE_EVENT(ext4_load_inode,
- TP_PROTO(struct inode *inode),
+ TP_PROTO(struct super_block *sb, unsigned long ino),
- TP_ARGS(inode),
+ TP_ARGS(sb, ino),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -1776,8 +1786,8 @@ TRACE_EVENT(ext4_load_inode,
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = inode->i_ino;
+ __entry->dev = sb->s_dev;
+ __entry->ino = ino;
),
TP_printk("dev %d,%d ino %ld",
@@ -2791,6 +2801,216 @@ TRACE_EVENT(ext4_lazy_itable_init,
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
);
+TRACE_EVENT(ext4_fc_replay_scan,
+ TP_PROTO(struct super_block *sb, int error, int off),
+
+ TP_ARGS(sb, error, off),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, error)
+ __field(int, off)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->error = error;
+ __entry->off = off;
+ ),
+
+ TP_printk("FC scan pass on dev %d,%d: error %d, off %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->error, __entry->off)
+);
+
+TRACE_EVENT(ext4_fc_replay,
+ TP_PROTO(struct super_block *sb, int tag, int ino, int priv1, int priv2),
+
+ TP_ARGS(sb, tag, ino, priv1, priv2),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, tag)
+ __field(int, ino)
+ __field(int, priv1)
+ __field(int, priv2)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->tag = tag;
+ __entry->ino = ino;
+ __entry->priv1 = priv1;
+ __entry->priv2 = priv2;
+ ),
+
+ TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->tag, __entry->ino, __entry->priv1, __entry->priv2)
+);
+
+TRACE_EVENT(ext4_fc_commit_start,
+ TP_PROTO(struct super_block *sb),
+
+ TP_ARGS(sb),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ ),
+
+ TP_printk("fast_commit started on dev %d,%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev))
+);
+
+TRACE_EVENT(ext4_fc_commit_stop,
+ TP_PROTO(struct super_block *sb, int nblks, int reason),
+
+ TP_ARGS(sb, nblks, reason),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, nblks)
+ __field(int, reason)
+ __field(int, num_fc)
+ __field(int, num_fc_ineligible)
+ __field(int, nblks_agg)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nblks = nblks;
+ __entry->reason = reason;
+ __entry->num_fc = EXT4_SB(sb)->s_fc_stats.fc_num_commits;
+ __entry->num_fc_ineligible =
+ EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
+ __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks;
+ ),
+
+ TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nblks, __entry->reason, __entry->num_fc,
+ __entry->num_fc_ineligible, __entry->nblks_agg)
+);
+
+#define FC_REASON_NAME_STAT(reason) \
+ show_fc_reason(reason), \
+ __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason]
+
+TRACE_EVENT(ext4_fc_stats,
+ TP_PROTO(struct super_block *sb),
+
+ TP_ARGS(sb),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(struct ext4_sb_info *, sbi)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->sbi = EXT4_SB(sb);
+ ),
+
+ TP_printk("dev %d:%d fc ineligible reasons:\n"
+ "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; "
+ "num_commits:%ld, ineligible: %ld, numblks: %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+ __entry->sbi->s_fc_stats.fc_num_commits,
+ __entry->sbi->s_fc_stats.fc_ineligible_commits,
+ __entry->sbi->s_fc_stats.fc_numblks)
+
+);
+
+#define DEFINE_TRACE_DENTRY_EVENT(__type) \
+ TRACE_EVENT(ext4_fc_track_##__type, \
+ TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \
+ \
+ TP_ARGS(inode, dentry, ret), \
+ \
+ TP_STRUCT__entry( \
+ __field(dev_t, dev) \
+ __field(int, ino) \
+ __field(int, error) \
+ ), \
+ \
+ TP_fast_assign( \
+ __entry->dev = inode->i_sb->s_dev; \
+ __entry->ino = inode->i_ino; \
+ __entry->error = ret; \
+ ), \
+ \
+ TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \
+ MAJOR(__entry->dev), MINOR(__entry->dev), \
+ __entry->ino, __entry->error, \
+ #__type) \
+ )
+
+DEFINE_TRACE_DENTRY_EVENT(create);
+DEFINE_TRACE_DENTRY_EVENT(link);
+DEFINE_TRACE_DENTRY_EVENT(unlink);
+
+TRACE_EVENT(ext4_fc_track_inode,
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, ino)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->error = ret;
+ ),
+
+ TP_printk("dev %d:%d, inode %d, error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino, __entry->error)
+ );
+
+TRACE_EVENT(ext4_fc_track_range,
+ TP_PROTO(struct inode *inode, long start, long end, int ret),
+
+ TP_ARGS(inode, start, end, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, ino)
+ __field(long, start)
+ __field(long, end)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->start = start;
+ __entry->end = end;
+ __entry->error = ret;
+ ),
+
+ TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino, __entry->error, __entry->start,
+ __entry->end)
+ );
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */