From fb2736bbaee0e704a4f33912cf532597b2dc5b33 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 24 Jul 2011 21:30:55 +0100 Subject: regmap: Add basic tracepoints Trace single register reads and writes, plus start/stop tracepoints for the actual I/O to see where we're spending time. This makes it easy to have always on logging without overwhelming the logs and also lets us take advantage of all the context and time information that the trace subsystem collects for us. We don't currently trace register values for bulk operations as this would add complexity and overhead parsing the cooked data that's being worked with. Signed-off-by: Mark Brown --- include/trace/events/regmap.h | 112 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 include/trace/events/regmap.h (limited to 'include/trace/events') diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h new file mode 100644 index 000000000000..1c76f40dee44 --- /dev/null +++ b/include/trace/events/regmap.h @@ -0,0 +1,112 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM regmap + +#if !defined(_TRACE_REGMAP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_REGMAP_H + +#include +#include +#include + +struct regmap; + +/* + * Log register events + */ +DECLARE_EVENT_CLASS(regmap_reg, + + TP_PROTO(struct device *dev, unsigned int reg, + unsigned int val), + + TP_ARGS(dev, reg, val), + + TP_STRUCT__entry( + __string( name, dev_name(dev) ) + __field( unsigned int, reg ) + __field( unsigned int, val ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->reg = reg; + __entry->val = val; + ), + + TP_printk("%s reg=%x val=%x", __get_str(name), + (unsigned int)__entry->reg, + (unsigned int)__entry->val) +); + +DEFINE_EVENT(regmap_reg, regmap_reg_write, + + TP_PROTO(struct device *dev, unsigned int reg, + unsigned int val), + + TP_ARGS(dev, reg, val) + +); + +DEFINE_EVENT(regmap_reg, regmap_reg_read, + + TP_PROTO(struct device *dev, unsigned int reg, + unsigned int val), + + TP_ARGS(dev, reg, val) + +); + +DECLARE_EVENT_CLASS(regmap_block, + + TP_PROTO(struct device *dev, unsigned int reg, size_t count), + + TP_ARGS(dev, reg, count), + + TP_STRUCT__entry( + __string( name, dev_name(dev) ) + __field( unsigned int, reg ) + __field( size_t, count ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->reg = reg; + __entry->count = count; + ), + + TP_printk("%s reg=%x count=%d", __get_str(name), + (unsigned int)__entry->reg, + (size_t)__entry->count) +); + +DEFINE_EVENT(regmap_block, regmap_hw_read_start, + + TP_PROTO(struct device *dev, unsigned int reg, size_t count), + + TP_ARGS(dev, reg, count) +); + +DEFINE_EVENT(regmap_block, regmap_hw_read_done, + + TP_PROTO(struct device *dev, unsigned int reg, size_t count), + + TP_ARGS(dev, reg, count) +); + +DEFINE_EVENT(regmap_block, regmap_hw_write_start, + + TP_PROTO(struct device *dev, unsigned int reg, size_t count), + + TP_ARGS(dev, reg, count) +); + +DEFINE_EVENT(regmap_block, regmap_hw_write_done, + + TP_PROTO(struct device *dev, unsigned int reg, size_t count), + + TP_ARGS(dev, reg, count) +); + +#endif /* _TRACE_REGMAP_H */ + +/* This part must be outside protection */ +#include -- cgit From 3d9ead7c06e51249a92eb710a892a66601bd7290 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Aug 2011 16:42:27 +0900 Subject: regmap: Use int rather than size_t for lengths when logging blocks x86_64 warns as size_t is not an int. Signed-off-by: Mark Brown Reported-by: Stephen Rothwell --- include/trace/events/regmap.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index 1c76f40dee44..e35e37c378c6 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -57,14 +57,14 @@ DEFINE_EVENT(regmap_reg, regmap_reg_read, DECLARE_EVENT_CLASS(regmap_block, - TP_PROTO(struct device *dev, unsigned int reg, size_t count), + TP_PROTO(struct device *dev, unsigned int reg, int count), TP_ARGS(dev, reg, count), TP_STRUCT__entry( __string( name, dev_name(dev) ) __field( unsigned int, reg ) - __field( size_t, count ) + __field( int, count ) ), TP_fast_assign( @@ -75,33 +75,33 @@ DECLARE_EVENT_CLASS(regmap_block, TP_printk("%s reg=%x count=%d", __get_str(name), (unsigned int)__entry->reg, - (size_t)__entry->count) + (int)__entry->count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_start, - TP_PROTO(struct device *dev, unsigned int reg, size_t count), + TP_PROTO(struct device *dev, unsigned int reg, int count), TP_ARGS(dev, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_done, - TP_PROTO(struct device *dev, unsigned int reg, size_t count), + TP_PROTO(struct device *dev, unsigned int reg, int count), TP_ARGS(dev, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_start, - TP_PROTO(struct device *dev, unsigned int reg, size_t count), + TP_PROTO(struct device *dev, unsigned int reg, int count), TP_ARGS(dev, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_done, - TP_PROTO(struct device *dev, unsigned int reg, size_t count), + TP_PROTO(struct device *dev, unsigned int reg, int count), TP_ARGS(dev, reg, count) ); -- cgit From d8990240d8c911064447f8aa5a440f9345a6d692 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Fri, 9 Sep 2011 19:18:51 -0400 Subject: ext4: add some tracepoints in ext4/extents.c This patch adds some tracepoints in ext4/extents.c and updates a tracepoint in ext4/inode.c. Tested: Built and ran the kernel and verified that these tracepoints work. Also ran xfstests. Signed-off-by: Aditya Kali Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 398 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 391 insertions(+), 7 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b50a54736242..c9a341e385a3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -12,6 +12,8 @@ struct ext4_allocation_request; struct ext4_prealloc_space; struct ext4_inode_info; struct mpage_da_data; +struct ext4_map_blocks; +struct ext4_extent; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) @@ -1032,9 +1034,9 @@ TRACE_EVENT(ext4_forget, ); TRACE_EVENT(ext4_da_update_reserve_space, - TP_PROTO(struct inode *inode, int used_blocks), + TP_PROTO(struct inode *inode, int used_blocks, int quota_claim), - TP_ARGS(inode, used_blocks), + TP_ARGS(inode, used_blocks, quota_claim), TP_STRUCT__entry( __field( dev_t, dev ) @@ -1045,6 +1047,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, __field( int, reserved_data_blocks ) __field( int, reserved_meta_blocks ) __field( int, allocated_meta_blocks ) + __field( int, quota_claim ) ), TP_fast_assign( @@ -1053,19 +1056,24 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; __entry->used_blocks = used_blocks; - __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; + __entry->reserved_data_blocks = + EXT4_I(inode)->i_reserved_data_blocks; + __entry->reserved_meta_blocks = + EXT4_I(inode)->i_reserved_meta_blocks; + __entry->allocated_meta_blocks = + EXT4_I(inode)->i_allocated_meta_blocks; + __entry->quota_claim = quota_claim; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d", + "allocated_meta_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) + __entry->reserved_meta_blocks, __entry->allocated_meta_blocks, + __entry->quota_claim) ); TRACE_EVENT(ext4_da_reserve_space, @@ -1589,6 +1597,382 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free, TP_ARGS(sb, group, start, len) ); +TRACE_EVENT(ext4_ext_handle_uninitialized_extents, + TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, + unsigned int allocated, ext4_fsblk_t newblock), + + TP_ARGS(inode, map, allocated, newblock), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned int, len ) + __field( int, flags ) + __field( unsigned int, allocated ) + __field( ext4_fsblk_t, newblk ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->lblk = map->m_lblk; + __entry->pblk = map->m_pblk; + __entry->len = map->m_len; + __entry->flags = map->m_flags; + __entry->allocated = allocated; + __entry->newblk = newblock; + ), + + TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d" + "allocated %d newblock %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, + __entry->len, __entry->flags, + (unsigned int) __entry->allocated, + (unsigned long long) __entry->newblk) +); + +TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, + TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret), + + TP_ARGS(sb, map, ret), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned int, len ) + __field( unsigned int, flags ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->lblk = map->m_lblk; + __entry->pblk = map->m_pblk; + __entry->len = map->m_len; + __entry->flags = map->m_flags; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->lblk, (unsigned long long) __entry->pblk, + __entry->len, __entry->flags, __entry->ret) +); + +TRACE_EVENT(ext4_ext_put_in_cache, + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, + ext4_fsblk_t start), + + TP_ARGS(inode, lblk, len, start), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( unsigned int, len ) + __field( ext4_fsblk_t, start ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->lblk = lblk; + __entry->len = len; + __entry->start = start; + ), + + TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->lblk, + __entry->len, + (unsigned long long) __entry->start) +); + +TRACE_EVENT(ext4_ext_in_cache, + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret), + + TP_ARGS(inode, lblk, ret), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->lblk = lblk; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d ino %lu lblk %u ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->lblk, + __entry->ret) + +); + +TRACE_EVENT(ext4_find_delalloc_range, + TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to, + int reverse, int found, ext4_lblk_t found_blk), + + TP_ARGS(inode, from, to, reverse, found, found_blk), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, from ) + __field( ext4_lblk_t, to ) + __field( int, reverse ) + __field( int, found ) + __field( ext4_lblk_t, found_blk ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->from = from; + __entry->to = to; + __entry->reverse = reverse; + __entry->found = found; + __entry->found_blk = found_blk; + ), + + TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d " + "(blk = %u)", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->from, (unsigned) __entry->to, + __entry->reverse, __entry->found, + (unsigned) __entry->found_blk) +); + +TRACE_EVENT(ext4_get_reserved_cluster_alloc, + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len), + + TP_ARGS(inode, lblk, len), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( unsigned int, len ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->lblk = lblk; + __entry->len = len; + ), + + TP_printk("dev %d,%d ino %lu lblk %u len %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->lblk, + __entry->len) +); + +TRACE_EVENT(ext4_ext_show_extent, + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, + unsigned short len), + + TP_ARGS(inode, lblk, pblk, len), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, lblk ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned short, len ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->lblk = lblk; + __entry->pblk = pblk; + __entry->len = len; + ), + + TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->lblk, + (unsigned long long) __entry->pblk, + (unsigned short) __entry->len) +); + +TRACE_EVENT(ext4_remove_blocks, + TP_PROTO(struct inode *inode, struct ext4_extent *ex, + ext4_lblk_t from, ext4_fsblk_t to, + ext4_fsblk_t partial_cluster), + + TP_ARGS(inode, ex, from, to, partial_cluster), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, ee_lblk ) + __field( ext4_fsblk_t, ee_pblk ) + __field( unsigned short, ee_len ) + __field( ext4_lblk_t, from ) + __field( ext4_lblk_t, to ) + __field( ext4_fsblk_t, partial ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->ee_lblk = cpu_to_le32(ex->ee_block); + __entry->ee_pblk = ext4_ext_pblock(ex); + __entry->ee_len = ext4_ext_get_actual_len(ex); + __entry->from = from; + __entry->to = to; + __entry->partial = partial_cluster; + ), + + TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" + "from %u to %u partial_cluster %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->ee_lblk, + (unsigned long long) __entry->ee_pblk, + (unsigned short) __entry->ee_len, + (unsigned) __entry->from, + (unsigned) __entry->to, + (unsigned) __entry->partial) +); + +TRACE_EVENT(ext4_ext_rm_leaf, + TP_PROTO(struct inode *inode, ext4_lblk_t start, + struct ext4_extent *ex, ext4_fsblk_t partial_cluster), + + TP_ARGS(inode, start, ex, partial_cluster), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, start ) + __field( ext4_lblk_t, ee_lblk ) + __field( ext4_fsblk_t, ee_pblk ) + __field( short, ee_len ) + __field( ext4_fsblk_t, partial ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->start = start; + __entry->ee_lblk = le32_to_cpu(ex->ee_block); + __entry->ee_pblk = ext4_ext_pblock(ex); + __entry->ee_len = ext4_ext_get_actual_len(ex); + __entry->partial = partial_cluster; + ), + + TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" + "partial_cluster %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->start, + (unsigned) __entry->ee_lblk, + (unsigned long long) __entry->ee_pblk, + (unsigned short) __entry->ee_len, + (unsigned) __entry->partial) +); + +TRACE_EVENT(ext4_ext_rm_idx, + TP_PROTO(struct inode *inode, ext4_fsblk_t pblk), + + TP_ARGS(inode, pblk), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_fsblk_t, pblk ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->pblk = pblk; + ), + + TP_printk("dev %d,%d ino %lu index_pblk %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned long long) __entry->pblk) +); + +TRACE_EVENT(ext4_ext_remove_space, + TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth), + + TP_ARGS(inode, start, depth), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, start ) + __field( int, depth ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->start = start; + __entry->depth = depth; + ), + + TP_printk("dev %d,%d ino %lu since %u depth %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->start, + __entry->depth) +); + +TRACE_EVENT(ext4_ext_remove_space_done, + TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth, + ext4_lblk_t partial, unsigned short eh_entries), + + TP_ARGS(inode, start, depth, partial, eh_entries), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, start ) + __field( int, depth ) + __field( ext4_lblk_t, partial ) + __field( unsigned short, eh_entries ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->start = start; + __entry->depth = depth; + __entry->partial = partial; + __entry->eh_entries = eh_entries; + ), + + TP_printk("dev %d,%d ino %lu since %u depth %d partial %u " + "remaining_entries %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned) __entry->start, + __entry->depth, + (unsigned) __entry->partial, + (unsigned short) __entry->eh_entries) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit From 593600890110c02eb471cf844649dee213870416 Mon Sep 17 00:00:00 2001 From: Dimitris Papastamos Date: Mon, 19 Sep 2011 14:34:04 +0100 Subject: regmap: Add the regcache_sync trace event Signed-off-by: Dimitris Papastamos Signed-off-by: Mark Brown --- include/trace/events/regmap.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index e35e37c378c6..1e3193b8fcc8 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -106,6 +106,30 @@ DEFINE_EVENT(regmap_block, regmap_hw_write_done, TP_ARGS(dev, reg, count) ); +TRACE_EVENT(regcache_sync, + + TP_PROTO(struct device *dev, const char *type, + const char *status), + + TP_ARGS(dev, type, status), + + TP_STRUCT__entry( + __string( name, dev_name(dev) ) + __string( status, status ) + __string( type, type ) + __field( int, type ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __assign_str(status, status); + __assign_str(type, type); + ), + + TP_printk("%s type=%s status=%s", __get_str(name), + __get_str(type), __get_str(status)) +); + #endif /* _TRACE_REGMAP_H */ /* This part must be outside protection */ -- cgit From de02d0786d4075091f5b1860474cd21d85ff5862 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 20 Sep 2011 21:43:24 +0100 Subject: ASoC: Trace and collect statistics for DAPM graph walking One of the longest standing areas for improvement in ASoC has been the DAPM algorithm - it repeats the same checks many times whenever it is run and makes no effort to limit the areas of the graph it checks meaning we do an awful lot of walks over the full graph. This has never mattered too much as the size of the graph has generally been small in relation to the size of the devices supported and the speed of CPUs but it is annoying. In preparation for work on improving this insert a trace point after the graph walk has been done. This gives us specific timing information for the walk, and in order to give quantifiable (non-benchmark) numbers also count every time we check a link or check the power for a widget and report those numbers. Substantial changes in the algorithm may require tweaks to the stats but they should be useful for simpler things. Signed-off-by: Mark Brown --- include/trace/events/asoc.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 603f5a0f0365..2e1adf62e0a8 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -216,6 +216,28 @@ DEFINE_EVENT(snd_soc_dapm_widget, snd_soc_dapm_widget_event_done, ); +TRACE_EVENT(snd_soc_dapm_walk_done, + + TP_PROTO(struct snd_soc_card *card), + + TP_ARGS(card), + + TP_STRUCT__entry( + __string( name, card->name ) + __field( int, power_checks ) + __field( int, path_checks ) + ), + + TP_fast_assign( + __assign_str(name, card->name); + __entry->power_checks = card->dapm_stats.power_checks; + __entry->path_checks = card->dapm_stats.path_checks; + ), + + TP_printk("%s: %d power checks, %d path checks", __get_str(name), + (int)__entry->power_checks, (int)__entry->path_checks) +); + TRACE_EVENT(snd_soc_jack_irq, TP_PROTO(const char *name), -- cgit From e56235e099d7290a2331b984a79f75bbe0865fe8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 21 Sep 2011 18:19:14 +0100 Subject: ASoC: Add another DAPM stat for neighbour checks The number of times we look at a potentially connected neighbour is just as important as the number of times we actually recurse into looking at that neighbour so also collect that statistic. Signed-off-by: Mark Brown --- include/trace/events/asoc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 2e1adf62e0a8..ab26f8aa3c78 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -226,16 +226,19 @@ TRACE_EVENT(snd_soc_dapm_walk_done, __string( name, card->name ) __field( int, power_checks ) __field( int, path_checks ) + __field( int, neighbour_checks ) ), TP_fast_assign( __assign_str(name, card->name); __entry->power_checks = card->dapm_stats.power_checks; __entry->path_checks = card->dapm_stats.path_checks; + __entry->neighbour_checks = card->dapm_stats.neighbour_checks; ), - TP_printk("%s: %d power checks, %d path checks", __get_str(name), - (int)__entry->power_checks, (int)__entry->path_checks) + TP_printk("%s: checks %d power, %d path, %d neighbour", + __get_str(name), (int)__entry->power_checks, + (int)__entry->path_checks, (int)__entry->neighbour_checks) ); TRACE_EVENT(snd_soc_jack_irq, -- cgit From 557ab425429a5123d37f412ce3e6d6137cb621f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Sep 2011 11:16:43 +0200 Subject: sched, tracing: Show PREEMPT_ACTIVE state in trace_sched_switch We had need to see the difference between scheduling a runnable task and a runnable task being involuntarily preempted. No app should rely on the old string output (the binary trace event record format is not changed). Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1316164603.10174.11.camel@twins Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index f6334782a593..959ff18b63b6 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p) * For all intents and purposes a preempted task is a running task. */ if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE) - state = TASK_RUNNING; + state = TASK_RUNNING | TASK_STATE_MAX; #endif return state; @@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->prev_state ? - __print_flags(__entry->prev_state, "|", + __entry->prev_state & (TASK_STATE_MAX-1) ? + __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "W" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- cgit From 53b615ccca567ada1931eb04ad0614ac150c14a3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Sep 2011 22:53:27 +0200 Subject: PM / Runtime: Introduce trace points for tracing rpm_* functions This patch introduces 3 trace points to prepare for tracing rpm_idle/rpm_suspend/rpm_resume functions, so we can use these trace points to replace the current dev_dbg(). Signed-off-by: Ming Lei Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki --- include/trace/events/rpm.h | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/trace/events/rpm.h (limited to 'include/trace/events') diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h new file mode 100644 index 000000000000..d62c558bf64b --- /dev/null +++ b/include/trace/events/rpm.h @@ -0,0 +1,99 @@ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rpm + +#if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RUNTIME_POWER_H + +#include +#include +#include + +/* + * The rpm_internal events are used for tracing some important + * runtime pm internal functions. + */ +DECLARE_EVENT_CLASS(rpm_internal, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags), + + TP_STRUCT__entry( + __string( name, dev_name(dev) ) + __field( int, flags ) + __field( int , usage_count ) + __field( int , disable_depth ) + __field( int , runtime_auto ) + __field( int , request_pending ) + __field( int , irq_safe ) + __field( int , child_count ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->flags = flags; + __entry->usage_count = atomic_read( + &dev->power.usage_count); + __entry->disable_depth = dev->power.disable_depth; + __entry->runtime_auto = dev->power.runtime_auto; + __entry->request_pending = dev->power.request_pending; + __entry->irq_safe = dev->power.irq_safe; + __entry->child_count = atomic_read( + &dev->power.child_count); + ), + + TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d" + " irq-%-1d child-%d", + __get_str(name), __entry->flags, + __entry->usage_count, + __entry->disable_depth, + __entry->runtime_auto, + __entry->request_pending, + __entry->irq_safe, + __entry->child_count + ) +); +DEFINE_EVENT(rpm_internal, rpm_suspend, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); +DEFINE_EVENT(rpm_internal, rpm_resume, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); +DEFINE_EVENT(rpm_internal, rpm_idle, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); + +TRACE_EVENT(rpm_return_int, + TP_PROTO(struct device *dev, unsigned long ip, int ret), + TP_ARGS(dev, ip, ret), + + TP_STRUCT__entry( + __string( name, dev_name(dev)) + __field( unsigned long, ip ) + __field( int, ret ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->ip = ip; + __entry->ret = ret; + ), + + TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name), + __entry->ret) +); + +#endif /* _TRACE_RUNTIME_POWER_H */ + +/* This part must be outside protection */ +#include -- cgit From 29c00b4a1d9e277786120032aa8364631820d863 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jun 2011 15:53:19 -0700 Subject: rcu: Add event-tracing for RCU callback invocation There was recently some controversy about the overhead of invoking RCU callbacks. Add TRACE_EVENT()s to obtain fine-grained timings for the start and stop of a batch of callbacks and also for each callback invoked. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 include/trace/events/rcu.h (limited to 'include/trace/events') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..db3f6e9e63e6 --- /dev/null +++ b/include/trace/events/rcu.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rcu + +#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RCU_H + +#include + +/* + * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + */ +TRACE_EVENT(rcu_batch_start, + + TP_PROTO(long callbacks_ready, int blimit), + + TP_ARGS(callbacks_ready, blimit), + + TP_STRUCT__entry( + __field( long, callbacks_ready ) + __field( int, blimit ) + ), + + TP_fast_assign( + __entry->callbacks_ready = callbacks_ready; + __entry->blimit = blimit; + ), + + TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) +); + +/* + * Tracepoint for the invocation of a single RCU callback + */ +TRACE_EVENT(rcu_invoke_callback, + + TP_PROTO(struct rcu_head *rhp), + + TP_ARGS(rhp), + + TP_STRUCT__entry( + __field( void *, rhp ) + __field( void *, func ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->func = rhp->func; + ), + + TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) +); + +/* + * Tracepoint for the invocation of a single RCU kfree callback + */ +TRACE_EVENT(rcu_invoke_kfree_callback, + + TP_PROTO(struct rcu_head *rhp, unsigned long offset), + + TP_ARGS(rhp, offset), + + TP_STRUCT__entry( + __field(void *, rhp ) + __field(unsigned long, offset ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->offset = offset; + ), + + TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) +); + +/* + * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + */ +TRACE_EVENT(rcu_batch_end, + + TP_PROTO(int callbacks_invoked), + + TP_ARGS(callbacks_invoked), + + TP_STRUCT__entry( + __field( int, callbacks_invoked ) + ), + + TP_fast_assign( + __entry->callbacks_invoked = callbacks_invoked; + ), + + TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) +); + +#endif /* _TRACE_RCU_H */ + +/* This part must be outside protection */ +#include -- cgit From 300df91ca9358f7f09298eec9503c12b32054ef7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 22:26:31 -0700 Subject: rcu: Event-trace markers for computing RCU CPU utilization This commit adds the trace_rcu_utilization() marker that is to be used to allow postprocessing scripts compute RCU's CPU utilization, give or take event-trace overhead. Note that we do not include RCU's dyntick-idle interface because event tracing requires RCU protection, which is not available in dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 73 +++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index db3f6e9e63e6..ab458eb689fb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,29 +7,58 @@ #include /* - * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + * Tracepoint for start/end markers used for utilization calculations. + * By convention, the string is of the following forms: + * + * "Start " -- Mark the start of the specified activity, + * such as "context switch". Nesting is permitted. + * "End " -- Mark the end of the specified activity. + */ +TRACE_EVENT(rcu_utilization, + + TP_PROTO(char *s), + + TP_ARGS(s), + + TP_STRUCT__entry( + __field(char *, s) + ), + + TP_fast_assign( + __entry->s = s; + ), + + TP_printk("%s", __entry->s) +); + +/* + * Tracepoint for marking the beginning rcu_do_batch, performed to start + * RCU callback invocation. The first argument is the total number of + * callbacks (including those that are not yet ready to be invoked), + * and the second argument is the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long callbacks_ready, int blimit), + TP_PROTO(long qlen, int blimit), - TP_ARGS(callbacks_ready, blimit), + TP_ARGS(qlen, blimit), TP_STRUCT__entry( - __field( long, callbacks_ready ) - __field( int, blimit ) + __field(long, qlen) + __field(int, blimit) ), TP_fast_assign( - __entry->callbacks_ready = callbacks_ready; - __entry->blimit = blimit; + __entry->qlen = qlen; + __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) + TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) ); /* - * Tracepoint for the invocation of a single RCU callback + * Tracepoint for the invocation of a single RCU callback function. + * The argument is a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, @@ -38,20 +67,23 @@ TRACE_EVENT(rcu_invoke_callback, TP_ARGS(rhp), TP_STRUCT__entry( - __field( void *, rhp ) - __field( void *, func ) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( - __entry->rhp = rhp; - __entry->func = rhp->func; + __entry->rhp = rhp; + __entry->func = rhp->func; ), TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) ); /* - * Tracepoint for the invocation of a single RCU kfree callback + * Tracepoint for the invocation of a single RCU callback of the special + * kfree() form. The first argument is a pointer to the RCU callback + * and the second argument is the offset of the callback within the + * enclosing RCU-protected data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, @@ -60,12 +92,12 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TP_ARGS(rhp, offset), TP_STRUCT__entry( - __field(void *, rhp ) - __field(unsigned long, offset ) + __field(void *, rhp) + __field(unsigned long, offset) ), TP_fast_assign( - __entry->rhp = rhp; + __entry->rhp = rhp; __entry->offset = offset; ), @@ -73,7 +105,8 @@ TRACE_EVENT(rcu_invoke_kfree_callback, ); /* - * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + * Tracepoint for exiting rcu_do_batch after RCU callbacks have been + * invoked. The first argument is the number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, @@ -82,11 +115,11 @@ TRACE_EVENT(rcu_batch_end, TP_ARGS(callbacks_invoked), TP_STRUCT__entry( - __field( int, callbacks_invoked ) + __field(int, callbacks_invoked) ), TP_fast_assign( - __entry->callbacks_invoked = callbacks_invoked; + __entry->callbacks_invoked = callbacks_invoked; ), TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) -- cgit From 72fe701b70e6ced35d734b676c13efbc8fc769a9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 01:14:54 -0700 Subject: rcu: Add RCU type to callback-invocation tracing Add a string to the rcu_batch_start() and rcu_batch_end() trace messages that indicates the RCU type ("rcu_sched", "rcu_bh", or "rcu_preempt"). The trace messages for the actual invocations themselves are not marked, as it should be clear from the rcu_batch_start() and rcu_batch_end() events before and after. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ab458eb689fb..508824e5a77d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -33,27 +33,31 @@ TRACE_EVENT(rcu_utilization, /* * Tracepoint for marking the beginning rcu_do_batch, performed to start - * RCU callback invocation. The first argument is the total number of - * callbacks (including those that are not yet ready to be invoked), - * and the second argument is the current RCU-callback batch limit. + * RCU callback invocation. The first argument is the RCU flavor, + * the second is the total number of callbacks (including those that + * are not yet ready to be invoked), and the third argument is the + * current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen, int blimit), - TP_ARGS(qlen, blimit), + TP_ARGS(rcuname, qlen, blimit), TP_STRUCT__entry( + __field(char *, rcuname) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld bl=%d", + __entry->rcuname, __entry->qlen, __entry->blimit) ); /* @@ -106,23 +110,27 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor and + * the second argument is number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked), - TP_ARGS(callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked), TP_STRUCT__entry( + __field(char *, rcuname) __field(int, callbacks_invoked) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; ), - TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) + TP_printk("%s CBs-invoked=%d", + __entry->rcuname, __entry->callbacks_invoked) ); #endif /* _TRACE_RCU_H */ -- cgit From 385680a9487d2f85382ad6d74e2a15837e47bfd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 22:43:26 -0700 Subject: rcu: Add event-trace markers to TREE_RCU kthreads Add event-trace markers to TREE_RCU kthreads to allow including these kthread's CPU time in the utilization calculations. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 508824e5a77d..ac52aba00a3e 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -13,6 +13,9 @@ * "Start " -- Mark the start of the specified activity, * such as "context switch". Nesting is permitted. * "End " -- Mark the end of the specified activity. + * + * An "@" character within "" is a comment character: Data + * reduction scripts will ignore the "@" and the remainder of the line. */ TRACE_EVENT(rcu_utilization, -- cgit From d4c08f2ac311a360230eef7e5395b0ec8d8f0670 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Jun 2011 06:36:56 -0700 Subject: rcu: Add grace-period, quiescent-state, and call_rcu trace events Add trace events to record grace-period start and end, quiescent states, CPUs noticing grace-period start and end, grace-period initialization, call_rcu() invocation, tasks blocking in RCU read-side critical sections, tasks exiting those same critical sections, force_quiescent_state() detection of dyntick-idle and offline CPUs, CPUs entering and leaving dyntick-idle mode (except from NMIs), CPUs coming online and going offline, and CPUs being kicked for staying in dyntick-idle mode for too long (as in many weeks, even on 32-bit systems). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney rcu: Add the rcu flavor to callback trace events The earlier trace events for registering RCU callbacks and for invoking them did not include the RCU flavor (rcu_bh, rcu_preempt, or rcu_sched). This commit adds the RCU flavor to those trace events. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 345 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 331 insertions(+), 14 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ac52aba00a3e..669fbd62ec25 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -24,7 +24,7 @@ TRACE_EVENT(rcu_utilization, TP_ARGS(s), TP_STRUCT__entry( - __field(char *, s) + __field(char *, s) ), TP_fast_assign( @@ -34,6 +34,297 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); +#ifdef CONFIG_RCU_TRACE + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) + +/* + * Tracepoint for grace-period events: starting and ending a grace + * period ("start" and "end", respectively), a CPU noting the start + * of a new grace period or the end of an old grace period ("cpustart" + * and "cpuend", respectively), a CPU passing through a quiescent + * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" + * and "cpuofl", respectively), and a CPU being kicked for being too + * long in dyntick-idle mode ("kick"). + */ +TRACE_EVENT(rcu_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), + + TP_ARGS(rcuname, gpnum, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %s", + __entry->rcuname, __entry->gpnum, __entry->gpevent) +); + +/* + * Tracepoint for grace-period-initialization events. These are + * distinguished by the type of RCU, the new grace-period number, the + * rcu_node structure level, the starting and ending CPU covered by the + * rcu_node structure, and the mask of CPUs that will be waited for. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_grace_period_init, + + TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, + int grplo, int grphi, unsigned long qsmask), + + TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(unsigned long, qsmask) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->qsmask = qsmask; + ), + + TP_printk("%s %lu %u %d %d %lx", + __entry->rcuname, __entry->gpnum, __entry->level, + __entry->grplo, __entry->grphi, __entry->qsmask) +); + +/* + * Tracepoint for tasks blocking within preemptible-RCU read-side + * critical sections. Track the type of RCU (which one day might + * include SRCU), the grace-period number that the task is blocking + * (the current or the next), and the task's PID. + */ +TRACE_EVENT(rcu_preempt_task, + + TP_PROTO(char *rcuname, int pid, unsigned long gpnum), + + TP_ARGS(rcuname, pid, gpnum), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", + __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for tasks that blocked within a given preemptible-RCU + * read-side critical section exiting that critical section. Track the + * type of RCU (which one day might include SRCU) and the task's PID. + */ +TRACE_EVENT(rcu_unlock_preempted_task, + + TP_PROTO(char *rcuname, unsigned long gpnum, int pid), + + TP_ARGS(rcuname, gpnum, pid), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for quiescent-state-reporting events. These are + * distinguished by the type of RCU, the grace-period number, the + * mask of quiescent lower-level entities, the rcu_node structure level, + * the starting and ending CPU covered by the rcu_node structure, and + * whether there are any blocked tasks blocking the current grace period. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_quiescent_state_report, + + TP_PROTO(char *rcuname, unsigned long gpnum, + unsigned long mask, unsigned long qsmask, + u8 level, int grplo, int grphi, int gp_tasks), + + TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, mask) + __field(unsigned long, qsmask) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(u8, gp_tasks) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->mask = mask; + __entry->qsmask = qsmask; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gp_tasks = gp_tasks; + ), + + TP_printk("%s %lu %lx>%lx %u %d %d %u", + __entry->rcuname, __entry->gpnum, + __entry->mask, __entry->qsmask, __entry->level, + __entry->grplo, __entry->grphi, __entry->gp_tasks) +); + +/* + * Tracepoint for quiescent states detected by force_quiescent_state(). + * These trace events include the type of RCU, the grace-period number + * that was blocked by the CPU, the CPU itself, and the type of quiescent + * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, + * or "kick" when kicking a CPU that has been in dyntick-idle mode for + * too long. + */ +TRACE_EVENT(rcu_fqs, + + TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), + + TP_ARGS(rcuname, gpnum, cpu, qsevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, cpu) + __field(char *, qsevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->cpu = cpu; + __entry->qsevent = qsevent; + ), + + TP_printk("%s %lu %d %s", + __entry->rcuname, __entry->gpnum, + __entry->cpu, __entry->qsevent) +); + +#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ + +/* + * Tracepoint for dyntick-idle entry/exit events. These take a string + * as argument: "Start" for entering dyntick-idle mode and "End" for + * leaving it. + */ +TRACE_EVENT(rcu_dyntick, + + TP_PROTO(char *polarity), + + TP_ARGS(polarity), + + TP_STRUCT__entry( + __field(char *, polarity) + ), + + TP_fast_assign( + __entry->polarity = polarity; + ), + + TP_printk("%s", __entry->polarity) +); + +/* + * Tracepoint for the registration of a single RCU callback function. + * The first argument is the type of RCU, the second argument is + * a pointer to the RCU callback itself, and the third element is the + * new RCU callback queue length for the current CPU. + */ +TRACE_EVENT(rcu_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + + TP_ARGS(rcuname, rhp, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->func = rhp->func; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%pf %ld", + __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) +); + +/* + * Tracepoint for the registration of a single RCU callback of the special + * kfree() form. The first argument is the RCU type, the second argument + * is a pointer to the RCU callback, the third argument is the offset + * of the callback within the enclosing RCU-protected data structure, + * and the fourth argument is the new RCU callback queue length for the + * current CPU. + */ +TRACE_EVENT(rcu_kfree_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, + long qlen), + + TP_ARGS(rcuname, rhp, offset, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(unsigned long, offset) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->offset = offset; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%ld %ld", + __entry->rcuname, __entry->rhp, __entry->offset, + __entry->qlen) +); + /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, @@ -65,50 +356,58 @@ TRACE_EVENT(rcu_batch_start, /* * Tracepoint for the invocation of a single RCU callback function. - * The argument is a pointer to the RCU callback itself. + * The first argument is the type of RCU, and the second argument is + * a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, - TP_PROTO(struct rcu_head *rhp), + TP_PROTO(char *rcuname, struct rcu_head *rhp), - TP_ARGS(rhp), + TP_ARGS(rcuname, rhp), TP_STRUCT__entry( - __field(void *, rhp) - __field(void *, func) + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; ), - TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) + TP_printk("%s rhp=%p func=%pf", + __entry->rcuname, __entry->rhp, __entry->func) ); /* * Tracepoint for the invocation of a single RCU callback of the special - * kfree() form. The first argument is a pointer to the RCU callback - * and the second argument is the offset of the callback within the - * enclosing RCU-protected data structure. + * kfree() form. The first argument is the RCU flavor, the second + * argument is a pointer to the RCU callback, and the third argument + * is the offset of the callback within the enclosing RCU-protected + * data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, - TP_PROTO(struct rcu_head *rhp, unsigned long offset), + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), - TP_ARGS(rhp, offset), + TP_ARGS(rcuname, rhp, offset), TP_STRUCT__entry( - __field(void *, rhp) + __field(char *, rcuname) + __field(void *, rhp) __field(unsigned long, offset) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; ), - TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) + TP_printk("%s rhp=%p func=%ld", + __entry->rcuname, __entry->rhp, __entry->offset) ); /* @@ -136,6 +435,24 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +#else /* #ifdef CONFIG_RCU_TRACE */ + +#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) +#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) +#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ -- cgit From 143dfe8611a63030ce0c79419dc362f7838be557 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 27 Aug 2010 18:45:12 -0600 Subject: writeback: IO-less balance_dirty_pages() As proposed by Chris, Dave and Jan, don't start foreground writeback IO inside balance_dirty_pages(). Instead, simply let it idle sleep for some time to throttle the dirtying task. In the mean while, kick off the per-bdi flusher thread to do background writeback IO. RATIONALS ========= - disk seeks on concurrent writeback of multiple inodes (Dave Chinner) If every thread doing writes and being throttled start foreground writeback, it leads to N IO submitters from at least N different inodes at the same time, end up with N different sets of IO being issued with potentially zero locality to each other, resulting in much lower elevator sort/merge efficiency and hence we seek the disk all over the place to service the different sets of IO. OTOH, if there is only one submission thread, it doesn't jump between inodes in the same way when congestion clears - it keeps writing to the same inode, resulting in large related chunks of sequential IOs being issued to the disk. This is more efficient than the above foreground writeback because the elevator works better and the disk seeks less. - lock contention and cache bouncing on concurrent IO submitters (Dave Chinner) With this patchset, the fs_mark benchmark on a 12-drive software RAID0 goes from CPU bound to IO bound, freeing "3-4 CPUs worth of spinlock contention". * "CPU usage has dropped by ~55%", "it certainly appears that most of the CPU time saving comes from the removal of contention on the inode_wb_list_lock" (IMHO at least 10% comes from the reduction of cacheline bouncing, because the new code is able to call much less frequently into balance_dirty_pages() and hence access the global page states) * the user space "App overhead" is reduced by 20%, by avoiding the cacheline pollution by the complex writeback code path * "for a ~5% throughput reduction", "the number of write IOs have dropped by ~25%", and the elapsed time reduced from 41:42.17 to 40:53.23. * On a simple test of 100 dd, it reduces the CPU %system time from 30% to 3%, and improves IO throughput from 38MB/s to 42MB/s. - IO size too small for fast arrays and too large for slow USB sticks The write_chunk used by current balance_dirty_pages() cannot be directly set to some large value (eg. 128MB) for better IO efficiency. Because it could lead to more than 1 second user perceivable stalls. Even the current 4MB write size may be too large for slow USB sticks. The fact that balance_dirty_pages() starts IO on itself couples the IO size to wait time, which makes it hard to do suitable IO size while keeping the wait time under control. Now it's possible to increase writeback chunk size proportional to the disk bandwidth. In a simple test of 50 dd's on XFS, 1-HDD, 3GB ram, the larger writeback size dramatically reduces the seek count to 1/10 (far beyond my expectation) and improves the write throughput by 24%. - long block time in balance_dirty_pages() hurts desktop responsiveness Many of us may have the experience: it often takes a couple of seconds or even long time to stop a heavy writing dd/cp/tar command with Ctrl-C or "kill -9". - IO pipeline broken by bumpy write() progress There are a broad class of "loop {read(buf); write(buf);}" applications whose read() pipeline will be under-utilized or even come to a stop if the write()s have long latencies _or_ don't progress in a constant rate. The current threshold based throttling inherently transfers the large low level IO completion fluctuations to bumpy application write()s, and further deteriorates with increasing number of dirtiers and/or bdi's. For example, when doing 50 dd's + 1 remote rsync to an XFS partition, the rsync progresses very bumpy in legacy kernel, and throughput is improved by 67% by this patchset. (plus the larger write chunk size, it will be 93% speedup). The new rate based throttling can support 1000+ dd's with excellent smoothness, low latency and low overheads. For the above reasons, it's much better to do IO-less and low latency pauses in balance_dirty_pages(). Jan Kara, Dave Chinner and me explored the scheme to let balance_dirty_pages() wait for enough writeback IO completions to safeguard the dirty limit. However it's found to have two problems: - in large NUMA systems, the per-cpu counters may have big accounting errors, leading to big throttle wait time and jitters. - NFS may kill large amount of unstable pages with one single COMMIT. Because NFS server serves COMMIT with expensive fsync() IOs, it is desirable to delay and reduce the number of COMMITs. So it's not likely to optimize away such kind of bursty IO completions, and the resulted large (and tiny) stall times in IO completion based throttling. So here is a pause time oriented approach, which tries to control the pause time in each balance_dirty_pages() invocations, by controlling the number of pages dirtied before calling balance_dirty_pages(), for smooth and efficient dirty throttling: - avoid useless (eg. zero pause time) balance_dirty_pages() calls - avoid too small pause time (less than 4ms, which burns CPU power) - avoid too large pause time (more than 200ms, which hurts responsiveness) - avoid big fluctuations of pause times It can control pause times at will. The default policy (in a followup patch) will be to do ~10ms pauses in 1-dd case, and increase to ~100ms in 1000-dd case. BEHAVIOR CHANGE =============== (1) dirty threshold Users will notice that the applications will get throttled once crossing the global (background + dirty)/2=15% threshold, and then balanced around 17.5%. Before patch, the behavior is to just throttle it at 20% dirtyable memory in 1-dd case. Since the task will be soft throttled earlier than before, it may be perceived by end users as performance "slow down" if his application happens to dirty more than 15% dirtyable memory. (2) smoothness/responsiveness Users will notice a more responsive system during heavy writeback. "killall dd" will take effect instantly. Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 5f172703eb4f..178c23508d3d 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -104,30 +104,6 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DEFINE_WRITEBACK_EVENT(writeback_thread_start); DEFINE_WRITEBACK_EVENT(writeback_thread_stop); -DEFINE_WRITEBACK_EVENT(balance_dirty_start); -DEFINE_WRITEBACK_EVENT(balance_dirty_wait); - -TRACE_EVENT(balance_dirty_written, - - TP_PROTO(struct backing_dev_info *bdi, int written), - - TP_ARGS(bdi, written), - - TP_STRUCT__entry( - __array(char, name, 32) - __field(int, written) - ), - - TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); - __entry->written = written; - ), - - TP_printk("bdi %s written %d", - __entry->name, - __entry->written - ) -); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), -- cgit From 348b59012e5c6402741d067cf6eeeb6271999d06 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 7 Aug 2011 00:46:59 +0530 Subject: net/9p: Convert net/9p protocol dumps to tracepoints This helps in more control over debugging. root@qemu-img-64:~# ls /pass/123 ls: cannot access /pass/123: No such file or directory root@qemu-img-64:~# cat /sys/kernel/debug/tracing/trace # tracer: nop # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | ls-1536 [001] 70.928584: 9p_protocol_dump: clnt 18446612132784021504 P9_TWALK(tag = 1) 000: 16 00 00 00 6e 01 00 01 00 00 00 02 00 00 00 01 010: 00 03 00 31 32 33 00 00 00 ff ff ff ff 00 00 00 ls-1536 [001] 70.928587: => trace_9p_protocol_dump => p9pdu_finalize => p9_client_rpc => p9_client_walk => v9fs_vfs_lookup => d_alloc_and_lookup => walk_component => path_lookupat ls-1536 [000] 70.929696: 9p_protocol_dump: clnt 18446612132784021504 P9_RLERROR(tag = 1) 000: 0b 00 00 00 07 01 00 02 00 00 00 4e 03 00 02 00 010: 00 00 00 00 03 00 02 00 00 00 00 00 ff 43 00 00 ls-1536 [000] 70.929697: => trace_9p_protocol_dump => p9_client_rpc => p9_client_walk => v9fs_vfs_lookup => d_alloc_and_lookup => walk_component => path_lookupat => do_path_lookup Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- include/trace/events/9p.h | 176 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 include/trace/events/9p.h (limited to 'include/trace/events') diff --git a/include/trace/events/9p.h b/include/trace/events/9p.h new file mode 100644 index 000000000000..beeaed8398ec --- /dev/null +++ b/include/trace/events/9p.h @@ -0,0 +1,176 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM 9p + +#if !defined(_TRACE_9P_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_9P_H + +#include + +#define show_9p_op(type) \ + __print_symbolic(type, \ + { P9_TLERROR, "P9_TLERROR" }, \ + { P9_RLERROR, "P9_RLERROR" }, \ + { P9_TSTATFS, "P9_TSTATFS" }, \ + { P9_RSTATFS, "P9_RSTATFS" }, \ + { P9_TLOPEN, "P9_TLOPEN" }, \ + { P9_RLOPEN, "P9_RLOPEN" }, \ + { P9_TLCREATE, "P9_TLCREATE" }, \ + { P9_RLCREATE, "P9_RLCREATE" }, \ + { P9_TSYMLINK, "P9_TSYMLINK" }, \ + { P9_RSYMLINK, "P9_RSYMLINK" }, \ + { P9_TMKNOD, "P9_TMKNOD" }, \ + { P9_RMKNOD, "P9_RMKNOD" }, \ + { P9_TRENAME, "P9_TRENAME" }, \ + { P9_RRENAME, "P9_RRENAME" }, \ + { P9_TREADLINK, "P9_TREADLINK" }, \ + { P9_RREADLINK, "P9_RREADLINK" }, \ + { P9_TGETATTR, "P9_TGETATTR" }, \ + { P9_RGETATTR, "P9_RGETATTR" }, \ + { P9_TSETATTR, "P9_TSETATTR" }, \ + { P9_RSETATTR, "P9_RSETATTR" }, \ + { P9_TXATTRWALK, "P9_TXATTRWALK" }, \ + { P9_RXATTRWALK, "P9_RXATTRWALK" }, \ + { P9_TXATTRCREATE, "P9_TXATTRCREATE" }, \ + { P9_RXATTRCREATE, "P9_RXATTRCREATE" }, \ + { P9_TREADDIR, "P9_TREADDIR" }, \ + { P9_RREADDIR, "P9_RREADDIR" }, \ + { P9_TFSYNC, "P9_TFSYNC" }, \ + { P9_RFSYNC, "P9_RFSYNC" }, \ + { P9_TLOCK, "P9_TLOCK" }, \ + { P9_RLOCK, "P9_RLOCK" }, \ + { P9_TGETLOCK, "P9_TGETLOCK" }, \ + { P9_RGETLOCK, "P9_RGETLOCK" }, \ + { P9_TLINK, "P9_TLINK" }, \ + { P9_RLINK, "P9_RLINK" }, \ + { P9_TMKDIR, "P9_TMKDIR" }, \ + { P9_RMKDIR, "P9_RMKDIR" }, \ + { P9_TRENAMEAT, "P9_TRENAMEAT" }, \ + { P9_RRENAMEAT, "P9_RRENAMEAT" }, \ + { P9_TUNLINKAT, "P9_TUNLINKAT" }, \ + { P9_RUNLINKAT, "P9_RUNLINKAT" }, \ + { P9_TVERSION, "P9_TVERSION" }, \ + { P9_RVERSION, "P9_RVERSION" }, \ + { P9_TAUTH, "P9_TAUTH" }, \ + { P9_RAUTH, "P9_RAUTH" }, \ + { P9_TATTACH, "P9_TATTACH" }, \ + { P9_RATTACH, "P9_RATTACH" }, \ + { P9_TERROR, "P9_TERROR" }, \ + { P9_RERROR, "P9_RERROR" }, \ + { P9_TFLUSH, "P9_TFLUSH" }, \ + { P9_RFLUSH, "P9_RFLUSH" }, \ + { P9_TWALK, "P9_TWALK" }, \ + { P9_RWALK, "P9_RWALK" }, \ + { P9_TOPEN, "P9_TOPEN" }, \ + { P9_ROPEN, "P9_ROPEN" }, \ + { P9_TCREATE, "P9_TCREATE" }, \ + { P9_RCREATE, "P9_RCREATE" }, \ + { P9_TREAD, "P9_TREAD" }, \ + { P9_RREAD, "P9_RREAD" }, \ + { P9_TWRITE, "P9_TWRITE" }, \ + { P9_RWRITE, "P9_RWRITE" }, \ + { P9_TCLUNK, "P9_TCLUNK" }, \ + { P9_RCLUNK, "P9_RCLUNK" }, \ + { P9_TREMOVE, "P9_TREMOVE" }, \ + { P9_RREMOVE, "P9_RREMOVE" }, \ + { P9_TSTAT, "P9_TSTAT" }, \ + { P9_RSTAT, "P9_RSTAT" }, \ + { P9_TWSTAT, "P9_TWSTAT" }, \ + { P9_RWSTAT, "P9_RWSTAT" }) + +TRACE_EVENT(9p_client_req, + TP_PROTO(struct p9_client *clnt, int8_t type, int tag), + + TP_ARGS(clnt, type, tag), + + TP_STRUCT__entry( + __field( void *, clnt ) + __field( __u8, type ) + __field( __u32, tag ) + ), + + TP_fast_assign( + __entry->clnt = clnt; + __entry->type = type; + __entry->tag = tag; + ), + + TP_printk("client %lu request %s tag %d", + (long)__entry->clnt, show_9p_op(__entry->type), + __entry->tag) + ); + +TRACE_EVENT(9p_client_res, + TP_PROTO(struct p9_client *clnt, int8_t type, int tag, int err), + + TP_ARGS(clnt, type, tag, err), + + TP_STRUCT__entry( + __field( void *, clnt ) + __field( __u8, type ) + __field( __u32, tag ) + __field( __u32, err ) + ), + + TP_fast_assign( + __entry->clnt = clnt; + __entry->type = type; + __entry->tag = tag; + __entry->err = err; + ), + + TP_printk("client %lu response %s tag %d err %d", + (long)__entry->clnt, show_9p_op(__entry->type), + __entry->tag, __entry->err) +); + +/* dump 32 bytes of protocol data */ +#define P9_PROTO_DUMP_SZ 32 +TRACE_EVENT(9p_protocol_dump, + TP_PROTO(struct p9_client *clnt, struct p9_fcall *pdu), + + TP_ARGS(clnt, pdu), + + TP_STRUCT__entry( + __field( void *, clnt ) + __field( __u8, type ) + __field( __u16, tag ) + __array( unsigned char, line, P9_PROTO_DUMP_SZ ) + ), + + TP_fast_assign( + __entry->clnt = clnt; + __entry->type = pdu->id; + __entry->tag = pdu->tag; + memcpy(__entry->line, pdu->sdata, P9_PROTO_DUMP_SZ); + ), + TP_printk("clnt %lu %s(tag = %d)\n%.3x: " + "%02x %02x %02x %02x %02x %02x %02x %02x " + "%02x %02x %02x %02x %02x %02x %02x %02x\n" + "%.3x: " + "%02x %02x %02x %02x %02x %02x %02x %02x " + "%02x %02x %02x %02x %02x %02x %02x %02x\n", + (long)__entry->clnt, show_9p_op(__entry->type), + __entry->tag, 0, + __entry->line[0], __entry->line[1], + __entry->line[2], __entry->line[3], + __entry->line[4], __entry->line[5], + __entry->line[6], __entry->line[7], + __entry->line[8], __entry->line[9], + __entry->line[10], __entry->line[11], + __entry->line[12], __entry->line[13], + __entry->line[14], __entry->line[15], + 16, + __entry->line[16], __entry->line[17], + __entry->line[18], __entry->line[19], + __entry->line[20], __entry->line[21], + __entry->line[22], __entry->line[23], + __entry->line[24], __entry->line[25], + __entry->line[26], __entry->line[27], + __entry->line[28], __entry->line[29], + __entry->line[30], __entry->line[31]) + ); + +#endif /* _TRACE_9P_H */ + +/* This part must be outside protection */ +#include -- cgit From 6f91bc5fda82d2c49b4f7fb29424cf6a3c7574bc Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 27 Oct 2011 11:43:23 -0400 Subject: ext4: optimize ext4_ext_convert_to_initialized() This patch introduces a fast path in ext4_ext_convert_to_initialized() for the case when the conversion can be performed by transferring the newly initialized blocks from the uninitialized extent into an adjacent initialized extent. Doing so removes the expensive invocations of memmove() which occur during extent insertion and the subsequent merge. In practice this should be the common case for clients performing append writes into files pre-allocated via fallocate(FALLOC_FL_KEEP_SIZE). In such a workload performed via direct IO and when using a suboptimal implementation of memmove() (x86_64 prior to the 2.6.39 rewrite), this patch reduces kernel CPU consumption by 32%. Two new trace points are added to ext4_ext_convert_to_initialized() to offer visibility into its operations. No exit trace point has been added due to the multiplicity of return points. This can be revisited once the upstream cleanup is backported. Signed-off-by: Eric Gouriou Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 82 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index c9a341e385a3..748ff7cbe555 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -9,6 +9,7 @@ struct ext4_allocation_context; struct ext4_allocation_request; +struct ext4_extent; struct ext4_prealloc_space; struct ext4_inode_info; struct mpage_da_data; @@ -1394,6 +1395,87 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, TP_ARGS(inode) ); +/* 'ux' is the uninitialized extent. */ +TRACE_EVENT(ext4_ext_convert_to_initialized_enter, + TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, + struct ext4_extent *ux), + + TP_ARGS(inode, map, ux), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, m_lblk ) + __field( unsigned, m_len ) + __field( ext4_lblk_t, u_lblk ) + __field( unsigned, u_len ) + __field( ext4_fsblk_t, u_pblk ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->m_lblk = map->m_lblk; + __entry->m_len = map->m_len; + __entry->u_lblk = le32_to_cpu(ux->ee_block); + __entry->u_len = ext4_ext_get_actual_len(ux); + __entry->u_pblk = ext4_ext_pblock(ux); + ), + + TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u " + "u_pblk %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + __entry->m_lblk, __entry->m_len, + __entry->u_lblk, __entry->u_len, __entry->u_pblk) +); + +/* + * 'ux' is the uninitialized extent. + * 'ix' is the initialized extent to which blocks are transferred. + */ +TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, + TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, + struct ext4_extent *ux, struct ext4_extent *ix), + + TP_ARGS(inode, map, ux, ix), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ext4_lblk_t, m_lblk ) + __field( unsigned, m_len ) + __field( ext4_lblk_t, u_lblk ) + __field( unsigned, u_len ) + __field( ext4_fsblk_t, u_pblk ) + __field( ext4_lblk_t, i_lblk ) + __field( unsigned, i_len ) + __field( ext4_fsblk_t, i_pblk ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->m_lblk = map->m_lblk; + __entry->m_len = map->m_len; + __entry->u_lblk = le32_to_cpu(ux->ee_block); + __entry->u_len = ext4_ext_get_actual_len(ux); + __entry->u_pblk = ext4_ext_pblock(ux); + __entry->i_lblk = le32_to_cpu(ix->ee_block); + __entry->i_len = ext4_ext_get_actual_len(ix); + __entry->i_pblk = ext4_ext_pblock(ix); + ), + + TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u " + "u_lblk %u u_len %u u_pblk %llu " + "i_lblk %u i_len %u i_pblk %llu ", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + __entry->m_lblk, __entry->m_len, + __entry->u_lblk, __entry->u_len, __entry->u_pblk, + __entry->i_lblk, __entry->i_len, __entry->i_pblk) +); + DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, unsigned int flags), -- cgit From b48c104d2211b0ac881a71f5f76a3816225f8111 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 2 Mar 2011 17:22:49 -0600 Subject: writeback: trace event bdi_dirty_ratelimit It helps understand how various throttle bandwidths are updated. Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 178c23508d3d..ffb5deb77ca9 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -226,6 +226,51 @@ TRACE_EVENT(global_dirty_state, ) ); +#define KBps(x) ((x) << (PAGE_SHIFT - 10)) + +TRACE_EVENT(bdi_dirty_ratelimit, + + TP_PROTO(struct backing_dev_info *bdi, + unsigned long dirty_rate, + unsigned long task_ratelimit), + + TP_ARGS(bdi, dirty_rate, task_ratelimit), + + TP_STRUCT__entry( + __array(char, bdi, 32) + __field(unsigned long, write_bw) + __field(unsigned long, avg_write_bw) + __field(unsigned long, dirty_rate) + __field(unsigned long, dirty_ratelimit) + __field(unsigned long, task_ratelimit) + __field(unsigned long, balanced_dirty_ratelimit) + ), + + TP_fast_assign( + strlcpy(__entry->bdi, dev_name(bdi->dev), 32); + __entry->write_bw = KBps(bdi->write_bandwidth); + __entry->avg_write_bw = KBps(bdi->avg_write_bandwidth); + __entry->dirty_rate = KBps(dirty_rate); + __entry->dirty_ratelimit = KBps(bdi->dirty_ratelimit); + __entry->task_ratelimit = KBps(task_ratelimit); + __entry->balanced_dirty_ratelimit = + KBps(bdi->balanced_dirty_ratelimit); + ), + + TP_printk("bdi %s: " + "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " + "dirty_ratelimit=%lu task_ratelimit=%lu " + "balanced_dirty_ratelimit=%lu", + __entry->bdi, + __entry->write_bw, /* write bandwidth */ + __entry->avg_write_bw, /* avg write bandwidth */ + __entry->dirty_rate, /* bdi dirty rate */ + __entry->dirty_ratelimit, /* base ratelimit */ + __entry->task_ratelimit, /* ratelimit with position control */ + __entry->balanced_dirty_ratelimit /* the balanced ratelimit */ + ) +); + DECLARE_EVENT_CLASS(writeback_congest_waited_template, TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), -- cgit From ece13ac31bbe492d940ba0bc4ade2ae1521f46a5 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 29 Aug 2010 23:33:20 -0600 Subject: writeback: trace event balance_dirty_pages Useful for analyzing the dynamics of the throttling algorithms and debugging user reported problems. Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 73 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index ffb5deb77ca9..0ce9f06f58c2 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -271,6 +271,79 @@ TRACE_EVENT(bdi_dirty_ratelimit, ) ); +TRACE_EVENT(balance_dirty_pages, + + TP_PROTO(struct backing_dev_info *bdi, + unsigned long thresh, + unsigned long bg_thresh, + unsigned long dirty, + unsigned long bdi_thresh, + unsigned long bdi_dirty, + unsigned long dirty_ratelimit, + unsigned long task_ratelimit, + unsigned long dirtied, + long pause, + unsigned long start_time), + + TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, + dirty_ratelimit, task_ratelimit, + dirtied, pause, start_time), + + TP_STRUCT__entry( + __array( char, bdi, 32) + __field(unsigned long, limit) + __field(unsigned long, setpoint) + __field(unsigned long, dirty) + __field(unsigned long, bdi_setpoint) + __field(unsigned long, bdi_dirty) + __field(unsigned long, dirty_ratelimit) + __field(unsigned long, task_ratelimit) + __field(unsigned int, dirtied) + __field(unsigned int, dirtied_pause) + __field(unsigned long, paused) + __field( long, pause) + ), + + TP_fast_assign( + unsigned long freerun = (thresh + bg_thresh) / 2; + strlcpy(__entry->bdi, dev_name(bdi->dev), 32); + + __entry->limit = global_dirty_limit; + __entry->setpoint = (global_dirty_limit + freerun) / 2; + __entry->dirty = dirty; + __entry->bdi_setpoint = __entry->setpoint * + bdi_thresh / (thresh + 1); + __entry->bdi_dirty = bdi_dirty; + __entry->dirty_ratelimit = KBps(dirty_ratelimit); + __entry->task_ratelimit = KBps(task_ratelimit); + __entry->dirtied = dirtied; + __entry->dirtied_pause = current->nr_dirtied_pause; + __entry->pause = pause * 1000 / HZ; + __entry->paused = (jiffies - start_time) * 1000 / HZ; + ), + + + TP_printk("bdi %s: " + "limit=%lu setpoint=%lu dirty=%lu " + "bdi_setpoint=%lu bdi_dirty=%lu " + "dirty_ratelimit=%lu task_ratelimit=%lu " + "dirtied=%u dirtied_pause=%u " + "paused=%lu pause=%ld", + __entry->bdi, + __entry->limit, + __entry->setpoint, + __entry->dirty, + __entry->bdi_setpoint, + __entry->bdi_dirty, + __entry->dirty_ratelimit, + __entry->task_ratelimit, + __entry->dirtied, + __entry->dirtied_pause, + __entry->paused, /* ms */ + __entry->pause /* ms */ + ) +); + DECLARE_EVENT_CLASS(writeback_congest_waited_template, TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), -- cgit From ad4e38dd6a33bb3a4882c487d7abe621e583b982 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Fri, 7 Oct 2011 21:51:56 -0600 Subject: writeback: send work item to queue_io, move_expired_inodes Instead of sending ->older_than_this to queue_io() and move_expired_inodes(), send the entire wb_writeback_work structure. There are other fields of a work item that are useful in these routines and in tracepoints. Acked-by: Jan Kara Signed-off-by: Curt Wohlgemuth Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 0ce9f06f58c2..1261db3916cc 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -157,9 +157,9 @@ DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, - unsigned long *older_than_this, + struct wb_writeback_work *work, int moved), - TP_ARGS(wb, older_than_this, moved), + TP_ARGS(wb, work, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) @@ -167,6 +167,7 @@ TRACE_EVENT(writeback_queue_io, __field(int, moved) ), TP_fast_assign( + unsigned long *older_than_this = work->older_than_this; strncpy(__entry->name, dev_name(wb->bdi->dev), 32); __entry->older = older_than_this ? *older_than_this : 0; __entry->age = older_than_this ? -- cgit From 0e175a1835ffc979e55787774e58ec79e41957d7 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Fri, 7 Oct 2011 21:54:10 -0600 Subject: writeback: Add a 'reason' to wb_writeback_work This creates a new 'reason' field in a wb_writeback_work structure, which unambiguously identifies who initiates writeback activity. A 'wb_reason' enumeration has been added to writeback.h, to enumerate the possible reasons. The 'writeback_work_class' and tracepoint event class and 'writeback_queue_io' tracepoints are updated to include the symbolic 'reason' in all trace events. And the 'writeback_inodes_sbXXX' family of routines has had a wb_stats parameter added to them, so callers can specify why writeback is being started. Acked-by: Jan Kara Signed-off-by: Curt Wohlgemuth Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 1261db3916cc..b99caa8b780c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(int, for_kupdate) __field(int, range_cyclic) __field(int, for_background) + __field(int, reason) ), TP_fast_assign( strncpy(__entry->name, dev_name(bdi->dev), 32); @@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->for_kupdate = work->for_kupdate; __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; + __entry->reason = work->reason; ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " - "kupdate=%d range_cyclic=%d background=%d", + "kupdate=%d range_cyclic=%d background=%d reason=%s", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, - __entry->for_background + __entry->for_background, + wb_reason_name[__entry->reason] ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ @@ -165,6 +168,7 @@ TRACE_EVENT(writeback_queue_io, __field(unsigned long, older) __field(long, age) __field(int, moved) + __field(int, reason) ), TP_fast_assign( unsigned long *older_than_this = work->older_than_this; @@ -173,12 +177,14 @@ TRACE_EVENT(writeback_queue_io, __entry->age = older_than_this ? (jiffies - *older_than_this) * 1000 / HZ : -1; __entry->moved = moved; + __entry->reason = work->reason; ), - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d", + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s", __entry->name, __entry->older, /* older_than_this in jiffies */ __entry->age, /* older_than_this in relative milliseconds */ - __entry->moved) + __entry->moved, + wb_reason_name[__entry->reason]) ); TRACE_EVENT(global_dirty_state, -- cgit From de47725421ad5627a5c905f4e40bb844ebc06d29 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 26 May 2011 13:46:22 -0400 Subject: include: replace linux/module.h with "struct module" wherever possible The pretty much brings in the kitchen sink along with it, so it should be avoided wherever reasonably possible in terms of being included from other commonly used files, as it results in a measureable increase on compile times. The worst culprit was probably device.h since it is used everywhere. This file also had an implicit dependency/usage of mutex.h which was masked by module.h, and is also fixed here at the same time. There are over a dozen other headers that simply declare the struct instead of pulling in the whole file, so follow their lead and simply make it a few more. Most of the implicit dependencies on module.h being present by these headers pulling it in have been now weeded out, so we can finally make this change with hopefully minimal breakage. Signed-off-by: Paul Gortmaker --- include/trace/events/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace/events') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 21a546d27c0c..161932737416 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -1,6 +1,6 @@ /* * Because linux/module.h has tracepoints in the header, and ftrace.h - * eventually includes this file, define_trace.h includes linux/module.h + * used to include this file, define_trace.h includes linux/module.h * But we do not want the module.h to override the TRACE_SYSTEM macro * variable that define_trace.h is processing, so we only set it * when module events are being processed, which would happen when -- cgit From 4356f21d09283dc6d39a6f7287a65ddab61e2808 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:47 -0700 Subject: mm: change isolate mode from #define to bitwise type Change ISOLATE_XXX macro with bitwise isolate_mode_t type. Normally, macro isn't recommended as it's type-unsafe and making debugging harder as symbol cannot be passed throught to the debugger. Quote from Johannes " Hmm, it would probably be cleaner to fully convert the isolation mode into independent flags. INACTIVE, ACTIVE, BOTH is currently a tri-state among flags, which is a bit ugly." This patch moves isolate mode from swap.h to mmzone.h by memcontrol.h Signed-off-by: Minchan Kim Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Rik van Riel Cc: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 36851f7f13da..edc4b3d25a2d 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -266,7 +266,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), @@ -278,7 +278,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_taken) __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) - __field(int, isolate_mode) + __field(isolate_mode_t, isolate_mode) ), TP_fast_assign( @@ -312,7 +312,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) @@ -327,7 +327,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) -- cgit