From a9c42b33ed80968dd160e3be48c7e84ccf171cf9 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:00 -0700 Subject: dax: add tracepoints to dax_iomap_pte_fault() Patch series "second round of tracepoints for DAX". This second round of DAX tracepoint patches adds tracing to the PTE fault path (dax_iomap_pte_fault(), dax_pfn_mkwrite(), dax_load_hole(), dax_insert_mapping()) and to the writeback path (dax_writeback_mapping_range(), dax_writeback_one()). The purpose of this tracing is to give us a high level view of what DAX is doing, whether faults are being serviced by PMDs or PTEs, and by real storage or by zero pages covering holes. I do have some patches nearly ready which also add tracing to grab_mapping_entry() and dax_insert_mapping_entry(). These are more targeted at logging how we are interacting with the radix tree, how we use empty entries for locking, whether we "downgrade" huge zero pages to 4k PTE sized allocations, etc. In the end it seemed to me that this might be too detailed to have as constantly present tracepoints, but if anyone sees value in having tracepoints like this in the DAX code permanently (Jan?), please let me know and I'll add those last two patches. All these tracepoints were done to be consistent with the style of the XFS tracepoints and with the existing DAX PMD tracepoints. This patch (of 6): Add tracepoints to dax_iomap_pte_fault(), following the same logging conventions as the rest of DAX. Here is an example fault that initially tries to be serviced by the PMD fault handler but which falls back to PTEs because the VMA isn't large enough to hold a PMD: small-1086 [005] .... 71.140014: xfs_filemap_huge_fault: dev 259:0 ino 0x1003 small-1086 [005] .... 71.140027: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400 small-1086 [005] .... 71.140028: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400 FALLBACK small-1086 [005] .... 71.140035: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 small-1086 [005] .... 71.140396: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 MAJOR|NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index c566ddc87f73..cbcd7d64a18d 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -150,6 +150,47 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback); +DECLARE_EVENT_CLASS(dax_pte_fault_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), + TP_ARGS(inode, vmf, result), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(pgoff_t, pgoff) + __field(dev_t, dev) + __field(unsigned int, flags) + __field(int, result) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->flags = vmf->flags; + __entry->pgoff = vmf->pgoff; + __entry->result = result; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx pgoff %#lx %s", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE), + __entry->address, + __entry->pgoff, + __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE) + ) +) + +#define DEFINE_PTE_FAULT_EVENT(name) \ +DEFINE_EVENT(dax_pte_fault_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), \ + TP_ARGS(inode, vmf, result)) + +DEFINE_PTE_FAULT_EVENT(dax_pte_fault); +DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit From c3ff68d7d1e6a24b7ad76d00ee583929858d4001 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:03 -0700 Subject: dax: add tracepoints to dax_pfn_mkwrite() Add tracepoints to dax_pfn_mkwrite(), following the same logging conventions as the rest of DAX. Here is an example PTE fault followed by a pfn_mkwrite: small_aligned-1094 [002] .... 374.084998: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 small_aligned-1094 [002] .... 374.085145: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 MAJOR|NOPAGE small_aligned-1094 [002] .... 374.085165: dax_pfn_mkwrite: dev 259:0 ino 0x1003 shared WRITE|MKWRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-3-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index cbcd7d64a18d..b5a520961f8d 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -190,6 +190,8 @@ DEFINE_EVENT(dax_pte_fault_class, name, \ DEFINE_PTE_FAULT_EVENT(dax_pte_fault); DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); #endif /* _TRACE_FS_DAX_H */ -- cgit From 678c9fd0430a1431bd9901c76f41e04fcb3eac87 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:07 -0700 Subject: dax: add tracepoints to dax_load_hole() Add tracepoints to dax_load_hole(), following the same logging conventions as the rest of DAX. Here is the logging generated by a PTE read from a hole: read-1075 [002] .... 62.362108: dax_pte_fault: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 read-1075 [002] .... 62.362140: dax_load_hole: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 NOPAGE read-1075 [002] .... 62.362141: dax_pte_fault_done: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-4-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index b5a520961f8d..2f15dfea7fb1 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -192,6 +192,7 @@ DEFINE_PTE_FAULT_EVENT(dax_pte_fault); DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); +DEFINE_PTE_FAULT_EVENT(dax_load_hole); #endif /* _TRACE_FS_DAX_H */ -- cgit From d14a3f48a152b75a1e690d443f509c07c7b06c0e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:10 -0700 Subject: dax: add tracepoints to dax_writeback_mapping_range() Add tracepoints to dax_writeback_mapping_range(), following the same logging conventions as the rest of DAX. Here is an example writeback call: msync-1085 [006] .... 200.902565: dax_writeback_range: dev 259:0 ino 0x1003 pgoff 0x200-0x2ff msync-1085 [006] .... 200.902579: dax_writeback_range_done: dev 259:0 ino 0x1003 pgoff 0x200-0x2ff [ross.zwisler@linux.intel.com: fix regression in dax_writeback_mapping_range()] Link: http://lkml.kernel.org/r/20170314215358.31451-1-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/20170221195116.13278-5-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 2f15dfea7fb1..9afe8c8f0bef 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -194,6 +194,38 @@ DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); DEFINE_PTE_FAULT_EVENT(dax_load_hole); +DECLARE_EVENT_CLASS(dax_writeback_range_class, + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), + TP_ARGS(inode, start_index, end_index), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, start_index) + __field(pgoff_t, end_index) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->start_index = start_index; + __entry->end_index = end_index; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx-%#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->start_index, + __entry->end_index + ) +) + +#define DEFINE_WRITEBACK_RANGE_EVENT(name) \ +DEFINE_EVENT(dax_writeback_range_class, name, \ + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index),\ + TP_ARGS(inode, start_index, end_index)) + +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range); +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range_done); + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit From f9bc3a07539bc80b4da9ff2f5d6c13d5c7a4f073 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:13 -0700 Subject: dax: add tracepoint to dax_writeback_one() Add a tracepoint to dax_writeback_one(), following the same logging conventions as the rest of DAX. Here is an example range writeback which ends up flushing one PMD and one PTE: test-1265 [003] .... 496.615250: dax_writeback_range: dev 259:0 ino 0x1003 pgoff 0x0-0x7ffffffffffff test-1265 [003] .... 496.616263: dax_writeback_one: dev 259:0 ino 0x1003 pgoff 0x0 pglen 0x200 test-1265 [003] .... 496.616270: dax_writeback_one: dev 259:0 ino 0x1003 pgoff 0x305 pglen 0x1 test-1265 [003] .... 496.616272: dax_writeback_range_done: dev 259:0 ino 0x1003 pgoff 0x0-0x7ffffffffffff [akpm@linux-foundation.org: struct blk_dax_ctl has disappeared] Link: http://lkml.kernel.org/r/20170221195116.13278-6-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 9afe8c8f0bef..292a4719edd0 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -226,6 +226,30 @@ DEFINE_EVENT(dax_writeback_range_class, name, \ DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range); DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range_done); +TRACE_EVENT(dax_writeback_one, + TP_PROTO(struct inode *inode, pgoff_t pgoff, pgoff_t pglen), + TP_ARGS(inode, pgoff, pglen), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, pgoff) + __field(pgoff_t, pglen) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgoff = pgoff; + __entry->pglen = pglen; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx pglen %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->pglen + ) +) + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit From b4440734583c3addf80558e8fde2b61e2d76328c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:16 -0700 Subject: dax: add tracepoint to dax_insert_mapping() Add a tracepoint to dax_insert_mapping(), following the same logging conventions as the rest of DAX. This tracepoint, along with the one in dax_load_hole(), lets us know how a DAX PTE fault was serviced. Here is an example DAX fault that inserts a PTE mapping: small-1126 [007] .... 145.451604: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 small-1126 [007] .... 145.452317: dax_insert_mapping: dev 259:0 ino 0x1003 shared write address 0x10420000 radix_entry 0x100006 small-1126 [007] .... 145.452399: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 MAJOR|NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-7-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/fs_dax.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 292a4719edd0..08bb3ed18dcc 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -194,6 +194,36 @@ DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); DEFINE_PTE_FAULT_EVENT(dax_load_hole); +TRACE_EVENT(dax_insert_mapping, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), + TP_ARGS(inode, vmf, radix_entry), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(void *, radix_entry) + __field(dev_t, dev) + __field(int, write) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->write = vmf->flags & FAULT_FLAG_WRITE; + __entry->radix_entry = radix_entry; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx radix_entry %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __entry->write ? "write" : "read", + __entry->address, + (unsigned long)__entry->radix_entry + ) +) + DECLARE_EVENT_CLASS(dax_writeback_range_class, TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), TP_ARGS(inode, start_index, end_index), -- cgit