author      Ross Zwisler <ross.zwisler@linux.intel.com>        2017-05-08 19:00:00 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>     2017-05-08 20:15:15 -0400
commit      a9c42b33ed80968dd160e3be48c7e84ccf171cf9 (patch)
tree        3c2a9c3996433f5b6b93717e93b703ef2b5d026e
parent      dcbe82149cc9d03dcdf7cd1a75d5541de7c14be1 (diff)
dax: add tracepoints to dax_iomap_pte_fault()
Patch series "second round of tracepoints for DAX".

This second round of DAX tracepoint patches adds tracing to the PTE fault
path (dax_iomap_pte_fault(), dax_pfn_mkwrite(), dax_load_hole(),
dax_insert_mapping()) and to the writeback path
(dax_writeback_mapping_range(), dax_writeback_one()).

The purpose of this tracing is to give us a high level view of what DAX is
doing, whether faults are being serviced by PMDs or PTEs, and by real
storage or by zero pages covering holes.

I do have some patches nearly ready which also add tracing to
grab_mapping_entry() and dax_insert_mapping_entry().  These are more
targeted at logging how we are interacting with the radix tree, how we use
empty entries for locking, whether we "downgrade" huge zero pages to
4k PTE sized allocations, etc.  In the end it seemed to me that this might
be too detailed to have as constantly present tracepoints, but if anyone
sees value in having tracepoints like this in the DAX code permanently
(Jan?), please let me know and I'll add those last two patches.

All these tracepoints were done to be consistent with the style of the XFS
tracepoints and with the existing DAX PMD tracepoints.

This patch (of 6):

Add tracepoints to dax_iomap_pte_fault(), following the same logging
conventions as the rest of DAX.

Here is an example fault that initially tries to be serviced by the PMD
fault handler but which falls back to PTEs because the VMA isn't large
enough to hold a PMD:

  small-1086  [005] ....    71.140014: xfs_filemap_huge_fault: dev 259:0 ino 0x1003
  small-1086  [005] ....    71.140027: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400
  small-1086  [005] ....    71.140028: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400 FALLBACK
  small-1086  [005] ....    71.140035: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220
  small-1086  [005] ....    71.140396: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 MAJOR|NOPAGE

Link: http://lkml.kernel.org/r/20170221195116.13278-2-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
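For reference, the two new events are emitted under the fs_dax trace system, so
once the patch is applied they can be turned on through the usual tracefs event
interface. Below is a minimal userspace sketch (not part of the patch) that
enables them; it assumes tracefs is mounted at /sys/kernel/tracing, whereas
older setups may expose it at /sys/kernel/debug/tracing instead.

/* enable_dax_pte_events.c: toggle the dax_pte_fault{,_done} tracepoints. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int enable_event(const char *path)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0) {
		perror(path);
		return -1;
	}
	n = write(fd, "1", 1);		/* writing "1" enables the event */
	if (n != 1)
		perror(path);
	close(fd);
	return n == 1 ? 0 : -1;
}

int main(void)
{
	/* Enable both tracepoints added by this patch. */
	enable_event("/sys/kernel/tracing/events/fs_dax/dax_pte_fault/enable");
	enable_event("/sys/kernel/tracing/events/fs_dax/dax_pte_fault_done/enable");

	/* The resulting events then appear in /sys/kernel/tracing/trace. */
	return 0;
}

The captured output has the same shape as the example trace above.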
-rw-r--r--    fs/dax.c                          15
-rw-r--r--    include/trace/events/fs_dax.h     41
2 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 43bbd6d1037d..f6c32d831af6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1150,13 +1150,16 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	int vmf_ret = 0;
 	void *entry;
 
+	trace_dax_pte_fault(inode, vmf, vmf_ret);
 	/*
 	 * Check whether offset isn't beyond end of file now. Caller is supposed
 	 * to hold locks serializing us with truncate / punch hole so this is
 	 * a reliable test.
 	 */
-	if (pos >= i_size_read(inode))
-		return VM_FAULT_SIGBUS;
+	if (pos >= i_size_read(inode)) {
+		vmf_ret = VM_FAULT_SIGBUS;
+		goto out;
+	}
 
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
@@ -1167,8 +1170,10 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	 * that we never have to deal with more than a single extent here.
 	 */
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
-	if (error)
-		return dax_fault_return(error);
+	if (error) {
+		vmf_ret = dax_fault_return(error);
+		goto out;
+	}
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
 		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
 		goto finish_iomap;
@@ -1252,6 +1257,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		 */
 		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
+out:
+	trace_dax_pte_fault_done(inode, vmf, vmf_ret);
 	return vmf_ret;
 }
 
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
index c566ddc87f73..cbcd7d64a18d 100644
--- a/include/trace/events/fs_dax.h
+++ b/include/trace/events/fs_dax.h
@@ -150,6 +150,47 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
 DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
 DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
 
+DECLARE_EVENT_CLASS(dax_pte_fault_class,
+	TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result),
+	TP_ARGS(inode, vmf, result),
+	TP_STRUCT__entry(
+		__field(unsigned long, ino)
+		__field(unsigned long, vm_flags)
+		__field(unsigned long, address)
+		__field(pgoff_t, pgoff)
+		__field(dev_t, dev)
+		__field(unsigned int, flags)
+		__field(int, result)
+	),
+	TP_fast_assign(
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->ino = inode->i_ino;
+		__entry->vm_flags = vmf->vma->vm_flags;
+		__entry->address = vmf->address;
+		__entry->flags = vmf->flags;
+		__entry->pgoff = vmf->pgoff;
+		__entry->result = result;
+	),
+	TP_printk("dev %d:%d ino %#lx %s %s address %#lx pgoff %#lx %s",
+		MAJOR(__entry->dev),
+		MINOR(__entry->dev),
+		__entry->ino,
+		__entry->vm_flags & VM_SHARED ? "shared" : "private",
+		__print_flags(__entry->flags, "|", FAULT_FLAG_TRACE),
+		__entry->address,
+		__entry->pgoff,
+		__print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE)
+	)
+)
+
+#define DEFINE_PTE_FAULT_EVENT(name) \
+DEFINE_EVENT(dax_pte_fault_class, name, \
+	TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), \
+	TP_ARGS(inode, vmf, result))
+
+DEFINE_PTE_FAULT_EVENT(dax_pte_fault);
+DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done);
+
 #endif /* _TRACE_FS_DAX_H */
 
 /* This part must be outside protection */