summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2017-08-21 15:00:49 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2017-09-11 22:20:38 -0400
commit8224b2734ab1da4996b851e1e5d3047e7a0df499 (patch)
treec1c06680d8db8ac2212e8d916760735fb9864d51 /fs/nfs
parent70d2f7b1ea19b7e43e36ccc63496f45f9908134d (diff)
NFS: Add static NFS I/O tracepoints
Tools like tcpdump and rpcdebug can be very useful. But there are plenty of environments where they are difficult or impossible to use. For example, we've had customers report I/O failures during workloads so heavy that collecting network traffic or enabling RPC debugging are themselves onerous. The kernel's static tracepoints are lightweight (less likely to introduce timing changes) and efficient (the trace data is compact). They also work in scenarios where capturing network traffic is not possible due to lack of hardware support (some InfiniBand HCAs) or where data or network privacy is a concern. Introduce tracepoints that show when an NFS READ, WRITE, or COMMIT is initiated, and when it completes. Record the arguments and results of each operation, which are not shown by existing sunrpc module's tracepoints. For instance, the recorded offset and count can be used to match an "initiate" event to a "done" event. If an NFS READ result returns fewer bytes than requested or zero, seeing the EOF flag can be probative. Seeing an NFS4ERR_BAD_STATEID result is also indication of a particular class of problems. The timing information attached to each event record can often be useful as well. Usage example: [root@manet tmp]# trace-cmd record -e nfs:*initiate* -e nfs:*done /sys/kernel/debug/tracing/events/nfs/*initiate*/filter /sys/kernel/debug/tracing/events/nfs/*done/filter Hit Ctrl^C to stop recording ^CKernel buffer statistics: Note: "entries" are the entries left in the kernel ring buffer and are not recorded in the trace data. They should all be zero. CPU: 0 entries: 0 overrun: 0 commit overrun: 0 bytes: 3680 oldest event ts: 78.367422 now ts: 100.124419 dropped events: 0 read events: 74 ... and so on. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/nfstrace.h248
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/write.c7
3 files changed, 259 insertions, 0 deletions
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 2ca9167bc97d..551711042ba4 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -719,6 +719,254 @@ TRACE_EVENT(nfs_sillyrename_unlink,
719 __get_str(name) 719 __get_str(name)
720 ) 720 )
721); 721);
722
723TRACE_EVENT(nfs_initiate_read,
724 TP_PROTO(
725 const struct inode *inode,
726 loff_t offset, unsigned long count
727 ),
728
729 TP_ARGS(inode, offset, count),
730
731 TP_STRUCT__entry(
732 __field(loff_t, offset)
733 __field(unsigned long, count)
734 __field(dev_t, dev)
735 __field(u32, fhandle)
736 __field(u64, fileid)
737 ),
738
739 TP_fast_assign(
740 const struct nfs_inode *nfsi = NFS_I(inode);
741
742 __entry->offset = offset;
743 __entry->count = count;
744 __entry->dev = inode->i_sb->s_dev;
745 __entry->fileid = nfsi->fileid;
746 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
747 ),
748
749 TP_printk(
750 "fileid=%02x:%02x:%llu fhandle=0x%08x "
751 "offset=%lld count=%lu",
752 MAJOR(__entry->dev), MINOR(__entry->dev),
753 (unsigned long long)__entry->fileid,
754 __entry->fhandle,
755 __entry->offset, __entry->count
756 )
757);
758
759TRACE_EVENT(nfs_readpage_done,
760 TP_PROTO(
761 const struct inode *inode,
762 int status, loff_t offset, bool eof
763 ),
764
765 TP_ARGS(inode, status, offset, eof),
766
767 TP_STRUCT__entry(
768 __field(int, status)
769 __field(loff_t, offset)
770 __field(bool, eof)
771 __field(dev_t, dev)
772 __field(u32, fhandle)
773 __field(u64, fileid)
774 ),
775
776 TP_fast_assign(
777 const struct nfs_inode *nfsi = NFS_I(inode);
778
779 __entry->status = status;
780 __entry->offset = offset;
781 __entry->eof = eof;
782 __entry->dev = inode->i_sb->s_dev;
783 __entry->fileid = nfsi->fileid;
784 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
785 ),
786
787 TP_printk(
788 "fileid=%02x:%02x:%llu fhandle=0x%08x "
789 "offset=%lld status=%d%s",
790 MAJOR(__entry->dev), MINOR(__entry->dev),
791 (unsigned long long)__entry->fileid,
792 __entry->fhandle,
793 __entry->offset, __entry->status,
794 __entry->eof ? " eof" : ""
795 )
796);
797
798/*
799 * XXX: I tried using NFS_UNSTABLE and friends in this table, but they
800 * all evaluate to 0 for some reason, even if I include linux/nfs.h.
801 */
802#define nfs_show_stable(stable) \
803 __print_symbolic(stable, \
804 { 0, " (UNSTABLE)" }, \
805 { 1, " (DATA_SYNC)" }, \
806 { 2, " (FILE_SYNC)" })
807
808TRACE_EVENT(nfs_initiate_write,
809 TP_PROTO(
810 const struct inode *inode,
811 loff_t offset, unsigned long count,
812 enum nfs3_stable_how stable
813 ),
814
815 TP_ARGS(inode, offset, count, stable),
816
817 TP_STRUCT__entry(
818 __field(loff_t, offset)
819 __field(unsigned long, count)
820 __field(enum nfs3_stable_how, stable)
821 __field(dev_t, dev)
822 __field(u32, fhandle)
823 __field(u64, fileid)
824 ),
825
826 TP_fast_assign(
827 const struct nfs_inode *nfsi = NFS_I(inode);
828
829 __entry->offset = offset;
830 __entry->count = count;
831 __entry->stable = stable;
832 __entry->dev = inode->i_sb->s_dev;
833 __entry->fileid = nfsi->fileid;
834 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
835 ),
836
837 TP_printk(
838 "fileid=%02x:%02x:%llu fhandle=0x%08x "
839 "offset=%lld count=%lu stable=%d%s",
840 MAJOR(__entry->dev), MINOR(__entry->dev),
841 (unsigned long long)__entry->fileid,
842 __entry->fhandle,
843 __entry->offset, __entry->count,
844 __entry->stable, nfs_show_stable(__entry->stable)
845 )
846);
847
848TRACE_EVENT(nfs_writeback_done,
849 TP_PROTO(
850 const struct inode *inode,
851 int status,
852 loff_t offset,
853 struct nfs_writeverf *writeverf
854 ),
855
856 TP_ARGS(inode, status, offset, writeverf),
857
858 TP_STRUCT__entry(
859 __field(int, status)
860 __field(loff_t, offset)
861 __field(enum nfs3_stable_how, stable)
862 __field(unsigned long long, verifier)
863 __field(dev_t, dev)
864 __field(u32, fhandle)
865 __field(u64, fileid)
866 ),
867
868 TP_fast_assign(
869 const struct nfs_inode *nfsi = NFS_I(inode);
870
871 __entry->status = status;
872 __entry->offset = offset;
873 __entry->stable = writeverf->committed;
874 memcpy(&__entry->verifier, &writeverf->verifier,
875 sizeof(__entry->verifier));
876 __entry->dev = inode->i_sb->s_dev;
877 __entry->fileid = nfsi->fileid;
878 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
879 ),
880
881 TP_printk(
882 "fileid=%02x:%02x:%llu fhandle=0x%08x "
883 "offset=%lld status=%d stable=%d%s "
884 "verifier 0x%016llx",
885 MAJOR(__entry->dev), MINOR(__entry->dev),
886 (unsigned long long)__entry->fileid,
887 __entry->fhandle,
888 __entry->offset, __entry->status,
889 __entry->stable, nfs_show_stable(__entry->stable),
890 __entry->verifier
891 )
892);
893
894TRACE_EVENT(nfs_initiate_commit,
895 TP_PROTO(
896 const struct nfs_commit_data *data
897 ),
898
899 TP_ARGS(data),
900
901 TP_STRUCT__entry(
902 __field(loff_t, offset)
903 __field(unsigned long, count)
904 __field(dev_t, dev)
905 __field(u32, fhandle)
906 __field(u64, fileid)
907 ),
908
909 TP_fast_assign(
910 const struct inode *inode = data->inode;
911 const struct nfs_inode *nfsi = NFS_I(inode);
912
913 __entry->offset = data->args.offset;
914 __entry->count = data->args.count;
915 __entry->dev = inode->i_sb->s_dev;
916 __entry->fileid = nfsi->fileid;
917 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
918 ),
919
920 TP_printk(
921 "fileid=%02x:%02x:%llu fhandle=0x%08x "
922 "offset=%lld count=%lu",
923 MAJOR(__entry->dev), MINOR(__entry->dev),
924 (unsigned long long)__entry->fileid,
925 __entry->fhandle,
926 __entry->offset, __entry->count
927 )
928);
929
930TRACE_EVENT(nfs_commit_done,
931 TP_PROTO(
932 const struct nfs_commit_data *data
933 ),
934
935 TP_ARGS(data),
936
937 TP_STRUCT__entry(
938 __field(int, status)
939 __field(loff_t, offset)
940 __field(unsigned long long, verifier)
941 __field(dev_t, dev)
942 __field(u32, fhandle)
943 __field(u64, fileid)
944 ),
945
946 TP_fast_assign(
947 const struct inode *inode = data->inode;
948 const struct nfs_inode *nfsi = NFS_I(inode);
949
950 __entry->status = data->res.op_status;
951 __entry->offset = data->args.offset;
952 memcpy(&__entry->verifier, &data->verf.verifier,
953 sizeof(__entry->verifier));
954 __entry->dev = inode->i_sb->s_dev;
955 __entry->fileid = nfsi->fileid;
956 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
957 ),
958
959 TP_printk(
960 "fileid=%02x:%02x:%llu fhandle=0x%08x "
961 "offset=%lld status=%d verifier 0x%016llx",
962 MAJOR(__entry->dev), MINOR(__entry->dev),
963 (unsigned long long)__entry->fileid,
964 __entry->fhandle,
965 __entry->offset, __entry->status,
966 __entry->verifier
967 )
968);
969
722#endif /* _TRACE_NFS_H */ 970#endif /* _TRACE_NFS_H */
723 971
724#undef TRACE_INCLUDE_PATH 972#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0d42573d423d..48d7277c60a9 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "iostat.h" 25#include "iostat.h"
26#include "fscache.h" 26#include "fscache.h"
27#include "pnfs.h" 27#include "pnfs.h"
28#include "nfstrace.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -200,6 +201,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
200 201
201 task_setup_data->flags |= swap_flags; 202 task_setup_data->flags |= swap_flags;
202 rpc_ops->read_setup(hdr, msg); 203 rpc_ops->read_setup(hdr, msg);
204 trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes);
203} 205}
204 206
205static void 207static void
@@ -232,6 +234,8 @@ static int nfs_readpage_done(struct rpc_task *task,
232 return status; 234 return status;
233 235
234 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); 236 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
237 trace_nfs_readpage_done(inode, task->tk_status,
238 hdr->args.offset, hdr->res.eof);
235 239
236 if (task->tk_status == -ESTALE) { 240 if (task->tk_status == -ESTALE) {
237 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 241 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f68083db63c8..c66206ac4ebf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1383,6 +1383,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1383 1383
1384 task_setup_data->priority = priority; 1384 task_setup_data->priority = priority;
1385 rpc_ops->write_setup(hdr, msg); 1385 rpc_ops->write_setup(hdr, msg);
1386 trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
1387 hdr->args.stable);
1386 1388
1387 nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, 1389 nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client,
1388 &task_setup_data->rpc_client, msg, hdr); 1390 &task_setup_data->rpc_client, msg, hdr);
@@ -1540,7 +1542,10 @@ static int nfs_writeback_done(struct rpc_task *task,
1540 status = NFS_PROTO(inode)->write_done(task, hdr); 1542 status = NFS_PROTO(inode)->write_done(task, hdr);
1541 if (status != 0) 1543 if (status != 0)
1542 return status; 1544 return status;
1545
1543 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); 1546 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1547 trace_nfs_writeback_done(inode, task->tk_status,
1548 hdr->args.offset, hdr->res.verf);
1544 1549
1545 if (hdr->res.verf->committed < hdr->args.stable && 1550 if (hdr->res.verf->committed < hdr->args.stable &&
1546 task->tk_status >= 0) { 1551 task->tk_status >= 0) {
@@ -1669,6 +1674,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1669 }; 1674 };
1670 /* Set up the initial task struct. */ 1675 /* Set up the initial task struct. */
1671 nfs_ops->commit_setup(data, &msg); 1676 nfs_ops->commit_setup(data, &msg);
1677 trace_nfs_initiate_commit(data);
1672 1678
1673 dprintk("NFS: initiated commit call\n"); 1679 dprintk("NFS: initiated commit call\n");
1674 1680
@@ -1793,6 +1799,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1793 1799
1794 /* Call the NFS version-specific code */ 1800 /* Call the NFS version-specific code */
1795 NFS_PROTO(data->inode)->commit_done(task, data); 1801 NFS_PROTO(data->inode)->commit_done(task, data);
1802 trace_nfs_commit_done(data);
1796} 1803}
1797 1804
1798static void nfs_commit_release_pages(struct nfs_commit_data *data) 1805static void nfs_commit_release_pages(struct nfs_commit_data *data)