diff options
author | Chuck Lever <cel@netapp.com> | 2006-03-20 13:44:13 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-20 13:44:13 -0500 |
commit | d9ef5a8c26aab09762afce43df64736720b4860e (patch) | |
tree | 01ec0e16b19d7e418f26f1218113bb0f90b1a2e1 | |
parent | c8bded96aa8735823e53c95a26177987ebb19a90 (diff) |
NFS: introduce mechanism for tracking NFS client metrics
Add a per-superblock performance counter facility to the NFS client. This
facility mimics the counters available for block devices and for
networking. Expose these new counters via the new /proc/self/mountstats
interface.
Thanks to Andrew Morton and Trond Myklebust for their review and comments.
Test plan:
fsx and iozone on UP and SMP systems, with and without pre-emption. Watch
for memory overwrite bugs, and performance loss (significantly more CPU
required per op).
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/inode.c | 103 | ||||
-rw-r--r-- | fs/nfs/iostat.h | 152 | ||||
-rw-r--r-- | include/linux/nfs_fs_sb.h | 3 |
3 files changed, 252 insertions, 6 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 827d69255b1b..86b756f44e27 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include "nfs4_fs.h" | 42 | #include "nfs4_fs.h" |
43 | #include "callback.h" | 43 | #include "callback.h" |
44 | #include "delegation.h" | 44 | #include "delegation.h" |
45 | #include "iostat.h" | ||
45 | 46 | ||
46 | #define NFSDBG_FACILITY NFSDBG_VFS | 47 | #define NFSDBG_FACILITY NFSDBG_VFS |
47 | #define NFS_PARANOIA 1 | 48 | #define NFS_PARANOIA 1 |
@@ -65,6 +66,7 @@ static void nfs_clear_inode(struct inode *); | |||
65 | static void nfs_umount_begin(struct super_block *); | 66 | static void nfs_umount_begin(struct super_block *); |
66 | static int nfs_statfs(struct super_block *, struct kstatfs *); | 67 | static int nfs_statfs(struct super_block *, struct kstatfs *); |
67 | static int nfs_show_options(struct seq_file *, struct vfsmount *); | 68 | static int nfs_show_options(struct seq_file *, struct vfsmount *); |
69 | static int nfs_show_stats(struct seq_file *, struct vfsmount *); | ||
68 | static void nfs_zap_acl_cache(struct inode *); | 70 | static void nfs_zap_acl_cache(struct inode *); |
69 | 71 | ||
70 | static struct rpc_program nfs_program; | 72 | static struct rpc_program nfs_program; |
@@ -78,6 +80,7 @@ static struct super_operations nfs_sops = { | |||
78 | .clear_inode = nfs_clear_inode, | 80 | .clear_inode = nfs_clear_inode, |
79 | .umount_begin = nfs_umount_begin, | 81 | .umount_begin = nfs_umount_begin, |
80 | .show_options = nfs_show_options, | 82 | .show_options = nfs_show_options, |
83 | .show_stats = nfs_show_stats, | ||
81 | }; | 84 | }; |
82 | 85 | ||
83 | /* | 86 | /* |
@@ -290,6 +293,12 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) | |||
290 | } | 293 | } |
291 | sb->s_root->d_op = server->rpc_ops->dentry_ops; | 294 | sb->s_root->d_op = server->rpc_ops->dentry_ops; |
292 | 295 | ||
296 | server->io_stats = nfs_alloc_iostats(); | ||
297 | if (!server->io_stats) { | ||
298 | no_root_error = -ENOMEM; | ||
299 | goto out_no_root; | ||
300 | } | ||
301 | |||
293 | /* Get some general file system info */ | 302 | /* Get some general file system info */ |
294 | if (server->namelen == 0 && | 303 | if (server->namelen == 0 && |
295 | server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) | 304 | server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) |
@@ -582,7 +591,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) | |||
582 | 591 | ||
583 | } | 592 | } |
584 | 593 | ||
585 | static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | 594 | static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) |
586 | { | 595 | { |
587 | static struct proc_nfs_info { | 596 | static struct proc_nfs_info { |
588 | int flag; | 597 | int flag; |
@@ -598,20 +607,19 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
598 | { 0, NULL, NULL } | 607 | { 0, NULL, NULL } |
599 | }; | 608 | }; |
600 | struct proc_nfs_info *nfs_infop; | 609 | struct proc_nfs_info *nfs_infop; |
601 | struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); | ||
602 | char buf[12]; | 610 | char buf[12]; |
603 | char *proto; | 611 | char *proto; |
604 | 612 | ||
605 | seq_printf(m, ",vers=%d", nfss->rpc_ops->version); | 613 | seq_printf(m, ",vers=%d", nfss->rpc_ops->version); |
606 | seq_printf(m, ",rsize=%d", nfss->rsize); | 614 | seq_printf(m, ",rsize=%d", nfss->rsize); |
607 | seq_printf(m, ",wsize=%d", nfss->wsize); | 615 | seq_printf(m, ",wsize=%d", nfss->wsize); |
608 | if (nfss->acregmin != 3*HZ) | 616 | if (nfss->acregmin != 3*HZ || showdefaults) |
609 | seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); | 617 | seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); |
610 | if (nfss->acregmax != 60*HZ) | 618 | if (nfss->acregmax != 60*HZ || showdefaults) |
611 | seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); | 619 | seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); |
612 | if (nfss->acdirmin != 30*HZ) | 620 | if (nfss->acdirmin != 30*HZ || showdefaults) |
613 | seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); | 621 | seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); |
614 | if (nfss->acdirmax != 60*HZ) | 622 | if (nfss->acdirmax != 60*HZ || showdefaults) |
615 | seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); | 623 | seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); |
616 | for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { | 624 | for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { |
617 | if (nfss->flags & nfs_infop->flag) | 625 | if (nfss->flags & nfs_infop->flag) |
@@ -633,8 +641,89 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
633 | seq_printf(m, ",proto=%s", proto); | 641 | seq_printf(m, ",proto=%s", proto); |
634 | seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); | 642 | seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); |
635 | seq_printf(m, ",retrans=%u", nfss->retrans_count); | 643 | seq_printf(m, ",retrans=%u", nfss->retrans_count); |
644 | } | ||
645 | |||
646 | static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
647 | { | ||
648 | struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); | ||
649 | |||
650 | nfs_show_mount_options(m, nfss, 0); | ||
651 | |||
636 | seq_puts(m, ",addr="); | 652 | seq_puts(m, ",addr="); |
637 | seq_escape(m, nfss->hostname, " \t\n\\"); | 653 | seq_escape(m, nfss->hostname, " \t\n\\"); |
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | ||
659 | { | ||
660 | int i, cpu; | ||
661 | struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); | ||
662 | struct rpc_auth *auth = nfss->client->cl_auth; | ||
663 | struct nfs_iostats totals = { }; | ||
664 | |||
665 | seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); | ||
666 | |||
667 | /* | ||
668 | * Display all mount option settings | ||
669 | */ | ||
670 | seq_printf(m, "\n\topts:\t"); | ||
671 | seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); | ||
672 | seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); | ||
673 | seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); | ||
674 | seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); | ||
675 | nfs_show_mount_options(m, nfss, 1); | ||
676 | |||
677 | seq_printf(m, "\n\tcaps:\t"); | ||
678 | seq_printf(m, "caps=0x%x", nfss->caps); | ||
679 | seq_printf(m, ",wtmult=%d", nfss->wtmult); | ||
680 | seq_printf(m, ",dtsize=%d", nfss->dtsize); | ||
681 | seq_printf(m, ",bsize=%d", nfss->bsize); | ||
682 | seq_printf(m, ",namelen=%d", nfss->namelen); | ||
683 | |||
684 | #ifdef CONFIG_NFS_V4 | ||
685 | if (nfss->rpc_ops->version == 4) { | ||
686 | seq_printf(m, "\n\tnfsv4:\t"); | ||
687 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | ||
688 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | ||
689 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); | ||
690 | } | ||
691 | #endif | ||
692 | |||
693 | /* | ||
694 | * Display security flavor in effect for this mount | ||
695 | */ | ||
696 | seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); | ||
697 | if (auth->au_flavor) | ||
698 | seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); | ||
699 | |||
700 | /* | ||
701 | * Display superblock I/O counters | ||
702 | */ | ||
703 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
704 | struct nfs_iostats *stats; | ||
705 | |||
706 | if (!cpu_possible(cpu)) | ||
707 | continue; | ||
708 | |||
709 | preempt_disable(); | ||
710 | stats = per_cpu_ptr(nfss->io_stats, cpu); | ||
711 | |||
712 | for (i = 0; i < __NFSIOS_COUNTSMAX; i++) | ||
713 | totals.events[i] += stats->events[i]; | ||
714 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | ||
715 | totals.bytes[i] += stats->bytes[i]; | ||
716 | |||
717 | preempt_enable(); | ||
718 | } | ||
719 | |||
720 | seq_printf(m, "\n\tevents:\t"); | ||
721 | for (i = 0; i < __NFSIOS_COUNTSMAX; i++) | ||
722 | seq_printf(m, "%lu ", totals.events[i]); | ||
723 | seq_printf(m, "\n\tbytes:\t"); | ||
724 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | ||
725 | seq_printf(m, "%Lu ", totals.bytes[i]); | ||
726 | |||
638 | return 0; | 727 | return 0; |
639 | } | 728 | } |
640 | 729 | ||
@@ -1742,6 +1831,7 @@ static struct super_operations nfs4_sops = { | |||
1742 | .clear_inode = nfs4_clear_inode, | 1831 | .clear_inode = nfs4_clear_inode, |
1743 | .umount_begin = nfs_umount_begin, | 1832 | .umount_begin = nfs_umount_begin, |
1744 | .show_options = nfs_show_options, | 1833 | .show_options = nfs_show_options, |
1834 | .show_stats = nfs_show_stats, | ||
1745 | }; | 1835 | }; |
1746 | 1836 | ||
1747 | /* | 1837 | /* |
@@ -2015,6 +2105,7 @@ out_err: | |||
2015 | out_free: | 2105 | out_free: |
2016 | kfree(server->mnt_path); | 2106 | kfree(server->mnt_path); |
2017 | kfree(server->hostname); | 2107 | kfree(server->hostname); |
2108 | nfs_free_iostats(server->io_stats); | ||
2018 | kfree(server); | 2109 | kfree(server); |
2019 | return s; | 2110 | return s; |
2020 | } | 2111 | } |
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 000000000000..dc080e50ec57 --- /dev/null +++ b/fs/nfs/iostat.h | |||
@@ -0,0 +1,152 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/iostat.h | ||
3 | * | ||
4 | * Declarations for NFS client per-mount statistics | ||
5 | * | ||
6 | * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com> | ||
7 | * | ||
8 | * NFS client per-mount statistics provide information about the health of | ||
9 | * the NFS client and the health of each NFS mount point. Generally these | ||
10 | * are not for detailed problem diagnosis, but simply to indicate that there | ||
11 | * is a problem. | ||
12 | * | ||
13 | * These counters are not meant to be human-readable, but are meant to be | ||
14 | * integrated into system monitoring tools such as "sar" and "iostat". As | ||
15 | * such, the counters are sampled by the tools over time, and are never | ||
16 | * zeroed after a file system is mounted. Moving averages can be computed | ||
17 | * by the tools by taking the difference between two instantaneous samples | ||
18 | * and dividing that by the time between the samples. | ||
19 | */ | ||
20 | |||
21 | #ifndef _NFS_IOSTAT | ||
22 | #define _NFS_IOSTAT | ||
23 | |||
24 | #define NFS_IOSTAT_VERS "1.0" | ||
25 | |||
26 | /* | ||
27 | * NFS byte counters | ||
28 | * | ||
29 | * 1. SERVER - the number of payload bytes read from or written to the | ||
30 | * server by the NFS client via an NFS READ or WRITE request. | ||
31 | * | ||
32 | * 2. NORMAL - the number of bytes read or written by applications via | ||
33 | * the read(2) and write(2) system call interfaces. | ||
34 | * | ||
35 | * 3. DIRECT - the number of bytes read or written from files opened | ||
36 | * with the O_DIRECT flag. | ||
37 | * | ||
38 | * These counters give a view of the data throughput into and out of the NFS | ||
39 | * client. Comparing the number of bytes requested by an application with the | ||
40 | * number of bytes the client requests from the server can provide an | ||
41 | * indication of client efficiency (per-op, cache hits, etc.). | ||
42 | * | ||
43 | * These counters can also help characterize which access methods are in | ||
44 | * use. DIRECT by itself shows whether there is any O_DIRECT traffic. | ||
45 | * NORMAL + DIRECT shows how much data is going through the system call | ||
46 | * interface. A large amount of SERVER traffic without much NORMAL or | ||
47 | * DIRECT traffic shows that applications are using mapped files. | ||
48 | * | ||
49 | * NFS page counters | ||
50 | * | ||
51 | * These count the number of pages read or written via nfs_readpage(), | ||
52 | * nfs_readpages(), or their write equivalents. | ||
53 | */ | ||
54 | enum nfs_stat_bytecounters { | ||
55 | NFSIOS_NORMALREADBYTES = 0, | ||
56 | NFSIOS_NORMALWRITTENBYTES, | ||
57 | NFSIOS_DIRECTREADBYTES, | ||
58 | NFSIOS_DIRECTWRITTENBYTES, | ||
59 | NFSIOS_SERVERREADBYTES, | ||
60 | NFSIOS_SERVERWRITTENBYTES, | ||
61 | NFSIOS_READPAGES, | ||
62 | NFSIOS_WRITEPAGES, | ||
63 | __NFSIOS_BYTESMAX, | ||
64 | }; | ||
65 | |||
66 | /* | ||
67 | * NFS event counters | ||
68 | * | ||
69 | * These counters provide a low-overhead way of monitoring client activity | ||
70 | * without enabling NFS trace debugging. The counters show the rate at | ||
71 | * which VFS requests are made, and how often the client invalidates its | ||
72 | * data and attribute caches. This allows system administrators to monitor | ||
73 | * such things as how close-to-open is working, and answer questions such | ||
74 | * as "why are there so many GETATTR requests on the wire?" | ||
75 | * | ||
76 | * They also count anamolous events such as short reads and writes, silly | ||
77 | * renames due to close-after-delete, and operations that change the size | ||
78 | * of a file (such operations can often be the source of data corruption | ||
79 | * if applications aren't using file locking properly). | ||
80 | */ | ||
81 | enum nfs_stat_eventcounters { | ||
82 | NFSIOS_INODEREVALIDATE = 0, | ||
83 | NFSIOS_DENTRYREVALIDATE, | ||
84 | NFSIOS_DATAINVALIDATE, | ||
85 | NFSIOS_ATTRINVALIDATE, | ||
86 | NFSIOS_VFSOPEN, | ||
87 | NFSIOS_VFSLOOKUP, | ||
88 | NFSIOS_VFSACCESS, | ||
89 | NFSIOS_VFSUPDATEPAGE, | ||
90 | NFSIOS_VFSREADPAGE, | ||
91 | NFSIOS_VFSREADPAGES, | ||
92 | NFSIOS_VFSWRITEPAGE, | ||
93 | NFSIOS_VFSWRITEPAGES, | ||
94 | NFSIOS_VFSGETDENTS, | ||
95 | NFSIOS_VFSSETATTR, | ||
96 | NFSIOS_VFSFLUSH, | ||
97 | NFSIOS_VFSFSYNC, | ||
98 | NFSIOS_VFSLOCK, | ||
99 | NFSIOS_VFSRELEASE, | ||
100 | NFSIOS_CONGESTIONWAIT, | ||
101 | NFSIOS_SETATTRTRUNC, | ||
102 | NFSIOS_EXTENDWRITE, | ||
103 | NFSIOS_SILLYRENAME, | ||
104 | NFSIOS_SHORTREAD, | ||
105 | NFSIOS_SHORTWRITE, | ||
106 | __NFSIOS_COUNTSMAX, | ||
107 | }; | ||
108 | |||
109 | #ifdef __KERNEL__ | ||
110 | |||
111 | #include <linux/percpu.h> | ||
112 | #include <linux/cache.h> | ||
113 | |||
114 | struct nfs_iostats { | ||
115 | unsigned long long bytes[__NFSIOS_BYTESMAX]; | ||
116 | unsigned long events[__NFSIOS_COUNTSMAX]; | ||
117 | } ____cacheline_aligned; | ||
118 | |||
119 | static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) | ||
120 | { | ||
121 | struct nfs_iostats *iostats; | ||
122 | int cpu; | ||
123 | |||
124 | cpu = get_cpu(); | ||
125 | iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); | ||
126 | iostats->events[stat] ++; | ||
127 | put_cpu_no_resched(); | ||
128 | } | ||
129 | |||
130 | static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) | ||
131 | { | ||
132 | struct nfs_iostats *iostats; | ||
133 | int cpu; | ||
134 | |||
135 | cpu = get_cpu(); | ||
136 | iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); | ||
137 | iostats->bytes[stat] += addend; | ||
138 | put_cpu_no_resched(); | ||
139 | } | ||
140 | |||
141 | static inline struct nfs_iostats *nfs_alloc_iostats(void) | ||
142 | { | ||
143 | return alloc_percpu(struct nfs_iostats); | ||
144 | } | ||
145 | |||
146 | static inline void nfs_free_iostats(struct nfs_iostats *stats) | ||
147 | { | ||
148 | free_percpu(stats); | ||
149 | } | ||
150 | |||
151 | #endif | ||
152 | #endif | ||
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a522ab97358d..d65e69a06b72 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/list.h> | 4 | #include <linux/list.h> |
5 | #include <linux/backing-dev.h> | 5 | #include <linux/backing-dev.h> |
6 | 6 | ||
7 | struct nfs_iostats; | ||
8 | |||
7 | /* | 9 | /* |
8 | * NFS client parameters stored in the superblock. | 10 | * NFS client parameters stored in the superblock. |
9 | */ | 11 | */ |
@@ -12,6 +14,7 @@ struct nfs_server { | |||
12 | struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ | 14 | struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ |
13 | struct rpc_clnt * client_acl; /* ACL RPC client handle */ | 15 | struct rpc_clnt * client_acl; /* ACL RPC client handle */ |
14 | struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ | 16 | struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ |
17 | struct nfs_iostats * io_stats; /* I/O statistics */ | ||
15 | struct backing_dev_info backing_dev_info; | 18 | struct backing_dev_info backing_dev_info; |
16 | int flags; /* various flags */ | 19 | int flags; /* various flags */ |
17 | unsigned int caps; /* server capabilities */ | 20 | unsigned int caps; /* server capabilities */ |