diff options
author | Andy Grover <andy.grover@oracle.com> | 2009-02-24 10:30:22 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-02-27 02:39:25 -0500 |
commit | a8c879a7ee988eb67a5e85e87fa2cc953e0ca749 (patch) | |
tree | 493db4a3137d9f7bd1fe871ace4f5e12c1fbb2a1 /net/rds/info.c | |
parent | 0fbc78cbf51f3edee17c2ede42c7b3c13736158a (diff) |
RDS: Info and stats
RDS currently generates a lot of stats that are accessible via
the rds-info utility. This code implements the support for this.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/info.c')
-rw-r--r-- | net/rds/info.c | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/net/rds/info.c b/net/rds/info.c new file mode 100644 index 000000000000..1d885535214d --- /dev/null +++ b/net/rds/info.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | |||
39 | /* | ||
40 | * This file implements a getsockopt() call which copies a set of fixed | ||
41 | * sized structs into a user-specified buffer as a means of providing | ||
42 | * read-only information about RDS. | ||
43 | * | ||
44 | * For a given information source there are a given number of fixed sized | ||
45 | * structs at a given time. The structs are only copied if the user-specified | ||
46 | * buffer is big enough. The destination pages that make up the buffer | ||
47 | * are pinned for the duration of the copy. | ||
48 | * | ||
49 | * This gives us the following benefits: | ||
50 | * | ||
51 | * - simple implementation, no copy "position" across multiple calls | ||
52 | * - consistent snapshot of an info source | ||
53 | * - atomic copy works well with whatever locking info source has | ||
54 | * - one portable tool to get rds info across implementations | ||
55 | * - long-lived tool can get info without allocating | ||
56 | * | ||
57 | * at the following costs: | ||
58 | * | ||
59 | * - info source copy must be pinned, may be "large" | ||
60 | */ | ||
61 | |||
/*
 * Cursor used while copying an info snapshot into the user buffer.
 * rds_info_copy() advances it; rds_info_iter_unmap() drops the mapping.
 */
struct rds_info_iterator {
	/* next destination page; advanced whenever a page fills up */
	struct page **pages;
	/* atomic kmap of *pages, or NULL while nothing is mapped */
	void *addr;
	/* byte offset of the next write within the current page */
	unsigned long offset;
};
67 | |||
68 | static DEFINE_SPINLOCK(rds_info_lock); | ||
69 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | ||
70 | |||
71 | void rds_info_register_func(int optname, rds_info_func func) | ||
72 | { | ||
73 | int offset = optname - RDS_INFO_FIRST; | ||
74 | |||
75 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
76 | |||
77 | spin_lock(&rds_info_lock); | ||
78 | BUG_ON(rds_info_funcs[offset] != NULL); | ||
79 | rds_info_funcs[offset] = func; | ||
80 | spin_unlock(&rds_info_lock); | ||
81 | } | ||
82 | |||
83 | void rds_info_deregister_func(int optname, rds_info_func func) | ||
84 | { | ||
85 | int offset = optname - RDS_INFO_FIRST; | ||
86 | |||
87 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
88 | |||
89 | spin_lock(&rds_info_lock); | ||
90 | BUG_ON(rds_info_funcs[offset] != func); | ||
91 | rds_info_funcs[offset] = NULL; | ||
92 | spin_unlock(&rds_info_lock); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | ||
97 | * because the kmap is so expensive. This must be called before using blocking | ||
98 | * operations while holding the mapping and as the iterator is torn down. | ||
99 | */ | ||
100 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | ||
101 | { | ||
102 | if (iter->addr != NULL) { | ||
103 | kunmap_atomic(iter->addr, KM_USER0); | ||
104 | iter->addr = NULL; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * get_user_pages() called flush_dcache_page() on the pages for us. | ||
110 | */ | ||
111 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | ||
112 | unsigned long bytes) | ||
113 | { | ||
114 | unsigned long this; | ||
115 | |||
116 | while (bytes) { | ||
117 | if (iter->addr == NULL) | ||
118 | iter->addr = kmap_atomic(*iter->pages, KM_USER0); | ||
119 | |||
120 | this = min(bytes, PAGE_SIZE - iter->offset); | ||
121 | |||
122 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | ||
123 | "bytes %lu\n", *iter->pages, iter->addr, | ||
124 | iter->offset, this, data, bytes); | ||
125 | |||
126 | memcpy(iter->addr + iter->offset, data, this); | ||
127 | |||
128 | data += this; | ||
129 | bytes -= this; | ||
130 | iter->offset += this; | ||
131 | |||
132 | if (iter->offset == PAGE_SIZE) { | ||
133 | kunmap_atomic(iter->addr, KM_USER0); | ||
134 | iter->addr = NULL; | ||
135 | iter->offset = 0; | ||
136 | iter->pages++; | ||
137 | } | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * @optval points to the userspace buffer that the information snapshot | ||
143 | * will be copied into. | ||
144 | * | ||
145 | * @optlen on input is the size of the buffer in userspace. @optlen | ||
146 | * on output is the size of the requested snapshot in bytes. | ||
147 | * | ||
148 | * This function returns -errno if there is a failure, particularly -ENOSPC | ||
149 | * if the given userspace buffer was not large enough to fit the snapshot. | ||
150 | * On success it returns the positive number of bytes of each array element | ||
151 | * in the snapshot. | ||
152 | */ | ||
153 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | ||
154 | int __user *optlen) | ||
155 | { | ||
156 | struct rds_info_iterator iter; | ||
157 | struct rds_info_lengths lens; | ||
158 | unsigned long nr_pages = 0; | ||
159 | unsigned long start; | ||
160 | unsigned long i; | ||
161 | rds_info_func func; | ||
162 | struct page **pages = NULL; | ||
163 | int ret; | ||
164 | int len; | ||
165 | int total; | ||
166 | |||
167 | if (get_user(len, optlen)) { | ||
168 | ret = -EFAULT; | ||
169 | goto out; | ||
170 | } | ||
171 | |||
172 | /* check for all kinds of wrapping and the like */ | ||
173 | start = (unsigned long)optval; | ||
174 | if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | ||
175 | ret = -EINVAL; | ||
176 | goto out; | ||
177 | } | ||
178 | |||
179 | /* a 0 len call is just trying to probe its length */ | ||
180 | if (len == 0) | ||
181 | goto call_func; | ||
182 | |||
183 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | ||
184 | >> PAGE_SHIFT; | ||
185 | |||
186 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | ||
187 | if (pages == NULL) { | ||
188 | ret = -ENOMEM; | ||
189 | goto out; | ||
190 | } | ||
191 | down_read(¤t->mm->mmap_sem); | ||
192 | ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, | ||
193 | pages, NULL); | ||
194 | up_read(¤t->mm->mmap_sem); | ||
195 | if (ret != nr_pages) { | ||
196 | if (ret > 0) | ||
197 | nr_pages = ret; | ||
198 | else | ||
199 | nr_pages = 0; | ||
200 | ret = -EAGAIN; /* XXX ? */ | ||
201 | goto out; | ||
202 | } | ||
203 | |||
204 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | ||
205 | |||
206 | call_func: | ||
207 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | ||
208 | if (func == NULL) { | ||
209 | ret = -ENOPROTOOPT; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | iter.pages = pages; | ||
214 | iter.addr = NULL; | ||
215 | iter.offset = start & (PAGE_SIZE - 1); | ||
216 | |||
217 | func(sock, len, &iter, &lens); | ||
218 | BUG_ON(lens.each == 0); | ||
219 | |||
220 | total = lens.nr * lens.each; | ||
221 | |||
222 | rds_info_iter_unmap(&iter); | ||
223 | |||
224 | if (total > len) { | ||
225 | len = total; | ||
226 | ret = -ENOSPC; | ||
227 | } else { | ||
228 | len = total; | ||
229 | ret = lens.each; | ||
230 | } | ||
231 | |||
232 | if (put_user(len, optlen)) | ||
233 | ret = -EFAULT; | ||
234 | |||
235 | out: | ||
236 | for (i = 0; pages != NULL && i < nr_pages; i++) | ||
237 | put_page(pages[i]); | ||
238 | kfree(pages); | ||
239 | |||
240 | return ret; | ||
241 | } | ||