diff options
-rw-r--r-- | net/rds/info.c | 241 | ||||
-rw-r--r-- | net/rds/info.h | 30 | ||||
-rw-r--r-- | net/rds/stats.c | 148 |
3 files changed, 419 insertions, 0 deletions
diff --git a/net/rds/info.c b/net/rds/info.c new file mode 100644 index 000000000000..1d885535214d --- /dev/null +++ b/net/rds/info.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>

#include "rds.h"
38 | |||
39 | /* | ||
40 | * This file implements a getsockopt() call which copies a set of fixed | ||
41 | * sized structs into a user-specified buffer as a means of providing | ||
42 | * read-only information about RDS. | ||
43 | * | ||
44 | * For a given information source there are a given number of fixed sized | ||
45 | * structs at a given time. The structs are only copied if the user-specified | ||
46 | * buffer is big enough. The destination pages that make up the buffer | ||
47 | * are pinned for the duration of the copy. | ||
48 | * | ||
49 | * This gives us the following benefits: | ||
50 | * | ||
51 | * - simple implementation, no copy "position" across multiple calls | ||
52 | * - consistent snapshot of an info source | ||
53 | * - atomic copy works well with whatever locking info source has | ||
54 | * - one portable tool to get rds info across implementations | ||
55 | * - long-lived tool can get info without allocating | ||
56 | * | ||
57 | * at the following costs: | ||
58 | * | ||
59 | * - info source copy must be pinned, may be "large" | ||
60 | */ | ||
61 | |||
/*
 * Write cursor into the user buffer that rds_info_getsockopt() hands to an
 * info source.  rds_info_copy() advances it as data is written.
 */
struct rds_info_iterator {
	/* page currently being written; stepped forward as pages fill up */
	struct page **pages;
	/* atomic kmap of *pages, or NULL while no mapping is held */
	void *addr;
	/* byte offset of the next write within the current page */
	unsigned long offset;
};
67 | |||
68 | static DEFINE_SPINLOCK(rds_info_lock); | ||
69 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | ||
70 | |||
71 | void rds_info_register_func(int optname, rds_info_func func) | ||
72 | { | ||
73 | int offset = optname - RDS_INFO_FIRST; | ||
74 | |||
75 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
76 | |||
77 | spin_lock(&rds_info_lock); | ||
78 | BUG_ON(rds_info_funcs[offset] != NULL); | ||
79 | rds_info_funcs[offset] = func; | ||
80 | spin_unlock(&rds_info_lock); | ||
81 | } | ||
82 | |||
83 | void rds_info_deregister_func(int optname, rds_info_func func) | ||
84 | { | ||
85 | int offset = optname - RDS_INFO_FIRST; | ||
86 | |||
87 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
88 | |||
89 | spin_lock(&rds_info_lock); | ||
90 | BUG_ON(rds_info_funcs[offset] != func); | ||
91 | rds_info_funcs[offset] = NULL; | ||
92 | spin_unlock(&rds_info_lock); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | ||
97 | * because the kmap is so expensive. This must be called before using blocking | ||
98 | * operations while holding the mapping and as the iterator is torn down. | ||
99 | */ | ||
100 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | ||
101 | { | ||
102 | if (iter->addr != NULL) { | ||
103 | kunmap_atomic(iter->addr, KM_USER0); | ||
104 | iter->addr = NULL; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * get_user_pages() called flush_dcache_page() on the pages for us. | ||
110 | */ | ||
111 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | ||
112 | unsigned long bytes) | ||
113 | { | ||
114 | unsigned long this; | ||
115 | |||
116 | while (bytes) { | ||
117 | if (iter->addr == NULL) | ||
118 | iter->addr = kmap_atomic(*iter->pages, KM_USER0); | ||
119 | |||
120 | this = min(bytes, PAGE_SIZE - iter->offset); | ||
121 | |||
122 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | ||
123 | "bytes %lu\n", *iter->pages, iter->addr, | ||
124 | iter->offset, this, data, bytes); | ||
125 | |||
126 | memcpy(iter->addr + iter->offset, data, this); | ||
127 | |||
128 | data += this; | ||
129 | bytes -= this; | ||
130 | iter->offset += this; | ||
131 | |||
132 | if (iter->offset == PAGE_SIZE) { | ||
133 | kunmap_atomic(iter->addr, KM_USER0); | ||
134 | iter->addr = NULL; | ||
135 | iter->offset = 0; | ||
136 | iter->pages++; | ||
137 | } | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * @optval points to the userspace buffer that the information snapshot | ||
143 | * will be copied into. | ||
144 | * | ||
145 | * @optlen on input is the size of the buffer in userspace. @optlen | ||
146 | * on output is the size of the requested snapshot in bytes. | ||
147 | * | ||
148 | * This function returns -errno if there is a failure, particularly -ENOSPC | ||
149 | * if the given userspace buffer was not large enough to fit the snapshot. | ||
150 | * On success it returns the positive number of bytes of each array element | ||
151 | * in the snapshot. | ||
152 | */ | ||
153 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | ||
154 | int __user *optlen) | ||
155 | { | ||
156 | struct rds_info_iterator iter; | ||
157 | struct rds_info_lengths lens; | ||
158 | unsigned long nr_pages = 0; | ||
159 | unsigned long start; | ||
160 | unsigned long i; | ||
161 | rds_info_func func; | ||
162 | struct page **pages = NULL; | ||
163 | int ret; | ||
164 | int len; | ||
165 | int total; | ||
166 | |||
167 | if (get_user(len, optlen)) { | ||
168 | ret = -EFAULT; | ||
169 | goto out; | ||
170 | } | ||
171 | |||
172 | /* check for all kinds of wrapping and the like */ | ||
173 | start = (unsigned long)optval; | ||
174 | if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | ||
175 | ret = -EINVAL; | ||
176 | goto out; | ||
177 | } | ||
178 | |||
179 | /* a 0 len call is just trying to probe its length */ | ||
180 | if (len == 0) | ||
181 | goto call_func; | ||
182 | |||
183 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | ||
184 | >> PAGE_SHIFT; | ||
185 | |||
186 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | ||
187 | if (pages == NULL) { | ||
188 | ret = -ENOMEM; | ||
189 | goto out; | ||
190 | } | ||
191 | down_read(¤t->mm->mmap_sem); | ||
192 | ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, | ||
193 | pages, NULL); | ||
194 | up_read(¤t->mm->mmap_sem); | ||
195 | if (ret != nr_pages) { | ||
196 | if (ret > 0) | ||
197 | nr_pages = ret; | ||
198 | else | ||
199 | nr_pages = 0; | ||
200 | ret = -EAGAIN; /* XXX ? */ | ||
201 | goto out; | ||
202 | } | ||
203 | |||
204 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | ||
205 | |||
206 | call_func: | ||
207 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | ||
208 | if (func == NULL) { | ||
209 | ret = -ENOPROTOOPT; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | iter.pages = pages; | ||
214 | iter.addr = NULL; | ||
215 | iter.offset = start & (PAGE_SIZE - 1); | ||
216 | |||
217 | func(sock, len, &iter, &lens); | ||
218 | BUG_ON(lens.each == 0); | ||
219 | |||
220 | total = lens.nr * lens.each; | ||
221 | |||
222 | rds_info_iter_unmap(&iter); | ||
223 | |||
224 | if (total > len) { | ||
225 | len = total; | ||
226 | ret = -ENOSPC; | ||
227 | } else { | ||
228 | len = total; | ||
229 | ret = lens.each; | ||
230 | } | ||
231 | |||
232 | if (put_user(len, optlen)) | ||
233 | ret = -EFAULT; | ||
234 | |||
235 | out: | ||
236 | for (i = 0; pages != NULL && i < nr_pages; i++) | ||
237 | put_page(pages[i]); | ||
238 | kfree(pages); | ||
239 | |||
240 | return ret; | ||
241 | } | ||
diff --git a/net/rds/info.h b/net/rds/info.h new file mode 100644 index 000000000000..b6c052ca7d22 --- /dev/null +++ b/net/rds/info.h | |||
@@ -0,0 +1,30 @@ | |||
1 | #ifndef _RDS_INFO_H | ||
2 | #define _RDS_INFO_H | ||
3 | |||
/*
 * Size of an info source's snapshot, filled in by an rds_info_func.
 * The snapshot is @nr fixed-size elements of @each bytes.
 */
struct rds_info_lengths {
	unsigned int nr;	/* number of elements in the snapshot */
	unsigned int each;	/* size of one element in bytes */
};
8 | |||
struct rds_info_iterator;

/*
 * Info source callback.
 *
 * An implementation must always fill in @lens with the size of the info
 * source as it currently stands.  If that snapshot fits in @len bytes it
 * should also be copied out through @iter.  The caller works out whether
 * the copy happened by comparing @len with the lengths reported in @lens.
 */
typedef void (*rds_info_func)(struct socket *sock,
			      unsigned int len,
			      struct rds_info_iterator *iter,
			      struct rds_info_lengths *lens);
20 | |||
21 | void rds_info_register_func(int optname, rds_info_func func); | ||
22 | void rds_info_deregister_func(int optname, rds_info_func func); | ||
23 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | ||
24 | int __user *optlen); | ||
25 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | ||
26 | unsigned long bytes); | ||
27 | void rds_info_iter_unmap(struct rds_info_iterator *iter); | ||
28 | |||
29 | |||
30 | #endif | ||
diff --git a/net/rds/stats.c b/net/rds/stats.c new file mode 100644 index 000000000000..637146893cf3 --- /dev/null +++ b/net/rds/stats.c | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | |||
39 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); | ||
40 | |||
/*
 * Names reported to userspace for the counters in struct rds_statistics.
 * rds_stats_info() treats the struct as a flat array of uint64_t and pairs
 * value i with name i, so the order here has to follow the field order.
 *
 * The list can be regenerated from the struct's field declarations with
 * this vim substitution:
 */
/* :.,$s/unsigned long\>.*\<s_\(.*\);/"\1",/g */

static char *rds_stat_names[] = {
	/* connections */
	"conn_reset",

	/* receive path */
	"recv_drop_bad_checksum",
	"recv_drop_old_seq",
	"recv_drop_no_sock",
	"recv_drop_dead_sock",
	"recv_deliver_raced",
	"recv_delivered",
	"recv_queued",
	"recv_immediate_retry",
	"recv_delayed_retry",
	"recv_ack_required",
	"recv_rdma_bytes",
	"recv_ping",

	/* send path */
	"send_queue_empty",
	"send_queue_full",
	"send_sem_contention",
	"send_sem_queue_raced",
	"send_immediate_retry",
	"send_delayed_retry",
	"send_drop_acked",
	"send_ack_required",
	"send_queued",
	"send_rdma",
	"send_rdma_bytes",
	"send_pong",

	/* page and user copies */
	"page_remainder_hit",
	"page_remainder_miss",
	"copy_to_user",
	"copy_from_user",

	/* congestion */
	"cong_update_queued",
	"cong_update_received",
	"cong_send_error",
	"cong_send_blocked",
};
78 | |||
79 | void rds_stats_info_copy(struct rds_info_iterator *iter, | ||
80 | uint64_t *values, char **names, size_t nr) | ||
81 | { | ||
82 | struct rds_info_counter ctr; | ||
83 | size_t i; | ||
84 | |||
85 | for (i = 0; i < nr; i++) { | ||
86 | BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); | ||
87 | strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); | ||
88 | ctr.value = values[i]; | ||
89 | |||
90 | rds_info_copy(iter, &ctr, sizeof(ctr)); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * This gives global counters across all the transports. The strings | ||
96 | * are copied in so that the tool doesn't need knowledge of the specific | ||
97 | * stats that we're exporting. Some are pretty implementation dependent | ||
98 | * and may change over time. That doesn't stop them from being useful. | ||
99 | * | ||
100 | * This is the only function in the chain that knows about the byte granular | ||
101 | * length in userspace. It converts it to number of stat entries that the | ||
102 | * rest of the functions operate in. | ||
103 | */ | ||
104 | static void rds_stats_info(struct socket *sock, unsigned int len, | ||
105 | struct rds_info_iterator *iter, | ||
106 | struct rds_info_lengths *lens) | ||
107 | { | ||
108 | struct rds_statistics stats = {0, }; | ||
109 | uint64_t *src; | ||
110 | uint64_t *sum; | ||
111 | size_t i; | ||
112 | int cpu; | ||
113 | unsigned int avail; | ||
114 | |||
115 | avail = len / sizeof(struct rds_info_counter); | ||
116 | |||
117 | if (avail < ARRAY_SIZE(rds_stat_names)) { | ||
118 | avail = 0; | ||
119 | goto trans; | ||
120 | } | ||
121 | |||
122 | for_each_online_cpu(cpu) { | ||
123 | src = (uint64_t *)&(per_cpu(rds_stats, cpu)); | ||
124 | sum = (uint64_t *)&stats; | ||
125 | for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) | ||
126 | *(sum++) += *(src++); | ||
127 | } | ||
128 | |||
129 | rds_stats_info_copy(iter, (uint64_t *)&stats, rds_stat_names, | ||
130 | ARRAY_SIZE(rds_stat_names)); | ||
131 | avail -= ARRAY_SIZE(rds_stat_names); | ||
132 | |||
133 | trans: | ||
134 | lens->each = sizeof(struct rds_info_counter); | ||
135 | lens->nr = rds_trans_stats_info_copy(iter, avail) + | ||
136 | ARRAY_SIZE(rds_stat_names); | ||
137 | } | ||
138 | |||
139 | void rds_stats_exit(void) | ||
140 | { | ||
141 | rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); | ||
142 | } | ||
143 | |||
144 | int __init rds_stats_init(void) | ||
145 | { | ||
146 | rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); | ||
147 | return 0; | ||
148 | } | ||