diff options
author | Stephen Hemminger <shemminger@linux-foundation.org> | 2007-07-11 22:43:52 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-11 22:45:39 -0400 |
commit | 662ad4f8efd3ba2ed710d36003f968b500e6f123 (patch) | |
tree | 925ca3708b8e4d7c958ef8aec45014caa6f0c550 /net | |
parent | 0e06877c6fdbc67b1132be895f995acd1ff30135 (diff) |
[TCP]: tcp probe wraparound handling and other changes
Switch from formatting messages in the probe routine and copying them
with kfifo to storing records in a small circular queue and formatting
them on read. This avoids wraparound issues with kfifo and saves one
copy.
Also make sure to state the correct license, rather than the one copied
from the driver I started with.
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp_probe.c | 194 |
1 files changed, 124 insertions, 70 deletions
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d9323dfff826..86624fabc4bf 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -6,8 +6,7 @@ | |||
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 9 | * the Free Software Foundation; either version 2 of the License. |
10 | * (at your option) any later version. | ||
11 | * | 10 | * |
12 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
@@ -25,23 +24,22 @@ | |||
25 | #include <linux/tcp.h> | 24 | #include <linux/tcp.h> |
26 | #include <linux/proc_fs.h> | 25 | #include <linux/proc_fs.h> |
27 | #include <linux/module.h> | 26 | #include <linux/module.h> |
28 | #include <linux/kfifo.h> | ||
29 | #include <linux/ktime.h> | 27 | #include <linux/ktime.h> |
30 | #include <linux/time.h> | 28 | #include <linux/time.h> |
31 | #include <linux/vmalloc.h> | ||
32 | 29 | ||
33 | #include <net/tcp.h> | 30 | #include <net/tcp.h> |
34 | 31 | ||
35 | MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); | 32 | MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); |
36 | MODULE_DESCRIPTION("TCP cwnd snooper"); | 33 | MODULE_DESCRIPTION("TCP cwnd snooper"); |
37 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
35 | MODULE_VERSION("1.1"); | ||
38 | 36 | ||
39 | static int port __read_mostly = 0; | 37 | static int port __read_mostly = 0; |
40 | MODULE_PARM_DESC(port, "Port to match (0=all)"); | 38 | MODULE_PARM_DESC(port, "Port to match (0=all)"); |
41 | module_param(port, int, 0); | 39 | module_param(port, int, 0); |
42 | 40 | ||
43 | static int bufsize __read_mostly = 64*1024; | 41 | static int bufsize __read_mostly = 4096; |
44 | MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); | 42 | MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); |
45 | module_param(bufsize, int, 0); | 43 | module_param(bufsize, int, 0); |
46 | 44 | ||
47 | static int full __read_mostly; | 45 | static int full __read_mostly; |
@@ -50,39 +48,38 @@ module_param(full, int, 0); | |||
50 | 48 | ||
51 | static const char procname[] = "tcpprobe"; | 49 | static const char procname[] = "tcpprobe"; |
52 | 50 | ||
53 | struct { | 51 | struct tcp_log { |
54 | struct kfifo *fifo; | 52 | ktime_t tstamp; |
53 | __be32 saddr, daddr; | ||
54 | __be16 sport, dport; | ||
55 | u16 length; | ||
56 | u32 snd_nxt; | ||
57 | u32 snd_una; | ||
58 | u32 snd_wnd; | ||
59 | u32 snd_cwnd; | ||
60 | u32 ssthresh; | ||
61 | u32 srtt; | ||
62 | }; | ||
63 | |||
64 | static struct { | ||
55 | spinlock_t lock; | 65 | spinlock_t lock; |
56 | wait_queue_head_t wait; | 66 | wait_queue_head_t wait; |
57 | ktime_t start; | 67 | ktime_t start; |
58 | u32 lastcwnd; | 68 | u32 lastcwnd; |
59 | } tcpw; | ||
60 | 69 | ||
61 | /* | 70 | unsigned long head, tail; |
62 | * Print to log with timestamps. | 71 | struct tcp_log *log; |
63 | * FIXME: causes an extra copy | 72 | } tcp_probe; |
64 | */ | 73 | |
65 | static void printl(const char *fmt, ...) | ||
66 | __attribute__ ((format (printf, 1, 2))); | ||
67 | 74 | ||
68 | static void printl(const char *fmt, ...) | 75 | static inline int tcp_probe_used(void) |
69 | { | 76 | { |
70 | va_list args; | 77 | return (tcp_probe.head - tcp_probe.tail) % bufsize; |
71 | int len; | 78 | } |
72 | struct timespec tv; | 79 | |
73 | char tbuf[256]; | 80 | static inline int tcp_probe_avail(void) |
74 | 81 | { | |
75 | va_start(args, fmt); | 82 | return bufsize - tcp_probe_used(); |
76 | /* want monotonic time since start of tcp_probe */ | ||
77 | tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); | ||
78 | |||
79 | len = sprintf(tbuf, "%lu.%09lu ", | ||
80 | (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); | ||
81 | len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); | ||
82 | va_end(args); | ||
83 | |||
84 | kfifo_put(tcpw.fifo, tbuf, len); | ||
85 | wake_up(&tcpw.wait); | ||
86 | } | 83 | } |
87 | 84 | ||
88 | /* | 85 | /* |
@@ -97,63 +94,117 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
97 | 94 | ||
98 | /* Only update if port matches */ | 95 | /* Only update if port matches */ |
99 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) | 96 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) |
100 | && (full || tp->snd_cwnd != tcpw.lastcwnd)) { | 97 | && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { |
101 | printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n", | 98 | |
102 | NIPQUAD(inet->saddr), ntohs(inet->sport), | 99 | spin_lock(&tcp_probe.lock); |
103 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 100 | /* If log fills, just silently drop */ |
104 | skb->len, tp->snd_nxt, tp->snd_una, | 101 | if (tcp_probe_avail() > 1) { |
105 | tp->snd_cwnd, tcp_current_ssthresh(sk), | 102 | struct tcp_log *p = tcp_probe.log + tcp_probe.head; |
106 | tp->snd_wnd, tp->srtt >> 3); | 103 | |
107 | tcpw.lastcwnd = tp->snd_cwnd; | 104 | p->tstamp = ktime_get(); |
105 | p->saddr = inet->saddr; | ||
106 | p->sport = inet->sport; | ||
107 | p->daddr = inet->daddr; | ||
108 | p->dport = inet->dport; | ||
109 | p->length = skb->len; | ||
110 | p->snd_nxt = tp->snd_nxt; | ||
111 | p->snd_una = tp->snd_una; | ||
112 | p->snd_cwnd = tp->snd_cwnd; | ||
113 | p->snd_wnd = tp->snd_wnd; | ||
114 | p->srtt = tp->srtt >> 3; | ||
115 | |||
116 | tcp_probe.head = (tcp_probe.head + 1) % bufsize; | ||
117 | } | ||
118 | tcp_probe.lastcwnd = tp->snd_cwnd; | ||
119 | spin_unlock(&tcp_probe.lock); | ||
120 | |||
121 | wake_up(&tcp_probe.wait); | ||
108 | } | 122 | } |
109 | 123 | ||
110 | jprobe_return(); | 124 | jprobe_return(); |
111 | return 0; | 125 | return 0; |
112 | } | 126 | } |
113 | 127 | ||
114 | static struct jprobe tcp_probe = { | 128 | static struct jprobe tcp_jprobe = { |
115 | .kp = { | 129 | .kp = { |
116 | .symbol_name = "tcp_rcv_established", | 130 | .symbol_name = "tcp_rcv_established", |
117 | }, | 131 | }, |
118 | .entry = JPROBE_ENTRY(jtcp_rcv_established), | 132 | .entry = JPROBE_ENTRY(jtcp_rcv_established), |
119 | }; | 133 | }; |
120 | 134 | ||
121 | |||
122 | static int tcpprobe_open(struct inode * inode, struct file * file) | 135 | static int tcpprobe_open(struct inode * inode, struct file * file) |
123 | { | 136 | { |
124 | kfifo_reset(tcpw.fifo); | 137 | /* Reset (empty) log */ |
125 | tcpw.start = ktime_get(); | 138 | spin_lock_bh(&tcp_probe.lock); |
139 | tcp_probe.head = tcp_probe.tail = 0; | ||
140 | tcp_probe.start = ktime_get(); | ||
141 | spin_unlock_bh(&tcp_probe.lock); | ||
142 | |||
126 | return 0; | 143 | return 0; |
127 | } | 144 | } |
128 | 145 | ||
146 | static int tcpprobe_sprint(char *tbuf, int n) | ||
147 | { | ||
148 | const struct tcp_log *p | ||
149 | = tcp_probe.log + tcp_probe.tail % bufsize; | ||
150 | struct timespec tv | ||
151 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | ||
152 | |||
153 | return snprintf(tbuf, n, | ||
154 | "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" | ||
155 | " %d %#x %#x %u %u %u %u\n", | ||
156 | (unsigned long) tv.tv_sec, | ||
157 | (unsigned long) tv.tv_nsec, | ||
158 | NIPQUAD(p->saddr), ntohs(p->sport), | ||
159 | NIPQUAD(p->daddr), ntohs(p->dport), | ||
160 | p->length, p->snd_nxt, p->snd_una, | ||
161 | p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); | ||
162 | } | ||
163 | |||
129 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, | 164 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, |
130 | size_t len, loff_t *ppos) | 165 | size_t len, loff_t *ppos) |
131 | { | 166 | { |
132 | int error = 0, cnt = 0; | 167 | int error = 0, cnt = 0; |
133 | unsigned char *tbuf; | ||
134 | 168 | ||
135 | if (!buf || len < 0) | 169 | if (!buf || len < 0) |
136 | return -EINVAL; | 170 | return -EINVAL; |
137 | 171 | ||
138 | if (len == 0) | 172 | while (cnt < len) { |
139 | return 0; | 173 | char tbuf[128]; |
174 | int width; | ||
175 | |||
176 | /* Wait for data in buffer */ | ||
177 | error = wait_event_interruptible(tcp_probe.wait, | ||
178 | tcp_probe_used() > 0); | ||
179 | if (error) | ||
180 | break; | ||
140 | 181 | ||
141 | tbuf = vmalloc(len); | 182 | spin_lock_bh(&tcp_probe.lock); |
142 | if (!tbuf) | 183 | if (tcp_probe.head == tcp_probe.tail) { |
143 | return -ENOMEM; | 184 | /* multiple readers race? */ |
185 | spin_unlock_bh(&tcp_probe.lock); | ||
186 | continue; | ||
187 | } | ||
144 | 188 | ||
145 | error = wait_event_interruptible(tcpw.wait, | 189 | width = tcpprobe_sprint(tbuf, sizeof(tbuf)); |
146 | __kfifo_len(tcpw.fifo) != 0); | ||
147 | if (error) | ||
148 | goto out_free; | ||
149 | 190 | ||
150 | cnt = kfifo_get(tcpw.fifo, tbuf, len); | 191 | if (width < len) |
151 | error = copy_to_user(buf, tbuf, cnt); | 192 | tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; |
152 | 193 | ||
153 | out_free: | 194 | spin_unlock_bh(&tcp_probe.lock); |
154 | vfree(tbuf); | 195 | |
196 | /* if record greater than space available | ||
197 | return partial buffer (so far) */ | ||
198 | if (width >= len) | ||
199 | break; | ||
200 | |||
201 | error = copy_to_user(buf + cnt, tbuf, width); | ||
202 | if (error) | ||
203 | break; | ||
204 | cnt += width; | ||
205 | } | ||
155 | 206 | ||
156 | return error ? error : cnt; | 207 | return cnt == 0 ? error : cnt; |
157 | } | 208 | } |
158 | 209 | ||
159 | static const struct file_operations tcpprobe_fops = { | 210 | static const struct file_operations tcpprobe_fops = { |
@@ -166,34 +217,37 @@ static __init int tcpprobe_init(void) | |||
166 | { | 217 | { |
167 | int ret = -ENOMEM; | 218 | int ret = -ENOMEM; |
168 | 219 | ||
169 | init_waitqueue_head(&tcpw.wait); | 220 | init_waitqueue_head(&tcp_probe.wait); |
170 | spin_lock_init(&tcpw.lock); | 221 | spin_lock_init(&tcp_probe.lock); |
171 | tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); | 222 | |
172 | if (IS_ERR(tcpw.fifo)) | 223 | if (bufsize < 0) |
173 | return PTR_ERR(tcpw.fifo); | 224 | return -EINVAL; |
225 | |||
226 | tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); | ||
227 | if (!tcp_probe.log) | ||
228 | goto err0; | ||
174 | 229 | ||
175 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) | 230 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) |
176 | goto err0; | 231 | goto err0; |
177 | 232 | ||
178 | ret = register_jprobe(&tcp_probe); | 233 | ret = register_jprobe(&tcp_jprobe); |
179 | if (ret) | 234 | if (ret) |
180 | goto err1; | 235 | goto err1; |
181 | 236 | ||
182 | pr_info("TCP watch registered (port=%d)\n", port); | 237 | pr_info("TCP probe registered (port=%d)\n", port); |
183 | return 0; | 238 | return 0; |
184 | err1: | 239 | err1: |
185 | proc_net_remove(procname); | 240 | proc_net_remove(procname); |
186 | err0: | 241 | err0: |
187 | kfifo_free(tcpw.fifo); | 242 | kfree(tcp_probe.log); |
188 | return ret; | 243 | return ret; |
189 | } | 244 | } |
190 | module_init(tcpprobe_init); | 245 | module_init(tcpprobe_init); |
191 | 246 | ||
192 | static __exit void tcpprobe_exit(void) | 247 | static __exit void tcpprobe_exit(void) |
193 | { | 248 | { |
194 | kfifo_free(tcpw.fifo); | ||
195 | proc_net_remove(procname); | 249 | proc_net_remove(procname); |
196 | unregister_jprobe(&tcp_probe); | 250 | unregister_jprobe(&tcp_jprobe); |
197 | 251 | kfree(tcp_probe.log); | |
198 | } | 252 | } |
199 | module_exit(tcpprobe_exit); | 253 | module_exit(tcpprobe_exit); |