diff options
-rw-r--r-- | net/ipv4/tcp_probe.c | 194 |
1 files changed, 124 insertions, 70 deletions
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d9323dfff826..86624fabc4bf 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -6,8 +6,7 @@ | |||
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 9 | * the Free Software Foundation; either version 2 of the License. |
10 | * (at your option) any later version. | ||
11 | * | 10 | * |
12 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
@@ -25,23 +24,22 @@ | |||
25 | #include <linux/tcp.h> | 24 | #include <linux/tcp.h> |
26 | #include <linux/proc_fs.h> | 25 | #include <linux/proc_fs.h> |
27 | #include <linux/module.h> | 26 | #include <linux/module.h> |
28 | #include <linux/kfifo.h> | ||
29 | #include <linux/ktime.h> | 27 | #include <linux/ktime.h> |
30 | #include <linux/time.h> | 28 | #include <linux/time.h> |
31 | #include <linux/vmalloc.h> | ||
32 | 29 | ||
33 | #include <net/tcp.h> | 30 | #include <net/tcp.h> |
34 | 31 | ||
35 | MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); | 32 | MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); |
36 | MODULE_DESCRIPTION("TCP cwnd snooper"); | 33 | MODULE_DESCRIPTION("TCP cwnd snooper"); |
37 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
35 | MODULE_VERSION("1.1"); | ||
38 | 36 | ||
39 | static int port __read_mostly = 0; | 37 | static int port __read_mostly = 0; |
40 | MODULE_PARM_DESC(port, "Port to match (0=all)"); | 38 | MODULE_PARM_DESC(port, "Port to match (0=all)"); |
41 | module_param(port, int, 0); | 39 | module_param(port, int, 0); |
42 | 40 | ||
43 | static int bufsize __read_mostly = 64*1024; | 41 | static int bufsize __read_mostly = 4096; |
44 | MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); | 42 | MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); |
45 | module_param(bufsize, int, 0); | 43 | module_param(bufsize, int, 0); |
46 | 44 | ||
47 | static int full __read_mostly; | 45 | static int full __read_mostly; |
@@ -50,39 +48,38 @@ module_param(full, int, 0); | |||
50 | 48 | ||
51 | static const char procname[] = "tcpprobe"; | 49 | static const char procname[] = "tcpprobe"; |
52 | 50 | ||
53 | struct { | 51 | struct tcp_log { |
54 | struct kfifo *fifo; | 52 | ktime_t tstamp; |
53 | __be32 saddr, daddr; | ||
54 | __be16 sport, dport; | ||
55 | u16 length; | ||
56 | u32 snd_nxt; | ||
57 | u32 snd_una; | ||
58 | u32 snd_wnd; | ||
59 | u32 snd_cwnd; | ||
60 | u32 ssthresh; | ||
61 | u32 srtt; | ||
62 | }; | ||
63 | |||
64 | static struct { | ||
55 | spinlock_t lock; | 65 | spinlock_t lock; |
56 | wait_queue_head_t wait; | 66 | wait_queue_head_t wait; |
57 | ktime_t start; | 67 | ktime_t start; |
58 | u32 lastcwnd; | 68 | u32 lastcwnd; |
59 | } tcpw; | ||
60 | 69 | ||
61 | /* | 70 | unsigned long head, tail; |
62 | * Print to log with timestamps. | 71 | struct tcp_log *log; |
63 | * FIXME: causes an extra copy | 72 | } tcp_probe; |
64 | */ | 73 | |
65 | static void printl(const char *fmt, ...) | ||
66 | __attribute__ ((format (printf, 1, 2))); | ||
67 | 74 | ||
68 | static void printl(const char *fmt, ...) | 75 | static inline int tcp_probe_used(void) |
69 | { | 76 | { |
70 | va_list args; | 77 | return (tcp_probe.head - tcp_probe.tail) % bufsize; |
71 | int len; | 78 | } |
72 | struct timespec tv; | 79 | |
73 | char tbuf[256]; | 80 | static inline int tcp_probe_avail(void) |
74 | 81 | { | |
75 | va_start(args, fmt); | 82 | return bufsize - tcp_probe_used(); |
76 | /* want monotonic time since start of tcp_probe */ | ||
77 | tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); | ||
78 | |||
79 | len = sprintf(tbuf, "%lu.%09lu ", | ||
80 | (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); | ||
81 | len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); | ||
82 | va_end(args); | ||
83 | |||
84 | kfifo_put(tcpw.fifo, tbuf, len); | ||
85 | wake_up(&tcpw.wait); | ||
86 | } | 83 | } |
87 | 84 | ||
88 | /* | 85 | /* |
@@ -97,63 +94,117 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
97 | 94 | ||
98 | /* Only update if port matches */ | 95 | /* Only update if port matches */ |
99 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) | 96 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) |
100 | && (full || tp->snd_cwnd != tcpw.lastcwnd)) { | 97 | && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { |
101 | printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n", | 98 | |
102 | NIPQUAD(inet->saddr), ntohs(inet->sport), | 99 | spin_lock(&tcp_probe.lock); |
103 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 100 | /* If log fills, just silently drop */ |
104 | skb->len, tp->snd_nxt, tp->snd_una, | 101 | if (tcp_probe_avail() > 1) { |
105 | tp->snd_cwnd, tcp_current_ssthresh(sk), | 102 | struct tcp_log *p = tcp_probe.log + tcp_probe.head; |
106 | tp->snd_wnd, tp->srtt >> 3); | 103 | |
107 | tcpw.lastcwnd = tp->snd_cwnd; | 104 | p->tstamp = ktime_get(); |
105 | p->saddr = inet->saddr; | ||
106 | p->sport = inet->sport; | ||
107 | p->daddr = inet->daddr; | ||
108 | p->dport = inet->dport; | ||
109 | p->length = skb->len; | ||
110 | p->snd_nxt = tp->snd_nxt; | ||
111 | p->snd_una = tp->snd_una; | ||
112 | p->snd_cwnd = tp->snd_cwnd; | ||
113 | p->snd_wnd = tp->snd_wnd; | ||
114 | p->srtt = tp->srtt >> 3; | ||
115 | |||
116 | tcp_probe.head = (tcp_probe.head + 1) % bufsize; | ||
117 | } | ||
118 | tcp_probe.lastcwnd = tp->snd_cwnd; | ||
119 | spin_unlock(&tcp_probe.lock); | ||
120 | |||
121 | wake_up(&tcp_probe.wait); | ||
108 | } | 122 | } |
109 | 123 | ||
110 | jprobe_return(); | 124 | jprobe_return(); |
111 | return 0; | 125 | return 0; |
112 | } | 126 | } |
113 | 127 | ||
114 | static struct jprobe tcp_probe = { | 128 | static struct jprobe tcp_jprobe = { |
115 | .kp = { | 129 | .kp = { |
116 | .symbol_name = "tcp_rcv_established", | 130 | .symbol_name = "tcp_rcv_established", |
117 | }, | 131 | }, |
118 | .entry = JPROBE_ENTRY(jtcp_rcv_established), | 132 | .entry = JPROBE_ENTRY(jtcp_rcv_established), |
119 | }; | 133 | }; |
120 | 134 | ||
121 | |||
122 | static int tcpprobe_open(struct inode * inode, struct file * file) | 135 | static int tcpprobe_open(struct inode * inode, struct file * file) |
123 | { | 136 | { |
124 | kfifo_reset(tcpw.fifo); | 137 | /* Reset (empty) log */ |
125 | tcpw.start = ktime_get(); | 138 | spin_lock_bh(&tcp_probe.lock); |
139 | tcp_probe.head = tcp_probe.tail = 0; | ||
140 | tcp_probe.start = ktime_get(); | ||
141 | spin_unlock_bh(&tcp_probe.lock); | ||
142 | |||
126 | return 0; | 143 | return 0; |
127 | } | 144 | } |
128 | 145 | ||
146 | static int tcpprobe_sprint(char *tbuf, int n) | ||
147 | { | ||
148 | const struct tcp_log *p | ||
149 | = tcp_probe.log + tcp_probe.tail % bufsize; | ||
150 | struct timespec tv | ||
151 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | ||
152 | |||
153 | return snprintf(tbuf, n, | ||
154 | "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" | ||
155 | " %d %#x %#x %u %u %u %u\n", | ||
156 | (unsigned long) tv.tv_sec, | ||
157 | (unsigned long) tv.tv_nsec, | ||
158 | NIPQUAD(p->saddr), ntohs(p->sport), | ||
159 | NIPQUAD(p->daddr), ntohs(p->dport), | ||
160 | p->length, p->snd_nxt, p->snd_una, | ||
161 | p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); | ||
162 | } | ||
163 | |||
129 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, | 164 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, |
130 | size_t len, loff_t *ppos) | 165 | size_t len, loff_t *ppos) |
131 | { | 166 | { |
132 | int error = 0, cnt = 0; | 167 | int error = 0, cnt = 0; |
133 | unsigned char *tbuf; | ||
134 | 168 | ||
135 | if (!buf || len < 0) | 169 | if (!buf || len < 0) |
136 | return -EINVAL; | 170 | return -EINVAL; |
137 | 171 | ||
138 | if (len == 0) | 172 | while (cnt < len) { |
139 | return 0; | 173 | char tbuf[128]; |
174 | int width; | ||
175 | |||
176 | /* Wait for data in buffer */ | ||
177 | error = wait_event_interruptible(tcp_probe.wait, | ||
178 | tcp_probe_used() > 0); | ||
179 | if (error) | ||
180 | break; | ||
140 | 181 | ||
141 | tbuf = vmalloc(len); | 182 | spin_lock_bh(&tcp_probe.lock); |
142 | if (!tbuf) | 183 | if (tcp_probe.head == tcp_probe.tail) { |
143 | return -ENOMEM; | 184 | /* multiple readers race? */ |
185 | spin_unlock_bh(&tcp_probe.lock); | ||
186 | continue; | ||
187 | } | ||
144 | 188 | ||
145 | error = wait_event_interruptible(tcpw.wait, | 189 | width = tcpprobe_sprint(tbuf, sizeof(tbuf)); |
146 | __kfifo_len(tcpw.fifo) != 0); | ||
147 | if (error) | ||
148 | goto out_free; | ||
149 | 190 | ||
150 | cnt = kfifo_get(tcpw.fifo, tbuf, len); | 191 | if (width < len) |
151 | error = copy_to_user(buf, tbuf, cnt); | 192 | tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; |
152 | 193 | ||
153 | out_free: | 194 | spin_unlock_bh(&tcp_probe.lock); |
154 | vfree(tbuf); | 195 | |
196 | /* if record greater than space available | ||
197 | return partial buffer (so far) */ | ||
198 | if (width >= len) | ||
199 | break; | ||
200 | |||
201 | error = copy_to_user(buf + cnt, tbuf, width); | ||
202 | if (error) | ||
203 | break; | ||
204 | cnt += width; | ||
205 | } | ||
155 | 206 | ||
156 | return error ? error : cnt; | 207 | return cnt == 0 ? error : cnt; |
157 | } | 208 | } |
158 | 209 | ||
159 | static const struct file_operations tcpprobe_fops = { | 210 | static const struct file_operations tcpprobe_fops = { |
@@ -166,34 +217,37 @@ static __init int tcpprobe_init(void) | |||
166 | { | 217 | { |
167 | int ret = -ENOMEM; | 218 | int ret = -ENOMEM; |
168 | 219 | ||
169 | init_waitqueue_head(&tcpw.wait); | 220 | init_waitqueue_head(&tcp_probe.wait); |
170 | spin_lock_init(&tcpw.lock); | 221 | spin_lock_init(&tcp_probe.lock); |
171 | tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); | 222 | |
172 | if (IS_ERR(tcpw.fifo)) | 223 | if (bufsize < 0) |
173 | return PTR_ERR(tcpw.fifo); | 224 | return -EINVAL; |
225 | |||
226 | tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); | ||
227 | if (!tcp_probe.log) | ||
228 | goto err0; | ||
174 | 229 | ||
175 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) | 230 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) |
176 | goto err0; | 231 | goto err0; |
177 | 232 | ||
178 | ret = register_jprobe(&tcp_probe); | 233 | ret = register_jprobe(&tcp_jprobe); |
179 | if (ret) | 234 | if (ret) |
180 | goto err1; | 235 | goto err1; |
181 | 236 | ||
182 | pr_info("TCP watch registered (port=%d)\n", port); | 237 | pr_info("TCP probe registered (port=%d)\n", port); |
183 | return 0; | 238 | return 0; |
184 | err1: | 239 | err1: |
185 | proc_net_remove(procname); | 240 | proc_net_remove(procname); |
186 | err0: | 241 | err0: |
187 | kfifo_free(tcpw.fifo); | 242 | kfree(tcp_probe.log); |
188 | return ret; | 243 | return ret; |
189 | } | 244 | } |
190 | module_init(tcpprobe_init); | 245 | module_init(tcpprobe_init); |
191 | 246 | ||
192 | static __exit void tcpprobe_exit(void) | 247 | static __exit void tcpprobe_exit(void) |
193 | { | 248 | { |
194 | kfifo_free(tcpw.fifo); | ||
195 | proc_net_remove(procname); | 249 | proc_net_remove(procname); |
196 | unregister_jprobe(&tcp_probe); | 250 | unregister_jprobe(&tcp_jprobe); |
197 | 251 | kfree(tcp_probe.log); | |
198 | } | 252 | } |
199 | module_exit(tcpprobe_exit); | 253 | module_exit(tcpprobe_exit); |