author     Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core
tag        v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile              17
-rw-r--r--  net/core/datagram.c           482
-rw-r--r--  net/core/dev.c               3359
-rw-r--r--  net/core/dev_mcast.c          299
-rw-r--r--  net/core/dst.c                276
-rw-r--r--  net/core/dv.c                 548
-rw-r--r--  net/core/ethtool.c            819
-rw-r--r--  net/core/filter.c             432
-rw-r--r--  net/core/flow.c               371
-rw-r--r--  net/core/gen_estimator.c      250
-rw-r--r--  net/core/gen_stats.c          239
-rw-r--r--  net/core/iovec.c              239
-rw-r--r--  net/core/link_watch.c         137
-rw-r--r--  net/core/neighbour.c         2362
-rw-r--r--  net/core/net-sysfs.c          461
-rw-r--r--  net/core/netfilter.c          799
-rw-r--r--  net/core/netpoll.c            735
-rw-r--r--  net/core/pktgen.c            3132
-rw-r--r--  net/core/rtnetlink.c          711
-rw-r--r--  net/core/scm.c                291
-rw-r--r--  net/core/skbuff.c            1460
-rw-r--r--  net/core/sock.c              1565
-rw-r--r--  net/core/stream.c             287
-rw-r--r--  net/core/sysctl_net_core.c    182
-rw-r--r--  net/core/utils.c              155
-rw-r--r--  net/core/wireless.c          1459
26 files changed, 21067 insertions(+), 0 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
new file mode 100644
index 000000000000..81f03243fe2f
--- /dev/null
+++ b/net/core/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the Linux networking core.
+#
+
+obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+
+obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+obj-y += flow.o dev.o ethtool.o dev_mcast.o dst.o \
+        neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+
+obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-$(CONFIG_NETFILTER) += netfilter.o
+obj-$(CONFIG_NET_DIVERT) += dv.o
+obj-$(CONFIG_NET_PKTGEN) += pktgen.o
+obj-$(CONFIG_NET_RADIO) += wireless.o
+obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
new file mode 100644
index 000000000000..d1bfd279cc1a
--- /dev/null
+++ b/net/core/datagram.c
@@ -0,0 +1,482 @@
+/*
+ *      SUCS NET3:
+ *
+ *      Generic datagram handling routines. These are generic for all
+ *      protocols. Possibly a generic IP version on top of these would
+ *      make sense. Not tonight however 8-).
+ *      This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and the
+ *      NetROM layer all have identical poll code and mostly
+ *      identical recvmsg() code. So we share it here. The poll was
+ *      shared before but buried in udp.c so I moved it.
+ *
+ *      Authors:        Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *                      udp.c code)
+ *
+ *      Fixes:
+ *              Alan Cox        :       NULL return from skb_peek_copy()
+ *                                      understood
+ *              Alan Cox        :       Rewrote skb_read_datagram to avoid the
+ *                                      skb_peek_copy stuff.
+ *              Alan Cox        :       Added support for SOCK_SEQPACKET.
+ *                                      IPX can no longer use the SO_TYPE hack
+ *                                      but AX.25 now works right, and SPX is
+ *                                      feasible.
+ *              Alan Cox        :       Fixed write poll of non IP protocol
+ *                                      crash.
+ *              Florian La Roche:       Changed for my new skbuff handling.
+ *              Darryl Miles    :       Fixed non-blocking SOCK_SEQPACKET.
+ *              Linus Torvalds  :       BSD semantic fixes.
+ *              Alan Cox        :       Datagram iovec handling
+ *              Darryl Miles    :       Fixed non-blocking SOCK_STREAM.
+ *              Alan Cox        :       POSIXisms
+ *              Pete Wyckoff    :       Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+
+/*
+ *      Is a socket 'connection oriented' ?
+ */
+static inline int connection_based(struct sock *sk)
+{
+        return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
+}
+
+/*
+ * Wait for a packet..
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+        int error;
+        DEFINE_WAIT(wait);
+
+        prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+        /* Socket errors? */
+        error = sock_error(sk);
+        if (error)
+                goto out_err;
+
+        if (!skb_queue_empty(&sk->sk_receive_queue))
+                goto out;
+
+        /* Socket shut down? */
+        if (sk->sk_shutdown & RCV_SHUTDOWN)
+                goto out_noerr;
+
+        /* Sequenced packets can come disconnected.
+         * If so we report the problem
+         */
+        error = -ENOTCONN;
+        if (connection_based(sk) &&
+            !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+                goto out_err;
+
+        /* handle signals */
+        if (signal_pending(current))
+                goto interrupted;
+
+        error = 0;
+        *timeo_p = schedule_timeout(*timeo_p);
+out:
+        finish_wait(sk->sk_sleep, &wait);
+        return error;
+interrupted:
+        error = sock_intr_errno(*timeo_p);
+out_err:
+        *err = error;
+        goto out;
+out_noerr:
+        *err = 0;
+        error = 1;
+        goto out;
+}
+
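For context, the timeout that wait_for_packet() consumes comes from sock_rcvtimeo(), which userspace configures via SO_RCVTIMEO. A minimal userspace sketch of that behavior, assuming an unbound UDP socket and an arbitrary two-second timeout (error handling trimmed):

#include <stdio.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };
        char buf[1500];

        /* Bounded by SO_RCVTIMEO, recvfrom() gives up with EAGAIN or
         * EWOULDBLOCK; this is the *timeo_p expiry path of
         * wait_for_packet() above. */
        setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
        if (recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL) < 0 &&
            (errno == EAGAIN || errno == EWOULDBLOCK))
                printf("timed out waiting for a datagram\n");
        return 0;
}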
+/**
+ *      skb_recv_datagram - Receive a datagram skbuff
+ *      @sk: socket
+ *      @flags: MSG_ flags
+ *      @noblock: blocking operation?
+ *      @err: error code returned
+ *
+ *      Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ *      and possible races. This replaces identical code in packet, raw and
+ *      udp, as well as the IPX, AX.25 and AppleTalk variants. It also
+ *      finally fixes the long standing peek and read race for datagram
+ *      sockets. If you alter this routine remember it must be re-entrant.
+ *
+ *      Free the returned skbuff with skb_free_datagram() once you are
+ *      done with it.
+ *
+ *      It does not lock the socket and is free of race conditions.
+ *      This should/can improve datagram socket latencies significantly
+ *      at high loads, when copying data to user space takes lots of time.
+ *      (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ *      8) Great win.)
+ *                              --ANK (980729)
+ *
+ *      The order of the tests when we find no data waiting is specified
+ *      quite explicitly by POSIX 1003.1g; don't change it without having
+ *      the standard around please.
+ */
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+                                  int noblock, int *err)
+{
+        struct sk_buff *skb;
+        long timeo;
+        /*
+         * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+         */
+        int error = sock_error(sk);
+
+        if (error)
+                goto no_packet;
+
+        timeo = sock_rcvtimeo(sk, noblock);
+
+        do {
+                /* Again only user level code calls this function, so nothing
+                 * at interrupt level will suddenly eat the receive_queue.
+                 *
+                 * Look at the current nfs client by the way...
+                 * However, this function was correct in any case. 8)
+                 */
+                if (flags & MSG_PEEK) {
+                        unsigned long cpu_flags;
+
+                        spin_lock_irqsave(&sk->sk_receive_queue.lock,
+                                          cpu_flags);
+                        skb = skb_peek(&sk->sk_receive_queue);
+                        if (skb)
+                                atomic_inc(&skb->users);
+                        spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+                                               cpu_flags);
+                } else
+                        skb = skb_dequeue(&sk->sk_receive_queue);
+
+                if (skb)
+                        return skb;
+
+                /* User doesn't want to wait */
+                error = -EAGAIN;
+                if (!timeo)
+                        goto no_packet;
+
+        } while (!wait_for_packet(sk, err, &timeo));
+
+        return NULL;
+
+no_packet:
+        *err = error;
+        return NULL;
+}
+
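The MSG_PEEK branch above bumps skb->users and leaves the skb on sk_receive_queue, which is why a peek followed by a normal read returns the same datagram. A minimal userspace sketch, assuming fd is a bound SOCK_DGRAM socket (error handling trimmed):

#include <sys/types.h>
#include <sys/socket.h>

/* Peek, then read, on a bound datagram socket fd. */
static void peek_then_read(int fd)
{
        char buf[2048];

        /* MSG_PEEK: the skb stays on sk_receive_queue (users is bumped) */
        ssize_t peeked = recv(fd, buf, sizeof(buf), MSG_PEEK);

        /* plain read: the very same datagram, dequeued this time */
        ssize_t taken = recv(fd, buf, sizeof(buf), 0);

        (void)peeked;
        (void)taken;
}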
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+        kfree_skb(skb);
+}
+
+/**
+ *      skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *      @skb: buffer to copy
+ *      @offset: offset in the buffer to start copying from
+ *      @to: io vector to copy to
+ *      @len: amount of data to copy from buffer to iovec
+ *
+ *      Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+                            struct iovec *to, int len)
+{
+        int start = skb_headlen(skb);
+        int i, copy = start - offset;
+
+        /* Copy header. */
+        if (copy > 0) {
+                if (copy > len)
+                        copy = len;
+                if (memcpy_toiovec(to, skb->data + offset, copy))
+                        goto fault;
+                if ((len -= copy) == 0)
+                        return 0;
+                offset += copy;
+        }
+
+        /* Copy paged appendix. Hmm... why does this look so complicated? */
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                int end;
+
+                BUG_TRAP(start <= offset + len);
+
+                end = start + skb_shinfo(skb)->frags[i].size;
+                if ((copy = end - offset) > 0) {
+                        int err;
+                        u8 *vaddr;
+                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                        struct page *page = frag->page;
+
+                        if (copy > len)
+                                copy = len;
+                        vaddr = kmap(page);
+                        err = memcpy_toiovec(to, vaddr + frag->page_offset +
+                                             offset - start, copy);
+                        kunmap(page);
+                        if (err)
+                                goto fault;
+                        if (!(len -= copy))
+                                return 0;
+                        offset += copy;
+                }
+                start = end;
+        }
+
+        if (skb_shinfo(skb)->frag_list) {
+                struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+                for (; list; list = list->next) {
+                        int end;
+
+                        BUG_TRAP(start <= offset + len);
+
+                        end = start + list->len;
+                        if ((copy = end - offset) > 0) {
+                                if (copy > len)
+                                        copy = len;
+                                if (skb_copy_datagram_iovec(list,
+                                                            offset - start,
+                                                            to, copy))
+                                        goto fault;
+                                if ((len -= copy) == 0)
+                                        return 0;
+                                offset += copy;
+                        }
+                        start = end;
+                }
+        }
+        if (!len)
+                return 0;
+
+fault:
+        return -EFAULT;
+}
+
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+                                      u8 __user *to, int len,
+                                      unsigned int *csump)
+{
+        int start = skb_headlen(skb);
+        int pos = 0;
+        int i, copy = start - offset;
+
+        /* Copy header. */
+        if (copy > 0) {
+                int err = 0;
+                if (copy > len)
+                        copy = len;
+                *csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+                                               *csump, &err);
+                if (err)
+                        goto fault;
+                if ((len -= copy) == 0)
+                        return 0;
+                offset += copy;
+                to += copy;
+                pos = copy;
+        }
+
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                int end;
+
+                BUG_TRAP(start <= offset + len);
+
+                end = start + skb_shinfo(skb)->frags[i].size;
+                if ((copy = end - offset) > 0) {
+                        unsigned int csum2;
+                        int err = 0;
+                        u8 *vaddr;
+                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                        struct page *page = frag->page;
+
+                        if (copy > len)
+                                copy = len;
+                        vaddr = kmap(page);
+                        csum2 = csum_and_copy_to_user(vaddr +
+                                                      frag->page_offset +
+                                                      offset - start,
+                                                      to, copy, 0, &err);
+                        kunmap(page);
+                        if (err)
+                                goto fault;
+                        *csump = csum_block_add(*csump, csum2, pos);
+                        if (!(len -= copy))
+                                return 0;
+                        offset += copy;
+                        to += copy;
+                        pos += copy;
+                }
+                start = end;
+        }
+
+        if (skb_shinfo(skb)->frag_list) {
+                struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+                for (; list; list = list->next) {
+                        int end;
+
+                        BUG_TRAP(start <= offset + len);
+
+                        end = start + list->len;
+                        if ((copy = end - offset) > 0) {
+                                unsigned int csum2 = 0;
+                                if (copy > len)
+                                        copy = len;
+                                if (skb_copy_and_csum_datagram(list,
+                                                               offset - start,
+                                                               to, copy,
+                                                               &csum2))
+                                        goto fault;
+                                *csump = csum_block_add(*csump, csum2, pos);
+                                if ((len -= copy) == 0)
+                                        return 0;
+                                offset += copy;
+                                to += copy;
+                                pos += copy;
+                        }
+                        start = end;
+                }
+        }
+        if (!len)
+                return 0;
+
+fault:
+        return -EFAULT;
+}
+
+/**
+ *      skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
+ *      @skb: skbuff
+ *      @hlen: hardware length
+ *      @iov: io vector
+ *
+ *      Caller _must_ check that the skb will fit into this iovec.
+ *
+ *      Returns: 0       - success.
+ *               -EINVAL - checksum failure.
+ *               -EFAULT - fault during copy. Beware, in this case the iovec
+ *                         can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
+                                     int hlen, struct iovec *iov)
+{
+        unsigned int csum;
+        int chunk = skb->len - hlen;
+
+        /* Skip filled elements.
+         * Pretty silly, look at memcpy_toiovec, though 8)
+         */
+        while (!iov->iov_len)
+                iov++;
+
+        if (iov->iov_len < chunk) {
+                if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
+                                                           skb->csum)))
+                        goto csum_error;
+                if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+                        goto fault;
+        } else {
+                csum = csum_partial(skb->data, hlen, skb->csum);
+                if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+                                               chunk, &csum))
+                        goto fault;
+                if ((unsigned short)csum_fold(csum))
+                        goto csum_error;
+                iov->iov_len -= chunk;
+                iov->iov_base += chunk;
+        }
+        return 0;
+csum_error:
+        return -EINVAL;
+fault:
+        return -EFAULT;
+}
+
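The csum_fold() tests above collapse a 32-bit one's-complement accumulator into the final 16-bit Internet checksum; a nonzero fold over data plus the received checksum means corruption. A portable sketch of the same arithmetic (not the kernel's optimized per-arch implementation):

#include <stdint.h>

/* Collapse a 32-bit one's-complement accumulator (as built by
 * csum_partial()/csum_and_copy_to_user()) into the 16-bit Internet
 * checksum. Folding data plus a correct checksum yields 0, which is
 * exactly the "(unsigned short)csum_fold(...)" test above. */
static uint16_t fold_csum(uint32_t sum)
{
        while (sum >> 16)                       /* fold carries back in */
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;                  /* one's complement */
}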
+/**
+ *      datagram_poll - generic datagram poll
+ *      @file: file struct
+ *      @sock: socket
+ *      @wait: poll table
+ *
+ *      Datagram poll: Again totally generic. This also handles
+ *      sequenced packet sockets providing the socket receive queue
+ *      is only ever holding data ready to receive.
+ *
+ *      Note: when you _don't_ use this routine for this protocol,
+ *      and you use a different write policy from sock_writeable()
+ *      then please supply your own write_space callback.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+                           poll_table *wait)
+{
+        struct sock *sk = sock->sk;
+        unsigned int mask;
+
+        poll_wait(file, sk->sk_sleep, wait);
+        mask = 0;
+
+        /* exceptional events? */
+        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+                mask |= POLLERR;
+        if (sk->sk_shutdown == SHUTDOWN_MASK)
+                mask |= POLLHUP;
+
+        /* readable? */
+        if (!skb_queue_empty(&sk->sk_receive_queue) ||
+            (sk->sk_shutdown & RCV_SHUTDOWN))
+                mask |= POLLIN | POLLRDNORM;
+
+        /* Connection-based need to check for termination and startup */
+        if (connection_based(sk)) {
+                if (sk->sk_state == TCP_CLOSE)
+                        mask |= POLLHUP;
+                /* connection hasn't started yet? */
+                if (sk->sk_state == TCP_SYN_SENT)
+                        return mask;
+        }
+
+        /* writable? */
+        if (sock_writeable(sk))
+                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+        else
+                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+        return mask;
+}
+
+EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+EXPORT_SYMBOL(skb_free_datagram);
+EXPORT_SYMBOL(skb_recv_datagram);
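datagram_poll() above is the kernel half of a userspace poll()/select() on a datagram socket: POLLIN is reported exactly when sk_receive_queue is non-empty or the receive side is shut down. A minimal userspace counterpart, assuming fd is a bound SOCK_DGRAM socket and an arbitrary one-second timeout:

#include <poll.h>

/* Wait up to one second for a datagram on fd, a bound SOCK_DGRAM socket. */
static int datagram_ready(int fd)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        /* POLLIN set => sk_receive_queue is non-empty; recv() won't block */
        if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
                return 1;
        return 0;                               /* timed out (or error) */
}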
diff --git a/net/core/dev.c b/net/core/dev.c
new file mode 100644
index 000000000000..42344d903692
--- /dev/null
+++ b/net/core/dev.c
@@ -0,0 +1,3359 @@ | |||
1 | /* | ||
2 | * NET3 Protocol independent device support routines. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Derived from the non IP parts of dev.c 1.0.19 | ||
10 | * Authors: Ross Biro, <bir7@leland.Stanford.Edu> | ||
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | ||
12 | * Mark Evans, <evansmp@uhura.aston.ac.uk> | ||
13 | * | ||
14 | * Additional Authors: | ||
15 | * Florian la Roche <rzsfl@rz.uni-sb.de> | ||
16 | * Alan Cox <gw4pts@gw4pts.ampr.org> | ||
17 | * David Hinds <dahinds@users.sourceforge.net> | ||
18 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
19 | * Adam Sulmicki <adam@cfar.umd.edu> | ||
20 | * Pekka Riikonen <priikone@poesidon.pspt.fi> | ||
21 | * | ||
22 | * Changes: | ||
23 | * D.J. Barrow : Fixed bug where dev->refcnt gets set | ||
24 | * to 2 if register_netdev gets called | ||
25 | * before net_dev_init & also removed a | ||
26 | * few lines of code in the process. | ||
27 | * Alan Cox : device private ioctl copies fields back. | ||
28 | * Alan Cox : Transmit queue code does relevant | ||
29 | * stunts to keep the queue safe. | ||
30 | * Alan Cox : Fixed double lock. | ||
31 | * Alan Cox : Fixed promisc NULL pointer trap | ||
32 | * ???????? : Support the full private ioctl range | ||
33 | * Alan Cox : Moved ioctl permission check into | ||
34 | * drivers | ||
35 | * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI | ||
36 | * Alan Cox : 100 backlog just doesn't cut it when | ||
37 | * you start doing multicast video 8) | ||
38 | * Alan Cox : Rewrote net_bh and list manager. | ||
39 | * Alan Cox : Fix ETH_P_ALL echoback lengths. | ||
40 | * Alan Cox : Took out transmit every packet pass | ||
41 | * Saved a few bytes in the ioctl handler | ||
42 | * Alan Cox : Network driver sets packet type before | ||
43 | * calling netif_rx. Saves a function | ||
44 | * call a packet. | ||
45 | * Alan Cox : Hashed net_bh() | ||
46 | * Richard Kooijman: Timestamp fixes. | ||
47 | * Alan Cox : Wrong field in SIOCGIFDSTADDR | ||
48 | * Alan Cox : Device lock protection. | ||
49 | * Alan Cox : Fixed nasty side effect of device close | ||
50 | * changes. | ||
51 | * Rudi Cilibrasi : Pass the right thing to | ||
52 | * set_mac_address() | ||
53 | * Dave Miller : 32bit quantity for the device lock to | ||
54 | * make it work out on a Sparc. | ||
55 | * Bjorn Ekwall : Added KERNELD hack. | ||
56 | * Alan Cox : Cleaned up the backlog initialise. | ||
57 | * Craig Metz : SIOCGIFCONF fix if space for under | ||
58 | * 1 device. | ||
59 | * Thomas Bogendoerfer : Return ENODEV for dev_open, if there | ||
60 | * is no device open function. | ||
61 | * Andi Kleen : Fix error reporting for SIOCGIFCONF | ||
62 | * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF | ||
63 | * Cyrus Durgin : Cleaned for KMOD | ||
64 | * Adam Sulmicki : Bug Fix : Network Device Unload | ||
65 | * A network device unload needs to purge | ||
66 | * the backlog queue. | ||
67 | * Paul Rusty Russell : SIOCSIFNAME | ||
68 | * Pekka Riikonen : Netdev boot-time settings code | ||
69 | * Andrew Morton : Make unregister_netdevice wait | ||
70 | * indefinitely on dev->refcnt | ||
71 | * J Hadi Salim : - Backlog queue sampling | ||
72 | * - netif_rx() feedback | ||
73 | */ | ||
74 | |||
75 | #include <asm/uaccess.h> | ||
76 | #include <asm/system.h> | ||
77 | #include <linux/bitops.h> | ||
78 | #include <linux/config.h> | ||
79 | #include <linux/cpu.h> | ||
80 | #include <linux/types.h> | ||
81 | #include <linux/kernel.h> | ||
82 | #include <linux/sched.h> | ||
83 | #include <linux/string.h> | ||
84 | #include <linux/mm.h> | ||
85 | #include <linux/socket.h> | ||
86 | #include <linux/sockios.h> | ||
87 | #include <linux/errno.h> | ||
88 | #include <linux/interrupt.h> | ||
89 | #include <linux/if_ether.h> | ||
90 | #include <linux/netdevice.h> | ||
91 | #include <linux/etherdevice.h> | ||
92 | #include <linux/notifier.h> | ||
93 | #include <linux/skbuff.h> | ||
94 | #include <net/sock.h> | ||
95 | #include <linux/rtnetlink.h> | ||
96 | #include <linux/proc_fs.h> | ||
97 | #include <linux/seq_file.h> | ||
98 | #include <linux/stat.h> | ||
99 | #include <linux/if_bridge.h> | ||
100 | #include <linux/divert.h> | ||
101 | #include <net/dst.h> | ||
102 | #include <net/pkt_sched.h> | ||
103 | #include <net/checksum.h> | ||
104 | #include <linux/highmem.h> | ||
105 | #include <linux/init.h> | ||
106 | #include <linux/kmod.h> | ||
107 | #include <linux/module.h> | ||
108 | #include <linux/kallsyms.h> | ||
109 | #include <linux/netpoll.h> | ||
110 | #include <linux/rcupdate.h> | ||
111 | #include <linux/delay.h> | ||
112 | #ifdef CONFIG_NET_RADIO | ||
113 | #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ | ||
114 | #include <net/iw_handler.h> | ||
115 | #endif /* CONFIG_NET_RADIO */ | ||
116 | #include <asm/current.h> | ||
117 | |||
118 | /* This define, if set, will randomly drop a packet when congestion | ||
119 | * is more than moderate. It helps fairness in the multi-interface | ||
120 | * case when one of them is a hog, but it kills performance for the | ||
121 | * single interface case so it is off now by default. | ||
122 | */ | ||
123 | #undef RAND_LIE | ||
124 | |||
125 | /* Setting this will sample the queue lengths and thus congestion | ||
126 | * via a timer instead of as each packet is received. | ||
127 | */ | ||
128 | #undef OFFLINE_SAMPLE | ||
129 | |||
130 | /* | ||
131 | * The list of packet types we will receive (as opposed to discard) | ||
132 | * and the routines to invoke. | ||
133 | * | ||
134 | * Why 16. Because with 16 the only overlap we get on a hash of the | ||
135 | * low nibble of the protocol value is RARP/SNAP/X.25. | ||
136 | * | ||
137 | * NOTE: That is no longer true with the addition of VLAN tags. Not | ||
138 | * sure which should go first, but I bet it won't make much | ||
139 | * difference if we are running VLANs. The good news is that | ||
140 | * this protocol won't be in the list unless compiled in, so | ||
141 | * the average user (w/out VLANs) will not be adversly affected. | ||
142 | * --BLG | ||
143 | * | ||
144 | * 0800 IP | ||
145 | * 8100 802.1Q VLAN | ||
146 | * 0001 802.3 | ||
147 | * 0002 AX.25 | ||
148 | * 0004 802.2 | ||
149 | * 8035 RARP | ||
150 | * 0005 SNAP | ||
151 | * 0805 X.25 | ||
152 | * 0806 ARP | ||
153 | * 8137 IPX | ||
154 | * 0009 Localtalk | ||
155 | * 86DD IPv6 | ||
156 | */ | ||
157 | |||
158 | static DEFINE_SPINLOCK(ptype_lock); | ||
159 | static struct list_head ptype_base[16]; /* 16 way hashed list */ | ||
160 | static struct list_head ptype_all; /* Taps */ | ||
161 | |||
162 | #ifdef OFFLINE_SAMPLE | ||
163 | static void sample_queue(unsigned long dummy); | ||
164 | static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); | ||
165 | #endif | ||
166 | |||
167 | /* | ||
168 | * The @dev_base list is protected by @dev_base_lock and the rtln | ||
169 | * semaphore. | ||
170 | * | ||
171 | * Pure readers hold dev_base_lock for reading. | ||
172 | * | ||
173 | * Writers must hold the rtnl semaphore while they loop through the | ||
174 | * dev_base list, and hold dev_base_lock for writing when they do the | ||
175 | * actual updates. This allows pure readers to access the list even | ||
176 | * while a writer is preparing to update it. | ||
177 | * | ||
178 | * To put it another way, dev_base_lock is held for writing only to | ||
179 | * protect against pure readers; the rtnl semaphore provides the | ||
180 | * protection against other writers. | ||
181 | * | ||
182 | * See, for example usages, register_netdevice() and | ||
183 | * unregister_netdevice(), which must be called with the rtnl | ||
184 | * semaphore held. | ||
185 | */ | ||
186 | struct net_device *dev_base; | ||
187 | static struct net_device **dev_tail = &dev_base; | ||
188 | DEFINE_RWLOCK(dev_base_lock); | ||
189 | |||
190 | EXPORT_SYMBOL(dev_base); | ||
191 | EXPORT_SYMBOL(dev_base_lock); | ||
192 | |||
193 | #define NETDEV_HASHBITS 8 | ||
194 | static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; | ||
195 | static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; | ||
196 | |||
197 | static inline struct hlist_head *dev_name_hash(const char *name) | ||
198 | { | ||
199 | unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); | ||
200 | return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; | ||
201 | } | ||
202 | |||
203 | static inline struct hlist_head *dev_index_hash(int ifindex) | ||
204 | { | ||
205 | return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; | ||
206 | } | ||
207 | |||
+/*
+ *      Our notifier list
+ */
+
+static struct notifier_block *netdev_chain;
+
+/*
+ *      Device drivers call our routines to queue packets here. We empty the
+ *      queue in the local softnet handler.
+ */
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+
+#ifdef CONFIG_SYSFS
+extern int netdev_sysfs_init(void);
+extern int netdev_register_sysfs(struct net_device *);
+extern void netdev_unregister_sysfs(struct net_device *);
+#else
+#define netdev_sysfs_init()             (0)
+#define netdev_register_sysfs(dev)      (0)
+#define netdev_unregister_sysfs(dev)    do { } while(0)
+#endif
+
+
+/*******************************************************************************
+
+                Protocol management and registration routines
+
+*******************************************************************************/
+
+/*
+ *      For efficiency
+ */
+
+int netdev_nit;
+
+/*
+ *      Add a protocol ID to the list. Now that the input handler is
+ *      smarter we can dispense with all the messy stuff that used to be
+ *      here.
+ *
+ *      BEWARE!!! Protocol handlers that mangle input packets
+ *      MUST BE last in the hash buckets, and checking of protocol handlers
+ *      MUST start from the promiscuous ptype_all chain in net_bh.
+ *      This is true now; do not change it.
+ *      Explanation: if a handler that mangles packets were first on the
+ *      list, it would have no way to sense that the packet is cloned and
+ *      should be copied-on-write, so it would change it and subsequent
+ *      readers would get a broken packet.
+ *                                              --ANK (980803)
+ */
+
+/**
+ *      dev_add_pack - add packet handler
+ *      @pt: packet type declaration
+ *
+ *      Add a protocol handler to the networking stack. The passed &packet_type
+ *      is linked into kernel lists and may not be freed until it has been
+ *      removed from the kernel lists.
+ *
+ *      This call does not sleep and therefore cannot guarantee that
+ *      all CPUs currently in the middle of receiving packets
+ *      will see the new packet type (until the next received packet).
+ */
+void dev_add_pack(struct packet_type *pt)
+{
+        int hash;
+
+        spin_lock_bh(&ptype_lock);
+        if (pt->type == htons(ETH_P_ALL)) {
+                netdev_nit++;
+                list_add_rcu(&pt->list, &ptype_all);
+        } else {
+                hash = ntohs(pt->type) & 15;
+                list_add_rcu(&pt->list, &ptype_base[hash]);
+        }
+        spin_unlock_bh(&ptype_lock);
+}
+
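A hedged sketch of how a handler would use dev_add_pack(): an ETH_P_ALL entry lands on the ptype_all tap list maintained above. The handler and its registration below are illustrative, not part of this patch; the three-argument func signature matches the ptype->func(skb2, skb->dev, ptype) call made later in dev_queue_xmit_nit():

#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>

/* Illustrative handler: taps may see shared skbs; consume our reference. */
static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
                   struct packet_type *pt)
{
        /* a real tap would inspect skb before dropping it */
        kfree_skb(skb);
        return 0;
}

static struct packet_type tap_ptype = {
        .type = __constant_htons(ETH_P_ALL),    /* joins ptype_all above */
        .func = tap_rcv,
};

/* dev_add_pack(&tap_ptype) in module init,
 * dev_remove_pack(&tap_ptype) in module exit. */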
+extern void linkwatch_run_queue(void);
+
+
+
+/**
+ *      __dev_remove_pack - remove packet handler
+ *      @pt: packet type declaration
+ *
+ *      Remove a protocol handler that was previously added to the kernel
+ *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *      from the kernel lists and can be freed or reused once this function
+ *      returns.
+ *
+ *      The packet type might still be in use by receivers
+ *      and must not be freed until after all the CPUs have gone
+ *      through a quiescent state.
+ */
+void __dev_remove_pack(struct packet_type *pt)
+{
+        struct list_head *head;
+        struct packet_type *pt1;
+
+        spin_lock_bh(&ptype_lock);
+
+        if (pt->type == htons(ETH_P_ALL)) {
+                netdev_nit--;
+                head = &ptype_all;
+        } else
+                head = &ptype_base[ntohs(pt->type) & 15];
+
+        list_for_each_entry(pt1, head, list) {
+                if (pt == pt1) {
+                        list_del_rcu(&pt->list);
+                        goto out;
+                }
+        }
+
+        printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
+out:
+        spin_unlock_bh(&ptype_lock);
+}
+
+/**
+ *      dev_remove_pack - remove packet handler
+ *      @pt: packet type declaration
+ *
+ *      Remove a protocol handler that was previously added to the kernel
+ *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *      from the kernel lists and can be freed or reused once this function
+ *      returns.
+ *
+ *      This call sleeps to guarantee that no CPU is looking at the packet
+ *      type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+        __dev_remove_pack(pt);
+
+        synchronize_net();
+}
+
+/*******************************************************************************
+
+                Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *      netdev_boot_setup_add - add new setup entry
+ *      @name: name of the device
+ *      @map: configured settings for the device
+ *
+ *      Adds a new setup entry to the dev_boot_setup list.  The function
+ *      returns 0 on error and 1 on success.  This is a generic routine
+ *      for all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+        struct netdev_boot_setup *s;
+        int i;
+
+        s = dev_boot_setup;
+        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+                if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+                        memset(s[i].name, 0, sizeof(s[i].name));
+                        strcpy(s[i].name, name);
+                        memcpy(&s[i].map, map, sizeof(s[i].map));
+                        break;
+                }
+        }
+
+        return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ *      netdev_boot_setup_check - check boot time settings
+ *      @dev: the netdevice
+ *
+ *      Check boot time settings for the device.
+ *      The found settings are set for the device to be used
+ *      later in the device probing.
+ *      Returns 0 if no settings are found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+        struct netdev_boot_setup *s = dev_boot_setup;
+        int i;
+
+        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+                if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+                    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
+                        dev->irq        = s[i].map.irq;
+                        dev->base_addr  = s[i].map.base_addr;
+                        dev->mem_start  = s[i].map.mem_start;
+                        dev->mem_end    = s[i].map.mem_end;
+                        return 1;
+                }
+        }
+        return 0;
+}
+
+
+/**
+ *      netdev_boot_base - get address from boot time settings
+ *      @prefix: prefix for network device
+ *      @unit: id for network device
+ *
+ *      Check boot time settings for the base address of the device.
+ *      The found settings are set for the device to be used
+ *      later in the device probing.
+ *      Returns 0 if no settings are found.
+ */
+unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+        const struct netdev_boot_setup *s = dev_boot_setup;
+        char name[IFNAMSIZ];
+        int i;
+
+        sprintf(name, "%s%d", prefix, unit);
+
+        /*
+         * If the device is already registered, then return a base of 1
+         * to indicate not to probe for this interface.
+         */
+        if (__dev_get_by_name(name))
+                return 1;
+
+        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+                if (!strcmp(name, s[i].name))
+                        return s[i].map.base_addr;
+        return 0;
+}
+
+/*
+ * Saves the settings configured at boot time for any netdevice.
+ */
+int __init netdev_boot_setup(char *str)
+{
+        int ints[5];
+        struct ifmap map;
+
+        str = get_options(str, ARRAY_SIZE(ints), ints);
+        if (!str || !*str)
+                return 0;
+
+        /* Save settings */
+        memset(&map, 0, sizeof(map));
+        if (ints[0] > 0)
+                map.irq = ints[1];
+        if (ints[0] > 1)
+                map.base_addr = ints[2];
+        if (ints[0] > 2)
+                map.mem_start = ints[3];
+        if (ints[0] > 3)
+                map.mem_end = ints[4];
+
+        /* Add the new entry to the list */
+        return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
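A worked example of the parsing above, assuming a hypothetical command line of netdev=5,0x300,0,0,eth0: get_options() stores the count 4 in ints[0] and the four numbers in ints[1..4], and the leftover string "eth0" becomes the entry's name. The resulting ifmap would be:

struct ifmap map = {
        .irq       = 5,         /* ints[1] */
        .base_addr = 0x300,     /* ints[2] */
        .mem_start = 0,         /* ints[3] */
        .mem_end   = 0,         /* ints[4] */
};
/* netdev_boot_setup_add("eth0", &map) then records the entry. */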
+/*******************************************************************************
+
+                Device Interface Subroutines
+
+*******************************************************************************/
+
+/**
+ *      __dev_get_by_name - find a device by its name
+ *      @name: name to find
+ *
+ *      Find an interface by name. Must be called under the RTNL semaphore
+ *      or @dev_base_lock. If the name is found a pointer to the device
+ *      is returned. If the name is not found then %NULL is returned. The
+ *      reference counters are not incremented so the caller must be
+ *      careful with locks.
+ */
+struct net_device *__dev_get_by_name(const char *name)
+{
+        struct hlist_node *p;
+
+        hlist_for_each(p, dev_name_hash(name)) {
+                struct net_device *dev
+                        = hlist_entry(p, struct net_device, name_hlist);
+                if (!strncmp(dev->name, name, IFNAMSIZ))
+                        return dev;
+        }
+        return NULL;
+}
+
+/**
+ *      dev_get_by_name - find a device by its name
+ *      @name: name to find
+ *
+ *      Find an interface by name. This can be called from any
+ *      context and does its own locking. The returned handle has
+ *      the usage count incremented and the caller must use dev_put() to
+ *      release it when it is no longer needed. %NULL is returned if no
+ *      matching device is found.
+ */
+struct net_device *dev_get_by_name(const char *name)
+{
+        struct net_device *dev;
+
+        read_lock(&dev_base_lock);
+        dev = __dev_get_by_name(name);
+        if (dev)
+                dev_hold(dev);
+        read_unlock(&dev_base_lock);
+        return dev;
+}
+
+/**
+ *      __dev_get_by_index - find a device by its ifindex
+ *      @ifindex: index of device
+ *
+ *      Search for an interface by index. Returns %NULL if the device
+ *      is not found or a pointer to the device. The device has not
+ *      had its reference counter increased so the caller must be careful
+ *      about locking. The caller must hold either the RTNL semaphore
+ *      or @dev_base_lock.
+ */
+struct net_device *__dev_get_by_index(int ifindex)
+{
+        struct hlist_node *p;
+
+        hlist_for_each(p, dev_index_hash(ifindex)) {
+                struct net_device *dev
+                        = hlist_entry(p, struct net_device, index_hlist);
+                if (dev->ifindex == ifindex)
+                        return dev;
+        }
+        return NULL;
+}
+
+
+/**
+ *      dev_get_by_index - find a device by its ifindex
+ *      @ifindex: index of device
+ *
+ *      Search for an interface by index. Returns %NULL if the device
+ *      is not found or a pointer to the device. The device returned has
+ *      had a reference added and the pointer is safe until the user calls
+ *      dev_put to indicate they have finished with it.
+ */
+struct net_device *dev_get_by_index(int ifindex)
+{
+        struct net_device *dev;
+
+        read_lock(&dev_base_lock);
+        dev = __dev_get_by_index(ifindex);
+        if (dev)
+                dev_hold(dev);
+        read_unlock(&dev_base_lock);
+        return dev;
+}
+
+/**
+ *      dev_getbyhwaddr - find a device by its hardware address
+ *      @type: media type of device
+ *      @ha: hardware address
+ *
+ *      Search for an interface by MAC address. Returns %NULL if the device
+ *      is not found or a pointer to the device. The caller must hold the
+ *      rtnl semaphore. The returned device has not had its ref count
+ *      increased and the caller must therefore be careful about locking.
+ *
+ *      BUGS:
+ *      If the API was consistent this would be __dev_get_by_hwaddr
+ */
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+        struct net_device *dev;
+
+        ASSERT_RTNL();
+
+        for (dev = dev_base; dev; dev = dev->next)
+                if (dev->type == type &&
+                    !memcmp(dev->dev_addr, ha, dev->addr_len))
+                        break;
+        return dev;
+}
+
+struct net_device *dev_getfirstbyhwtype(unsigned short type)
+{
+        struct net_device *dev;
+
+        rtnl_lock();
+        for (dev = dev_base; dev; dev = dev->next) {
+                if (dev->type == type) {
+                        dev_hold(dev);
+                        break;
+                }
+        }
+        rtnl_unlock();
+        return dev;
+}
+
+EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
+/**
+ *      dev_get_by_flags - find any device with given flags
+ *      @if_flags: IFF_* values
+ *      @mask: bitmask of bits in if_flags to check
+ *
+ *      Search for any interface with the given flags. Returns %NULL if no
+ *      device is found, or a pointer to the device. The device returned has
+ *      had a reference added and the pointer is safe until the user calls
+ *      dev_put to indicate they have finished with it.
+ */
+struct net_device *dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+{
+        struct net_device *dev;
+
+        read_lock(&dev_base_lock);
+        for (dev = dev_base; dev != NULL; dev = dev->next) {
+                if (((dev->flags ^ if_flags) & mask) == 0) {
+                        dev_hold(dev);
+                        break;
+                }
+        }
+        read_unlock(&dev_base_lock);
+        return dev;
+}
+
+/**
+ *      dev_valid_name - check if name is okay for network device
+ *      @name: name string
+ *
+ *      Network device names need to be valid file names
+ *      to allow sysfs to work.
+ */
+static int dev_valid_name(const char *name)
+{
+        return !(*name == '\0'
+                 || !strcmp(name, ".")
+                 || !strcmp(name, "..")
+                 || strchr(name, '/'));
+}
+
+/**
+ *      dev_alloc_name - allocate a name for a device
+ *      @dev: device
+ *      @name: name format string
+ *
+ *      Passed a format string, e.g. "lt%d", it will try to find a suitable
+ *      id. Not efficient for many devices, not called a lot. The caller
+ *      must hold the dev_base or rtnl lock while allocating the name and
+ *      adding the device in order to avoid duplicates. Returns the number
+ *      of the unit assigned or a negative errno code.
+ */
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+        int i = 0;
+        char buf[IFNAMSIZ];
+        const char *p;
+        const int max_netdevices = 8*PAGE_SIZE;
+        long *inuse;
+        struct net_device *d;
+
+        p = strnchr(name, IFNAMSIZ-1, '%');
+        if (p) {
+                /*
+                 * Verify the string as this thing may have come from
+                 * the user.  There must be exactly one "%d" and no other
+                 * "%" characters.
+                 */
+                if (p[1] != 'd' || strchr(p + 2, '%'))
+                        return -EINVAL;
+
+                /* Use one page as a bit array of possible slots */
+                inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+                if (!inuse)
+                        return -ENOMEM;
+
+                for (d = dev_base; d; d = d->next) {
+                        if (!sscanf(d->name, name, &i))
+                                continue;
+                        if (i < 0 || i >= max_netdevices)
+                                continue;
+
+                        /* avoid cases where sscanf is not exact inverse of printf */
+                        snprintf(buf, sizeof(buf), name, i);
+                        if (!strncmp(buf, d->name, IFNAMSIZ))
+                                set_bit(i, inuse);
+                }
+
+                i = find_first_zero_bit(inuse, max_netdevices);
+                free_page((unsigned long) inuse);
+        }
+
+        snprintf(buf, sizeof(buf), name, i);
+        if (!__dev_get_by_name(buf)) {
+                strlcpy(dev->name, buf, IFNAMSIZ);
+                return i;
+        }
+
+        /* It is possible to run out of possible slots
+         * when the name is long and there isn't enough space left
+         * for the digits, or if all bits are used.
+         */
+        return -ENFILE;
+}
+
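The unit-scanning core of dev_alloc_name() restated as a standalone sketch: mark every unit an existing name already claims in a bitmap, then take the first clear bit. The fixed-size table and names below are illustrative only:

#include <stdio.h>
#include <string.h>

#define MAX_UNITS 32

static int alloc_unit(const char *fmt, const char *existing[], int n)
{
        unsigned long inuse = 0;
        char buf[16];
        int i, unit;

        for (i = 0; i < n; i++) {
                if (sscanf(existing[i], fmt, &unit) != 1)
                        continue;
                if (unit < 0 || unit >= MAX_UNITS)
                        continue;
                /* guard against sscanf not being printf's exact inverse */
                snprintf(buf, sizeof(buf), fmt, unit);
                if (!strcmp(buf, existing[i]))
                        inuse |= 1UL << unit;
        }
        for (unit = 0; unit < MAX_UNITS; unit++)
                if (!(inuse & (1UL << unit)))
                        return unit;
        return -1;
}

int main(void)
{
        const char *names[] = { "eth0", "eth1", "eth3" };

        printf("eth%d\n", alloc_unit("eth%d", names, 3));  /* prints eth2 */
        return 0;
}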
+
+/**
+ *      dev_change_name - change name of a device
+ *      @dev: device
+ *      @newname: name (or format string) must be at least IFNAMSIZ
+ *
+ *      Change the name of a device. A format string such as "eth%d"
+ *      can be passed for wildcarding.
+ */
+int dev_change_name(struct net_device *dev, char *newname)
+{
+        int err = 0;
+
+        ASSERT_RTNL();
+
+        if (dev->flags & IFF_UP)
+                return -EBUSY;
+
+        if (!dev_valid_name(newname))
+                return -EINVAL;
+
+        if (strchr(newname, '%')) {
+                err = dev_alloc_name(dev, newname);
+                if (err < 0)
+                        return err;
+                strcpy(newname, dev->name);
+        }
+        else if (__dev_get_by_name(newname))
+                return -EEXIST;
+        else
+                strlcpy(dev->name, newname, IFNAMSIZ);
+
+        err = class_device_rename(&dev->class_dev, dev->name);
+        if (!err) {
+                hlist_del(&dev->name_hlist);
+                hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+                notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+        }
+
+        return err;
+}
+
+/**
+ *      netdev_state_change - device changes state
+ *      @dev: device to cause notification
+ *
+ *      Called to indicate a device has changed state. This function calls
+ *      the notifier chains for netdev_chain and sends a NEWLINK message
+ *      to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+        if (dev->flags & IFF_UP) {
+                notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+                rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+        }
+}
+
+/**
+ *      dev_load - load a network module
+ *      @name: name of interface
+ *
+ *      If a network interface is not present and the process has suitable
+ *      privileges this function loads the module. If module loading is not
+ *      available in this kernel then it becomes a nop.
+ */
+void dev_load(const char *name)
+{
+        struct net_device *dev;
+
+        read_lock(&dev_base_lock);
+        dev = __dev_get_by_name(name);
+        read_unlock(&dev_base_lock);
+
+        if (!dev && capable(CAP_SYS_MODULE))
+                request_module("%s", name);
+}
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+        printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
+               skb->dev ? skb->dev->name : "NULL!!!");
+        kfree_skb(skb);
+        return 1;
+}
+
+
+/**
+ *      dev_open - prepare an interface for use.
+ *      @dev: device to open
+ *
+ *      Takes a device from down to up state. The device's private open
+ *      function is invoked and then the multicast lists are loaded. Finally
+ *      the device is moved into the up state and a %NETDEV_UP message is
+ *      sent to the netdev notifier chain.
+ *
+ *      Calling this function on an active interface is a nop. On a failure
+ *      a negative errno code is returned.
+ */
+int dev_open(struct net_device *dev)
+{
+        int ret = 0;
+
+        /*
+         *      Is it already up?
+         */
+        if (dev->flags & IFF_UP)
+                return 0;
+
+        /*
+         *      Is it even present?
+         */
+        if (!netif_device_present(dev))
+                return -ENODEV;
+
+        /*
+         *      Call device private open method
+         */
+        set_bit(__LINK_STATE_START, &dev->state);
+        if (dev->open) {
+                ret = dev->open(dev);
+                if (ret)
+                        clear_bit(__LINK_STATE_START, &dev->state);
+        }
+
+        /*
+         *      If it went open OK then:
+         */
+        if (!ret) {
+                /*
+                 *      Set the flags.
+                 */
+                dev->flags |= IFF_UP;
+
+                /*
+                 *      Initialize multicasting status
+                 */
+                dev_mc_upload(dev);
+
+                /*
+                 *      Wakeup transmit queue engine
+                 */
+                dev_activate(dev);
+
+                /*
+                 *      ... and announce new interface.
+                 */
+                notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+        }
+        return ret;
+}
+
+/**
+ *      dev_close - shutdown an interface.
+ *      @dev: device to shutdown
+ *
+ *      This function moves an active device into down state. A
+ *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *      chain.
+ */
+int dev_close(struct net_device *dev)
+{
+        if (!(dev->flags & IFF_UP))
+                return 0;
+
+        /*
+         *      Tell people we are going down, so that they can
+         *      prepare for it while the device is still operating.
+         */
+        notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+        dev_deactivate(dev);
+
+        clear_bit(__LINK_STATE_START, &dev->state);
+
+        /* Synchronize to scheduled poll. We cannot touch the poll list,
+         * it can be even on a different cpu. So just clear netif_running(),
+         * and wait until the poll really does happen. Actually, the best
+         * place for this is inside dev->stop() after the device has stopped
+         * its irq engine, but this requires more changes in devices. */
+
+        smp_mb__after_clear_bit(); /* Commit netif_running(). */
+        while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+                /* No hurry. */
+                current->state = TASK_INTERRUPTIBLE;
+                schedule_timeout(1);
+        }
+
+        /*
+         *      Call the device specific close. This cannot fail.
+         *      Only called if the device is UP.
+         *
+         *      We allow it to be called even after a DETACH hot-plug
+         *      event.
+         */
+        if (dev->stop)
+                dev->stop(dev);
+
+        /*
+         *      Device is now down.
+         */
+        dev->flags &= ~IFF_UP;
+
+        /*
+         *      Tell people we are down
+         */
+        notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+        return 0;
+}
+
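dev_open() and dev_close() are normally reached from userspace through the SIOCSIFFLAGS ioctl, which sets or clears IFF_UP. A minimal sketch, assuming an interface named "eth0" and CAP_NET_ADMIN (error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        ioctl(fd, SIOCGIFFLAGS, &ifr);  /* read current flags */
        ifr.ifr_flags &= ~IFF_UP;       /* clearing IFF_UP -> dev_close() */
        ioctl(fd, SIOCSIFFLAGS, &ifr);  /* apply: interface goes down */
        close(fd);
        return 0;
}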
937 | |||
938 | /* | ||
939 | * Device change register/unregister. These are not inline or static | ||
940 | * as we export them to the world. | ||
941 | */ | ||
942 | |||
943 | /** | ||
944 | * register_netdevice_notifier - register a network notifier block | ||
945 | * @nb: notifier | ||
946 | * | ||
947 | * Register a notifier to be called when network device events occur. | ||
948 | * The notifier passed is linked into the kernel structures and must | ||
949 | * not be reused until it has been unregistered. A negative errno code | ||
950 | * is returned on a failure. | ||
951 | * | ||
952 | * When registered all registration and up events are replayed | ||
953 | * to the new notifier to allow device to have a race free | ||
954 | * view of the network device list. | ||
955 | */ | ||
956 | |||
957 | int register_netdevice_notifier(struct notifier_block *nb) | ||
958 | { | ||
959 | struct net_device *dev; | ||
960 | int err; | ||
961 | |||
962 | rtnl_lock(); | ||
963 | err = notifier_chain_register(&netdev_chain, nb); | ||
964 | if (!err) { | ||
965 | for (dev = dev_base; dev; dev = dev->next) { | ||
966 | nb->notifier_call(nb, NETDEV_REGISTER, dev); | ||
967 | |||
968 | if (dev->flags & IFF_UP) | ||
969 | nb->notifier_call(nb, NETDEV_UP, dev); | ||
970 | } | ||
971 | } | ||
972 | rtnl_unlock(); | ||
973 | return err; | ||
974 | } | ||
975 | |||
976 | /** | ||
977 | * unregister_netdevice_notifier - unregister a network notifier block | ||
978 | * @nb: notifier | ||
979 | * | ||
980 | * Unregister a notifier previously registered by | ||
981 | * register_netdevice_notifier(). The notifier is unlinked into the | ||
982 | * kernel structures and may then be reused. A negative errno code | ||
983 | * is returned on a failure. | ||
984 | */ | ||
985 | |||
986 | int unregister_netdevice_notifier(struct notifier_block *nb) | ||
987 | { | ||
988 | return notifier_chain_unregister(&netdev_chain, nb); | ||
989 | } | ||
990 | |||
991 | /** | ||
992 | * call_netdevice_notifiers - call all network notifier blocks | ||
993 | * @val: value passed unmodified to notifier function | ||
994 | * @v: pointer passed unmodified to notifier function | ||
995 | * | ||
996 | * Call all network notifier blocks. Parameters and return value | ||
997 | * are as for notifier_call_chain(). | ||
998 | */ | ||
999 | |||
1000 | int call_netdevice_notifiers(unsigned long val, void *v) | ||
1001 | { | ||
1002 | return notifier_call_chain(&netdev_chain, val, v); | ||
1003 | } | ||
1004 | |||
1005 | /* When > 0 there are consumers of rx skb time stamps */ | ||
1006 | static atomic_t netstamp_needed = ATOMIC_INIT(0); | ||
1007 | |||
1008 | void net_enable_timestamp(void) | ||
1009 | { | ||
1010 | atomic_inc(&netstamp_needed); | ||
1011 | } | ||
1012 | |||
1013 | void net_disable_timestamp(void) | ||
1014 | { | ||
1015 | atomic_dec(&netstamp_needed); | ||
1016 | } | ||
1017 | |||
1018 | static inline void net_timestamp(struct timeval *stamp) | ||
1019 | { | ||
1020 | if (atomic_read(&netstamp_needed)) | ||
1021 | do_gettimeofday(stamp); | ||
1022 | else { | ||
1023 | stamp->tv_sec = 0; | ||
1024 | stamp->tv_usec = 0; | ||
1025 | } | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * Support routine. Sends outgoing frames to any network | ||
1030 | * taps currently in use. | ||
1031 | */ | ||
1032 | |||
1033 | void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | ||
1034 | { | ||
1035 | struct packet_type *ptype; | ||
1036 | net_timestamp(&skb->stamp); | ||
1037 | |||
1038 | rcu_read_lock(); | ||
1039 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | ||
1040 | /* Never send packets back to the socket | ||
1041 | * they originated from - MvS (miquels@drinkel.ow.org) | ||
1042 | */ | ||
1043 | if ((ptype->dev == dev || !ptype->dev) && | ||
1044 | (ptype->af_packet_priv == NULL || | ||
1045 | (struct sock *)ptype->af_packet_priv != skb->sk)) { | ||
1046 | struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); | ||
1047 | if (!skb2) | ||
1048 | break; | ||
1049 | |||
1050 | /* skb->nh should be correctly | ||
1051 | set by sender, so that the second statement is | ||
1052 | just protection against buggy protocols. | ||
1053 | */ | ||
1054 | skb2->mac.raw = skb2->data; | ||
1055 | |||
1056 | if (skb2->nh.raw < skb2->data || | ||
1057 | skb2->nh.raw > skb2->tail) { | ||
1058 | if (net_ratelimit()) | ||
1059 | printk(KERN_CRIT "protocol %04x is " | ||
1060 | "buggy, dev %s\n", | ||
1061 | skb2->protocol, dev->name); | ||
1062 | skb2->nh.raw = skb2->data; | ||
1063 | } | ||
1064 | |||
1065 | skb2->h.raw = skb2->nh.raw; | ||
1066 | skb2->pkt_type = PACKET_OUTGOING; | ||
1067 | ptype->func(skb2, skb->dev, ptype); | ||
1068 | } | ||
1069 | } | ||
1070 | rcu_read_unlock(); | ||
1071 | } | ||
1072 | |||
1073 | /* | ||
1074 | * Invalidate hardware checksum when packet is to be mangled, and | ||
1075 | * complete checksum manually on outgoing path. | ||
1076 | */ | ||
1077 | int skb_checksum_help(struct sk_buff *skb, int inward) | ||
1078 | { | ||
1079 | unsigned int csum; | ||
1080 | int ret = 0, offset = skb->h.raw - skb->data; | ||
1081 | |||
1082 | if (inward) { | ||
1083 | skb->ip_summed = CHECKSUM_NONE; | ||
1084 | goto out; | ||
1085 | } | ||
1086 | |||
1087 | if (skb_cloned(skb)) { | ||
1088 | ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | ||
1089 | if (ret) | ||
1090 | goto out; | ||
1091 | } | ||
1092 | |||
1093 | if (offset > (int)skb->len) | ||
1094 | BUG(); | ||
1095 | csum = skb_checksum(skb, offset, skb->len-offset, 0); | ||
1096 | |||
1097 | offset = skb->tail - skb->h.raw; | ||
1098 | if (offset <= 0) | ||
1099 | BUG(); | ||
1100 | if (skb->csum + 2 > offset) | ||
1101 | BUG(); | ||
1102 | |||
1103 | *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); | ||
1104 | skb->ip_summed = CHECKSUM_NONE; | ||
1105 | out: | ||
1106 | return ret; | ||
1107 | } | ||
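/* Editor's note -- illustrative caller, not part of the original
 * source. Code that is about to mangle a packet whose checksum the
 * hardware was going to fill in must complete it in software first,
 * along these lines (this mirrors the call in dev_queue_xmit() below):
 *
 *	if (skb->ip_summed == CHECKSUM_HW &&
 *	    skb_checksum_help(skb, 0))
 *		goto drop;	// expansion failed, drop the packet
 */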
1108 | |||
1109 | #ifdef CONFIG_HIGHMEM | ||
1110 | /* Actually, we should eliminate this check as soon as we know that: | ||
1111 | * 1. an IOMMU is present and allows mapping all of memory, or | ||
1112 | * 2. no high memory really exists on this machine. | ||
1113 | */ | ||
1114 | |||
1115 | static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | ||
1116 | { | ||
1117 | int i; | ||
1118 | |||
1119 | if (dev->features & NETIF_F_HIGHDMA) | ||
1120 | return 0; | ||
1121 | |||
1122 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
1123 | if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | ||
1124 | return 1; | ||
1125 | |||
1126 | return 0; | ||
1127 | } | ||
1128 | #else | ||
1129 | #define illegal_highdma(dev, skb) (0) | ||
1130 | #endif | ||
1131 | |||
1132 | extern void skb_release_data(struct sk_buff *); | ||
1133 | |||
1134 | /* Keep head the same: replace data */ | ||
1135 | int __skb_linearize(struct sk_buff *skb, int gfp_mask) | ||
1136 | { | ||
1137 | unsigned int size; | ||
1138 | u8 *data; | ||
1139 | long offset; | ||
1140 | struct skb_shared_info *ninfo; | ||
1141 | int headerlen = skb->data - skb->head; | ||
1142 | int expand = (skb->tail + skb->data_len) - skb->end; | ||
1143 | |||
1144 | if (skb_shared(skb)) | ||
1145 | BUG(); | ||
1146 | |||
1147 | if (expand <= 0) | ||
1148 | expand = 0; | ||
1149 | |||
1150 | size = skb->end - skb->head + expand; | ||
1151 | size = SKB_DATA_ALIGN(size); | ||
1152 | data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); | ||
1153 | if (!data) | ||
1154 | return -ENOMEM; | ||
1155 | |||
1156 | /* Copy entire thing */ | ||
1157 | if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) | ||
1158 | BUG(); | ||
1159 | |||
1160 | /* Set up shinfo */ | ||
1161 | ninfo = (struct skb_shared_info*)(data + size); | ||
1162 | atomic_set(&ninfo->dataref, 1); | ||
1163 | ninfo->tso_size = skb_shinfo(skb)->tso_size; | ||
1164 | ninfo->tso_segs = skb_shinfo(skb)->tso_segs; | ||
1165 | ninfo->nr_frags = 0; | ||
1166 | ninfo->frag_list = NULL; | ||
1167 | |||
1168 | /* Offset between the two in bytes */ | ||
1169 | offset = data - skb->head; | ||
1170 | |||
1171 | /* Free old data. */ | ||
1172 | skb_release_data(skb); | ||
1173 | |||
1174 | skb->head = data; | ||
1175 | skb->end = data + size; | ||
1176 | |||
1177 | /* Set up new pointers */ | ||
1178 | skb->h.raw += offset; | ||
1179 | skb->nh.raw += offset; | ||
1180 | skb->mac.raw += offset; | ||
1181 | skb->tail += offset; | ||
1182 | skb->data += offset; | ||
1183 | |||
1184 | /* We are no longer a clone, even if we were. */ | ||
1185 | skb->cloned = 0; | ||
1186 | |||
1187 | skb->tail += skb->data_len; | ||
1188 | skb->data_len = 0; | ||
1189 | return 0; | ||
1190 | } | ||
1191 | |||
1192 | #define HARD_TX_LOCK(dev, cpu) { \ | ||
1193 | if ((dev->features & NETIF_F_LLTX) == 0) { \ | ||
1194 | spin_lock(&dev->xmit_lock); \ | ||
1195 | dev->xmit_lock_owner = cpu; \ | ||
1196 | } \ | ||
1197 | } | ||
1198 | |||
1199 | #define HARD_TX_UNLOCK(dev) { \ | ||
1200 | if ((dev->features & NETIF_F_LLTX) == 0) { \ | ||
1201 | dev->xmit_lock_owner = -1; \ | ||
1202 | spin_unlock(&dev->xmit_lock); \ | ||
1203 | } \ | ||
1204 | } | ||
1205 | |||
1206 | /** | ||
1207 | * dev_queue_xmit - transmit a buffer | ||
1208 | * @skb: buffer to transmit | ||
1209 | * | ||
1210 | * Queue a buffer for transmission to a network device. The caller must | ||
1211 | * have set the device and priority and built the buffer before calling | ||
1212 | * this function. The function can be called from an interrupt. | ||
1213 | * | ||
1214 | * A negative errno code is returned on a failure. A success does not | ||
1215 | * guarantee the frame will be transmitted as it may be dropped due | ||
1216 | * to congestion or traffic shaping. | ||
1217 | */ | ||
1218 | |||
1219 | int dev_queue_xmit(struct sk_buff *skb) | ||
1220 | { | ||
1221 | struct net_device *dev = skb->dev; | ||
1222 | struct Qdisc *q; | ||
1223 | int rc = -ENOMEM; | ||
1224 | |||
1225 | if (skb_shinfo(skb)->frag_list && | ||
1226 | !(dev->features & NETIF_F_FRAGLIST) && | ||
1227 | __skb_linearize(skb, GFP_ATOMIC)) | ||
1228 | goto out_kfree_skb; | ||
1229 | |||
1230 | /* Fragmented skb is linearized if device does not support SG, | ||
1231 | * or if at least one of fragments is in highmem and device | ||
1232 | * does not support DMA from it. | ||
1233 | */ | ||
1234 | if (skb_shinfo(skb)->nr_frags && | ||
1235 | (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && | ||
1236 | __skb_linearize(skb, GFP_ATOMIC)) | ||
1237 | goto out_kfree_skb; | ||
1238 | |||
1239 | /* If packet is not checksummed and device does not support | ||
1240 | * checksumming for this protocol, complete checksumming here. | ||
1241 | */ | ||
1242 | if (skb->ip_summed == CHECKSUM_HW && | ||
1243 | (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && | ||
1244 | (!(dev->features & NETIF_F_IP_CSUM) || | ||
1245 | skb->protocol != htons(ETH_P_IP)))) | ||
1246 | if (skb_checksum_help(skb, 0)) | ||
1247 | goto out_kfree_skb; | ||
1248 | |||
1249 | /* Disable soft irqs for various locks below. Also | ||
1250 | * stops preemption for RCU. | ||
1251 | */ | ||
1252 | local_bh_disable(); | ||
1253 | |||
1254 | /* Updates of qdisc are serialized by queue_lock. | ||
1255 | * The struct Qdisc which is pointed to by qdisc is now a | ||
1256 | * rcu structure - it may be accessed without acquiring | ||
1257 | * a lock (but the structure may be stale.) The freeing of the | ||
1258 | * qdisc will be deferred until it's known that there are no | ||
1259 | * more references to it. | ||
1260 | * | ||
1261 | * If the qdisc has an enqueue function, we still need to | ||
1262 | * hold the queue_lock before calling it, since queue_lock | ||
1263 | * also serializes access to the device queue. | ||
1264 | */ | ||
1265 | |||
1266 | q = rcu_dereference(dev->qdisc); | ||
1267 | #ifdef CONFIG_NET_CLS_ACT | ||
1268 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); | ||
1269 | #endif | ||
1270 | if (q->enqueue) { | ||
1271 | /* Grab device queue */ | ||
1272 | spin_lock(&dev->queue_lock); | ||
1273 | |||
1274 | rc = q->enqueue(skb, q); | ||
1275 | |||
1276 | qdisc_run(dev); | ||
1277 | |||
1278 | spin_unlock(&dev->queue_lock); | ||
1279 | rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; | ||
1280 | goto out; | ||
1281 | } | ||
1282 | |||
1283 | /* The device has no queue. Common case for software devices: | ||
1284 | loopback and all sorts of tunnels... | ||
1285 | |||
1286 | Really, it is unlikely that xmit_lock protection is necessary here. | ||
1287 | (E.g. loopback and IP tunnels are clean, ignoring the statistics | ||
1288 | counters.) | ||
1289 | However, it is possible that they rely on the protection | ||
1290 | we provide here. | ||
1291 | |||
1292 | So check the flag and take the lock; it is not prone to deadlocks. | ||
1293 | Or take the noqueue qdisc path, which is even simpler 8) | ||
1294 | */ | ||
1295 | if (dev->flags & IFF_UP) { | ||
1296 | int cpu = smp_processor_id(); /* ok because BHs are off */ | ||
1297 | |||
1298 | if (dev->xmit_lock_owner != cpu) { | ||
1299 | |||
1300 | HARD_TX_LOCK(dev, cpu); | ||
1301 | |||
1302 | if (!netif_queue_stopped(dev)) { | ||
1303 | if (netdev_nit) | ||
1304 | dev_queue_xmit_nit(skb, dev); | ||
1305 | |||
1306 | rc = 0; | ||
1307 | if (!dev->hard_start_xmit(skb, dev)) { | ||
1308 | HARD_TX_UNLOCK(dev); | ||
1309 | goto out; | ||
1310 | } | ||
1311 | } | ||
1312 | HARD_TX_UNLOCK(dev); | ||
1313 | if (net_ratelimit()) | ||
1314 | printk(KERN_CRIT "Virtual device %s asks to " | ||
1315 | "queue packet!\n", dev->name); | ||
1316 | } else { | ||
1317 | /* Recursion is detected! It is possible, | ||
1318 | * unfortunately */ | ||
1319 | if (net_ratelimit()) | ||
1320 | printk(KERN_CRIT "Dead loop on virtual device " | ||
1321 | "%s, fix it urgently!\n", dev->name); | ||
1322 | } | ||
1323 | } | ||
1324 | |||
1325 | rc = -ENETDOWN; | ||
1326 | local_bh_enable(); | ||
1327 | |||
1328 | out_kfree_skb: | ||
1329 | kfree_skb(skb); | ||
1330 | return rc; | ||
1331 | out: | ||
1332 | local_bh_enable(); | ||
1333 | return rc; | ||
1334 | } | ||
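/* Editor's note -- usage sketch, not part of the original source.
 * A typical sender fills in the device and priority before calling
 * dev_queue_xmit(); note that the skb is consumed on both success
 * and failure (see out_kfree_skb above), so the caller must not
 * free it again:
 *
 *	skb->dev = dev;
 *	skb->priority = TC_PRIO_CONTROL;
 *	// ... build link-layer and protocol headers ...
 *	dev_queue_xmit(skb);
 */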
1335 | |||
1336 | |||
1337 | /*======================================================================= | ||
1338 | Receiver routines | ||
1339 | =======================================================================*/ | ||
1340 | |||
1341 | int netdev_max_backlog = 300; | ||
1342 | int weight_p = 64; /* old backlog weight */ | ||
1343 | /* These numbers are selected based on intuition and some | ||
1344 | * experimentation; if you have a more scientific way of doing this, | ||
1345 | * please go ahead and fix things. | ||
1346 | */ | ||
1347 | int no_cong_thresh = 10; | ||
1348 | int no_cong = 20; | ||
1349 | int lo_cong = 100; | ||
1350 | int mod_cong = 290; | ||
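/* Editor's note -- not part of the original source. get_sample_stats()
 * below keeps avg_blog as a simple half/half moving average
 * (avg = avg/2 + backlog/2) and maps it onto the thresholds above:
 * avg_blog > 290 -> NET_RX_CN_HIGH, > 100 -> NET_RX_CN_MOD,
 * > 20 -> NET_RX_CN_LOW, otherwise NET_RX_SUCCESS.
 */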
1351 | |||
1352 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | ||
1353 | |||
1354 | |||
1355 | static void get_sample_stats(int cpu) | ||
1356 | { | ||
1357 | #ifdef RAND_LIE | ||
1358 | unsigned long rd; | ||
1359 | int rq; | ||
1360 | #endif | ||
1361 | struct softnet_data *sd = &per_cpu(softnet_data, cpu); | ||
1362 | int blog = sd->input_pkt_queue.qlen; | ||
1363 | int avg_blog = sd->avg_blog; | ||
1364 | |||
1365 | avg_blog = (avg_blog >> 1) + (blog >> 1); | ||
1366 | |||
1367 | if (avg_blog > mod_cong) { | ||
1368 | /* Above moderate congestion levels. */ | ||
1369 | sd->cng_level = NET_RX_CN_HIGH; | ||
1370 | #ifdef RAND_LIE | ||
1371 | rd = net_random(); | ||
1372 | rq = rd % netdev_max_backlog; | ||
1373 | if (rq < avg_blog) /* unlucky bastard */ | ||
1374 | sd->cng_level = NET_RX_DROP; | ||
1375 | #endif | ||
1376 | } else if (avg_blog > lo_cong) { | ||
1377 | sd->cng_level = NET_RX_CN_MOD; | ||
1378 | #ifdef RAND_LIE | ||
1379 | rd = net_random(); | ||
1380 | rq = rd % netdev_max_backlog; | ||
1381 | if (rq < avg_blog) /* unlucky bastard */ | ||
1382 | sd->cng_level = NET_RX_CN_HIGH; | ||
1383 | #endif | ||
1384 | } else if (avg_blog > no_cong) | ||
1385 | sd->cng_level = NET_RX_CN_LOW; | ||
1386 | else /* no congestion */ | ||
1387 | sd->cng_level = NET_RX_SUCCESS; | ||
1388 | |||
1389 | sd->avg_blog = avg_blog; | ||
1390 | } | ||
1391 | |||
1392 | #ifdef OFFLINE_SAMPLE | ||
1393 | static void sample_queue(unsigned long dummy) | ||
1394 | { | ||
1395 | /* 10ms or 1ms -- I don't care -- JHS */ | ||
1396 | int next_tick = 1; | ||
1397 | int cpu = smp_processor_id(); | ||
1398 | |||
1399 | get_sample_stats(cpu); | ||
1400 | next_tick += jiffies; | ||
1401 | mod_timer(&samp_timer, next_tick); | ||
1402 | } | ||
1403 | #endif | ||
1404 | |||
1405 | |||
1406 | /** | ||
1407 | * netif_rx - post buffer to the network code | ||
1408 | * @skb: buffer to post | ||
1409 | * | ||
1410 | * This function receives a packet from a device driver and queues it for | ||
1411 | * the upper (protocol) levels to process. It always succeeds. The buffer | ||
1412 | * may be dropped during processing for congestion control or by the | ||
1413 | * protocol layers. | ||
1414 | * | ||
1415 | * return values: | ||
1416 | * NET_RX_SUCCESS (no congestion) | ||
1417 | * NET_RX_CN_LOW (low congestion) | ||
1418 | * NET_RX_CN_MOD (moderate congestion) | ||
1419 | * NET_RX_CN_HIGH (high congestion) | ||
1420 | * NET_RX_DROP (packet was dropped) | ||
1421 | * | ||
1422 | */ | ||
1423 | |||
1424 | int netif_rx(struct sk_buff *skb) | ||
1425 | { | ||
1426 | int this_cpu; | ||
1427 | struct softnet_data *queue; | ||
1428 | unsigned long flags; | ||
1429 | |||
1430 | /* if netpoll wants it, pretend we never saw it */ | ||
1431 | if (netpoll_rx(skb)) | ||
1432 | return NET_RX_DROP; | ||
1433 | |||
1434 | if (!skb->stamp.tv_sec) | ||
1435 | net_timestamp(&skb->stamp); | ||
1436 | |||
1437 | /* | ||
1438 | * The code is rearranged so that the path is shortest | ||
1439 | * when the CPU is congested but still operating. | ||
1440 | */ | ||
1441 | local_irq_save(flags); | ||
1442 | this_cpu = smp_processor_id(); | ||
1443 | queue = &__get_cpu_var(softnet_data); | ||
1444 | |||
1445 | __get_cpu_var(netdev_rx_stat).total++; | ||
1446 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | ||
1447 | if (queue->input_pkt_queue.qlen) { | ||
1448 | if (queue->throttle) | ||
1449 | goto drop; | ||
1450 | |||
1451 | enqueue: | ||
1452 | dev_hold(skb->dev); | ||
1453 | __skb_queue_tail(&queue->input_pkt_queue, skb); | ||
1454 | #ifndef OFFLINE_SAMPLE | ||
1455 | get_sample_stats(this_cpu); | ||
1456 | #endif | ||
1457 | local_irq_restore(flags); | ||
1458 | return queue->cng_level; | ||
1459 | } | ||
1460 | |||
1461 | if (queue->throttle) | ||
1462 | queue->throttle = 0; | ||
1463 | |||
1464 | netif_rx_schedule(&queue->backlog_dev); | ||
1465 | goto enqueue; | ||
1466 | } | ||
1467 | |||
1468 | if (!queue->throttle) { | ||
1469 | queue->throttle = 1; | ||
1470 | __get_cpu_var(netdev_rx_stat).throttled++; | ||
1471 | } | ||
1472 | |||
1473 | drop: | ||
1474 | __get_cpu_var(netdev_rx_stat).dropped++; | ||
1475 | local_irq_restore(flags); | ||
1476 | |||
1477 | kfree_skb(skb); | ||
1478 | return NET_RX_DROP; | ||
1479 | } | ||
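/* Editor's note -- usage sketch, not part of the original source.
 * A non-NAPI driver's rx interrupt handler typically feeds frames in
 * like this (rx_buf and pkt_len stand in for driver specifics):
 *
 *	skb = dev_alloc_skb(pkt_len + 2);
 *	if (!skb)
 *		goto drop;
 *	skb_reserve(skb, 2);		// align the IP header
 *	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 *	dev->last_rx = jiffies;
 */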
1480 | |||
1481 | int netif_rx_ni(struct sk_buff *skb) | ||
1482 | { | ||
1483 | int err; | ||
1484 | |||
1485 | preempt_disable(); | ||
1486 | err = netif_rx(skb); | ||
1487 | if (local_softirq_pending()) | ||
1488 | do_softirq(); | ||
1489 | preempt_enable(); | ||
1490 | |||
1491 | return err; | ||
1492 | } | ||
1493 | |||
1494 | EXPORT_SYMBOL(netif_rx_ni); | ||
1495 | |||
1496 | static __inline__ void skb_bond(struct sk_buff *skb) | ||
1497 | { | ||
1498 | struct net_device *dev = skb->dev; | ||
1499 | |||
1500 | if (dev->master) { | ||
1501 | skb->real_dev = skb->dev; | ||
1502 | skb->dev = dev->master; | ||
1503 | } | ||
1504 | } | ||
1505 | |||
1506 | static void net_tx_action(struct softirq_action *h) | ||
1507 | { | ||
1508 | struct softnet_data *sd = &__get_cpu_var(softnet_data); | ||
1509 | |||
1510 | if (sd->completion_queue) { | ||
1511 | struct sk_buff *clist; | ||
1512 | |||
1513 | local_irq_disable(); | ||
1514 | clist = sd->completion_queue; | ||
1515 | sd->completion_queue = NULL; | ||
1516 | local_irq_enable(); | ||
1517 | |||
1518 | while (clist) { | ||
1519 | struct sk_buff *skb = clist; | ||
1520 | clist = clist->next; | ||
1521 | |||
1522 | BUG_TRAP(!atomic_read(&skb->users)); | ||
1523 | __kfree_skb(skb); | ||
1524 | } | ||
1525 | } | ||
1526 | |||
1527 | if (sd->output_queue) { | ||
1528 | struct net_device *head; | ||
1529 | |||
1530 | local_irq_disable(); | ||
1531 | head = sd->output_queue; | ||
1532 | sd->output_queue = NULL; | ||
1533 | local_irq_enable(); | ||
1534 | |||
1535 | while (head) { | ||
1536 | struct net_device *dev = head; | ||
1537 | head = head->next_sched; | ||
1538 | |||
1539 | smp_mb__before_clear_bit(); | ||
1540 | clear_bit(__LINK_STATE_SCHED, &dev->state); | ||
1541 | |||
1542 | if (spin_trylock(&dev->queue_lock)) { | ||
1543 | qdisc_run(dev); | ||
1544 | spin_unlock(&dev->queue_lock); | ||
1545 | } else { | ||
1546 | netif_schedule(dev); | ||
1547 | } | ||
1548 | } | ||
1549 | } | ||
1550 | } | ||
1551 | |||
1552 | static __inline__ int deliver_skb(struct sk_buff *skb, | ||
1553 | struct packet_type *pt_prev) | ||
1554 | { | ||
1555 | atomic_inc(&skb->users); | ||
1556 | return pt_prev->func(skb, skb->dev, pt_prev); | ||
1557 | } | ||
1558 | |||
1559 | #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) | ||
1560 | int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); | ||
1561 | struct net_bridge; | ||
1562 | struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, | ||
1563 | unsigned char *addr); | ||
1564 | void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); | ||
1565 | |||
1566 | static __inline__ int handle_bridge(struct sk_buff **pskb, | ||
1567 | struct packet_type **pt_prev, int *ret) | ||
1568 | { | ||
1569 | struct net_bridge_port *port; | ||
1570 | |||
1571 | if ((*pskb)->pkt_type == PACKET_LOOPBACK || | ||
1572 | (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) | ||
1573 | return 0; | ||
1574 | |||
1575 | if (*pt_prev) { | ||
1576 | *ret = deliver_skb(*pskb, *pt_prev); | ||
1577 | *pt_prev = NULL; | ||
1578 | } | ||
1579 | |||
1580 | return br_handle_frame_hook(port, pskb); | ||
1581 | } | ||
1582 | #else | ||
1583 | #define handle_bridge(skb, pt_prev, ret) (0) | ||
1584 | #endif | ||
1585 | |||
1586 | #ifdef CONFIG_NET_CLS_ACT | ||
1587 | /* TODO: Maybe we should just force sch_ingress to be compiled in | ||
1588 | * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless | ||
1589 | * instructions (a compare and two extra stores) when it is not | ||
1590 | * loaded but CONFIG_NET_CLS_ACT is set. | ||
1591 | * NOTE: This doesn't remove any functionality; if you don't have | ||
1592 | * the ingress scheduler, you just can't add policies on ingress. | ||
1593 | * | ||
1594 | */ | ||
1595 | static int ing_filter(struct sk_buff *skb) | ||
1596 | { | ||
1597 | struct Qdisc *q; | ||
1598 | struct net_device *dev = skb->dev; | ||
1599 | int result = TC_ACT_OK; | ||
1600 | |||
1601 | if (dev->qdisc_ingress) { | ||
1602 | __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); | ||
1603 | if (MAX_RED_LOOP < ttl++) { | ||
1604 | printk("Redir loop detected Dropping packet (%s->%s)\n", | ||
1605 | skb->input_dev?skb->input_dev->name:"??",skb->dev->name); | ||
1606 | return TC_ACT_SHOT; | ||
1607 | } | ||
1608 | |||
1609 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); | ||
1610 | |||
1611 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); | ||
1612 | if (NULL == skb->input_dev) { | ||
1613 | skb->input_dev = skb->dev; | ||
1614 | printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); | ||
1615 | } | ||
1616 | spin_lock(&dev->ingress_lock); | ||
1617 | if ((q = dev->qdisc_ingress) != NULL) | ||
1618 | result = q->enqueue(skb, q); | ||
1619 | spin_unlock(&dev->ingress_lock); | ||
1620 | |||
1621 | } | ||
1622 | |||
1623 | return result; | ||
1624 | } | ||
1625 | #endif | ||
1626 | |||
1627 | int netif_receive_skb(struct sk_buff *skb) | ||
1628 | { | ||
1629 | struct packet_type *ptype, *pt_prev; | ||
1630 | int ret = NET_RX_DROP; | ||
1631 | unsigned short type; | ||
1632 | |||
1633 | /* if we've gotten here through NAPI, check netpoll */ | ||
1634 | if (skb->dev->poll && netpoll_rx(skb)) | ||
1635 | return NET_RX_DROP; | ||
1636 | |||
1637 | if (!skb->stamp.tv_sec) | ||
1638 | net_timestamp(&skb->stamp); | ||
1639 | |||
1640 | skb_bond(skb); | ||
1641 | |||
1642 | __get_cpu_var(netdev_rx_stat).total++; | ||
1643 | |||
1644 | skb->h.raw = skb->nh.raw = skb->data; | ||
1645 | skb->mac_len = skb->nh.raw - skb->mac.raw; | ||
1646 | |||
1647 | pt_prev = NULL; | ||
1648 | |||
1649 | rcu_read_lock(); | ||
1650 | |||
1651 | #ifdef CONFIG_NET_CLS_ACT | ||
1652 | if (skb->tc_verd & TC_NCLS) { | ||
1653 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | ||
1654 | goto ncls; | ||
1655 | } | ||
1656 | #endif | ||
1657 | |||
1658 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | ||
1659 | if (!ptype->dev || ptype->dev == skb->dev) { | ||
1660 | if (pt_prev) | ||
1661 | ret = deliver_skb(skb, pt_prev); | ||
1662 | pt_prev = ptype; | ||
1663 | } | ||
1664 | } | ||
1665 | |||
1666 | #ifdef CONFIG_NET_CLS_ACT | ||
1667 | if (pt_prev) { | ||
1668 | ret = deliver_skb(skb, pt_prev); | ||
1669 | pt_prev = NULL; /* no one else should process this after us */ | ||
1670 | } else { | ||
1671 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); | ||
1672 | } | ||
1673 | |||
1674 | ret = ing_filter(skb); | ||
1675 | |||
1676 | if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { | ||
1677 | kfree_skb(skb); | ||
1678 | goto out; | ||
1679 | } | ||
1680 | |||
1681 | skb->tc_verd = 0; | ||
1682 | ncls: | ||
1683 | #endif | ||
1684 | |||
1685 | handle_diverter(skb); | ||
1686 | |||
1687 | if (handle_bridge(&skb, &pt_prev, &ret)) | ||
1688 | goto out; | ||
1689 | |||
1690 | type = skb->protocol; | ||
1691 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { | ||
1692 | if (ptype->type == type && | ||
1693 | (!ptype->dev || ptype->dev == skb->dev)) { | ||
1694 | if (pt_prev) | ||
1695 | ret = deliver_skb(skb, pt_prev); | ||
1696 | pt_prev = ptype; | ||
1697 | } | ||
1698 | } | ||
1699 | |||
1700 | if (pt_prev) { | ||
1701 | ret = pt_prev->func(skb, skb->dev, pt_prev); | ||
1702 | } else { | ||
1703 | kfree_skb(skb); | ||
1704 | /* Jamal, now you will not be able to escape explaining | ||
1705 | * to me how you were going to use this. :-) | ||
1706 | */ | ||
1707 | ret = NET_RX_DROP; | ||
1708 | } | ||
1709 | |||
1710 | out: | ||
1711 | rcu_read_unlock(); | ||
1712 | return ret; | ||
1713 | } | ||
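/* Editor's note -- usage sketch, not part of the original source.
 * NAPI drivers call netif_receive_skb() from their ->poll() method
 * rather than netif_rx(); rx_ring_next() here is a hypothetical
 * driver helper that pops the next completed frame:
 *
 *	static int my_poll(struct net_device *dev, int *budget)
 *	{
 *		int work = 0, quota = min(dev->quota, *budget);
 *		struct sk_buff *skb;
 *
 *		while (work < quota && (skb = rx_ring_next(dev))) {
 *			skb->protocol = eth_type_trans(skb, dev);
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		dev->quota -= work;
 *		*budget -= work;
 *		if (work < quota) {		// ring drained
 *			netif_rx_complete(dev);
 *			return 0;
 *		}
 *		return 1;			// more to do
 *	}
 */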
1714 | |||
1715 | static int process_backlog(struct net_device *backlog_dev, int *budget) | ||
1716 | { | ||
1717 | int work = 0; | ||
1718 | int quota = min(backlog_dev->quota, *budget); | ||
1719 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | ||
1720 | unsigned long start_time = jiffies; | ||
1721 | |||
1722 | for (;;) { | ||
1723 | struct sk_buff *skb; | ||
1724 | struct net_device *dev; | ||
1725 | |||
1726 | local_irq_disable(); | ||
1727 | skb = __skb_dequeue(&queue->input_pkt_queue); | ||
1728 | if (!skb) | ||
1729 | goto job_done; | ||
1730 | local_irq_enable(); | ||
1731 | |||
1732 | dev = skb->dev; | ||
1733 | |||
1734 | netif_receive_skb(skb); | ||
1735 | |||
1736 | dev_put(dev); | ||
1737 | |||
1738 | work++; | ||
1739 | |||
1740 | if (work >= quota || jiffies - start_time > 1) | ||
1741 | break; | ||
1742 | |||
1743 | } | ||
1744 | |||
1745 | backlog_dev->quota -= work; | ||
1746 | *budget -= work; | ||
1747 | return -1; | ||
1748 | |||
1749 | job_done: | ||
1750 | backlog_dev->quota -= work; | ||
1751 | *budget -= work; | ||
1752 | |||
1753 | list_del(&backlog_dev->poll_list); | ||
1754 | smp_mb__before_clear_bit(); | ||
1755 | netif_poll_enable(backlog_dev); | ||
1756 | |||
1757 | if (queue->throttle) | ||
1758 | queue->throttle = 0; | ||
1759 | local_irq_enable(); | ||
1760 | return 0; | ||
1761 | } | ||
1762 | |||
1763 | static void net_rx_action(struct softirq_action *h) | ||
1764 | { | ||
1765 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | ||
1766 | unsigned long start_time = jiffies; | ||
1767 | int budget = netdev_max_backlog; | ||
1768 | |||
1769 | |||
1770 | local_irq_disable(); | ||
1771 | |||
1772 | while (!list_empty(&queue->poll_list)) { | ||
1773 | struct net_device *dev; | ||
1774 | |||
1775 | if (budget <= 0 || jiffies - start_time > 1) | ||
1776 | goto softnet_break; | ||
1777 | |||
1778 | local_irq_enable(); | ||
1779 | |||
1780 | dev = list_entry(queue->poll_list.next, | ||
1781 | struct net_device, poll_list); | ||
1782 | netpoll_poll_lock(dev); | ||
1783 | |||
1784 | if (dev->quota <= 0 || dev->poll(dev, &budget)) { | ||
1785 | netpoll_poll_unlock(dev); | ||
1786 | local_irq_disable(); | ||
1787 | list_del(&dev->poll_list); | ||
1788 | list_add_tail(&dev->poll_list, &queue->poll_list); | ||
1789 | if (dev->quota < 0) | ||
1790 | dev->quota += dev->weight; | ||
1791 | else | ||
1792 | dev->quota = dev->weight; | ||
1793 | } else { | ||
1794 | netpoll_poll_unlock(dev); | ||
1795 | dev_put(dev); | ||
1796 | local_irq_disable(); | ||
1797 | } | ||
1798 | } | ||
1799 | out: | ||
1800 | local_irq_enable(); | ||
1801 | return; | ||
1802 | |||
1803 | softnet_break: | ||
1804 | __get_cpu_var(netdev_rx_stat).time_squeeze++; | ||
1805 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
1806 | goto out; | ||
1807 | } | ||
1808 | |||
1809 | static gifconf_func_t *gifconf_list[NPROTO]; | ||
1810 | |||
1811 | /** | ||
1812 | * register_gifconf - register a SIOCGIF handler | ||
1813 | * @family: Address family | ||
1814 | * @gifconf: Function handler | ||
1815 | * | ||
1816 | * Register protocol dependent address dumping routines. The handler | ||
1817 | * that is passed must not be freed or reused until it has been replaced | ||
1818 | * by another handler. | ||
1819 | */ | ||
1820 | int register_gifconf(unsigned int family, gifconf_func_t * gifconf) | ||
1821 | { | ||
1822 | if (family >= NPROTO) | ||
1823 | return -EINVAL; | ||
1824 | gifconf_list[family] = gifconf; | ||
1825 | return 0; | ||
1826 | } | ||
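/* Editor's note -- not part of the original source. For example, IPv4
 * registers its handler at boot, roughly:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * so that SIOCGIFCONF can report the PF_INET addresses of each device.
 */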
1827 | |||
1828 | |||
1829 | /* | ||
1830 | * Map an interface index to its name (SIOCGIFNAME) | ||
1831 | */ | ||
1832 | |||
1833 | /* | ||
1834 | * We need this ioctl for efficient implementation of the | ||
1835 | * if_indextoname() function required by the IPv6 API. Without | ||
1836 | * it, we would have to search all the interfaces to find a | ||
1837 | * match. --pb | ||
1838 | */ | ||
1839 | |||
1840 | static int dev_ifname(struct ifreq __user *arg) | ||
1841 | { | ||
1842 | struct net_device *dev; | ||
1843 | struct ifreq ifr; | ||
1844 | |||
1845 | /* | ||
1846 | * Fetch the caller's info block. | ||
1847 | */ | ||
1848 | |||
1849 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | ||
1850 | return -EFAULT; | ||
1851 | |||
1852 | read_lock(&dev_base_lock); | ||
1853 | dev = __dev_get_by_index(ifr.ifr_ifindex); | ||
1854 | if (!dev) { | ||
1855 | read_unlock(&dev_base_lock); | ||
1856 | return -ENODEV; | ||
1857 | } | ||
1858 | |||
1859 | strcpy(ifr.ifr_name, dev->name); | ||
1860 | read_unlock(&dev_base_lock); | ||
1861 | |||
1862 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | ||
1863 | return -EFAULT; | ||
1864 | return 0; | ||
1865 | } | ||
1866 | |||
1867 | /* | ||
1868 | * Perform a SIOCGIFCONF call. This structure will change | ||
1869 | * size eventually, and there is nothing I can do about it. | ||
1870 | * Thus we will need a 'compatibility mode'. | ||
1871 | */ | ||
1872 | |||
1873 | static int dev_ifconf(char __user *arg) | ||
1874 | { | ||
1875 | struct ifconf ifc; | ||
1876 | struct net_device *dev; | ||
1877 | char __user *pos; | ||
1878 | int len; | ||
1879 | int total; | ||
1880 | int i; | ||
1881 | |||
1882 | /* | ||
1883 | * Fetch the caller's info block. | ||
1884 | */ | ||
1885 | |||
1886 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) | ||
1887 | return -EFAULT; | ||
1888 | |||
1889 | pos = ifc.ifc_buf; | ||
1890 | len = ifc.ifc_len; | ||
1891 | |||
1892 | /* | ||
1893 | * Loop over the interfaces, and write an info block for each. | ||
1894 | */ | ||
1895 | |||
1896 | total = 0; | ||
1897 | for (dev = dev_base; dev; dev = dev->next) { | ||
1898 | for (i = 0; i < NPROTO; i++) { | ||
1899 | if (gifconf_list[i]) { | ||
1900 | int done; | ||
1901 | if (!pos) | ||
1902 | done = gifconf_list[i](dev, NULL, 0); | ||
1903 | else | ||
1904 | done = gifconf_list[i](dev, pos + total, | ||
1905 | len - total); | ||
1906 | if (done < 0) | ||
1907 | return -EFAULT; | ||
1908 | total += done; | ||
1909 | } | ||
1910 | } | ||
1911 | } | ||
1912 | |||
1913 | /* | ||
1914 | * All done. Write the updated control block back to the caller. | ||
1915 | */ | ||
1916 | ifc.ifc_len = total; | ||
1917 | |||
1918 | /* | ||
1919 | * Both BSD and Solaris return 0 here, so we do too. | ||
1920 | */ | ||
1921 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; | ||
1922 | } | ||
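/* Editor's note -- user-space sketch, not part of the original source.
 * A caller typically supplies a buffer and reads back packed ifreqs;
 * passing a NULL ifc_buf (pos == NULL above) just sizes the result:
 *
 *	struct ifconf ifc;
 *	struct ifreq *ifr;
 *	char buf[4096];
 *	int i, n;
 *
 *	ifc.ifc_buf = buf;
 *	ifc.ifc_len = sizeof(buf);
 *	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0) {
 *		ifr = ifc.ifc_req;
 *		n = ifc.ifc_len / sizeof(struct ifreq);
 *		for (i = 0; i < n; i++)
 *			printf("%s\n", ifr[i].ifr_name);
 *	}
 */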
1923 | |||
1924 | #ifdef CONFIG_PROC_FS | ||
1925 | /* | ||
1926 | * This is invoked by the /proc filesystem handler to display a device | ||
1927 | * in detail. | ||
1928 | */ | ||
1929 | static __inline__ struct net_device *dev_get_idx(loff_t pos) | ||
1930 | { | ||
1931 | struct net_device *dev; | ||
1932 | loff_t i; | ||
1933 | |||
1934 | for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); | ||
1935 | |||
1936 | return i == pos ? dev : NULL; | ||
1937 | } | ||
1938 | |||
1939 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) | ||
1940 | { | ||
1941 | read_lock(&dev_base_lock); | ||
1942 | return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; | ||
1943 | } | ||
1944 | |||
1945 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1946 | { | ||
1947 | ++*pos; | ||
1948 | return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; | ||
1949 | } | ||
1950 | |||
1951 | void dev_seq_stop(struct seq_file *seq, void *v) | ||
1952 | { | ||
1953 | read_unlock(&dev_base_lock); | ||
1954 | } | ||
1955 | |||
1956 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | ||
1957 | { | ||
1958 | if (dev->get_stats) { | ||
1959 | struct net_device_stats *stats = dev->get_stats(dev); | ||
1960 | |||
1961 | seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " | ||
1962 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", | ||
1963 | dev->name, stats->rx_bytes, stats->rx_packets, | ||
1964 | stats->rx_errors, | ||
1965 | stats->rx_dropped + stats->rx_missed_errors, | ||
1966 | stats->rx_fifo_errors, | ||
1967 | stats->rx_length_errors + stats->rx_over_errors + | ||
1968 | stats->rx_crc_errors + stats->rx_frame_errors, | ||
1969 | stats->rx_compressed, stats->multicast, | ||
1970 | stats->tx_bytes, stats->tx_packets, | ||
1971 | stats->tx_errors, stats->tx_dropped, | ||
1972 | stats->tx_fifo_errors, stats->collisions, | ||
1973 | stats->tx_carrier_errors + | ||
1974 | stats->tx_aborted_errors + | ||
1975 | stats->tx_window_errors + | ||
1976 | stats->tx_heartbeat_errors, | ||
1977 | stats->tx_compressed); | ||
1978 | } else | ||
1979 | seq_printf(seq, "%6s: No statistics available.\n", dev->name); | ||
1980 | } | ||
1981 | |||
1982 | /* | ||
1983 | * Called from the PROCfs module. This now uses the new arbitrary-sized | ||
1984 | * /proc/net interface to create /proc/net/dev. | ||
1985 | */ | ||
1986 | static int dev_seq_show(struct seq_file *seq, void *v) | ||
1987 | { | ||
1988 | if (v == SEQ_START_TOKEN) | ||
1989 | seq_puts(seq, "Inter-| Receive " | ||
1990 | " | Transmit\n" | ||
1991 | " face |bytes packets errs drop fifo frame " | ||
1992 | "compressed multicast|bytes packets errs " | ||
1993 | "drop fifo colls carrier compressed\n"); | ||
1994 | else | ||
1995 | dev_seq_printf_stats(seq, v); | ||
1996 | return 0; | ||
1997 | } | ||
1998 | |||
1999 | static struct netif_rx_stats *softnet_get_online(loff_t *pos) | ||
2000 | { | ||
2001 | struct netif_rx_stats *rc = NULL; | ||
2002 | |||
2003 | while (*pos < NR_CPUS) | ||
2004 | if (cpu_online(*pos)) { | ||
2005 | rc = &per_cpu(netdev_rx_stat, *pos); | ||
2006 | break; | ||
2007 | } else | ||
2008 | ++*pos; | ||
2009 | return rc; | ||
2010 | } | ||
2011 | |||
2012 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | ||
2013 | { | ||
2014 | return softnet_get_online(pos); | ||
2015 | } | ||
2016 | |||
2017 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
2018 | { | ||
2019 | ++*pos; | ||
2020 | return softnet_get_online(pos); | ||
2021 | } | ||
2022 | |||
2023 | static void softnet_seq_stop(struct seq_file *seq, void *v) | ||
2024 | { | ||
2025 | } | ||
2026 | |||
2027 | static int softnet_seq_show(struct seq_file *seq, void *v) | ||
2028 | { | ||
2029 | struct netif_rx_stats *s = v; | ||
2030 | |||
2031 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | ||
2032 | s->total, s->dropped, s->time_squeeze, s->throttled, | ||
2033 | s->fastroute_hit, s->fastroute_success, s->fastroute_defer, | ||
2034 | s->fastroute_deferred_out, | ||
2035 | #if 0 | ||
2036 | s->fastroute_latency_reduction | ||
2037 | #else | ||
2038 | s->cpu_collision | ||
2039 | #endif | ||
2040 | ); | ||
2041 | return 0; | ||
2042 | } | ||
2043 | |||
2044 | static struct seq_operations dev_seq_ops = { | ||
2045 | .start = dev_seq_start, | ||
2046 | .next = dev_seq_next, | ||
2047 | .stop = dev_seq_stop, | ||
2048 | .show = dev_seq_show, | ||
2049 | }; | ||
2050 | |||
2051 | static int dev_seq_open(struct inode *inode, struct file *file) | ||
2052 | { | ||
2053 | return seq_open(file, &dev_seq_ops); | ||
2054 | } | ||
2055 | |||
2056 | static struct file_operations dev_seq_fops = { | ||
2057 | .owner = THIS_MODULE, | ||
2058 | .open = dev_seq_open, | ||
2059 | .read = seq_read, | ||
2060 | .llseek = seq_lseek, | ||
2061 | .release = seq_release, | ||
2062 | }; | ||
2063 | |||
2064 | static struct seq_operations softnet_seq_ops = { | ||
2065 | .start = softnet_seq_start, | ||
2066 | .next = softnet_seq_next, | ||
2067 | .stop = softnet_seq_stop, | ||
2068 | .show = softnet_seq_show, | ||
2069 | }; | ||
2070 | |||
2071 | static int softnet_seq_open(struct inode *inode, struct file *file) | ||
2072 | { | ||
2073 | return seq_open(file, &softnet_seq_ops); | ||
2074 | } | ||
2075 | |||
2076 | static struct file_operations softnet_seq_fops = { | ||
2077 | .owner = THIS_MODULE, | ||
2078 | .open = softnet_seq_open, | ||
2079 | .read = seq_read, | ||
2080 | .llseek = seq_lseek, | ||
2081 | .release = seq_release, | ||
2082 | }; | ||
2083 | |||
2084 | #ifdef WIRELESS_EXT | ||
2085 | extern int wireless_proc_init(void); | ||
2086 | #else | ||
2087 | #define wireless_proc_init() 0 | ||
2088 | #endif | ||
2089 | |||
2090 | static int __init dev_proc_init(void) | ||
2091 | { | ||
2092 | int rc = -ENOMEM; | ||
2093 | |||
2094 | if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) | ||
2095 | goto out; | ||
2096 | if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) | ||
2097 | goto out_dev; | ||
2098 | if (wireless_proc_init()) | ||
2099 | goto out_softnet; | ||
2100 | rc = 0; | ||
2101 | out: | ||
2102 | return rc; | ||
2103 | out_softnet: | ||
2104 | proc_net_remove("softnet_stat"); | ||
2105 | out_dev: | ||
2106 | proc_net_remove("dev"); | ||
2107 | goto out; | ||
2108 | } | ||
2109 | #else | ||
2110 | #define dev_proc_init() 0 | ||
2111 | #endif /* CONFIG_PROC_FS */ | ||
2112 | |||
2113 | |||
2114 | /** | ||
2115 | * netdev_set_master - set up master/slave pair | ||
2116 | * @slave: slave device | ||
2117 | * @master: new master device | ||
2118 | * | ||
2119 | * Changes the master device of the slave. Pass %NULL to break the | ||
2120 | * bonding. The caller must hold the RTNL semaphore. On a failure | ||
2121 | * a negative errno code is returned. On success the reference counts | ||
2122 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the | ||
2123 | * function returns zero. | ||
2124 | */ | ||
2125 | int netdev_set_master(struct net_device *slave, struct net_device *master) | ||
2126 | { | ||
2127 | struct net_device *old = slave->master; | ||
2128 | |||
2129 | ASSERT_RTNL(); | ||
2130 | |||
2131 | if (master) { | ||
2132 | if (old) | ||
2133 | return -EBUSY; | ||
2134 | dev_hold(master); | ||
2135 | } | ||
2136 | |||
2137 | slave->master = master; | ||
2138 | |||
2139 | synchronize_net(); | ||
2140 | |||
2141 | if (old) | ||
2142 | dev_put(old); | ||
2143 | |||
2144 | if (master) | ||
2145 | slave->flags |= IFF_SLAVE; | ||
2146 | else | ||
2147 | slave->flags &= ~IFF_SLAVE; | ||
2148 | |||
2149 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | ||
2150 | return 0; | ||
2151 | } | ||
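/* Editor's note -- usage sketch, not part of the original source.
 * The bonding driver enslaves and releases devices roughly like so
 * (RTNL held by the caller):
 *
 *	err = netdev_set_master(slave_dev, bond_dev);	// enslave
 *	...
 *	netdev_set_master(slave_dev, NULL);		// break the bond
 */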
2152 | |||
2153 | /** | ||
2154 | * dev_set_promiscuity - update promiscuity count on a device | ||
2155 | * @dev: device | ||
2156 | * @inc: modifier | ||
2157 | * | ||
2158 | * Add or remove promiscuity from a device. While the count in the device | ||
2159 | * remains above zero the interface remains promiscuous. Once it hits zero | ||
2160 | * the device reverts back to normal filtering operation. A negative @inc | ||
2161 | * value is used to drop promiscuity from the device. | ||
2162 | */ | ||
2163 | void dev_set_promiscuity(struct net_device *dev, int inc) | ||
2164 | { | ||
2165 | unsigned short old_flags = dev->flags; | ||
2166 | |||
2167 | dev->flags |= IFF_PROMISC; | ||
2168 | if ((dev->promiscuity += inc) == 0) | ||
2169 | dev->flags &= ~IFF_PROMISC; | ||
2170 | if (dev->flags ^ old_flags) { | ||
2171 | dev_mc_upload(dev); | ||
2172 | printk(KERN_INFO "device %s %s promiscuous mode\n", | ||
2173 | dev->name, (dev->flags & IFF_PROMISC) ? "entered" : | ||
2174 | "left"); | ||
2175 | } | ||
2176 | } | ||
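/* Editor's note -- usage sketch, not part of the original source.
 * Because promiscuity is a counter, independent users can stack;
 * e.g. a packet-capture user takes and later drops one reference:
 *
 *	dev_set_promiscuity(dev, 1);	// while capturing
 *	...
 *	dev_set_promiscuity(dev, -1);	// done capturing
 */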
2177 | |||
2178 | /** | ||
2179 | * dev_set_allmulti - update allmulti count on a device | ||
2180 | * @dev: device | ||
2181 | * @inc: modifier | ||
2182 | * | ||
2183 | * Add or remove reception of all multicast frames on a device. While the | ||
2184 | * count in the device remains above zero the interface keeps listening | ||
2185 | * to all multicast frames. Once it hits zero the device reverts back to | ||
2186 | * normal filtering operation. A negative @inc value is used to drop the | ||
2187 | * counter when releasing a resource that needed all multicasts. | ||
2188 | */ | ||
2189 | |||
2190 | void dev_set_allmulti(struct net_device *dev, int inc) | ||
2191 | { | ||
2192 | unsigned short old_flags = dev->flags; | ||
2193 | |||
2194 | dev->flags |= IFF_ALLMULTI; | ||
2195 | if ((dev->allmulti += inc) == 0) | ||
2196 | dev->flags &= ~IFF_ALLMULTI; | ||
2197 | if (dev->flags ^ old_flags) | ||
2198 | dev_mc_upload(dev); | ||
2199 | } | ||
2200 | |||
2201 | unsigned dev_get_flags(const struct net_device *dev) | ||
2202 | { | ||
2203 | unsigned flags; | ||
2204 | |||
2205 | flags = (dev->flags & ~(IFF_PROMISC | | ||
2206 | IFF_ALLMULTI | | ||
2207 | IFF_RUNNING)) | | ||
2208 | (dev->gflags & (IFF_PROMISC | | ||
2209 | IFF_ALLMULTI)); | ||
2210 | |||
2211 | if (netif_running(dev) && netif_carrier_ok(dev)) | ||
2212 | flags |= IFF_RUNNING; | ||
2213 | |||
2214 | return flags; | ||
2215 | } | ||
2216 | |||
2217 | int dev_change_flags(struct net_device *dev, unsigned flags) | ||
2218 | { | ||
2219 | int ret; | ||
2220 | int old_flags = dev->flags; | ||
2221 | |||
2222 | /* | ||
2223 | * Set the flags on our device. | ||
2224 | */ | ||
2225 | |||
2226 | dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | | ||
2227 | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | | ||
2228 | IFF_AUTOMEDIA)) | | ||
2229 | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | | ||
2230 | IFF_ALLMULTI)); | ||
2231 | |||
2232 | /* | ||
2233 | * Load in the correct multicast list now the flags have changed. | ||
2234 | */ | ||
2235 | |||
2236 | dev_mc_upload(dev); | ||
2237 | |||
2238 | /* | ||
2239 | * Have we downed the interface? We handle IFF_UP ourselves | ||
2240 | * according to user attempts to set it, rather than blindly | ||
2241 | * setting it. | ||
2242 | */ | ||
2243 | |||
2244 | ret = 0; | ||
2245 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ | ||
2246 | ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); | ||
2247 | |||
2248 | if (!ret) | ||
2249 | dev_mc_upload(dev); | ||
2250 | } | ||
2251 | |||
2252 | if (dev->flags & IFF_UP && | ||
2253 | ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | | ||
2254 | IFF_VOLATILE))) | ||
2255 | notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); | ||
2256 | |||
2257 | if ((flags ^ dev->gflags) & IFF_PROMISC) { | ||
2258 | int inc = (flags & IFF_PROMISC) ? +1 : -1; | ||
2259 | dev->gflags ^= IFF_PROMISC; | ||
2260 | dev_set_promiscuity(dev, inc); | ||
2261 | } | ||
2262 | |||
2263 | /* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI | ||
2264 | is important. Some (broken) drivers set IFF_PROMISC when | ||
2265 | IFF_ALLMULTI is requested, without asking us and without reporting it. | ||
2266 | */ | ||
2267 | if ((flags ^ dev->gflags) & IFF_ALLMULTI) { | ||
2268 | int inc = (flags & IFF_ALLMULTI) ? +1 : -1; | ||
2269 | dev->gflags ^= IFF_ALLMULTI; | ||
2270 | dev_set_allmulti(dev, inc); | ||
2271 | } | ||
2272 | |||
2273 | if (old_flags ^ dev->flags) | ||
2274 | rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); | ||
2275 | |||
2276 | return ret; | ||
2277 | } | ||
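/* Editor's note -- usage sketch, not part of the original source.
 * In-kernel callers bring an interface up or down by editing the
 * flags word under the RTNL semaphore:
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */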
2278 | |||
2279 | int dev_set_mtu(struct net_device *dev, int new_mtu) | ||
2280 | { | ||
2281 | int err; | ||
2282 | |||
2283 | if (new_mtu == dev->mtu) | ||
2284 | return 0; | ||
2285 | |||
2286 | /* MTU must not be negative. */ | ||
2287 | if (new_mtu < 0) | ||
2288 | return -EINVAL; | ||
2289 | |||
2290 | if (!netif_device_present(dev)) | ||
2291 | return -ENODEV; | ||
2292 | |||
2293 | err = 0; | ||
2294 | if (dev->change_mtu) | ||
2295 | err = dev->change_mtu(dev, new_mtu); | ||
2296 | else | ||
2297 | dev->mtu = new_mtu; | ||
2298 | if (!err && dev->flags & IFF_UP) | ||
2299 | notifier_call_chain(&netdev_chain, | ||
2300 | NETDEV_CHANGEMTU, dev); | ||
2301 | return err; | ||
2302 | } | ||
2303 | |||
2304 | int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | ||
2305 | { | ||
2306 | int err; | ||
2307 | |||
2308 | if (!dev->set_mac_address) | ||
2309 | return -EOPNOTSUPP; | ||
2310 | if (sa->sa_family != dev->type) | ||
2311 | return -EINVAL; | ||
2312 | if (!netif_device_present(dev)) | ||
2313 | return -ENODEV; | ||
2314 | err = dev->set_mac_address(dev, sa); | ||
2315 | if (!err) | ||
2316 | notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); | ||
2317 | return err; | ||
2318 | } | ||
2319 | |||
2320 | /* | ||
2321 | * Perform the SIOCxIFxxx calls. | ||
2322 | */ | ||
2323 | static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) | ||
2324 | { | ||
2325 | int err; | ||
2326 | struct net_device *dev = __dev_get_by_name(ifr->ifr_name); | ||
2327 | |||
2328 | if (!dev) | ||
2329 | return -ENODEV; | ||
2330 | |||
2331 | switch (cmd) { | ||
2332 | case SIOCGIFFLAGS: /* Get interface flags */ | ||
2333 | ifr->ifr_flags = dev_get_flags(dev); | ||
2334 | return 0; | ||
2335 | |||
2336 | case SIOCSIFFLAGS: /* Set interface flags */ | ||
2337 | return dev_change_flags(dev, ifr->ifr_flags); | ||
2338 | |||
2339 | case SIOCGIFMETRIC: /* Get the metric on the interface | ||
2340 | (currently unused) */ | ||
2341 | ifr->ifr_metric = 0; | ||
2342 | return 0; | ||
2343 | |||
2344 | case SIOCSIFMETRIC: /* Set the metric on the interface | ||
2345 | (currently unused) */ | ||
2346 | return -EOPNOTSUPP; | ||
2347 | |||
2348 | case SIOCGIFMTU: /* Get the MTU of a device */ | ||
2349 | ifr->ifr_mtu = dev->mtu; | ||
2350 | return 0; | ||
2351 | |||
2352 | case SIOCSIFMTU: /* Set the MTU of a device */ | ||
2353 | return dev_set_mtu(dev, ifr->ifr_mtu); | ||
2354 | |||
2355 | case SIOCGIFHWADDR: | ||
2356 | if (!dev->addr_len) | ||
2357 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); | ||
2358 | else | ||
2359 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, | ||
2360 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | ||
2361 | ifr->ifr_hwaddr.sa_family = dev->type; | ||
2362 | return 0; | ||
2363 | |||
2364 | case SIOCSIFHWADDR: | ||
2365 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); | ||
2366 | |||
2367 | case SIOCSIFHWBROADCAST: | ||
2368 | if (ifr->ifr_hwaddr.sa_family != dev->type) | ||
2369 | return -EINVAL; | ||
2370 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, | ||
2371 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | ||
2372 | notifier_call_chain(&netdev_chain, | ||
2373 | NETDEV_CHANGEADDR, dev); | ||
2374 | return 0; | ||
2375 | |||
2376 | case SIOCGIFMAP: | ||
2377 | ifr->ifr_map.mem_start = dev->mem_start; | ||
2378 | ifr->ifr_map.mem_end = dev->mem_end; | ||
2379 | ifr->ifr_map.base_addr = dev->base_addr; | ||
2380 | ifr->ifr_map.irq = dev->irq; | ||
2381 | ifr->ifr_map.dma = dev->dma; | ||
2382 | ifr->ifr_map.port = dev->if_port; | ||
2383 | return 0; | ||
2384 | |||
2385 | case SIOCSIFMAP: | ||
2386 | if (dev->set_config) { | ||
2387 | if (!netif_device_present(dev)) | ||
2388 | return -ENODEV; | ||
2389 | return dev->set_config(dev, &ifr->ifr_map); | ||
2390 | } | ||
2391 | return -EOPNOTSUPP; | ||
2392 | |||
2393 | case SIOCADDMULTI: | ||
2394 | if (!dev->set_multicast_list || | ||
2395 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | ||
2396 | return -EINVAL; | ||
2397 | if (!netif_device_present(dev)) | ||
2398 | return -ENODEV; | ||
2399 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | ||
2400 | dev->addr_len, 1); | ||
2401 | |||
2402 | case SIOCDELMULTI: | ||
2403 | if (!dev->set_multicast_list || | ||
2404 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | ||
2405 | return -EINVAL; | ||
2406 | if (!netif_device_present(dev)) | ||
2407 | return -ENODEV; | ||
2408 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | ||
2409 | dev->addr_len, 1); | ||
2410 | |||
2411 | case SIOCGIFINDEX: | ||
2412 | ifr->ifr_ifindex = dev->ifindex; | ||
2413 | return 0; | ||
2414 | |||
2415 | case SIOCGIFTXQLEN: | ||
2416 | ifr->ifr_qlen = dev->tx_queue_len; | ||
2417 | return 0; | ||
2418 | |||
2419 | case SIOCSIFTXQLEN: | ||
2420 | if (ifr->ifr_qlen < 0) | ||
2421 | return -EINVAL; | ||
2422 | dev->tx_queue_len = ifr->ifr_qlen; | ||
2423 | return 0; | ||
2424 | |||
2425 | case SIOCSIFNAME: | ||
2426 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; | ||
2427 | return dev_change_name(dev, ifr->ifr_newname); | ||
2428 | |||
2429 | /* | ||
2430 | * Unknown or private ioctl | ||
2431 | */ | ||
2432 | |||
2433 | default: | ||
2434 | if ((cmd >= SIOCDEVPRIVATE && | ||
2435 | cmd <= SIOCDEVPRIVATE + 15) || | ||
2436 | cmd == SIOCBONDENSLAVE || | ||
2437 | cmd == SIOCBONDRELEASE || | ||
2438 | cmd == SIOCBONDSETHWADDR || | ||
2439 | cmd == SIOCBONDSLAVEINFOQUERY || | ||
2440 | cmd == SIOCBONDINFOQUERY || | ||
2441 | cmd == SIOCBONDCHANGEACTIVE || | ||
2442 | cmd == SIOCGMIIPHY || | ||
2443 | cmd == SIOCGMIIREG || | ||
2444 | cmd == SIOCSMIIREG || | ||
2445 | cmd == SIOCBRADDIF || | ||
2446 | cmd == SIOCBRDELIF || | ||
2447 | cmd == SIOCWANDEV) { | ||
2448 | err = -EOPNOTSUPP; | ||
2449 | if (dev->do_ioctl) { | ||
2450 | if (netif_device_present(dev)) | ||
2451 | err = dev->do_ioctl(dev, ifr, | ||
2452 | cmd); | ||
2453 | else | ||
2454 | err = -ENODEV; | ||
2455 | } | ||
2456 | } else | ||
2457 | err = -EINVAL; | ||
2458 | |||
2459 | } | ||
2460 | return err; | ||
2461 | } | ||
2462 | |||
2463 | /* | ||
2464 | * This function handles all "interface"-type I/O control requests. The actual | ||
2465 | * 'doing' part of this is dev_ifsioc above. | ||
2466 | */ | ||
2467 | |||
2468 | /** | ||
2469 | * dev_ioctl - network device ioctl | ||
2470 | * @cmd: command to issue | ||
2471 | * @arg: pointer to a struct ifreq in user space | ||
2472 | * | ||
2473 | * Issue ioctl functions to devices. This is normally called by the | ||
2474 | * user space syscall interfaces but can sometimes be useful for | ||
2475 | * other purposes. The return value is the return from the syscall if | ||
2476 | * positive or a negative errno code on error. | ||
2477 | */ | ||
2478 | |||
2479 | int dev_ioctl(unsigned int cmd, void __user *arg) | ||
2480 | { | ||
2481 | struct ifreq ifr; | ||
2482 | int ret; | ||
2483 | char *colon; | ||
2484 | |||
2485 | /* One special case: SIOCGIFCONF takes ifconf argument | ||
2486 | and requires shared lock, because it sleeps writing | ||
2487 | to user space. | ||
2488 | */ | ||
2489 | |||
2490 | if (cmd == SIOCGIFCONF) { | ||
2491 | rtnl_shlock(); | ||
2492 | ret = dev_ifconf((char __user *) arg); | ||
2493 | rtnl_shunlock(); | ||
2494 | return ret; | ||
2495 | } | ||
2496 | if (cmd == SIOCGIFNAME) | ||
2497 | return dev_ifname((struct ifreq __user *)arg); | ||
2498 | |||
2499 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | ||
2500 | return -EFAULT; | ||
2501 | |||
2502 | ifr.ifr_name[IFNAMSIZ-1] = 0; | ||
2503 | |||
2504 | colon = strchr(ifr.ifr_name, ':'); | ||
2505 | if (colon) | ||
2506 | *colon = 0; | ||
2507 | |||
2508 | /* | ||
2509 | * See which interface the caller is talking about. | ||
2510 | */ | ||
2511 | |||
2512 | switch (cmd) { | ||
2513 | /* | ||
2514 | * These ioctl calls: | ||
2515 | * - can be done by all. | ||
2516 | * - atomic and do not require locking. | ||
2517 | * - return a value | ||
2518 | */ | ||
2519 | case SIOCGIFFLAGS: | ||
2520 | case SIOCGIFMETRIC: | ||
2521 | case SIOCGIFMTU: | ||
2522 | case SIOCGIFHWADDR: | ||
2523 | case SIOCGIFSLAVE: | ||
2524 | case SIOCGIFMAP: | ||
2525 | case SIOCGIFINDEX: | ||
2526 | case SIOCGIFTXQLEN: | ||
2527 | dev_load(ifr.ifr_name); | ||
2528 | read_lock(&dev_base_lock); | ||
2529 | ret = dev_ifsioc(&ifr, cmd); | ||
2530 | read_unlock(&dev_base_lock); | ||
2531 | if (!ret) { | ||
2532 | if (colon) | ||
2533 | *colon = ':'; | ||
2534 | if (copy_to_user(arg, &ifr, | ||
2535 | sizeof(struct ifreq))) | ||
2536 | ret = -EFAULT; | ||
2537 | } | ||
2538 | return ret; | ||
2539 | |||
2540 | case SIOCETHTOOL: | ||
2541 | dev_load(ifr.ifr_name); | ||
2542 | rtnl_lock(); | ||
2543 | ret = dev_ethtool(&ifr); | ||
2544 | rtnl_unlock(); | ||
2545 | if (!ret) { | ||
2546 | if (colon) | ||
2547 | *colon = ':'; | ||
2548 | if (copy_to_user(arg, &ifr, | ||
2549 | sizeof(struct ifreq))) | ||
2550 | ret = -EFAULT; | ||
2551 | } | ||
2552 | return ret; | ||
2553 | |||
2554 | /* | ||
2555 | * These ioctl calls: | ||
2556 | * - require superuser power. | ||
2557 | * - require strict serialization. | ||
2558 | * - return a value | ||
2559 | */ | ||
2560 | case SIOCGMIIPHY: | ||
2561 | case SIOCGMIIREG: | ||
2562 | case SIOCSIFNAME: | ||
2563 | if (!capable(CAP_NET_ADMIN)) | ||
2564 | return -EPERM; | ||
2565 | dev_load(ifr.ifr_name); | ||
2566 | rtnl_lock(); | ||
2567 | ret = dev_ifsioc(&ifr, cmd); | ||
2568 | rtnl_unlock(); | ||
2569 | if (!ret) { | ||
2570 | if (colon) | ||
2571 | *colon = ':'; | ||
2572 | if (copy_to_user(arg, &ifr, | ||
2573 | sizeof(struct ifreq))) | ||
2574 | ret = -EFAULT; | ||
2575 | } | ||
2576 | return ret; | ||
2577 | |||
2578 | /* | ||
2579 | * These ioctl calls: | ||
2580 | * - require superuser power. | ||
2581 | * - require strict serialization. | ||
2582 | * - do not return a value | ||
2583 | */ | ||
2584 | case SIOCSIFFLAGS: | ||
2585 | case SIOCSIFMETRIC: | ||
2586 | case SIOCSIFMTU: | ||
2587 | case SIOCSIFMAP: | ||
2588 | case SIOCSIFHWADDR: | ||
2589 | case SIOCSIFSLAVE: | ||
2590 | case SIOCADDMULTI: | ||
2591 | case SIOCDELMULTI: | ||
2592 | case SIOCSIFHWBROADCAST: | ||
2593 | case SIOCSIFTXQLEN: | ||
2594 | case SIOCSMIIREG: | ||
2595 | case SIOCBONDENSLAVE: | ||
2596 | case SIOCBONDRELEASE: | ||
2597 | case SIOCBONDSETHWADDR: | ||
2598 | case SIOCBONDSLAVEINFOQUERY: | ||
2599 | case SIOCBONDINFOQUERY: | ||
2600 | case SIOCBONDCHANGEACTIVE: | ||
2601 | case SIOCBRADDIF: | ||
2602 | case SIOCBRDELIF: | ||
2603 | if (!capable(CAP_NET_ADMIN)) | ||
2604 | return -EPERM; | ||
2605 | dev_load(ifr.ifr_name); | ||
2606 | rtnl_lock(); | ||
2607 | ret = dev_ifsioc(&ifr, cmd); | ||
2608 | rtnl_unlock(); | ||
2609 | return ret; | ||
2610 | |||
2611 | case SIOCGIFMEM: | ||
2612 | /* Get the per device memory space. We can add this but | ||
2613 | * currently do not support it */ | ||
2614 | case SIOCSIFMEM: | ||
2615 | /* Set the per device memory buffer space. | ||
2616 | * Not applicable in our case */ | ||
2617 | case SIOCSIFLINK: | ||
2618 | return -EINVAL; | ||
2619 | |||
2620 | /* | ||
2621 | * Unknown or private ioctl. | ||
2622 | */ | ||
2623 | default: | ||
2624 | if (cmd == SIOCWANDEV || | ||
2625 | (cmd >= SIOCDEVPRIVATE && | ||
2626 | cmd <= SIOCDEVPRIVATE + 15)) { | ||
2627 | dev_load(ifr.ifr_name); | ||
2628 | rtnl_lock(); | ||
2629 | ret = dev_ifsioc(&ifr, cmd); | ||
2630 | rtnl_unlock(); | ||
2631 | if (!ret && copy_to_user(arg, &ifr, | ||
2632 | sizeof(struct ifreq))) | ||
2633 | ret = -EFAULT; | ||
2634 | return ret; | ||
2635 | } | ||
2636 | #ifdef WIRELESS_EXT | ||
2637 | /* Take care of Wireless Extensions */ | ||
2638 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { | ||
2639 | /* If command is `set a parameter', or | ||
2640 | * `get the encoding parameters', check if | ||
2641 | * the user has the right to do it */ | ||
2642 | if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) { | ||
2643 | if (!capable(CAP_NET_ADMIN)) | ||
2644 | return -EPERM; | ||
2645 | } | ||
2646 | dev_load(ifr.ifr_name); | ||
2647 | rtnl_lock(); | ||
2648 | /* Follow me in net/core/wireless.c */ | ||
2649 | ret = wireless_process_ioctl(&ifr, cmd); | ||
2650 | rtnl_unlock(); | ||
2651 | if (IW_IS_GET(cmd) && | ||
2652 | copy_to_user(arg, &ifr, | ||
2653 | sizeof(struct ifreq))) | ||
2654 | ret = -EFAULT; | ||
2655 | return ret; | ||
2656 | } | ||
2657 | #endif /* WIRELESS_EXT */ | ||
2658 | return -EINVAL; | ||
2659 | } | ||
2660 | } | ||
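/* Editor's note -- user-space sketch, not part of the original source.
 * These requests arrive through an ordinary socket; e.g. reading and
 * then setting the MTU of eth0 (the set requires CAP_NET_ADMIN):
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 *
 *	ifr.ifr_mtu = 1500;
 *	ioctl(fd, SIOCSIFMTU, &ifr);
 */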
2661 | |||
2662 | |||
2663 | /** | ||
2664 | * dev_new_index - allocate an ifindex | ||
2665 | * | ||
2666 | * Returns a suitable unique value for a new device interface | ||
2667 | * number. The caller must hold the rtnl semaphore or the | ||
2668 | * dev_base_lock to be sure it remains unique. | ||
2669 | */ | ||
2670 | static int dev_new_index(void) | ||
2671 | { | ||
2672 | static int ifindex; | ||
2673 | for (;;) { | ||
2674 | if (++ifindex <= 0) | ||
2675 | ifindex = 1; | ||
2676 | if (!__dev_get_by_index(ifindex)) | ||
2677 | return ifindex; | ||
2678 | } | ||
2679 | } | ||
2680 | |||
2681 | static int dev_boot_phase = 1; | ||
2682 | |||
2683 | /* Delayed registration/unregistration */ | ||
2684 | static DEFINE_SPINLOCK(net_todo_list_lock); | ||
2685 | static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); | ||
2686 | |||
2687 | static inline void net_set_todo(struct net_device *dev) | ||
2688 | { | ||
2689 | spin_lock(&net_todo_list_lock); | ||
2690 | list_add_tail(&dev->todo_list, &net_todo_list); | ||
2691 | spin_unlock(&net_todo_list_lock); | ||
2692 | } | ||
2693 | |||
2694 | /** | ||
2695 | * register_netdevice - register a network device | ||
2696 | * @dev: device to register | ||
2697 | * | ||
2698 | * Take a completed network device structure and add it to the kernel | ||
2699 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | ||
2700 | * chain. 0 is returned on success. A negative errno code is returned | ||
2701 | * on a failure to set up the device, or if the name is a duplicate. | ||
2702 | * | ||
2703 | * Callers must hold the rtnl semaphore. You may want | ||
2704 | * register_netdev() instead of this. | ||
2705 | * | ||
2706 | * BUGS: | ||
2707 | * The locking appears insufficient to guarantee two parallel registers | ||
2708 | * will not get the same name. | ||
2709 | */ | ||
2710 | |||
2711 | int register_netdevice(struct net_device *dev) | ||
2712 | { | ||
2713 | struct hlist_head *head; | ||
2714 | struct hlist_node *p; | ||
2715 | int ret; | ||
2716 | |||
2717 | BUG_ON(dev_boot_phase); | ||
2718 | ASSERT_RTNL(); | ||
2719 | |||
2720 | /* When net_devices are persistent, this will be fatal. */ | ||
2721 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); | ||
2722 | |||
2723 | spin_lock_init(&dev->queue_lock); | ||
2724 | spin_lock_init(&dev->xmit_lock); | ||
2725 | dev->xmit_lock_owner = -1; | ||
2726 | #ifdef CONFIG_NET_CLS_ACT | ||
2727 | spin_lock_init(&dev->ingress_lock); | ||
2728 | #endif | ||
2729 | |||
2730 | ret = alloc_divert_blk(dev); | ||
2731 | if (ret) | ||
2732 | goto out; | ||
2733 | |||
2734 | dev->iflink = -1; | ||
2735 | |||
2736 | /* Init, if this function is available */ | ||
2737 | if (dev->init) { | ||
2738 | ret = dev->init(dev); | ||
2739 | if (ret) { | ||
2740 | if (ret > 0) | ||
2741 | ret = -EIO; | ||
2742 | goto out_err; | ||
2743 | } | ||
2744 | } | ||
2745 | |||
2746 | if (!dev_valid_name(dev->name)) { | ||
2747 | ret = -EINVAL; | ||
2748 | goto out_err; | ||
2749 | } | ||
2750 | |||
2751 | dev->ifindex = dev_new_index(); | ||
2752 | if (dev->iflink == -1) | ||
2753 | dev->iflink = dev->ifindex; | ||
2754 | |||
2755 | /* Check for existence of name */ | ||
2756 | head = dev_name_hash(dev->name); | ||
2757 | hlist_for_each(p, head) { | ||
2758 | struct net_device *d | ||
2759 | = hlist_entry(p, struct net_device, name_hlist); | ||
2760 | if (!strncmp(d->name, dev->name, IFNAMSIZ)) { | ||
2761 | ret = -EEXIST; | ||
2762 | goto out_err; | ||
2763 | } | ||
2764 | } | ||
2765 | |||
2766 | /* Fix illegal SG+CSUM combinations. */ | ||
2767 | if ((dev->features & NETIF_F_SG) && | ||
2768 | !(dev->features & (NETIF_F_IP_CSUM | | ||
2769 | NETIF_F_NO_CSUM | | ||
2770 | NETIF_F_HW_CSUM))) { | ||
2771 | printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", | ||
2772 | dev->name); | ||
2773 | dev->features &= ~NETIF_F_SG; | ||
2774 | } | ||
2775 | |||
2776 | /* TSO requires that SG is present as well. */ | ||
2777 | if ((dev->features & NETIF_F_TSO) && | ||
2778 | !(dev->features & NETIF_F_SG)) { | ||
2779 | printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", | ||
2780 | dev->name); | ||
2781 | dev->features &= ~NETIF_F_TSO; | ||
2782 | } | ||
2783 | |||
2784 | /* | ||
2785 | * A nil rebuild_header routine that should never be | ||
2786 | * called; it is used just as a bug trap. | ||
2787 | */ | ||
2788 | |||
2789 | if (!dev->rebuild_header) | ||
2790 | dev->rebuild_header = default_rebuild_header; | ||
2791 | |||
2792 | /* | ||
2793 | * Default initial state at registry is that the | ||
2794 | * device is present. | ||
2795 | */ | ||
2796 | |||
2797 | set_bit(__LINK_STATE_PRESENT, &dev->state); | ||
2798 | |||
2799 | dev->next = NULL; | ||
2800 | dev_init_scheduler(dev); | ||
2801 | write_lock_bh(&dev_base_lock); | ||
2802 | *dev_tail = dev; | ||
2803 | dev_tail = &dev->next; | ||
2804 | hlist_add_head(&dev->name_hlist, head); | ||
2805 | hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); | ||
2806 | dev_hold(dev); | ||
2807 | dev->reg_state = NETREG_REGISTERING; | ||
2808 | write_unlock_bh(&dev_base_lock); | ||
2809 | |||
2810 | /* Notify protocols, that a new device appeared. */ | ||
2811 | notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); | ||
2812 | |||
2813 | /* Finish registration after unlock */ | ||
2814 | net_set_todo(dev); | ||
2815 | ret = 0; | ||
2816 | |||
2817 | out: | ||
2818 | return ret; | ||
2819 | out_err: | ||
2820 | free_divert_blk(dev); | ||
2821 | goto out; | ||
2822 | } | ||
2823 | |||
2824 | /** | ||
2825 | * register_netdev - register a network device | ||
2826 | * @dev: device to register | ||
2827 | * | ||
2828 | * Take a completed network device structure and add it to the kernel | ||
2829 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | ||
2830 | * chain. 0 is returned on success. A negative errno code is returned | ||
2831 | * on a failure to set up the device, or if the name is a duplicate. | ||
2832 | * | ||
2833 | * This is a wrapper around register_netdevice that takes the rtnl semaphore | ||
2834 | * and expands the device name if you passed a format string to | ||
2835 | * alloc_netdev. | ||
2836 | */ | ||
2837 | int register_netdev(struct net_device *dev) | ||
2838 | { | ||
2839 | int err; | ||
2840 | |||
2841 | rtnl_lock(); | ||
2842 | |||
2843 | /* | ||
2844 | * If the name is a format string the caller wants us to do a | ||
2845 | * name allocation. | ||
2846 | */ | ||
2847 | if (strchr(dev->name, '%')) { | ||
2848 | err = dev_alloc_name(dev, dev->name); | ||
2849 | if (err < 0) | ||
2850 | goto out; | ||
2851 | } | ||
2852 | |||
2853 | /* | ||
2854 | * Back compatibility hook. Kill this one in 2.5 | ||
2855 | */ | ||
2856 | if (dev->name[0] == 0 || dev->name[0] == ' ') { | ||
2857 | err = dev_alloc_name(dev, "eth%d"); | ||
2858 | if (err < 0) | ||
2859 | goto out; | ||
2860 | } | ||
2861 | |||
2862 | err = register_netdevice(dev); | ||
2863 | out: | ||
2864 | rtnl_unlock(); | ||
2865 | return err; | ||
2866 | } | ||
2867 | EXPORT_SYMBOL(register_netdev); | ||
2868 | |||
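For context, the usual driver-side sequence that ends up in register_netdev() looks roughly like the sketch below; my_priv, my_setup and the "myeth%d" template are hypothetical names, not part of this patch:

    /* my_priv, my_setup and the "myeth%d" template are hypothetical. */
    struct my_priv {
            int placeholder;
    };

    static void my_setup(struct net_device *dev)
    {
            ether_setup(dev);       /* fill in the Ethernet defaults */
    }

    static struct net_device *my_probe(void)
    {
            struct net_device *dev;

            dev = alloc_netdev(sizeof(struct my_priv), "myeth%d", my_setup);
            if (!dev)
                    return NULL;

            if (register_netdev(dev)) {     /* takes rtnl, expands "myeth%d" */
                    free_netdev(dev);       /* safe: still NETREG_UNINITIALIZED */
                    return NULL;
            }
            return dev;
    }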
2869 | /* | ||
2870 | * netdev_wait_allrefs - wait until all references are gone. | ||
2871 | * | ||
2872 | * This is called when unregistering network devices. | ||
2873 | * | ||
2874 | * Any protocol or device that holds a reference should register | ||
2875 | * for netdevice notification, and cleanup and put back the | ||
2876 | * reference if they receive an UNREGISTER event. | ||
2877 | * We can get stuck here if buggy protocols don't correctly | ||
2878 | * call dev_put. | ||
2879 | */ | ||
2880 | static void netdev_wait_allrefs(struct net_device *dev) | ||
2881 | { | ||
2882 | unsigned long rebroadcast_time, warning_time; | ||
2883 | |||
2884 | rebroadcast_time = warning_time = jiffies; | ||
2885 | while (atomic_read(&dev->refcnt) != 0) { | ||
2886 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | ||
2887 | rtnl_shlock(); | ||
2888 | |||
2889 | /* Rebroadcast unregister notification */ | ||
2890 | notifier_call_chain(&netdev_chain, | ||
2891 | NETDEV_UNREGISTER, dev); | ||
2892 | |||
2893 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, | ||
2894 | &dev->state)) { | ||
2895 | /* We must not have linkwatch events | ||
2896 | * pending on unregister. If this | ||
2897 | * happens, we simply run the queue | ||
2898 | * unscheduled, resulting in a noop | ||
2899 | * for this device. | ||
2900 | */ | ||
2901 | linkwatch_run_queue(); | ||
2902 | } | ||
2903 | |||
2904 | rtnl_shunlock(); | ||
2905 | |||
2906 | rebroadcast_time = jiffies; | ||
2907 | } | ||
2908 | |||
2909 | msleep(250); | ||
2910 | |||
2911 | if (time_after(jiffies, warning_time + 10 * HZ)) { | ||
2912 | printk(KERN_EMERG "unregister_netdevice: " | ||
2913 | "waiting for %s to become free. Usage " | ||
2914 | "count = %d\n", | ||
2915 | dev->name, atomic_read(&dev->refcnt)); | ||
2916 | warning_time = jiffies; | ||
2917 | } | ||
2918 | } | ||
2919 | } | ||
2920 | |||
2921 | /* The sequence is: | ||
2922 | * | ||
2923 | * rtnl_lock(); | ||
2924 | * ... | ||
2925 | * register_netdevice(x1); | ||
2926 | * register_netdevice(x2); | ||
2927 | * ... | ||
2928 | * unregister_netdevice(y1); | ||
2929 | * unregister_netdevice(y2); | ||
2930 | * ... | ||
2931 | * rtnl_unlock(); | ||
2932 | * free_netdev(y1); | ||
2933 | * free_netdev(y2); | ||
2934 | * | ||
2935 | * We are invoked by rtnl_unlock() after it drops the semaphore. | ||
2936 | * This allows us to deal with problems: | ||
2937 | * 1) We can create/delete sysfs objects which invoke hotplug | ||
2938 | * without deadlocking with linkwatch via keventd. | ||
2939 | * 2) Since we run with the RTNL semaphore not held, we can sleep | ||
2940 | * safely in order to wait for the netdev refcnt to drop to zero. | ||
2941 | */ | ||
2942 | static DECLARE_MUTEX(net_todo_run_mutex); | ||
2943 | void netdev_run_todo(void) | ||
2944 | { | ||
2945 | struct list_head list = LIST_HEAD_INIT(list); | ||
2946 | int err; | ||
2947 | |||
2948 | |||
2949 | /* Need to guard against multiple CPUs getting out of order. */ | ||
2950 | down(&net_todo_run_mutex); | ||
2951 | |||
2952 | /* Not safe to do outside the semaphore. We must not return | ||
2953 | * until all unregister events invoked by the local processor | ||
2954 | * have been completed (either by this todo run, or one on | ||
2955 | * another cpu). | ||
2956 | */ | ||
2957 | if (list_empty(&net_todo_list)) | ||
2958 | goto out; | ||
2959 | |||
2960 | /* Snapshot list, allow later requests */ | ||
2961 | spin_lock(&net_todo_list_lock); | ||
2962 | list_splice_init(&net_todo_list, &list); | ||
2963 | spin_unlock(&net_todo_list_lock); | ||
2964 | |||
2965 | while (!list_empty(&list)) { | ||
2966 | struct net_device *dev | ||
2967 | = list_entry(list.next, struct net_device, todo_list); | ||
2968 | list_del(&dev->todo_list); | ||
2969 | |||
2970 | switch(dev->reg_state) { | ||
2971 | case NETREG_REGISTERING: | ||
2972 | err = netdev_register_sysfs(dev); | ||
2973 | if (err) | ||
2974 | printk(KERN_ERR "%s: failed sysfs registration (%d)\n", | ||
2975 | dev->name, err); | ||
2976 | dev->reg_state = NETREG_REGISTERED; | ||
2977 | break; | ||
2978 | |||
2979 | case NETREG_UNREGISTERING: | ||
2980 | netdev_unregister_sysfs(dev); | ||
2981 | dev->reg_state = NETREG_UNREGISTERED; | ||
2982 | |||
2983 | netdev_wait_allrefs(dev); | ||
2984 | |||
2985 | /* paranoia */ | ||
2986 | BUG_ON(atomic_read(&dev->refcnt)); | ||
2987 | BUG_TRAP(!dev->ip_ptr); | ||
2988 | BUG_TRAP(!dev->ip6_ptr); | ||
2989 | BUG_TRAP(!dev->dn_ptr); | ||
2990 | |||
2991 | |||
2992 | /* It must be the very last action, | ||
2993 | * after this 'dev' may point to freed up memory. | ||
2994 | */ | ||
2995 | if (dev->destructor) | ||
2996 | dev->destructor(dev); | ||
2997 | break; | ||
2998 | |||
2999 | default: | ||
3000 | printk(KERN_ERR "network todo '%s' but state %d\n", | ||
3001 | dev->name, dev->reg_state); | ||
3002 | break; | ||
3003 | } | ||
3004 | } | ||
3005 | |||
3006 | out: | ||
3007 | up(&net_todo_run_mutex); | ||
3008 | } | ||
3009 | |||
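netdev_run_todo() uses a common idiom: splice the shared list onto a private head under the spinlock, then process the snapshot with no lock held. A hedged standalone sketch of that idiom, where work, work_list, work_lock and handle_one() are all hypothetical:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct work {
            struct list_head node;
    };

    static LIST_HEAD(work_list);
    static DEFINE_SPINLOCK(work_lock);

    static void handle_one(struct work *w) { /* process one item */ }

    static void drain_work(void)
    {
            LIST_HEAD(local);
            struct work *w, *tmp;

            /* Steal the whole shared list in O(1) under the lock... */
            spin_lock(&work_lock);
            list_splice_init(&work_list, &local);
            spin_unlock(&work_lock);

            /* ...then walk the private snapshot with no lock held, so
             * handlers may sleep and new work may be queued concurrently. */
            list_for_each_entry_safe(w, tmp, &local, node) {
                    list_del(&w->node);
                    handle_one(w);
            }
    }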
3010 | /** | ||
3011 | * alloc_netdev - allocate network device | ||
3012 | * @sizeof_priv: size of private data to allocate space for | ||
3013 | * @name: device name format string | ||
3014 | * @setup: callback to initialize device | ||
3015 | * | ||
3016 | * Allocates a struct net_device with private data area for driver use | ||
3017 | * and performs basic initialization. | ||
3018 | */ | ||
3019 | struct net_device *alloc_netdev(int sizeof_priv, const char *name, | ||
3020 | void (*setup)(struct net_device *)) | ||
3021 | { | ||
3022 | void *p; | ||
3023 | struct net_device *dev; | ||
3024 | int alloc_size; | ||
3025 | |||
3026 | /* ensure 32-byte alignment of both the device and private area */ | ||
3027 | alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; | ||
3028 | alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; | ||
3029 | |||
3030 | p = kmalloc(alloc_size, GFP_KERNEL); | ||
3031 | if (!p) { | ||
3032 | printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); | ||
3033 | return NULL; | ||
3034 | } | ||
3035 | memset(p, 0, alloc_size); | ||
3036 | |||
3037 | dev = (struct net_device *) | ||
3038 | (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); | ||
3039 | dev->padded = (char *)dev - (char *)p; | ||
3040 | |||
3041 | if (sizeof_priv) | ||
3042 | dev->priv = netdev_priv(dev); | ||
3043 | |||
3044 | setup(dev); | ||
3045 | strcpy(dev->name, name); | ||
3046 | return dev; | ||
3047 | } | ||
3048 | EXPORT_SYMBOL(alloc_netdev); | ||
3049 | |||
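The `(x + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST` arithmetic rounds the kmalloc() pointer up to the next alignment boundary, and dev->padded records how far it moved so free_netdev() can recover the original pointer. A tiny userspace demo of the same arithmetic, assuming the constant is the alignment minus one (31 for 32-byte alignment):

    #include <stdio.h>

    #define ALIGN_CONST 31UL        /* stand-in: 32-byte alignment - 1 */

    int main(void)
    {
            unsigned long p = 0x1005;       /* pretend kmalloc() returned this */
            unsigned long dev = (p + ALIGN_CONST) & ~ALIGN_CONST;

            /* prints: raw=0x1005 aligned=0x1020 padded=27 */
            printf("raw=%#lx aligned=%#lx padded=%lu\n", p, dev, dev - p);
            return 0;
    }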
3050 | /** | ||
3051 | * free_netdev - free network device | ||
3052 | * @dev: device | ||
3053 | * | ||
3054 | * This function does the last stage of destroying an allocated device | ||
3055 | * interface. The reference to the device object is released. | ||
3056 | * If this is the last reference then it will be freed. | ||
3057 | */ | ||
3058 | void free_netdev(struct net_device *dev) | ||
3059 | { | ||
3060 | #ifdef CONFIG_SYSFS | ||
3061 | /* Compatibility with error handling in drivers */ | ||
3062 | if (dev->reg_state == NETREG_UNINITIALIZED) { | ||
3063 | kfree((char *)dev - dev->padded); | ||
3064 | return; | ||
3065 | } | ||
3066 | |||
3067 | BUG_ON(dev->reg_state != NETREG_UNREGISTERED); | ||
3068 | dev->reg_state = NETREG_RELEASED; | ||
3069 | |||
3070 | /* will free via class release */ | ||
3071 | class_device_put(&dev->class_dev); | ||
3072 | #else | ||
3073 | kfree((char *)dev - dev->padded); | ||
3074 | #endif | ||
3075 | } | ||
3076 | |||
3077 | /* Synchronize with packet receive processing. */ | ||
3078 | void synchronize_net(void) | ||
3079 | { | ||
3080 | might_sleep(); | ||
3081 | synchronize_kernel(); | ||
3082 | } | ||
3083 | |||
3084 | /** | ||
3085 | * unregister_netdevice - remove device from the kernel | ||
3086 | * @dev: device | ||
3087 | * | ||
3088 | * This function shuts down a device interface and removes it | ||
3089 | * from the kernel tables. On success 0 is returned, on a failure | ||
3090 | * a negative errno code is returned. | ||
3091 | * | ||
3092 | * Callers must hold the rtnl semaphore. You may want | ||
3093 | * unregister_netdev() instead of this. | ||
3094 | */ | ||
3095 | |||
3096 | int unregister_netdevice(struct net_device *dev) | ||
3097 | { | ||
3098 | struct net_device *d, **dp; | ||
3099 | |||
3100 | BUG_ON(dev_boot_phase); | ||
3101 | ASSERT_RTNL(); | ||
3102 | |||
3103 | /* Some devices call this without having been registered, to unwind a failed init. */ | ||
3104 | if (dev->reg_state == NETREG_UNINITIALIZED) { | ||
3105 | printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " | ||
3106 | "was registered\n", dev->name, dev); | ||
3107 | return -ENODEV; | ||
3108 | } | ||
3109 | |||
3110 | BUG_ON(dev->reg_state != NETREG_REGISTERED); | ||
3111 | |||
3112 | /* If device is running, close it first. */ | ||
3113 | if (dev->flags & IFF_UP) | ||
3114 | dev_close(dev); | ||
3115 | |||
3116 | /* And unlink it from device chain. */ | ||
3117 | for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { | ||
3118 | if (d == dev) { | ||
3119 | write_lock_bh(&dev_base_lock); | ||
3120 | hlist_del(&dev->name_hlist); | ||
3121 | hlist_del(&dev->index_hlist); | ||
3122 | if (dev_tail == &dev->next) | ||
3123 | dev_tail = dp; | ||
3124 | *dp = d->next; | ||
3125 | write_unlock_bh(&dev_base_lock); | ||
3126 | break; | ||
3127 | } | ||
3128 | } | ||
3129 | if (!d) { | ||
3130 | printk(KERN_ERR "unregister net_device: '%s' not found\n", | ||
3131 | dev->name); | ||
3132 | return -ENODEV; | ||
3133 | } | ||
3134 | |||
3135 | dev->reg_state = NETREG_UNREGISTERING; | ||
3136 | |||
3137 | synchronize_net(); | ||
3138 | |||
3139 | /* Shutdown queueing discipline. */ | ||
3140 | dev_shutdown(dev); | ||
3141 | |||
3142 | |||
3143 | /* Notify protocols that we are about to destroy | ||
3144 | this device. They should clean up all their state. | ||
3145 | */ | ||
3146 | notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); | ||
3147 | |||
3148 | /* | ||
3149 | * Flush the multicast chain | ||
3150 | */ | ||
3151 | dev_mc_discard(dev); | ||
3152 | |||
3153 | if (dev->uninit) | ||
3154 | dev->uninit(dev); | ||
3155 | |||
3156 | /* Notifier chain MUST detach us from master device. */ | ||
3157 | BUG_TRAP(!dev->master); | ||
3158 | |||
3159 | free_divert_blk(dev); | ||
3160 | |||
3161 | /* Finish processing unregister after unlock */ | ||
3162 | net_set_todo(dev); | ||
3163 | |||
3164 | synchronize_net(); | ||
3165 | |||
3166 | dev_put(dev); | ||
3167 | return 0; | ||
3168 | } | ||
3169 | |||
3170 | /** | ||
3171 | * unregister_netdev - remove device from the kernel | ||
3172 | * @dev: device | ||
3173 | * | ||
3174 | * This function shuts down a device interface and removes it | ||
3175 | * from the kernel tables. On success 0 is returned, on a failure | ||
3176 | * a negative errno code is returned. | ||
3177 | * | ||
3178 | * This is just a wrapper for unregister_netdevice that takes | ||
3179 | * the rtnl semaphore. In general you want to use this and not | ||
3180 | * unregister_netdevice. | ||
3181 | */ | ||
3182 | void unregister_netdev(struct net_device *dev) | ||
3183 | { | ||
3184 | rtnl_lock(); | ||
3185 | unregister_netdevice(dev); | ||
3186 | rtnl_unlock(); | ||
3187 | } | ||
3188 | |||
3189 | EXPORT_SYMBOL(unregister_netdev); | ||
3190 | |||
3191 | #ifdef CONFIG_HOTPLUG_CPU | ||
3192 | static int dev_cpu_callback(struct notifier_block *nfb, | ||
3193 | unsigned long action, | ||
3194 | void *ocpu) | ||
3195 | { | ||
3196 | struct sk_buff **list_skb; | ||
3197 | struct net_device **list_net; | ||
3198 | struct sk_buff *skb; | ||
3199 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | ||
3200 | struct softnet_data *sd, *oldsd; | ||
3201 | |||
3202 | if (action != CPU_DEAD) | ||
3203 | return NOTIFY_OK; | ||
3204 | |||
3205 | local_irq_disable(); | ||
3206 | cpu = smp_processor_id(); | ||
3207 | sd = &per_cpu(softnet_data, cpu); | ||
3208 | oldsd = &per_cpu(softnet_data, oldcpu); | ||
3209 | |||
3210 | /* Find end of our completion_queue. */ | ||
3211 | list_skb = &sd->completion_queue; | ||
3212 | while (*list_skb) | ||
3213 | list_skb = &(*list_skb)->next; | ||
3214 | /* Append completion queue from offline CPU. */ | ||
3215 | *list_skb = oldsd->completion_queue; | ||
3216 | oldsd->completion_queue = NULL; | ||
3217 | |||
3218 | /* Find end of our output_queue. */ | ||
3219 | list_net = &sd->output_queue; | ||
3220 | while (*list_net) | ||
3221 | list_net = &(*list_net)->next_sched; | ||
3222 | /* Append output queue from offline CPU. */ | ||
3223 | *list_net = oldsd->output_queue; | ||
3224 | oldsd->output_queue = NULL; | ||
3225 | |||
3226 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | ||
3227 | local_irq_enable(); | ||
3228 | |||
3229 | /* Process offline CPU's input_pkt_queue */ | ||
3230 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | ||
3231 | netif_rx(skb); | ||
3232 | |||
3233 | return NOTIFY_OK; | ||
3234 | } | ||
3235 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
3236 | |||
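dev_cpu_callback() splices the dead CPU's queues onto the live CPU's by walking a pointer-to-pointer to the terminating NULL, which needs no special case for an empty list. A standalone sketch of that walk:

    #include <stdio.h>

    struct node {
            int val;
            struct node *next;
    };

    /* Append list "extra" at the tail of *headp using the same
     * pointer-to-pointer walk as the queue splicing above. */
    static void splice_tail(struct node **headp, struct node *extra)
    {
            while (*headp)          /* walk to the terminating NULL */
                    headp = &(*headp)->next;
            *headp = extra;         /* one store; empty list needs no special case */
    }

    int main(void)
    {
            struct node b = { 2, NULL }, a = { 1, &b };
            struct node d = { 4, NULL }, c = { 3, &d };
            struct node *list = &a;
            struct node *n;

            splice_tail(&list, &c);
            for (n = list; n; n = n->next)
                    printf("%d ", n->val);  /* 1 2 3 4 */
            printf("\n");
            return 0;
    }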
3237 | |||
3238 | /* | ||
3239 | * Initialize the DEV module. At boot time this walks the device list and | ||
3240 | * unhooks any devices that fail to initialise (normally hardware not | ||
3241 | * present) and leaves us with a valid list of present and active devices. | ||
3242 | * | ||
3243 | */ | ||
3244 | |||
3245 | /* | ||
3246 | * This is called single threaded during boot, so no need | ||
3247 | * to take the rtnl semaphore. | ||
3248 | */ | ||
3249 | static int __init net_dev_init(void) | ||
3250 | { | ||
3251 | int i, rc = -ENOMEM; | ||
3252 | |||
3253 | BUG_ON(!dev_boot_phase); | ||
3254 | |||
3255 | net_random_init(); | ||
3256 | |||
3257 | if (dev_proc_init()) | ||
3258 | goto out; | ||
3259 | |||
3260 | if (netdev_sysfs_init()) | ||
3261 | goto out; | ||
3262 | |||
3263 | INIT_LIST_HEAD(&ptype_all); | ||
3264 | for (i = 0; i < 16; i++) | ||
3265 | INIT_LIST_HEAD(&ptype_base[i]); | ||
3266 | |||
3267 | for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) | ||
3268 | INIT_HLIST_HEAD(&dev_name_head[i]); | ||
3269 | |||
3270 | for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) | ||
3271 | INIT_HLIST_HEAD(&dev_index_head[i]); | ||
3272 | |||
3273 | /* | ||
3274 | * Initialise the packet receive queues. | ||
3275 | */ | ||
3276 | |||
3277 | for (i = 0; i < NR_CPUS; i++) { | ||
3278 | struct softnet_data *queue; | ||
3279 | |||
3280 | queue = &per_cpu(softnet_data, i); | ||
3281 | skb_queue_head_init(&queue->input_pkt_queue); | ||
3282 | queue->throttle = 0; | ||
3283 | queue->cng_level = 0; | ||
3284 | queue->avg_blog = 10; /* arbitrary non-zero */ | ||
3285 | queue->completion_queue = NULL; | ||
3286 | INIT_LIST_HEAD(&queue->poll_list); | ||
3287 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); | ||
3288 | queue->backlog_dev.weight = weight_p; | ||
3289 | queue->backlog_dev.poll = process_backlog; | ||
3290 | atomic_set(&queue->backlog_dev.refcnt, 1); | ||
3291 | } | ||
3292 | |||
3293 | #ifdef OFFLINE_SAMPLE | ||
3294 | samp_timer.expires = jiffies + (10 * HZ); | ||
3295 | add_timer(&samp_timer); | ||
3296 | #endif | ||
3297 | |||
3298 | dev_boot_phase = 0; | ||
3299 | |||
3300 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); | ||
3301 | open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); | ||
3302 | |||
3303 | hotcpu_notifier(dev_cpu_callback, 0); | ||
3304 | dst_init(); | ||
3305 | dev_mcast_init(); | ||
3306 | rc = 0; | ||
3307 | out: | ||
3308 | return rc; | ||
3309 | } | ||
3310 | |||
3311 | subsys_initcall(net_dev_init); | ||
3312 | |||
3313 | EXPORT_SYMBOL(__dev_get_by_index); | ||
3314 | EXPORT_SYMBOL(__dev_get_by_name); | ||
3315 | EXPORT_SYMBOL(__dev_remove_pack); | ||
3316 | EXPORT_SYMBOL(__skb_linearize); | ||
3317 | EXPORT_SYMBOL(dev_add_pack); | ||
3318 | EXPORT_SYMBOL(dev_alloc_name); | ||
3319 | EXPORT_SYMBOL(dev_close); | ||
3320 | EXPORT_SYMBOL(dev_get_by_flags); | ||
3321 | EXPORT_SYMBOL(dev_get_by_index); | ||
3322 | EXPORT_SYMBOL(dev_get_by_name); | ||
3323 | EXPORT_SYMBOL(dev_ioctl); | ||
3324 | EXPORT_SYMBOL(dev_open); | ||
3325 | EXPORT_SYMBOL(dev_queue_xmit); | ||
3326 | EXPORT_SYMBOL(dev_remove_pack); | ||
3327 | EXPORT_SYMBOL(dev_set_allmulti); | ||
3328 | EXPORT_SYMBOL(dev_set_promiscuity); | ||
3329 | EXPORT_SYMBOL(dev_change_flags); | ||
3330 | EXPORT_SYMBOL(dev_set_mtu); | ||
3331 | EXPORT_SYMBOL(dev_set_mac_address); | ||
3332 | EXPORT_SYMBOL(free_netdev); | ||
3333 | EXPORT_SYMBOL(netdev_boot_setup_check); | ||
3334 | EXPORT_SYMBOL(netdev_set_master); | ||
3335 | EXPORT_SYMBOL(netdev_state_change); | ||
3336 | EXPORT_SYMBOL(netif_receive_skb); | ||
3337 | EXPORT_SYMBOL(netif_rx); | ||
3338 | EXPORT_SYMBOL(register_gifconf); | ||
3339 | EXPORT_SYMBOL(register_netdevice); | ||
3340 | EXPORT_SYMBOL(register_netdevice_notifier); | ||
3341 | EXPORT_SYMBOL(skb_checksum_help); | ||
3342 | EXPORT_SYMBOL(synchronize_net); | ||
3343 | EXPORT_SYMBOL(unregister_netdevice); | ||
3344 | EXPORT_SYMBOL(unregister_netdevice_notifier); | ||
3345 | EXPORT_SYMBOL(net_enable_timestamp); | ||
3346 | EXPORT_SYMBOL(net_disable_timestamp); | ||
3347 | EXPORT_SYMBOL(dev_get_flags); | ||
3348 | |||
3349 | #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) | ||
3350 | EXPORT_SYMBOL(br_handle_frame_hook); | ||
3351 | EXPORT_SYMBOL(br_fdb_get_hook); | ||
3352 | EXPORT_SYMBOL(br_fdb_put_hook); | ||
3353 | #endif | ||
3354 | |||
3355 | #ifdef CONFIG_KMOD | ||
3356 | EXPORT_SYMBOL(dev_load); | ||
3357 | #endif | ||
3358 | |||
3359 | EXPORT_PER_CPU_SYMBOL(softnet_data); | ||
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c new file mode 100644 index 000000000000..db098ff3cd6a --- /dev/null +++ b/net/core/dev_mcast.c | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * Linux NET3: Multicast List maintenance. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Tim Kordas <tjk@nostromo.eeap.cwru.edu> | ||
6 | * Richard Underwood <richard@wuzz.demon.co.uk> | ||
7 | * | ||
8 | * Stir fried together from the IP multicast and CAP patches above | ||
9 | * Alan Cox <Alan.Cox@linux.org> | ||
10 | * | ||
11 | * Fixes: | ||
12 | * Alan Cox : Update the device on a real delete | ||
13 | * rather than any time but... | ||
14 | * Alan Cox : IFF_ALLMULTI support. | ||
15 | * Alan Cox : New format set_multicast_list() calls. | ||
16 | * Gleb Natapov : Remove dev_mc_lock. | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or | ||
19 | * modify it under the terms of the GNU General Public License | ||
20 | * as published by the Free Software Foundation; either version | ||
21 | * 2 of the License, or (at your option) any later version. | ||
22 | */ | ||
23 | |||
24 | #include <linux/config.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <asm/uaccess.h> | ||
27 | #include <asm/system.h> | ||
28 | #include <linux/bitops.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/socket.h> | ||
35 | #include <linux/sockios.h> | ||
36 | #include <linux/in.h> | ||
37 | #include <linux/errno.h> | ||
38 | #include <linux/interrupt.h> | ||
39 | #include <linux/if_ether.h> | ||
40 | #include <linux/inet.h> | ||
41 | #include <linux/netdevice.h> | ||
42 | #include <linux/etherdevice.h> | ||
43 | #include <linux/proc_fs.h> | ||
44 | #include <linux/seq_file.h> | ||
45 | #include <linux/init.h> | ||
46 | #include <net/ip.h> | ||
47 | #include <net/route.h> | ||
48 | #include <linux/skbuff.h> | ||
49 | #include <net/sock.h> | ||
50 | #include <net/arp.h> | ||
51 | |||
52 | |||
53 | /* | ||
54 | * Device multicast list maintenance. | ||
55 | * | ||
56 | * This is used both by IP and by the user level maintenance functions. | ||
57 | * Unlike BSD we maintain a usage count on a given multicast address so | ||
58 | * that a casual user application can add/delete multicasts used by | ||
59 | * protocols without doing damage to the protocols when it deletes the | ||
60 | * entries. It also helps IP as it tracks overlapping maps. | ||
61 | * | ||
62 | * Device mc lists are changed by bh at least if IPv6 is enabled, | ||
63 | * so they must be bh protected. | ||
64 | * | ||
65 | * We block accesses to device mc filters with dev->xmit_lock. | ||
66 | */ | ||
67 | |||
68 | /* | ||
69 | * Update the multicast list into the physical NIC controller. | ||
70 | */ | ||
71 | |||
72 | static void __dev_mc_upload(struct net_device *dev) | ||
73 | { | ||
74 | /* Don't do anything until the interface is up | ||
75 | * [dev_open will call this function so the list will | ||
76 | * stay sane] | ||
77 | */ | ||
78 | |||
79 | if (!(dev->flags&IFF_UP)) | ||
80 | return; | ||
81 | |||
82 | /* | ||
83 | * Devices with no set multicast or which have been | ||
84 | * detached don't get set. | ||
85 | */ | ||
86 | |||
87 | if (dev->set_multicast_list == NULL || | ||
88 | !netif_device_present(dev)) | ||
89 | return; | ||
90 | |||
91 | dev->set_multicast_list(dev); | ||
92 | } | ||
93 | |||
94 | void dev_mc_upload(struct net_device *dev) | ||
95 | { | ||
96 | spin_lock_bh(&dev->xmit_lock); | ||
97 | __dev_mc_upload(dev); | ||
98 | spin_unlock_bh(&dev->xmit_lock); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Delete a device level multicast | ||
103 | */ | ||
104 | |||
105 | int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) | ||
106 | { | ||
107 | int err = 0; | ||
108 | struct dev_mc_list *dmi, **dmip; | ||
109 | |||
110 | spin_lock_bh(&dev->xmit_lock); | ||
111 | |||
112 | for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { | ||
113 | /* | ||
114 | * Find the entry we want to delete. The device could | ||
115 | * have variable length entries so check these too. | ||
116 | */ | ||
117 | if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && | ||
118 | alen == dmi->dmi_addrlen) { | ||
119 | if (glbl) { | ||
120 | int old_glbl = dmi->dmi_gusers; | ||
121 | dmi->dmi_gusers = 0; | ||
122 | if (old_glbl == 0) | ||
123 | break; | ||
124 | } | ||
125 | if (--dmi->dmi_users) | ||
126 | goto done; | ||
127 | |||
128 | /* | ||
129 | * Last user. So delete the entry. | ||
130 | */ | ||
131 | *dmip = dmi->next; | ||
132 | dev->mc_count--; | ||
133 | |||
134 | kfree(dmi); | ||
135 | |||
136 | /* | ||
137 | * We have altered the list, so the filter | ||
138 | * loaded into the card is now wrong. Fix it. | ||
139 | */ | ||
140 | __dev_mc_upload(dev); | ||
141 | |||
142 | spin_unlock_bh(&dev->xmit_lock); | ||
143 | return 0; | ||
144 | } | ||
145 | } | ||
146 | err = -ENOENT; | ||
147 | done: | ||
148 | spin_unlock_bh(&dev->xmit_lock); | ||
149 | return err; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Add a device level multicast | ||
154 | */ | ||
155 | |||
156 | int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) | ||
157 | { | ||
158 | int err = 0; | ||
159 | struct dev_mc_list *dmi, *dmi1; | ||
160 | |||
161 | dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); | ||
162 | |||
163 | spin_lock_bh(&dev->xmit_lock); | ||
164 | for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { | ||
165 | if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && | ||
166 | dmi->dmi_addrlen == alen) { | ||
167 | if (glbl) { | ||
168 | int old_glbl = dmi->dmi_gusers; | ||
169 | dmi->dmi_gusers = 1; | ||
170 | if (old_glbl) | ||
171 | goto done; | ||
172 | } | ||
173 | dmi->dmi_users++; | ||
174 | goto done; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | if ((dmi = dmi1) == NULL) { | ||
179 | spin_unlock_bh(&dev->xmit_lock); | ||
180 | return -ENOMEM; | ||
181 | } | ||
182 | memcpy(dmi->dmi_addr, addr, alen); | ||
183 | dmi->dmi_addrlen = alen; | ||
184 | dmi->next = dev->mc_list; | ||
185 | dmi->dmi_users = 1; | ||
186 | dmi->dmi_gusers = glbl ? 1 : 0; | ||
187 | dev->mc_list = dmi; | ||
188 | dev->mc_count++; | ||
189 | |||
190 | __dev_mc_upload(dev); | ||
191 | |||
192 | spin_unlock_bh(&dev->xmit_lock); | ||
193 | return 0; | ||
194 | |||
195 | done: | ||
196 | spin_unlock_bh(&dev->xmit_lock); | ||
197 | if (dmi1) | ||
198 | kfree(dmi1); | ||
199 | return err; | ||
200 | } | ||
201 | |||
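Because entries are usage-counted, adds and deletes must pair up before the entry really disappears and the NIC filter is rewritten. A hedged usage sketch (the all-hosts group address is purely illustrative):

    /* The address is the all-hosts group 01:00:5e:00:00:01, used
     * purely as an example. */
    static unsigned char mc_addr[ETH_ALEN] = {
            0x01, 0x00, 0x5e, 0x00, 0x00, 0x01
    };

    static void example(struct net_device *dev)
    {
            dev_mc_add(dev, mc_addr, ETH_ALEN, 0);    /* new entry, dmi_users = 1 */
            dev_mc_add(dev, mc_addr, ETH_ALEN, 0);    /* same entry, dmi_users = 2 */

            dev_mc_delete(dev, mc_addr, ETH_ALEN, 0); /* dmi_users = 1, entry kept */
            dev_mc_delete(dev, mc_addr, ETH_ALEN, 0); /* last user: freed, filter reloaded */
    }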
202 | /* | ||
203 | * Discard multicast list when a device is downed | ||
204 | */ | ||
205 | |||
206 | void dev_mc_discard(struct net_device *dev) | ||
207 | { | ||
208 | spin_lock_bh(&dev->xmit_lock); | ||
209 | |||
210 | while (dev->mc_list != NULL) { | ||
211 | struct dev_mc_list *tmp = dev->mc_list; | ||
212 | dev->mc_list = tmp->next; | ||
213 | if (tmp->dmi_users > tmp->dmi_gusers) | ||
214 | printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); | ||
215 | kfree(tmp); | ||
216 | } | ||
217 | dev->mc_count = 0; | ||
218 | |||
219 | spin_unlock_bh(&dev->xmit_lock); | ||
220 | } | ||
221 | |||
222 | #ifdef CONFIG_PROC_FS | ||
223 | static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) | ||
224 | { | ||
225 | struct net_device *dev; | ||
226 | loff_t off = 0; | ||
227 | |||
228 | read_lock(&dev_base_lock); | ||
229 | for (dev = dev_base; dev; dev = dev->next) { | ||
230 | if (off++ == *pos) | ||
231 | return dev; | ||
232 | } | ||
233 | return NULL; | ||
234 | } | ||
235 | |||
236 | static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
237 | { | ||
238 | struct net_device *dev = v; | ||
239 | ++*pos; | ||
240 | return dev->next; | ||
241 | } | ||
242 | |||
243 | static void dev_mc_seq_stop(struct seq_file *seq, void *v) | ||
244 | { | ||
245 | read_unlock(&dev_base_lock); | ||
246 | } | ||
247 | |||
248 | |||
249 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
250 | { | ||
251 | struct dev_mc_list *m; | ||
252 | struct net_device *dev = v; | ||
253 | |||
254 | spin_lock_bh(&dev->xmit_lock); | ||
255 | for (m = dev->mc_list; m; m = m->next) { | ||
256 | int i; | ||
257 | |||
258 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
259 | dev->name, m->dmi_users, m->dmi_gusers); | ||
260 | |||
261 | for (i = 0; i < m->dmi_addrlen; i++) | ||
262 | seq_printf(seq, "%02x", m->dmi_addr[i]); | ||
263 | |||
264 | seq_putc(seq, '\n'); | ||
265 | } | ||
266 | spin_unlock_bh(&dev->xmit_lock); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
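Given the format strings above, a /proc/net/dev_mcast line for, say, ifindex 2, device eth0, one user and no global users of 01:00:5e:00:00:01 would render as (illustrative values; columns are ifindex, name, users, global users, address):

    2    eth0            1     0     01005e000001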
270 | static struct seq_operations dev_mc_seq_ops = { | ||
271 | .start = dev_mc_seq_start, | ||
272 | .next = dev_mc_seq_next, | ||
273 | .stop = dev_mc_seq_stop, | ||
274 | .show = dev_mc_seq_show, | ||
275 | }; | ||
276 | |||
277 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
278 | { | ||
279 | return seq_open(file, &dev_mc_seq_ops); | ||
280 | } | ||
281 | |||
282 | static struct file_operations dev_mc_seq_fops = { | ||
283 | .owner = THIS_MODULE, | ||
284 | .open = dev_mc_seq_open, | ||
285 | .read = seq_read, | ||
286 | .llseek = seq_lseek, | ||
287 | .release = seq_release, | ||
288 | }; | ||
289 | |||
290 | #endif | ||
291 | |||
292 | void __init dev_mcast_init(void) | ||
293 | { | ||
294 | proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); | ||
295 | } | ||
296 | |||
297 | EXPORT_SYMBOL(dev_mc_add); | ||
298 | EXPORT_SYMBOL(dev_mc_delete); | ||
299 | EXPORT_SYMBOL(dev_mc_upload); | ||
diff --git a/net/core/dst.c b/net/core/dst.c new file mode 100644 index 000000000000..3bf6cc434814 --- /dev/null +++ b/net/core/dst.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * net/core/dst.c Protocol independent destination cache. | ||
3 | * | ||
4 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/bitops.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/netdevice.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/types.h> | ||
19 | |||
20 | #include <net/dst.h> | ||
21 | |||
22 | /* Locking strategy: | ||
23 | * 1) Garbage collection state of dead destination cache | ||
24 | * entries is protected by dst_lock. | ||
25 | * 2) GC is run only from BH context, and is the only remover | ||
26 | * of entries. | ||
27 | * 3) Entries are added to the garbage list from both BH | ||
28 | * and non-BH context, so local BH disabling is needed. | ||
29 | * 4) All operations modify state, so a spinlock is used. | ||
30 | */ | ||
31 | static struct dst_entry *dst_garbage_list; | ||
32 | #if RT_CACHE_DEBUG >= 2 | ||
33 | static atomic_t dst_total = ATOMIC_INIT(0); | ||
34 | #endif | ||
35 | static DEFINE_SPINLOCK(dst_lock); | ||
36 | |||
37 | static unsigned long dst_gc_timer_expires; | ||
38 | static unsigned long dst_gc_timer_inc = DST_GC_MAX; | ||
39 | static void dst_run_gc(unsigned long); | ||
40 | static void ___dst_free(struct dst_entry * dst); | ||
41 | |||
42 | static struct timer_list dst_gc_timer = | ||
43 | TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0); | ||
44 | |||
45 | static void dst_run_gc(unsigned long dummy) | ||
46 | { | ||
47 | int delayed = 0; | ||
48 | struct dst_entry * dst, **dstp; | ||
49 | |||
50 | if (!spin_trylock(&dst_lock)) { | ||
51 | mod_timer(&dst_gc_timer, jiffies + HZ/10); | ||
52 | return; | ||
53 | } | ||
54 | |||
55 | |||
56 | del_timer(&dst_gc_timer); | ||
57 | dstp = &dst_garbage_list; | ||
58 | while ((dst = *dstp) != NULL) { | ||
59 | if (atomic_read(&dst->__refcnt)) { | ||
60 | dstp = &dst->next; | ||
61 | delayed++; | ||
62 | continue; | ||
63 | } | ||
64 | *dstp = dst->next; | ||
65 | |||
66 | dst = dst_destroy(dst); | ||
67 | if (dst) { | ||
68 | /* NOHASH and still referenced. Unless it is already | ||
69 | * on gc list, invalidate it and add to gc list. | ||
70 | * | ||
71 | * Note: this is temporary. Actually, NOHASH dst's | ||
72 | * must be obsoleted when parent is obsoleted. | ||
73 | * But we do not have state "obsoleted, but | ||
74 | * referenced by parent", so it is right. | ||
75 | */ | ||
76 | if (dst->obsolete > 1) | ||
77 | continue; | ||
78 | |||
79 | ___dst_free(dst); | ||
80 | dst->next = *dstp; | ||
81 | *dstp = dst; | ||
82 | dstp = &dst->next; | ||
83 | } | ||
84 | } | ||
85 | if (!dst_garbage_list) { | ||
86 | dst_gc_timer_inc = DST_GC_MAX; | ||
87 | goto out; | ||
88 | } | ||
89 | if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) | ||
90 | dst_gc_timer_expires = DST_GC_MAX; | ||
91 | dst_gc_timer_inc += DST_GC_INC; | ||
92 | dst_gc_timer.expires = jiffies + dst_gc_timer_expires; | ||
93 | #if RT_CACHE_DEBUG >= 2 | ||
94 | printk("dst_total: %d/%d %ld\n", | ||
95 | atomic_read(&dst_total), delayed, dst_gc_timer_expires); | ||
96 | #endif | ||
97 | add_timer(&dst_gc_timer); | ||
98 | |||
99 | out: | ||
100 | spin_unlock(&dst_lock); | ||
101 | } | ||
102 | |||
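Each pass that leaves garbage behind pushes the next run further out: the interval grows by an ever-larger increment and saturates at DST_GC_MAX, while __dst_free() (below) snaps it back to the fast schedule when new garbage arrives. A userspace sketch of the backoff, with illustrative stand-in constants (the real DST_GC_* values live in include/net/dst.h):

    #include <stdio.h>

    /* Illustrative stand-ins for DST_GC_MIN/INC/MAX. */
    #define GC_MIN  10
    #define GC_INC  50
    #define GC_MAX  1200

    int main(void)
    {
            /* __dst_free() puts the timer on the fast schedule... */
            unsigned long expires = GC_MIN, inc = GC_INC;
            int pass;

            /* ...and every dst_run_gc() pass that leaves garbage behind
             * backs off a little more, saturating at GC_MAX. */
            for (pass = 1; pass <= 6; pass++) {
                    if ((expires += inc) > GC_MAX)
                            expires = GC_MAX;
                    inc += GC_INC;
                    printf("pass %d: next run in %lu ticks\n", pass, expires);
            }
            return 0;
    }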
103 | static int dst_discard_in(struct sk_buff *skb) | ||
104 | { | ||
105 | kfree_skb(skb); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | static int dst_discard_out(struct sk_buff *skb) | ||
110 | { | ||
111 | kfree_skb(skb); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | void * dst_alloc(struct dst_ops * ops) | ||
116 | { | ||
117 | struct dst_entry * dst; | ||
118 | |||
119 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | ||
120 | if (ops->gc()) | ||
121 | return NULL; | ||
122 | } | ||
123 | dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); | ||
124 | if (!dst) | ||
125 | return NULL; | ||
126 | memset(dst, 0, ops->entry_size); | ||
127 | atomic_set(&dst->__refcnt, 0); | ||
128 | dst->ops = ops; | ||
129 | dst->lastuse = jiffies; | ||
130 | dst->path = dst; | ||
131 | dst->input = dst_discard_in; | ||
132 | dst->output = dst_discard_out; | ||
133 | #if RT_CACHE_DEBUG >= 2 | ||
134 | atomic_inc(&dst_total); | ||
135 | #endif | ||
136 | atomic_inc(&ops->entries); | ||
137 | return dst; | ||
138 | } | ||
139 | |||
140 | static void ___dst_free(struct dst_entry * dst) | ||
141 | { | ||
142 | /* The first case (dev==NULL) is required when a | ||
143 | protocol module is unloaded. | ||
144 | */ | ||
145 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { | ||
146 | dst->input = dst_discard_in; | ||
147 | dst->output = dst_discard_out; | ||
148 | } | ||
149 | dst->obsolete = 2; | ||
150 | } | ||
151 | |||
152 | void __dst_free(struct dst_entry * dst) | ||
153 | { | ||
154 | spin_lock_bh(&dst_lock); | ||
155 | ___dst_free(dst); | ||
156 | dst->next = dst_garbage_list; | ||
157 | dst_garbage_list = dst; | ||
158 | if (dst_gc_timer_inc > DST_GC_INC) { | ||
159 | dst_gc_timer_inc = DST_GC_INC; | ||
160 | dst_gc_timer_expires = DST_GC_MIN; | ||
161 | mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); | ||
162 | } | ||
163 | spin_unlock_bh(&dst_lock); | ||
164 | } | ||
165 | |||
166 | struct dst_entry *dst_destroy(struct dst_entry * dst) | ||
167 | { | ||
168 | struct dst_entry *child; | ||
169 | struct neighbour *neigh; | ||
170 | struct hh_cache *hh; | ||
171 | |||
172 | smp_rmb(); | ||
173 | |||
174 | again: | ||
175 | neigh = dst->neighbour; | ||
176 | hh = dst->hh; | ||
177 | child = dst->child; | ||
178 | |||
179 | dst->hh = NULL; | ||
180 | if (hh && atomic_dec_and_test(&hh->hh_refcnt)) | ||
181 | kfree(hh); | ||
182 | |||
183 | if (neigh) { | ||
184 | dst->neighbour = NULL; | ||
185 | neigh_release(neigh); | ||
186 | } | ||
187 | |||
188 | atomic_dec(&dst->ops->entries); | ||
189 | |||
190 | if (dst->ops->destroy) | ||
191 | dst->ops->destroy(dst); | ||
192 | if (dst->dev) | ||
193 | dev_put(dst->dev); | ||
194 | #if RT_CACHE_DEBUG >= 2 | ||
195 | atomic_dec(&dst_total); | ||
196 | #endif | ||
197 | kmem_cache_free(dst->ops->kmem_cachep, dst); | ||
198 | |||
199 | dst = child; | ||
200 | if (dst) { | ||
201 | if (atomic_dec_and_test(&dst->__refcnt)) { | ||
202 | /* We were real parent of this dst, so kill child. */ | ||
203 | if (dst->flags&DST_NOHASH) | ||
204 | goto again; | ||
205 | } else { | ||
206 | /* Child is still referenced, return it for freeing. */ | ||
207 | if (dst->flags&DST_NOHASH) | ||
208 | return dst; | ||
209 | /* Child is still in its hash table */ | ||
210 | } | ||
211 | } | ||
212 | return NULL; | ||
213 | } | ||
214 | |||
215 | /* Dirty hack. We did it in 2.2 (in __dst_free), | ||
216 | * we have _very_ good reasons not to repeat | ||
217 | * this mistake in 2.3, but we have no choice | ||
218 | * now. _It_ _is_ _an_ _explicit_ _deliberate_ | ||
219 | * _race_ _condition_. | ||
220 | * | ||
221 | * Commented and originally written by Alexey. | ||
222 | */ | ||
223 | static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | ||
224 | int unregister) | ||
225 | { | ||
226 | if (dst->ops->ifdown) | ||
227 | dst->ops->ifdown(dst, dev, unregister); | ||
228 | |||
229 | if (dev != dst->dev) | ||
230 | return; | ||
231 | |||
232 | if (!unregister) { | ||
233 | dst->input = dst_discard_in; | ||
234 | dst->output = dst_discard_out; | ||
235 | } else { | ||
236 | dst->dev = &loopback_dev; | ||
237 | dev_hold(&loopback_dev); | ||
238 | dev_put(dev); | ||
239 | if (dst->neighbour && dst->neighbour->dev == dev) { | ||
240 | dst->neighbour->dev = &loopback_dev; | ||
241 | dev_put(dev); | ||
242 | dev_hold(&loopback_dev); | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | |||
247 | static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) | ||
248 | { | ||
249 | struct net_device *dev = ptr; | ||
250 | struct dst_entry *dst; | ||
251 | |||
252 | switch (event) { | ||
253 | case NETDEV_UNREGISTER: | ||
254 | case NETDEV_DOWN: | ||
255 | spin_lock_bh(&dst_lock); | ||
256 | for (dst = dst_garbage_list; dst; dst = dst->next) { | ||
257 | dst_ifdown(dst, dev, event != NETDEV_DOWN); | ||
258 | } | ||
259 | spin_unlock_bh(&dst_lock); | ||
260 | break; | ||
261 | } | ||
262 | return NOTIFY_DONE; | ||
263 | } | ||
264 | |||
265 | static struct notifier_block dst_dev_notifier = { | ||
266 | .notifier_call = dst_dev_event, | ||
267 | }; | ||
268 | |||
269 | void __init dst_init(void) | ||
270 | { | ||
271 | register_netdevice_notifier(&dst_dev_notifier); | ||
272 | } | ||
273 | |||
274 | EXPORT_SYMBOL(__dst_free); | ||
275 | EXPORT_SYMBOL(dst_alloc); | ||
276 | EXPORT_SYMBOL(dst_destroy); | ||
diff --git a/net/core/dv.c b/net/core/dv.c new file mode 100644 index 000000000000..3f25f4aa4e66 --- /dev/null +++ b/net/core/dv.c | |||
@@ -0,0 +1,548 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Generic frame diversion | ||
7 | * | ||
8 | * Authors: | ||
9 | * Benoit LOCHER: initial integration within the kernel with support for ethernet | ||
10 | * Dave Miller: improvement on the code (correctness, performance and source files) | ||
11 | * | ||
12 | */ | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/socket.h> | ||
20 | #include <linux/in.h> | ||
21 | #include <linux/inet.h> | ||
22 | #include <linux/ip.h> | ||
23 | #include <linux/udp.h> | ||
24 | #include <linux/netdevice.h> | ||
25 | #include <linux/etherdevice.h> | ||
26 | #include <linux/skbuff.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <net/dst.h> | ||
30 | #include <net/arp.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/ipv6.h> | ||
33 | #include <net/ip.h> | ||
34 | #include <asm/uaccess.h> | ||
35 | #include <asm/system.h> | ||
36 | #include <asm/checksum.h> | ||
37 | #include <linux/divert.h> | ||
38 | #include <linux/sockios.h> | ||
39 | |||
40 | const char sysctl_divert_version[32]="0.46"; /* Current version */ | ||
41 | |||
42 | static int __init dv_init(void) | ||
43 | { | ||
44 | return 0; | ||
45 | } | ||
46 | module_init(dv_init); | ||
47 | |||
48 | /* | ||
49 | * Allocate a divert_blk for a device. This must be an Ethernet NIC. | ||
50 | */ | ||
51 | int alloc_divert_blk(struct net_device *dev) | ||
52 | { | ||
53 | int alloc_size = (sizeof(struct divert_blk) + 3) & ~3; | ||
54 | |||
55 | dev->divert = NULL; | ||
56 | if (dev->type == ARPHRD_ETHER) { | ||
57 | dev->divert = (struct divert_blk *) | ||
58 | kmalloc(alloc_size, GFP_KERNEL); | ||
59 | if (dev->divert == NULL) { | ||
60 | printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n", | ||
61 | dev->name); | ||
62 | return -ENOMEM; | ||
63 | } | ||
64 | |||
65 | memset(dev->divert, 0, sizeof(struct divert_blk)); | ||
66 | dev_hold(dev); | ||
67 | } | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Free a divert_blk allocated by the above function, if it was | ||
74 | * allocated on that device. | ||
75 | */ | ||
76 | void free_divert_blk(struct net_device *dev) | ||
77 | { | ||
78 | if (dev->divert) { | ||
79 | kfree(dev->divert); | ||
80 | dev->divert=NULL; | ||
81 | dev_put(dev); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * Adds a tcp/udp (source or dest) port to an array | ||
87 | */ | ||
88 | static int add_port(u16 ports[], u16 port) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | if (port == 0) | ||
93 | return -EINVAL; | ||
94 | |||
95 | /* Storing directly in network format for performance, | ||
96 | * thanks Dave :) | ||
97 | */ | ||
98 | port = htons(port); | ||
99 | |||
100 | for (i = 0; i < MAX_DIVERT_PORTS; i++) { | ||
101 | if (ports[i] == port) | ||
102 | return -EALREADY; | ||
103 | } | ||
104 | |||
105 | for (i = 0; i < MAX_DIVERT_PORTS; i++) { | ||
106 | if (ports[i] == 0) { | ||
107 | ports[i] = port; | ||
108 | return 0; | ||
109 | } | ||
110 | } | ||
111 | |||
112 | return -ENOBUFS; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Removes a port from an array tcp/udp (source or dest) | ||
117 | */ | ||
118 | static int remove_port(u16 ports[], u16 port) | ||
119 | { | ||
120 | int i; | ||
121 | |||
122 | if (port == 0) | ||
123 | return -EINVAL; | ||
124 | |||
125 | /* Storing directly in network format for performance, | ||
126 | * thanks Dave ! | ||
127 | */ | ||
128 | port = htons(port); | ||
129 | |||
130 | for (i = 0; i < MAX_DIVERT_PORTS; i++) { | ||
131 | if (ports[i] == port) { | ||
132 | ports[i] = 0; | ||
133 | return 0; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | return -EINVAL; | ||
138 | } | ||
139 | |||
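Both helpers store ports pre-converted with htons(), so the per-packet match in divert_frame() is a raw comparison against the wire-format header field with no byte swapping on the hot path. A self-contained userspace restatement of the scheme:

    #include <arpa/inet.h>
    #include <stdio.h>

    #define NPORTS 8

    static unsigned short ports[NPORTS];    /* kept in network byte order */

    static int add_port(unsigned short host_port)
    {
            unsigned short port = htons(host_port); /* convert once, at setup */
            int i;

            for (i = 0; i < NPORTS; i++)
                    if (ports[i] == port)
                            return -1;      /* already present */
            for (i = 0; i < NPORTS; i++)
                    if (ports[i] == 0) {
                            ports[i] = port;
                            return 0;
                    }
            return -2;                      /* table full */
    }

    /* Hot path: the header field is already big-endian on the wire, so
     * the match is a direct comparison, no per-packet htons()/ntohs(). */
    static int match(unsigned short wire_port)
    {
            int i;

            for (i = 0; i < NPORTS; i++)
                    if (ports[i] && ports[i] == wire_port)
                            return 1;
            return 0;
    }

    int main(void)
    {
            add_port(80);
            printf("%d\n", match(htons(80)));       /* prints 1 */
            return 0;
    }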
140 | /* Some basic sanity checks on the arguments passed to divert_ioctl() */ | ||
141 | static int check_args(struct divert_cf *div_cf, struct net_device **dev) | ||
142 | { | ||
143 | char devname[32]; | ||
144 | int ret; | ||
145 | |||
146 | if (dev == NULL) | ||
147 | return -EFAULT; | ||
148 | |||
149 | /* GETVERSION: all other args are unused */ | ||
150 | if (div_cf->cmd == DIVCMD_GETVERSION) | ||
151 | return 0; | ||
152 | |||
153 | /* Network device index should reasonably be between 0 and 1000 :) */ | ||
154 | if (div_cf->dev_index < 0 || div_cf->dev_index > 1000) | ||
155 | return -EINVAL; | ||
156 | |||
157 | /* Let's try to find the ifname */ | ||
158 | sprintf(devname, "eth%d", div_cf->dev_index); | ||
159 | *dev = dev_get_by_name(devname); | ||
160 | |||
161 | /* dev should NOT be null */ | ||
162 | if (*dev == NULL) | ||
163 | return -EINVAL; | ||
164 | |||
165 | ret = 0; | ||
166 | |||
167 | /* user issuing the ioctl must be a super one :) */ | ||
168 | if (!capable(CAP_SYS_ADMIN)) { | ||
169 | ret = -EPERM; | ||
170 | goto out; | ||
171 | } | ||
172 | |||
173 | /* Device must have a divert_blk member NOT null */ | ||
174 | if ((*dev)->divert == NULL) | ||
175 | ret = -EINVAL; | ||
176 | out: | ||
177 | dev_put(*dev); | ||
178 | return ret; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * control function of the diverter | ||
183 | */ | ||
184 | #if 0 | ||
185 | #define DVDBG(a) \ | ||
186 | printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) | ||
187 | #else | ||
188 | #define DVDBG(a) | ||
189 | #endif | ||
190 | |||
191 | int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg) | ||
192 | { | ||
193 | struct divert_cf div_cf; | ||
194 | struct divert_blk *div_blk; | ||
195 | struct net_device *dev; | ||
196 | int ret; | ||
197 | |||
198 | switch (cmd) { | ||
199 | case SIOCGIFDIVERT: | ||
200 | DVDBG("SIOCGIFDIVERT, copy_from_user"); | ||
201 | if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) | ||
202 | return -EFAULT; | ||
203 | DVDBG("before check_args"); | ||
204 | ret = check_args(&div_cf, &dev); | ||
205 | if (ret) | ||
206 | return ret; | ||
207 | DVDBG("after checkargs"); | ||
208 | div_blk = dev->divert; | ||
209 | |||
210 | DVDBG("befre switch()"); | ||
211 | switch (div_cf.cmd) { | ||
212 | case DIVCMD_GETSTATUS: | ||
213 | /* Now, just give the user the raw divert block | ||
214 | * for him to play with :) | ||
215 | */ | ||
216 | if (copy_to_user(div_cf.arg1.ptr, dev->divert, | ||
217 | sizeof(struct divert_blk))) | ||
218 | return -EFAULT; | ||
219 | break; | ||
220 | |||
221 | case DIVCMD_GETVERSION: | ||
222 | DVDBG("GETVERSION: checking ptr"); | ||
223 | if (div_cf.arg1.ptr == NULL) | ||
224 | return -EINVAL; | ||
225 | DVDBG("GETVERSION: copying data to userland"); | ||
226 | if (copy_to_user(div_cf.arg1.ptr, | ||
227 | sysctl_divert_version, 32)) | ||
228 | return -EFAULT; | ||
229 | DVDBG("GETVERSION: data copied"); | ||
230 | break; | ||
231 | |||
232 | default: | ||
233 | return -EINVAL; | ||
234 | } | ||
235 | |||
236 | break; | ||
237 | |||
238 | case SIOCSIFDIVERT: | ||
239 | if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) | ||
240 | return -EFAULT; | ||
241 | |||
242 | ret = check_args(&div_cf, &dev); | ||
243 | if (ret) | ||
244 | return ret; | ||
245 | |||
246 | div_blk = dev->divert; | ||
247 | |||
248 | switch(div_cf.cmd) { | ||
249 | case DIVCMD_RESET: | ||
250 | div_blk->divert = 0; | ||
251 | div_blk->protos = DIVERT_PROTO_NONE; | ||
252 | memset(div_blk->tcp_dst, 0, | ||
253 | MAX_DIVERT_PORTS * sizeof(u16)); | ||
254 | memset(div_blk->tcp_src, 0, | ||
255 | MAX_DIVERT_PORTS * sizeof(u16)); | ||
256 | memset(div_blk->udp_dst, 0, | ||
257 | MAX_DIVERT_PORTS * sizeof(u16)); | ||
258 | memset(div_blk->udp_src, 0, | ||
259 | MAX_DIVERT_PORTS * sizeof(u16)); | ||
260 | return 0; | ||
261 | |||
262 | case DIVCMD_DIVERT: | ||
263 | switch(div_cf.arg1.int32) { | ||
264 | case DIVARG1_ENABLE: | ||
265 | if (div_blk->divert) | ||
266 | return -EALREADY; | ||
267 | div_blk->divert = 1; | ||
268 | break; | ||
269 | |||
270 | case DIVARG1_DISABLE: | ||
271 | if (!div_blk->divert) | ||
272 | return -EALREADY; | ||
273 | div_blk->divert = 0; | ||
274 | break; | ||
275 | |||
276 | default: | ||
277 | return -EINVAL; | ||
278 | } | ||
279 | |||
280 | break; | ||
281 | |||
282 | case DIVCMD_IP: | ||
283 | switch(div_cf.arg1.int32) { | ||
284 | case DIVARG1_ENABLE: | ||
285 | if (div_blk->protos & DIVERT_PROTO_IP) | ||
286 | return -EALREADY; | ||
287 | div_blk->protos |= DIVERT_PROTO_IP; | ||
288 | break; | ||
289 | |||
290 | case DIVARG1_DISABLE: | ||
291 | if (!(div_blk->protos & DIVERT_PROTO_IP)) | ||
292 | return -EALREADY; | ||
293 | div_blk->protos &= ~DIVERT_PROTO_IP; | ||
294 | break; | ||
295 | |||
296 | default: | ||
297 | return -EINVAL; | ||
298 | } | ||
299 | |||
300 | break; | ||
301 | |||
302 | case DIVCMD_TCP: | ||
303 | switch(div_cf.arg1.int32) { | ||
304 | case DIVARG1_ENABLE: | ||
305 | if (div_blk->protos & DIVERT_PROTO_TCP) | ||
306 | return -EALREADY; | ||
307 | div_blk->protos |= DIVERT_PROTO_TCP; | ||
308 | break; | ||
309 | |||
310 | case DIVARG1_DISABLE: | ||
311 | if (!(div_blk->protos & DIVERT_PROTO_TCP)) | ||
312 | return -EALREADY; | ||
313 | div_blk->protos &= ~DIVERT_PROTO_TCP; | ||
314 | break; | ||
315 | |||
316 | default: | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | |||
320 | break; | ||
321 | |||
322 | case DIVCMD_TCPDST: | ||
323 | switch(div_cf.arg1.int32) { | ||
324 | case DIVARG1_ADD: | ||
325 | return add_port(div_blk->tcp_dst, | ||
326 | div_cf.arg2.uint16); | ||
327 | |||
328 | case DIVARG1_REMOVE: | ||
329 | return remove_port(div_blk->tcp_dst, | ||
330 | div_cf.arg2.uint16); | ||
331 | |||
332 | default: | ||
333 | return -EINVAL; | ||
334 | } | ||
335 | |||
336 | break; | ||
337 | |||
338 | case DIVCMD_TCPSRC: | ||
339 | switch(div_cf.arg1.int32) { | ||
340 | case DIVARG1_ADD: | ||
341 | return add_port(div_blk->tcp_src, | ||
342 | div_cf.arg2.uint16); | ||
343 | |||
344 | case DIVARG1_REMOVE: | ||
345 | return remove_port(div_blk->tcp_src, | ||
346 | div_cf.arg2.uint16); | ||
347 | |||
348 | default: | ||
349 | return -EINVAL; | ||
350 | } | ||
351 | |||
352 | break; | ||
353 | |||
354 | case DIVCMD_UDP: | ||
355 | switch(div_cf.arg1.int32) { | ||
356 | case DIVARG1_ENABLE: | ||
357 | if (div_blk->protos & DIVERT_PROTO_UDP) | ||
358 | return -EALREADY; | ||
359 | div_blk->protos |= DIVERT_PROTO_UDP; | ||
360 | break; | ||
361 | |||
362 | case DIVARG1_DISABLE: | ||
363 | if (!(div_blk->protos & DIVERT_PROTO_UDP)) | ||
364 | return -EALREADY; | ||
365 | div_blk->protos &= ~DIVERT_PROTO_UDP; | ||
366 | break; | ||
367 | |||
368 | default: | ||
369 | return -EINVAL; | ||
370 | } | ||
371 | |||
372 | break; | ||
373 | |||
374 | case DIVCMD_UDPDST: | ||
375 | switch(div_cf.arg1.int32) { | ||
376 | case DIVARG1_ADD: | ||
377 | return add_port(div_blk->udp_dst, | ||
378 | div_cf.arg2.uint16); | ||
379 | |||
380 | case DIVARG1_REMOVE: | ||
381 | return remove_port(div_blk->udp_dst, | ||
382 | div_cf.arg2.uint16); | ||
383 | |||
384 | default: | ||
385 | return -EINVAL; | ||
386 | } | ||
387 | |||
388 | break; | ||
389 | |||
390 | case DIVCMD_UDPSRC: | ||
391 | switch(div_cf.arg1.int32) { | ||
392 | case DIVARG1_ADD: | ||
393 | return add_port(div_blk->udp_src, | ||
394 | div_cf.arg2.uint16); | ||
395 | |||
396 | case DIVARG1_REMOVE: | ||
397 | return remove_port(div_blk->udp_src, | ||
398 | div_cf.arg2.uint16); | ||
399 | |||
400 | default: | ||
401 | return -EINVAL; | ||
402 | } | ||
403 | |||
404 | break; | ||
405 | |||
406 | case DIVCMD_ICMP: | ||
407 | switch(div_cf.arg1.int32) { | ||
408 | case DIVARG1_ENABLE: | ||
409 | if (div_blk->protos & DIVERT_PROTO_ICMP) | ||
410 | return -EALREADY; | ||
411 | div_blk->protos |= DIVERT_PROTO_ICMP; | ||
412 | break; | ||
413 | |||
414 | case DIVARG1_DISABLE: | ||
415 | if (!(div_blk->protos & DIVERT_PROTO_ICMP)) | ||
416 | return -EALREADY; | ||
417 | div_blk->protos &= ~DIVERT_PROTO_ICMP; | ||
418 | break; | ||
419 | |||
420 | default: | ||
421 | return -EINVAL; | ||
422 | } | ||
423 | |||
424 | break; | ||
425 | |||
426 | default: | ||
427 | return -EINVAL; | ||
428 | } | ||
429 | |||
430 | break; | ||
431 | |||
432 | default: | ||
433 | return -EINVAL; | ||
434 | } | ||
435 | |||
436 | return 0; | ||
437 | } | ||
438 | |||
439 | |||
440 | /* | ||
441 | * Check if packet should have its dest mac address set to the box itself | ||
442 | * for diversion | ||
443 | */ | ||
444 | |||
445 | #define ETH_DIVERT_FRAME(skb) \ | ||
446 | memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \ | ||
447 | skb->pkt_type=PACKET_HOST | ||
448 | |||
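Note that ETH_DIVERT_FRAME() expands to two statements; every call site in this file wraps it in braces, so it behaves, but the conventional do { ... } while (0) form would make it safe even in an unbraced if/else. A sketch of that variant (same body, kernel context assumed):

    /* Same body as ETH_DIVERT_FRAME(), packaged as a single statement. */
    #define ETH_DIVERT_FRAME_SAFE(skb)                                  \
            do {                                                        \
                    memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \
                    skb->pkt_type = PACKET_HOST;                        \
            } while (0)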
449 | void divert_frame(struct sk_buff *skb) | ||
450 | { | ||
451 | struct ethhdr *eth = eth_hdr(skb); | ||
452 | struct iphdr *iph; | ||
453 | struct tcphdr *tcph; | ||
454 | struct udphdr *udph; | ||
455 | struct divert_blk *divert = skb->dev->divert; | ||
456 | int i, src, dst; | ||
457 | unsigned char *skb_data_end = skb->data + skb->len; | ||
458 | |||
459 | /* Packet is already aimed at us, return */ | ||
460 | if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN)) | ||
461 | return; | ||
462 | |||
463 | /* proto is not IP, do nothing */ | ||
464 | if (eth->h_proto != htons(ETH_P_IP)) | ||
465 | return; | ||
466 | |||
467 | /* Divert all IP frames ? */ | ||
468 | if (divert->protos & DIVERT_PROTO_IP) { | ||
469 | ETH_DIVERT_FRAME(skb); | ||
470 | return; | ||
471 | } | ||
472 | |||
473 | /* Check for possible (maliciously) malformed IP frame (thanks Dave) */ | ||
474 | iph = (struct iphdr *) skb->data; | ||
475 | if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) { | ||
476 | printk(KERN_INFO "divert: malformed IP packet !\n"); | ||
477 | return; | ||
478 | } | ||
479 | |||
480 | switch (iph->protocol) { | ||
481 | /* Divert all ICMP frames ? */ | ||
482 | case IPPROTO_ICMP: | ||
483 | if (divert->protos & DIVERT_PROTO_ICMP) { | ||
484 | ETH_DIVERT_FRAME(skb); | ||
485 | return; | ||
486 | } | ||
487 | break; | ||
488 | |||
489 | /* Divert all TCP frames ? */ | ||
490 | case IPPROTO_TCP: | ||
491 | if (divert->protos & DIVERT_PROTO_TCP) { | ||
492 | ETH_DIVERT_FRAME(skb); | ||
493 | return; | ||
494 | } | ||
495 | |||
496 | /* Check for possible (maliciously) malformed IP | ||
497 | * frame (thanks Dave) | ||
498 | */ | ||
499 | tcph = (struct tcphdr *) | ||
500 | (((unsigned char *)iph) + (iph->ihl<<2)); | ||
501 | if (((unsigned char *)(tcph+1)) >= skb_data_end) { | ||
502 | printk(KERN_INFO "divert: malformed TCP packet !\n"); | ||
503 | return; | ||
504 | } | ||
505 | |||
506 | /* Divert some tcp dst/src ports only ?*/ | ||
507 | for (i = 0; i < MAX_DIVERT_PORTS; i++) { | ||
508 | dst = divert->tcp_dst[i]; | ||
509 | src = divert->tcp_src[i]; | ||
510 | if ((dst && dst == tcph->dest) || | ||
511 | (src && src == tcph->source)) { | ||
512 | ETH_DIVERT_FRAME(skb); | ||
513 | return; | ||
514 | } | ||
515 | } | ||
516 | break; | ||
517 | |||
518 | /* Divert all UDP frames ? */ | ||
519 | case IPPROTO_UDP: | ||
520 | if (divert->protos & DIVERT_PROTO_UDP) { | ||
521 | ETH_DIVERT_FRAME(skb); | ||
522 | return; | ||
523 | } | ||
524 | |||
525 | /* Check for possible (maliciously) malformed IP | ||
526 | * packet (thanks Dave) | ||
527 | */ | ||
528 | udph = (struct udphdr *) | ||
529 | (((unsigned char *)iph) + (iph->ihl<<2)); | ||
530 | if (((unsigned char *)(udph+1)) >= skb_data_end) { | ||
531 | printk(KERN_INFO | ||
532 | "divert: malformed UDP packet !\n"); | ||
533 | return; | ||
534 | } | ||
535 | |||
536 | /* Divert some udp dst/src ports only ? */ | ||
537 | for (i = 0; i < MAX_DIVERT_PORTS; i++) { | ||
538 | dst = divert->udp_dst[i]; | ||
539 | src = divert->udp_src[i]; | ||
540 | if ((dst && dst == udph->dest) || | ||
541 | (src && src == udph->source)) { | ||
542 | ETH_DIVERT_FRAME(skb); | ||
543 | return; | ||
544 | } | ||
545 | } | ||
546 | break; | ||
547 | } | ||
548 | } | ||
diff --git a/net/core/ethtool.c b/net/core/ethtool.c new file mode 100644 index 000000000000..f05fde97c43d --- /dev/null +++ b/net/core/ethtool.c | |||
@@ -0,0 +1,819 @@ | |||
1 | /* | ||
2 | * net/core/ethtool.c - Ethtool ioctl handler | ||
3 | * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> | ||
4 | * | ||
5 | * This file is where we call all the ethtool_ops commands to get | ||
6 | * the information ethtool needs. We fall back to calling do_ioctl() | ||
7 | * for drivers which haven't been converted to ethtool_ops yet. | ||
8 | * | ||
9 | * It's GPL, stupid. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/ethtool.h> | ||
16 | #include <linux/netdevice.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | /* | ||
20 | * Some useful ethtool_ops methods that are device independent. | ||
21 | * If we find that all drivers want to do the same thing here, | ||
22 | * we can turn these into dev_() function calls. | ||
23 | */ | ||
24 | |||
25 | u32 ethtool_op_get_link(struct net_device *dev) | ||
26 | { | ||
27 | return netif_carrier_ok(dev) ? 1 : 0; | ||
28 | } | ||
29 | |||
30 | u32 ethtool_op_get_tx_csum(struct net_device *dev) | ||
31 | { | ||
32 | return (dev->features & NETIF_F_IP_CSUM) != 0; | ||
33 | } | ||
34 | |||
35 | int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) | ||
36 | { | ||
37 | if (data) | ||
38 | dev->features |= NETIF_F_IP_CSUM; | ||
39 | else | ||
40 | dev->features &= ~NETIF_F_IP_CSUM; | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | u32 ethtool_op_get_sg(struct net_device *dev) | ||
46 | { | ||
47 | return (dev->features & NETIF_F_SG) != 0; | ||
48 | } | ||
49 | |||
50 | int ethtool_op_set_sg(struct net_device *dev, u32 data) | ||
51 | { | ||
52 | if (data) | ||
53 | dev->features |= NETIF_F_SG; | ||
54 | else | ||
55 | dev->features &= ~NETIF_F_SG; | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | u32 ethtool_op_get_tso(struct net_device *dev) | ||
61 | { | ||
62 | return (dev->features & NETIF_F_TSO) != 0; | ||
63 | } | ||
64 | |||
65 | int ethtool_op_set_tso(struct net_device *dev, u32 data) | ||
66 | { | ||
67 | if (data) | ||
68 | dev->features |= NETIF_F_TSO; | ||
69 | else | ||
70 | dev->features &= ~NETIF_F_TSO; | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
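As a quick illustration of how these generic helpers get used (a hedged sketch, not part of this patch): a driver can point most of its ethtool_ops entries straight at the helpers above and implement only the device-specific callbacks. Here my_ethtool_ops and my_get_settings are hypothetical names.

    /* Hypothetical driver wiring; only get_settings is device specific here. */
    static struct ethtool_ops my_ethtool_ops = {
    	.get_settings = my_get_settings,       /* hypothetical driver callback */
    	.get_link     = ethtool_op_get_link,
    	.get_sg       = ethtool_op_get_sg,
    	.set_sg       = ethtool_op_set_sg,
    	.get_tx_csum  = ethtool_op_get_tx_csum,
    	.set_tx_csum  = ethtool_op_set_tx_csum,
    };

    /* ...and in the driver's probe routine:
     *	dev->ethtool_ops = &my_ethtool_ops;
     */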
75 | /* Handlers for each ethtool command */ | ||
76 | |||
77 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) | ||
78 | { | ||
79 | struct ethtool_cmd cmd = { ETHTOOL_GSET }; | ||
80 | int err; | ||
81 | |||
82 | if (!dev->ethtool_ops->get_settings) | ||
83 | return -EOPNOTSUPP; | ||
84 | |||
85 | err = dev->ethtool_ops->get_settings(dev, &cmd); | ||
86 | if (err < 0) | ||
87 | return err; | ||
88 | |||
89 | if (copy_to_user(useraddr, &cmd, sizeof(cmd))) | ||
90 | return -EFAULT; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) | ||
95 | { | ||
96 | struct ethtool_cmd cmd; | ||
97 | |||
98 | if (!dev->ethtool_ops->set_settings) | ||
99 | return -EOPNOTSUPP; | ||
100 | |||
101 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | ||
102 | return -EFAULT; | ||
103 | |||
104 | return dev->ethtool_ops->set_settings(dev, &cmd); | ||
105 | } | ||
106 | |||
107 | static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) | ||
108 | { | ||
109 | struct ethtool_drvinfo info; | ||
110 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
111 | |||
112 | if (!ops->get_drvinfo) | ||
113 | return -EOPNOTSUPP; | ||
114 | |||
115 | memset(&info, 0, sizeof(info)); | ||
116 | info.cmd = ETHTOOL_GDRVINFO; | ||
117 | ops->get_drvinfo(dev, &info); | ||
118 | |||
119 | if (ops->self_test_count) | ||
120 | info.testinfo_len = ops->self_test_count(dev); | ||
121 | if (ops->get_stats_count) | ||
122 | info.n_stats = ops->get_stats_count(dev); | ||
123 | if (ops->get_regs_len) | ||
124 | info.regdump_len = ops->get_regs_len(dev); | ||
125 | if (ops->get_eeprom_len) | ||
126 | info.eedump_len = ops->get_eeprom_len(dev); | ||
127 | |||
128 | if (copy_to_user(useraddr, &info, sizeof(info))) | ||
129 | return -EFAULT; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | ||
134 | { | ||
135 | struct ethtool_regs regs; | ||
136 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
137 | void *regbuf; | ||
138 | int reglen, ret; | ||
139 | |||
140 | if (!ops->get_regs || !ops->get_regs_len) | ||
141 | return -EOPNOTSUPP; | ||
142 | |||
143 | if (copy_from_user(®s, useraddr, sizeof(regs))) | ||
144 | return -EFAULT; | ||
145 | |||
146 | reglen = ops->get_regs_len(dev); | ||
147 | if (regs.len > reglen) | ||
148 | regs.len = reglen; | ||
149 | |||
150 | regbuf = kmalloc(reglen, GFP_USER); | ||
151 | if (!regbuf) | ||
152 | return -ENOMEM; | ||
153 | |||
154 | ops->get_regs(dev, ®s, regbuf); | ||
155 | |||
156 | ret = -EFAULT; | ||
157 | if (copy_to_user(useraddr, ®s, sizeof(regs))) | ||
158 | goto out; | ||
159 | useraddr += offsetof(struct ethtool_regs, data); | ||
160 | if (copy_to_user(useraddr, regbuf, regs.len)) | ||
161 | goto out; | ||
162 | ret = 0; | ||
163 | |||
164 | out: | ||
165 | kfree(regbuf); | ||
166 | return ret; | ||
167 | } | ||
168 | |||
169 | static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) | ||
170 | { | ||
171 | struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; | ||
172 | |||
173 | if (!dev->ethtool_ops->get_wol) | ||
174 | return -EOPNOTSUPP; | ||
175 | |||
176 | dev->ethtool_ops->get_wol(dev, &wol); | ||
177 | |||
178 | if (copy_to_user(useraddr, &wol, sizeof(wol))) | ||
179 | return -EFAULT; | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) | ||
184 | { | ||
185 | struct ethtool_wolinfo wol; | ||
186 | |||
187 | if (!dev->ethtool_ops->set_wol) | ||
188 | return -EOPNOTSUPP; | ||
189 | |||
190 | if (copy_from_user(&wol, useraddr, sizeof(wol))) | ||
191 | return -EFAULT; | ||
192 | |||
193 | return dev->ethtool_ops->set_wol(dev, &wol); | ||
194 | } | ||
195 | |||
196 | static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) | ||
197 | { | ||
198 | struct ethtool_value edata = { ETHTOOL_GMSGLVL }; | ||
199 | |||
200 | if (!dev->ethtool_ops->get_msglevel) | ||
201 | return -EOPNOTSUPP; | ||
202 | |||
203 | edata.data = dev->ethtool_ops->get_msglevel(dev); | ||
204 | |||
205 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
206 | return -EFAULT; | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) | ||
211 | { | ||
212 | struct ethtool_value edata; | ||
213 | |||
214 | if (!dev->ethtool_ops->set_msglevel) | ||
215 | return -EOPNOTSUPP; | ||
216 | |||
217 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
218 | return -EFAULT; | ||
219 | |||
220 | dev->ethtool_ops->set_msglevel(dev, edata.data); | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static int ethtool_nway_reset(struct net_device *dev) | ||
225 | { | ||
226 | if (!dev->ethtool_ops->nway_reset) | ||
227 | return -EOPNOTSUPP; | ||
228 | |||
229 | return dev->ethtool_ops->nway_reset(dev); | ||
230 | } | ||
231 | |||
232 | static int ethtool_get_link(struct net_device *dev, void __user *useraddr) | ||
233 | { | ||
234 | struct ethtool_value edata = { ETHTOOL_GLINK }; | ||
235 | |||
236 | if (!dev->ethtool_ops->get_link) | ||
237 | return -EOPNOTSUPP; | ||
238 | |||
239 | edata.data = dev->ethtool_ops->get_link(dev); | ||
240 | |||
241 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
242 | return -EFAULT; | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) | ||
247 | { | ||
248 | struct ethtool_eeprom eeprom; | ||
249 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
250 | u8 *data; | ||
251 | int ret; | ||
252 | |||
253 | if (!ops->get_eeprom || !ops->get_eeprom_len) | ||
254 | return -EOPNOTSUPP; | ||
255 | |||
256 | if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) | ||
257 | return -EFAULT; | ||
258 | |||
259 | /* Check for wrap and zero */ | ||
260 | if (eeprom.offset + eeprom.len <= eeprom.offset) | ||
261 | return -EINVAL; | ||
262 | |||
263 | /* Check for exceeding total eeprom len */ | ||
264 | if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) | ||
265 | return -EINVAL; | ||
266 | |||
267 | data = kmalloc(eeprom.len, GFP_USER); | ||
268 | if (!data) | ||
269 | return -ENOMEM; | ||
270 | |||
271 | ret = -EFAULT; | ||
272 | if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) | ||
273 | goto out; | ||
274 | |||
275 | ret = ops->get_eeprom(dev, &eeprom, data); | ||
276 | if (ret) | ||
277 | goto out; | ||
278 | |||
279 | ret = -EFAULT; | ||
280 | if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) | ||
281 | goto out; | ||
282 | if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) | ||
283 | goto out; | ||
284 | ret = 0; | ||
285 | |||
286 | out: | ||
287 | kfree(data); | ||
288 | return ret; | ||
289 | } | ||
290 | |||
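The single comparison "eeprom.offset + eeprom.len <= eeprom.offset" used above (and again in ethtool_set_eeprom() below) rejects both a zero length and a wrapped u32 sum in one test. A minimal stand-alone illustration of the arithmetic, not kernel code:

    #include <stdio.h>
    #include <stdint.h>

    /* True exactly when len == 0 or offset + len overflows 32 bits. */
    static int eeprom_range_bad(uint32_t offset, uint32_t len)
    {
    	return offset + len <= offset;
    }

    int main(void)
    {
    	printf("%d\n", eeprom_range_bad(0x100, 0));           /* 1: zero length */
    	printf("%d\n", eeprom_range_bad(0xFFFFFF00u, 0x200)); /* 1: sum wraps to 0x100 */
    	printf("%d\n", eeprom_range_bad(0x100, 0x200));       /* 0: valid range */
    	return 0;
    }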
291 | static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) | ||
292 | { | ||
293 | struct ethtool_eeprom eeprom; | ||
294 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
295 | u8 *data; | ||
296 | int ret; | ||
297 | |||
298 | if (!ops->set_eeprom || !ops->get_eeprom_len) | ||
299 | return -EOPNOTSUPP; | ||
300 | |||
301 | if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) | ||
302 | return -EFAULT; | ||
303 | |||
304 | /* Check for wrap and zero */ | ||
305 | if (eeprom.offset + eeprom.len <= eeprom.offset) | ||
306 | return -EINVAL; | ||
307 | |||
308 | /* Check for exceeding total eeprom len */ | ||
309 | if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) | ||
310 | return -EINVAL; | ||
311 | |||
312 | data = kmalloc(eeprom.len, GFP_USER); | ||
313 | if (!data) | ||
314 | return -ENOMEM; | ||
315 | |||
316 | ret = -EFAULT; | ||
317 | if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) | ||
318 | goto out; | ||
319 | |||
320 | ret = ops->set_eeprom(dev, &eeprom, data); | ||
321 | if (ret) | ||
322 | goto out; | ||
323 | |||
324 | if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) | ||
325 | ret = -EFAULT; | ||
326 | |||
327 | out: | ||
328 | kfree(data); | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) | ||
333 | { | ||
334 | struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; | ||
335 | |||
336 | if (!dev->ethtool_ops->get_coalesce) | ||
337 | return -EOPNOTSUPP; | ||
338 | |||
339 | dev->ethtool_ops->get_coalesce(dev, &coalesce); | ||
340 | |||
341 | if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) | ||
342 | return -EFAULT; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) | ||
347 | { | ||
348 | struct ethtool_coalesce coalesce; | ||
349 | |||
350 | if (!dev->ethtool_ops->set_coalesce) | ||
351 | return -EOPNOTSUPP; | ||
352 | |||
353 | if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) | ||
354 | return -EFAULT; | ||
355 | |||
356 | return dev->ethtool_ops->set_coalesce(dev, &coalesce); | ||
357 | } | ||
358 | |||
359 | static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) | ||
360 | { | ||
361 | struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; | ||
362 | |||
363 | if (!dev->ethtool_ops->get_ringparam) | ||
364 | return -EOPNOTSUPP; | ||
365 | |||
366 | dev->ethtool_ops->get_ringparam(dev, &ringparam); | ||
367 | |||
368 | if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) | ||
369 | return -EFAULT; | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) | ||
374 | { | ||
375 | struct ethtool_ringparam ringparam; | ||
376 | |||
377 | if (!dev->ethtool_ops->set_ringparam) | ||
378 | return -EOPNOTSUPP; | ||
379 | |||
380 | if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) | ||
381 | return -EFAULT; | ||
382 | |||
383 | return dev->ethtool_ops->set_ringparam(dev, &ringparam); | ||
384 | } | ||
385 | |||
386 | static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) | ||
387 | { | ||
388 | struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; | ||
389 | |||
390 | if (!dev->ethtool_ops->get_pauseparam) | ||
391 | return -EOPNOTSUPP; | ||
392 | |||
393 | dev->ethtool_ops->get_pauseparam(dev, &pauseparam); | ||
394 | |||
395 | if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) | ||
396 | return -EFAULT; | ||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) | ||
401 | { | ||
402 | struct ethtool_pauseparam pauseparam; | ||
403 | |||
404 | if (!dev->ethtool_ops->set_pauseparam) | ||
405 | return -EOPNOTSUPP; | ||
406 | |||
407 | if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) | ||
408 | return -EFAULT; | ||
409 | |||
410 | return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); | ||
411 | } | ||
412 | |||
413 | static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) | ||
414 | { | ||
415 | struct ethtool_value edata = { ETHTOOL_GRXCSUM }; | ||
416 | |||
417 | if (!dev->ethtool_ops->get_rx_csum) | ||
418 | return -EOPNOTSUPP; | ||
419 | |||
420 | edata.data = dev->ethtool_ops->get_rx_csum(dev); | ||
421 | |||
422 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
423 | return -EFAULT; | ||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) | ||
428 | { | ||
429 | struct ethtool_value edata; | ||
430 | |||
431 | if (!dev->ethtool_ops->set_rx_csum) | ||
432 | return -EOPNOTSUPP; | ||
433 | |||
434 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
435 | return -EFAULT; | ||
436 | |||
437 | dev->ethtool_ops->set_rx_csum(dev, edata.data); | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) | ||
442 | { | ||
443 | struct ethtool_value edata = { ETHTOOL_GTXCSUM }; | ||
444 | |||
445 | if (!dev->ethtool_ops->get_tx_csum) | ||
446 | return -EOPNOTSUPP; | ||
447 | |||
448 | edata.data = dev->ethtool_ops->get_tx_csum(dev); | ||
449 | |||
450 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
451 | return -EFAULT; | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | static int __ethtool_set_sg(struct net_device *dev, u32 data) | ||
456 | { | ||
457 | int err; | ||
458 | |||
459 | if (!data && dev->ethtool_ops->set_tso) { | ||
460 | err = dev->ethtool_ops->set_tso(dev, 0); | ||
461 | if (err) | ||
462 | return err; | ||
463 | } | ||
464 | |||
465 | return dev->ethtool_ops->set_sg(dev, data); | ||
466 | } | ||
467 | |||
468 | static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) | ||
469 | { | ||
470 | struct ethtool_value edata; | ||
471 | int err; | ||
472 | |||
473 | if (!dev->ethtool_ops->set_tx_csum) | ||
474 | return -EOPNOTSUPP; | ||
475 | |||
476 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
477 | return -EFAULT; | ||
478 | |||
479 | if (!edata.data && dev->ethtool_ops->set_sg) { | ||
480 | err = __ethtool_set_sg(dev, 0); | ||
481 | if (err) | ||
482 | return err; | ||
483 | } | ||
484 | |||
485 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); | ||
486 | } | ||
487 | |||
488 | static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) | ||
489 | { | ||
490 | struct ethtool_value edata = { ETHTOOL_GSG }; | ||
491 | |||
492 | if (!dev->ethtool_ops->get_sg) | ||
493 | return -EOPNOTSUPP; | ||
494 | |||
495 | edata.data = dev->ethtool_ops->get_sg(dev); | ||
496 | |||
497 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
498 | return -EFAULT; | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) | ||
503 | { | ||
504 | struct ethtool_value edata; | ||
505 | |||
506 | if (!dev->ethtool_ops->set_sg) | ||
507 | return -EOPNOTSUPP; | ||
508 | |||
509 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
510 | return -EFAULT; | ||
511 | |||
512 | if (edata.data && | ||
513 | !(dev->features & (NETIF_F_IP_CSUM | | ||
514 | NETIF_F_NO_CSUM | | ||
515 | NETIF_F_HW_CSUM))) | ||
516 | return -EINVAL; | ||
517 | |||
518 | return __ethtool_set_sg(dev, edata.data); | ||
519 | } | ||
520 | |||
521 | static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) | ||
522 | { | ||
523 | struct ethtool_value edata = { ETHTOOL_GTSO }; | ||
524 | |||
525 | if (!dev->ethtool_ops->get_tso) | ||
526 | return -EOPNOTSUPP; | ||
527 | |||
528 | edata.data = dev->ethtool_ops->get_tso(dev); | ||
529 | |||
530 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
531 | return -EFAULT; | ||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) | ||
536 | { | ||
537 | struct ethtool_value edata; | ||
538 | |||
539 | if (!dev->ethtool_ops->set_tso) | ||
540 | return -EOPNOTSUPP; | ||
541 | |||
542 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
543 | return -EFAULT; | ||
544 | |||
545 | if (edata.data && !(dev->features & NETIF_F_SG)) | ||
546 | return -EINVAL; | ||
547 | |||
548 | return dev->ethtool_ops->set_tso(dev, edata.data); | ||
549 | } | ||
550 | |||
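    /*
     * Note the feature-dependency chain enforced by the setters above:
     * TSO requires scatter-gather (ethtool_set_tso rejects it without
     * NETIF_F_SG), scatter-gather requires some form of TX checksumming
     * (ethtool_set_sg), and clearing a prerequisite also clears its
     * dependents (ethtool_set_tx_csum drops SG, __ethtool_set_sg drops TSO).
     */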
551 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) | ||
552 | { | ||
553 | struct ethtool_test test; | ||
554 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
555 | u64 *data; | ||
556 | int ret; | ||
557 | |||
558 | if (!ops->self_test || !ops->self_test_count) | ||
559 | return -EOPNOTSUPP; | ||
560 | |||
561 | if (copy_from_user(&test, useraddr, sizeof(test))) | ||
562 | return -EFAULT; | ||
563 | |||
564 | test.len = ops->self_test_count(dev); | ||
565 | data = kmalloc(test.len * sizeof(u64), GFP_USER); | ||
566 | if (!data) | ||
567 | return -ENOMEM; | ||
568 | |||
569 | ops->self_test(dev, &test, data); | ||
570 | |||
571 | ret = -EFAULT; | ||
572 | if (copy_to_user(useraddr, &test, sizeof(test))) | ||
573 | goto out; | ||
574 | useraddr += sizeof(test); | ||
575 | if (copy_to_user(useraddr, data, test.len * sizeof(u64))) | ||
576 | goto out; | ||
577 | ret = 0; | ||
578 | |||
579 | out: | ||
580 | kfree(data); | ||
581 | return ret; | ||
582 | } | ||
583 | |||
584 | static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) | ||
585 | { | ||
586 | struct ethtool_gstrings gstrings; | ||
587 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
588 | u8 *data; | ||
589 | int ret; | ||
590 | |||
591 | if (!ops->get_strings) | ||
592 | return -EOPNOTSUPP; | ||
593 | |||
594 | if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) | ||
595 | return -EFAULT; | ||
596 | |||
597 | switch (gstrings.string_set) { | ||
598 | case ETH_SS_TEST: | ||
599 | if (!ops->self_test_count) | ||
600 | return -EOPNOTSUPP; | ||
601 | gstrings.len = ops->self_test_count(dev); | ||
602 | break; | ||
603 | case ETH_SS_STATS: | ||
604 | if (!ops->get_stats_count) | ||
605 | return -EOPNOTSUPP; | ||
606 | gstrings.len = ops->get_stats_count(dev); | ||
607 | break; | ||
608 | default: | ||
609 | return -EINVAL; | ||
610 | } | ||
611 | |||
612 | data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); | ||
613 | if (!data) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | ops->get_strings(dev, gstrings.string_set, data); | ||
617 | |||
618 | ret = -EFAULT; | ||
619 | if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) | ||
620 | goto out; | ||
621 | useraddr += sizeof(gstrings); | ||
622 | if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) | ||
623 | goto out; | ||
624 | ret = 0; | ||
625 | |||
626 | out: | ||
627 | kfree(data); | ||
628 | return ret; | ||
629 | } | ||
630 | |||
631 | static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) | ||
632 | { | ||
633 | struct ethtool_value id; | ||
634 | |||
635 | if (!dev->ethtool_ops->phys_id) | ||
636 | return -EOPNOTSUPP; | ||
637 | |||
638 | if (copy_from_user(&id, useraddr, sizeof(id))) | ||
639 | return -EFAULT; | ||
640 | |||
641 | return dev->ethtool_ops->phys_id(dev, id.data); | ||
642 | } | ||
643 | |||
644 | static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) | ||
645 | { | ||
646 | struct ethtool_stats stats; | ||
647 | struct ethtool_ops *ops = dev->ethtool_ops; | ||
648 | u64 *data; | ||
649 | int ret; | ||
650 | |||
651 | if (!ops->get_ethtool_stats || !ops->get_stats_count) | ||
652 | return -EOPNOTSUPP; | ||
653 | |||
654 | if (copy_from_user(&stats, useraddr, sizeof(stats))) | ||
655 | return -EFAULT; | ||
656 | |||
657 | stats.n_stats = ops->get_stats_count(dev); | ||
658 | data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); | ||
659 | if (!data) | ||
660 | return -ENOMEM; | ||
661 | |||
662 | ops->get_ethtool_stats(dev, &stats, data); | ||
663 | |||
664 | ret = -EFAULT; | ||
665 | if (copy_to_user(useraddr, &stats, sizeof(stats))) | ||
666 | goto out; | ||
667 | useraddr += sizeof(stats); | ||
668 | if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) | ||
669 | goto out; | ||
670 | ret = 0; | ||
671 | |||
672 | out: | ||
673 | kfree(data); | ||
674 | return ret; | ||
675 | } | ||
676 | |||
677 | /* The main entry point in this file. Called from net/core/dev.c */ | ||
678 | |||
679 | int dev_ethtool(struct ifreq *ifr) | ||
680 | { | ||
681 | struct net_device *dev = __dev_get_by_name(ifr->ifr_name); | ||
682 | void __user *useraddr = ifr->ifr_data; | ||
683 | u32 ethcmd; | ||
684 | int rc; | ||
685 | |||
686 | /* | ||
687 | * XXX: This can be pushed down into the ethtool_* handlers that | ||
688 | * need it. Keep existing behaviour for the moment. | ||
689 | */ | ||
690 | if (!capable(CAP_NET_ADMIN)) | ||
691 | return -EPERM; | ||
692 | |||
693 | if (!dev || !netif_device_present(dev)) | ||
694 | return -ENODEV; | ||
695 | |||
696 | if (!dev->ethtool_ops) | ||
697 | goto ioctl; | ||
698 | |||
699 | if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) | ||
700 | return -EFAULT; | ||
701 | |||
702 | if (dev->ethtool_ops->begin) | ||
703 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) | ||
704 | return rc; | ||
705 | |||
706 | switch (ethcmd) { | ||
707 | case ETHTOOL_GSET: | ||
708 | rc = ethtool_get_settings(dev, useraddr); | ||
709 | break; | ||
710 | case ETHTOOL_SSET: | ||
711 | rc = ethtool_set_settings(dev, useraddr); | ||
712 | break; | ||
713 | case ETHTOOL_GDRVINFO: | ||
714 | rc = ethtool_get_drvinfo(dev, useraddr); | ||
715 | |||
716 | break; | ||
717 | case ETHTOOL_GREGS: | ||
718 | rc = ethtool_get_regs(dev, useraddr); | ||
719 | break; | ||
720 | case ETHTOOL_GWOL: | ||
721 | rc = ethtool_get_wol(dev, useraddr); | ||
722 | break; | ||
723 | case ETHTOOL_SWOL: | ||
724 | rc = ethtool_set_wol(dev, useraddr); | ||
725 | break; | ||
726 | case ETHTOOL_GMSGLVL: | ||
727 | rc = ethtool_get_msglevel(dev, useraddr); | ||
728 | break; | ||
729 | case ETHTOOL_SMSGLVL: | ||
730 | rc = ethtool_set_msglevel(dev, useraddr); | ||
731 | break; | ||
732 | case ETHTOOL_NWAY_RST: | ||
733 | rc = ethtool_nway_reset(dev); | ||
734 | break; | ||
735 | case ETHTOOL_GLINK: | ||
736 | rc = ethtool_get_link(dev, useraddr); | ||
737 | break; | ||
738 | case ETHTOOL_GEEPROM: | ||
739 | rc = ethtool_get_eeprom(dev, useraddr); | ||
740 | break; | ||
741 | case ETHTOOL_SEEPROM: | ||
742 | rc = ethtool_set_eeprom(dev, useraddr); | ||
743 | break; | ||
744 | case ETHTOOL_GCOALESCE: | ||
745 | rc = ethtool_get_coalesce(dev, useraddr); | ||
746 | break; | ||
747 | case ETHTOOL_SCOALESCE: | ||
748 | rc = ethtool_set_coalesce(dev, useraddr); | ||
749 | break; | ||
750 | case ETHTOOL_GRINGPARAM: | ||
751 | rc = ethtool_get_ringparam(dev, useraddr); | ||
752 | break; | ||
753 | case ETHTOOL_SRINGPARAM: | ||
754 | rc = ethtool_set_ringparam(dev, useraddr); | ||
755 | break; | ||
756 | case ETHTOOL_GPAUSEPARAM: | ||
757 | rc = ethtool_get_pauseparam(dev, useraddr); | ||
758 | break; | ||
759 | case ETHTOOL_SPAUSEPARAM: | ||
760 | rc = ethtool_set_pauseparam(dev, useraddr); | ||
761 | break; | ||
762 | case ETHTOOL_GRXCSUM: | ||
763 | rc = ethtool_get_rx_csum(dev, useraddr); | ||
764 | break; | ||
765 | case ETHTOOL_SRXCSUM: | ||
766 | rc = ethtool_set_rx_csum(dev, useraddr); | ||
767 | break; | ||
768 | case ETHTOOL_GTXCSUM: | ||
769 | rc = ethtool_get_tx_csum(dev, useraddr); | ||
770 | break; | ||
771 | case ETHTOOL_STXCSUM: | ||
772 | rc = ethtool_set_tx_csum(dev, useraddr); | ||
773 | break; | ||
774 | case ETHTOOL_GSG: | ||
775 | rc = ethtool_get_sg(dev, useraddr); | ||
776 | break; | ||
777 | case ETHTOOL_SSG: | ||
778 | rc = ethtool_set_sg(dev, useraddr); | ||
779 | break; | ||
780 | case ETHTOOL_GTSO: | ||
781 | rc = ethtool_get_tso(dev, useraddr); | ||
782 | break; | ||
783 | case ETHTOOL_STSO: | ||
784 | rc = ethtool_set_tso(dev, useraddr); | ||
785 | break; | ||
786 | case ETHTOOL_TEST: | ||
787 | rc = ethtool_self_test(dev, useraddr); | ||
788 | break; | ||
789 | case ETHTOOL_GSTRINGS: | ||
790 | rc = ethtool_get_strings(dev, useraddr); | ||
791 | break; | ||
792 | case ETHTOOL_PHYS_ID: | ||
793 | rc = ethtool_phys_id(dev, useraddr); | ||
794 | break; | ||
795 | case ETHTOOL_GSTATS: | ||
796 | rc = ethtool_get_stats(dev, useraddr); | ||
797 | break; | ||
798 | default: | ||
799 | rc = -EOPNOTSUPP; | ||
800 | } | ||
801 | |||
802 | if (dev->ethtool_ops->complete) | ||
803 | dev->ethtool_ops->complete(dev); | ||
804 | return rc; | ||
805 | |||
806 | ioctl: | ||
807 | if (dev->do_ioctl) | ||
808 | return dev->do_ioctl(dev, ifr, SIOCETHTOOL); | ||
809 | return -EOPNOTSUPP; | ||
810 | } | ||
811 | |||
812 | EXPORT_SYMBOL(dev_ethtool); | ||
813 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
814 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
815 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
816 | EXPORT_SYMBOL(ethtool_op_get_tx_csum); | ||
817 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
818 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
819 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
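For context, a hedged user-space sketch of what arrives at dev_ethtool() above: the command struct travels through ifr_data of a SIOCETHTOOL ioctl, with the cmd word read first to dispatch. All calls here are the standard ioctl API; only the interface name "eth0" is an example.

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>
    #include <net/if.h>

    int main(void)
    {
    	struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
    	struct ifreq ifr;
    	int fd = socket(AF_INET, SOCK_DGRAM, 0);

    	if (fd < 0)
    		return 1;
    	memset(&ifr, 0, sizeof(ifr));
    	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
    	ifr.ifr_data = (char *)&edata;	/* becomes useraddr in dev_ethtool() */

    	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
    		printf("link: %s\n", edata.data ? "up" : "down");
    	return 0;
    }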
diff --git a/net/core/filter.c b/net/core/filter.c new file mode 100644 index 000000000000..f3b88205ace2 --- /dev/null +++ b/net/core/filter.c | |||
@@ -0,0 +1,432 @@ | |||
1 | /* | ||
2 | * Linux Socket Filter - Kernel level socket filtering | ||
3 | * | ||
4 | * Author: | ||
5 | * Jay Schulist <jschlst@samba.org> | ||
6 | * | ||
7 | * Based on the design of: | ||
8 | * - The Berkeley Packet Filter | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * Andi Kleen - Fix a few bad bugs and races. | ||
16 | */ | ||
17 | |||
18 | #include <linux/module.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/in.h> | ||
25 | #include <linux/inet.h> | ||
26 | #include <linux/netdevice.h> | ||
27 | #include <linux/if_packet.h> | ||
28 | #include <net/ip.h> | ||
29 | #include <net/protocol.h> | ||
30 | #include <linux/skbuff.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <linux/errno.h> | ||
33 | #include <linux/timer.h> | ||
34 | #include <asm/system.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <linux/filter.h> | ||
37 | |||
38 | /* No hurry in this branch */ | ||
39 | static u8 *load_pointer(struct sk_buff *skb, int k) | ||
40 | { | ||
41 | u8 *ptr = NULL; | ||
42 | |||
43 | if (k >= SKF_NET_OFF) | ||
44 | ptr = skb->nh.raw + k - SKF_NET_OFF; | ||
45 | else if (k >= SKF_LL_OFF) | ||
46 | ptr = skb->mac.raw + k - SKF_LL_OFF; | ||
47 | |||
48 | if (ptr >= skb->head && ptr < skb->tail) | ||
49 | return ptr; | ||
50 | return NULL; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * sk_run_filter - run a filter on a socket | ||
55 | * @skb: buffer to run the filter on | ||
56 | * @filter: filter to apply | ||
57 | * @flen: length of filter | ||
58 | * | ||
59 | * Decode and apply filter instructions to the skb->data. | ||
60 | * Return length to keep, 0 for none. skb is the data we are | ||
61 | * filtering, filter is the array of filter instructions, and | ||
62 | * len is the number of filter blocks in the array. | ||
63 | */ | ||
64 | |||
65 | int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) | ||
66 | { | ||
67 | unsigned char *data = skb->data; | ||
68 | /* len is UNSIGNED. Byte-wide insns rely only on implicit | ||
69 | type casts to prevent reading arbitrary memory locations. | ||
70 | */ | ||
71 | unsigned int len = skb->len-skb->data_len; | ||
72 | struct sock_filter *fentry; /* We walk down these */ | ||
73 | u32 A = 0; /* Accumulator */ | ||
74 | u32 X = 0; /* Index Register */ | ||
75 | u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ | ||
76 | int k; | ||
77 | int pc; | ||
78 | |||
79 | /* | ||
80 | * Process array of filter instructions. | ||
81 | */ | ||
82 | for (pc = 0; pc < flen; pc++) { | ||
83 | fentry = &filter[pc]; | ||
84 | |||
85 | switch (fentry->code) { | ||
86 | case BPF_ALU|BPF_ADD|BPF_X: | ||
87 | A += X; | ||
88 | continue; | ||
89 | case BPF_ALU|BPF_ADD|BPF_K: | ||
90 | A += fentry->k; | ||
91 | continue; | ||
92 | case BPF_ALU|BPF_SUB|BPF_X: | ||
93 | A -= X; | ||
94 | continue; | ||
95 | case BPF_ALU|BPF_SUB|BPF_K: | ||
96 | A -= fentry->k; | ||
97 | continue; | ||
98 | case BPF_ALU|BPF_MUL|BPF_X: | ||
99 | A *= X; | ||
100 | continue; | ||
101 | case BPF_ALU|BPF_MUL|BPF_K: | ||
102 | A *= fentry->k; | ||
103 | continue; | ||
104 | case BPF_ALU|BPF_DIV|BPF_X: | ||
105 | if (X == 0) | ||
106 | return 0; | ||
107 | A /= X; | ||
108 | continue; | ||
109 | case BPF_ALU|BPF_DIV|BPF_K: | ||
110 | if (fentry->k == 0) | ||
111 | return 0; | ||
112 | A /= fentry->k; | ||
113 | continue; | ||
114 | case BPF_ALU|BPF_AND|BPF_X: | ||
115 | A &= X; | ||
116 | continue; | ||
117 | case BPF_ALU|BPF_AND|BPF_K: | ||
118 | A &= fentry->k; | ||
119 | continue; | ||
120 | case BPF_ALU|BPF_OR|BPF_X: | ||
121 | A |= X; | ||
122 | continue; | ||
123 | case BPF_ALU|BPF_OR|BPF_K: | ||
124 | A |= fentry->k; | ||
125 | continue; | ||
126 | case BPF_ALU|BPF_LSH|BPF_X: | ||
127 | A <<= X; | ||
128 | continue; | ||
129 | case BPF_ALU|BPF_LSH|BPF_K: | ||
130 | A <<= fentry->k; | ||
131 | continue; | ||
132 | case BPF_ALU|BPF_RSH|BPF_X: | ||
133 | A >>= X; | ||
134 | continue; | ||
135 | case BPF_ALU|BPF_RSH|BPF_K: | ||
136 | A >>= fentry->k; | ||
137 | continue; | ||
138 | case BPF_ALU|BPF_NEG: | ||
139 | A = -A; | ||
140 | continue; | ||
141 | case BPF_JMP|BPF_JA: | ||
142 | pc += fentry->k; | ||
143 | continue; | ||
144 | case BPF_JMP|BPF_JGT|BPF_K: | ||
145 | pc += (A > fentry->k) ? fentry->jt : fentry->jf; | ||
146 | continue; | ||
147 | case BPF_JMP|BPF_JGE|BPF_K: | ||
148 | pc += (A >= fentry->k) ? fentry->jt : fentry->jf; | ||
149 | continue; | ||
150 | case BPF_JMP|BPF_JEQ|BPF_K: | ||
151 | pc += (A == fentry->k) ? fentry->jt : fentry->jf; | ||
152 | continue; | ||
153 | case BPF_JMP|BPF_JSET|BPF_K: | ||
154 | pc += (A & fentry->k) ? fentry->jt : fentry->jf; | ||
155 | continue; | ||
156 | case BPF_JMP|BPF_JGT|BPF_X: | ||
157 | pc += (A > X) ? fentry->jt : fentry->jf; | ||
158 | continue; | ||
159 | case BPF_JMP|BPF_JGE|BPF_X: | ||
160 | pc += (A >= X) ? fentry->jt : fentry->jf; | ||
161 | continue; | ||
162 | case BPF_JMP|BPF_JEQ|BPF_X: | ||
163 | pc += (A == X) ? fentry->jt : fentry->jf; | ||
164 | continue; | ||
165 | case BPF_JMP|BPF_JSET|BPF_X: | ||
166 | pc += (A & X) ? fentry->jt : fentry->jf; | ||
167 | continue; | ||
168 | case BPF_LD|BPF_W|BPF_ABS: | ||
169 | k = fentry->k; | ||
170 | load_w: | ||
171 | if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) { | ||
172 | A = ntohl(*(u32*)&data[k]); | ||
173 | continue; | ||
174 | } | ||
175 | if (k < 0) { | ||
176 | u8 *ptr; | ||
177 | |||
178 | if (k >= SKF_AD_OFF) | ||
179 | break; | ||
180 | ptr = load_pointer(skb, k); | ||
181 | if (ptr) { | ||
182 | A = ntohl(*(u32*)ptr); | ||
183 | continue; | ||
184 | } | ||
185 | } else { | ||
186 | u32 _tmp, *p; | ||
187 | p = skb_header_pointer(skb, k, 4, &_tmp); | ||
188 | if (p != NULL) { | ||
189 | A = ntohl(*p); | ||
190 | continue; | ||
191 | } | ||
192 | } | ||
193 | return 0; | ||
194 | case BPF_LD|BPF_H|BPF_ABS: | ||
195 | k = fentry->k; | ||
196 | load_h: | ||
197 | if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) { | ||
198 | A = ntohs(*(u16*)&data[k]); | ||
199 | continue; | ||
200 | } | ||
201 | if (k < 0) { | ||
202 | u8 *ptr; | ||
203 | |||
204 | if (k >= SKF_AD_OFF) | ||
205 | break; | ||
206 | ptr = load_pointer(skb, k); | ||
207 | if (ptr) { | ||
208 | A = ntohs(*(u16*)ptr); | ||
209 | continue; | ||
210 | } | ||
211 | } else { | ||
212 | u16 _tmp, *p; | ||
213 | p = skb_header_pointer(skb, k, 2, &_tmp); | ||
214 | if (p != NULL) { | ||
215 | A = ntohs(*p); | ||
216 | continue; | ||
217 | } | ||
218 | } | ||
219 | return 0; | ||
220 | case BPF_LD|BPF_B|BPF_ABS: | ||
221 | k = fentry->k; | ||
222 | load_b: | ||
223 | if (k >= 0 && (unsigned int)k < len) { | ||
224 | A = data[k]; | ||
225 | continue; | ||
226 | } | ||
227 | if (k < 0) { | ||
228 | u8 *ptr; | ||
229 | |||
230 | if (k >= SKF_AD_OFF) | ||
231 | break; | ||
232 | ptr = load_pointer(skb, k); | ||
233 | if (ptr) { | ||
234 | A = *ptr; | ||
235 | continue; | ||
236 | } | ||
237 | } else { | ||
238 | u8 _tmp, *p; | ||
239 | p = skb_header_pointer(skb, k, 1, &_tmp); | ||
240 | if (p != NULL) { | ||
241 | A = *p; | ||
242 | continue; | ||
243 | } | ||
244 | } | ||
245 | return 0; | ||
246 | case BPF_LD|BPF_W|BPF_LEN: | ||
247 | A = len; | ||
248 | continue; | ||
249 | case BPF_LDX|BPF_W|BPF_LEN: | ||
250 | X = len; | ||
251 | continue; | ||
252 | case BPF_LD|BPF_W|BPF_IND: | ||
253 | k = X + fentry->k; | ||
254 | goto load_w; | ||
255 | case BPF_LD|BPF_H|BPF_IND: | ||
256 | k = X + fentry->k; | ||
257 | goto load_h; | ||
258 | case BPF_LD|BPF_B|BPF_IND: | ||
259 | k = X + fentry->k; | ||
260 | goto load_b; | ||
261 | case BPF_LDX|BPF_B|BPF_MSH: | ||
262 | if (fentry->k >= len) | ||
263 | return 0; | ||
264 | X = (data[fentry->k] & 0xf) << 2; | ||
265 | continue; | ||
266 | case BPF_LD|BPF_IMM: | ||
267 | A = fentry->k; | ||
268 | continue; | ||
269 | case BPF_LDX|BPF_IMM: | ||
270 | X = fentry->k; | ||
271 | continue; | ||
272 | case BPF_LD|BPF_MEM: | ||
273 | A = mem[fentry->k]; | ||
274 | continue; | ||
275 | case BPF_LDX|BPF_MEM: | ||
276 | X = mem[fentry->k]; | ||
277 | continue; | ||
278 | case BPF_MISC|BPF_TAX: | ||
279 | X = A; | ||
280 | continue; | ||
281 | case BPF_MISC|BPF_TXA: | ||
282 | A = X; | ||
283 | continue; | ||
284 | case BPF_RET|BPF_K: | ||
285 | return ((unsigned int)fentry->k); | ||
286 | case BPF_RET|BPF_A: | ||
287 | return ((unsigned int)A); | ||
288 | case BPF_ST: | ||
289 | mem[fentry->k] = A; | ||
290 | continue; | ||
291 | case BPF_STX: | ||
292 | mem[fentry->k] = X; | ||
293 | continue; | ||
294 | default: | ||
295 | /* Invalid instruction counts as RET */ | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Handle ancillary data, which are impossible | ||
301 | * (or very difficult) to obtain by parsing the packet contents. | ||
302 | */ | ||
303 | switch (k-SKF_AD_OFF) { | ||
304 | case SKF_AD_PROTOCOL: | ||
305 | A = htons(skb->protocol); | ||
306 | continue; | ||
307 | case SKF_AD_PKTTYPE: | ||
308 | A = skb->pkt_type; | ||
309 | continue; | ||
310 | case SKF_AD_IFINDEX: | ||
311 | A = skb->dev->ifindex; | ||
312 | continue; | ||
313 | default: | ||
314 | return 0; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | return 0; | ||
319 | } | ||
320 | |||
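To make the interpreter above concrete, here is a hedged sketch of a classic BPF program that it would execute: accept IPv4/UDP frames on a PF_PACKET socket, drop everything else, attached with the standard SO_ATTACH_FILTER socket option (which lands in sk_attach_filter() below). Offsets assume an Ethernet link layer; attach_udp_filter is a hypothetical helper name.

    #include <sys/socket.h>
    #include <linux/filter.h>
    #include <linux/if_ether.h>
    #include <netinet/in.h>

    static int attach_udp_filter(int fd)
    {
    	static struct sock_filter insns[] = {
    		BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),             /* A = ethertype */
    		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 3), /* not IPv4 -> drop */
    		BPF_STMT(BPF_LD  | BPF_B | BPF_ABS, 23),             /* A = IP protocol */
    		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),
    		BPF_STMT(BPF_RET | BPF_K, 0xffff),                   /* accept, keep 64K */
    		BPF_STMT(BPF_RET | BPF_K, 0),                        /* drop */
    	};
    	struct sock_fprog prog = {
    		.len    = sizeof(insns) / sizeof(insns[0]),
    		.filter = insns,
    	};

    	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
    }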
321 | /** | ||
322 | * sk_chk_filter - verify socket filter code | ||
323 | * @filter: filter to verify | ||
324 | * @flen: length of filter | ||
325 | * | ||
326 | * Check the user's filter code. If we let some ugly | ||
327 | * filter code slip through kaboom! The filter must contain | ||
328 | * no references or jumps that are out of range, no illegal instructions | ||
329 | * and no backward jumps. It must end with a RET instruction. | ||
330 | * | ||
331 | * Returns 0 if the rule set is legal or a negative errno code if not. | ||
332 | */ | ||
333 | int sk_chk_filter(struct sock_filter *filter, int flen) | ||
334 | { | ||
335 | struct sock_filter *ftest; | ||
336 | int pc; | ||
337 | |||
338 | if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0) | ||
339 | return -EINVAL; | ||
340 | |||
341 | /* check the filter code now */ | ||
342 | for (pc = 0; pc < flen; pc++) { | ||
343 | /* all jumps are forward as they are not signed */ | ||
344 | ftest = &filter[pc]; | ||
345 | if (BPF_CLASS(ftest->code) == BPF_JMP) { | ||
346 | /* but they mustn't jump off the end */ | ||
347 | if (BPF_OP(ftest->code) == BPF_JA) { | ||
348 | /* | ||
349 | * Note, the large ftest->k might cause loops. | ||
350 | * Compare this with conditional jumps below, | ||
351 | * where offsets are limited. --ANK (981016) | ||
352 | */ | ||
353 | if (ftest->k >= (unsigned)(flen-pc-1)) | ||
354 | return -EINVAL; | ||
355 | } else { | ||
356 | /* for conditionals both must be safe */ | ||
357 | if (pc + ftest->jt + 1 >= flen || | ||
358 | pc + ftest->jf + 1 >= flen) | ||
359 | return -EINVAL; | ||
360 | } | ||
361 | } | ||
362 | |||
363 | /* check that memory operations use valid addresses. */ | ||
364 | if (ftest->k >= BPF_MEMWORDS) { | ||
365 | /* but it might not be a memory operation... */ | ||
366 | switch (ftest->code) { | ||
367 | case BPF_ST: | ||
368 | case BPF_STX: | ||
369 | case BPF_LD|BPF_MEM: | ||
370 | case BPF_LDX|BPF_MEM: | ||
371 | return -EINVAL; | ||
372 | } | ||
373 | } | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * The program must end with a return. We don't care where they | ||
378 | * jumped within the script (it's always forwards) but in the end | ||
379 | * they _will_ hit this. | ||
380 | */ | ||
381 | return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; | ||
382 | } | ||
383 | |||
384 | /** | ||
385 | * sk_attach_filter - attach a socket filter | ||
386 | * @fprog: the filter program | ||
387 | * @sk: the socket to use | ||
388 | * | ||
389 | * Attach the user's filter code. We first run some sanity checks on | ||
390 | * it to make sure it does not explode on us later. If an error | ||
391 | * occurs or there is insufficient memory for the filter a negative | ||
392 | * errno code is returned. On success the return is zero. | ||
393 | */ | ||
394 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | ||
395 | { | ||
396 | struct sk_filter *fp; | ||
397 | unsigned int fsize = sizeof(struct sock_filter) * fprog->len; | ||
398 | int err; | ||
399 | |||
400 | /* Make sure new filter is there and in the right amounts. */ | ||
401 | if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS) | ||
402 | return -EINVAL; | ||
403 | |||
404 | fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); | ||
405 | if (!fp) | ||
406 | return -ENOMEM; | ||
407 | if (copy_from_user(fp->insns, fprog->filter, fsize)) { | ||
408 | sock_kfree_s(sk, fp, fsize+sizeof(*fp)); | ||
409 | return -EFAULT; | ||
410 | } | ||
411 | |||
412 | atomic_set(&fp->refcnt, 1); | ||
413 | fp->len = fprog->len; | ||
414 | |||
415 | err = sk_chk_filter(fp->insns, fp->len); | ||
416 | if (!err) { | ||
417 | struct sk_filter *old_fp; | ||
418 | |||
419 | spin_lock_bh(&sk->sk_lock.slock); | ||
420 | old_fp = sk->sk_filter; | ||
421 | sk->sk_filter = fp; | ||
422 | spin_unlock_bh(&sk->sk_lock.slock); | ||
423 | fp = old_fp; | ||
424 | } | ||
425 | |||
426 | if (fp) | ||
427 | sk_filter_release(sk, fp); | ||
428 | return err; | ||
429 | } | ||
430 | |||
431 | EXPORT_SYMBOL(sk_chk_filter); | ||
432 | EXPORT_SYMBOL(sk_run_filter); | ||
diff --git a/net/core/flow.c b/net/core/flow.c new file mode 100644 index 000000000000..f289570b15a3 --- /dev/null +++ b/net/core/flow.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /* flow.c: Generic flow cache. | ||
2 | * | ||
3 | * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) | ||
4 | * Copyright (C) 2003 David S. Miller (davem@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/list.h> | ||
10 | #include <linux/jhash.h> | ||
11 | #include <linux/interrupt.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/random.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/completion.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/bitops.h> | ||
20 | #include <linux/notifier.h> | ||
21 | #include <linux/cpu.h> | ||
22 | #include <linux/cpumask.h> | ||
23 | #include <net/flow.h> | ||
24 | #include <asm/atomic.h> | ||
25 | #include <asm/semaphore.h> | ||
26 | |||
27 | struct flow_cache_entry { | ||
28 | struct flow_cache_entry *next; | ||
29 | u16 family; | ||
30 | u8 dir; | ||
31 | struct flowi key; | ||
32 | u32 genid; | ||
33 | void *object; | ||
34 | atomic_t *object_ref; | ||
35 | }; | ||
36 | |||
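    /*
     * An entry's cached object is valid only while fle->genid matches the
     * global flow_cache_genid below; bumping that counter (done by users of
     * the cache, e.g. on policy changes) lazily invalidates every entry
     * without walking the per-cpu tables.
     */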
37 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | ||
38 | |||
39 | static u32 flow_hash_shift; | ||
40 | #define flow_hash_size (1 << flow_hash_shift) | ||
41 | static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | ||
42 | |||
43 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | ||
44 | |||
45 | static kmem_cache_t *flow_cachep; | ||
46 | |||
47 | static int flow_lwm, flow_hwm; | ||
48 | |||
49 | struct flow_percpu_info { | ||
50 | int hash_rnd_recalc; | ||
51 | u32 hash_rnd; | ||
52 | int count; | ||
53 | } ____cacheline_aligned; | ||
54 | static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; | ||
55 | |||
56 | #define flow_hash_rnd_recalc(cpu) \ | ||
57 | (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) | ||
58 | #define flow_hash_rnd(cpu) \ | ||
59 | (per_cpu(flow_hash_info, cpu).hash_rnd) | ||
60 | #define flow_count(cpu) \ | ||
61 | (per_cpu(flow_hash_info, cpu).count) | ||
62 | |||
63 | static struct timer_list flow_hash_rnd_timer; | ||
64 | |||
65 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | ||
66 | |||
67 | struct flow_flush_info { | ||
68 | atomic_t cpuleft; | ||
69 | struct completion completion; | ||
70 | }; | ||
71 | static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; | ||
72 | |||
73 | #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) | ||
74 | |||
75 | static void flow_cache_new_hashrnd(unsigned long arg) | ||
76 | { | ||
77 | int i; | ||
78 | |||
79 | for_each_cpu(i) | ||
80 | flow_hash_rnd_recalc(i) = 1; | ||
81 | |||
82 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | ||
83 | add_timer(&flow_hash_rnd_timer); | ||
84 | } | ||
85 | |||
86 | static void __flow_cache_shrink(int cpu, int shrink_to) | ||
87 | { | ||
88 | struct flow_cache_entry *fle, **flp; | ||
89 | int i; | ||
90 | |||
91 | for (i = 0; i < flow_hash_size; i++) { | ||
92 | int k = 0; | ||
93 | |||
94 | flp = &flow_table(cpu)[i]; | ||
95 | while ((fle = *flp) != NULL && k < shrink_to) { | ||
96 | k++; | ||
97 | flp = &fle->next; | ||
98 | } | ||
99 | while ((fle = *flp) != NULL) { | ||
100 | *flp = fle->next; | ||
101 | if (fle->object) | ||
102 | atomic_dec(fle->object_ref); | ||
103 | kmem_cache_free(flow_cachep, fle); | ||
104 | flow_count(cpu)--; | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | static void flow_cache_shrink(int cpu) | ||
110 | { | ||
111 | int shrink_to = flow_lwm / flow_hash_size; | ||
112 | |||
113 | __flow_cache_shrink(cpu, shrink_to); | ||
114 | } | ||
115 | |||
116 | static void flow_new_hash_rnd(int cpu) | ||
117 | { | ||
118 | get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); | ||
119 | flow_hash_rnd_recalc(cpu) = 0; | ||
120 | |||
121 | __flow_cache_shrink(cpu, 0); | ||
122 | } | ||
123 | |||
124 | static u32 flow_hash_code(struct flowi *key, int cpu) | ||
125 | { | ||
126 | u32 *k = (u32 *) key; | ||
127 | |||
128 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & | ||
129 | (flow_hash_size - 1)); | ||
130 | } | ||
131 | |||
132 | #if (BITS_PER_LONG == 64) | ||
133 | typedef u64 flow_compare_t; | ||
134 | #else | ||
135 | typedef u32 flow_compare_t; | ||
136 | #endif | ||
137 | |||
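    /*
     * Deliberately left undefined: the call below is only emitted if the
     * compile-time size check in flow_key_compare() is true, turning a
     * missized struct flowi into a link-time error.
     */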
138 | extern void flowi_is_missized(void); | ||
139 | |||
140 | /* I hear what you're saying, use memcmp. But memcmp cannot make | ||
141 | * important assumptions that we can here, such as alignment and | ||
142 | * constant size. | ||
143 | */ | ||
144 | static int flow_key_compare(struct flowi *key1, struct flowi *key2) | ||
145 | { | ||
146 | flow_compare_t *k1, *k1_lim, *k2; | ||
147 | const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); | ||
148 | |||
149 | if (sizeof(struct flowi) % sizeof(flow_compare_t)) | ||
150 | flowi_is_missized(); | ||
151 | |||
152 | k1 = (flow_compare_t *) key1; | ||
153 | k1_lim = k1 + n_elem; | ||
154 | |||
155 | k2 = (flow_compare_t *) key2; | ||
156 | |||
157 | do { | ||
158 | if (*k1++ != *k2++) | ||
159 | return 1; | ||
160 | } while (k1 < k1_lim); | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, | ||
166 | flow_resolve_t resolver) | ||
167 | { | ||
168 | struct flow_cache_entry *fle, **head; | ||
169 | unsigned int hash; | ||
170 | int cpu; | ||
171 | |||
172 | local_bh_disable(); | ||
173 | cpu = smp_processor_id(); | ||
174 | |||
175 | fle = NULL; | ||
176 | /* Packet really early in init? Making flow_cache_init a | ||
177 | * pre-smp initcall would solve this. --RR */ | ||
178 | if (!flow_table(cpu)) | ||
179 | goto nocache; | ||
180 | |||
181 | if (flow_hash_rnd_recalc(cpu)) | ||
182 | flow_new_hash_rnd(cpu); | ||
183 | hash = flow_hash_code(key, cpu); | ||
184 | |||
185 | head = &flow_table(cpu)[hash]; | ||
186 | for (fle = *head; fle; fle = fle->next) { | ||
187 | if (fle->family == family && | ||
188 | fle->dir == dir && | ||
189 | flow_key_compare(key, &fle->key) == 0) { | ||
190 | if (fle->genid == atomic_read(&flow_cache_genid)) { | ||
191 | void *ret = fle->object; | ||
192 | |||
193 | if (ret) | ||
194 | atomic_inc(fle->object_ref); | ||
195 | local_bh_enable(); | ||
196 | |||
197 | return ret; | ||
198 | } | ||
199 | break; | ||
200 | } | ||
201 | } | ||
202 | |||
203 | if (!fle) { | ||
204 | if (flow_count(cpu) > flow_hwm) | ||
205 | flow_cache_shrink(cpu); | ||
206 | |||
207 | fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); | ||
208 | if (fle) { | ||
209 | fle->next = *head; | ||
210 | *head = fle; | ||
211 | fle->family = family; | ||
212 | fle->dir = dir; | ||
213 | memcpy(&fle->key, key, sizeof(*key)); | ||
214 | fle->object = NULL; | ||
215 | flow_count(cpu)++; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | nocache: | ||
220 | { | ||
221 | void *obj; | ||
222 | atomic_t *obj_ref; | ||
223 | |||
224 | resolver(key, family, dir, &obj, &obj_ref); | ||
225 | |||
226 | if (fle) { | ||
227 | fle->genid = atomic_read(&flow_cache_genid); | ||
228 | |||
229 | if (fle->object) | ||
230 | atomic_dec(fle->object_ref); | ||
231 | |||
232 | fle->object = obj; | ||
233 | fle->object_ref = obj_ref; | ||
234 | if (obj) | ||
235 | atomic_inc(fle->object_ref); | ||
236 | } | ||
237 | local_bh_enable(); | ||
238 | |||
239 | return obj; | ||
240 | } | ||
241 | } | ||
242 | |||
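The resolver contract assumed by flow_cache_lookup() above: on a miss (or a stale genid) it must produce the object plus a pointer to its refcount, and the refcount pointer must be non-NULL whenever the object is. A hedged sketch with hypothetical names (my_resolver, my_policy, my_policy_lookup):

    static void my_resolver(struct flowi *key, u16 family, u8 dir,
    			void **objp, atomic_t **obj_refp)
    {
    	/* Hypothetical lookup returning a refcounted object or NULL. */
    	struct my_policy *pol = my_policy_lookup(key, family, dir);

    	*objp = pol;
    	*obj_refp = pol ? &pol->refcnt : NULL;
    }

    /* ... pol = flow_cache_lookup(key, family, dir, my_resolver); */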
243 | static void flow_cache_flush_tasklet(unsigned long data) | ||
244 | { | ||
245 | struct flow_flush_info *info = (void *)data; | ||
246 | int i; | ||
247 | int cpu; | ||
248 | |||
249 | cpu = smp_processor_id(); | ||
250 | for (i = 0; i < flow_hash_size; i++) { | ||
251 | struct flow_cache_entry *fle; | ||
252 | |||
253 | fle = flow_table(cpu)[i]; | ||
254 | for (; fle; fle = fle->next) { | ||
255 | unsigned genid = atomic_read(&flow_cache_genid); | ||
256 | |||
257 | if (!fle->object || fle->genid == genid) | ||
258 | continue; | ||
259 | |||
260 | fle->object = NULL; | ||
261 | atomic_dec(fle->object_ref); | ||
262 | } | ||
263 | } | ||
264 | |||
265 | if (atomic_dec_and_test(&info->cpuleft)) | ||
266 | complete(&info->completion); | ||
267 | } | ||
268 | |||
269 | static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); | ||
270 | static void flow_cache_flush_per_cpu(void *data) | ||
271 | { | ||
272 | struct flow_flush_info *info = data; | ||
273 | int cpu; | ||
274 | struct tasklet_struct *tasklet; | ||
275 | |||
276 | cpu = smp_processor_id(); | ||
277 | |||
278 | tasklet = flow_flush_tasklet(cpu); | ||
279 | tasklet->data = (unsigned long)info; | ||
280 | tasklet_schedule(tasklet); | ||
281 | } | ||
282 | |||
283 | void flow_cache_flush(void) | ||
284 | { | ||
285 | struct flow_flush_info info; | ||
286 | static DECLARE_MUTEX(flow_flush_sem); | ||
287 | |||
288 | /* Don't want cpus going down or up during this. */ | ||
289 | lock_cpu_hotplug(); | ||
290 | down(&flow_flush_sem); | ||
291 | atomic_set(&info.cpuleft, num_online_cpus()); | ||
292 | init_completion(&info.completion); | ||
293 | |||
294 | local_bh_disable(); | ||
295 | smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); | ||
296 | flow_cache_flush_tasklet((unsigned long)&info); | ||
297 | local_bh_enable(); | ||
298 | |||
299 | wait_for_completion(&info.completion); | ||
300 | up(&flow_flush_sem); | ||
301 | unlock_cpu_hotplug(); | ||
302 | } | ||
303 | |||
304 | static void __devinit flow_cache_cpu_prepare(int cpu) | ||
305 | { | ||
306 | struct tasklet_struct *tasklet; | ||
307 | unsigned long order; | ||
308 | |||
309 | for (order = 0; | ||
310 | (PAGE_SIZE << order) < | ||
311 | (sizeof(struct flow_cache_entry *)*flow_hash_size); | ||
312 | order++) | ||
313 | /* NOTHING */; | ||
314 | |||
315 | flow_table(cpu) = (struct flow_cache_entry **) | ||
316 | __get_free_pages(GFP_KERNEL, order); | ||
317 | if (!flow_table(cpu)) | ||
318 | panic("NET: failed to allocate flow cache order %lu\n", order); | ||
319 | |||
320 | memset(flow_table(cpu), 0, PAGE_SIZE << order); | ||
321 | |||
322 | flow_hash_rnd_recalc(cpu) = 1; | ||
323 | flow_count(cpu) = 0; | ||
324 | |||
325 | tasklet = flow_flush_tasklet(cpu); | ||
326 | tasklet_init(tasklet, flow_cache_flush_tasklet, 0); | ||
327 | } | ||
328 | |||
329 | #ifdef CONFIG_HOTPLUG_CPU | ||
330 | static int flow_cache_cpu(struct notifier_block *nfb, | ||
331 | unsigned long action, | ||
332 | void *hcpu) | ||
333 | { | ||
334 | if (action == CPU_DEAD) | ||
335 | __flow_cache_shrink((unsigned long)hcpu, 0); | ||
336 | return NOTIFY_OK; | ||
337 | } | ||
338 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
339 | |||
340 | static int __init flow_cache_init(void) | ||
341 | { | ||
342 | int i; | ||
343 | |||
344 | flow_cachep = kmem_cache_create("flow_cache", | ||
345 | sizeof(struct flow_cache_entry), | ||
346 | 0, SLAB_HWCACHE_ALIGN, | ||
347 | NULL, NULL); | ||
348 | |||
349 | if (!flow_cachep) | ||
350 | panic("NET: failed to allocate flow cache slab\n"); | ||
351 | |||
352 | flow_hash_shift = 10; | ||
353 | flow_lwm = 2 * flow_hash_size; | ||
354 | flow_hwm = 4 * flow_hash_size; | ||
355 | |||
356 | init_timer(&flow_hash_rnd_timer); | ||
357 | flow_hash_rnd_timer.function = flow_cache_new_hashrnd; | ||
358 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | ||
359 | add_timer(&flow_hash_rnd_timer); | ||
360 | |||
361 | for_each_cpu(i) | ||
362 | flow_cache_cpu_prepare(i); | ||
363 | |||
364 | hotcpu_notifier(flow_cache_cpu, 0); | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | module_init(flow_cache_init); | ||
369 | |||
370 | EXPORT_SYMBOL(flow_cache_genid); | ||
371 | EXPORT_SYMBOL(flow_cache_lookup); | ||
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c new file mode 100644 index 000000000000..b07c029e8219 --- /dev/null +++ b/net/core/gen_estimator.c | |||
@@ -0,0 +1,250 @@ | |||
1 | /* | ||
2 | * net/sched/gen_estimator.c Simple rate estimator. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
10 | * | ||
11 | * Changes: | ||
12 | * Jamal Hadi Salim - moved it to net/core and reshuffled | ||
13 | * names to make it usable in general net subsystem. | ||
14 | */ | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <asm/bitops.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/jiffies.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/socket.h> | ||
26 | #include <linux/sockios.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/interrupt.h> | ||
30 | #include <linux/netdevice.h> | ||
31 | #include <linux/skbuff.h> | ||
32 | #include <linux/rtnetlink.h> | ||
33 | #include <linux/init.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <net/gen_stats.h> | ||
36 | |||
37 | /* | ||
38 | This code is NOT intended to be used for statistics collection; | ||
39 | its purpose is to provide a base for statistical multiplexing | ||
40 | for controlled load service. | ||
41 | If you need only statistics, run a user level daemon which | ||
42 | periodically reads byte counters. | ||
43 | |||
44 | Unfortunately, rate estimation is not a very easy task. | ||
45 | E.g. I did not find a simple way to estimate the current peak rate, | ||
46 | and even failed to formulate the problem 8)8) | ||
47 | |||
48 | So I preferred not to build an estimator into the scheduler, | ||
49 | but to run this task separately. | ||
50 | Ideally, it should be kernel thread(s), but for now it runs | ||
51 | from timers, which puts an apparent upper bound on the number of rated | ||
52 | flows, has minimal overhead for small counts, and is enough | ||
53 | to handle controlled load service and sets of aggregates. | ||
54 | |||
55 | We measure rate over A=(1<<interval) seconds and evaluate EWMA: | ||
56 | |||
57 | avrate = avrate*(1-W) + rate*W | ||
58 | |||
59 | where W is chosen as negative power of 2: W = 2^(-ewma_log) | ||
60 | |||
61 | The resulting time constant is: | ||
62 | |||
63 | T = A/(-ln(1-W)) | ||
64 | |||
65 | |||
66 | NOTES. | ||
67 | |||
68 | * The stored value for avbps is scaled by 2^5, so that maximal | ||
69 | rate is ~1Gbit, avpps is scaled by 2^10. | ||
70 | |||
71 | * Minimal interval is HZ/4=250msec (it is the greatest common divisor | ||
72 | for HZ=100 and HZ=1024 8)), maximal interval | ||
73 | is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals | ||
74 | are too expensive, longer ones can be implemented | ||
75 | at user level painlessly. | ||
76 | */ | ||
77 | |||
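To make the time constant concrete, a short worked example of the formulas above, assuming the smallest measurement window (A = 1 s) and ewma_log = 3:

$$W = 2^{-3} = 0.125, \qquad T = \frac{A}{-\ln(1-W)} = \frac{1}{-\ln 0.875} \approx 7.5\ \mathrm{s}$$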
78 | #define EST_MAX_INTERVAL 5 | ||
79 | |||
80 | struct gen_estimator | ||
81 | { | ||
82 | struct gen_estimator *next; | ||
83 | struct gnet_stats_basic *bstats; | ||
84 | struct gnet_stats_rate_est *rate_est; | ||
85 | spinlock_t *stats_lock; | ||
86 | unsigned interval; | ||
87 | int ewma_log; | ||
88 | u64 last_bytes; | ||
89 | u32 last_packets; | ||
90 | u32 avpps; | ||
91 | u32 avbps; | ||
92 | }; | ||
93 | |||
94 | struct gen_estimator_head | ||
95 | { | ||
96 | struct timer_list timer; | ||
97 | struct gen_estimator *list; | ||
98 | }; | ||
99 | |||
100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; | ||
101 | |||
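    /*
     * Estimators are grouped by interval index: elist[idx] carries one
     * timer that fires every (HZ << idx) / 4 jiffies plus a singly linked
     * list of all estimators sharing that period (walked in est_timer()
     * below).
     */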
102 | /* Estimator array lock */ | ||
103 | static DEFINE_RWLOCK(est_lock); | ||
104 | |||
105 | static void est_timer(unsigned long arg) | ||
106 | { | ||
107 | int idx = (int)arg; | ||
108 | struct gen_estimator *e; | ||
109 | |||
110 | read_lock(&est_lock); | ||
111 | for (e = elist[idx].list; e; e = e->next) { | ||
112 | u64 nbytes; | ||
113 | u32 npackets; | ||
114 | u32 rate; | ||
115 | |||
116 | spin_lock(e->stats_lock); | ||
117 | nbytes = e->bstats->bytes; | ||
118 | npackets = e->bstats->packets; | ||
119 | rate = (nbytes - e->last_bytes)<<(7 - idx); | ||
120 | e->last_bytes = nbytes; | ||
121 | e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; | ||
122 | e->rate_est->bps = (e->avbps+0xF)>>5; | ||
123 | |||
124 | rate = (npackets - e->last_packets)<<(12 - idx); | ||
125 | e->last_packets = npackets; | ||
126 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; | ||
127 | e->rate_est->pps = (e->avpps+0x1FF)>>10; | ||
128 | spin_unlock(e->stats_lock); | ||
129 | } | ||
130 | |||
131 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); | ||
132 | read_unlock(&est_lock); | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * gen_new_estimator - create a new rate estimator | ||
137 | * @bstats: basic statistics | ||
138 | * @rate_est: rate estimator statistics | ||
139 | * @stats_lock: statistics lock | ||
140 | * @opt: rate estimator configuration TLV | ||
141 | * | ||
142 | * Creates a new rate estimator with &bstats as source and &rate_est | ||
143 | * as destination. A new timer with the interval specified in the | ||
144 | * configuration TLV is created. Upon each interval, the latest statistics | ||
145 | * will be read from &bstats and the estimated rate will be stored in | ||
146 | * &rate_est with the statistics lock grabbed during this period. | ||
147 | * | ||
148 | * Returns 0 on success or a negative error code. | ||
149 | */ | ||
150 | int gen_new_estimator(struct gnet_stats_basic *bstats, | ||
151 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) | ||
152 | { | ||
153 | struct gen_estimator *est; | ||
154 | struct gnet_estimator *parm = RTA_DATA(opt); | ||
155 | |||
156 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) | ||
157 | return -EINVAL; | ||
158 | |||
159 | if (parm->interval < -2 || parm->interval > 3) | ||
160 | return -EINVAL; | ||
161 | |||
162 | est = kmalloc(sizeof(*est), GFP_KERNEL); | ||
163 | if (est == NULL) | ||
164 | return -ENOBUFS; | ||
165 | |||
166 | memset(est, 0, sizeof(*est)); | ||
167 | est->interval = parm->interval + 2; | ||
168 | est->bstats = bstats; | ||
169 | est->rate_est = rate_est; | ||
170 | est->stats_lock = stats_lock; | ||
171 | est->ewma_log = parm->ewma_log; | ||
172 | est->last_bytes = bstats->bytes; | ||
173 | est->avbps = rate_est->bps<<5; | ||
174 | est->last_packets = bstats->packets; | ||
175 | est->avpps = rate_est->pps<<10; | ||
176 | |||
177 | est->next = elist[est->interval].list; | ||
178 | if (est->next == NULL) { | ||
179 | init_timer(&elist[est->interval].timer); | ||
180 | elist[est->interval].timer.data = est->interval; | ||
181 | elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); | ||
182 | elist[est->interval].timer.function = est_timer; | ||
183 | add_timer(&elist[est->interval].timer); | ||
184 | } | ||
185 | write_lock_bh(&est_lock); | ||
186 | elist[est->interval].list = est; | ||
187 | write_unlock_bh(&est_lock); | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | /** | ||
192 | * gen_kill_estimator - remove a rate estimator | ||
193 | * @bstats: basic statistics | ||
194 | * @rate_est: rate estimator statistics | ||
195 | * | ||
196 | * Removes the rate estimator specified by &bstats and &rate_est | ||
197 | * and deletes the timer. | ||
198 | */ | ||
199 | void gen_kill_estimator(struct gnet_stats_basic *bstats, | ||
200 | struct gnet_stats_rate_est *rate_est) | ||
201 | { | ||
202 | int idx; | ||
203 | struct gen_estimator *est, **pest; | ||
204 | |||
205 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { | ||
206 | int killed = 0; | ||
207 | pest = &elist[idx].list; | ||
208 | while ((est=*pest) != NULL) { | ||
209 | if (est->rate_est != rate_est || est->bstats != bstats) { | ||
210 | pest = &est->next; | ||
211 | continue; | ||
212 | } | ||
213 | |||
214 | write_lock_bh(&est_lock); | ||
215 | *pest = est->next; | ||
216 | write_unlock_bh(&est_lock); | ||
217 | |||
218 | kfree(est); | ||
219 | killed++; | ||
220 | } | ||
221 | if (killed && elist[idx].list == NULL) | ||
222 | del_timer(&elist[idx].timer); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | /** | ||
227 | * gen_replace_estimator - replace rate estimator configuration | ||
228 | * @bstats: basic statistics | ||
229 | * @rate_est: rate estimator statistics | ||
230 | * @stats_lock: statistics lock | ||
231 | * @opt: rate estimator configuration TLV | ||
232 | * | ||
233 | * Replaces the configuration of a rate estimator by calling | ||
234 | * gen_kill_estimator() and gen_new_estimator(). | ||
235 | * | ||
236 | * Returns 0 on success or a negative error code. | ||
237 | */ | ||
238 | int | ||
239 | gen_replace_estimator(struct gnet_stats_basic *bstats, | ||
240 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, | ||
241 | struct rtattr *opt) | ||
242 | { | ||
243 | gen_kill_estimator(bstats, rate_est); | ||
244 | return gen_new_estimator(bstats, rate_est, stats_lock, opt); | ||
245 | } | ||
246 | |||
247 | |||
248 | EXPORT_SYMBOL(gen_kill_estimator); | ||
249 | EXPORT_SYMBOL(gen_new_estimator); | ||
250 | EXPORT_SYMBOL(gen_replace_estimator); | ||
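
For illustration, a minimal standalone user-space sketch (not part of this file) of the update performed in est_timer() above: each period of A = (HZ<<idx)/4 jiffies, the measured rate is folded into the average with weight W = 2^-ewma_log via avbps += (rate - avbps) >> ewma_log, which yields the time constant T = A/(-ln(1-W)) from the header comment. The idx and ewma_log values are assumed for the example, and the kernel's fixed-point scaling (2^5 for bytes, 2^10 for packets) is omitted.

#include <math.h>
#include <stdio.h>

int main(void)
{
	int idx = 2;			/* parm->interval + 2 => period A = 1 sec */
	int ewma_log = 5;		/* weight W = 2^-5 = 1/32 */
	double A = (double)(1 << idx) / 4.0;
	double W = 1.0 / (double)(1 << ewma_log);
	long avbps = 0, rate = 1000000;	/* constant 1 Mbit/s input */
	int i;

	for (i = 0; i < 200; i++)	/* the EWMA converges toward the input */
		avbps += (rate - avbps) >> ewma_log;

	printf("avbps=%ld  T=%.1f sec\n", avbps, A / (-log(1.0 - W)));
	return 0;
}
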
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c new file mode 100644 index 000000000000..8f21490355fa --- /dev/null +++ b/net/core/gen_stats.c | |||
@@ -0,0 +1,239 @@ | |||
1 | /* | ||
2 | * net/core/gen_stats.c | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | * Jamal Hadi Salim | ||
11 | * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
12 | * | ||
13 | * See Documentation/networking/gen_stats.txt | ||
14 | */ | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/socket.h> | ||
21 | #include <linux/rtnetlink.h> | ||
22 | #include <linux/gen_stats.h> | ||
23 | #include <net/gen_stats.h> | ||
24 | |||
25 | |||
26 | static inline int | ||
27 | gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) | ||
28 | { | ||
29 | RTA_PUT(d->skb, type, size, buf); | ||
30 | return 0; | ||
31 | |||
32 | rtattr_failure: | ||
33 | spin_unlock_bh(d->lock); | ||
34 | return -1; | ||
35 | } | ||
36 | |||
37 | /** | ||
38 | * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode | ||
39 | * @skb: socket buffer to put statistics TLVs into | ||
40 | * @type: TLV type for top level statistic TLV | ||
41 | * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV | ||
42 | * @xstats_type: TLV type for backward compatibility xstats TLV | ||
43 | * @lock: statistics lock | ||
44 | * @d: dumping handle | ||
45 | * | ||
46 | * Initializes the dumping handle, grabs the statistic lock and appends | ||
47 | * an empty TLV header to the socket buffer for use as a container for all | ||
48 | * other statistic TLVs. | ||
49 | * | ||
50 | * The dumping handle is marked to be in backward compatibility mode telling | ||
51 | * all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats. | ||
52 | * | ||
53 | * Returns 0 on success or -1 if the room in the socket buffer was not sufficient. | ||
54 | */ | ||
55 | int | ||
56 | gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, | ||
57 | int xstats_type, spinlock_t *lock, struct gnet_dump *d) | ||
58 | { | ||
59 | memset(d, 0, sizeof(*d)); | ||
60 | |||
61 | spin_lock_bh(lock); | ||
62 | d->lock = lock; | ||
63 | if (type) | ||
64 | d->tail = (struct rtattr *) skb->tail; | ||
65 | d->skb = skb; | ||
66 | d->compat_tc_stats = tc_stats_type; | ||
67 | d->compat_xstats = xstats_type; | ||
68 | |||
69 | if (d->tail) | ||
70 | return gnet_stats_copy(d, type, NULL, 0); | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * gnet_stats_start_copy - start dumping procedure | ||
77 | * @skb: socket buffer to put statistics TLVs into | ||
78 | * @type: TLV type for top level statistic TLV | ||
79 | * @lock: statistics lock | ||
80 | * @d: dumping handle | ||
81 | * | ||
82 | * Initializes the dumping handle, grabs the statistic lock and appends | ||
83 | * an empty TLV header to the socket buffer for use as a container for all | ||
84 | * other statistic TLVs. | ||
85 | * | ||
86 | * Returns 0 on success or -1 if the room in the socket buffer was not sufficient. | ||
87 | */ | ||
88 | int | ||
89 | gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, | ||
90 | struct gnet_dump *d) | ||
91 | { | ||
92 | return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); | ||
93 | } | ||
94 | |||
95 | /** | ||
96 | * gnet_stats_copy_basic - copy basic statistics into statistic TLV | ||
97 | * @d: dumping handle | ||
98 | * @b: basic statistics | ||
99 | * | ||
100 | * Appends the basic statistics to the top level TLV created by | ||
101 | * gnet_stats_start_copy(). | ||
102 | * | ||
103 | * Returns 0 on success or -1 with the statistic lock released | ||
104 | * if the room in the socket buffer was not sufficient. | ||
105 | */ | ||
106 | int | ||
107 | gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) | ||
108 | { | ||
109 | if (d->compat_tc_stats) { | ||
110 | d->tc_stats.bytes = b->bytes; | ||
111 | d->tc_stats.packets = b->packets; | ||
112 | } | ||
113 | |||
114 | if (d->tail) | ||
115 | return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV | ||
122 | * @d: dumping handle | ||
123 | * @r: rate estimator statistics | ||
124 | * | ||
125 | * Appends the rate estimator statistics to the top level TLV created by | ||
126 | * gnet_stats_start_copy(). | ||
127 | * | ||
128 | * Returns 0 on success or -1 with the statistic lock released | ||
129 | * if the room in the socket buffer was not sufficient. | ||
130 | */ | ||
131 | int | ||
132 | gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) | ||
133 | { | ||
134 | if (d->compat_tc_stats) { | ||
135 | d->tc_stats.bps = r->bps; | ||
136 | d->tc_stats.pps = r->pps; | ||
137 | } | ||
138 | |||
139 | if (d->tail) | ||
140 | return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * gnet_stats_copy_queue - copy queue statistics into statistics TLV | ||
147 | * @d: dumping handle | ||
148 | * @q: queue statistics | ||
149 | * | ||
150 | * Appends the queue statistics to the top level TLV created by | ||
151 | * gnet_stats_start_copy(). | ||
152 | * | ||
153 | * Returns 0 on success or -1 with the statistic lock released | ||
154 | * if the room in the socket buffer was not sufficient. | ||
155 | */ | ||
156 | int | ||
157 | gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q) | ||
158 | { | ||
159 | if (d->compat_tc_stats) { | ||
160 | d->tc_stats.drops = q->drops; | ||
161 | d->tc_stats.qlen = q->qlen; | ||
162 | d->tc_stats.backlog = q->backlog; | ||
163 | d->tc_stats.overlimits = q->overlimits; | ||
164 | } | ||
165 | |||
166 | if (d->tail) | ||
167 | return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q)); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | /** | ||
173 | * gnet_stats_copy_app - copy application specific statistics into statistics TLV | ||
174 | * @d: dumping handle | ||
175 | * @st: application specific statistics data | ||
176 | * @len: length of data | ||
177 | * | ||
178 | * Appends the application specific statistics to the top level TLV created by | ||
179 | * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping | ||
180 | * handle is in backward compatibility mode. | ||
181 | * | ||
182 | * Returns 0 on success or -1 with the statistic lock released | ||
183 | * if the room in the socket buffer was not sufficient. | ||
184 | */ | ||
185 | int | ||
186 | gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) | ||
187 | { | ||
188 | if (d->compat_xstats) { | ||
189 | d->xstats = st; | ||
190 | d->xstats_len = len; | ||
191 | } | ||
192 | |||
193 | if (d->tail) | ||
194 | return gnet_stats_copy(d, TCA_STATS_APP, st, len); | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | /** | ||
200 | * gnet_stats_finish_copy - finish dumping procedure | ||
201 | * @d: dumping handle | ||
202 | * | ||
203 | * Corrects the length of the top level TLV to include all TLVs added | ||
204 | * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs | ||
205 | * if gnet_stats_start_copy_compat() was used and releases the statistics | ||
206 | * lock. | ||
207 | * | ||
208 | * Returns 0 on success or -1 with the statistic lock released | ||
209 | * if the room in the socket buffer was not sufficient. | ||
210 | */ | ||
211 | int | ||
212 | gnet_stats_finish_copy(struct gnet_dump *d) | ||
213 | { | ||
214 | if (d->tail) | ||
215 | d->tail->rta_len = d->skb->tail - (u8 *) d->tail; | ||
216 | |||
217 | if (d->compat_tc_stats) | ||
218 | if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, | ||
219 | sizeof(d->tc_stats)) < 0) | ||
220 | return -1; | ||
221 | |||
222 | if (d->compat_xstats && d->xstats) { | ||
223 | if (gnet_stats_copy(d, d->compat_xstats, d->xstats, | ||
224 | d->xstats_len) < 0) | ||
225 | return -1; | ||
226 | } | ||
227 | |||
228 | spin_unlock_bh(d->lock); | ||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | |||
233 | EXPORT_SYMBOL(gnet_stats_start_copy); | ||
234 | EXPORT_SYMBOL(gnet_stats_start_copy_compat); | ||
235 | EXPORT_SYMBOL(gnet_stats_copy_basic); | ||
236 | EXPORT_SYMBOL(gnet_stats_copy_rate_est); | ||
237 | EXPORT_SYMBOL(gnet_stats_copy_queue); | ||
238 | EXPORT_SYMBOL(gnet_stats_copy_app); | ||
239 | EXPORT_SYMBOL(gnet_stats_finish_copy); | ||
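
A schematic caller of the API above, in the style of the qdisc dump path (the function name and parameters here are hypothetical; TCA_STATS2/TCA_STATS/TCA_XSTATS are the TLV types rtnetlink uses for this purpose, and error handling is abbreviated):

#include <linux/rtnetlink.h>
#include <net/gen_stats.h>

static int example_dump_stats(struct sk_buff *skb, spinlock_t *lock,
			      struct gnet_stats_basic *bstats,
			      struct gnet_stats_rate_est *rate_est,
			      struct gnet_stats_queue *qstats)
{
	struct gnet_dump d;

	/* Grabs *lock and opens the TCA_STATS2 container TLV. */
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, lock, &d) < 0)
		return -1;

	/* Each copy appends a nested TLV; on failure the lock is
	 * already released, so the caller just bails out. */
	if (gnet_stats_copy_basic(&d, bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, qstats) < 0)
		return -1;

	/* Fixes up the container length, emits the backward
	 * compatibility TLVs and releases the lock. */
	return gnet_stats_finish_copy(&d);
}
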
diff --git a/net/core/iovec.c b/net/core/iovec.c new file mode 100644 index 000000000000..d57ace949ab8 --- /dev/null +++ b/net/core/iovec.c | |||
@@ -0,0 +1,239 @@ | |||
1 | /* | ||
2 | * iovec manipulation routines. | ||
3 | * | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * Fixes: | ||
11 | * Andrew Lunn : Errors in iovec copying. | ||
12 | * Pedro Roque : Added memcpy_fromiovecend and | ||
13 | * csum_..._fromiovecend. | ||
14 | * Andi Kleen : fixed error handling for 2.1 | ||
15 | * Alexey Kuznetsov: 2.1 optimisations | ||
16 | * Andi Kleen : Fix csum*fromiovecend for IPv6. | ||
17 | */ | ||
18 | |||
19 | #include <linux/errno.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/mm.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/net.h> | ||
26 | #include <linux/in6.h> | ||
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/byteorder.h> | ||
29 | #include <net/checksum.h> | ||
30 | #include <net/sock.h> | ||
31 | |||
32 | /* | ||
33 | * Verify iovec. The caller must ensure that the iovec is big enough | ||
34 | * to hold the message iovec. | ||
35 | * | ||
36 | * Save time not doing verify_area. copy_*_user will make this work | ||
37 | * in any case. | ||
38 | */ | ||
39 | |||
40 | int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) | ||
41 | { | ||
42 | int size, err, ct; | ||
43 | |||
44 | if (m->msg_namelen) { | ||
45 | if (mode == VERIFY_READ) { | ||
46 | err = move_addr_to_kernel(m->msg_name, m->msg_namelen, | ||
47 | address); | ||
48 | if (err < 0) | ||
49 | return err; | ||
50 | } | ||
51 | m->msg_name = address; | ||
52 | } else { | ||
53 | m->msg_name = NULL; | ||
54 | } | ||
55 | |||
56 | size = m->msg_iovlen * sizeof(struct iovec); | ||
57 | if (copy_from_user(iov, m->msg_iov, size)) | ||
58 | return -EFAULT; | ||
59 | |||
60 | m->msg_iov = iov; | ||
61 | err = 0; | ||
62 | |||
63 | for (ct = 0; ct < m->msg_iovlen; ct++) { | ||
64 | err += iov[ct].iov_len; | ||
65 | /* | ||
66 | * Goal is not to verify user data, but to prevent returning | ||
67 | * negative value, which is interpreted as errno. | ||
68 | * Overflow is still possible, but it is harmless. | ||
69 | */ | ||
70 | if (err < 0) | ||
71 | return -EMSGSIZE; | ||
72 | } | ||
73 | |||
74 | return err; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Copy kernel to iovec. Returns -EFAULT on error. | ||
79 | * | ||
80 | * Note: this modifies the original iovec. | ||
81 | */ | ||
82 | |||
83 | int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) | ||
84 | { | ||
85 | while (len > 0) { | ||
86 | if (iov->iov_len) { | ||
87 | int copy = min_t(unsigned int, iov->iov_len, len); | ||
88 | if (copy_to_user(iov->iov_base, kdata, copy)) | ||
89 | return -EFAULT; | ||
90 | kdata += copy; | ||
91 | len -= copy; | ||
92 | iov->iov_len -= copy; | ||
93 | iov->iov_base += copy; | ||
94 | } | ||
95 | iov++; | ||
96 | } | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Copy iovec to kernel. Returns -EFAULT on error. | ||
103 | * | ||
104 | * Note: this modifies the original iovec. | ||
105 | */ | ||
106 | |||
107 | int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) | ||
108 | { | ||
109 | while (len > 0) { | ||
110 | if (iov->iov_len) { | ||
111 | int copy = min_t(unsigned int, len, iov->iov_len); | ||
112 | if (copy_from_user(kdata, iov->iov_base, copy)) | ||
113 | return -EFAULT; | ||
114 | len -= copy; | ||
115 | kdata += copy; | ||
116 | iov->iov_base += copy; | ||
117 | iov->iov_len -= copy; | ||
118 | } | ||
119 | iov++; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * For use with ip_build_xmit | ||
127 | */ | ||
128 | int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, | ||
129 | int len) | ||
130 | { | ||
131 | /* Skip over the finished iovecs */ | ||
132 | while (offset >= iov->iov_len) { | ||
133 | offset -= iov->iov_len; | ||
134 | iov++; | ||
135 | } | ||
136 | |||
137 | while (len > 0) { | ||
138 | u8 __user *base = iov->iov_base + offset; | ||
139 | int copy = min_t(unsigned int, len, iov->iov_len - offset); | ||
140 | |||
141 | offset = 0; | ||
142 | if (copy_from_user(kdata, base, copy)) | ||
143 | return -EFAULT; | ||
144 | len -= copy; | ||
145 | kdata += copy; | ||
146 | iov++; | ||
147 | } | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * And now for the all-in-one: copy and checksum from a user iovec | ||
154 | * directly to a datagram | ||
155 | * All calls to csum_partial except the last must be in 32-bit chunks. | ||
156 | * | ||
157 | * ip_build_xmit must ensure that, when fragmenting, only the last | ||
158 | * call to this function may be unaligned. | ||
159 | */ | ||
160 | int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, | ||
161 | int offset, unsigned int len, int *csump) | ||
162 | { | ||
163 | int csum = *csump; | ||
164 | int partial_cnt = 0, err = 0; | ||
165 | |||
166 | /* Skip over the finished iovecs */ | ||
167 | while (offset >= iov->iov_len) { | ||
168 | offset -= iov->iov_len; | ||
169 | iov++; | ||
170 | } | ||
171 | |||
172 | while (len > 0) { | ||
173 | u8 __user *base = iov->iov_base + offset; | ||
174 | int copy = min_t(unsigned int, len, iov->iov_len - offset); | ||
175 | |||
176 | offset = 0; | ||
177 | |||
178 | /* There is a remnant from previous iov. */ | ||
179 | if (partial_cnt) { | ||
180 | int par_len = 4 - partial_cnt; | ||
181 | |||
182 | /* iov component is too short ... */ | ||
183 | if (par_len > copy) { | ||
184 | if (copy_from_user(kdata, base, copy)) | ||
185 | goto out_fault; | ||
186 | kdata += copy; | ||
187 | base += copy; | ||
188 | partial_cnt += copy; | ||
189 | len -= copy; | ||
190 | iov++; | ||
191 | if (len) | ||
192 | continue; | ||
193 | *csump = csum_partial(kdata - partial_cnt, | ||
194 | partial_cnt, csum); | ||
195 | goto out; | ||
196 | } | ||
197 | if (copy_from_user(kdata, base, par_len)) | ||
198 | goto out_fault; | ||
199 | csum = csum_partial(kdata - partial_cnt, 4, csum); | ||
200 | kdata += par_len; | ||
201 | base += par_len; | ||
202 | copy -= par_len; | ||
203 | len -= par_len; | ||
204 | partial_cnt = 0; | ||
205 | } | ||
206 | |||
207 | if (len > copy) { | ||
208 | partial_cnt = copy % 4; | ||
209 | if (partial_cnt) { | ||
210 | copy -= partial_cnt; | ||
211 | if (copy_from_user(kdata + copy, base + copy, | ||
212 | partial_cnt)) | ||
213 | goto out_fault; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | if (copy) { | ||
218 | csum = csum_and_copy_from_user(base, kdata, copy, | ||
219 | csum, &err); | ||
220 | if (err) | ||
221 | goto out; | ||
222 | } | ||
223 | len -= copy + partial_cnt; | ||
224 | kdata += copy + partial_cnt; | ||
225 | iov++; | ||
226 | } | ||
227 | *csump = csum; | ||
228 | out: | ||
229 | return err; | ||
230 | |||
231 | out_fault: | ||
232 | err = -EFAULT; | ||
233 | goto out; | ||
234 | } | ||
235 | |||
236 | EXPORT_SYMBOL(csum_partial_copy_fromiovecend); | ||
237 | EXPORT_SYMBOL(memcpy_fromiovec); | ||
238 | EXPORT_SYMBOL(memcpy_fromiovecend); | ||
239 | EXPORT_SYMBOL(memcpy_toiovec); | ||
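
A standalone user-space model (hypothetical names; plain memcpy stands in for copy_from_user) showing the destructive semantics of memcpy_fromiovec() above: the iovec is consumed as data is gathered, so a second call continues where the first one stopped.

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Gather up to len bytes from the iovec into kdata, destructively
 * advancing iov_base/iov_len just as the kernel version does. */
static int model_memcpy_fromiovec(unsigned char *kdata, struct iovec *iov,
				  int len)
{
	while (len > 0) {
		if (iov->iov_len) {
			size_t copy = iov->iov_len < (size_t)len ?
					iov->iov_len : (size_t)len;
			memcpy(kdata, iov->iov_base, copy);
			len -= copy;
			kdata += copy;
			iov->iov_base = (char *)iov->iov_base + copy;
			iov->iov_len -= copy;
		}
		iov++;
	}
	return 0;
}

int main(void)
{
	char a[] = "hello ", b[] = "world";
	struct iovec iov[2] = { { a, 6 }, { b, 5 } };
	unsigned char out[12] = { 0 };

	model_memcpy_fromiovec(out, iov, 11);
	printf("%s (iov[1].iov_len now %zu)\n", (char *)out, iov[1].iov_len);
	return 0;
}
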
diff --git a/net/core/link_watch.c b/net/core/link_watch.c new file mode 100644 index 000000000000..4859b7446c6f --- /dev/null +++ b/net/core/link_watch.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Linux network device link state notification | ||
3 | * | ||
4 | * Author: | ||
5 | * Stefan Rompf <sux@loplof.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/netdevice.h> | ||
17 | #include <linux/if.h> | ||
18 | #include <net/sock.h> | ||
19 | #include <linux/rtnetlink.h> | ||
20 | #include <linux/jiffies.h> | ||
21 | #include <linux/spinlock.h> | ||
22 | #include <linux/list.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/bitops.h> | ||
26 | #include <asm/types.h> | ||
27 | |||
28 | |||
29 | enum lw_bits { | ||
30 | LW_RUNNING = 0, | ||
31 | LW_SE_USED | ||
32 | }; | ||
33 | |||
34 | static unsigned long linkwatch_flags; | ||
35 | static unsigned long linkwatch_nextevent; | ||
36 | |||
37 | static void linkwatch_event(void *dummy); | ||
38 | static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL); | ||
39 | |||
40 | static LIST_HEAD(lweventlist); | ||
41 | static DEFINE_SPINLOCK(lweventlist_lock); | ||
42 | |||
43 | struct lw_event { | ||
44 | struct list_head list; | ||
45 | struct net_device *dev; | ||
46 | }; | ||
47 | |||
48 | /* Avoid kmalloc() for most systems */ | ||
49 | static struct lw_event singleevent; | ||
50 | |||
51 | /* Must be called with the rtnl semaphore held */ | ||
52 | void linkwatch_run_queue(void) | ||
53 | { | ||
54 | LIST_HEAD(head); | ||
55 | struct list_head *n, *next; | ||
56 | |||
57 | spin_lock_irq(&lweventlist_lock); | ||
58 | list_splice_init(&lweventlist, &head); | ||
59 | spin_unlock_irq(&lweventlist_lock); | ||
60 | |||
61 | list_for_each_safe(n, next, &head) { | ||
62 | struct lw_event *event = list_entry(n, struct lw_event, list); | ||
63 | struct net_device *dev = event->dev; | ||
64 | |||
65 | if (event == &singleevent) { | ||
66 | clear_bit(LW_SE_USED, &linkwatch_flags); | ||
67 | } else { | ||
68 | kfree(event); | ||
69 | } | ||
70 | |||
71 | /* We are about to handle this device, | ||
72 | * so new events can be accepted | ||
73 | */ | ||
74 | clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); | ||
75 | |||
76 | if (dev->flags & IFF_UP) { | ||
77 | netdev_state_change(dev); | ||
78 | } | ||
79 | |||
80 | dev_put(dev); | ||
81 | } | ||
82 | } | ||
83 | |||
84 | |||
85 | static void linkwatch_event(void *dummy) | ||
86 | { | ||
87 | /* Limit the number of linkwatch events to one | ||
88 | * per second so that a runaway driver does not | ||
89 | * cause a storm of messages on the netlink | ||
90 | * socket | ||
91 | */ | ||
92 | linkwatch_nextevent = jiffies + HZ; | ||
93 | clear_bit(LW_RUNNING, &linkwatch_flags); | ||
94 | |||
95 | rtnl_shlock(); | ||
96 | linkwatch_run_queue(); | ||
97 | rtnl_shunlock(); | ||
98 | } | ||
99 | |||
100 | |||
101 | void linkwatch_fire_event(struct net_device *dev) | ||
102 | { | ||
103 | if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { | ||
104 | unsigned long flags; | ||
105 | struct lw_event *event; | ||
106 | |||
107 | if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) { | ||
108 | event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC); | ||
109 | |||
110 | if (unlikely(event == NULL)) { | ||
111 | clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); | ||
112 | return; | ||
113 | } | ||
114 | } else { | ||
115 | event = &singleevent; | ||
116 | } | ||
117 | |||
118 | dev_hold(dev); | ||
119 | event->dev = dev; | ||
120 | |||
121 | spin_lock_irqsave(&lweventlist_lock, flags); | ||
122 | list_add_tail(&event->list, &lweventlist); | ||
123 | spin_unlock_irqrestore(&lweventlist_lock, flags); | ||
124 | |||
125 | if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) { | ||
126 | unsigned long thisevent = jiffies; | ||
127 | |||
128 | if (thisevent >= linkwatch_nextevent) { | ||
129 | schedule_work(&linkwatch_work); | ||
130 | } else { | ||
131 | schedule_delayed_work(&linkwatch_work, linkwatch_nextevent - thisevent); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | |||
137 | EXPORT_SYMBOL(linkwatch_fire_event); | ||
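
A standalone model (illustrative, times in jiffies) of the throttling decision in linkwatch_fire_event() above: the handler sets linkwatch_nextevent one second ahead of each run, so a late event is scheduled immediately while an early one is deferred to the next allowed slot.

#include <stdio.h>

#define HZ 100

static unsigned long linkwatch_nextevent;

/* 0 means schedule_work() (run now); a positive value is the delay
 * that would be passed to schedule_delayed_work(). */
static long schedule_delay(unsigned long now)
{
	if (now >= linkwatch_nextevent)
		return 0;
	return linkwatch_nextevent - now;
}

int main(void)
{
	unsigned long now = 1000;

	linkwatch_nextevent = now + HZ;	/* set when the handler last ran */
	printf("event at t=%lu -> delay %ld\n",
	       now + 30, schedule_delay(now + 30));	/* too early: deferred */
	printf("event at t=%lu -> delay %ld\n",
	       now + 150, schedule_delay(now + 150));	/* past the slot: now */
	return 0;
}
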
diff --git a/net/core/neighbour.c b/net/core/neighbour.c new file mode 100644 index 000000000000..0a2f67bbef2e --- /dev/null +++ b/net/core/neighbour.c | |||
@@ -0,0 +1,2362 @@ | |||
1 | /* | ||
2 | * Generic address resolution entity | ||
3 | * | ||
4 | * Authors: | ||
5 | * Pedro Roque <roque@di.fc.ul.pt> | ||
6 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * Fixes: | ||
14 | * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. | ||
15 | * Harald Welte Add neighbour cache statistics like rtstat | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/socket.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/netdevice.h> | ||
25 | #include <linux/proc_fs.h> | ||
26 | #ifdef CONFIG_SYSCTL | ||
27 | #include <linux/sysctl.h> | ||
28 | #endif | ||
29 | #include <linux/times.h> | ||
30 | #include <net/neighbour.h> | ||
31 | #include <net/dst.h> | ||
32 | #include <net/sock.h> | ||
33 | #include <linux/rtnetlink.h> | ||
34 | #include <linux/random.h> | ||
35 | |||
36 | #define NEIGH_DEBUG 1 | ||
37 | |||
38 | #define NEIGH_PRINTK(x...) printk(x) | ||
39 | #define NEIGH_NOPRINTK(x...) do { ; } while(0) | ||
40 | #define NEIGH_PRINTK0 NEIGH_PRINTK | ||
41 | #define NEIGH_PRINTK1 NEIGH_NOPRINTK | ||
42 | #define NEIGH_PRINTK2 NEIGH_NOPRINTK | ||
43 | |||
44 | #if NEIGH_DEBUG >= 1 | ||
45 | #undef NEIGH_PRINTK1 | ||
46 | #define NEIGH_PRINTK1 NEIGH_PRINTK | ||
47 | #endif | ||
48 | #if NEIGH_DEBUG >= 2 | ||
49 | #undef NEIGH_PRINTK2 | ||
50 | #define NEIGH_PRINTK2 NEIGH_PRINTK | ||
51 | #endif | ||
52 | |||
53 | #define PNEIGH_HASHMASK 0xF | ||
54 | |||
55 | static void neigh_timer_handler(unsigned long arg); | ||
56 | #ifdef CONFIG_ARPD | ||
57 | static void neigh_app_notify(struct neighbour *n); | ||
58 | #endif | ||
59 | static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); | ||
60 | void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); | ||
61 | |||
62 | static struct neigh_table *neigh_tables; | ||
63 | static struct file_operations neigh_stat_seq_fops; | ||
64 | |||
65 | /* | ||
66 | Neighbour hash table buckets are protected with rwlock tbl->lock. | ||
67 | |||
68 | - All the scans/updates to hash buckets MUST be made under this lock. | ||
69 | - NOTHING clever should be done under this lock: no callbacks | ||
70 | to protocol backends, no attempts to send something to the network. | ||
71 | It will result in deadlocks if the backend/driver wants to use the | ||
72 | neighbour cache. | ||
73 | - If the entry requires some non-trivial actions, increase | ||
74 | its reference count and release the table lock. | ||
75 | |||
76 | Neighbour entries are protected: | ||
77 | - with reference count. | ||
78 | - with rwlock neigh->lock | ||
79 | |||
80 | Reference count prevents destruction. | ||
81 | |||
82 | neigh->lock mainly serializes ll address data and its validity state. | ||
83 | However, the same lock is used to protect other entry fields: | ||
84 | - timer | ||
85 | - resolution queue | ||
86 | |||
87 | Again, nothing clever shall be done under neigh->lock; | ||
88 | the most complicated procedure we allow is dev->hard_header. | ||
89 | It is assumed that dev->hard_header is simple and does | ||
90 | not make callbacks to neighbour tables. | ||
91 | |||
92 | The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting | ||
93 | the list of neighbour tables. This list is used only in process context. | ||
94 | */ | ||
95 | |||
96 | static DEFINE_RWLOCK(neigh_tbl_lock); | ||
97 | |||
98 | static int neigh_blackhole(struct sk_buff *skb) | ||
99 | { | ||
100 | kfree_skb(skb); | ||
101 | return -ENETDOWN; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * It is a random distribution in the interval (1/2)*base...(3/2)*base. | ||
106 | * It corresponds to the default IPv6 settings and is not overridable, | ||
107 | * because it is a really reasonable choice. | ||
108 | */ | ||
109 | |||
110 | unsigned long neigh_rand_reach_time(unsigned long base) | ||
111 | { | ||
112 | return (base ? (net_random() % base) + (base >> 1) : 0); | ||
113 | } | ||
114 | |||
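
A user-space sketch of the distribution claimed above (rand() standing in for net_random()): (r % base) lies in [0, base), and adding base>>1 shifts the result into [base/2, 3*base/2).

#include <stdio.h>
#include <stdlib.h>

/* Model of neigh_rand_reach_time(); values are illustrative. */
static unsigned long model_rand_reach_time(unsigned long base)
{
	return base ? (rand() % base) + (base >> 1) : 0;
}

int main(void)
{
	unsigned long base = 30000;	/* e.g. a 30 sec base time in ms */
	int i;

	for (i = 0; i < 4; i++)		/* samples fall in [15000, 45000) */
		printf("%lu\n", model_rand_reach_time(base));
	return 0;
}
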
115 | |||
116 | static int neigh_forced_gc(struct neigh_table *tbl) | ||
117 | { | ||
118 | int shrunk = 0; | ||
119 | int i; | ||
120 | |||
121 | NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); | ||
122 | |||
123 | write_lock_bh(&tbl->lock); | ||
124 | for (i = 0; i <= tbl->hash_mask; i++) { | ||
125 | struct neighbour *n, **np; | ||
126 | |||
127 | np = &tbl->hash_buckets[i]; | ||
128 | while ((n = *np) != NULL) { | ||
129 | /* Neighbour record may be discarded if: | ||
130 | * - nobody refers to it. | ||
131 | * - it is not permanent | ||
132 | */ | ||
133 | write_lock(&n->lock); | ||
134 | if (atomic_read(&n->refcnt) == 1 && | ||
135 | !(n->nud_state & NUD_PERMANENT)) { | ||
136 | *np = n->next; | ||
137 | n->dead = 1; | ||
138 | shrunk = 1; | ||
139 | write_unlock(&n->lock); | ||
140 | neigh_release(n); | ||
141 | continue; | ||
142 | } | ||
143 | write_unlock(&n->lock); | ||
144 | np = &n->next; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | tbl->last_flush = jiffies; | ||
149 | |||
150 | write_unlock_bh(&tbl->lock); | ||
151 | |||
152 | return shrunk; | ||
153 | } | ||
154 | |||
155 | static int neigh_del_timer(struct neighbour *n) | ||
156 | { | ||
157 | if ((n->nud_state & NUD_IN_TIMER) && | ||
158 | del_timer(&n->timer)) { | ||
159 | neigh_release(n); | ||
160 | return 1; | ||
161 | } | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void pneigh_queue_purge(struct sk_buff_head *list) | ||
166 | { | ||
167 | struct sk_buff *skb; | ||
168 | |||
169 | while ((skb = skb_dequeue(list)) != NULL) { | ||
170 | dev_put(skb->dev); | ||
171 | kfree_skb(skb); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) | ||
176 | { | ||
177 | int i; | ||
178 | |||
179 | write_lock_bh(&tbl->lock); | ||
180 | |||
181 | for (i=0; i <= tbl->hash_mask; i++) { | ||
182 | struct neighbour *n, **np; | ||
183 | |||
184 | np = &tbl->hash_buckets[i]; | ||
185 | while ((n = *np) != NULL) { | ||
186 | if (dev && n->dev != dev) { | ||
187 | np = &n->next; | ||
188 | continue; | ||
189 | } | ||
190 | *np = n->next; | ||
191 | write_lock_bh(&n->lock); | ||
192 | n->dead = 1; | ||
193 | neigh_del_timer(n); | ||
194 | write_unlock_bh(&n->lock); | ||
195 | neigh_release(n); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | write_unlock_bh(&tbl->lock); | ||
200 | } | ||
201 | |||
202 | int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) | ||
203 | { | ||
204 | int i; | ||
205 | |||
206 | write_lock_bh(&tbl->lock); | ||
207 | |||
208 | for (i = 0; i <= tbl->hash_mask; i++) { | ||
209 | struct neighbour *n, **np = &tbl->hash_buckets[i]; | ||
210 | |||
211 | while ((n = *np) != NULL) { | ||
212 | if (dev && n->dev != dev) { | ||
213 | np = &n->next; | ||
214 | continue; | ||
215 | } | ||
216 | *np = n->next; | ||
217 | write_lock(&n->lock); | ||
218 | neigh_del_timer(n); | ||
219 | n->dead = 1; | ||
220 | |||
221 | if (atomic_read(&n->refcnt) != 1) { | ||
222 | /* The most unpleasant situation. | ||
223 | We must destroy the neighbour entry, | ||
224 | but someone still uses it. | ||
225 | |||
226 | Destruction will be delayed until | ||
227 | the last user releases us, but | ||
228 | we must kill timers etc. and move | ||
229 | it to a safe state. | ||
230 | */ | ||
231 | skb_queue_purge(&n->arp_queue); | ||
232 | n->output = neigh_blackhole; | ||
233 | if (n->nud_state & NUD_VALID) | ||
234 | n->nud_state = NUD_NOARP; | ||
235 | else | ||
236 | n->nud_state = NUD_NONE; | ||
237 | NEIGH_PRINTK2("neigh %p is stray.\n", n); | ||
238 | } | ||
239 | write_unlock(&n->lock); | ||
240 | neigh_release(n); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | pneigh_ifdown(tbl, dev); | ||
245 | write_unlock_bh(&tbl->lock); | ||
246 | |||
247 | del_timer_sync(&tbl->proxy_timer); | ||
248 | pneigh_queue_purge(&tbl->proxy_queue); | ||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | static struct neighbour *neigh_alloc(struct neigh_table *tbl) | ||
253 | { | ||
254 | struct neighbour *n = NULL; | ||
255 | unsigned long now = jiffies; | ||
256 | int entries; | ||
257 | |||
258 | entries = atomic_inc_return(&tbl->entries) - 1; | ||
259 | if (entries >= tbl->gc_thresh3 || | ||
260 | (entries >= tbl->gc_thresh2 && | ||
261 | time_after(now, tbl->last_flush + 5 * HZ))) { | ||
262 | if (!neigh_forced_gc(tbl) && | ||
263 | entries >= tbl->gc_thresh3) | ||
264 | goto out_entries; | ||
265 | } | ||
266 | |||
267 | n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); | ||
268 | if (!n) | ||
269 | goto out_entries; | ||
270 | |||
271 | memset(n, 0, tbl->entry_size); | ||
272 | |||
273 | skb_queue_head_init(&n->arp_queue); | ||
274 | rwlock_init(&n->lock); | ||
275 | n->updated = n->used = now; | ||
276 | n->nud_state = NUD_NONE; | ||
277 | n->output = neigh_blackhole; | ||
278 | n->parms = neigh_parms_clone(&tbl->parms); | ||
279 | init_timer(&n->timer); | ||
280 | n->timer.function = neigh_timer_handler; | ||
281 | n->timer.data = (unsigned long)n; | ||
282 | |||
283 | NEIGH_CACHE_STAT_INC(tbl, allocs); | ||
284 | n->tbl = tbl; | ||
285 | atomic_set(&n->refcnt, 1); | ||
286 | n->dead = 1; | ||
287 | out: | ||
288 | return n; | ||
289 | |||
290 | out_entries: | ||
291 | atomic_dec(&tbl->entries); | ||
292 | goto out; | ||
293 | } | ||
294 | |||
295 | static struct neighbour **neigh_hash_alloc(unsigned int entries) | ||
296 | { | ||
297 | unsigned long size = entries * sizeof(struct neighbour *); | ||
298 | struct neighbour **ret; | ||
299 | |||
300 | if (size <= PAGE_SIZE) { | ||
301 | ret = kmalloc(size, GFP_ATOMIC); | ||
302 | } else { | ||
303 | ret = (struct neighbour **) | ||
304 | __get_free_pages(GFP_ATOMIC, get_order(size)); | ||
305 | } | ||
306 | if (ret) | ||
307 | memset(ret, 0, size); | ||
308 | |||
309 | return ret; | ||
310 | } | ||
311 | |||
312 | static void neigh_hash_free(struct neighbour **hash, unsigned int entries) | ||
313 | { | ||
314 | unsigned long size = entries * sizeof(struct neighbour *); | ||
315 | |||
316 | if (size <= PAGE_SIZE) | ||
317 | kfree(hash); | ||
318 | else | ||
319 | free_pages((unsigned long)hash, get_order(size)); | ||
320 | } | ||
321 | |||
322 | static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) | ||
323 | { | ||
324 | struct neighbour **new_hash, **old_hash; | ||
325 | unsigned int i, new_hash_mask, old_entries; | ||
326 | |||
327 | NEIGH_CACHE_STAT_INC(tbl, hash_grows); | ||
328 | |||
329 | BUG_ON(new_entries & (new_entries - 1)); | ||
330 | new_hash = neigh_hash_alloc(new_entries); | ||
331 | if (!new_hash) | ||
332 | return; | ||
333 | |||
334 | old_entries = tbl->hash_mask + 1; | ||
335 | new_hash_mask = new_entries - 1; | ||
336 | old_hash = tbl->hash_buckets; | ||
337 | |||
338 | get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); | ||
339 | for (i = 0; i < old_entries; i++) { | ||
340 | struct neighbour *n, *next; | ||
341 | |||
342 | for (n = old_hash[i]; n; n = next) { | ||
343 | unsigned int hash_val = tbl->hash(n->primary_key, n->dev); | ||
344 | |||
345 | hash_val &= new_hash_mask; | ||
346 | next = n->next; | ||
347 | |||
348 | n->next = new_hash[hash_val]; | ||
349 | new_hash[hash_val] = n; | ||
350 | } | ||
351 | } | ||
352 | tbl->hash_buckets = new_hash; | ||
353 | tbl->hash_mask = new_hash_mask; | ||
354 | |||
355 | neigh_hash_free(old_hash, old_entries); | ||
356 | } | ||
357 | |||
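
A standalone sketch of the invariant that BUG_ON(new_entries & (new_entries - 1)) enforces in neigh_hash_grow() above: with a power-of-two bucket count, count-1 is an all-ones mask, so hash & mask is a cheap substitute for hash % count (the sample hash value is illustrative).

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int entries = 32;		/* must be a power of 2 */
	unsigned int mask = entries - 1;	/* 0x1F */

	assert((entries & (entries - 1)) == 0);	/* the BUG_ON condition */
	printf("hash 0x%x -> bucket %u\n", 0xdeadbeef, 0xdeadbeef & mask);
	return 0;
}
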
358 | struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, | ||
359 | struct net_device *dev) | ||
360 | { | ||
361 | struct neighbour *n; | ||
362 | int key_len = tbl->key_len; | ||
363 | u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; | ||
364 | |||
365 | NEIGH_CACHE_STAT_INC(tbl, lookups); | ||
366 | |||
367 | read_lock_bh(&tbl->lock); | ||
368 | for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { | ||
369 | if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { | ||
370 | neigh_hold(n); | ||
371 | NEIGH_CACHE_STAT_INC(tbl, hits); | ||
372 | break; | ||
373 | } | ||
374 | } | ||
375 | read_unlock_bh(&tbl->lock); | ||
376 | return n; | ||
377 | } | ||
378 | |||
379 | struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) | ||
380 | { | ||
381 | struct neighbour *n; | ||
382 | int key_len = tbl->key_len; | ||
383 | u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; | ||
384 | |||
385 | NEIGH_CACHE_STAT_INC(tbl, lookups); | ||
386 | |||
387 | read_lock_bh(&tbl->lock); | ||
388 | for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { | ||
389 | if (!memcmp(n->primary_key, pkey, key_len)) { | ||
390 | neigh_hold(n); | ||
391 | NEIGH_CACHE_STAT_INC(tbl, hits); | ||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | read_unlock_bh(&tbl->lock); | ||
396 | return n; | ||
397 | } | ||
398 | |||
399 | struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | ||
400 | struct net_device *dev) | ||
401 | { | ||
402 | u32 hash_val; | ||
403 | int key_len = tbl->key_len; | ||
404 | int error; | ||
405 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl); | ||
406 | |||
407 | if (!n) { | ||
408 | rc = ERR_PTR(-ENOBUFS); | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | memcpy(n->primary_key, pkey, key_len); | ||
413 | n->dev = dev; | ||
414 | dev_hold(dev); | ||
415 | |||
416 | /* Protocol specific setup. */ | ||
417 | if (tbl->constructor && (error = tbl->constructor(n)) < 0) { | ||
418 | rc = ERR_PTR(error); | ||
419 | goto out_neigh_release; | ||
420 | } | ||
421 | |||
422 | /* Device specific setup. */ | ||
423 | if (n->parms->neigh_setup && | ||
424 | (error = n->parms->neigh_setup(n)) < 0) { | ||
425 | rc = ERR_PTR(error); | ||
426 | goto out_neigh_release; | ||
427 | } | ||
428 | |||
429 | n->confirmed = jiffies - (n->parms->base_reachable_time << 1); | ||
430 | |||
431 | write_lock_bh(&tbl->lock); | ||
432 | |||
433 | if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) | ||
434 | neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); | ||
435 | |||
436 | hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; | ||
437 | |||
438 | if (n->parms->dead) { | ||
439 | rc = ERR_PTR(-EINVAL); | ||
440 | goto out_tbl_unlock; | ||
441 | } | ||
442 | |||
443 | for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { | ||
444 | if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { | ||
445 | neigh_hold(n1); | ||
446 | rc = n1; | ||
447 | goto out_tbl_unlock; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | n->next = tbl->hash_buckets[hash_val]; | ||
452 | tbl->hash_buckets[hash_val] = n; | ||
453 | n->dead = 0; | ||
454 | neigh_hold(n); | ||
455 | write_unlock_bh(&tbl->lock); | ||
456 | NEIGH_PRINTK2("neigh %p is created.\n", n); | ||
457 | rc = n; | ||
458 | out: | ||
459 | return rc; | ||
460 | out_tbl_unlock: | ||
461 | write_unlock_bh(&tbl->lock); | ||
462 | out_neigh_release: | ||
463 | neigh_release(n); | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, | ||
468 | struct net_device *dev, int creat) | ||
469 | { | ||
470 | struct pneigh_entry *n; | ||
471 | int key_len = tbl->key_len; | ||
472 | u32 hash_val = *(u32 *)(pkey + key_len - 4); | ||
473 | |||
474 | hash_val ^= (hash_val >> 16); | ||
475 | hash_val ^= hash_val >> 8; | ||
476 | hash_val ^= hash_val >> 4; | ||
477 | hash_val &= PNEIGH_HASHMASK; | ||
478 | |||
479 | read_lock_bh(&tbl->lock); | ||
480 | |||
481 | for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { | ||
482 | if (!memcmp(n->key, pkey, key_len) && | ||
483 | (n->dev == dev || !n->dev)) { | ||
484 | read_unlock_bh(&tbl->lock); | ||
485 | goto out; | ||
486 | } | ||
487 | } | ||
488 | read_unlock_bh(&tbl->lock); | ||
489 | n = NULL; | ||
490 | if (!creat) | ||
491 | goto out; | ||
492 | |||
493 | n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); | ||
494 | if (!n) | ||
495 | goto out; | ||
496 | |||
497 | memcpy(n->key, pkey, key_len); | ||
498 | n->dev = dev; | ||
499 | if (dev) | ||
500 | dev_hold(dev); | ||
501 | |||
502 | if (tbl->pconstructor && tbl->pconstructor(n)) { | ||
503 | if (dev) | ||
504 | dev_put(dev); | ||
505 | kfree(n); | ||
506 | n = NULL; | ||
507 | goto out; | ||
508 | } | ||
509 | |||
510 | write_lock_bh(&tbl->lock); | ||
511 | n->next = tbl->phash_buckets[hash_val]; | ||
512 | tbl->phash_buckets[hash_val] = n; | ||
513 | write_unlock_bh(&tbl->lock); | ||
514 | out: | ||
515 | return n; | ||
516 | } | ||
517 | |||
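
A standalone sketch of the xor-fold used by pneigh_lookup() above (and by pneigh_delete() below): the last four key bytes are folded down to 4 bits and masked with PNEIGH_HASHMASK, selecting one of 16 proxy hash buckets. The sample key is illustrative.

#include <stdint.h>
#include <stdio.h>

#define PNEIGH_HASHMASK 0xF

/* Fold a 32-bit value to 4 bits by successive xor-shifts. */
static uint32_t pneigh_hash_fold(uint32_t hash_val)
{
	hash_val ^= hash_val >> 16;
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	return hash_val & PNEIGH_HASHMASK;
}

int main(void)
{
	uint32_t key = 0xC0000201;	/* e.g. 192.0.2.1 in host order */

	printf("bucket=%u\n", pneigh_hash_fold(key));
	return 0;
}
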
518 | |||
519 | int pneigh_delete(struct neigh_table *tbl, const void *pkey, | ||
520 | struct net_device *dev) | ||
521 | { | ||
522 | struct pneigh_entry *n, **np; | ||
523 | int key_len = tbl->key_len; | ||
524 | u32 hash_val = *(u32 *)(pkey + key_len - 4); | ||
525 | |||
526 | hash_val ^= (hash_val >> 16); | ||
527 | hash_val ^= hash_val >> 8; | ||
528 | hash_val ^= hash_val >> 4; | ||
529 | hash_val &= PNEIGH_HASHMASK; | ||
530 | |||
531 | write_lock_bh(&tbl->lock); | ||
532 | for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; | ||
533 | np = &n->next) { | ||
534 | if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { | ||
535 | *np = n->next; | ||
536 | write_unlock_bh(&tbl->lock); | ||
537 | if (tbl->pdestructor) | ||
538 | tbl->pdestructor(n); | ||
539 | if (n->dev) | ||
540 | dev_put(n->dev); | ||
541 | kfree(n); | ||
542 | return 0; | ||
543 | } | ||
544 | } | ||
545 | write_unlock_bh(&tbl->lock); | ||
546 | return -ENOENT; | ||
547 | } | ||
548 | |||
549 | static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) | ||
550 | { | ||
551 | struct pneigh_entry *n, **np; | ||
552 | u32 h; | ||
553 | |||
554 | for (h = 0; h <= PNEIGH_HASHMASK; h++) { | ||
555 | np = &tbl->phash_buckets[h]; | ||
556 | while ((n = *np) != NULL) { | ||
557 | if (!dev || n->dev == dev) { | ||
558 | *np = n->next; | ||
559 | if (tbl->pdestructor) | ||
560 | tbl->pdestructor(n); | ||
561 | if (n->dev) | ||
562 | dev_put(n->dev); | ||
563 | kfree(n); | ||
564 | continue; | ||
565 | } | ||
566 | np = &n->next; | ||
567 | } | ||
568 | } | ||
569 | return -ENOENT; | ||
570 | } | ||
571 | |||
572 | |||
573 | /* | ||
574 | * The neighbour must already be out of the table. | ||
575 | * | ||
576 | */ | ||
577 | void neigh_destroy(struct neighbour *neigh) | ||
578 | { | ||
579 | struct hh_cache *hh; | ||
580 | |||
581 | NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); | ||
582 | |||
583 | if (!neigh->dead) { | ||
584 | printk(KERN_WARNING | ||
585 | "Destroying alive neighbour %p\n", neigh); | ||
586 | dump_stack(); | ||
587 | return; | ||
588 | } | ||
589 | |||
590 | if (neigh_del_timer(neigh)) | ||
591 | printk(KERN_WARNING "Impossible event.\n"); | ||
592 | |||
593 | while ((hh = neigh->hh) != NULL) { | ||
594 | neigh->hh = hh->hh_next; | ||
595 | hh->hh_next = NULL; | ||
596 | write_lock_bh(&hh->hh_lock); | ||
597 | hh->hh_output = neigh_blackhole; | ||
598 | write_unlock_bh(&hh->hh_lock); | ||
599 | if (atomic_dec_and_test(&hh->hh_refcnt)) | ||
600 | kfree(hh); | ||
601 | } | ||
602 | |||
603 | if (neigh->ops && neigh->ops->destructor) | ||
604 | (neigh->ops->destructor)(neigh); | ||
605 | |||
606 | skb_queue_purge(&neigh->arp_queue); | ||
607 | |||
608 | dev_put(neigh->dev); | ||
609 | neigh_parms_put(neigh->parms); | ||
610 | |||
611 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); | ||
612 | |||
613 | atomic_dec(&neigh->tbl->entries); | ||
614 | kmem_cache_free(neigh->tbl->kmem_cachep, neigh); | ||
615 | } | ||
616 | |||
617 | /* Neighbour state is suspicious; | ||
618 | disable fast path. | ||
619 | |||
620 | Called with write_locked neigh. | ||
621 | */ | ||
622 | static void neigh_suspect(struct neighbour *neigh) | ||
623 | { | ||
624 | struct hh_cache *hh; | ||
625 | |||
626 | NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); | ||
627 | |||
628 | neigh->output = neigh->ops->output; | ||
629 | |||
630 | for (hh = neigh->hh; hh; hh = hh->hh_next) | ||
631 | hh->hh_output = neigh->ops->output; | ||
632 | } | ||
633 | |||
634 | /* Neighbour state is OK; | ||
635 | enable fast path. | ||
636 | |||
637 | Called with write_locked neigh. | ||
638 | */ | ||
639 | static void neigh_connect(struct neighbour *neigh) | ||
640 | { | ||
641 | struct hh_cache *hh; | ||
642 | |||
643 | NEIGH_PRINTK2("neigh %p is connected.\n", neigh); | ||
644 | |||
645 | neigh->output = neigh->ops->connected_output; | ||
646 | |||
647 | for (hh = neigh->hh; hh; hh = hh->hh_next) | ||
648 | hh->hh_output = neigh->ops->hh_output; | ||
649 | } | ||
650 | |||
651 | static void neigh_periodic_timer(unsigned long arg) | ||
652 | { | ||
653 | struct neigh_table *tbl = (struct neigh_table *)arg; | ||
654 | struct neighbour *n, **np; | ||
655 | unsigned long expire, now = jiffies; | ||
656 | |||
657 | NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); | ||
658 | |||
659 | write_lock(&tbl->lock); | ||
660 | |||
661 | /* | ||
662 | * periodically recompute ReachableTime from random function | ||
663 | */ | ||
664 | |||
665 | if (time_after(now, tbl->last_rand + 300 * HZ)) { | ||
666 | struct neigh_parms *p; | ||
667 | tbl->last_rand = now; | ||
668 | for (p = &tbl->parms; p; p = p->next) | ||
669 | p->reachable_time = | ||
670 | neigh_rand_reach_time(p->base_reachable_time); | ||
671 | } | ||
672 | |||
673 | np = &tbl->hash_buckets[tbl->hash_chain_gc]; | ||
674 | tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); | ||
675 | |||
676 | while ((n = *np) != NULL) { | ||
677 | unsigned int state; | ||
678 | |||
679 | write_lock(&n->lock); | ||
680 | |||
681 | state = n->nud_state; | ||
682 | if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { | ||
683 | write_unlock(&n->lock); | ||
684 | goto next_elt; | ||
685 | } | ||
686 | |||
687 | if (time_before(n->used, n->confirmed)) | ||
688 | n->used = n->confirmed; | ||
689 | |||
690 | if (atomic_read(&n->refcnt) == 1 && | ||
691 | (state == NUD_FAILED || | ||
692 | time_after(now, n->used + n->parms->gc_staletime))) { | ||
693 | *np = n->next; | ||
694 | n->dead = 1; | ||
695 | write_unlock(&n->lock); | ||
696 | neigh_release(n); | ||
697 | continue; | ||
698 | } | ||
699 | write_unlock(&n->lock); | ||
700 | |||
701 | next_elt: | ||
702 | np = &n->next; | ||
703 | } | ||
704 | |||
705 | /* Cycle through all hash buckets every base_reachable_time/2 ticks. | ||
706 | * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 | ||
707 | * base_reachable_time. | ||
708 | */ | ||
709 | expire = tbl->parms.base_reachable_time >> 1; | ||
710 | expire /= (tbl->hash_mask + 1); | ||
711 | if (!expire) | ||
712 | expire = 1; | ||
713 | |||
714 | mod_timer(&tbl->gc_timer, now + expire); | ||
715 | |||
716 | write_unlock(&tbl->lock); | ||
717 | } | ||
718 | |||
719 | static __inline__ int neigh_max_probes(struct neighbour *n) | ||
720 | { | ||
721 | struct neigh_parms *p = n->parms; | ||
722 | return (n->nud_state & NUD_PROBE ? | ||
723 | p->ucast_probes : | ||
724 | p->ucast_probes + p->app_probes + p->mcast_probes); | ||
725 | } | ||
726 | |||
727 | |||
728 | /* Called when a timer expires for a neighbour entry. */ | ||
729 | |||
730 | static void neigh_timer_handler(unsigned long arg) | ||
731 | { | ||
732 | unsigned long now, next; | ||
733 | struct neighbour *neigh = (struct neighbour *)arg; | ||
734 | unsigned state; | ||
735 | int notify = 0; | ||
736 | |||
737 | write_lock(&neigh->lock); | ||
738 | |||
739 | state = neigh->nud_state; | ||
740 | now = jiffies; | ||
741 | next = now + HZ; | ||
742 | |||
743 | if (!(state & NUD_IN_TIMER)) { | ||
744 | #ifndef CONFIG_SMP | ||
745 | printk(KERN_WARNING "neigh: timer & !nud_in_timer\n"); | ||
746 | #endif | ||
747 | goto out; | ||
748 | } | ||
749 | |||
750 | if (state & NUD_REACHABLE) { | ||
751 | if (time_before_eq(now, | ||
752 | neigh->confirmed + neigh->parms->reachable_time)) { | ||
753 | NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); | ||
754 | next = neigh->confirmed + neigh->parms->reachable_time; | ||
755 | } else if (time_before_eq(now, | ||
756 | neigh->used + neigh->parms->delay_probe_time)) { | ||
757 | NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); | ||
758 | neigh->nud_state = NUD_DELAY; | ||
759 | neigh_suspect(neigh); | ||
760 | next = now + neigh->parms->delay_probe_time; | ||
761 | } else { | ||
762 | NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); | ||
763 | neigh->nud_state = NUD_STALE; | ||
764 | neigh_suspect(neigh); | ||
765 | } | ||
766 | } else if (state & NUD_DELAY) { | ||
767 | if (time_before_eq(now, | ||
768 | neigh->confirmed + neigh->parms->delay_probe_time)) { | ||
769 | NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); | ||
770 | neigh->nud_state = NUD_REACHABLE; | ||
771 | neigh_connect(neigh); | ||
772 | next = neigh->confirmed + neigh->parms->reachable_time; | ||
773 | } else { | ||
774 | NEIGH_PRINTK2("neigh %p is probed.\n", neigh); | ||
775 | neigh->nud_state = NUD_PROBE; | ||
776 | atomic_set(&neigh->probes, 0); | ||
777 | next = now + neigh->parms->retrans_time; | ||
778 | } | ||
779 | } else { | ||
780 | /* NUD_PROBE|NUD_INCOMPLETE */ | ||
781 | next = now + neigh->parms->retrans_time; | ||
782 | } | ||
783 | |||
784 | if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && | ||
785 | atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { | ||
786 | struct sk_buff *skb; | ||
787 | |||
788 | neigh->nud_state = NUD_FAILED; | ||
789 | notify = 1; | ||
790 | NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); | ||
791 | NEIGH_PRINTK2("neigh %p is failed.\n", neigh); | ||
792 | |||
793 | /* This is a very delicate point. report_unreachable is a very complicated | ||
794 | routine. Particularly, it can hit the same neighbour entry! | ||
795 | |||
796 | So we try to be careful and avoid a dead loop. --ANK | ||
797 | */ | ||
798 | while (neigh->nud_state == NUD_FAILED && | ||
799 | (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { | ||
800 | write_unlock(&neigh->lock); | ||
801 | neigh->ops->error_report(neigh, skb); | ||
802 | write_lock(&neigh->lock); | ||
803 | } | ||
804 | skb_queue_purge(&neigh->arp_queue); | ||
805 | } | ||
806 | |||
807 | if (neigh->nud_state & NUD_IN_TIMER) { | ||
808 | neigh_hold(neigh); | ||
809 | if (time_before(next, jiffies + HZ/2)) | ||
810 | next = jiffies + HZ/2; | ||
811 | neigh->timer.expires = next; | ||
812 | add_timer(&neigh->timer); | ||
813 | } | ||
814 | if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { | ||
815 | struct sk_buff *skb = skb_peek(&neigh->arp_queue); | ||
816 | /* keep skb alive even if arp_queue overflows */ | ||
817 | if (skb) | ||
818 | skb_get(skb); | ||
819 | write_unlock(&neigh->lock); | ||
820 | neigh->ops->solicit(neigh, skb); | ||
821 | atomic_inc(&neigh->probes); | ||
822 | if (skb) | ||
823 | kfree_skb(skb); | ||
824 | } else { | ||
825 | out: | ||
826 | write_unlock(&neigh->lock); | ||
827 | } | ||
828 | |||
829 | #ifdef CONFIG_ARPD | ||
830 | if (notify && neigh->parms->app_probes) | ||
831 | neigh_app_notify(neigh); | ||
832 | #endif | ||
833 | neigh_release(neigh); | ||
834 | } | ||
835 | |||
836 | int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | ||
837 | { | ||
838 | int rc; | ||
839 | unsigned long now; | ||
840 | |||
841 | write_lock_bh(&neigh->lock); | ||
842 | |||
843 | rc = 0; | ||
844 | if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) | ||
845 | goto out_unlock_bh; | ||
846 | |||
847 | now = jiffies; | ||
848 | |||
849 | if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { | ||
850 | if (neigh->parms->mcast_probes + neigh->parms->app_probes) { | ||
851 | atomic_set(&neigh->probes, neigh->parms->ucast_probes); | ||
852 | neigh->nud_state = NUD_INCOMPLETE; | ||
853 | neigh_hold(neigh); | ||
854 | neigh->timer.expires = now + 1; | ||
855 | add_timer(&neigh->timer); | ||
856 | } else { | ||
857 | neigh->nud_state = NUD_FAILED; | ||
858 | write_unlock_bh(&neigh->lock); | ||
859 | |||
860 | if (skb) | ||
861 | kfree_skb(skb); | ||
862 | return 1; | ||
863 | } | ||
864 | } else if (neigh->nud_state & NUD_STALE) { | ||
865 | NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); | ||
866 | neigh_hold(neigh); | ||
867 | neigh->nud_state = NUD_DELAY; | ||
868 | neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; | ||
869 | add_timer(&neigh->timer); | ||
870 | } | ||
871 | |||
872 | if (neigh->nud_state == NUD_INCOMPLETE) { | ||
873 | if (skb) { | ||
874 | if (skb_queue_len(&neigh->arp_queue) >= | ||
875 | neigh->parms->queue_len) { | ||
876 | struct sk_buff *buff; | ||
877 | buff = neigh->arp_queue.next; | ||
878 | __skb_unlink(buff, &neigh->arp_queue); | ||
879 | kfree_skb(buff); | ||
880 | } | ||
881 | __skb_queue_tail(&neigh->arp_queue, skb); | ||
882 | } | ||
883 | rc = 1; | ||
884 | } | ||
885 | out_unlock_bh: | ||
886 | write_unlock_bh(&neigh->lock); | ||
887 | return rc; | ||
888 | } | ||
889 | |||
890 | static __inline__ void neigh_update_hhs(struct neighbour *neigh) | ||
891 | { | ||
892 | struct hh_cache *hh; | ||
893 | void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = | ||
894 | neigh->dev->header_cache_update; | ||
895 | |||
896 | if (update) { | ||
897 | for (hh = neigh->hh; hh; hh = hh->hh_next) { | ||
898 | write_lock_bh(&hh->hh_lock); | ||
899 | update(hh, neigh->dev, neigh->ha); | ||
900 | write_unlock_bh(&hh->hh_lock); | ||
901 | } | ||
902 | } | ||
903 | } | ||
904 | |||
905 | |||
906 | |||
907 | /* Generic update routine. | ||
908 | -- lladdr is the new lladdr, or NULL if it is not supplied. | ||
909 | -- new is the new state. | ||
910 | -- flags | ||
911 | NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr, | ||
912 | if it is different. | ||
913 | NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect the existing "connected" | ||
914 | lladdr instead of overriding it | ||
915 | if it is different. | ||
916 | It also allows retaining the current state | ||
917 | if lladdr is unchanged. | ||
918 | NEIGH_UPDATE_F_ADMIN means that the change is administrative. | ||
919 | |||
920 | NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing | ||
921 | NTF_ROUTER flag. | ||
922 | NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known as | ||
923 | a router. | ||
924 | |||
925 | Caller MUST hold reference count on the entry. | ||
926 | */ | ||
927 | |||
928 | int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | ||
929 | u32 flags) | ||
930 | { | ||
931 | u8 old; | ||
932 | int err; | ||
933 | #ifdef CONFIG_ARPD | ||
934 | int notify = 0; | ||
935 | #endif | ||
936 | struct net_device *dev; | ||
937 | int update_isrouter = 0; | ||
938 | |||
939 | write_lock_bh(&neigh->lock); | ||
940 | |||
941 | dev = neigh->dev; | ||
942 | old = neigh->nud_state; | ||
943 | err = -EPERM; | ||
944 | |||
945 | if (!(flags & NEIGH_UPDATE_F_ADMIN) && | ||
946 | (old & (NUD_NOARP | NUD_PERMANENT))) | ||
947 | goto out; | ||
948 | |||
949 | if (!(new & NUD_VALID)) { | ||
950 | neigh_del_timer(neigh); | ||
951 | if (old & NUD_CONNECTED) | ||
952 | neigh_suspect(neigh); | ||
953 | neigh->nud_state = new; | ||
954 | err = 0; | ||
955 | #ifdef CONFIG_ARPD | ||
956 | notify = old & NUD_VALID; | ||
957 | #endif | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | /* Compare new lladdr with cached one */ | ||
962 | if (!dev->addr_len) { | ||
963 | /* First case: device needs no address. */ | ||
964 | lladdr = neigh->ha; | ||
965 | } else if (lladdr) { | ||
966 | /* The second case: if something is already cached | ||
967 | and a new address is proposed: | ||
968 | - compare new & old | ||
969 | - if they are different, check override flag | ||
970 | */ | ||
971 | if ((old & NUD_VALID) && | ||
972 | !memcmp(lladdr, neigh->ha, dev->addr_len)) | ||
973 | lladdr = neigh->ha; | ||
974 | } else { | ||
975 | /* No address is supplied; if we know something, | ||
976 | use it, otherwise discard the request. | ||
977 | */ | ||
978 | err = -EINVAL; | ||
979 | if (!(old & NUD_VALID)) | ||
980 | goto out; | ||
981 | lladdr = neigh->ha; | ||
982 | } | ||
983 | |||
984 | if (new & NUD_CONNECTED) | ||
985 | neigh->confirmed = jiffies; | ||
986 | neigh->updated = jiffies; | ||
987 | |||
988 | /* If the entry was valid and the address has not changed, | ||
989 | do not change the entry state if the new one is STALE. | ||
990 | */ | ||
991 | err = 0; | ||
992 | update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; | ||
993 | if (old & NUD_VALID) { | ||
994 | if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { | ||
995 | update_isrouter = 0; | ||
996 | if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && | ||
997 | (old & NUD_CONNECTED)) { | ||
998 | lladdr = neigh->ha; | ||
999 | new = NUD_STALE; | ||
1000 | } else | ||
1001 | goto out; | ||
1002 | } else { | ||
1003 | if (lladdr == neigh->ha && new == NUD_STALE && | ||
1004 | ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || | ||
1005 | (old & NUD_CONNECTED)) | ||
1006 | ) | ||
1007 | new = old; | ||
1008 | } | ||
1009 | } | ||
1010 | |||
1011 | if (new != old) { | ||
1012 | neigh_del_timer(neigh); | ||
1013 | if (new & NUD_IN_TIMER) { | ||
1014 | neigh_hold(neigh); | ||
1015 | neigh->timer.expires = jiffies + | ||
1016 | ((new & NUD_REACHABLE) ? | ||
1017 | neigh->parms->reachable_time : 0); | ||
1018 | add_timer(&neigh->timer); | ||
1019 | } | ||
1020 | neigh->nud_state = new; | ||
1021 | } | ||
1022 | |||
1023 | if (lladdr != neigh->ha) { | ||
1024 | memcpy(&neigh->ha, lladdr, dev->addr_len); | ||
1025 | neigh_update_hhs(neigh); | ||
1026 | if (!(new & NUD_CONNECTED)) | ||
1027 | neigh->confirmed = jiffies - | ||
1028 | (neigh->parms->base_reachable_time << 1); | ||
1029 | #ifdef CONFIG_ARPD | ||
1030 | notify = 1; | ||
1031 | #endif | ||
1032 | } | ||
1033 | if (new == old) | ||
1034 | goto out; | ||
1035 | if (new & NUD_CONNECTED) | ||
1036 | neigh_connect(neigh); | ||
1037 | else | ||
1038 | neigh_suspect(neigh); | ||
1039 | if (!(old & NUD_VALID)) { | ||
1040 | struct sk_buff *skb; | ||
1041 | |||
1042 | 		/* Again: avoid a dead loop if something went wrong */ | ||
1043 | |||
1044 | while (neigh->nud_state & NUD_VALID && | ||
1045 | (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { | ||
1046 | struct neighbour *n1 = neigh; | ||
1047 | write_unlock_bh(&neigh->lock); | ||
1048 | /* On shaper/eql skb->dst->neighbour != neigh :( */ | ||
1049 | if (skb->dst && skb->dst->neighbour) | ||
1050 | n1 = skb->dst->neighbour; | ||
1051 | n1->output(skb); | ||
1052 | write_lock_bh(&neigh->lock); | ||
1053 | } | ||
1054 | skb_queue_purge(&neigh->arp_queue); | ||
1055 | } | ||
1056 | out: | ||
1057 | if (update_isrouter) { | ||
1058 | neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? | ||
1059 | (neigh->flags | NTF_ROUTER) : | ||
1060 | (neigh->flags & ~NTF_ROUTER); | ||
1061 | } | ||
1062 | write_unlock_bh(&neigh->lock); | ||
1063 | #ifdef CONFIG_ARPD | ||
1064 | if (notify && neigh->parms->app_probes) | ||
1065 | neigh_app_notify(neigh); | ||
1066 | #endif | ||
1067 | return err; | ||
1068 | } | ||
1069 | |||
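For illustration (not part of this commit), here is a minimal sketch of an administrative update as a caller might issue it; it mirrors the neigh_delete() netlink handler further down. 'tbl', 'addr' and 'dev' are hypothetical, and only helpers from this file are used:

	/* Illustrative sketch: force the entry for key 'addr' on 'dev'
	 * into NUD_FAILED, overriding any cached lladdr. neigh_lookup()
	 * returns a held reference, which satisfies neigh_update()'s
	 * "caller MUST hold reference count" rule.
	 */
	static int example_flush_neigh(struct neigh_table *tbl,
				       const void *addr, struct net_device *dev)
	{
		struct neighbour *n = neigh_lookup(tbl, addr, dev);
		int err = -ENOENT;

		if (n) {
			err = neigh_update(n, NULL, NUD_FAILED,
					   NEIGH_UPDATE_F_OVERRIDE |
					   NEIGH_UPDATE_F_ADMIN);
			neigh_release(n);
		}
		return err;
	}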
1070 | struct neighbour *neigh_event_ns(struct neigh_table *tbl, | ||
1071 | u8 *lladdr, void *saddr, | ||
1072 | struct net_device *dev) | ||
1073 | { | ||
1074 | struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, | ||
1075 | lladdr || !dev->addr_len); | ||
1076 | if (neigh) | ||
1077 | neigh_update(neigh, lladdr, NUD_STALE, | ||
1078 | NEIGH_UPDATE_F_OVERRIDE); | ||
1079 | return neigh; | ||
1080 | } | ||
1081 | |||
1082 | static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, | ||
1083 | u16 protocol) | ||
1084 | { | ||
1085 | struct hh_cache *hh; | ||
1086 | struct net_device *dev = dst->dev; | ||
1087 | |||
1088 | for (hh = n->hh; hh; hh = hh->hh_next) | ||
1089 | if (hh->hh_type == protocol) | ||
1090 | break; | ||
1091 | |||
1092 | if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { | ||
1093 | memset(hh, 0, sizeof(struct hh_cache)); | ||
1094 | rwlock_init(&hh->hh_lock); | ||
1095 | hh->hh_type = protocol; | ||
1096 | atomic_set(&hh->hh_refcnt, 0); | ||
1097 | hh->hh_next = NULL; | ||
1098 | if (dev->hard_header_cache(n, hh)) { | ||
1099 | kfree(hh); | ||
1100 | hh = NULL; | ||
1101 | } else { | ||
1102 | atomic_inc(&hh->hh_refcnt); | ||
1103 | hh->hh_next = n->hh; | ||
1104 | n->hh = hh; | ||
1105 | if (n->nud_state & NUD_CONNECTED) | ||
1106 | hh->hh_output = n->ops->hh_output; | ||
1107 | else | ||
1108 | hh->hh_output = n->ops->output; | ||
1109 | } | ||
1110 | } | ||
1111 | if (hh) { | ||
1112 | atomic_inc(&hh->hh_refcnt); | ||
1113 | dst->hh = hh; | ||
1114 | } | ||
1115 | } | ||
1116 | |||
1117 | /* This function can be used in contexts where only the old dev_queue_xmit | ||
1118 |    worked, e.g. if you want to override the normal output path (eql, shaper) | ||
1119 |    but resolution has not been made yet. | ||
1120 |  */ | ||
1121 | |||
1122 | int neigh_compat_output(struct sk_buff *skb) | ||
1123 | { | ||
1124 | struct net_device *dev = skb->dev; | ||
1125 | |||
1126 | __skb_pull(skb, skb->nh.raw - skb->data); | ||
1127 | |||
1128 | if (dev->hard_header && | ||
1129 | dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, | ||
1130 | skb->len) < 0 && | ||
1131 | dev->rebuild_header(skb)) | ||
1132 | return 0; | ||
1133 | |||
1134 | return dev_queue_xmit(skb); | ||
1135 | } | ||
1136 | |||
1137 | /* Slow and careful. */ | ||
1138 | |||
1139 | int neigh_resolve_output(struct sk_buff *skb) | ||
1140 | { | ||
1141 | struct dst_entry *dst = skb->dst; | ||
1142 | struct neighbour *neigh; | ||
1143 | int rc = 0; | ||
1144 | |||
1145 | if (!dst || !(neigh = dst->neighbour)) | ||
1146 | goto discard; | ||
1147 | |||
1148 | __skb_pull(skb, skb->nh.raw - skb->data); | ||
1149 | |||
1150 | if (!neigh_event_send(neigh, skb)) { | ||
1151 | int err; | ||
1152 | struct net_device *dev = neigh->dev; | ||
1153 | if (dev->hard_header_cache && !dst->hh) { | ||
1154 | write_lock_bh(&neigh->lock); | ||
1155 | if (!dst->hh) | ||
1156 | neigh_hh_init(neigh, dst, dst->ops->protocol); | ||
1157 | err = dev->hard_header(skb, dev, ntohs(skb->protocol), | ||
1158 | neigh->ha, NULL, skb->len); | ||
1159 | write_unlock_bh(&neigh->lock); | ||
1160 | } else { | ||
1161 | read_lock_bh(&neigh->lock); | ||
1162 | err = dev->hard_header(skb, dev, ntohs(skb->protocol), | ||
1163 | neigh->ha, NULL, skb->len); | ||
1164 | read_unlock_bh(&neigh->lock); | ||
1165 | } | ||
1166 | if (err >= 0) | ||
1167 | rc = neigh->ops->queue_xmit(skb); | ||
1168 | else | ||
1169 | goto out_kfree_skb; | ||
1170 | } | ||
1171 | out: | ||
1172 | return rc; | ||
1173 | discard: | ||
1174 | NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", | ||
1175 | dst, dst ? dst->neighbour : NULL); | ||
1176 | out_kfree_skb: | ||
1177 | rc = -EINVAL; | ||
1178 | kfree_skb(skb); | ||
1179 | goto out; | ||
1180 | } | ||
1181 | |||
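For illustration (not part of this commit), the caller side of the output routines above. A protocol's finish-output step does not call neigh_resolve_output() directly; it jumps through the pointers the neighbour code installs (neigh->output starts out resolving and is switched to the connected variant by neigh_connect()). A simplified sketch, omitting the cached-header copy a real path performs:

	static int example_finish_output(struct sk_buff *skb)
	{
		struct dst_entry *dst = skb->dst;

		if (dst->hh)
			/* a real caller first copies hh->hh_data into the
			 * skb under read_lock_bh(&hh->hh_lock) */
			return dst->hh->hh_output(skb);
		if (dst->neighbour)
			return dst->neighbour->output(skb);

		kfree_skb(skb);
		return -EINVAL;
	}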
1182 | /* As fast as possible without hh cache */ | ||
1183 | |||
1184 | int neigh_connected_output(struct sk_buff *skb) | ||
1185 | { | ||
1186 | int err; | ||
1187 | struct dst_entry *dst = skb->dst; | ||
1188 | struct neighbour *neigh = dst->neighbour; | ||
1189 | struct net_device *dev = neigh->dev; | ||
1190 | |||
1191 | __skb_pull(skb, skb->nh.raw - skb->data); | ||
1192 | |||
1193 | read_lock_bh(&neigh->lock); | ||
1194 | err = dev->hard_header(skb, dev, ntohs(skb->protocol), | ||
1195 | neigh->ha, NULL, skb->len); | ||
1196 | read_unlock_bh(&neigh->lock); | ||
1197 | if (err >= 0) | ||
1198 | err = neigh->ops->queue_xmit(skb); | ||
1199 | else { | ||
1200 | err = -EINVAL; | ||
1201 | kfree_skb(skb); | ||
1202 | } | ||
1203 | return err; | ||
1204 | } | ||
1205 | |||
1206 | static void neigh_proxy_process(unsigned long arg) | ||
1207 | { | ||
1208 | struct neigh_table *tbl = (struct neigh_table *)arg; | ||
1209 | long sched_next = 0; | ||
1210 | unsigned long now = jiffies; | ||
1211 | struct sk_buff *skb; | ||
1212 | |||
1213 | spin_lock(&tbl->proxy_queue.lock); | ||
1214 | |||
1215 | skb = tbl->proxy_queue.next; | ||
1216 | |||
1217 | while (skb != (struct sk_buff *)&tbl->proxy_queue) { | ||
1218 | struct sk_buff *back = skb; | ||
1219 | 		long tdif = back->stamp.tv_usec - now;	/* tv_usec carries the jiffies deadline set by pneigh_enqueue() */ | ||
1220 | |||
1221 | skb = skb->next; | ||
1222 | if (tdif <= 0) { | ||
1223 | struct net_device *dev = back->dev; | ||
1224 | __skb_unlink(back, &tbl->proxy_queue); | ||
1225 | if (tbl->proxy_redo && netif_running(dev)) | ||
1226 | tbl->proxy_redo(back); | ||
1227 | else | ||
1228 | kfree_skb(back); | ||
1229 | |||
1230 | dev_put(dev); | ||
1231 | } else if (!sched_next || tdif < sched_next) | ||
1232 | sched_next = tdif; | ||
1233 | } | ||
1234 | del_timer(&tbl->proxy_timer); | ||
1235 | if (sched_next) | ||
1236 | mod_timer(&tbl->proxy_timer, jiffies + sched_next); | ||
1237 | spin_unlock(&tbl->proxy_queue.lock); | ||
1238 | } | ||
1239 | |||
1240 | void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, | ||
1241 | struct sk_buff *skb) | ||
1242 | { | ||
1243 | unsigned long now = jiffies; | ||
1244 | unsigned long sched_next = now + (net_random() % p->proxy_delay); | ||
1245 | |||
1246 | if (tbl->proxy_queue.qlen > p->proxy_qlen) { | ||
1247 | kfree_skb(skb); | ||
1248 | return; | ||
1249 | } | ||
1250 | 	skb->stamp.tv_sec = LOCALLY_ENQUEUED;	/* flag: not a real timestamp */ | ||
1251 | 	skb->stamp.tv_usec = sched_next;	/* deadline in jiffies, read by neigh_proxy_process() */ | ||
1252 | |||
1253 | spin_lock(&tbl->proxy_queue.lock); | ||
1254 | if (del_timer(&tbl->proxy_timer)) { | ||
1255 | if (time_before(tbl->proxy_timer.expires, sched_next)) | ||
1256 | sched_next = tbl->proxy_timer.expires; | ||
1257 | } | ||
1258 | dst_release(skb->dst); | ||
1259 | skb->dst = NULL; | ||
1260 | dev_hold(skb->dev); | ||
1261 | __skb_queue_tail(&tbl->proxy_queue, skb); | ||
1262 | mod_timer(&tbl->proxy_timer, sched_next); | ||
1263 | spin_unlock(&tbl->proxy_queue.lock); | ||
1264 | } | ||
1265 | |||
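For illustration (not part of this commit), how a receive path might use the proxy queue above; 'tbl' and 'p' are hypothetical. pneigh_enqueue() consumes the skb either way: it is replayed through tbl->proxy_redo() by neigh_proxy_process() after a random fraction of p->proxy_delay, or dropped once the queue exceeds p->proxy_qlen:

	static void example_maybe_delay_proxy(struct neigh_table *tbl,
					      struct neigh_parms *p,
					      struct sk_buff *skb)
	{
		if (p->proxy_delay) {
			pneigh_enqueue(tbl, p, skb);	/* answer later */
			return;
		}
		/* no delay configured: a real protocol would reply inline here */
		kfree_skb(skb);
	}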
1266 | |||
1267 | struct neigh_parms *neigh_parms_alloc(struct net_device *dev, | ||
1268 | struct neigh_table *tbl) | ||
1269 | { | ||
1270 | struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL); | ||
1271 | |||
1272 | if (p) { | ||
1273 | memcpy(p, &tbl->parms, sizeof(*p)); | ||
1274 | p->tbl = tbl; | ||
1275 | atomic_set(&p->refcnt, 1); | ||
1276 | INIT_RCU_HEAD(&p->rcu_head); | ||
1277 | p->reachable_time = | ||
1278 | neigh_rand_reach_time(p->base_reachable_time); | ||
1279 | if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) { | ||
1280 | kfree(p); | ||
1281 | return NULL; | ||
1282 | } | ||
1283 | p->sysctl_table = NULL; | ||
1284 | write_lock_bh(&tbl->lock); | ||
1285 | p->next = tbl->parms.next; | ||
1286 | tbl->parms.next = p; | ||
1287 | write_unlock_bh(&tbl->lock); | ||
1288 | } | ||
1289 | return p; | ||
1290 | } | ||
1291 | |||
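For illustration (not part of this commit), the per-device lifecycle a protocol's device notifier would follow, with 'my_tbl' a hypothetical registered table. The clone starts as a copy of the table's default parms and can then be tuned directly, or later through the sysctl tree registered near the end of this file:

	static struct neigh_parms *example_dev_up(struct neigh_table *my_tbl,
						  struct net_device *dev)
	{
		struct neigh_parms *p = neigh_parms_alloc(dev, my_tbl);

		if (p)
			p->ucast_probes = 5;	/* device-specific tweak */
		return p;
	}

	static void example_dev_down(struct neigh_table *my_tbl,
				     struct neigh_parms *p)
	{
		neigh_parms_release(my_tbl, p);	/* freed via RCU */
	}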
1292 | static void neigh_rcu_free_parms(struct rcu_head *head) | ||
1293 | { | ||
1294 | struct neigh_parms *parms = | ||
1295 | container_of(head, struct neigh_parms, rcu_head); | ||
1296 | |||
1297 | neigh_parms_put(parms); | ||
1298 | } | ||
1299 | |||
1300 | void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) | ||
1301 | { | ||
1302 | struct neigh_parms **p; | ||
1303 | |||
1304 | if (!parms || parms == &tbl->parms) | ||
1305 | return; | ||
1306 | write_lock_bh(&tbl->lock); | ||
1307 | for (p = &tbl->parms.next; *p; p = &(*p)->next) { | ||
1308 | if (*p == parms) { | ||
1309 | *p = parms->next; | ||
1310 | parms->dead = 1; | ||
1311 | write_unlock_bh(&tbl->lock); | ||
1312 | call_rcu(&parms->rcu_head, neigh_rcu_free_parms); | ||
1313 | return; | ||
1314 | } | ||
1315 | } | ||
1316 | write_unlock_bh(&tbl->lock); | ||
1317 | NEIGH_PRINTK1("neigh_parms_release: not found\n"); | ||
1318 | } | ||
1319 | |||
1320 | void neigh_parms_destroy(struct neigh_parms *parms) | ||
1321 | { | ||
1322 | kfree(parms); | ||
1323 | } | ||
1324 | |||
1325 | |||
1326 | void neigh_table_init(struct neigh_table *tbl) | ||
1327 | { | ||
1328 | unsigned long now = jiffies; | ||
1329 | unsigned long phsize; | ||
1330 | |||
1331 | atomic_set(&tbl->parms.refcnt, 1); | ||
1332 | INIT_RCU_HEAD(&tbl->parms.rcu_head); | ||
1333 | tbl->parms.reachable_time = | ||
1334 | neigh_rand_reach_time(tbl->parms.base_reachable_time); | ||
1335 | |||
1336 | if (!tbl->kmem_cachep) | ||
1337 | tbl->kmem_cachep = kmem_cache_create(tbl->id, | ||
1338 | tbl->entry_size, | ||
1339 | 0, SLAB_HWCACHE_ALIGN, | ||
1340 | NULL, NULL); | ||
1341 | |||
1342 | if (!tbl->kmem_cachep) | ||
1343 | panic("cannot create neighbour cache"); | ||
1344 | |||
1345 | tbl->stats = alloc_percpu(struct neigh_statistics); | ||
1346 | if (!tbl->stats) | ||
1347 | panic("cannot create neighbour cache statistics"); | ||
1348 | |||
1349 | #ifdef CONFIG_PROC_FS | ||
1350 | tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); | ||
1351 | if (!tbl->pde) | ||
1352 | panic("cannot create neighbour proc dir entry"); | ||
1353 | tbl->pde->proc_fops = &neigh_stat_seq_fops; | ||
1354 | tbl->pde->data = tbl; | ||
1355 | #endif | ||
1356 | |||
1357 | tbl->hash_mask = 1; | ||
1358 | tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); | ||
1359 | |||
1360 | phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); | ||
1361 | tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL); | ||
1362 | |||
1363 | if (!tbl->hash_buckets || !tbl->phash_buckets) | ||
1364 | panic("cannot allocate neighbour cache hashes"); | ||
1365 | |||
1366 | memset(tbl->phash_buckets, 0, phsize); | ||
1367 | |||
1368 | get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); | ||
1369 | |||
1370 | rwlock_init(&tbl->lock); | ||
1371 | init_timer(&tbl->gc_timer); | ||
1372 | tbl->gc_timer.data = (unsigned long)tbl; | ||
1373 | tbl->gc_timer.function = neigh_periodic_timer; | ||
1374 | tbl->gc_timer.expires = now + 1; | ||
1375 | add_timer(&tbl->gc_timer); | ||
1376 | |||
1377 | init_timer(&tbl->proxy_timer); | ||
1378 | tbl->proxy_timer.data = (unsigned long)tbl; | ||
1379 | tbl->proxy_timer.function = neigh_proxy_process; | ||
1380 | skb_queue_head_init(&tbl->proxy_queue); | ||
1381 | |||
1382 | tbl->last_flush = now; | ||
1383 | tbl->last_rand = now + tbl->parms.reachable_time * 20; | ||
1384 | write_lock(&neigh_tbl_lock); | ||
1385 | tbl->next = neigh_tables; | ||
1386 | neigh_tables = tbl; | ||
1387 | write_unlock(&neigh_tbl_lock); | ||
1388 | } | ||
1389 | |||
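For illustration (not part of this commit), the registration pattern a protocol follows, loosely modeled on 2.6-era ARP. Only a few fields are shown; a real table also supplies the hash and constructor hooks used earlier in this file:

	static struct neigh_table example_tbl = {
		.family		= AF_INET,
		.entry_size	= sizeof(struct neighbour) + 4,	/* key appended */
		.key_len	= 4,
		.id		= "example_cache",
		.parms		= {
			.base_reachable_time	= 30 * HZ,
			.retrans_time		= 1 * HZ,
			.gc_staletime		= 60 * HZ,
		},
	};

	static int __init example_init(void)
	{
		neigh_table_init(&example_tbl);	/* links it into neigh_tables */
		return 0;
	}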
1390 | int neigh_table_clear(struct neigh_table *tbl) | ||
1391 | { | ||
1392 | struct neigh_table **tp; | ||
1393 | |||
1394 | 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */ | ||
1395 | del_timer_sync(&tbl->gc_timer); | ||
1396 | del_timer_sync(&tbl->proxy_timer); | ||
1397 | pneigh_queue_purge(&tbl->proxy_queue); | ||
1398 | neigh_ifdown(tbl, NULL); | ||
1399 | if (atomic_read(&tbl->entries)) | ||
1400 | printk(KERN_CRIT "neighbour leakage\n"); | ||
1401 | write_lock(&neigh_tbl_lock); | ||
1402 | for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { | ||
1403 | if (*tp == tbl) { | ||
1404 | *tp = tbl->next; | ||
1405 | break; | ||
1406 | } | ||
1407 | } | ||
1408 | write_unlock(&neigh_tbl_lock); | ||
1409 | |||
1410 | neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); | ||
1411 | tbl->hash_buckets = NULL; | ||
1412 | |||
1413 | kfree(tbl->phash_buckets); | ||
1414 | tbl->phash_buckets = NULL; | ||
1415 | |||
1416 | return 0; | ||
1417 | } | ||
1418 | |||
1419 | int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
1420 | { | ||
1421 | struct ndmsg *ndm = NLMSG_DATA(nlh); | ||
1422 | struct rtattr **nda = arg; | ||
1423 | struct neigh_table *tbl; | ||
1424 | struct net_device *dev = NULL; | ||
1425 | int err = -ENODEV; | ||
1426 | |||
1427 | if (ndm->ndm_ifindex && | ||
1428 | (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) | ||
1429 | goto out; | ||
1430 | |||
1431 | read_lock(&neigh_tbl_lock); | ||
1432 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { | ||
1433 | struct rtattr *dst_attr = nda[NDA_DST - 1]; | ||
1434 | struct neighbour *n; | ||
1435 | |||
1436 | if (tbl->family != ndm->ndm_family) | ||
1437 | continue; | ||
1438 | read_unlock(&neigh_tbl_lock); | ||
1439 | |||
1440 | err = -EINVAL; | ||
1441 | if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) | ||
1442 | goto out_dev_put; | ||
1443 | |||
1444 | if (ndm->ndm_flags & NTF_PROXY) { | ||
1445 | err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); | ||
1446 | goto out_dev_put; | ||
1447 | } | ||
1448 | |||
1449 | if (!dev) | ||
1450 | goto out; | ||
1451 | |||
1452 | n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); | ||
1453 | if (n) { | ||
1454 | err = neigh_update(n, NULL, NUD_FAILED, | ||
1455 | NEIGH_UPDATE_F_OVERRIDE| | ||
1456 | NEIGH_UPDATE_F_ADMIN); | ||
1457 | neigh_release(n); | ||
1458 | } | ||
1459 | goto out_dev_put; | ||
1460 | } | ||
1461 | read_unlock(&neigh_tbl_lock); | ||
1462 | err = -EADDRNOTAVAIL; | ||
1463 | out_dev_put: | ||
1464 | if (dev) | ||
1465 | dev_put(dev); | ||
1466 | out: | ||
1467 | return err; | ||
1468 | } | ||
1469 | |||
1470 | int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
1471 | { | ||
1472 | struct ndmsg *ndm = NLMSG_DATA(nlh); | ||
1473 | struct rtattr **nda = arg; | ||
1474 | struct neigh_table *tbl; | ||
1475 | struct net_device *dev = NULL; | ||
1476 | int err = -ENODEV; | ||
1477 | |||
1478 | if (ndm->ndm_ifindex && | ||
1479 | (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) | ||
1480 | goto out; | ||
1481 | |||
1482 | read_lock(&neigh_tbl_lock); | ||
1483 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { | ||
1484 | struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; | ||
1485 | struct rtattr *dst_attr = nda[NDA_DST - 1]; | ||
1486 | int override = 1; | ||
1487 | struct neighbour *n; | ||
1488 | |||
1489 | if (tbl->family != ndm->ndm_family) | ||
1490 | continue; | ||
1491 | read_unlock(&neigh_tbl_lock); | ||
1492 | |||
1493 | err = -EINVAL; | ||
1494 | if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) | ||
1495 | goto out_dev_put; | ||
1496 | |||
1497 | if (ndm->ndm_flags & NTF_PROXY) { | ||
1498 | err = -ENOBUFS; | ||
1499 | if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) | ||
1500 | err = 0; | ||
1501 | goto out_dev_put; | ||
1502 | } | ||
1503 | |||
1504 | err = -EINVAL; | ||
1505 | if (!dev) | ||
1506 | goto out; | ||
1507 | if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len) | ||
1508 | goto out_dev_put; | ||
1509 | |||
1510 | n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); | ||
1511 | if (n) { | ||
1512 | if (nlh->nlmsg_flags & NLM_F_EXCL) { | ||
1513 | err = -EEXIST; | ||
1514 | neigh_release(n); | ||
1515 | goto out_dev_put; | ||
1516 | } | ||
1517 | |||
1518 | override = nlh->nlmsg_flags & NLM_F_REPLACE; | ||
1519 | } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { | ||
1520 | err = -ENOENT; | ||
1521 | goto out_dev_put; | ||
1522 | } else { | ||
1523 | n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); | ||
1524 | if (IS_ERR(n)) { | ||
1525 | err = PTR_ERR(n); | ||
1526 | goto out_dev_put; | ||
1527 | } | ||
1528 | } | ||
1529 | |||
1530 | err = neigh_update(n, | ||
1531 | lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, | ||
1532 | ndm->ndm_state, | ||
1533 | (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | | ||
1534 | NEIGH_UPDATE_F_ADMIN); | ||
1535 | |||
1536 | neigh_release(n); | ||
1537 | goto out_dev_put; | ||
1538 | } | ||
1539 | |||
1540 | read_unlock(&neigh_tbl_lock); | ||
1541 | err = -EADDRNOTAVAIL; | ||
1542 | out_dev_put: | ||
1543 | if (dev) | ||
1544 | dev_put(dev); | ||
1545 | out: | ||
1546 | return err; | ||
1547 | } | ||
1548 | |||
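For illustration (not part of this commit), the kernel-side effect of roughly what iproute2 issues for "ip neigh replace ... nud permanent": the handler above resolves the netlink message to this lookup-then-update sequence. 'tbl', 'key', 'mac' and 'dev' are hypothetical:

	static int example_add_permanent(struct neigh_table *tbl,
					 const void *key, const u8 *mac,
					 struct net_device *dev)
	{
		struct neighbour *n = __neigh_lookup_errno(tbl, key, dev);
		int err;

		if (IS_ERR(n))
			return PTR_ERR(n);
		err = neigh_update(n, mac, NUD_PERMANENT,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(n);
		return err;
	}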
1549 | |||
1550 | static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, | ||
1551 | u32 pid, u32 seq, int event) | ||
1552 | { | ||
1553 | unsigned long now = jiffies; | ||
1554 | unsigned char *b = skb->tail; | ||
1555 | struct nda_cacheinfo ci; | ||
1556 | int locked = 0; | ||
1557 | u32 probes; | ||
1558 | struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event, | ||
1559 | sizeof(struct ndmsg)); | ||
1560 | struct ndmsg *ndm = NLMSG_DATA(nlh); | ||
1561 | |||
1562 | nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0; | ||
1563 | ndm->ndm_family = n->ops->family; | ||
1564 | ndm->ndm_flags = n->flags; | ||
1565 | ndm->ndm_type = n->type; | ||
1566 | ndm->ndm_ifindex = n->dev->ifindex; | ||
1567 | RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); | ||
1568 | read_lock_bh(&n->lock); | ||
1569 | locked = 1; | ||
1570 | ndm->ndm_state = n->nud_state; | ||
1571 | if (n->nud_state & NUD_VALID) | ||
1572 | RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); | ||
1573 | ci.ndm_used = now - n->used; | ||
1574 | ci.ndm_confirmed = now - n->confirmed; | ||
1575 | ci.ndm_updated = now - n->updated; | ||
1576 | ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; | ||
1577 | probes = atomic_read(&n->probes); | ||
1578 | read_unlock_bh(&n->lock); | ||
1579 | locked = 0; | ||
1580 | RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); | ||
1581 | RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes); | ||
1582 | nlh->nlmsg_len = skb->tail - b; | ||
1583 | return skb->len; | ||
1584 | |||
1585 | nlmsg_failure: | ||
1586 | rtattr_failure: | ||
1587 | if (locked) | ||
1588 | read_unlock_bh(&n->lock); | ||
1589 | skb_trim(skb, b - skb->data); | ||
1590 | return -1; | ||
1591 | } | ||
1592 | |||
1593 | |||
1594 | static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, | ||
1595 | struct netlink_callback *cb) | ||
1596 | { | ||
1597 | struct neighbour *n; | ||
1598 | int rc, h, s_h = cb->args[1]; | ||
1599 | int idx, s_idx = idx = cb->args[2]; | ||
1600 | |||
1601 | for (h = 0; h <= tbl->hash_mask; h++) { | ||
1602 | if (h < s_h) | ||
1603 | continue; | ||
1604 | if (h > s_h) | ||
1605 | s_idx = 0; | ||
1606 | read_lock_bh(&tbl->lock); | ||
1607 | for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { | ||
1608 | if (idx < s_idx) | ||
1609 | continue; | ||
1610 | if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, | ||
1611 | cb->nlh->nlmsg_seq, | ||
1612 | RTM_NEWNEIGH) <= 0) { | ||
1613 | read_unlock_bh(&tbl->lock); | ||
1614 | rc = -1; | ||
1615 | goto out; | ||
1616 | } | ||
1617 | } | ||
1618 | read_unlock_bh(&tbl->lock); | ||
1619 | } | ||
1620 | rc = skb->len; | ||
1621 | out: | ||
1622 | cb->args[1] = h; | ||
1623 | cb->args[2] = idx; | ||
1624 | return rc; | ||
1625 | } | ||
1626 | |||
1627 | int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) | ||
1628 | { | ||
1629 | struct neigh_table *tbl; | ||
1630 | int t, family, s_t; | ||
1631 | |||
1632 | read_lock(&neigh_tbl_lock); | ||
1633 | family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; | ||
1634 | s_t = cb->args[0]; | ||
1635 | |||
1636 | for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { | ||
1637 | if (t < s_t || (family && tbl->family != family)) | ||
1638 | continue; | ||
1639 | if (t > s_t) | ||
1640 | memset(&cb->args[1], 0, sizeof(cb->args) - | ||
1641 | sizeof(cb->args[0])); | ||
1642 | if (neigh_dump_table(tbl, skb, cb) < 0) | ||
1643 | break; | ||
1644 | } | ||
1645 | read_unlock(&neigh_tbl_lock); | ||
1646 | |||
1647 | cb->args[0] = t; | ||
1648 | return skb->len; | ||
1649 | } | ||
1650 | |||
1651 | void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) | ||
1652 | { | ||
1653 | int chain; | ||
1654 | |||
1655 | read_lock_bh(&tbl->lock); | ||
1656 | for (chain = 0; chain <= tbl->hash_mask; chain++) { | ||
1657 | struct neighbour *n; | ||
1658 | |||
1659 | for (n = tbl->hash_buckets[chain]; n; n = n->next) | ||
1660 | cb(n, cookie); | ||
1661 | } | ||
1662 | read_unlock_bh(&tbl->lock); | ||
1663 | } | ||
1664 | EXPORT_SYMBOL(neigh_for_each); | ||
1665 | |||
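For illustration (not part of this commit), a trivial neigh_for_each() callback; it runs with tbl->lock read-held, so it must not sleep or re-take the table lock. 'example_tbl' in the usage note is hypothetical:

	struct example_count {
		u8	state;		/* NUD_* mask to match */
		int	n;
	};

	static void example_count_cb(struct neighbour *neigh, void *cookie)
	{
		struct example_count *c = cookie;

		if (neigh->nud_state & c->state)
			c->n++;
	}

	/* usage:
	 *	struct example_count c = { .state = NUD_REACHABLE, .n = 0 };
	 *	neigh_for_each(&example_tbl, example_count_cb, &c);
	 */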
1666 | /* The tbl->lock must be held as a writer and BH disabled. */ | ||
1667 | void __neigh_for_each_release(struct neigh_table *tbl, | ||
1668 | int (*cb)(struct neighbour *)) | ||
1669 | { | ||
1670 | int chain; | ||
1671 | |||
1672 | for (chain = 0; chain <= tbl->hash_mask; chain++) { | ||
1673 | struct neighbour *n, **np; | ||
1674 | |||
1675 | np = &tbl->hash_buckets[chain]; | ||
1676 | while ((n = *np) != NULL) { | ||
1677 | int release; | ||
1678 | |||
1679 | write_lock(&n->lock); | ||
1680 | release = cb(n); | ||
1681 | if (release) { | ||
1682 | *np = n->next; | ||
1683 | n->dead = 1; | ||
1684 | } else | ||
1685 | np = &n->next; | ||
1686 | write_unlock(&n->lock); | ||
1687 | if (release) | ||
1688 | neigh_release(n); | ||
1689 | } | ||
1690 | } | ||
1691 | } | ||
1692 | EXPORT_SYMBOL(__neigh_for_each_release); | ||
1693 | |||
1694 | #ifdef CONFIG_PROC_FS | ||
1695 | |||
1696 | static struct neighbour *neigh_get_first(struct seq_file *seq) | ||
1697 | { | ||
1698 | struct neigh_seq_state *state = seq->private; | ||
1699 | struct neigh_table *tbl = state->tbl; | ||
1700 | struct neighbour *n = NULL; | ||
1701 | int bucket = state->bucket; | ||
1702 | |||
1703 | state->flags &= ~NEIGH_SEQ_IS_PNEIGH; | ||
1704 | for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { | ||
1705 | n = tbl->hash_buckets[bucket]; | ||
1706 | |||
1707 | while (n) { | ||
1708 | if (state->neigh_sub_iter) { | ||
1709 | loff_t fakep = 0; | ||
1710 | void *v; | ||
1711 | |||
1712 | v = state->neigh_sub_iter(state, n, &fakep); | ||
1713 | if (!v) | ||
1714 | goto next; | ||
1715 | } | ||
1716 | if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) | ||
1717 | break; | ||
1718 | if (n->nud_state & ~NUD_NOARP) | ||
1719 | break; | ||
1720 | next: | ||
1721 | n = n->next; | ||
1722 | } | ||
1723 | |||
1724 | if (n) | ||
1725 | break; | ||
1726 | } | ||
1727 | state->bucket = bucket; | ||
1728 | |||
1729 | return n; | ||
1730 | } | ||
1731 | |||
1732 | static struct neighbour *neigh_get_next(struct seq_file *seq, | ||
1733 | struct neighbour *n, | ||
1734 | loff_t *pos) | ||
1735 | { | ||
1736 | struct neigh_seq_state *state = seq->private; | ||
1737 | struct neigh_table *tbl = state->tbl; | ||
1738 | |||
1739 | if (state->neigh_sub_iter) { | ||
1740 | void *v = state->neigh_sub_iter(state, n, pos); | ||
1741 | if (v) | ||
1742 | return n; | ||
1743 | } | ||
1744 | n = n->next; | ||
1745 | |||
1746 | while (1) { | ||
1747 | while (n) { | ||
1748 | if (state->neigh_sub_iter) { | ||
1749 | void *v = state->neigh_sub_iter(state, n, pos); | ||
1750 | if (v) | ||
1751 | return n; | ||
1752 | goto next; | ||
1753 | } | ||
1754 | if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) | ||
1755 | break; | ||
1756 | |||
1757 | if (n->nud_state & ~NUD_NOARP) | ||
1758 | break; | ||
1759 | next: | ||
1760 | n = n->next; | ||
1761 | } | ||
1762 | |||
1763 | if (n) | ||
1764 | break; | ||
1765 | |||
1766 | if (++state->bucket > tbl->hash_mask) | ||
1767 | break; | ||
1768 | |||
1769 | n = tbl->hash_buckets[state->bucket]; | ||
1770 | } | ||
1771 | |||
1772 | if (n && pos) | ||
1773 | --(*pos); | ||
1774 | return n; | ||
1775 | } | ||
1776 | |||
1777 | static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) | ||
1778 | { | ||
1779 | struct neighbour *n = neigh_get_first(seq); | ||
1780 | |||
1781 | if (n) { | ||
1782 | while (*pos) { | ||
1783 | n = neigh_get_next(seq, n, pos); | ||
1784 | if (!n) | ||
1785 | break; | ||
1786 | } | ||
1787 | } | ||
1788 | return *pos ? NULL : n; | ||
1789 | } | ||
1790 | |||
1791 | static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) | ||
1792 | { | ||
1793 | struct neigh_seq_state *state = seq->private; | ||
1794 | struct neigh_table *tbl = state->tbl; | ||
1795 | struct pneigh_entry *pn = NULL; | ||
1796 | int bucket = state->bucket; | ||
1797 | |||
1798 | state->flags |= NEIGH_SEQ_IS_PNEIGH; | ||
1799 | for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { | ||
1800 | pn = tbl->phash_buckets[bucket]; | ||
1801 | if (pn) | ||
1802 | break; | ||
1803 | } | ||
1804 | state->bucket = bucket; | ||
1805 | |||
1806 | return pn; | ||
1807 | } | ||
1808 | |||
1809 | static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, | ||
1810 | struct pneigh_entry *pn, | ||
1811 | loff_t *pos) | ||
1812 | { | ||
1813 | struct neigh_seq_state *state = seq->private; | ||
1814 | struct neigh_table *tbl = state->tbl; | ||
1815 | |||
1816 | pn = pn->next; | ||
1817 | while (!pn) { | ||
1818 | if (++state->bucket > PNEIGH_HASHMASK) | ||
1819 | break; | ||
1820 | pn = tbl->phash_buckets[state->bucket]; | ||
1821 | if (pn) | ||
1822 | break; | ||
1823 | } | ||
1824 | |||
1825 | if (pn && pos) | ||
1826 | --(*pos); | ||
1827 | |||
1828 | return pn; | ||
1829 | } | ||
1830 | |||
1831 | static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) | ||
1832 | { | ||
1833 | struct pneigh_entry *pn = pneigh_get_first(seq); | ||
1834 | |||
1835 | if (pn) { | ||
1836 | while (*pos) { | ||
1837 | pn = pneigh_get_next(seq, pn, pos); | ||
1838 | if (!pn) | ||
1839 | break; | ||
1840 | } | ||
1841 | } | ||
1842 | return *pos ? NULL : pn; | ||
1843 | } | ||
1844 | |||
1845 | static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) | ||
1846 | { | ||
1847 | struct neigh_seq_state *state = seq->private; | ||
1848 | void *rc; | ||
1849 | |||
1850 | rc = neigh_get_idx(seq, pos); | ||
1851 | if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) | ||
1852 | rc = pneigh_get_idx(seq, pos); | ||
1853 | |||
1854 | return rc; | ||
1855 | } | ||
1856 | |||
1857 | void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) | ||
1858 | { | ||
1859 | struct neigh_seq_state *state = seq->private; | ||
1860 | loff_t pos_minus_one; | ||
1861 | |||
1862 | state->tbl = tbl; | ||
1863 | state->bucket = 0; | ||
1864 | state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); | ||
1865 | |||
1866 | read_lock_bh(&tbl->lock); | ||
1867 | |||
1868 | pos_minus_one = *pos - 1; | ||
1869 | return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN; | ||
1870 | } | ||
1871 | EXPORT_SYMBOL(neigh_seq_start); | ||
1872 | |||
1873 | void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1874 | { | ||
1875 | struct neigh_seq_state *state; | ||
1876 | void *rc; | ||
1877 | |||
1878 | if (v == SEQ_START_TOKEN) { | ||
1879 | rc = neigh_get_idx(seq, pos); | ||
1880 | goto out; | ||
1881 | } | ||
1882 | |||
1883 | state = seq->private; | ||
1884 | if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { | ||
1885 | rc = neigh_get_next(seq, v, NULL); | ||
1886 | if (rc) | ||
1887 | goto out; | ||
1888 | if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) | ||
1889 | rc = pneigh_get_first(seq); | ||
1890 | } else { | ||
1891 | BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); | ||
1892 | rc = pneigh_get_next(seq, v, NULL); | ||
1893 | } | ||
1894 | out: | ||
1895 | ++(*pos); | ||
1896 | return rc; | ||
1897 | } | ||
1898 | EXPORT_SYMBOL(neigh_seq_next); | ||
1899 | |||
1900 | void neigh_seq_stop(struct seq_file *seq, void *v) | ||
1901 | { | ||
1902 | struct neigh_seq_state *state = seq->private; | ||
1903 | struct neigh_table *tbl = state->tbl; | ||
1904 | |||
1905 | read_unlock_bh(&tbl->lock); | ||
1906 | } | ||
1907 | EXPORT_SYMBOL(neigh_seq_stop); | ||
1908 | |||
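For illustration (not part of this commit), how a protocol's /proc table wires into the iterators above, in the shape 2.6-era /proc/net/arp uses; 'example_tbl' and example_seq_show() are hypothetical:

	static int example_seq_show(struct seq_file *seq, void *v);

	static void *example_seq_start(struct seq_file *seq, loff_t *pos)
	{
		/* takes tbl->lock for reading; neigh_seq_stop() releases it */
		return neigh_seq_start(seq, pos, &example_tbl,
				       NEIGH_SEQ_SKIP_NOARP);
	}

	static struct seq_operations example_seq_ops = {
		.start	= example_seq_start,
		.next	= neigh_seq_next,
		.stop	= neigh_seq_stop,
		.show	= example_seq_show,
	};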
1909 | /* statistics via seq_file */ | ||
1910 | |||
1911 | static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) | ||
1912 | { | ||
1913 | struct proc_dir_entry *pde = seq->private; | ||
1914 | struct neigh_table *tbl = pde->data; | ||
1915 | int cpu; | ||
1916 | |||
1917 | if (*pos == 0) | ||
1918 | return SEQ_START_TOKEN; | ||
1919 | |||
1920 | for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { | ||
1921 | if (!cpu_possible(cpu)) | ||
1922 | continue; | ||
1923 | *pos = cpu+1; | ||
1924 | return per_cpu_ptr(tbl->stats, cpu); | ||
1925 | } | ||
1926 | return NULL; | ||
1927 | } | ||
1928 | |||
1929 | static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1930 | { | ||
1931 | struct proc_dir_entry *pde = seq->private; | ||
1932 | struct neigh_table *tbl = pde->data; | ||
1933 | int cpu; | ||
1934 | |||
1935 | for (cpu = *pos; cpu < NR_CPUS; ++cpu) { | ||
1936 | if (!cpu_possible(cpu)) | ||
1937 | continue; | ||
1938 | *pos = cpu+1; | ||
1939 | return per_cpu_ptr(tbl->stats, cpu); | ||
1940 | } | ||
1941 | return NULL; | ||
1942 | } | ||
1943 | |||
1944 | static void neigh_stat_seq_stop(struct seq_file *seq, void *v) | ||
1945 | { | ||
1946 | |||
1947 | } | ||
1948 | |||
1949 | static int neigh_stat_seq_show(struct seq_file *seq, void *v) | ||
1950 | { | ||
1951 | struct proc_dir_entry *pde = seq->private; | ||
1952 | struct neigh_table *tbl = pde->data; | ||
1953 | struct neigh_statistics *st = v; | ||
1954 | |||
1955 | if (v == SEQ_START_TOKEN) { | ||
1956 | 		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); | ||
1957 | return 0; | ||
1958 | } | ||
1959 | |||
1960 | seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " | ||
1961 | "%08lx %08lx %08lx %08lx\n", | ||
1962 | atomic_read(&tbl->entries), | ||
1963 | |||
1964 | st->allocs, | ||
1965 | st->destroys, | ||
1966 | st->hash_grows, | ||
1967 | |||
1968 | st->lookups, | ||
1969 | st->hits, | ||
1970 | |||
1971 | st->res_failed, | ||
1972 | |||
1973 | st->rcv_probes_mcast, | ||
1974 | st->rcv_probes_ucast, | ||
1975 | |||
1976 | st->periodic_gc_runs, | ||
1977 | st->forced_gc_runs | ||
1978 | ); | ||
1979 | |||
1980 | return 0; | ||
1981 | } | ||
1982 | |||
1983 | static struct seq_operations neigh_stat_seq_ops = { | ||
1984 | .start = neigh_stat_seq_start, | ||
1985 | .next = neigh_stat_seq_next, | ||
1986 | .stop = neigh_stat_seq_stop, | ||
1987 | .show = neigh_stat_seq_show, | ||
1988 | }; | ||
1989 | |||
1990 | static int neigh_stat_seq_open(struct inode *inode, struct file *file) | ||
1991 | { | ||
1992 | int ret = seq_open(file, &neigh_stat_seq_ops); | ||
1993 | |||
1994 | if (!ret) { | ||
1995 | struct seq_file *sf = file->private_data; | ||
1996 | sf->private = PDE(inode); | ||
1997 | } | ||
1998 | return ret; | ||
1999 | } | ||
2000 | |||
2001 | static struct file_operations neigh_stat_seq_fops = { | ||
2002 | .owner = THIS_MODULE, | ||
2003 | .open = neigh_stat_seq_open, | ||
2004 | .read = seq_read, | ||
2005 | .llseek = seq_lseek, | ||
2006 | .release = seq_release, | ||
2007 | }; | ||
2008 | |||
2009 | #endif /* CONFIG_PROC_FS */ | ||
2010 | |||
2011 | #ifdef CONFIG_ARPD | ||
2012 | void neigh_app_ns(struct neighbour *n) | ||
2013 | { | ||
2014 | struct nlmsghdr *nlh; | ||
2015 | int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); | ||
2016 | struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); | ||
2017 | |||
2018 | if (!skb) | ||
2019 | return; | ||
2020 | |||
2021 | if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) { | ||
2022 | kfree_skb(skb); | ||
2023 | return; | ||
2024 | } | ||
2025 | nlh = (struct nlmsghdr *)skb->data; | ||
2026 | nlh->nlmsg_flags = NLM_F_REQUEST; | ||
2027 | NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; | ||
2028 | netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); | ||
2029 | } | ||
2030 | |||
2031 | static void neigh_app_notify(struct neighbour *n) | ||
2032 | { | ||
2033 | struct nlmsghdr *nlh; | ||
2034 | int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); | ||
2035 | struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); | ||
2036 | |||
2037 | if (!skb) | ||
2038 | return; | ||
2039 | |||
2040 | if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) { | ||
2041 | kfree_skb(skb); | ||
2042 | return; | ||
2043 | } | ||
2044 | nlh = (struct nlmsghdr *)skb->data; | ||
2045 | NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; | ||
2046 | netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); | ||
2047 | } | ||
2048 | |||
2049 | #endif /* CONFIG_ARPD */ | ||
2050 | |||
2051 | #ifdef CONFIG_SYSCTL | ||
2052 | |||
2053 | static struct neigh_sysctl_table { | ||
2054 | struct ctl_table_header *sysctl_header; | ||
2055 | ctl_table neigh_vars[__NET_NEIGH_MAX]; | ||
2056 | ctl_table neigh_dev[2]; | ||
2057 | ctl_table neigh_neigh_dir[2]; | ||
2058 | ctl_table neigh_proto_dir[2]; | ||
2059 | ctl_table neigh_root_dir[2]; | ||
2060 | } neigh_sysctl_template = { | ||
2061 | .neigh_vars = { | ||
2062 | { | ||
2063 | .ctl_name = NET_NEIGH_MCAST_SOLICIT, | ||
2064 | .procname = "mcast_solicit", | ||
2065 | .maxlen = sizeof(int), | ||
2066 | .mode = 0644, | ||
2067 | .proc_handler = &proc_dointvec, | ||
2068 | }, | ||
2069 | { | ||
2070 | .ctl_name = NET_NEIGH_UCAST_SOLICIT, | ||
2071 | .procname = "ucast_solicit", | ||
2072 | .maxlen = sizeof(int), | ||
2073 | .mode = 0644, | ||
2074 | .proc_handler = &proc_dointvec, | ||
2075 | }, | ||
2076 | { | ||
2077 | .ctl_name = NET_NEIGH_APP_SOLICIT, | ||
2078 | .procname = "app_solicit", | ||
2079 | .maxlen = sizeof(int), | ||
2080 | .mode = 0644, | ||
2081 | .proc_handler = &proc_dointvec, | ||
2082 | }, | ||
2083 | { | ||
2084 | .ctl_name = NET_NEIGH_RETRANS_TIME, | ||
2085 | .procname = "retrans_time", | ||
2086 | .maxlen = sizeof(int), | ||
2087 | .mode = 0644, | ||
2088 | .proc_handler = &proc_dointvec_userhz_jiffies, | ||
2089 | }, | ||
2090 | { | ||
2091 | .ctl_name = NET_NEIGH_REACHABLE_TIME, | ||
2092 | .procname = "base_reachable_time", | ||
2093 | .maxlen = sizeof(int), | ||
2094 | .mode = 0644, | ||
2095 | .proc_handler = &proc_dointvec_jiffies, | ||
2096 | .strategy = &sysctl_jiffies, | ||
2097 | }, | ||
2098 | { | ||
2099 | .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, | ||
2100 | .procname = "delay_first_probe_time", | ||
2101 | .maxlen = sizeof(int), | ||
2102 | .mode = 0644, | ||
2103 | .proc_handler = &proc_dointvec_jiffies, | ||
2104 | .strategy = &sysctl_jiffies, | ||
2105 | }, | ||
2106 | { | ||
2107 | .ctl_name = NET_NEIGH_GC_STALE_TIME, | ||
2108 | .procname = "gc_stale_time", | ||
2109 | .maxlen = sizeof(int), | ||
2110 | .mode = 0644, | ||
2111 | .proc_handler = &proc_dointvec_jiffies, | ||
2112 | .strategy = &sysctl_jiffies, | ||
2113 | }, | ||
2114 | { | ||
2115 | .ctl_name = NET_NEIGH_UNRES_QLEN, | ||
2116 | .procname = "unres_qlen", | ||
2117 | .maxlen = sizeof(int), | ||
2118 | .mode = 0644, | ||
2119 | .proc_handler = &proc_dointvec, | ||
2120 | }, | ||
2121 | { | ||
2122 | .ctl_name = NET_NEIGH_PROXY_QLEN, | ||
2123 | .procname = "proxy_qlen", | ||
2124 | .maxlen = sizeof(int), | ||
2125 | .mode = 0644, | ||
2126 | .proc_handler = &proc_dointvec, | ||
2127 | }, | ||
2128 | { | ||
2129 | .ctl_name = NET_NEIGH_ANYCAST_DELAY, | ||
2130 | .procname = "anycast_delay", | ||
2131 | .maxlen = sizeof(int), | ||
2132 | .mode = 0644, | ||
2133 | .proc_handler = &proc_dointvec_userhz_jiffies, | ||
2134 | }, | ||
2135 | { | ||
2136 | .ctl_name = NET_NEIGH_PROXY_DELAY, | ||
2137 | .procname = "proxy_delay", | ||
2138 | .maxlen = sizeof(int), | ||
2139 | .mode = 0644, | ||
2140 | .proc_handler = &proc_dointvec_userhz_jiffies, | ||
2141 | }, | ||
2142 | { | ||
2143 | .ctl_name = NET_NEIGH_LOCKTIME, | ||
2144 | .procname = "locktime", | ||
2145 | .maxlen = sizeof(int), | ||
2146 | .mode = 0644, | ||
2147 | .proc_handler = &proc_dointvec_userhz_jiffies, | ||
2148 | }, | ||
2149 | { | ||
2150 | .ctl_name = NET_NEIGH_GC_INTERVAL, | ||
2151 | .procname = "gc_interval", | ||
2152 | .maxlen = sizeof(int), | ||
2153 | .mode = 0644, | ||
2154 | .proc_handler = &proc_dointvec_jiffies, | ||
2155 | .strategy = &sysctl_jiffies, | ||
2156 | }, | ||
2157 | { | ||
2158 | .ctl_name = NET_NEIGH_GC_THRESH1, | ||
2159 | .procname = "gc_thresh1", | ||
2160 | .maxlen = sizeof(int), | ||
2161 | .mode = 0644, | ||
2162 | .proc_handler = &proc_dointvec, | ||
2163 | }, | ||
2164 | { | ||
2165 | .ctl_name = NET_NEIGH_GC_THRESH2, | ||
2166 | .procname = "gc_thresh2", | ||
2167 | .maxlen = sizeof(int), | ||
2168 | .mode = 0644, | ||
2169 | .proc_handler = &proc_dointvec, | ||
2170 | }, | ||
2171 | { | ||
2172 | .ctl_name = NET_NEIGH_GC_THRESH3, | ||
2173 | .procname = "gc_thresh3", | ||
2174 | .maxlen = sizeof(int), | ||
2175 | .mode = 0644, | ||
2176 | .proc_handler = &proc_dointvec, | ||
2177 | }, | ||
2178 | { | ||
2179 | .ctl_name = NET_NEIGH_RETRANS_TIME_MS, | ||
2180 | .procname = "retrans_time_ms", | ||
2181 | .maxlen = sizeof(int), | ||
2182 | .mode = 0644, | ||
2183 | .proc_handler = &proc_dointvec_ms_jiffies, | ||
2184 | .strategy = &sysctl_ms_jiffies, | ||
2185 | }, | ||
2186 | { | ||
2187 | .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, | ||
2188 | .procname = "base_reachable_time_ms", | ||
2189 | .maxlen = sizeof(int), | ||
2190 | .mode = 0644, | ||
2191 | .proc_handler = &proc_dointvec_ms_jiffies, | ||
2192 | .strategy = &sysctl_ms_jiffies, | ||
2193 | }, | ||
2194 | }, | ||
2195 | .neigh_dev = { | ||
2196 | { | ||
2197 | .ctl_name = NET_PROTO_CONF_DEFAULT, | ||
2198 | .procname = "default", | ||
2199 | .mode = 0555, | ||
2200 | }, | ||
2201 | }, | ||
2202 | .neigh_neigh_dir = { | ||
2203 | { | ||
2204 | .procname = "neigh", | ||
2205 | .mode = 0555, | ||
2206 | }, | ||
2207 | }, | ||
2208 | .neigh_proto_dir = { | ||
2209 | { | ||
2210 | .mode = 0555, | ||
2211 | }, | ||
2212 | }, | ||
2213 | .neigh_root_dir = { | ||
2214 | { | ||
2215 | .ctl_name = CTL_NET, | ||
2216 | .procname = "net", | ||
2217 | .mode = 0555, | ||
2218 | }, | ||
2219 | }, | ||
2220 | }; | ||
2221 | |||
2222 | int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | ||
2223 | int p_id, int pdev_id, char *p_name, | ||
2224 | proc_handler *handler, ctl_handler *strategy) | ||
2225 | { | ||
2226 | struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); | ||
2227 | const char *dev_name_source = NULL; | ||
2228 | char *dev_name = NULL; | ||
2229 | int err = 0; | ||
2230 | |||
2231 | if (!t) | ||
2232 | return -ENOBUFS; | ||
2233 | memcpy(t, &neigh_sysctl_template, sizeof(*t)); | ||
2234 | t->neigh_vars[0].data = &p->mcast_probes; | ||
2235 | t->neigh_vars[1].data = &p->ucast_probes; | ||
2236 | t->neigh_vars[2].data = &p->app_probes; | ||
2237 | t->neigh_vars[3].data = &p->retrans_time; | ||
2238 | t->neigh_vars[4].data = &p->base_reachable_time; | ||
2239 | t->neigh_vars[5].data = &p->delay_probe_time; | ||
2240 | t->neigh_vars[6].data = &p->gc_staletime; | ||
2241 | t->neigh_vars[7].data = &p->queue_len; | ||
2242 | t->neigh_vars[8].data = &p->proxy_qlen; | ||
2243 | t->neigh_vars[9].data = &p->anycast_delay; | ||
2244 | t->neigh_vars[10].data = &p->proxy_delay; | ||
2245 | t->neigh_vars[11].data = &p->locktime; | ||
2246 | |||
2247 | if (dev) { | ||
2248 | dev_name_source = dev->name; | ||
2249 | t->neigh_dev[0].ctl_name = dev->ifindex; | ||
2250 | t->neigh_vars[12].procname = NULL; | ||
2251 | t->neigh_vars[13].procname = NULL; | ||
2252 | t->neigh_vars[14].procname = NULL; | ||
2253 | t->neigh_vars[15].procname = NULL; | ||
2254 | } else { | ||
2255 | dev_name_source = t->neigh_dev[0].procname; | ||
2256 | 		t->neigh_vars[12].data = (int *)(p + 1);	/* gc_* ints follow the default parms inside struct neigh_table */ | ||
2257 | t->neigh_vars[13].data = (int *)(p + 1) + 1; | ||
2258 | t->neigh_vars[14].data = (int *)(p + 1) + 2; | ||
2259 | t->neigh_vars[15].data = (int *)(p + 1) + 3; | ||
2260 | } | ||
2261 | |||
2262 | t->neigh_vars[16].data = &p->retrans_time; | ||
2263 | t->neigh_vars[17].data = &p->base_reachable_time; | ||
2264 | |||
2265 | if (handler || strategy) { | ||
2266 | /* RetransTime */ | ||
2267 | t->neigh_vars[3].proc_handler = handler; | ||
2268 | t->neigh_vars[3].strategy = strategy; | ||
2269 | t->neigh_vars[3].extra1 = dev; | ||
2270 | /* ReachableTime */ | ||
2271 | t->neigh_vars[4].proc_handler = handler; | ||
2272 | t->neigh_vars[4].strategy = strategy; | ||
2273 | t->neigh_vars[4].extra1 = dev; | ||
2274 | /* RetransTime (in milliseconds)*/ | ||
2275 | t->neigh_vars[16].proc_handler = handler; | ||
2276 | t->neigh_vars[16].strategy = strategy; | ||
2277 | t->neigh_vars[16].extra1 = dev; | ||
2278 | /* ReachableTime (in milliseconds) */ | ||
2279 | t->neigh_vars[17].proc_handler = handler; | ||
2280 | t->neigh_vars[17].strategy = strategy; | ||
2281 | t->neigh_vars[17].extra1 = dev; | ||
2282 | } | ||
2283 | |||
2284 | dev_name = net_sysctl_strdup(dev_name_source); | ||
2285 | if (!dev_name) { | ||
2286 | err = -ENOBUFS; | ||
2287 | goto free; | ||
2288 | } | ||
2289 | |||
2290 | t->neigh_dev[0].procname = dev_name; | ||
2291 | |||
2292 | t->neigh_neigh_dir[0].ctl_name = pdev_id; | ||
2293 | |||
2294 | t->neigh_proto_dir[0].procname = p_name; | ||
2295 | t->neigh_proto_dir[0].ctl_name = p_id; | ||
2296 | |||
2297 | t->neigh_dev[0].child = t->neigh_vars; | ||
2298 | t->neigh_neigh_dir[0].child = t->neigh_dev; | ||
2299 | t->neigh_proto_dir[0].child = t->neigh_neigh_dir; | ||
2300 | t->neigh_root_dir[0].child = t->neigh_proto_dir; | ||
2301 | |||
2302 | t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0); | ||
2303 | if (!t->sysctl_header) { | ||
2304 | err = -ENOBUFS; | ||
2305 | goto free_procname; | ||
2306 | } | ||
2307 | p->sysctl_table = t; | ||
2308 | return 0; | ||
2309 | |||
2310 | /* error path */ | ||
2311 | free_procname: | ||
2312 | kfree(dev_name); | ||
2313 | free: | ||
2314 | kfree(t); | ||
2315 | |||
2316 | return err; | ||
2317 | } | ||
2318 | |||
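For illustration (not part of this commit), a typical registration in the style of 2.6-era IPv4 ARP. It creates /proc/sys/net/ipv4/neigh/<dev>/... entries backed by the fields of 'p'; passing NULL for handler and strategy keeps the defaults from the template above:

	static int example_register_sysctls(struct net_device *dev,
					    struct neigh_parms *p)
	{
		return neigh_sysctl_register(dev, p, NET_IPV4, NET_IPV4_NEIGH,
					     "ipv4", NULL, NULL);
	}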
2319 | void neigh_sysctl_unregister(struct neigh_parms *p) | ||
2320 | { | ||
2321 | if (p->sysctl_table) { | ||
2322 | struct neigh_sysctl_table *t = p->sysctl_table; | ||
2323 | p->sysctl_table = NULL; | ||
2324 | unregister_sysctl_table(t->sysctl_header); | ||
2325 | kfree(t->neigh_dev[0].procname); | ||
2326 | kfree(t); | ||
2327 | } | ||
2328 | } | ||
2329 | |||
2330 | #endif /* CONFIG_SYSCTL */ | ||
2331 | |||
2332 | EXPORT_SYMBOL(__neigh_event_send); | ||
2333 | EXPORT_SYMBOL(neigh_add); | ||
2334 | EXPORT_SYMBOL(neigh_changeaddr); | ||
2335 | EXPORT_SYMBOL(neigh_compat_output); | ||
2336 | EXPORT_SYMBOL(neigh_connected_output); | ||
2337 | EXPORT_SYMBOL(neigh_create); | ||
2338 | EXPORT_SYMBOL(neigh_delete); | ||
2339 | EXPORT_SYMBOL(neigh_destroy); | ||
2340 | EXPORT_SYMBOL(neigh_dump_info); | ||
2341 | EXPORT_SYMBOL(neigh_event_ns); | ||
2342 | EXPORT_SYMBOL(neigh_ifdown); | ||
2343 | EXPORT_SYMBOL(neigh_lookup); | ||
2344 | EXPORT_SYMBOL(neigh_lookup_nodev); | ||
2345 | EXPORT_SYMBOL(neigh_parms_alloc); | ||
2346 | EXPORT_SYMBOL(neigh_parms_release); | ||
2347 | EXPORT_SYMBOL(neigh_rand_reach_time); | ||
2348 | EXPORT_SYMBOL(neigh_resolve_output); | ||
2349 | EXPORT_SYMBOL(neigh_table_clear); | ||
2350 | EXPORT_SYMBOL(neigh_table_init); | ||
2351 | EXPORT_SYMBOL(neigh_update); | ||
2352 | EXPORT_SYMBOL(neigh_update_hhs); | ||
2353 | EXPORT_SYMBOL(pneigh_enqueue); | ||
2354 | EXPORT_SYMBOL(pneigh_lookup); | ||
2355 | |||
2356 | #ifdef CONFIG_ARPD | ||
2357 | EXPORT_SYMBOL(neigh_app_ns); | ||
2358 | #endif | ||
2359 | #ifdef CONFIG_SYSCTL | ||
2360 | EXPORT_SYMBOL(neigh_sysctl_register); | ||
2361 | EXPORT_SYMBOL(neigh_sysctl_unregister); | ||
2362 | #endif | ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c new file mode 100644 index 000000000000..060f703659e8 --- /dev/null +++ b/net/core/net-sysfs.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | * net-sysfs.c - network device class and attributes | ||
3 | * | ||
4 | * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/netdevice.h> | ||
15 | #include <linux/if_arp.h> | ||
16 | #include <net/sock.h> | ||
17 | #include <linux/rtnetlink.h> | ||
18 | #include <linux/wireless.h> | ||
19 | |||
20 | #define to_class_dev(obj) container_of(obj,struct class_device,kobj) | ||
21 | #define to_net_dev(class) container_of(class, struct net_device, class_dev) | ||
22 | |||
23 | static const char fmt_hex[] = "%#x\n"; | ||
24 | static const char fmt_dec[] = "%d\n"; | ||
25 | static const char fmt_ulong[] = "%lu\n"; | ||
26 | |||
27 | static inline int dev_isalive(const struct net_device *dev) | ||
28 | { | ||
29 | return dev->reg_state == NETREG_REGISTERED; | ||
30 | } | ||
31 | |||
32 | /* use same locking rules as GIF* ioctls */ | ||
33 | static ssize_t netdev_show(const struct class_device *cd, char *buf, | ||
34 | ssize_t (*format)(const struct net_device *, char *)) | ||
35 | { | ||
36 | struct net_device *net = to_net_dev(cd); | ||
37 | ssize_t ret = -EINVAL; | ||
38 | |||
39 | read_lock(&dev_base_lock); | ||
40 | if (dev_isalive(net)) | ||
41 | ret = (*format)(net, buf); | ||
42 | read_unlock(&dev_base_lock); | ||
43 | |||
44 | return ret; | ||
45 | } | ||
46 | |||
47 | /* generate a show function for simple field */ | ||
48 | #define NETDEVICE_SHOW(field, format_string) \ | ||
49 | static ssize_t format_##field(const struct net_device *net, char *buf) \ | ||
50 | { \ | ||
51 | return sprintf(buf, format_string, net->field); \ | ||
52 | } \ | ||
53 | static ssize_t show_##field(struct class_device *cd, char *buf) \ | ||
54 | { \ | ||
55 | return netdev_show(cd, buf, format_##field); \ | ||
56 | } | ||
57 | |||
58 | |||
59 | /* use same locking and permission rules as SIF* ioctls */ | ||
60 | static ssize_t netdev_store(struct class_device *dev, | ||
61 | const char *buf, size_t len, | ||
62 | int (*set)(struct net_device *, unsigned long)) | ||
63 | { | ||
64 | struct net_device *net = to_net_dev(dev); | ||
65 | char *endp; | ||
66 | unsigned long new; | ||
67 | int ret = -EINVAL; | ||
68 | |||
69 | if (!capable(CAP_NET_ADMIN)) | ||
70 | return -EPERM; | ||
71 | |||
72 | new = simple_strtoul(buf, &endp, 0); | ||
73 | if (endp == buf) | ||
74 | goto err; | ||
75 | |||
76 | rtnl_lock(); | ||
77 | if (dev_isalive(net)) { | ||
78 | if ((ret = (*set)(net, new)) == 0) | ||
79 | ret = len; | ||
80 | } | ||
81 | rtnl_unlock(); | ||
82 | err: | ||
83 | return ret; | ||
84 | } | ||
85 | |||
86 | /* generate a read-only network device class attribute */ | ||
87 | #define NETDEVICE_ATTR(field, format_string) \ | ||
88 | NETDEVICE_SHOW(field, format_string) \ | ||
89 | static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ | ||
90 | |||
91 | NETDEVICE_ATTR(addr_len, fmt_dec); | ||
92 | NETDEVICE_ATTR(iflink, fmt_dec); | ||
93 | NETDEVICE_ATTR(ifindex, fmt_dec); | ||
94 | NETDEVICE_ATTR(features, fmt_hex); | ||
95 | NETDEVICE_ATTR(type, fmt_dec); | ||
96 | |||
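To show what the generator above produces, here is the hand expansion of NETDEVICE_ATTR(type, fmt_dec). The attribute surfaces as /sys/class/net/<dev>/type, so e.g. "cat /sys/class/net/eth0/type" prints the device's ARPHRD_* value:

	static ssize_t format_type(const struct net_device *net, char *buf)
	{
		return sprintf(buf, fmt_dec, net->type);
	}
	static ssize_t show_type(struct class_device *cd, char *buf)
	{
		return netdev_show(cd, buf, format_type);	/* holds dev_base_lock */
	}
	static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);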
97 | /* use same locking rules as GIFHWADDR ioctls */ | ||
98 | static ssize_t format_addr(char *buf, const unsigned char *addr, int len) | ||
99 | { | ||
100 | int i; | ||
101 | char *cp = buf; | ||
102 | |||
103 | for (i = 0; i < len; i++) | ||
104 | cp += sprintf(cp, "%02x%c", addr[i], | ||
105 | i == (len - 1) ? '\n' : ':'); | ||
106 | return cp - buf; | ||
107 | } | ||
108 | |||
109 | static ssize_t show_address(struct class_device *dev, char *buf) | ||
110 | { | ||
111 | struct net_device *net = to_net_dev(dev); | ||
112 | ssize_t ret = -EINVAL; | ||
113 | |||
114 | read_lock(&dev_base_lock); | ||
115 | if (dev_isalive(net)) | ||
116 | ret = format_addr(buf, net->dev_addr, net->addr_len); | ||
117 | read_unlock(&dev_base_lock); | ||
118 | return ret; | ||
119 | } | ||
120 | |||
121 | static ssize_t show_broadcast(struct class_device *dev, char *buf) | ||
122 | { | ||
123 | struct net_device *net = to_net_dev(dev); | ||
124 | if (dev_isalive(net)) | ||
125 | return format_addr(buf, net->broadcast, net->addr_len); | ||
126 | return -EINVAL; | ||
127 | } | ||
128 | |||
129 | static ssize_t show_carrier(struct class_device *dev, char *buf) | ||
130 | { | ||
131 | struct net_device *netdev = to_net_dev(dev); | ||
132 | if (netif_running(netdev)) { | ||
133 | return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); | ||
134 | } | ||
135 | return -EINVAL; | ||
136 | } | ||
137 | |||
138 | static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); | ||
139 | static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL); | ||
140 | static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); | ||
141 | |||
142 | /* read-write attributes */ | ||
143 | NETDEVICE_SHOW(mtu, fmt_dec); | ||
144 | |||
145 | static int change_mtu(struct net_device *net, unsigned long new_mtu) | ||
146 | { | ||
147 | return dev_set_mtu(net, (int) new_mtu); | ||
148 | } | ||
149 | |||
150 | static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len) | ||
151 | { | ||
152 | return netdev_store(dev, buf, len, change_mtu); | ||
153 | } | ||
154 | |||
155 | static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu); | ||
156 | |||
157 | NETDEVICE_SHOW(flags, fmt_hex); | ||
158 | |||
159 | static int change_flags(struct net_device *net, unsigned long new_flags) | ||
160 | { | ||
161 | return dev_change_flags(net, (unsigned) new_flags); | ||
162 | } | ||
163 | |||
164 | static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len) | ||
165 | { | ||
166 | return netdev_store(dev, buf, len, change_flags); | ||
167 | } | ||
168 | |||
169 | static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags); | ||
170 | |||
171 | NETDEVICE_SHOW(tx_queue_len, fmt_ulong); | ||
172 | |||
173 | static int change_tx_queue_len(struct net_device *net, unsigned long new_len) | ||
174 | { | ||
175 | net->tx_queue_len = new_len; | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len) | ||
180 | { | ||
181 | return netdev_store(dev, buf, len, change_tx_queue_len); | ||
182 | } | ||
183 | |||
184 | static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, | ||
185 | store_tx_queue_len); | ||
186 | |||
187 | |||
188 | static struct class_device_attribute *net_class_attributes[] = { | ||
189 | &class_device_attr_ifindex, | ||
190 | &class_device_attr_iflink, | ||
191 | &class_device_attr_addr_len, | ||
192 | &class_device_attr_tx_queue_len, | ||
193 | &class_device_attr_features, | ||
194 | &class_device_attr_mtu, | ||
195 | &class_device_attr_flags, | ||
196 | &class_device_attr_type, | ||
197 | &class_device_attr_address, | ||
198 | &class_device_attr_broadcast, | ||
199 | &class_device_attr_carrier, | ||
200 | NULL | ||
201 | }; | ||
202 | |||
203 | /* Show a given attribute in the statistics group */ | ||
204 | static ssize_t netstat_show(const struct class_device *cd, char *buf, | ||
205 | unsigned long offset) | ||
206 | { | ||
207 | struct net_device *dev = to_net_dev(cd); | ||
208 | struct net_device_stats *stats; | ||
209 | ssize_t ret = -EINVAL; | ||
210 | |||
211 | 	if (offset >= sizeof(struct net_device_stats) || | ||
212 | offset % sizeof(unsigned long) != 0) | ||
213 | WARN_ON(1); | ||
214 | |||
215 | read_lock(&dev_base_lock); | ||
216 | if (dev_isalive(dev) && dev->get_stats && | ||
217 | (stats = (*dev->get_stats)(dev))) | ||
218 | ret = sprintf(buf, fmt_ulong, | ||
219 | *(unsigned long *)(((u8 *) stats) + offset)); | ||
220 | |||
221 | read_unlock(&dev_base_lock); | ||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | /* generate a read-only statistics attribute */ | ||
226 | #define NETSTAT_ENTRY(name) \ | ||
227 | static ssize_t show_##name(struct class_device *cd, char *buf) \ | ||
228 | { \ | ||
229 | return netstat_show(cd, buf, \ | ||
230 | offsetof(struct net_device_stats, name)); \ | ||
231 | } \ | ||
232 | static CLASS_DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | ||
233 | |||
234 | NETSTAT_ENTRY(rx_packets); | ||
235 | NETSTAT_ENTRY(tx_packets); | ||
236 | NETSTAT_ENTRY(rx_bytes); | ||
237 | NETSTAT_ENTRY(tx_bytes); | ||
238 | NETSTAT_ENTRY(rx_errors); | ||
239 | NETSTAT_ENTRY(tx_errors); | ||
240 | NETSTAT_ENTRY(rx_dropped); | ||
241 | NETSTAT_ENTRY(tx_dropped); | ||
242 | NETSTAT_ENTRY(multicast); | ||
243 | NETSTAT_ENTRY(collisions); | ||
244 | NETSTAT_ENTRY(rx_length_errors); | ||
245 | NETSTAT_ENTRY(rx_over_errors); | ||
246 | NETSTAT_ENTRY(rx_crc_errors); | ||
247 | NETSTAT_ENTRY(rx_frame_errors); | ||
248 | NETSTAT_ENTRY(rx_fifo_errors); | ||
249 | NETSTAT_ENTRY(rx_missed_errors); | ||
250 | NETSTAT_ENTRY(tx_aborted_errors); | ||
251 | NETSTAT_ENTRY(tx_carrier_errors); | ||
252 | NETSTAT_ENTRY(tx_fifo_errors); | ||
253 | NETSTAT_ENTRY(tx_heartbeat_errors); | ||
254 | NETSTAT_ENTRY(tx_window_errors); | ||
255 | NETSTAT_ENTRY(rx_compressed); | ||
256 | NETSTAT_ENTRY(tx_compressed); | ||
257 | |||
258 | static struct attribute *netstat_attrs[] = { | ||
259 | &class_device_attr_rx_packets.attr, | ||
260 | &class_device_attr_tx_packets.attr, | ||
261 | &class_device_attr_rx_bytes.attr, | ||
262 | &class_device_attr_tx_bytes.attr, | ||
263 | &class_device_attr_rx_errors.attr, | ||
264 | &class_device_attr_tx_errors.attr, | ||
265 | &class_device_attr_rx_dropped.attr, | ||
266 | &class_device_attr_tx_dropped.attr, | ||
267 | &class_device_attr_multicast.attr, | ||
268 | &class_device_attr_collisions.attr, | ||
269 | &class_device_attr_rx_length_errors.attr, | ||
270 | &class_device_attr_rx_over_errors.attr, | ||
271 | &class_device_attr_rx_crc_errors.attr, | ||
272 | &class_device_attr_rx_frame_errors.attr, | ||
273 | &class_device_attr_rx_fifo_errors.attr, | ||
274 | &class_device_attr_rx_missed_errors.attr, | ||
275 | &class_device_attr_tx_aborted_errors.attr, | ||
276 | &class_device_attr_tx_carrier_errors.attr, | ||
277 | &class_device_attr_tx_fifo_errors.attr, | ||
278 | &class_device_attr_tx_heartbeat_errors.attr, | ||
279 | &class_device_attr_tx_window_errors.attr, | ||
280 | &class_device_attr_rx_compressed.attr, | ||
281 | &class_device_attr_tx_compressed.attr, | ||
282 | NULL | ||
283 | }; | ||
284 | |||
285 | |||
286 | static struct attribute_group netstat_group = { | ||
287 | .name = "statistics", | ||
288 | .attrs = netstat_attrs, | ||
289 | }; | ||
290 | |||
291 | #ifdef WIRELESS_EXT | ||
292 | /* helper function that does all the locking etc for wireless stats */ | ||
293 | static ssize_t wireless_show(struct class_device *cd, char *buf, | ||
294 | ssize_t (*format)(const struct iw_statistics *, | ||
295 | char *)) | ||
296 | { | ||
297 | struct net_device *dev = to_net_dev(cd); | ||
298 | const struct iw_statistics *iw; | ||
299 | ssize_t ret = -EINVAL; | ||
300 | |||
301 | read_lock(&dev_base_lock); | ||
302 | if (dev_isalive(dev) && dev->get_wireless_stats | ||
303 | && (iw = dev->get_wireless_stats(dev)) != NULL) | ||
304 | ret = (*format)(iw, buf); | ||
305 | read_unlock(&dev_base_lock); | ||
306 | |||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | /* show function template for wireless fields */ | ||
311 | #define WIRELESS_SHOW(name, field, format_string) \ | ||
312 | static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \ | ||
313 | { \ | ||
314 | return sprintf(buf, format_string, iw->field); \ | ||
315 | } \ | ||
316 | static ssize_t show_iw_##name(struct class_device *cd, char *buf) \ | ||
317 | { \ | ||
318 | return wireless_show(cd, buf, format_iw_##name); \ | ||
319 | } \ | ||
320 | static CLASS_DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL) | ||
321 | |||
322 | WIRELESS_SHOW(status, status, fmt_hex); | ||
323 | WIRELESS_SHOW(link, qual.qual, fmt_dec); | ||
324 | WIRELESS_SHOW(level, qual.level, fmt_dec); | ||
325 | WIRELESS_SHOW(noise, qual.noise, fmt_dec); | ||
326 | WIRELESS_SHOW(nwid, discard.nwid, fmt_dec); | ||
327 | WIRELESS_SHOW(crypt, discard.code, fmt_dec); | ||
328 | WIRELESS_SHOW(fragment, discard.fragment, fmt_dec); | ||
329 | WIRELESS_SHOW(misc, discard.misc, fmt_dec); | ||
330 | WIRELESS_SHOW(retries, discard.retries, fmt_dec); | ||
331 | WIRELESS_SHOW(beacon, miss.beacon, fmt_dec); | ||
332 | |||
333 | static struct attribute *wireless_attrs[] = { | ||
334 | &class_device_attr_status.attr, | ||
335 | &class_device_attr_link.attr, | ||
336 | &class_device_attr_level.attr, | ||
337 | &class_device_attr_noise.attr, | ||
338 | &class_device_attr_nwid.attr, | ||
339 | &class_device_attr_crypt.attr, | ||
340 | &class_device_attr_fragment.attr, | ||
341 | &class_device_attr_retries.attr, | ||
342 | &class_device_attr_misc.attr, | ||
343 | &class_device_attr_beacon.attr, | ||
344 | NULL | ||
345 | }; | ||
346 | |||
347 | static struct attribute_group wireless_group = { | ||
348 | .name = "wireless", | ||
349 | .attrs = wireless_attrs, | ||
350 | }; | ||
351 | #endif | ||
352 | |||
353 | #ifdef CONFIG_HOTPLUG | ||
354 | static int netdev_hotplug(struct class_device *cd, char **envp, | ||
355 | int num_envp, char *buf, int size) | ||
356 | { | ||
357 | struct net_device *dev = to_net_dev(cd); | ||
358 | int i = 0; | ||
359 | int n; | ||
360 | |||
361 | /* pass interface in env to hotplug. */ | ||
362 | envp[i++] = buf; | ||
363 | n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; | ||
364 | buf += n; | ||
365 | size -= n; | ||
366 | |||
367 | if ((size <= 0) || (i >= num_envp)) | ||
368 | return -ENOMEM; | ||
369 | |||
370 | envp[i] = NULL; | ||
371 | return 0; | ||
372 | } | ||
373 | #endif | ||
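For a device named eth0, say, the hotplug agent is thus invoked with exactly one added environment variable, INTERFACE=eth0.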
374 | |||
375 | /* | ||
376 | * netdev_release -- destroy and free a dead device. | ||
377 | * Called when last reference to class_device kobject is gone. | ||
378 | */ | ||
379 | static void netdev_release(struct class_device *cd) | ||
380 | { | ||
381 | struct net_device *dev | ||
382 | = container_of(cd, struct net_device, class_dev); | ||
383 | |||
384 | BUG_ON(dev->reg_state != NETREG_RELEASED); | ||
385 | |||
386 | kfree((char *)dev - dev->padded); | ||
387 | } | ||
388 | |||
389 | static struct class net_class = { | ||
390 | .name = "net", | ||
391 | .release = netdev_release, | ||
392 | #ifdef CONFIG_HOTPLUG | ||
393 | .hotplug = netdev_hotplug, | ||
394 | #endif | ||
395 | }; | ||
396 | |||
397 | void netdev_unregister_sysfs(struct net_device * net) | ||
398 | { | ||
399 | struct class_device * class_dev = &(net->class_dev); | ||
400 | |||
401 | if (net->get_stats) | ||
402 | sysfs_remove_group(&class_dev->kobj, &netstat_group); | ||
403 | |||
404 | #ifdef WIRELESS_EXT | ||
405 | if (net->get_wireless_stats) | ||
406 | sysfs_remove_group(&class_dev->kobj, &wireless_group); | ||
407 | #endif | ||
408 | class_device_del(class_dev); | ||
409 | |||
410 | } | ||
411 | |||
412 | /* Create sysfs entries for network device. */ | ||
413 | int netdev_register_sysfs(struct net_device *net) | ||
414 | { | ||
415 | struct class_device *class_dev = &(net->class_dev); | ||
416 | int i; | ||
417 | struct class_device_attribute *attr; | ||
418 | int ret; | ||
419 | |||
420 | class_dev->class = &net_class; | ||
421 | class_dev->class_data = net; | ||
422 | |||
423 | strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE); | ||
424 | if ((ret = class_device_register(class_dev))) | ||
425 | goto out; | ||
426 | |||
427 | for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) { | ||
428 | if ((ret = class_device_create_file(class_dev, attr))) | ||
429 | goto out_unreg; | ||
430 | } | ||
431 | |||
432 | |||
433 | if (net->get_stats && | ||
434 | (ret = sysfs_create_group(&class_dev->kobj, &netstat_group))) | ||
435 | goto out_unreg; | ||
436 | |||
437 | #ifdef WIRELESS_EXT | ||
438 | if (net->get_wireless_stats && | ||
439 | (ret = sysfs_create_group(&class_dev->kobj, &wireless_group))) | ||
440 | goto out_cleanup; | ||
441 | |||
442 | return 0; | ||
443 | out_cleanup: | ||
444 | if (net->get_stats) | ||
445 | sysfs_remove_group(&class_dev->kobj, &netstat_group); | ||
446 | #else | ||
447 | return 0; | ||
448 | #endif | ||
449 | |||
450 | out_unreg: | ||
451 | printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n", | ||
452 | net->name, ret); | ||
453 | class_device_unregister(class_dev); | ||
454 | out: | ||
455 | return ret; | ||
456 | } | ||
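Together with net_class above, this yields a per-interface sysfs tree of roughly the following shape (eth0 is illustrative; the leaves mirror the attribute tables earlier in this file):

	/sys/class/net/eth0/statistics/rx_packets
	/sys/class/net/eth0/statistics/tx_errors
	/sys/class/net/eth0/wireless/level	(WIRELESS_EXT configurations only)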
457 | |||
458 | int netdev_sysfs_init(void) | ||
459 | { | ||
460 | return class_register(&net_class); | ||
461 | } | ||
diff --git a/net/core/netfilter.c b/net/core/netfilter.c new file mode 100644 index 000000000000..e51cfa46950c --- /dev/null +++ b/net/core/netfilter.c | |||
@@ -0,0 +1,799 @@ | |||
1 | /* netfilter.c: look after the filters for various protocols. | ||
2 | * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. | ||
3 | * | ||
4 | * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any | ||
5 | * way. | ||
6 | * | ||
7 | * Rusty Russell (C)2000 -- This code is GPL. | ||
8 | * | ||
9 | * February 2000: Modified by James Morris to have 1 queue per protocol. | ||
10 | * 15-Mar-2000: Added NF_REPEAT --RR. | ||
11 | * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. | ||
12 | */ | ||
13 | #include <linux/config.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <net/protocol.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/wait.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/if.h> | ||
23 | #include <linux/netdevice.h> | ||
24 | #include <linux/inetdevice.h> | ||
25 | #include <linux/tcp.h> | ||
26 | #include <linux/udp.h> | ||
27 | #include <linux/icmp.h> | ||
28 | #include <net/sock.h> | ||
29 | #include <net/route.h> | ||
30 | #include <linux/ip.h> | ||
31 | |||
32 | /* In this code, we can be waiting indefinitely for userspace to | ||
33 | * service a packet if a hook returns NF_QUEUE. We could keep a count | ||
34 | * of skbuffs queued for userspace, and not deregister a hook unless | ||
35 | * this is zero, but that sucks. Now, we simply check when the | ||
36 | * packets come back: if the hook is gone, the packet is discarded. */ | ||
37 | #ifdef CONFIG_NETFILTER_DEBUG | ||
38 | #define NFDEBUG(format, args...) printk(format , ## args) | ||
39 | #else | ||
40 | #define NFDEBUG(format, args...) | ||
41 | #endif | ||
42 | |||
43 | /* Sockopts are only registered and called from user context, so | ||
44 | net locking would be overkill. Also, [gs]etsockopt calls may | ||
45 | sleep. */ | ||
46 | static DECLARE_MUTEX(nf_sockopt_mutex); | ||
47 | |||
48 | struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; | ||
49 | static LIST_HEAD(nf_sockopts); | ||
50 | static DEFINE_SPINLOCK(nf_hook_lock); | ||
51 | |||
52 | /* | ||
53 | * A queue handler may be registered for each protocol. Each is protected by | ||
54 | * a long-term mutex. The handler must provide an outfn() to accept packets | ||
55 | * for queueing and must reinject all packets it receives, no matter what. | ||
56 | */ | ||
57 | static struct nf_queue_handler_t { | ||
58 | nf_queue_outfn_t outfn; | ||
59 | void *data; | ||
60 | } queue_handler[NPROTO]; | ||
61 | static DEFINE_RWLOCK(queue_handler_lock); | ||
62 | |||
63 | int nf_register_hook(struct nf_hook_ops *reg) | ||
64 | { | ||
65 | struct list_head *i; | ||
66 | |||
67 | spin_lock_bh(&nf_hook_lock); | ||
68 | list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { | ||
69 | if (reg->priority < ((struct nf_hook_ops *)i)->priority) | ||
70 | break; | ||
71 | } | ||
72 | list_add_rcu(®->list, i->prev); | ||
73 | spin_unlock_bh(&nf_hook_lock); | ||
74 | |||
75 | synchronize_net(); | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void nf_unregister_hook(struct nf_hook_ops *reg) | ||
80 | { | ||
81 | spin_lock_bh(&nf_hook_lock); | ||
82 | list_del_rcu(®->list); | ||
83 | spin_unlock_bh(&nf_hook_lock); | ||
84 | |||
85 | synchronize_net(); | ||
86 | } | ||
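A minimal sketch of a client of this registration API, assuming the 2.6-era nf_hookfn signature that nf_iterate() invokes later in this file; my_hook and my_ops are hypothetical names:

	static unsigned int my_hook(unsigned int hooknum, struct sk_buff **pskb,
				    const struct net_device *in,
				    const struct net_device *out,
				    int (*okfn)(struct sk_buff *))
	{
		return NF_ACCEPT;	/* let every packet pass */
	}

	static struct nf_hook_ops my_ops = {
		.hook     = my_hook,
		.owner    = THIS_MODULE,
		.pf       = PF_INET,
		.hooknum  = NF_IP_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	};

	/* nf_register_hook(&my_ops); ... nf_unregister_hook(&my_ops); */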
87 | |||
88 | /* Do exclusive ranges overlap? */ | ||
89 | static inline int overlap(int min1, int max1, int min2, int max2) | ||
90 | { | ||
91 | return max1 > min2 && min1 < max2; | ||
92 | } | ||
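For instance, with these exclusive upper bounds, [64, 67) and [66, 70) overlap (67 > 66 and 64 < 70), while [64, 66) and [66, 70) do not.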
93 | |||
94 | /* Functions to register sockopt ranges (exclusive). */ | ||
95 | int nf_register_sockopt(struct nf_sockopt_ops *reg) | ||
96 | { | ||
97 | struct list_head *i; | ||
98 | int ret = 0; | ||
99 | |||
100 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
101 | return -EINTR; | ||
102 | |||
103 | list_for_each(i, &nf_sockopts) { | ||
104 | struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; | ||
105 | if (ops->pf == reg->pf | ||
106 | && (overlap(ops->set_optmin, ops->set_optmax, | ||
107 | reg->set_optmin, reg->set_optmax) | ||
108 | || overlap(ops->get_optmin, ops->get_optmax, | ||
109 | reg->get_optmin, reg->get_optmax))) { | ||
110 | NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", | ||
111 | ops->set_optmin, ops->set_optmax, | ||
112 | ops->get_optmin, ops->get_optmax, | ||
113 | reg->set_optmin, reg->set_optmax, | ||
114 | reg->get_optmin, reg->get_optmax); | ||
115 | ret = -EBUSY; | ||
116 | goto out; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | list_add(®->list, &nf_sockopts); | ||
121 | out: | ||
122 | up(&nf_sockopt_mutex); | ||
123 | return ret; | ||
124 | } | ||
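A hypothetical registration claiming the exclusive option ranges [64, 67) for both directions; my_set and my_get stand in for handlers matching the nf_sockopt() call sites further down:

	static struct nf_sockopt_ops my_sockopts = {
		.pf         = PF_INET,
		.set_optmin = 64,
		.set_optmax = 67,	/* exclusive */
		.set        = my_set,
		.get_optmin = 64,
		.get_optmax = 67,
		.get        = my_get,
	};

	/* err = nf_register_sockopt(&my_sockopts); */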
125 | |||
126 | void nf_unregister_sockopt(struct nf_sockopt_ops *reg) | ||
127 | { | ||
128 | /* No point being interruptible: we're probably in cleanup_module() */ | ||
129 | restart: | ||
130 | down(&nf_sockopt_mutex); | ||
131 | if (reg->use != 0) { | ||
132 | /* To be woken by nf_sockopt call... */ | ||
133 | /* FIXME: Stuart Young's name appears gratuitously. */ | ||
134 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
135 | reg->cleanup_task = current; | ||
136 | up(&nf_sockopt_mutex); | ||
137 | schedule(); | ||
138 | goto restart; | ||
139 | } | ||
140 | list_del(®->list); | ||
141 | up(&nf_sockopt_mutex); | ||
142 | } | ||
143 | |||
144 | #ifdef CONFIG_NETFILTER_DEBUG | ||
145 | #include <net/ip.h> | ||
146 | #include <net/tcp.h> | ||
147 | #include <linux/netfilter_ipv4.h> | ||
148 | |||
149 | static void debug_print_hooks_ip(unsigned int nf_debug) | ||
150 | { | ||
151 | if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { | ||
152 | printk("PRE_ROUTING "); | ||
153 | nf_debug ^= (1 << NF_IP_PRE_ROUTING); | ||
154 | } | ||
155 | if (nf_debug & (1 << NF_IP_LOCAL_IN)) { | ||
156 | printk("LOCAL_IN "); | ||
157 | nf_debug ^= (1 << NF_IP_LOCAL_IN); | ||
158 | } | ||
159 | if (nf_debug & (1 << NF_IP_FORWARD)) { | ||
160 | printk("FORWARD "); | ||
161 | nf_debug ^= (1 << NF_IP_FORWARD); | ||
162 | } | ||
163 | if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { | ||
164 | printk("LOCAL_OUT "); | ||
165 | nf_debug ^= (1 << NF_IP_LOCAL_OUT); | ||
166 | } | ||
167 | if (nf_debug & (1 << NF_IP_POST_ROUTING)) { | ||
168 | printk("POST_ROUTING "); | ||
169 | nf_debug ^= (1 << NF_IP_POST_ROUTING); | ||
170 | } | ||
171 | if (nf_debug) | ||
172 | printk("Crap bits: 0x%04X", nf_debug); | ||
173 | printk("\n"); | ||
174 | } | ||
175 | |||
176 | static void nf_dump_skb(int pf, struct sk_buff *skb) | ||
177 | { | ||
178 | printk("skb: pf=%i %s dev=%s len=%u\n", | ||
179 | pf, | ||
180 | skb->sk ? "(owned)" : "(unowned)", | ||
181 | skb->dev ? skb->dev->name : "(no dev)", | ||
182 | skb->len); | ||
183 | switch (pf) { | ||
184 | case PF_INET: { | ||
185 | const struct iphdr *ip = skb->nh.iph; | ||
186 | __u32 *opt = (__u32 *) (ip + 1); | ||
187 | int opti; | ||
188 | __u16 src_port = 0, dst_port = 0; | ||
189 | |||
190 | if (ip->protocol == IPPROTO_TCP | ||
191 | || ip->protocol == IPPROTO_UDP) { | ||
192 | struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); | ||
193 | src_port = ntohs(tcp->source); | ||
194 | dst_port = ntohs(tcp->dest); | ||
195 | } | ||
196 | |||
197 | printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" | ||
198 | " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", | ||
199 | ip->protocol, NIPQUAD(ip->saddr), | ||
200 | src_port, NIPQUAD(ip->daddr), | ||
201 | dst_port, | ||
202 | ntohs(ip->tot_len), ip->tos, ntohs(ip->id), | ||
203 | ntohs(ip->frag_off), ip->ttl); | ||
204 | |||
205 | for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) | ||
206 | printk(" O=0x%8.8X", *opt++); | ||
207 | printk("\n"); | ||
208 | } | ||
209 | } | ||
210 | } | ||
211 | |||
212 | void nf_debug_ip_local_deliver(struct sk_buff *skb) | ||
213 | { | ||
214 | /* If it's a loopback packet, it must have come through | ||
215 | * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and | ||
216 | * NF_IP_LOCAL_IN. Otherwise, must have gone through | ||
217 | * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ | ||
218 | if (!skb->dev) { | ||
219 | printk("ip_local_deliver: skb->dev is NULL.\n"); | ||
220 | } | ||
221 | else if (strcmp(skb->dev->name, "lo") == 0) { | ||
222 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | ||
223 | | (1 << NF_IP_POST_ROUTING) | ||
224 | | (1 << NF_IP_PRE_ROUTING) | ||
225 | | (1 << NF_IP_LOCAL_IN))) { | ||
226 | printk("ip_local_deliver: bad loopback skb: "); | ||
227 | debug_print_hooks_ip(skb->nf_debug); | ||
228 | nf_dump_skb(PF_INET, skb); | ||
229 | } | ||
230 | } | ||
231 | else { | ||
232 | if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING) | ||
233 | | (1<<NF_IP_LOCAL_IN))) { | ||
234 | printk("ip_local_deliver: bad non-lo skb: "); | ||
235 | debug_print_hooks_ip(skb->nf_debug); | ||
236 | nf_dump_skb(PF_INET, skb); | ||
237 | } | ||
238 | } | ||
239 | } | ||
240 | |||
241 | void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) | ||
242 | { | ||
243 | if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | ||
244 | | (1 << NF_IP_POST_ROUTING))) { | ||
245 | printk("ip_dev_loopback_xmit: bad owned skb = %p: ", | ||
246 | newskb); | ||
247 | debug_print_hooks_ip(newskb->nf_debug); | ||
248 | nf_dump_skb(PF_INET, newskb); | ||
249 | } | ||
250 | /* Clear to avoid confusing input check */ | ||
251 | newskb->nf_debug = 0; | ||
252 | } | ||
253 | |||
254 | void nf_debug_ip_finish_output2(struct sk_buff *skb) | ||
255 | { | ||
256 | /* If it's owned, it must have gone through the | ||
257 | * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. | ||
258 | * Otherwise, must have gone through | ||
259 | * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. | ||
260 | */ | ||
261 | if (skb->sk) { | ||
262 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | ||
263 | | (1 << NF_IP_POST_ROUTING))) { | ||
264 | printk("ip_finish_output: bad owned skb = %p: ", skb); | ||
265 | debug_print_hooks_ip(skb->nf_debug); | ||
266 | nf_dump_skb(PF_INET, skb); | ||
267 | } | ||
268 | } else { | ||
269 | if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) | ||
270 | | (1 << NF_IP_FORWARD) | ||
271 | | (1 << NF_IP_POST_ROUTING))) { | ||
272 | /* Fragments, tunnelled packets, TCP RSTs | ||
273 | generated by ipt_REJECT will have no | ||
274 | owners, but still may be local */ | ||
275 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | ||
276 | | (1 << NF_IP_POST_ROUTING))){ | ||
277 | printk("ip_finish_output:" | ||
278 | " bad unowned skb = %p: ",skb); | ||
279 | debug_print_hooks_ip(skb->nf_debug); | ||
280 | nf_dump_skb(PF_INET, skb); | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | } | ||
285 | #endif /*CONFIG_NETFILTER_DEBUG*/ | ||
286 | |||
287 | /* Call get/setsockopt() */ | ||
288 | static int nf_sockopt(struct sock *sk, int pf, int val, | ||
289 | char __user *opt, int *len, int get) | ||
290 | { | ||
291 | struct list_head *i; | ||
292 | struct nf_sockopt_ops *ops; | ||
293 | int ret; | ||
294 | |||
295 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
296 | return -EINTR; | ||
297 | |||
298 | list_for_each(i, &nf_sockopts) { | ||
299 | ops = (struct nf_sockopt_ops *)i; | ||
300 | if (ops->pf == pf) { | ||
301 | if (get) { | ||
302 | if (val >= ops->get_optmin | ||
303 | && val < ops->get_optmax) { | ||
304 | ops->use++; | ||
305 | up(&nf_sockopt_mutex); | ||
306 | ret = ops->get(sk, val, opt, len); | ||
307 | goto out; | ||
308 | } | ||
309 | } else { | ||
310 | if (val >= ops->set_optmin | ||
311 | && val < ops->set_optmax) { | ||
312 | ops->use++; | ||
313 | up(&nf_sockopt_mutex); | ||
314 | ret = ops->set(sk, val, opt, *len); | ||
315 | goto out; | ||
316 | } | ||
317 | } | ||
318 | } | ||
319 | } | ||
320 | up(&nf_sockopt_mutex); | ||
321 | return -ENOPROTOOPT; | ||
322 | |||
323 | out: | ||
324 | down(&nf_sockopt_mutex); | ||
325 | ops->use--; | ||
326 | if (ops->cleanup_task) | ||
327 | wake_up_process(ops->cleanup_task); | ||
328 | up(&nf_sockopt_mutex); | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, | ||
333 | int len) | ||
334 | { | ||
335 | return nf_sockopt(sk, pf, val, opt, &len, 0); | ||
336 | } | ||
337 | |||
338 | int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) | ||
339 | { | ||
340 | return nf_sockopt(sk, pf, val, opt, len, 1); | ||
341 | } | ||
342 | |||
343 | static unsigned int nf_iterate(struct list_head *head, | ||
344 | struct sk_buff **skb, | ||
345 | int hook, | ||
346 | const struct net_device *indev, | ||
347 | const struct net_device *outdev, | ||
348 | struct list_head **i, | ||
349 | int (*okfn)(struct sk_buff *), | ||
350 | int hook_thresh) | ||
351 | { | ||
352 | unsigned int verdict; | ||
353 | |||
354 | /* | ||
355 | * The caller must not block between calls to this | ||
356 | * function because of risk of continuing from deleted element. | ||
357 | */ | ||
358 | list_for_each_continue_rcu(*i, head) { | ||
359 | struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; | ||
360 | |||
361 | if (hook_thresh > elem->priority) | ||
362 | continue; | ||
363 | |||
364 | /* Optimization: we don't need to hold module | ||
365 | reference here, since the function can't sleep. --RR */ | ||
366 | verdict = elem->hook(hook, skb, indev, outdev, okfn); | ||
367 | if (verdict != NF_ACCEPT) { | ||
368 | #ifdef CONFIG_NETFILTER_DEBUG | ||
369 | if (unlikely(verdict > NF_MAX_VERDICT)) { | ||
370 | NFDEBUG("Evil return from %p(%u).\n", | ||
371 | elem->hook, hook); | ||
372 | continue; | ||
373 | } | ||
374 | #endif | ||
375 | if (verdict != NF_REPEAT) | ||
376 | return verdict; | ||
377 | *i = (*i)->prev; | ||
378 | } | ||
379 | } | ||
380 | return NF_ACCEPT; | ||
381 | } | ||
382 | |||
383 | int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) | ||
384 | { | ||
385 | int ret; | ||
386 | |||
387 | write_lock_bh(&queue_handler_lock); | ||
388 | if (queue_handler[pf].outfn) | ||
389 | ret = -EBUSY; | ||
390 | else { | ||
391 | queue_handler[pf].outfn = outfn; | ||
392 | queue_handler[pf].data = data; | ||
393 | ret = 0; | ||
394 | } | ||
395 | write_unlock_bh(&queue_handler_lock); | ||
396 | |||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | /* The caller must flush their queue before this */ | ||
401 | int nf_unregister_queue_handler(int pf) | ||
402 | { | ||
403 | write_lock_bh(&queue_handler_lock); | ||
404 | queue_handler[pf].outfn = NULL; | ||
405 | queue_handler[pf].data = NULL; | ||
406 | write_unlock_bh(&queue_handler_lock); | ||
407 | |||
408 | return 0; | ||
409 | } | ||
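A minimal sketch of a conforming handler, honouring the reinject-everything contract stated above (my_outfn is a hypothetical name):

	static int my_outfn(struct sk_buff *skb, struct nf_info *info, void *data)
	{
		/* A real handler would first hand skb to userspace; every
		 * packet must eventually come back via nf_reinject(). */
		nf_reinject(skb, info, NF_ACCEPT);
		return 0;
	}

	/* nf_register_queue_handler(PF_INET, my_outfn, NULL); */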
410 | |||
411 | /* | ||
412 | * Any packet that leaves via this function must come back | ||
413 | * through nf_reinject(). | ||
414 | */ | ||
415 | static int nf_queue(struct sk_buff *skb, | ||
416 | struct list_head *elem, | ||
417 | int pf, unsigned int hook, | ||
418 | struct net_device *indev, | ||
419 | struct net_device *outdev, | ||
420 | int (*okfn)(struct sk_buff *)) | ||
421 | { | ||
422 | int status; | ||
423 | struct nf_info *info; | ||
424 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
425 | struct net_device *physindev = NULL; | ||
426 | struct net_device *physoutdev = NULL; | ||
427 | #endif | ||
428 | |||
429 | /* QUEUE == DROP if no one is waiting, to be safe. */ | ||
430 | read_lock(&queue_handler_lock); | ||
431 | if (!queue_handler[pf].outfn) { | ||
432 | read_unlock(&queue_handler_lock); | ||
433 | kfree_skb(skb); | ||
434 | return 1; | ||
435 | } | ||
436 | |||
437 | info = kmalloc(sizeof(*info), GFP_ATOMIC); | ||
438 | if (!info) { | ||
439 | if (net_ratelimit()) | ||
440 | printk(KERN_ERR "OOM queueing packet %p\n", | ||
441 | skb); | ||
442 | read_unlock(&queue_handler_lock); | ||
443 | kfree_skb(skb); | ||
444 | return 1; | ||
445 | } | ||
446 | |||
447 | *info = (struct nf_info) { | ||
448 | (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; | ||
449 | |||
450 | /* If it's going away, ignore hook. */ | ||
451 | if (!try_module_get(info->elem->owner)) { | ||
452 | read_unlock(&queue_handler_lock); | ||
453 | kfree(info); | ||
454 | return 0; | ||
455 | } | ||
456 | |||
457 | /* Bump dev refs so they don't vanish while packet is out */ | ||
458 | if (indev) dev_hold(indev); | ||
459 | if (outdev) dev_hold(outdev); | ||
460 | |||
461 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
462 | if (skb->nf_bridge) { | ||
463 | physindev = skb->nf_bridge->physindev; | ||
464 | if (physindev) dev_hold(physindev); | ||
465 | physoutdev = skb->nf_bridge->physoutdev; | ||
466 | if (physoutdev) dev_hold(physoutdev); | ||
467 | } | ||
468 | #endif | ||
469 | |||
470 | status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); | ||
471 | read_unlock(&queue_handler_lock); | ||
472 | |||
473 | if (status < 0) { | ||
474 | /* James M doesn't say fuck enough. */ | ||
475 | if (indev) dev_put(indev); | ||
476 | if (outdev) dev_put(outdev); | ||
477 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
478 | if (physindev) dev_put(physindev); | ||
479 | if (physoutdev) dev_put(physoutdev); | ||
480 | #endif | ||
481 | module_put(info->elem->owner); | ||
482 | kfree(info); | ||
483 | kfree_skb(skb); | ||
484 | return 1; | ||
485 | } | ||
486 | return 1; | ||
487 | } | ||
488 | |||
489 | /* Returns 1 if okfn() needs to be executed by the caller, | ||
490 | * -EPERM for NF_DROP, 0 otherwise. */ | ||
491 | int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, | ||
492 | struct net_device *indev, | ||
493 | struct net_device *outdev, | ||
494 | int (*okfn)(struct sk_buff *), | ||
495 | int hook_thresh) | ||
496 | { | ||
497 | struct list_head *elem; | ||
498 | unsigned int verdict; | ||
499 | int ret = 0; | ||
500 | |||
501 | /* We may already have this, but read-locks nest anyway */ | ||
502 | rcu_read_lock(); | ||
503 | |||
504 | #ifdef CONFIG_NETFILTER_DEBUG | ||
505 | if (unlikely((*pskb)->nf_debug & (1 << hook))) { | ||
506 | printk("nf_hook: hook %i already set.\n", hook); | ||
507 | nf_dump_skb(pf, *pskb); | ||
508 | } | ||
509 | (*pskb)->nf_debug |= (1 << hook); | ||
510 | #endif | ||
511 | |||
512 | elem = &nf_hooks[pf][hook]; | ||
513 | next_hook: | ||
514 | verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, | ||
515 | outdev, &elem, okfn, hook_thresh); | ||
516 | if (verdict == NF_ACCEPT || verdict == NF_STOP) { | ||
517 | ret = 1; | ||
518 | goto unlock; | ||
519 | } else if (verdict == NF_DROP) { | ||
520 | kfree_skb(*pskb); | ||
521 | ret = -EPERM; | ||
522 | } else if (verdict == NF_QUEUE) { | ||
523 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); | ||
524 | if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) | ||
525 | goto next_hook; | ||
526 | } | ||
527 | unlock: | ||
528 | rcu_read_unlock(); | ||
529 | return ret; | ||
530 | } | ||
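A sketch of a call site honouring that return contract (the hook point and my_okfn are illustrative; compare the NF_HOOK macro in linux/netfilter.h):

	int ret = nf_hook_slow(PF_INET, NF_IP_LOCAL_IN, &skb, skb->dev, NULL,
			       my_okfn, INT_MIN);
	if (ret == 1)
		ret = my_okfn(skb);	/* every hook said ACCEPT or STOP */
	/* ret == 0: packet queued to userspace; ret == -EPERM: dropped */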
531 | |||
532 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, | ||
533 | unsigned int verdict) | ||
534 | { | ||
535 | struct list_head *elem = &info->elem->list; | ||
536 | struct list_head *i; | ||
537 | |||
538 | rcu_read_lock(); | ||
539 | |||
540 | /* Release those devices we held, or Alexey will kill me. */ | ||
541 | if (info->indev) dev_put(info->indev); | ||
542 | if (info->outdev) dev_put(info->outdev); | ||
543 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
544 | if (skb->nf_bridge) { | ||
545 | if (skb->nf_bridge->physindev) | ||
546 | dev_put(skb->nf_bridge->physindev); | ||
547 | if (skb->nf_bridge->physoutdev) | ||
548 | dev_put(skb->nf_bridge->physoutdev); | ||
549 | } | ||
550 | #endif | ||
551 | |||
552 | /* Drop reference to owner of hook which queued us. */ | ||
553 | module_put(info->elem->owner); | ||
554 | |||
555 | list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { | ||
556 | if (i == elem) | ||
557 | break; | ||
558 | } | ||
559 | |||
560 | if (elem == &nf_hooks[info->pf][info->hook]) { | ||
561 | /* The module which sent it to userspace is gone. */ | ||
562 | NFDEBUG("%s: module disappeared, dropping packet.\n", | ||
563 | __FUNCTION__); | ||
564 | verdict = NF_DROP; | ||
565 | } | ||
566 | |||
567 | /* Continue traversal iff userspace said ok... */ | ||
568 | if (verdict == NF_REPEAT) { | ||
569 | elem = elem->prev; | ||
570 | verdict = NF_ACCEPT; | ||
571 | } | ||
572 | |||
573 | if (verdict == NF_ACCEPT) { | ||
574 | next_hook: | ||
575 | verdict = nf_iterate(&nf_hooks[info->pf][info->hook], | ||
576 | &skb, info->hook, | ||
577 | info->indev, info->outdev, &elem, | ||
578 | info->okfn, INT_MIN); | ||
579 | } | ||
580 | |||
581 | switch (verdict) { | ||
582 | case NF_ACCEPT: | ||
583 | info->okfn(skb); | ||
584 | break; | ||
585 | |||
586 | case NF_QUEUE: | ||
587 | if (!nf_queue(skb, elem, info->pf, info->hook, | ||
588 | info->indev, info->outdev, info->okfn)) | ||
589 | goto next_hook; | ||
590 | break; | ||
591 | } | ||
592 | rcu_read_unlock(); | ||
593 | |||
594 | if (verdict == NF_DROP) | ||
595 | kfree_skb(skb); | ||
596 | |||
597 | kfree(info); | ||
598 | return; | ||
599 | } | ||
600 | |||
601 | #ifdef CONFIG_INET | ||
602 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | ||
603 | int ip_route_me_harder(struct sk_buff **pskb) | ||
604 | { | ||
605 | struct iphdr *iph = (*pskb)->nh.iph; | ||
606 | struct rtable *rt; | ||
607 | struct flowi fl = {}; | ||
608 | struct dst_entry *odst; | ||
609 | unsigned int hh_len; | ||
610 | |||
611 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause | ||
612 | * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. | ||
613 | */ | ||
614 | if (inet_addr_type(iph->saddr) == RTN_LOCAL) { | ||
615 | fl.nl_u.ip4_u.daddr = iph->daddr; | ||
616 | fl.nl_u.ip4_u.saddr = iph->saddr; | ||
617 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); | ||
618 | fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; | ||
619 | #ifdef CONFIG_IP_ROUTE_FWMARK | ||
620 | fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; | ||
621 | #endif | ||
622 | fl.proto = iph->protocol; | ||
623 | if (ip_route_output_key(&rt, &fl) != 0) | ||
624 | return -1; | ||
625 | |||
626 | /* Drop old route. */ | ||
627 | dst_release((*pskb)->dst); | ||
628 | (*pskb)->dst = &rt->u.dst; | ||
629 | } else { | ||
630 | /* non-local src, find valid iif to satisfy | ||
631 | * rp-filter when calling ip_route_input. */ | ||
632 | fl.nl_u.ip4_u.daddr = iph->saddr; | ||
633 | if (ip_route_output_key(&rt, &fl) != 0) | ||
634 | return -1; | ||
635 | |||
636 | odst = (*pskb)->dst; | ||
637 | if (ip_route_input(*pskb, iph->daddr, iph->saddr, | ||
638 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { | ||
639 | dst_release(&rt->u.dst); | ||
640 | return -1; | ||
641 | } | ||
642 | dst_release(&rt->u.dst); | ||
643 | dst_release(odst); | ||
644 | } | ||
645 | |||
646 | if ((*pskb)->dst->error) | ||
647 | return -1; | ||
648 | |||
649 | /* Change in oif may mean change in hh_len. */ | ||
650 | hh_len = (*pskb)->dst->dev->hard_header_len; | ||
651 | if (skb_headroom(*pskb) < hh_len) { | ||
652 | struct sk_buff *nskb; | ||
653 | |||
654 | nskb = skb_realloc_headroom(*pskb, hh_len); | ||
655 | if (!nskb) | ||
656 | return -1; | ||
657 | if ((*pskb)->sk) | ||
658 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
659 | kfree_skb(*pskb); | ||
660 | *pskb = nskb; | ||
661 | } | ||
662 | |||
663 | return 0; | ||
664 | } | ||
665 | EXPORT_SYMBOL(ip_route_me_harder); | ||
666 | |||
667 | int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) | ||
668 | { | ||
669 | struct sk_buff *nskb; | ||
670 | |||
671 | if (writable_len > (*pskb)->len) | ||
672 | return 0; | ||
673 | |||
674 | /* Not exclusive use of packet? Must copy. */ | ||
675 | if (skb_shared(*pskb) || skb_cloned(*pskb)) | ||
676 | goto copy_skb; | ||
677 | |||
678 | return pskb_may_pull(*pskb, writable_len); | ||
679 | |||
680 | copy_skb: | ||
681 | nskb = skb_copy(*pskb, GFP_ATOMIC); | ||
682 | if (!nskb) | ||
683 | return 0; | ||
684 | BUG_ON(skb_is_nonlinear(nskb)); | ||
685 | |||
686 | /* Rest of kernel will get very unhappy if we pass it a | ||
687 | suddenly-orphaned skbuff */ | ||
688 | if ((*pskb)->sk) | ||
689 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
690 | kfree_skb(*pskb); | ||
691 | *pskb = nskb; | ||
692 | return 1; | ||
693 | } | ||
694 | EXPORT_SYMBOL(skb_ip_make_writable); | ||
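A typical caller, sketched from inside a hypothetical hook function (pskb as passed to the hook), privatizes the IP header before editing it:

	if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl * 4))
		return NF_DROP;
	/* *pskb is now exclusively ours and linear through the IP header */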
695 | #endif /*CONFIG_INET*/ | ||
696 | |||
697 | /* Internal logging interface, which relies on the real | ||
698 | LOG target modules */ | ||
699 | |||
700 | #define NF_LOG_PREFIXLEN 128 | ||
701 | |||
702 | static nf_logfn *nf_logging[NPROTO]; /* = NULL */ | ||
703 | static int reported = 0; | ||
704 | static DEFINE_SPINLOCK(nf_log_lock); | ||
705 | |||
706 | int nf_log_register(int pf, nf_logfn *logfn) | ||
707 | { | ||
708 | int ret = -EBUSY; | ||
709 | |||
710 | /* Any setup of logging members must be done before | ||
711 | * substituting pointer. */ | ||
712 | spin_lock(&nf_log_lock); | ||
713 | if (!nf_logging[pf]) { | ||
714 | rcu_assign_pointer(nf_logging[pf], logfn); | ||
715 | ret = 0; | ||
716 | } | ||
717 | spin_unlock(&nf_log_lock); | ||
718 | return ret; | ||
719 | } | ||
720 | |||
721 | void nf_log_unregister(int pf, nf_logfn *logfn) | ||
722 | { | ||
723 | spin_lock(&nf_log_lock); | ||
724 | if (nf_logging[pf] == logfn) | ||
725 | nf_logging[pf] = NULL; | ||
726 | spin_unlock(&nf_log_lock); | ||
727 | |||
728 | /* Give time to concurrent readers. */ | ||
729 | synchronize_net(); | ||
730 | } | ||
731 | |||
732 | void nf_log_packet(int pf, | ||
733 | unsigned int hooknum, | ||
734 | const struct sk_buff *skb, | ||
735 | const struct net_device *in, | ||
736 | const struct net_device *out, | ||
737 | const char *fmt, ...) | ||
738 | { | ||
739 | va_list args; | ||
740 | char prefix[NF_LOG_PREFIXLEN]; | ||
741 | nf_logfn *logfn; | ||
742 | |||
743 | rcu_read_lock(); | ||
744 | logfn = rcu_dereference(nf_logging[pf]); | ||
745 | if (logfn) { | ||
746 | va_start(args, fmt); | ||
747 | vsnprintf(prefix, sizeof(prefix), fmt, args); | ||
748 | va_end(args); | ||
749 | /* We must read logging before nf_logfn[pf] */ | ||
750 | logfn(hooknum, skb, in, out, prefix); | ||
751 | } else if (!reported) { | ||
752 | printk(KERN_WARNING "nf_log_packet: can't log yet, " | ||
753 | "no backend logging module loaded in!\n"); | ||
754 | reported++; | ||
755 | } | ||
756 | rcu_read_unlock(); | ||
757 | } | ||
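A hypothetical call site, with an illustrative prefix format:

	nf_log_packet(PF_INET, NF_IP_LOCAL_IN, skb, in, NULL,
		      "my-module: dropped packet from %s ",
		      in ? in->name : "?");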
758 | EXPORT_SYMBOL(nf_log_register); | ||
759 | EXPORT_SYMBOL(nf_log_unregister); | ||
760 | EXPORT_SYMBOL(nf_log_packet); | ||
761 | |||
762 | /* This does not belong here, but locally generated errors need it if connection | ||
763 | tracking is in use: without this, the connection may not be in the hash table, and hence | ||
764 | manufactured ICMP or RST packets will not be associated with it. */ | ||
765 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | ||
766 | |||
767 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | ||
768 | { | ||
769 | void (*attach)(struct sk_buff *, struct sk_buff *); | ||
770 | |||
771 | if (skb->nfct && (attach = ip_ct_attach) != NULL) { | ||
772 | mb(); /* Just to be sure: must be read before executing this */ | ||
773 | attach(new, skb); | ||
774 | } | ||
775 | } | ||
776 | |||
777 | void __init netfilter_init(void) | ||
778 | { | ||
779 | int i, h; | ||
780 | |||
781 | for (i = 0; i < NPROTO; i++) { | ||
782 | for (h = 0; h < NF_MAX_HOOKS; h++) | ||
783 | INIT_LIST_HEAD(&nf_hooks[i][h]); | ||
784 | } | ||
785 | } | ||
786 | |||
787 | EXPORT_SYMBOL(ip_ct_attach); | ||
788 | EXPORT_SYMBOL(nf_ct_attach); | ||
789 | EXPORT_SYMBOL(nf_getsockopt); | ||
790 | EXPORT_SYMBOL(nf_hook_slow); | ||
791 | EXPORT_SYMBOL(nf_hooks); | ||
792 | EXPORT_SYMBOL(nf_register_hook); | ||
793 | EXPORT_SYMBOL(nf_register_queue_handler); | ||
794 | EXPORT_SYMBOL(nf_register_sockopt); | ||
795 | EXPORT_SYMBOL(nf_reinject); | ||
796 | EXPORT_SYMBOL(nf_setsockopt); | ||
797 | EXPORT_SYMBOL(nf_unregister_hook); | ||
798 | EXPORT_SYMBOL(nf_unregister_queue_handler); | ||
799 | EXPORT_SYMBOL(nf_unregister_sockopt); | ||
diff --git a/net/core/netpoll.c b/net/core/netpoll.c new file mode 100644 index 000000000000..a119696d5521 --- /dev/null +++ b/net/core/netpoll.c | |||
@@ -0,0 +1,735 @@ | |||
1 | /* | ||
2 | * Common framework for low-level network console, dump, and debugger code | ||
3 | * | ||
4 | * Sep 8 2003 Matt Mackall <mpm@selenic.com> | ||
5 | * | ||
6 | * based on the netconsole code from: | ||
7 | * | ||
8 | * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> | ||
9 | * Copyright (C) 2002 Red Hat, Inc. | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/netdevice.h> | ||
14 | #include <linux/etherdevice.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/inetdevice.h> | ||
17 | #include <linux/inet.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/netpoll.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/rcupdate.h> | ||
23 | #include <linux/workqueue.h> | ||
24 | #include <net/tcp.h> | ||
25 | #include <net/udp.h> | ||
26 | #include <asm/unaligned.h> | ||
27 | |||
28 | /* | ||
29 | * We maintain a small pool of fully-sized skbs, to make sure the | ||
30 | * message gets out even in extreme OOM situations. | ||
31 | */ | ||
32 | |||
33 | #define MAX_UDP_CHUNK 1460 | ||
34 | #define MAX_SKBS 32 | ||
35 | #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) | ||
36 | |||
37 | static DEFINE_SPINLOCK(skb_list_lock); | ||
38 | static int nr_skbs; | ||
39 | static struct sk_buff *skbs; | ||
40 | |||
41 | static DEFINE_SPINLOCK(queue_lock); | ||
42 | static int queue_depth; | ||
43 | static struct sk_buff *queue_head, *queue_tail; | ||
44 | |||
45 | static atomic_t trapped; | ||
46 | |||
47 | #define NETPOLL_RX_ENABLED 1 | ||
48 | #define NETPOLL_RX_DROP 2 | ||
49 | |||
50 | #define MAX_SKB_SIZE \ | ||
51 | (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ | ||
52 | sizeof(struct iphdr) + sizeof(struct ethhdr)) | ||
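With the usual 8-byte UDP, 20-byte IP and 14-byte Ethernet headers, MAX_SKB_SIZE works out to 1460 + 8 + 20 + 14 = 1502 bytes per pooled skb.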
53 | |||
54 | static void zap_completion_queue(void); | ||
55 | |||
56 | static void queue_process(void *p) | ||
57 | { | ||
58 | unsigned long flags; | ||
59 | struct sk_buff *skb; | ||
60 | |||
61 | while (queue_head) { | ||
62 | spin_lock_irqsave(&queue_lock, flags); | ||
63 | |||
64 | skb = queue_head; | ||
65 | queue_head = skb->next; | ||
66 | if (skb == queue_tail) | ||
67 | queue_head = NULL; | ||
68 | |||
69 | queue_depth--; | ||
70 | |||
71 | spin_unlock_irqrestore(&queue_lock, flags); | ||
72 | |||
73 | dev_queue_xmit(skb); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | static DECLARE_WORK(send_queue, queue_process, NULL); | ||
78 | |||
79 | void netpoll_queue(struct sk_buff *skb) | ||
80 | { | ||
81 | unsigned long flags; | ||
82 | |||
83 | if (queue_depth == MAX_QUEUE_DEPTH) { | ||
84 | __kfree_skb(skb); | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | spin_lock_irqsave(&queue_lock, flags); | ||
89 | if (!queue_head) | ||
90 | queue_head = skb; | ||
91 | else | ||
92 | queue_tail->next = skb; | ||
93 | queue_tail = skb; | ||
94 | queue_depth++; | ||
95 | spin_unlock_irqrestore(&queue_lock, flags); | ||
96 | |||
97 | schedule_work(&send_queue); | ||
98 | } | ||
99 | |||
100 | static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, | ||
101 | unsigned short ulen, u32 saddr, u32 daddr) | ||
102 | { | ||
103 | if (uh->check == 0) | ||
104 | return 0; | ||
105 | |||
106 | if (skb->ip_summed == CHECKSUM_HW) | ||
107 | return csum_tcpudp_magic( | ||
108 | saddr, daddr, ulen, IPPROTO_UDP, skb->csum); | ||
109 | |||
110 | skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); | ||
111 | |||
112 | return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Check whether delayed processing was scheduled for our NIC. If so, | ||
117 | * we attempt to grab the poll lock and use ->poll() to pump the card. | ||
118 | * If this fails, either we've recursed in ->poll() or it's already | ||
119 | * running on another CPU. | ||
120 | * | ||
121 | * Note: we don't mask interrupts with this lock because we're using | ||
122 | * trylock here and interrupts are already disabled in the softirq | ||
123 | * case. Further, we test the poll_owner to avoid recursion on UP | ||
124 | * systems where the lock doesn't exist. | ||
125 | * | ||
126 | * In cases where there is bi-directional communication, reading only | ||
127 | * one message at a time can lead to packets being dropped by the | ||
128 | * network adapter, forcing superfluous retries and possibly timeouts. | ||
129 | * Thus, we set our budget to greater than 1. | ||
130 | */ | ||
131 | static void poll_napi(struct netpoll *np) | ||
132 | { | ||
133 | int budget = 16; | ||
134 | |||
135 | if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && | ||
136 | np->poll_owner != smp_processor_id() && | ||
137 | spin_trylock(&np->poll_lock)) { | ||
138 | np->rx_flags |= NETPOLL_RX_DROP; | ||
139 | atomic_inc(&trapped); | ||
140 | |||
141 | np->dev->poll(np->dev, &budget); | ||
142 | |||
143 | atomic_dec(&trapped); | ||
144 | np->rx_flags &= ~NETPOLL_RX_DROP; | ||
145 | spin_unlock(&np->poll_lock); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | void netpoll_poll(struct netpoll *np) | ||
150 | { | ||
151 | if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) | ||
152 | return; | ||
153 | |||
154 | /* Process pending work on NIC */ | ||
155 | np->dev->poll_controller(np->dev); | ||
156 | if (np->dev->poll) | ||
157 | poll_napi(np); | ||
158 | |||
159 | zap_completion_queue(); | ||
160 | } | ||
161 | |||
162 | static void refill_skbs(void) | ||
163 | { | ||
164 | struct sk_buff *skb; | ||
165 | unsigned long flags; | ||
166 | |||
167 | spin_lock_irqsave(&skb_list_lock, flags); | ||
168 | while (nr_skbs < MAX_SKBS) { | ||
169 | skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); | ||
170 | if (!skb) | ||
171 | break; | ||
172 | |||
173 | skb->next = skbs; | ||
174 | skbs = skb; | ||
175 | nr_skbs++; | ||
176 | } | ||
177 | spin_unlock_irqrestore(&skb_list_lock, flags); | ||
178 | } | ||
179 | |||
180 | static void zap_completion_queue(void) | ||
181 | { | ||
182 | unsigned long flags; | ||
183 | struct softnet_data *sd = &get_cpu_var(softnet_data); | ||
184 | |||
185 | if (sd->completion_queue) { | ||
186 | struct sk_buff *clist; | ||
187 | |||
188 | local_irq_save(flags); | ||
189 | clist = sd->completion_queue; | ||
190 | sd->completion_queue = NULL; | ||
191 | local_irq_restore(flags); | ||
192 | |||
193 | while (clist != NULL) { | ||
194 | struct sk_buff *skb = clist; | ||
195 | clist = clist->next; | ||
196 | if(skb->destructor) | ||
197 | dev_kfree_skb_any(skb); /* put this one back */ | ||
198 | else | ||
199 | __kfree_skb(skb); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | put_cpu_var(softnet_data); | ||
204 | } | ||
205 | |||
206 | static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) | ||
207 | { | ||
208 | int once = 1, count = 0; | ||
209 | unsigned long flags; | ||
210 | struct sk_buff *skb = NULL; | ||
211 | |||
212 | zap_completion_queue(); | ||
213 | repeat: | ||
214 | if (nr_skbs < MAX_SKBS) | ||
215 | refill_skbs(); | ||
216 | |||
217 | skb = alloc_skb(len, GFP_ATOMIC); | ||
218 | |||
219 | if (!skb) { | ||
220 | spin_lock_irqsave(&skb_list_lock, flags); | ||
221 | skb = skbs; | ||
222 | if (skb) { | ||
223 | skbs = skb->next; | ||
224 | skb->next = NULL; | ||
225 | nr_skbs--; | ||
226 | } | ||
227 | spin_unlock_irqrestore(&skb_list_lock, flags); | ||
228 | } | ||
229 | |||
230 | if(!skb) { | ||
231 | count++; | ||
232 | if (once && (count == 1000000)) { | ||
233 | printk("out of netpoll skbs!\n"); | ||
234 | once = 0; | ||
235 | } | ||
236 | netpoll_poll(np); | ||
237 | goto repeat; | ||
238 | } | ||
239 | |||
240 | atomic_set(&skb->users, 1); | ||
241 | skb_reserve(skb, reserve); | ||
242 | return skb; | ||
243 | } | ||
244 | |||
245 | static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | ||
246 | { | ||
247 | int status; | ||
248 | |||
249 | repeat: | ||
250 | if(!np || !np->dev || !netif_running(np->dev)) { | ||
251 | __kfree_skb(skb); | ||
252 | return; | ||
253 | } | ||
254 | |||
255 | /* avoid recursion */ | ||
256 | if(np->poll_owner == smp_processor_id() || | ||
257 | np->dev->xmit_lock_owner == smp_processor_id()) { | ||
258 | if (np->drop) | ||
259 | np->drop(skb); | ||
260 | else | ||
261 | __kfree_skb(skb); | ||
262 | return; | ||
263 | } | ||
264 | |||
265 | spin_lock(&np->dev->xmit_lock); | ||
266 | np->dev->xmit_lock_owner = smp_processor_id(); | ||
267 | |||
268 | /* | ||
269 | * network drivers do not expect to be called if the queue is | ||
270 | * stopped. | ||
271 | */ | ||
272 | if (netif_queue_stopped(np->dev)) { | ||
273 | np->dev->xmit_lock_owner = -1; | ||
274 | spin_unlock(&np->dev->xmit_lock); | ||
275 | |||
276 | netpoll_poll(np); | ||
277 | goto repeat; | ||
278 | } | ||
279 | |||
280 | status = np->dev->hard_start_xmit(skb, np->dev); | ||
281 | np->dev->xmit_lock_owner = -1; | ||
282 | spin_unlock(&np->dev->xmit_lock); | ||
283 | |||
284 | /* transmit busy */ | ||
285 | if(status) { | ||
286 | netpoll_poll(np); | ||
287 | goto repeat; | ||
288 | } | ||
289 | } | ||
290 | |||
291 | void netpoll_send_udp(struct netpoll *np, const char *msg, int len) | ||
292 | { | ||
293 | int total_len, eth_len, ip_len, udp_len; | ||
294 | struct sk_buff *skb; | ||
295 | struct udphdr *udph; | ||
296 | struct iphdr *iph; | ||
297 | struct ethhdr *eth; | ||
298 | |||
299 | udp_len = len + sizeof(*udph); | ||
300 | ip_len = eth_len = udp_len + sizeof(*iph); | ||
301 | total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; | ||
302 | |||
303 | skb = find_skb(np, total_len, total_len - len); | ||
304 | if (!skb) | ||
305 | return; | ||
306 | |||
307 | memcpy(skb->data, msg, len); | ||
308 | skb->len += len; | ||
309 | |||
310 | udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); | ||
311 | udph->source = htons(np->local_port); | ||
312 | udph->dest = htons(np->remote_port); | ||
313 | udph->len = htons(udp_len); | ||
314 | udph->check = 0; | ||
315 | |||
316 | iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); | ||
317 | |||
318 | /* iph->version = 4; iph->ihl = 5; */ | ||
319 | put_unaligned(0x45, (unsigned char *)iph); | ||
320 | iph->tos = 0; | ||
321 | put_unaligned(htons(ip_len), &(iph->tot_len)); | ||
322 | iph->id = 0; | ||
323 | iph->frag_off = 0; | ||
324 | iph->ttl = 64; | ||
325 | iph->protocol = IPPROTO_UDP; | ||
326 | iph->check = 0; | ||
327 | put_unaligned(htonl(np->local_ip), &(iph->saddr)); | ||
328 | put_unaligned(htonl(np->remote_ip), &(iph->daddr)); | ||
329 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | ||
330 | |||
331 | eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); | ||
332 | |||
333 | eth->h_proto = htons(ETH_P_IP); | ||
334 | memcpy(eth->h_source, np->local_mac, 6); | ||
335 | memcpy(eth->h_dest, np->remote_mac, 6); | ||
336 | |||
337 | skb->dev = np->dev; | ||
338 | |||
339 | netpoll_send_skb(np, skb); | ||
340 | } | ||
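A minimal caller in the style of a console driver, where np is a struct netpoll already brought up via netpoll_setup() (message and length are illustrative):

	netpoll_send_udp(&np, "hello from netpoll\n", 19);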
341 | |||
342 | static void arp_reply(struct sk_buff *skb) | ||
343 | { | ||
344 | struct arphdr *arp; | ||
345 | unsigned char *arp_ptr; | ||
346 | int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; | ||
347 | u32 sip, tip; | ||
348 | struct sk_buff *send_skb; | ||
349 | struct netpoll *np = skb->dev->np; | ||
350 | |||
351 | if (!np) return; | ||
352 | |||
353 | /* No arp on this interface */ | ||
354 | if (skb->dev->flags & IFF_NOARP) | ||
355 | return; | ||
356 | |||
357 | if (!pskb_may_pull(skb, (sizeof(struct arphdr) + | ||
358 | (2 * skb->dev->addr_len) + | ||
359 | (2 * sizeof(u32))))) | ||
360 | return; | ||
361 | |||
362 | skb->h.raw = skb->nh.raw = skb->data; | ||
363 | arp = skb->nh.arph; | ||
364 | |||
365 | if ((arp->ar_hrd != htons(ARPHRD_ETHER) && | ||
366 | arp->ar_hrd != htons(ARPHRD_IEEE802)) || | ||
367 | arp->ar_pro != htons(ETH_P_IP) || | ||
368 | arp->ar_op != htons(ARPOP_REQUEST)) | ||
369 | return; | ||
370 | |||
371 | arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; | ||
372 | memcpy(&sip, arp_ptr, 4); | ||
373 | arp_ptr += 4 + skb->dev->addr_len; | ||
374 | memcpy(&tip, arp_ptr, 4); | ||
375 | |||
376 | /* Should we ignore arp? */ | ||
377 | if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) | ||
378 | return; | ||
379 | |||
380 | size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); | ||
381 | send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), | ||
382 | LL_RESERVED_SPACE(np->dev)); | ||
383 | |||
384 | if (!send_skb) | ||
385 | return; | ||
386 | |||
387 | send_skb->nh.raw = send_skb->data; | ||
388 | arp = (struct arphdr *) skb_put(send_skb, size); | ||
389 | send_skb->dev = skb->dev; | ||
390 | send_skb->protocol = htons(ETH_P_ARP); | ||
391 | |||
392 | /* Fill the device header for the ARP frame */ | ||
393 | |||
394 | if (np->dev->hard_header && | ||
395 | np->dev->hard_header(send_skb, skb->dev, ptype, | ||
396 | np->remote_mac, np->local_mac, | ||
397 | send_skb->len) < 0) { | ||
398 | kfree_skb(send_skb); | ||
399 | return; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * Fill out the arp protocol part. | ||
404 | * | ||
405 | * we only support ethernet device type, | ||
406 | * which (according to RFC 1390) should always equal 1 (Ethernet). | ||
407 | */ | ||
408 | |||
409 | arp->ar_hrd = htons(np->dev->type); | ||
410 | arp->ar_pro = htons(ETH_P_IP); | ||
411 | arp->ar_hln = np->dev->addr_len; | ||
412 | arp->ar_pln = 4; | ||
413 | arp->ar_op = htons(type); | ||
414 | |||
415 | arp_ptr=(unsigned char *)(arp + 1); | ||
416 | memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); | ||
417 | arp_ptr += np->dev->addr_len; | ||
418 | memcpy(arp_ptr, &tip, 4); | ||
419 | arp_ptr += 4; | ||
420 | memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); | ||
421 | arp_ptr += np->dev->addr_len; | ||
422 | memcpy(arp_ptr, &sip, 4); | ||
423 | |||
424 | netpoll_send_skb(np, send_skb); | ||
425 | } | ||
426 | |||
427 | int __netpoll_rx(struct sk_buff *skb) | ||
428 | { | ||
429 | int proto, len, ulen; | ||
430 | struct iphdr *iph; | ||
431 | struct udphdr *uh; | ||
432 | struct netpoll *np = skb->dev->np; | ||
433 | |||
434 | if (!np->rx_hook) | ||
435 | goto out; | ||
436 | if (skb->dev->type != ARPHRD_ETHER) | ||
437 | goto out; | ||
438 | |||
439 | /* check if netpoll clients need ARP */ | ||
440 | if (skb->protocol == __constant_htons(ETH_P_ARP) && | ||
441 | atomic_read(&trapped)) { | ||
442 | arp_reply(skb); | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | proto = ntohs(eth_hdr(skb)->h_proto); | ||
447 | if (proto != ETH_P_IP) | ||
448 | goto out; | ||
449 | if (skb->pkt_type == PACKET_OTHERHOST) | ||
450 | goto out; | ||
451 | if (skb_shared(skb)) | ||
452 | goto out; | ||
453 | |||
454 | iph = (struct iphdr *)skb->data; | ||
455 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | ||
456 | goto out; | ||
457 | if (iph->ihl < 5 || iph->version != 4) | ||
458 | goto out; | ||
459 | if (!pskb_may_pull(skb, iph->ihl*4)) | ||
460 | goto out; | ||
461 | if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) | ||
462 | goto out; | ||
463 | |||
464 | len = ntohs(iph->tot_len); | ||
465 | if (skb->len < len || len < iph->ihl*4) | ||
466 | goto out; | ||
467 | |||
468 | if (iph->protocol != IPPROTO_UDP) | ||
469 | goto out; | ||
470 | |||
471 | len -= iph->ihl*4; | ||
472 | uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); | ||
473 | ulen = ntohs(uh->len); | ||
474 | |||
475 | if (ulen != len) | ||
476 | goto out; | ||
477 | if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0) | ||
478 | goto out; | ||
479 | if (np->local_ip && np->local_ip != ntohl(iph->daddr)) | ||
480 | goto out; | ||
481 | if (np->remote_ip && np->remote_ip != ntohl(iph->saddr)) | ||
482 | goto out; | ||
483 | if (np->local_port && np->local_port != ntohs(uh->dest)) | ||
484 | goto out; | ||
485 | |||
486 | np->rx_hook(np, ntohs(uh->source), | ||
487 | (char *)(uh+1), | ||
488 | ulen - sizeof(struct udphdr)); | ||
489 | |||
490 | kfree_skb(skb); | ||
491 | return 1; | ||
492 | |||
493 | out: | ||
494 | if (atomic_read(&trapped)) { | ||
495 | kfree_skb(skb); | ||
496 | return 1; | ||
497 | } | ||
498 | |||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | int netpoll_parse_options(struct netpoll *np, char *opt) | ||
503 | { | ||
504 | char *cur=opt, *delim; | ||
505 | |||
506 | if(*cur != '@') { | ||
507 | if ((delim = strchr(cur, '@')) == NULL) | ||
508 | goto parse_failed; | ||
509 | *delim=0; | ||
510 | np->local_port=simple_strtol(cur, NULL, 10); | ||
511 | cur=delim; | ||
512 | } | ||
513 | cur++; | ||
514 | printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); | ||
515 | |||
516 | if(*cur != '/') { | ||
517 | if ((delim = strchr(cur, '/')) == NULL) | ||
518 | goto parse_failed; | ||
519 | *delim=0; | ||
520 | np->local_ip=ntohl(in_aton(cur)); | ||
521 | cur=delim; | ||
522 | |||
523 | printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", | ||
524 | np->name, HIPQUAD(np->local_ip)); | ||
525 | } | ||
526 | cur++; | ||
527 | |||
528 | if ( *cur != ',') { | ||
529 | /* parse out dev name */ | ||
530 | if ((delim = strchr(cur, ',')) == NULL) | ||
531 | goto parse_failed; | ||
532 | *delim=0; | ||
533 | strlcpy(np->dev_name, cur, sizeof(np->dev_name)); | ||
534 | cur=delim; | ||
535 | } | ||
536 | cur++; | ||
537 | |||
538 | printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); | ||
539 | |||
540 | if ( *cur != '@' ) { | ||
541 | /* dst port */ | ||
542 | if ((delim = strchr(cur, '@')) == NULL) | ||
543 | goto parse_failed; | ||
544 | *delim=0; | ||
545 | np->remote_port=simple_strtol(cur, NULL, 10); | ||
546 | cur=delim; | ||
547 | } | ||
548 | cur++; | ||
549 | printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); | ||
550 | |||
551 | /* dst ip */ | ||
552 | if ((delim = strchr(cur, '/')) == NULL) | ||
553 | goto parse_failed; | ||
554 | *delim=0; | ||
555 | np->remote_ip=ntohl(in_aton(cur)); | ||
556 | cur=delim+1; | ||
557 | |||
558 | printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", | ||
559 | np->name, HIPQUAD(np->remote_ip)); | ||
560 | |||
561 | if( *cur != 0 ) | ||
562 | { | ||
563 | /* MAC address */ | ||
564 | if ((delim = strchr(cur, ':')) == NULL) | ||
565 | goto parse_failed; | ||
566 | *delim=0; | ||
567 | np->remote_mac[0]=simple_strtol(cur, NULL, 16); | ||
568 | cur=delim+1; | ||
569 | if ((delim = strchr(cur, ':')) == NULL) | ||
570 | goto parse_failed; | ||
571 | *delim=0; | ||
572 | np->remote_mac[1]=simple_strtol(cur, NULL, 16); | ||
573 | cur=delim+1; | ||
574 | if ((delim = strchr(cur, ':')) == NULL) | ||
575 | goto parse_failed; | ||
576 | *delim=0; | ||
577 | np->remote_mac[2]=simple_strtol(cur, NULL, 16); | ||
578 | cur=delim+1; | ||
579 | if ((delim = strchr(cur, ':')) == NULL) | ||
580 | goto parse_failed; | ||
581 | *delim=0; | ||
582 | np->remote_mac[3]=simple_strtol(cur, NULL, 16); | ||
583 | cur=delim+1; | ||
584 | if ((delim = strchr(cur, ':')) == NULL) | ||
585 | goto parse_failed; | ||
586 | *delim=0; | ||
587 | np->remote_mac[4]=simple_strtol(cur, NULL, 16); | ||
588 | cur=delim+1; | ||
589 | np->remote_mac[5]=simple_strtol(cur, NULL, 16); | ||
590 | } | ||
591 | |||
592 | printk(KERN_INFO "%s: remote ethernet address " | ||
593 | "%02x:%02x:%02x:%02x:%02x:%02x\n", | ||
594 | np->name, | ||
595 | np->remote_mac[0], | ||
596 | np->remote_mac[1], | ||
597 | np->remote_mac[2], | ||
598 | np->remote_mac[3], | ||
599 | np->remote_mac[4], | ||
600 | np->remote_mac[5]); | ||
601 | |||
602 | return 0; | ||
603 | |||
604 | parse_failed: | ||
605 | printk(KERN_INFO "%s: couldn't parse config at %s!\n", | ||
606 | np->name, cur); | ||
607 | return -1; | ||
608 | } | ||
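The accepted format is thus local_port@local_ip/dev,remote_port@remote_ip/remote_mac; a leading field may be left empty to keep its default, and the trailing MAC may be omitted. For example (all addresses hypothetical):

	6665@192.168.0.1/eth0,6666@192.168.0.2/00:0d:56:0a:11:22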
609 | |||
610 | int netpoll_setup(struct netpoll *np) | ||
611 | { | ||
612 | struct net_device *ndev = NULL; | ||
613 | struct in_device *in_dev; | ||
614 | |||
615 | np->poll_lock = SPIN_LOCK_UNLOCKED; | ||
616 | np->poll_owner = -1; | ||
617 | |||
618 | if (np->dev_name) | ||
619 | ndev = dev_get_by_name(np->dev_name); | ||
620 | if (!ndev) { | ||
621 | printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", | ||
622 | np->name, np->dev_name); | ||
623 | return -1; | ||
624 | } | ||
625 | |||
626 | np->dev = ndev; | ||
627 | ndev->np = np; | ||
628 | |||
629 | if (!ndev->poll_controller) { | ||
630 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", | ||
631 | np->name, np->dev_name); | ||
632 | goto release; | ||
633 | } | ||
634 | |||
635 | if (!netif_running(ndev)) { | ||
636 | unsigned long atmost, atleast; | ||
637 | |||
638 | printk(KERN_INFO "%s: device %s not up yet, forcing it\n", | ||
639 | np->name, np->dev_name); | ||
640 | |||
641 | rtnl_shlock(); | ||
642 | if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { | ||
643 | printk(KERN_ERR "%s: failed to open %s\n", | ||
644 | np->name, np->dev_name); | ||
645 | rtnl_shunlock(); | ||
646 | goto release; | ||
647 | } | ||
648 | rtnl_shunlock(); | ||
649 | |||
650 | atleast = jiffies + HZ/10; | ||
651 | atmost = jiffies + 4*HZ; | ||
652 | while (!netif_carrier_ok(ndev)) { | ||
653 | if (time_after(jiffies, atmost)) { | ||
654 | printk(KERN_NOTICE | ||
655 | "%s: timeout waiting for carrier\n", | ||
656 | np->name); | ||
657 | break; | ||
658 | } | ||
659 | cond_resched(); | ||
660 | } | ||
661 | |||
662 | /* If carrier appears to come up instantly, we don't | ||
663 | * trust it and pause so that we don't pump all our | ||
664 | * queued console messages into the bitbucket. | ||
665 | */ | ||
666 | |||
667 | if (time_before(jiffies, atleast)) { | ||
668 | printk(KERN_NOTICE "%s: carrier detect appears" | ||
669 | " untrustworthy, waiting 4 seconds\n", | ||
670 | np->name); | ||
671 | msleep(4000); | ||
672 | } | ||
673 | } | ||
674 | |||
675 | if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr) | ||
676 | memcpy(np->local_mac, ndev->dev_addr, 6); | ||
677 | |||
678 | if (!np->local_ip) { | ||
679 | rcu_read_lock(); | ||
680 | in_dev = __in_dev_get(ndev); | ||
681 | |||
682 | if (!in_dev || !in_dev->ifa_list) { | ||
683 | rcu_read_unlock(); | ||
684 | printk(KERN_ERR "%s: no IP address for %s, aborting\n", | ||
685 | np->name, np->dev_name); | ||
686 | goto release; | ||
687 | } | ||
688 | |||
689 | np->local_ip = ntohl(in_dev->ifa_list->ifa_local); | ||
690 | rcu_read_unlock(); | ||
691 | printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", | ||
692 | np->name, HIPQUAD(np->local_ip)); | ||
693 | } | ||
694 | |||
695 | if(np->rx_hook) | ||
696 | np->rx_flags = NETPOLL_RX_ENABLED; | ||
697 | |||
698 | return 0; | ||
699 | |||
700 | release: | ||
701 | ndev->np = NULL; | ||
702 | np->dev = NULL; | ||
703 | dev_put(ndev); | ||
704 | return -1; | ||
705 | } | ||
706 | |||
707 | void netpoll_cleanup(struct netpoll *np) | ||
708 | { | ||
709 | if (np->dev) | ||
710 | np->dev->np = NULL; | ||
711 | dev_put(np->dev); | ||
712 | np->dev = NULL; | ||
713 | } | ||
714 | |||
715 | int netpoll_trap(void) | ||
716 | { | ||
717 | return atomic_read(&trapped); | ||
718 | } | ||
719 | |||
720 | void netpoll_set_trap(int trap) | ||
721 | { | ||
722 | if (trap) | ||
723 | atomic_inc(&trapped); | ||
724 | else | ||
725 | atomic_dec(&trapped); | ||
726 | } | ||
727 | |||
728 | EXPORT_SYMBOL(netpoll_set_trap); | ||
729 | EXPORT_SYMBOL(netpoll_trap); | ||
730 | EXPORT_SYMBOL(netpoll_parse_options); | ||
731 | EXPORT_SYMBOL(netpoll_setup); | ||
732 | EXPORT_SYMBOL(netpoll_cleanup); | ||
733 | EXPORT_SYMBOL(netpoll_send_udp); | ||
734 | EXPORT_SYMBOL(netpoll_poll); | ||
735 | EXPORT_SYMBOL(netpoll_queue); | ||
diff --git a/net/core/pktgen.c b/net/core/pktgen.c new file mode 100644 index 000000000000..c57b06bc79f3 --- /dev/null +++ b/net/core/pktgen.c | |||
@@ -0,0 +1,3132 @@ | |||
1 | /* | ||
2 | * Authors: | ||
3 | * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se> | ||
4 | * Uppsala University and | ||
5 | * Swedish University of Agricultural Sciences | ||
6 | * | ||
7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
8 | * Ben Greear <greearb@candelatech.com> | ||
9 | * Jens Låås <jens.laas@data.slu.se> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * | ||
17 | * A tool for loading the network with preconfigured packets. | ||
18 | * The tool is implemented as a linux module. Parameters are output | ||
19 | * device, delay (to hard_xmit), number of packets, and whether | ||
20 | * to use multiple SKBs or just the same one. | ||
21 | * pktgen uses the installed interface's output routine. | ||
22 | * | ||
23 | * Additional hacking by: | ||
24 | * | ||
25 | * Jens.Laas@data.slu.se | ||
26 | * Improved by ANK. 010120. | ||
27 | * Improved by ANK even more. 010212. | ||
28 | * MAC address typo fixed. 010417 --ro | ||
29 | * Integrated. 020301 --DaveM | ||
30 | * Added multiskb option 020301 --DaveM | ||
31 | * Scaling of results. 020417--sigurdur@linpro.no | ||
32 | * Significant re-work of the module: | ||
33 | * * Convert to a threaded model to transmit and receive more | ||
34 | * efficiently on multiple interfaces at once. | ||
35 | * * Converted many counters to __u64 to allow longer runs. | ||
36 | * * Allow configuration of ranges, like min/max IP address, MACs, | ||
37 | * and UDP-ports, for both source and destination, and each can | ||
38 | * be set to use a random distribution or sequentially walk the range. | ||
39 | * * Can now change most values after starting. | ||
40 | * * Place 12-byte packet in UDP payload with magic number, | ||
41 | * sequence number, and timestamp. | ||
42 | * * Add receiver code that detects dropped pkts, re-ordered pkts, and | ||
43 | * latencies (with micro-second precision). | ||
44 | * * Add IOCTL interface to easily get counters & configuration. | ||
45 | * --Ben Greear <greearb@candelatech.com> | ||
46 | * | ||
47 | * Renamed multiskb to clone_skb and cleaned up sending core for two distinct | ||
48 | * skb modes. A clone_skb=0 mode for Ben's "ranges" work and a clone_skb != 0 | ||
49 | * as a "fastpath" with a configurable number of clones after alloc's. | ||
50 | * clone_skb=0 means all packets are allocated; this also means ranges, time | ||
51 | * stamps etc. can be used. clone_skb=100 means 1 malloc is followed by 100 | ||
52 | * clones. | ||
53 | * | ||
54 | * Also moved to /proc/net/pktgen/ | ||
55 | * --ro | ||
56 | * | ||
57 | * Sept 10: Fixed threading/locking. Lots of bone-headed and more clever | ||
58 | * mistakes. Also merged in DaveM's patch in the -pre6 patch. | ||
59 | * --Ben Greear <greearb@candelatech.com> | ||
60 | * | ||
61 | * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) | ||
62 | * | ||
63 | * | ||
64 | * 021124 Finished major redesign and rewrite for new functionality. | ||
65 | * See Documentation/networking/pktgen.txt for how to use this. | ||
66 | * | ||
67 | * The new operation: | ||
68 | * For each CPU one thread/process is created at start. This thread checks | ||
69 | * for running devices in the if_list and sends packets until count is 0. | ||
70 | * The thread also checks thread->control, which is used for inter-process | ||
71 | * communication; the controlling process "posts" operations to the threads | ||
72 | * this way. It should be possible to remove the if_lock once add/rem_device | ||
73 | * is merged into this too. | ||
74 | * | ||
75 | * By design there should only be *one* "controlling" process. In practice | ||
76 | * multiple write accesses give unpredictable results. Understand that a | ||
77 | * "write" to /proc yields a result code that should be read back by the | ||
78 | * "writer". For practical use this should be no problem. | ||
79 | * | ||
80 | * Note: when adding devices to a specific CPU it is a good idea to also | ||
81 | * assign /proc/irq/XX/smp_affinity so TX interrupts get bound to the same CPU. | ||
82 | * --ro | ||
83 | * | ||
84 | * Fix refcount off by one if first packet fails, potential null deref, | ||
85 | * memleak 030710- KJP | ||
86 | * | ||
87 | * First "ranges" functionality for ipv6 030726 --ro | ||
88 | * | ||
89 | * Included flow support. 030802 ANK. | ||
90 | * | ||
91 | * Fixed unaligned access on IA-64 Grant Grundler <grundler@parisc-linux.org> | ||
92 | * | ||
93 | * "Remove if" fix from Harald Welte <laforge@netfilter.org> 040419 | ||
94 | * ia64 compilation fix from Aron Griffis <aron@hp.com> 040604 | ||
95 | * | ||
96 | * New xmit() return, do_div and misc clean up by Stephen Hemminger | ||
97 | * <shemminger@osdl.org> 040923 | ||
98 | * | ||
99 | * Randy Dunlap fixed u64 printk compiler warning | ||
100 | * | ||
101 | * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> | ||
102 | * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 | ||
103 | * | ||
104 | * Corrections from Nikolai Malykh (nmalykh@bilim.com) | ||
105 | * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230 | ||
106 | * | ||
107 | * interruptible_sleep_on_timeout() replaced by Nishanth Aravamudan | ||
108 | * <nacc@us.ibm.com> 050103 | ||
109 | */ | ||
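/* Rough usage sketch of the /proc interface implemented below (the thread
 * file name "kpktgend_0" and device "eth0" are illustrative; paths follow
 * PG_PROC_DIR = "pktgen" under /proc/net):
 *
 *   echo "add_device eth0" > /proc/net/pktgen/kpktgend_0
 *   echo "count 10000"     > /proc/net/pktgen/eth0
 *   echo "pkt_size 60"     > /proc/net/pktgen/eth0
 *   echo "dst 10.0.0.2"    > /proc/net/pktgen/eth0
 *   echo "start"           > /proc/net/pktgen/pgctrl
 *
 * See Documentation/networking/pktgen.txt for the full command set.
 */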
110 | #include <linux/sys.h> | ||
111 | #include <linux/types.h> | ||
112 | #include <linux/module.h> | ||
113 | #include <linux/moduleparam.h> | ||
114 | #include <linux/kernel.h> | ||
115 | #include <linux/smp_lock.h> | ||
116 | #include <linux/sched.h> | ||
117 | #include <linux/slab.h> | ||
118 | #include <linux/vmalloc.h> | ||
120 | #include <linux/unistd.h> | ||
121 | #include <linux/string.h> | ||
122 | #include <linux/ptrace.h> | ||
123 | #include <linux/errno.h> | ||
124 | #include <linux/ioport.h> | ||
125 | #include <linux/interrupt.h> | ||
126 | #include <linux/delay.h> | ||
127 | #include <linux/timer.h> | ||
128 | #include <linux/init.h> | ||
129 | #include <linux/skbuff.h> | ||
130 | #include <linux/netdevice.h> | ||
131 | #include <linux/inet.h> | ||
132 | #include <linux/inetdevice.h> | ||
133 | #include <linux/rtnetlink.h> | ||
134 | #include <linux/if_arp.h> | ||
135 | #include <linux/in.h> | ||
136 | #include <linux/ip.h> | ||
137 | #include <linux/ipv6.h> | ||
138 | #include <linux/udp.h> | ||
139 | #include <linux/proc_fs.h> | ||
140 | #include <linux/wait.h> | ||
141 | #include <net/checksum.h> | ||
142 | #include <net/ipv6.h> | ||
143 | #include <net/addrconf.h> | ||
144 | #include <asm/byteorder.h> | ||
145 | #include <linux/rcupdate.h> | ||
146 | #include <asm/bitops.h> | ||
147 | #include <asm/io.h> | ||
148 | #include <asm/dma.h> | ||
149 | #include <asm/uaccess.h> | ||
150 | #include <asm/div64.h> /* do_div */ | ||
151 | #include <asm/timex.h> | ||
152 | |||
153 | |||
154 | #define VERSION "pktgen v2.61: Packet Generator for packet performance testing.\n" | ||
155 | |||
156 | /* #define PG_DEBUG(a) a */ | ||
157 | #define PG_DEBUG(a) | ||
158 | |||
159 | /* The buckets are exponential in 'width' */ | ||
160 | #define LAT_BUCKETS_MAX 32 | ||
161 | #define IP_NAME_SZ 32 | ||
162 | |||
163 | /* Device flag bits */ | ||
164 | #define F_IPSRC_RND (1<<0) /* IP-Src Random */ | ||
165 | #define F_IPDST_RND (1<<1) /* IP-Dst Random */ | ||
166 | #define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ | ||
167 | #define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ | ||
168 | #define F_MACSRC_RND (1<<4) /* MAC-Src Random */ | ||
169 | #define F_MACDST_RND (1<<5) /* MAC-Dst Random */ | ||
170 | #define F_TXSIZE_RND (1<<6) /* Transmit size is random */ | ||
171 | #define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ | ||
172 | |||
173 | /* Thread control flag bits */ | ||
174 | #define T_TERMINATE (1<<0) | ||
175 | #define T_STOP (1<<1) /* Stop run */ | ||
176 | #define T_RUN (1<<2) /* Start run */ | ||
177 | #define T_REMDEV (1<<3) /* Remove all devs */ | ||
178 | |||
179 | /* Locks */ | ||
180 | #define thread_lock() spin_lock(&_thread_lock) | ||
181 | #define thread_unlock() spin_unlock(&_thread_lock) | ||
182 | |||
183 | /* if_lock -- should become removable after some work */ | ||
184 | #define if_lock(t) spin_lock(&(t->if_lock)) | ||
185 | #define if_unlock(t) spin_unlock(&(t->if_lock)) | ||
186 | |||
187 | /* Used to help identify pktgen packets on the receive side */ | ||
188 | #define PKTGEN_MAGIC 0xbe9be955 | ||
189 | #define PG_PROC_DIR "pktgen" | ||
190 | |||
191 | #define MAX_CFLOWS 65536 | ||
192 | |||
193 | struct flow_state | ||
194 | { | ||
195 | __u32 cur_daddr; | ||
196 | int count; | ||
197 | }; | ||
198 | |||
199 | struct pktgen_dev { | ||
200 | |||
201 | /* | ||
202 | * Try to keep frequently and infrequently used vars separated. | ||
203 | */ | ||
204 | |||
205 | char ifname[32]; | ||
206 | struct proc_dir_entry *proc_ent; | ||
207 | char result[512]; | ||
208 | /* proc file names */ | ||
209 | char fname[80]; | ||
210 | |||
211 | struct pktgen_thread* pg_thread; /* the owner */ | ||
212 | struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ | ||
213 | |||
214 | int running; /* if this changes to false, the test will stop */ | ||
215 | |||
216 | /* If min != max, then we will either do a linear iteration, or | ||
217 | * we will do a random selection from within the range. | ||
218 | */ | ||
219 | __u32 flags; | ||
220 | |||
221 | int min_pkt_size; /* = ETH_ZLEN; */ | ||
222 | int max_pkt_size; /* = ETH_ZLEN; */ | ||
223 | int nfrags; | ||
224 | __u32 delay_us; /* Default delay */ | ||
225 | __u32 delay_ns; | ||
226 | __u64 count; /* Default No packets to send */ | ||
227 | __u64 sofar; /* How many pkts we've sent so far */ | ||
228 | __u64 tx_bytes; /* How many bytes we've transmitted */ | ||
229 | __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ | ||
230 | |||
231 | /* runtime counters relating to clone_skb */ | ||
232 | __u64 next_tx_us; /* timestamp of when to tx next */ | ||
233 | __u32 next_tx_ns; | ||
234 | |||
235 | __u64 allocated_skbs; | ||
236 | __u32 clone_count; | ||
237 | int last_ok; /* Was last skb sent? | ||
238 | * Or a failed transmit of some sort? This will keep | ||
239 | * sequence numbers in order, for example. | ||
240 | */ | ||
241 | __u64 started_at; /* micro-seconds */ | ||
242 | __u64 stopped_at; /* micro-seconds */ | ||
243 | __u64 idle_acc; /* micro-seconds */ | ||
244 | __u32 seq_num; | ||
245 | |||
246 | int clone_skb; /* Use multiple SKBs during packet gen. If this number | ||
247 | * is greater than 1, then that many copies of the same | ||
248 | * packet will be sent before a new packet is allocated. | ||
249 | * For instance, if you want to send 1024 identical packets | ||
250 | * before creating a new packet, set clone_skb to 1024. | ||
251 | */ | ||
252 | |||
253 | char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ | ||
254 | char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ | ||
255 | char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ | ||
256 | char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ | ||
257 | |||
258 | struct in6_addr in6_saddr; | ||
259 | struct in6_addr in6_daddr; | ||
260 | struct in6_addr cur_in6_daddr; | ||
261 | struct in6_addr cur_in6_saddr; | ||
262 | /* For ranges */ | ||
263 | struct in6_addr min_in6_daddr; | ||
264 | struct in6_addr max_in6_daddr; | ||
265 | struct in6_addr min_in6_saddr; | ||
266 | struct in6_addr max_in6_saddr; | ||
267 | |||
268 | /* If we're doing ranges, random or incremental, then this | ||
269 | * defines the min/max for those ranges. | ||
270 | */ | ||
271 | __u32 saddr_min; /* inclusive, source IP address */ | ||
272 | __u32 saddr_max; /* exclusive, source IP address */ | ||
273 | __u32 daddr_min; /* inclusive, dest IP address */ | ||
274 | __u32 daddr_max; /* exclusive, dest IP address */ | ||
275 | |||
276 | __u16 udp_src_min; /* inclusive, source UDP port */ | ||
277 | __u16 udp_src_max; /* exclusive, source UDP port */ | ||
278 | __u16 udp_dst_min; /* inclusive, dest UDP port */ | ||
279 | __u16 udp_dst_max; /* exclusive, dest UDP port */ | ||
280 | |||
281 | __u32 src_mac_count; /* How many MACs to iterate through */ | ||
282 | __u32 dst_mac_count; /* How many MACs to iterate through */ | ||
283 | |||
284 | unsigned char dst_mac[6]; | ||
285 | unsigned char src_mac[6]; | ||
286 | |||
287 | __u32 cur_dst_mac_offset; | ||
288 | __u32 cur_src_mac_offset; | ||
289 | __u32 cur_saddr; | ||
290 | __u32 cur_daddr; | ||
291 | __u16 cur_udp_dst; | ||
292 | __u16 cur_udp_src; | ||
293 | __u32 cur_pkt_size; | ||
294 | |||
295 | __u8 hh[14]; | ||
296 | /* = { | ||
297 | 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, | ||
298 | |||
299 | We fill in SRC address later | ||
300 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
301 | 0x08, 0x00 | ||
302 | }; | ||
303 | */ | ||
304 | __u16 pad; /* pad out the hh struct to an even 16 bytes */ | ||
305 | |||
306 | struct sk_buff* skb; /* skb we are to transmit next, mainly used for when we | ||
307 | * are transmitting the same one multiple times | ||
308 | */ | ||
309 | struct net_device* odev; /* The out-going device. Note that the device should | ||
310 | * have its pg_info pointer pointing back to this | ||
311 | * device. This will be set when the user specifies | ||
312 | * the out-going device name (not when the inject is | ||
313 | * started as it used to do.) | ||
314 | */ | ||
315 | struct flow_state *flows; | ||
316 | unsigned cflows; /* Concurrent flows (config) */ | ||
317 | unsigned lflow; /* Flow length (config) */ | ||
318 | unsigned nflows; /* accumulated flows (stats) */ | ||
319 | }; | ||
320 | |||
321 | struct pktgen_hdr { | ||
322 | __u32 pgh_magic; | ||
323 | __u32 seq_num; | ||
324 | __u32 tv_sec; | ||
325 | __u32 tv_usec; | ||
326 | }; | ||
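/* This header is written at the start of the UDP payload; a receiver can
 * match pgh_magic against PKTGEN_MAGIC and use seq_num plus the tv_sec/
 * tv_usec timestamp to detect drops, reordering and per-packet latency,
 * as described in the changelog above.
 */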
327 | |||
328 | struct pktgen_thread { | ||
329 | spinlock_t if_lock; | ||
330 | struct pktgen_dev *if_list; /* All devices here */ | ||
331 | struct pktgen_thread* next; | ||
332 | char name[32]; | ||
333 | char fname[128]; /* name of proc file */ | ||
334 | struct proc_dir_entry *proc_ent; | ||
335 | char result[512]; | ||
336 | u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ | ||
337 | |||
338 | /* Field for thread to receive "posted" events terminate, stop ifs etc.*/ | ||
339 | |||
340 | u32 control; | ||
341 | int pid; | ||
342 | int cpu; | ||
343 | |||
344 | wait_queue_head_t queue; | ||
345 | }; | ||
346 | |||
347 | #define REMOVE 1 | ||
348 | #define FIND 0 | ||
349 | |||
350 | /* This code works around the fact that do_div cannot handle two 64-bit | ||
351 | numbers, and regular 64-bit division doesn't work on x86 kernels. | ||
352 | --Ben | ||
353 | */ | ||
354 | |||
355 | #define PG_DIV 0 | ||
356 | |||
357 | /* This was emailed to LKML by: Chris Caputo <ccaputo@alt.net> | ||
358 | * Function copied/adapted/optimized from: | ||
359 | * | ||
360 | * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html | ||
361 | * | ||
362 | * Copyright 1994, University of Cambridge Computer Laboratory | ||
363 | * All Rights Reserved. | ||
364 | * | ||
365 | */ | ||
366 | static inline s64 divremdi3(s64 x, s64 y, int type) | ||
367 | { | ||
368 | u64 a = (x < 0) ? -x : x; | ||
369 | u64 b = (y < 0) ? -y : y; | ||
370 | u64 res = 0, d = 1; | ||
371 | |||
372 | if (b > 0) { | ||
373 | while (b < a) { | ||
374 | b <<= 1; | ||
375 | d <<= 1; | ||
376 | } | ||
377 | } | ||
378 | |||
379 | do { | ||
380 | if ( a >= b ) { | ||
381 | a -= b; | ||
382 | res += d; | ||
383 | } | ||
384 | b >>= 1; | ||
385 | d >>= 1; | ||
386 | } | ||
387 | while (d); | ||
388 | |||
389 | if (PG_DIV == type) { | ||
390 | return (((x ^ y) & (1ll<<63)) == 0) ? res : -(s64)res; | ||
391 | } | ||
392 | else { | ||
393 | return ((x & (1ll<<63)) == 0) ? a : -(s64)a; | ||
394 | } | ||
395 | } | ||
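/* Worked example of the shift-and-subtract division above, for x=7, y=2:
 * b is first doubled past a (2 -> 4 -> 8, with d going 1 -> 2 -> 4), then
 * halved while subtracting, accumulating res = 2 + 1 = 3 with a = 1 left
 * over; so divremdi3(7, 2, PG_DIV) returns the quotient 3, and any other
 * 'type' returns the remainder 1.
 */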
396 | |||
397 | /* End of hacks to deal with 64-bit math on x86 */ | ||
398 | |||
399 | /** Convert to milliseconds */ | ||
400 | static inline __u64 tv_to_ms(const struct timeval* tv) | ||
401 | { | ||
402 | __u64 ms = tv->tv_usec / 1000; | ||
403 | ms += (__u64)tv->tv_sec * (__u64)1000; | ||
404 | return ms; | ||
405 | } | ||
406 | |||
407 | |||
408 | /** Convert to micro-seconds */ | ||
409 | static inline __u64 tv_to_us(const struct timeval* tv) | ||
410 | { | ||
411 | __u64 us = tv->tv_usec; | ||
412 | us += (__u64)tv->tv_sec * (__u64)1000000; | ||
413 | return us; | ||
414 | } | ||
415 | |||
416 | static inline __u64 pg_div(__u64 n, __u32 base) { | ||
417 | __u64 tmp = n; | ||
418 | do_div(tmp, base); | ||
419 | /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n", | ||
420 | n, base, tmp); */ | ||
421 | return tmp; | ||
422 | } | ||
423 | |||
424 | static inline __u64 pg_div64(__u64 n, __u64 base) | ||
425 | { | ||
426 | __u64 tmp = n; | ||
427 | /* | ||
428 | * How do we know if the architecture we are running on | ||
429 | * supports division with 64 bit base? | ||
430 | * | ||
431 | */ | ||
432 | #if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) | ||
433 | |||
434 | do_div(tmp, base); | ||
435 | #else | ||
436 | tmp = divremdi3(n, base, PG_DIV); | ||
437 | #endif | ||
438 | return tmp; | ||
439 | } | ||
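/* Note: do_div() only divides by a 32-bit base, so the #if above is a
 * conservative whitelist of architectures known to handle a full 64/64
 * division natively; everything else falls back to divremdi3().
 */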
440 | |||
441 | static inline u32 pktgen_random(void) | ||
442 | { | ||
443 | #if 0 | ||
444 | __u32 n; | ||
445 | get_random_bytes(&n, 4); | ||
446 | return n; | ||
447 | #else | ||
448 | return net_random(); | ||
449 | #endif | ||
450 | } | ||
451 | |||
452 | static inline __u64 getCurMs(void) | ||
453 | { | ||
454 | struct timeval tv; | ||
455 | do_gettimeofday(&tv); | ||
456 | return tv_to_ms(&tv); | ||
457 | } | ||
458 | |||
459 | static inline __u64 getCurUs(void) | ||
460 | { | ||
461 | struct timeval tv; | ||
462 | do_gettimeofday(&tv); | ||
463 | return tv_to_us(&tv); | ||
464 | } | ||
465 | |||
466 | static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) | ||
467 | { | ||
468 | return tv_to_us(a) - tv_to_us(b); | ||
469 | } | ||
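/* Example: for tv = { .tv_sec = 1, .tv_usec = 500000 }, tv_to_ms() yields
 * 1500 and tv_to_us() yields 1500000, so tv_diff() returns the interval
 * between two timevals in whole microseconds.
 */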
470 | |||
471 | |||
472 | /* old include end */ | ||
473 | |||
474 | static char version[] __initdata = VERSION; | ||
475 | |||
476 | static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos); | ||
477 | static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos); | ||
478 | static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); | ||
479 | |||
480 | static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); | ||
481 | static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); | ||
482 | static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); | ||
483 | static int create_proc_dir(void); | ||
484 | static int remove_proc_dir(void); | ||
485 | |||
486 | static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); | ||
487 | static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); | ||
488 | static struct pktgen_thread* pktgen_find_thread(const char* name); | ||
489 | static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname); | ||
490 | static int pktgen_device_event(struct notifier_block *, unsigned long, void *); | ||
491 | static void pktgen_run_all_threads(void); | ||
492 | static void pktgen_stop_all_threads_ifs(void); | ||
493 | static int pktgen_stop_device(struct pktgen_dev *pkt_dev); | ||
494 | static void pktgen_stop(struct pktgen_thread* t); | ||
495 | static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); | ||
496 | static struct pktgen_dev *pktgen_NN_threads(const char* dev_name, int remove); | ||
497 | static unsigned int scan_ip6(const char *s,char ip[16]); | ||
498 | static unsigned int fmt_ip6(char *s,const char ip[16]); | ||
499 | |||
500 | /* Module parameters, defaults. */ | ||
501 | static int pg_count_d = 1000; /* 1000 pkts by default */ | ||
502 | static int pg_delay_d = 0; | ||
503 | static int pg_clone_skb_d = 0; | ||
504 | static int debug = 0; | ||
505 | |||
506 | static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED; | ||
507 | static struct pktgen_thread *pktgen_threads = NULL; | ||
508 | |||
509 | static char module_fname[128]; | ||
510 | static struct proc_dir_entry *module_proc_ent = NULL; | ||
511 | |||
512 | static struct notifier_block pktgen_notifier_block = { | ||
513 | .notifier_call = pktgen_device_event, | ||
514 | }; | ||
515 | |||
516 | static struct file_operations pktgen_fops = { | ||
517 | .read = proc_pgctrl_read, | ||
518 | .write = proc_pgctrl_write, | ||
519 | /* .ioctl = pktgen_ioctl, later maybe */ | ||
520 | }; | ||
521 | |||
522 | /* | ||
523 | * /proc handling functions | ||
524 | * | ||
525 | */ | ||
526 | |||
527 | static struct proc_dir_entry *pg_proc_dir = NULL; | ||
528 | static int proc_pgctrl_read_eof=0; | ||
529 | |||
530 | static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, | ||
531 | size_t count, loff_t *ppos) | ||
532 | { | ||
533 | char data[200]; | ||
534 | int len = 0; | ||
535 | |||
536 | if(proc_pgctrl_read_eof) { | ||
537 | proc_pgctrl_read_eof=0; | ||
538 | len = 0; | ||
539 | goto out; | ||
540 | } | ||
541 | |||
542 | sprintf(data, "%s", VERSION); | ||
543 | |||
544 | len = strlen(data); | ||
545 | |||
546 | if (len > count) { | ||
547 | len = -EFAULT; | ||
548 | goto out; | ||
549 | } | ||
550 | | ||
551 | if (copy_to_user(buf, data, len)) { | ||
552 | len = -EFAULT; | ||
553 | goto out; | ||
554 | } | ||
555 | |||
556 | *ppos += len; | ||
557 | proc_pgctrl_read_eof=1; /* EOF next call */ | ||
558 | |||
559 | out: | ||
560 | return len; | ||
561 | } | ||
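/* Note: proc_pgctrl_read_eof is a single global flag, so this read path
 * assumes one reader at a time; concurrent readers of pgctrl can steal
 * each other's EOF and see a spurious empty read.
 */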
562 | |||
563 | static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, | ||
564 | size_t count, loff_t *ppos) | ||
565 | { | ||
566 | char *data = NULL; | ||
567 | int err = 0; | ||
568 | |||
569 | if (!capable(CAP_NET_ADMIN)){ | ||
570 | err = -EPERM; | ||
571 | goto out; | ||
572 | } | ||
573 | if (count < 1) { | ||
574 | err = -EINVAL; | ||
575 | goto out; | ||
576 | } | ||
577 | | ||
578 | data = vmalloc(count); | ||
579 | if (!data) { | ||
580 | err = -ENOMEM; | ||
581 | goto out; | ||
582 | } | ||
580 | if (copy_from_user(data, buf, count)) { | ||
581 | err =-EFAULT; | ||
582 | goto out_free; | ||
583 | } | ||
584 | data[count-1] = 0; /* Make string */ | ||
585 | |||
586 | if (!strcmp(data, "stop")) | ||
587 | pktgen_stop_all_threads_ifs(); | ||
588 | |||
589 | else if (!strcmp(data, "start")) | ||
590 | pktgen_run_all_threads(); | ||
591 | |||
592 | else | ||
593 | printk("pktgen: Unknown command: %s\n", data); | ||
594 | |||
595 | err = count; | ||
596 | |||
597 | out_free: | ||
598 | vfree (data); | ||
599 | out: | ||
600 | return err; | ||
601 | } | ||
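/* Note: the NUL termination at data[count-1] above relies on the writer
 * sending a trailing newline (as "echo start > .../pgctrl" does); a write
 * without one loses its final character.
 */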
602 | |||
603 | static int proc_if_read(char *buf , char **start, off_t offset, | ||
604 | int len, int *eof, void *data) | ||
605 | { | ||
606 | char *p; | ||
607 | int i; | ||
608 | struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); | ||
609 | __u64 sa; | ||
610 | __u64 stopped; | ||
611 | __u64 now = getCurUs(); | ||
612 | |||
613 | p = buf; | ||
614 | p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", | ||
615 | (unsigned long long) pkt_dev->count, | ||
616 | pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); | ||
617 | |||
618 | p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", | ||
619 | pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); | ||
620 | |||
621 | p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); | ||
622 | |||
623 | |||
624 | if(pkt_dev->flags & F_IPV6) { | ||
625 | char b1[128], b2[128], b3[128]; | ||
626 | fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); | ||
627 | fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); | ||
628 | fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); | ||
629 | p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); | ||
630 | |||
631 | fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); | ||
632 | fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); | ||
633 | fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); | ||
634 | p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); | ||
635 | |||
636 | } | ||
637 | else | ||
638 | p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", | ||
639 | pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); | ||
640 | |||
641 | p += sprintf(p, " src_mac: "); | ||
642 | |||
643 | if ((pkt_dev->src_mac[0] == 0) && | ||
644 | (pkt_dev->src_mac[1] == 0) && | ||
645 | (pkt_dev->src_mac[2] == 0) && | ||
646 | (pkt_dev->src_mac[3] == 0) && | ||
647 | (pkt_dev->src_mac[4] == 0) && | ||
648 | (pkt_dev->src_mac[5] == 0)) | ||
649 | |||
650 | for (i = 0; i < 6; i++) | ||
651 | p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); | ||
652 | |||
653 | else | ||
654 | for (i = 0; i < 6; i++) | ||
655 | p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); | ||
656 | |||
657 | p += sprintf(p, "dst_mac: "); | ||
658 | for (i = 0; i < 6; i++) | ||
659 | p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); | ||
660 | |||
661 | p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", | ||
662 | pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, | ||
663 | pkt_dev->udp_dst_max); | ||
664 | |||
665 | p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", | ||
666 | pkt_dev->src_mac_count, pkt_dev->dst_mac_count); | ||
667 | |||
668 | |||
669 | if (pkt_dev->flags & F_IPV6) | ||
670 | p += sprintf(p, "IPV6 "); | ||
671 | |||
672 | if (pkt_dev->flags & F_IPSRC_RND) | ||
673 | p += sprintf(p, "IPSRC_RND "); | ||
674 | |||
675 | if (pkt_dev->flags & F_IPDST_RND) | ||
676 | p += sprintf(p, "IPDST_RND "); | ||
677 | |||
678 | if (pkt_dev->flags & F_TXSIZE_RND) | ||
679 | p += sprintf(p, "TXSIZE_RND "); | ||
680 | |||
681 | if (pkt_dev->flags & F_UDPSRC_RND) | ||
682 | p += sprintf(p, "UDPSRC_RND "); | ||
683 | |||
684 | if (pkt_dev->flags & F_UDPDST_RND) | ||
685 | p += sprintf(p, "UDPDST_RND "); | ||
686 | |||
687 | if (pkt_dev->flags & F_MACSRC_RND) | ||
688 | p += sprintf(p, "MACSRC_RND "); | ||
689 | |||
690 | if (pkt_dev->flags & F_MACDST_RND) | ||
691 | p += sprintf(p, "MACDST_RND "); | ||
692 | |||
693 | |||
694 | p += sprintf(p, "\n"); | ||
695 | |||
696 | sa = pkt_dev->started_at; | ||
697 | stopped = pkt_dev->stopped_at; | ||
698 | if (pkt_dev->running) | ||
699 | stopped = now; /* not really stopped, more like last-running-at */ | ||
700 | |||
701 | p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", | ||
702 | (unsigned long long) pkt_dev->sofar, | ||
703 | (unsigned long long) pkt_dev->errors, | ||
704 | (unsigned long long) sa, | ||
705 | (unsigned long long) stopped, | ||
706 | (unsigned long long) pkt_dev->idle_acc); | ||
707 | |||
708 | p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", | ||
709 | pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); | ||
710 | |||
711 | if(pkt_dev->flags & F_IPV6) { | ||
712 | char b1[128], b2[128]; | ||
713 | fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); | ||
714 | fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); | ||
715 | p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); | ||
716 | } | ||
717 | else | ||
718 | p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", | ||
719 | pkt_dev->cur_saddr, pkt_dev->cur_daddr); | ||
720 | |||
721 | |||
722 | p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", | ||
723 | pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); | ||
724 | |||
725 | p += sprintf(p, " flows: %u\n", pkt_dev->nflows); | ||
726 | |||
727 | if (pkt_dev->result[0]) | ||
728 | p += sprintf(p, "Result: %s\n", pkt_dev->result); | ||
729 | else | ||
730 | p += sprintf(p, "Result: Idle\n"); | ||
731 | *eof = 1; | ||
732 | |||
733 | return p - buf; | ||
734 | } | ||
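/* A read of the per-interface proc file rendered above looks roughly like
 * this (abridged; values illustrative):
 *
 *   Params: count 10000 min_pkt_size: 60 max_pkt_size: 60
 *        frags: 0 delay: 0 clone_skb: 0 ifname: eth0
 *   ...
 *   Current:
 *    pkts-sofar: 10000 errors: 0
 *   ...
 *   Result: Idle
 */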
735 | |||
736 | |||
737 | static int count_trail_chars(const char __user *user_buffer, unsigned int maxlen) | ||
738 | { | ||
739 | int i; | ||
740 | |||
741 | for (i = 0; i < maxlen; i++) { | ||
742 | char c; | ||
743 | if (get_user(c, &user_buffer[i])) | ||
744 | return -EFAULT; | ||
745 | switch (c) { | ||
746 | case '\"': | ||
747 | case '\n': | ||
748 | case '\r': | ||
749 | case '\t': | ||
750 | case ' ': | ||
751 | case '=': | ||
752 | break; | ||
753 | default: | ||
754 | goto done; | ||
755 | }; | ||
756 | } | ||
757 | done: | ||
758 | return i; | ||
759 | } | ||
760 | |||
761 | static int num_arg(const char __user *user_buffer, unsigned long maxlen, | ||
762 | unsigned long *num) | ||
763 | { | ||
764 | int i = 0; | ||
765 | *num = 0; | ||
766 | |||
767 | for(; i < maxlen; i++) { | ||
768 | char c; | ||
769 | if (get_user(c, &user_buffer[i])) | ||
770 | return -EFAULT; | ||
771 | if ((c >= '0') && (c <= '9')) { | ||
772 | *num *= 10; | ||
773 | *num += c -'0'; | ||
774 | } else | ||
775 | break; | ||
776 | } | ||
777 | return i; | ||
778 | } | ||
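/* Example: given user input "1500 ", num_arg() accumulates *num = 1500 and
 * returns 4, the number of digit characters consumed; the caller then
 * advances its offset by that return value before reading the next token.
 */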
779 | |||
780 | static int strn_len(const char __user *user_buffer, unsigned int maxlen) | ||
781 | { | ||
782 | int i = 0; | ||
783 | |||
784 | for(; i < maxlen; i++) { | ||
785 | char c; | ||
786 | if (get_user(c, &user_buffer[i])) | ||
787 | return -EFAULT; | ||
788 | switch (c) { | ||
789 | case '\"': | ||
790 | case '\n': | ||
791 | case '\r': | ||
792 | case '\t': | ||
793 | case ' ': | ||
794 | goto done_str; | ||
796 | default: | ||
797 | break; | ||
798 | }; | ||
799 | } | ||
800 | done_str: | ||
801 | |||
802 | return i; | ||
803 | } | ||
804 | |||
805 | static int proc_if_write(struct file *file, const char __user *user_buffer, | ||
806 | unsigned long count, void *data) | ||
807 | { | ||
808 | int i = 0, max, len; | ||
809 | char name[16], valstr[32]; | ||
810 | unsigned long value = 0; | ||
811 | struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); | ||
812 | char* pg_result = NULL; | ||
813 | int tmp = 0; | ||
814 | char buf[128]; | ||
815 | |||
816 | pg_result = &(pkt_dev->result[0]); | ||
817 | |||
818 | if (count < 1) { | ||
819 | printk("pktgen: wrong command format\n"); | ||
820 | return -EINVAL; | ||
821 | } | ||
822 | |||
823 | max = count - i; | ||
824 | tmp = count_trail_chars(&user_buffer[i], max); | ||
825 | if (tmp < 0) { | ||
826 | printk("pktgen: illegal format\n"); | ||
827 | return tmp; | ||
828 | } | ||
829 | i += tmp; | ||
830 | |||
831 | /* Read variable name */ | ||
832 | |||
833 | len = strn_len(&user_buffer[i], sizeof(name) - 1); | ||
834 | if (len < 0) { return len; } | ||
835 | memset(name, 0, sizeof(name)); | ||
836 | if (copy_from_user(name, &user_buffer[i], len) ) | ||
837 | return -EFAULT; | ||
838 | i += len; | ||
839 | |||
840 | max = count - i; | ||
841 | len = count_trail_chars(&user_buffer[i], max); | ||
842 | if (len < 0) | ||
843 | return len; | ||
844 | |||
845 | i += len; | ||
846 | |||
847 | if (debug) { | ||
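/* NB: 'count' is user-controlled here, so this variable-length array
 * can consume an arbitrary amount of kernel stack; acceptable only
 * because this path is debug-only.
 */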
848 | char tb[count + 1]; | ||
849 | if (copy_from_user(tb, user_buffer, count)) | ||
850 | return -EFAULT; | ||
851 | tb[count] = 0; | ||
852 | printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); | ||
853 | } | ||
854 | |||
855 | if (!strcmp(name, "min_pkt_size")) { | ||
856 | len = num_arg(&user_buffer[i], 10, &value); | ||
857 | if (len < 0) { return len; } | ||
858 | i += len; | ||
859 | if (value < 14+20+8) | ||
860 | value = 14+20+8; | ||
861 | if (value != pkt_dev->min_pkt_size) { | ||
862 | pkt_dev->min_pkt_size = value; | ||
863 | pkt_dev->cur_pkt_size = value; | ||
864 | } | ||
865 | sprintf(pg_result, "OK: min_pkt_size=%u", pkt_dev->min_pkt_size); | ||
866 | return count; | ||
867 | } | ||
868 | |||
869 | if (!strcmp(name, "max_pkt_size")) { | ||
870 | len = num_arg(&user_buffer[i], 10, &value); | ||
871 | if (len < 0) { return len; } | ||
872 | i += len; | ||
873 | if (value < 14+20+8) | ||
874 | value = 14+20+8; | ||
875 | if (value != pkt_dev->max_pkt_size) { | ||
876 | pkt_dev->max_pkt_size = value; | ||
877 | pkt_dev->cur_pkt_size = value; | ||
878 | } | ||
879 | sprintf(pg_result, "OK: max_pkt_size=%u", pkt_dev->max_pkt_size); | ||
880 | return count; | ||
881 | } | ||
882 | |||
883 | /* Shortcut for min = max */ | ||
884 | |||
885 | if (!strcmp(name, "pkt_size")) { | ||
886 | len = num_arg(&user_buffer[i], 10, &value); | ||
887 | if (len < 0) { return len; } | ||
888 | i += len; | ||
889 | if (value < 14+20+8) | ||
890 | value = 14+20+8; | ||
891 | if (value != pkt_dev->min_pkt_size) { | ||
892 | pkt_dev->min_pkt_size = value; | ||
893 | pkt_dev->max_pkt_size = value; | ||
894 | pkt_dev->cur_pkt_size = value; | ||
895 | } | ||
896 | sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size); | ||
897 | return count; | ||
898 | } | ||
899 | |||
900 | if (!strcmp(name, "debug")) { | ||
901 | len = num_arg(&user_buffer[i], 10, &value); | ||
902 | if (len < 0) { return len; } | ||
903 | i += len; | ||
904 | debug = value; | ||
905 | sprintf(pg_result, "OK: debug=%u", debug); | ||
906 | return count; | ||
907 | } | ||
908 | |||
909 | if (!strcmp(name, "frags")) { | ||
910 | len = num_arg(&user_buffer[i], 10, &value); | ||
911 | if (len < 0) { return len; } | ||
912 | i += len; | ||
913 | pkt_dev->nfrags = value; | ||
914 | sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags); | ||
915 | return count; | ||
916 | } | ||
917 | if (!strcmp(name, "delay")) { | ||
918 | len = num_arg(&user_buffer[i], 10, &value); | ||
919 | if (len < 0) { return len; } | ||
920 | i += len; | ||
921 | if (value == 0x7FFFFFFF) { | ||
922 | pkt_dev->delay_us = 0x7FFFFFFF; | ||
923 | pkt_dev->delay_ns = 0; | ||
924 | } else { | ||
925 | pkt_dev->delay_us = value / 1000; | ||
926 | pkt_dev->delay_ns = value % 1000; | ||
927 | } | ||
928 | sprintf(pg_result, "OK: delay=%u", 1000*pkt_dev->delay_us+pkt_dev->delay_ns); | ||
929 | return count; | ||
930 | } | ||
931 | if (!strcmp(name, "udp_src_min")) { | ||
932 | len = num_arg(&user_buffer[i], 10, &value); | ||
933 | if (len < 0) { return len; } | ||
934 | i += len; | ||
935 | if (value != pkt_dev->udp_src_min) { | ||
936 | pkt_dev->udp_src_min = value; | ||
937 | pkt_dev->cur_udp_src = value; | ||
938 | } | ||
939 | sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min); | ||
940 | return count; | ||
941 | } | ||
942 | if (!strcmp(name, "udp_dst_min")) { | ||
943 | len = num_arg(&user_buffer[i], 10, &value); | ||
944 | if (len < 0) { return len; } | ||
945 | i += len; | ||
946 | if (value != pkt_dev->udp_dst_min) { | ||
947 | pkt_dev->udp_dst_min = value; | ||
948 | pkt_dev->cur_udp_dst = value; | ||
949 | } | ||
950 | sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min); | ||
951 | return count; | ||
952 | } | ||
953 | if (!strcmp(name, "udp_src_max")) { | ||
954 | len = num_arg(&user_buffer[i], 10, &value); | ||
955 | if (len < 0) { return len; } | ||
956 | i += len; | ||
957 | if (value != pkt_dev->udp_src_max) { | ||
958 | pkt_dev->udp_src_max = value; | ||
959 | pkt_dev->cur_udp_src = value; | ||
960 | } | ||
961 | sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max); | ||
962 | return count; | ||
963 | } | ||
964 | if (!strcmp(name, "udp_dst_max")) { | ||
965 | len = num_arg(&user_buffer[i], 10, &value); | ||
966 | if (len < 0) { return len; } | ||
967 | i += len; | ||
968 | if (value != pkt_dev->udp_dst_max) { | ||
969 | pkt_dev->udp_dst_max = value; | ||
970 | pkt_dev->cur_udp_dst = value; | ||
971 | } | ||
972 | sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max); | ||
973 | return count; | ||
974 | } | ||
975 | if (!strcmp(name, "clone_skb")) { | ||
976 | len = num_arg(&user_buffer[i], 10, &value); | ||
977 | if (len < 0) { return len; } | ||
978 | i += len; | ||
979 | pkt_dev->clone_skb = value; | ||
980 | |||
981 | sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); | ||
982 | return count; | ||
983 | } | ||
984 | if (!strcmp(name, "count")) { | ||
985 | len = num_arg(&user_buffer[i], 10, &value); | ||
986 | if (len < 0) { return len; } | ||
987 | i += len; | ||
988 | pkt_dev->count = value; | ||
989 | sprintf(pg_result, "OK: count=%llu", | ||
990 | (unsigned long long) pkt_dev->count); | ||
991 | return count; | ||
992 | } | ||
993 | if (!strcmp(name, "src_mac_count")) { | ||
994 | len = num_arg(&user_buffer[i], 10, &value); | ||
995 | if (len < 0) { return len; } | ||
996 | i += len; | ||
997 | if (pkt_dev->src_mac_count != value) { | ||
998 | pkt_dev->src_mac_count = value; | ||
999 | pkt_dev->cur_src_mac_offset = 0; | ||
1000 | } | ||
1001 | sprintf(pg_result, "OK: src_mac_count=%d", pkt_dev->src_mac_count); | ||
1002 | return count; | ||
1003 | } | ||
1004 | if (!strcmp(name, "dst_mac_count")) { | ||
1005 | len = num_arg(&user_buffer[i], 10, &value); | ||
1006 | if (len < 0) { return len; } | ||
1007 | i += len; | ||
1008 | if (pkt_dev->dst_mac_count != value) { | ||
1009 | pkt_dev->dst_mac_count = value; | ||
1010 | pkt_dev->cur_dst_mac_offset = 0; | ||
1011 | } | ||
1012 | sprintf(pg_result, "OK: dst_mac_count=%d", pkt_dev->dst_mac_count); | ||
1013 | return count; | ||
1014 | } | ||
1015 | if (!strcmp(name, "flag")) { | ||
1016 | char f[32]; | ||
1017 | memset(f, 0, 32); | ||
1018 | len = strn_len(&user_buffer[i], sizeof(f) - 1); | ||
1019 | if (len < 0) { return len; } | ||
1020 | if (copy_from_user(f, &user_buffer[i], len)) | ||
1021 | return -EFAULT; | ||
1022 | i += len; | ||
1023 | if (strcmp(f, "IPSRC_RND") == 0) | ||
1024 | pkt_dev->flags |= F_IPSRC_RND; | ||
1025 | |||
1026 | else if (strcmp(f, "!IPSRC_RND") == 0) | ||
1027 | pkt_dev->flags &= ~F_IPSRC_RND; | ||
1028 | |||
1029 | else if (strcmp(f, "TXSIZE_RND") == 0) | ||
1030 | pkt_dev->flags |= F_TXSIZE_RND; | ||
1031 | |||
1032 | else if (strcmp(f, "!TXSIZE_RND") == 0) | ||
1033 | pkt_dev->flags &= ~F_TXSIZE_RND; | ||
1034 | |||
1035 | else if (strcmp(f, "IPDST_RND") == 0) | ||
1036 | pkt_dev->flags |= F_IPDST_RND; | ||
1037 | |||
1038 | else if (strcmp(f, "!IPDST_RND") == 0) | ||
1039 | pkt_dev->flags &= ~F_IPDST_RND; | ||
1040 | |||
1041 | else if (strcmp(f, "UDPSRC_RND") == 0) | ||
1042 | pkt_dev->flags |= F_UDPSRC_RND; | ||
1043 | |||
1044 | else if (strcmp(f, "!UDPSRC_RND") == 0) | ||
1045 | pkt_dev->flags &= ~F_UDPSRC_RND; | ||
1046 | |||
1047 | else if (strcmp(f, "UDPDST_RND") == 0) | ||
1048 | pkt_dev->flags |= F_UDPDST_RND; | ||
1049 | |||
1050 | else if (strcmp(f, "!UDPDST_RND") == 0) | ||
1051 | pkt_dev->flags &= ~F_UDPDST_RND; | ||
1052 | |||
1053 | else if (strcmp(f, "MACSRC_RND") == 0) | ||
1054 | pkt_dev->flags |= F_MACSRC_RND; | ||
1055 | |||
1056 | else if (strcmp(f, "!MACSRC_RND") == 0) | ||
1057 | pkt_dev->flags &= ~F_MACSRC_RND; | ||
1058 | |||
1059 | else if (strcmp(f, "MACDST_RND") == 0) | ||
1060 | pkt_dev->flags |= F_MACDST_RND; | ||
1061 | |||
1062 | else if (strcmp(f, "!MACDST_RND") == 0) | ||
1063 | pkt_dev->flags &= ~F_MACDST_RND; | ||
1064 | |||
1065 | else { | ||
1066 | sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags (prepend ! to un-set a flag):\n%s", | ||
1067 | f, | ||
1068 | "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n"); | ||
1069 | return count; | ||
1070 | } | ||
1071 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); | ||
1072 | return count; | ||
1073 | } | ||
1074 | if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { | ||
1075 | len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); | ||
1076 | if (len < 0) { return len; } | ||
1077 | |||
1078 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1079 | return -EFAULT; | ||
1080 | buf[len] = 0; | ||
1081 | if (strcmp(buf, pkt_dev->dst_min) != 0) { | ||
1082 | memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min)); | ||
1083 | strncpy(pkt_dev->dst_min, buf, len); | ||
1084 | pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); | ||
1085 | pkt_dev->cur_daddr = pkt_dev->daddr_min; | ||
1086 | } | ||
1087 | if(debug) | ||
1088 | printk("pktgen: dst_min set to: %s\n", pkt_dev->dst_min); | ||
1089 | i += len; | ||
1090 | sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); | ||
1091 | return count; | ||
1092 | } | ||
1093 | if (!strcmp(name, "dst_max")) { | ||
1094 | len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); | ||
1095 | if (len < 0) { return len; } | ||
1096 | |||
1097 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1098 | return -EFAULT; | ||
1099 | |||
1100 | buf[len] = 0; | ||
1101 | if (strcmp(buf, pkt_dev->dst_max) != 0) { | ||
1102 | memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max)); | ||
1103 | strncpy(pkt_dev->dst_max, buf, len); | ||
1104 | pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); | ||
1105 | pkt_dev->cur_daddr = pkt_dev->daddr_max; | ||
1106 | } | ||
1107 | if(debug) | ||
1108 | printk("pktgen: dst_max set to: %s\n", pkt_dev->dst_max); | ||
1109 | i += len; | ||
1110 | sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); | ||
1111 | return count; | ||
1112 | } | ||
1113 | if (!strcmp(name, "dst6")) { | ||
1114 | len = strn_len(&user_buffer[i], sizeof(buf) - 1); | ||
1115 | if (len < 0) return len; | ||
1116 | |||
1117 | pkt_dev->flags |= F_IPV6; | ||
1118 | |||
1119 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1120 | return -EFAULT; | ||
1121 | buf[len] = 0; | ||
1122 | |||
1123 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); | ||
1124 | fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); | ||
1125 | |||
1126 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); | ||
1127 | |||
1128 | if(debug) | ||
1129 | printk("pktgen: dst6 set to: %s\n", buf); | ||
1130 | |||
1131 | i += len; | ||
1132 | sprintf(pg_result, "OK: dst6=%s", buf); | ||
1133 | return count; | ||
1134 | } | ||
1135 | if (!strcmp(name, "dst6_min")) { | ||
1136 | len = strn_len(&user_buffer[i], sizeof(buf) - 1); | ||
1137 | if (len < 0) return len; | ||
1138 | |||
1139 | pkt_dev->flags |= F_IPV6; | ||
1140 | |||
1141 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1142 | return -EFAULT; | ||
1143 | buf[len] = 0; | ||
1144 | |||
1145 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | ||
1146 | fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | ||
1147 | |||
1148 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); | ||
1149 | if(debug) | ||
1150 | printk("pktgen: dst6_min set to: %s\n", buf); | ||
1151 | |||
1152 | i += len; | ||
1153 | sprintf(pg_result, "OK: dst6_min=%s", buf); | ||
1154 | return count; | ||
1155 | } | ||
1156 | if (!strcmp(name, "dst6_max")) { | ||
1157 | len = strn_len(&user_buffer[i], sizeof(buf) - 1); | ||
1158 | if (len < 0) return len; | ||
1159 | |||
1160 | pkt_dev->flags |= F_IPV6; | ||
1161 | |||
1162 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1163 | return -EFAULT; | ||
1164 | buf[len] = 0; | ||
1165 | |||
1166 | scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); | ||
1167 | fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); | ||
1168 | |||
1169 | if(debug) | ||
1170 | printk("pktgen: dst6_max set to: %s\n", buf); | ||
1171 | |||
1172 | i += len; | ||
1173 | sprintf(pg_result, "OK: dst6_max=%s", buf); | ||
1174 | return count; | ||
1175 | } | ||
1176 | if (!strcmp(name, "src6")) { | ||
1177 | len = strn_len(&user_buffer[i], sizeof(buf) - 1); | ||
1178 | if (len < 0) return len; | ||
1179 | |||
1180 | pkt_dev->flags |= F_IPV6; | ||
1181 | |||
1182 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1183 | return -EFAULT; | ||
1184 | buf[len] = 0; | ||
1185 | |||
1186 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); | ||
1187 | fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); | ||
1188 | |||
1189 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); | ||
1190 | |||
1191 | if(debug) | ||
1192 | printk("pktgen: src6 set to: %s\n", buf); | ||
1193 | |||
1194 | i += len; | ||
1195 | sprintf(pg_result, "OK: src6=%s", buf); | ||
1196 | return count; | ||
1197 | } | ||
1198 | if (!strcmp(name, "src_min")) { | ||
1199 | len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); | ||
1200 | if (len < 0) { return len; } | ||
1201 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1202 | return -EFAULT; | ||
1203 | buf[len] = 0; | ||
1204 | if (strcmp(buf, pkt_dev->src_min) != 0) { | ||
1205 | memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min)); | ||
1206 | strncpy(pkt_dev->src_min, buf, len); | ||
1207 | pkt_dev->saddr_min = in_aton(pkt_dev->src_min); | ||
1208 | pkt_dev->cur_saddr = pkt_dev->saddr_min; | ||
1209 | } | ||
1210 | if(debug) | ||
1211 | printk("pktgen: src_min set to: %s\n", pkt_dev->src_min); | ||
1212 | i += len; | ||
1213 | sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); | ||
1214 | return count; | ||
1215 | } | ||
1216 | if (!strcmp(name, "src_max")) { | ||
1217 | len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); | ||
1218 | if (len < 0) { return len; } | ||
1219 | if (copy_from_user(buf, &user_buffer[i], len)) | ||
1220 | return -EFAULT; | ||
1221 | buf[len] = 0; | ||
1222 | if (strcmp(buf, pkt_dev->src_max) != 0) { | ||
1223 | memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max)); | ||
1224 | strncpy(pkt_dev->src_max, buf, len); | ||
1225 | pkt_dev->saddr_max = in_aton(pkt_dev->src_max); | ||
1226 | pkt_dev->cur_saddr = pkt_dev->saddr_max; | ||
1227 | } | ||
1228 | if(debug) | ||
1229 | printk("pktgen: src_max set to: %s\n", pkt_dev->src_max); | ||
1230 | i += len; | ||
1231 | sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); | ||
1232 | return count; | ||
1233 | } | ||
1234 | if (!strcmp(name, "dst_mac")) { | ||
1235 | char *v = valstr; | ||
1236 | unsigned char old_dmac[6]; | ||
1237 | unsigned char *m = pkt_dev->dst_mac; | ||
1238 | memcpy(old_dmac, pkt_dev->dst_mac, 6); | ||
1239 | |||
1240 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); | ||
1241 | if (len < 0) { return len; } | ||
1242 | memset(valstr, 0, sizeof(valstr)); | ||
1243 | if( copy_from_user(valstr, &user_buffer[i], len)) | ||
1244 | return -EFAULT; | ||
1245 | i += len; | ||
1246 | |||
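/* Parse a colon-separated hex MAC string such as "00:10:A4:7B:EA:28"
 * into pkt_dev->dst_mac, one byte per colon-delimited group.
 */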
1247 | for(*m = 0;*v && m < pkt_dev->dst_mac + 6; v++) { | ||
1248 | if (*v >= '0' && *v <= '9') { | ||
1249 | *m *= 16; | ||
1250 | *m += *v - '0'; | ||
1251 | } | ||
1252 | if (*v >= 'A' && *v <= 'F') { | ||
1253 | *m *= 16; | ||
1254 | *m += *v - 'A' + 10; | ||
1255 | } | ||
1256 | if (*v >= 'a' && *v <= 'f') { | ||
1257 | *m *= 16; | ||
1258 | *m += *v - 'a' + 10; | ||
1259 | } | ||
1260 | if (*v == ':') { | ||
1261 | m++; | ||
1262 | *m = 0; | ||
1263 | } | ||
1264 | } | ||
1265 | |||
1266 | /* Set up Dest MAC */ | ||
1267 | if (memcmp(old_dmac, pkt_dev->dst_mac, 6) != 0) | ||
1268 | memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6); | ||
1269 | |||
1270 | sprintf(pg_result, "OK: dstmac"); | ||
1271 | return count; | ||
1272 | } | ||
1273 | if (!strcmp(name, "src_mac")) { | ||
1274 | char *v = valstr; | ||
1275 | unsigned char *m = pkt_dev->src_mac; | ||
1276 | |||
1277 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); | ||
1278 | if (len < 0) { return len; } | ||
1279 | memset(valstr, 0, sizeof(valstr)); | ||
1280 | if( copy_from_user(valstr, &user_buffer[i], len)) | ||
1281 | return -EFAULT; | ||
1282 | i += len; | ||
1283 | |||
1284 | for(*m = 0;*v && m < pkt_dev->src_mac + 6; v++) { | ||
1285 | if (*v >= '0' && *v <= '9') { | ||
1286 | *m *= 16; | ||
1287 | *m += *v - '0'; | ||
1288 | } | ||
1289 | if (*v >= 'A' && *v <= 'F') { | ||
1290 | *m *= 16; | ||
1291 | *m += *v - 'A' + 10; | ||
1292 | } | ||
1293 | if (*v >= 'a' && *v <= 'f') { | ||
1294 | *m *= 16; | ||
1295 | *m += *v - 'a' + 10; | ||
1296 | } | ||
1297 | if (*v == ':') { | ||
1298 | m++; | ||
1299 | *m = 0; | ||
1300 | } | ||
1301 | } | ||
1302 | |||
1303 | sprintf(pg_result, "OK: srcmac"); | ||
1304 | return count; | ||
1305 | } | ||
1306 | |||
1307 | if (!strcmp(name, "clear_counters")) { | ||
1308 | pktgen_clear_counters(pkt_dev); | ||
1309 | sprintf(pg_result, "OK: Clearing counters.\n"); | ||
1310 | return count; | ||
1311 | } | ||
1312 | |||
1313 | if (!strcmp(name, "flows")) { | ||
1314 | len = num_arg(&user_buffer[i], 10, &value); | ||
1315 | if (len < 0) { return len; } | ||
1316 | i += len; | ||
1317 | if (value > MAX_CFLOWS) | ||
1318 | value = MAX_CFLOWS; | ||
1319 | |||
1320 | pkt_dev->cflows = value; | ||
1321 | sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); | ||
1322 | return count; | ||
1323 | } | ||
1324 | |||
1325 | if (!strcmp(name, "flowlen")) { | ||
1326 | len = num_arg(&user_buffer[i], 10, &value); | ||
1327 | if (len < 0) { return len; } | ||
1328 | i += len; | ||
1329 | pkt_dev->lflow = value; | ||
1330 | sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); | ||
1331 | return count; | ||
1332 | } | ||
1333 | |||
1334 | sprintf(pkt_dev->result, "No such parameter \"%s\"", name); | ||
1335 | return -EINVAL; | ||
1336 | } | ||
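/* As noted in the file header, every command handled above leaves a status
 * string ("OK: ..." or an error) in pkt_dev->result, which the writer is
 * expected to read back through the same proc file (see proc_if_read()).
 */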
1337 | |||
1338 | static int proc_thread_read(char *buf , char **start, off_t offset, | ||
1339 | int len, int *eof, void *data) | ||
1340 | { | ||
1341 | char *p; | ||
1342 | struct pktgen_thread *t = (struct pktgen_thread*)(data); | ||
1343 | struct pktgen_dev *pkt_dev = NULL; | ||
1344 | |||
1345 | |||
1346 | if (!t) { | ||
1347 | printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); | ||
1348 | return -EINVAL; | ||
1349 | } | ||
1350 | |||
1351 | p = buf; | ||
1352 | p += sprintf(p, "Name: %s max_before_softirq: %d\n", | ||
1353 | t->name, t->max_before_softirq); | ||
1354 | |||
1355 | p += sprintf(p, "Running: "); | ||
1356 | |||
1357 | if_lock(t); | ||
1358 | for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) | ||
1359 | if(pkt_dev->running) | ||
1360 | p += sprintf(p, "%s ", pkt_dev->ifname); | ||
1361 | |||
1362 | p += sprintf(p, "\nStopped: "); | ||
1363 | |||
1364 | for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) | ||
1365 | if(!pkt_dev->running) | ||
1366 | p += sprintf(p, "%s ", pkt_dev->ifname); | ||
1367 | |||
1368 | if (t->result[0]) | ||
1369 | p += sprintf(p, "\nResult: %s\n", t->result); | ||
1370 | else | ||
1371 | p += sprintf(p, "\nResult: NA\n"); | ||
1372 | |||
1373 | *eof = 1; | ||
1374 | |||
1375 | if_unlock(t); | ||
1376 | |||
1377 | return p - buf; | ||
1378 | } | ||
1379 | |||
1380 | static int proc_thread_write(struct file *file, const char __user *user_buffer, | ||
1381 | unsigned long count, void *data) | ||
1382 | { | ||
1383 | int i = 0, max, len, ret; | ||
1384 | char name[40]; | ||
1385 | struct pktgen_thread *t; | ||
1386 | char *pg_result; | ||
1387 | unsigned long value = 0; | ||
1388 | |||
1389 | if (count < 1) { | ||
1390 | // sprintf(pg_result, "Wrong command format"); | ||
1391 | return -EINVAL; | ||
1392 | } | ||
1393 | |||
1394 | max = count - i; | ||
1395 | len = count_trail_chars(&user_buffer[i], max); | ||
1396 | if (len < 0) | ||
1397 | return len; | ||
1398 | |||
1399 | i += len; | ||
1400 | |||
1401 | /* Read variable name */ | ||
1402 | |||
1403 | len = strn_len(&user_buffer[i], sizeof(name) - 1); | ||
1404 | if (len < 0) | ||
1405 | return len; | ||
1406 | |||
1407 | memset(name, 0, sizeof(name)); | ||
1408 | if (copy_from_user(name, &user_buffer[i], len)) | ||
1409 | return -EFAULT; | ||
1410 | i += len; | ||
1411 | |||
1412 | max = count - i; | ||
1413 | len = count_trail_chars(&user_buffer[i], max); | ||
1414 | if (len < 0) | ||
1415 | return len; | ||
1416 | |||
1417 | i += len; | ||
1418 | |||
1419 | if (debug) | ||
1420 | printk("pktgen: t=%s, count=%lu\n", name, count); | ||
1421 | |||
1422 | |||
1423 | t = (struct pktgen_thread*)(data); | ||
1424 | if(!t) { | ||
1425 | printk("pktgen: ERROR: No thread\n"); | ||
1426 | ret = -EINVAL; | ||
1427 | goto out; | ||
1428 | } | ||
1429 | |||
1430 | pg_result = &(t->result[0]); | ||
1431 | |||
1432 | if (!strcmp(name, "add_device")) { | ||
1433 | char f[32]; | ||
1434 | memset(f, 0, 32); | ||
1435 | len = strn_len(&user_buffer[i], sizeof(f) - 1); | ||
1436 | if (len < 0) { | ||
1437 | ret = len; | ||
1438 | goto out; | ||
1439 | } | ||
1440 | if( copy_from_user(f, &user_buffer[i], len) ) | ||
1441 | return -EFAULT; | ||
1442 | i += len; | ||
1443 | thread_lock(); | ||
1444 | pktgen_add_device(t, f); | ||
1445 | thread_unlock(); | ||
1446 | ret = count; | ||
1447 | sprintf(pg_result, "OK: add_device=%s", f); | ||
1448 | goto out; | ||
1449 | } | ||
1450 | |||
1451 | if (!strcmp(name, "rem_device_all")) { | ||
1452 | thread_lock(); | ||
1453 | t->control |= T_REMDEV; | ||
1454 | thread_unlock(); | ||
1455 | current->state = TASK_INTERRUPTIBLE; | ||
1456 | schedule_timeout(HZ/8); /* Propagate thread->control */ | ||
1457 | ret = count; | ||
1458 | sprintf(pg_result, "OK: rem_device_all"); | ||
1459 | goto out; | ||
1460 | } | ||
1461 | |||
1462 | if (!strcmp(name, "max_before_softirq")) { | ||
1463 | len = num_arg(&user_buffer[i], 10, &value); | ||
1464 | if (len < 0) { | ||
1465 | ret = len; | ||
1466 | goto out; | ||
1467 | } | ||
1464 | thread_lock(); | ||
1465 | t->max_before_softirq = value; | ||
1466 | thread_unlock(); | ||
1467 | ret = count; | ||
1468 | sprintf(pg_result, "OK: max_before_softirq=%lu", value); | ||
1469 | goto out; | ||
1470 | } | ||
1471 | |||
1472 | ret = -EINVAL; | ||
1473 | out: | ||
1474 | |||
1475 | return ret; | ||
1476 | } | ||
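/* Thread-level control sketch matching the commands handled above (the
 * thread file name "kpktgend_0" is illustrative):
 *
 *   echo "add_device eth1"        > /proc/net/pktgen/kpktgend_0
 *   echo "max_before_softirq 100" > /proc/net/pktgen/kpktgend_0
 *   echo "rem_device_all"         > /proc/net/pktgen/kpktgend_0
 */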
1477 | |||
1478 | static int create_proc_dir(void) | ||
1479 | { | ||
1480 | int len; | ||
1481 | /* does the proc dir already exist? */ | ||
1482 | len = strlen(PG_PROC_DIR); | ||
1483 | |||
1484 | for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { | ||
1485 | if ((pg_proc_dir->namelen == len) && | ||
1486 | (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) | ||
1487 | break; | ||
1488 | } | ||
1489 | |||
1490 | if (!pg_proc_dir) | ||
1491 | pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); | ||
1492 | |||
1493 | if (!pg_proc_dir) | ||
1494 | return -ENODEV; | ||
1495 | |||
1496 | return 0; | ||
1497 | } | ||
1498 | |||
1499 | static int remove_proc_dir(void) | ||
1500 | { | ||
1501 | remove_proc_entry(PG_PROC_DIR, proc_net); | ||
1502 | return 0; | ||
1503 | } | ||
1504 | |||
1505 | /* "NN" = find or remove: look ifname up in every thread, removing it if asked */ | ||
1506 | static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) | ||
1507 | { | ||
1508 | struct pktgen_thread *t; | ||
1509 | struct pktgen_dev *pkt_dev = NULL; | ||
1510 | |||
1511 | t = pktgen_threads; | ||
1512 | |||
1513 | while (t) { | ||
1514 | pkt_dev = pktgen_find_dev(t, ifname); | ||
1515 | if (pkt_dev) { | ||
1516 | if(remove) { | ||
1517 | if_lock(t); | ||
1518 | pktgen_remove_device(t, pkt_dev); | ||
1519 | if_unlock(t); | ||
1520 | } | ||
1521 | break; | ||
1522 | } | ||
1523 | t = t->next; | ||
1524 | } | ||
1525 | return pkt_dev; | ||
1526 | } | ||
1527 | |||
1528 | static struct pktgen_dev *pktgen_NN_threads(const char* ifname, int remove) | ||
1529 | { | ||
1530 | struct pktgen_dev *pkt_dev = NULL; | ||
1531 | thread_lock(); | ||
1532 | pkt_dev = __pktgen_NN_threads(ifname, remove); | ||
1533 | thread_unlock(); | ||
1534 | return pkt_dev; | ||
1535 | } | ||
1536 | |||
1537 | static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) | ||
1538 | { | ||
1539 | struct net_device *dev = (struct net_device *)(ptr); | ||
1540 | |||
1541 | /* It is OK that we do not hold the group lock right now, | ||
1542 | * as we run under the RTNL lock. | ||
1543 | */ | ||
1544 | |||
1545 | switch (event) { | ||
1546 | case NETDEV_CHANGEADDR: | ||
1547 | case NETDEV_GOING_DOWN: | ||
1548 | case NETDEV_DOWN: | ||
1549 | case NETDEV_UP: | ||
1550 | /* Ignore for now */ | ||
1551 | break; | ||
1552 | |||
1553 | case NETDEV_UNREGISTER: | ||
1554 | pktgen_NN_threads(dev->name, REMOVE); | ||
1555 | break; | ||
1556 | }; | ||
1557 | |||
1558 | return NOTIFY_DONE; | ||
1559 | } | ||
1560 | |||
1561 | /* Associate pktgen_dev with a device. */ | ||
1562 | |||
1563 | static struct net_device* pktgen_setup_dev(struct pktgen_dev *pkt_dev) { | ||
1564 | struct net_device *odev; | ||
1565 | |||
1566 | /* Clean old setups */ | ||
1567 | |||
1568 | if (pkt_dev->odev) { | ||
1569 | dev_put(pkt_dev->odev); | ||
1570 | pkt_dev->odev = NULL; | ||
1571 | } | ||
1572 | |||
1573 | odev = dev_get_by_name(pkt_dev->ifname); | ||
1574 | |||
1575 | if (!odev) { | ||
1576 | printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname); | ||
1577 | goto out; | ||
1578 | } | ||
1579 | if (odev->type != ARPHRD_ETHER) { | ||
1580 | printk("pktgen: not an ethernet device: \"%s\"\n", pkt_dev->ifname); | ||
1581 | goto out_put; | ||
1582 | } | ||
1583 | if (!netif_running(odev)) { | ||
1584 | printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname); | ||
1585 | goto out_put; | ||
1586 | } | ||
1587 | pkt_dev->odev = odev; | ||
1588 | |||
1589 | return pkt_dev->odev; | ||
1590 | |||
1591 | out_put: | ||
1592 | dev_put(odev); | ||
1593 | out: | ||
1594 | return NULL; | ||
1595 | |||
1596 | } | ||
1597 | |||
1598 | /* Set up the internal pktgen_dev structure with the information | ||
1599 | * needed to create and send packets on this interface. | ||
1600 | */ | ||
1601 | static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | ||
1602 | { | ||
1603 | /* Try once more, just in case it works now. */ | ||
1604 | if (!pkt_dev->odev) | ||
1605 | pktgen_setup_dev(pkt_dev); | ||
1606 | |||
1607 | if (!pkt_dev->odev) { | ||
1608 | printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); | ||
1609 | sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); | ||
1610 | return; | ||
1611 | } | ||
1612 | |||
1613 | /* Default to the interface's MAC if not explicitly set. */ | ||
1614 | |||
1615 | if ((pkt_dev->src_mac[0] == 0) && | ||
1616 | (pkt_dev->src_mac[1] == 0) && | ||
1617 | (pkt_dev->src_mac[2] == 0) && | ||
1618 | (pkt_dev->src_mac[3] == 0) && | ||
1619 | (pkt_dev->src_mac[4] == 0) && | ||
1620 | (pkt_dev->src_mac[5] == 0)) { | ||
1621 | |||
1622 | memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, 6); | ||
1623 | } | ||
1624 | /* Set up Dest MAC */ | ||
1625 | memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6); | ||
1626 | |||
1627 | /* Set up pkt size */ | ||
1628 | pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size; | ||
1629 | |||
1630 | if(pkt_dev->flags & F_IPV6) { | ||
1631 | /* | ||
1632 | * Skip this automatic address setting until the needed locks | ||
1633 | * and functions get exported. | ||
1634 | */ | ||
1635 | |||
1636 | #ifdef NOTNOW | ||
1637 | int i, set = 0, err=1; | ||
1638 | struct inet6_dev *idev; | ||
1639 | |||
1640 | for(i=0; i< IN6_ADDR_HSIZE; i++) | ||
1641 | if(pkt_dev->cur_in6_saddr.s6_addr[i]) { | ||
1642 | set = 1; | ||
1643 | break; | ||
1644 | } | ||
1645 | |||
1646 | if(!set) { | ||
1647 | |||
1648 | /* | ||
1649 | * Use the link-level address if unconfigured. | ||
1650 | * | ||
1651 | * Switch to ipv6_get_lladdr() if/when it gets exported. | ||
1652 | */ | ||
1653 | |||
1654 | |||
1655 | read_lock(&addrconf_lock); | ||
1656 | if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { | ||
1657 | struct inet6_ifaddr *ifp; | ||
1658 | |||
1659 | read_lock_bh(&idev->lock); | ||
1660 | for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { | ||
1661 | if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { | ||
1662 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &ifp->addr); | ||
1663 | err = 0; | ||
1664 | break; | ||
1665 | } | ||
1666 | } | ||
1667 | read_unlock_bh(&idev->lock); | ||
1668 | } | ||
1669 | read_unlock(&addrconf_lock); | ||
1670 | if(err) printk("pktgen: ERROR: IPv6 link address not available.\n"); | ||
1671 | } | ||
1672 | #endif | ||
1673 | } | ||
1674 | else { | ||
1675 | pkt_dev->saddr_min = 0; | ||
1676 | pkt_dev->saddr_max = 0; | ||
1677 | if (strlen(pkt_dev->src_min) == 0) { | ||
1678 | |||
1679 | struct in_device *in_dev; | ||
1680 | |||
1681 | rcu_read_lock(); | ||
1682 | in_dev = __in_dev_get(pkt_dev->odev); | ||
1683 | if (in_dev) { | ||
1684 | if (in_dev->ifa_list) { | ||
1685 | pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; | ||
1686 | pkt_dev->saddr_max = pkt_dev->saddr_min; | ||
1687 | } | ||
1688 | __in_dev_put(in_dev); | ||
1689 | } | ||
1690 | rcu_read_unlock(); | ||
1691 | } | ||
1692 | else { | ||
1693 | pkt_dev->saddr_min = in_aton(pkt_dev->src_min); | ||
1694 | pkt_dev->saddr_max = in_aton(pkt_dev->src_max); | ||
1695 | } | ||
1696 | |||
1697 | pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); | ||
1698 | pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); | ||
1699 | } | ||
1700 | /* Initialize current values. */ | ||
1701 | pkt_dev->cur_dst_mac_offset = 0; | ||
1702 | pkt_dev->cur_src_mac_offset = 0; | ||
1703 | pkt_dev->cur_saddr = pkt_dev->saddr_min; | ||
1704 | pkt_dev->cur_daddr = pkt_dev->daddr_min; | ||
1705 | pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min; | ||
1706 | pkt_dev->cur_udp_src = pkt_dev->udp_src_min; | ||
1707 | pkt_dev->nflows = 0; | ||
1708 | } | ||
1709 | |||
1710 | static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) | ||
1711 | { | ||
1712 | __u64 start; | ||
1713 | __u64 now; | ||
1714 | |||
1715 | start = now = getCurUs(); | ||
1716 | printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); | ||
1717 | while (now < spin_until_us) { | ||
1718 | /* TODO: optimise sleeping behavior */ | ||
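| /* Sleep a jiffy at a time while more than one tick remains; inside | ||
| * the last tick, run pending softirqs down to ~100 us, then spin. | ||
| */ | ||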
1719 | if (spin_until_us - now > (1000000/HZ)+1) { | ||
1720 | current->state = TASK_INTERRUPTIBLE; | ||
1721 | schedule_timeout(1); | ||
1722 | } else if (spin_until_us - now > 100) { | ||
1723 | do_softirq(); | ||
1724 | if (!pkt_dev->running) | ||
1725 | return; | ||
1726 | if (need_resched()) | ||
1727 | schedule(); | ||
1728 | } | ||
1729 | |||
1730 | now = getCurUs(); | ||
1731 | } | ||
1732 | |||
1733 | pkt_dev->idle_acc += now - start; | ||
1734 | } | ||
1735 | |||
1736 | |||
1737 | /* Increment/randomize headers according to flags and current values | ||
1738 | * for IP src/dest, UDP src/dst port, MAC-Addr src/dst | ||
1739 | */ | ||
1740 | static void mod_cur_headers(struct pktgen_dev *pkt_dev) { | ||
1741 | __u32 imn; | ||
1742 | __u32 imx; | ||
1743 | int flow = 0; | ||
1744 | |||
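| /* Pick a random flow slot; a slot that has exceeded its per-flow | ||
| * budget (lflow) is restarted with a fresh destination. | ||
| */ | ||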
1745 | if(pkt_dev->cflows) { | ||
1746 | flow = pktgen_random() % pkt_dev->cflows; | ||
1747 | |||
1748 | if (pkt_dev->flows[flow].count > pkt_dev->lflow) | ||
1749 | pkt_dev->flows[flow].count = 0; | ||
1750 | } | ||
1751 | |||
1752 | |||
1753 | /* Deal with source MAC */ | ||
1754 | if (pkt_dev->src_mac_count > 1) { | ||
1755 | __u32 mc; | ||
1756 | __u32 tmp; | ||
1757 | |||
1758 | if (pkt_dev->flags & F_MACSRC_RND) | ||
1759 | mc = pktgen_random() % (pkt_dev->src_mac_count); | ||
1760 | else { | ||
1761 | mc = pkt_dev->cur_src_mac_offset++; | ||
1762 | if (pkt_dev->cur_src_mac_offset > pkt_dev->src_mac_count) | ||
1763 | pkt_dev->cur_src_mac_offset = 0; | ||
1764 | } | ||
1765 | |||
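| /* Add the offset to the base MAC one byte at a time, propagating | ||
| * the carry upward; hh[11] holds the lowest source-MAC byte in the | ||
| * cached ethernet header. | ||
| */ | ||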
1766 | tmp = pkt_dev->src_mac[5] + (mc & 0xFF); | ||
1767 | pkt_dev->hh[11] = tmp; | ||
1768 | tmp = (pkt_dev->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); | ||
1769 | pkt_dev->hh[10] = tmp; | ||
1770 | tmp = (pkt_dev->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); | ||
1771 | pkt_dev->hh[9] = tmp; | ||
1772 | tmp = (pkt_dev->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); | ||
1773 | pkt_dev->hh[8] = tmp; | ||
1774 | tmp = (pkt_dev->src_mac[1] + (tmp >> 8)); | ||
1775 | pkt_dev->hh[7] = tmp; | ||
1776 | } | ||
1777 | |||
1778 | /* Deal with Destination MAC */ | ||
1779 | if (pkt_dev->dst_mac_count > 1) { | ||
1780 | __u32 mc; | ||
1781 | __u32 tmp; | ||
1782 | |||
1783 | if (pkt_dev->flags & F_MACDST_RND) | ||
1784 | mc = pktgen_random() % (pkt_dev->dst_mac_count); | ||
1785 | |||
1786 | else { | ||
1787 | mc = pkt_dev->cur_dst_mac_offset++; | ||
1788 | if (pkt_dev->cur_dst_mac_offset > pkt_dev->dst_mac_count) { | ||
1789 | pkt_dev->cur_dst_mac_offset = 0; | ||
1790 | } | ||
1791 | } | ||
1792 | |||
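| /* Same carry-propagating add for the destination MAC (hh[0..5]). */ | ||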
1793 | tmp = pkt_dev->dst_mac[5] + (mc & 0xFF); | ||
1794 | pkt_dev->hh[5] = tmp; | ||
1795 | tmp = (pkt_dev->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); | ||
1796 | pkt_dev->hh[4] = tmp; | ||
1797 | tmp = (pkt_dev->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); | ||
1798 | pkt_dev->hh[3] = tmp; | ||
1799 | tmp = (pkt_dev->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); | ||
1800 | pkt_dev->hh[2] = tmp; | ||
1801 | tmp = (pkt_dev->dst_mac[1] + (tmp >> 8)); | ||
1802 | pkt_dev->hh[1] = tmp; | ||
1803 | } | ||
1804 | |||
1805 | if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { | ||
1806 | if (pkt_dev->flags & F_UDPSRC_RND) | ||
1807 | pkt_dev->cur_udp_src = ((pktgen_random() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) + pkt_dev->udp_src_min); | ||
1808 | |||
1809 | else { | ||
1810 | pkt_dev->cur_udp_src++; | ||
1811 | if (pkt_dev->cur_udp_src >= pkt_dev->udp_src_max) | ||
1812 | pkt_dev->cur_udp_src = pkt_dev->udp_src_min; | ||
1813 | } | ||
1814 | } | ||
1815 | |||
1816 | if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { | ||
1817 | if (pkt_dev->flags & F_UDPDST_RND) { | ||
1818 | pkt_dev->cur_udp_dst = ((pktgen_random() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) + pkt_dev->udp_dst_min); | ||
1819 | } | ||
1820 | else { | ||
1821 | pkt_dev->cur_udp_dst++; | ||
1822 | if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) | ||
1823 | pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min; | ||
1824 | } | ||
1825 | } | ||
1826 | |||
1827 | if (!(pkt_dev->flags & F_IPV6)) { | ||
1828 | |||
1829 | if ((imn = ntohl(pkt_dev->saddr_min)) < (imx = ntohl(pkt_dev->saddr_max))) { | ||
1830 | __u32 t; | ||
1831 | if (pkt_dev->flags & F_IPSRC_RND) | ||
1832 | t = ((pktgen_random() % (imx - imn)) + imn); | ||
1833 | else { | ||
1834 | t = ntohl(pkt_dev->cur_saddr); | ||
1835 | t++; | ||
1836 | if (t > imx) { | ||
1837 | t = imn; | ||
1838 | } | ||
1839 | } | ||
1840 | pkt_dev->cur_saddr = htonl(t); | ||
1841 | } | ||
1842 | |||
1843 | if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { | ||
1844 | pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; | ||
1845 | } else { | ||
1846 | |||
1847 | if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = ntohl(pkt_dev->daddr_max))) { | ||
1848 | __u32 t; | ||
1849 | if (pkt_dev->flags & F_IPDST_RND) { | ||
1850 | |||
1851 | t = ((pktgen_random() % (imx - imn)) + imn); | ||
1852 | t = htonl(t); | ||
1853 | |||
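| /* Re-draw until the address is a usable unicast destination. */ | ||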
1854 | while( LOOPBACK(t) || MULTICAST(t) || BADCLASS(t) || ZERONET(t) || LOCAL_MCAST(t) ) { | ||
1855 | t = ((pktgen_random() % (imx - imn)) + imn); | ||
1856 | t = htonl(t); | ||
1857 | } | ||
1858 | pkt_dev->cur_daddr = t; | ||
1859 | } | ||
1860 | |||
1861 | else { | ||
1862 | t = ntohl(pkt_dev->cur_daddr); | ||
1863 | t++; | ||
1864 | if (t > imx) { | ||
1865 | t = imn; | ||
1866 | } | ||
1867 | pkt_dev->cur_daddr = htonl(t); | ||
1868 | } | ||
1869 | } | ||
1870 | if(pkt_dev->cflows) { | ||
1871 | pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; | ||
1872 | pkt_dev->nflows++; | ||
1873 | } | ||
1874 | } | ||
1875 | } | ||
1876 | else /* IPv6 */ | ||
1877 | { | ||
1878 | if(pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && | ||
1879 | pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && | ||
1880 | pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && | ||
1881 | pkt_dev->min_in6_daddr.s6_addr32[3] == 0); | ||
1882 | else { | ||
1883 | int i; | ||
1884 | |||
1885 | /* Only random destinations are supported so far */ | ||
1886 | |||
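| /* OR in the min address and mask with the max, so every 32-bit | ||
| * word stays within the configured bounds (a bitwise, not | ||
| * arithmetic, clamp). | ||
| */ | ||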
1887 | for(i=0; i < 4; i++) { | ||
1888 | pkt_dev->cur_in6_daddr.s6_addr32[i] = | ||
1889 | ((pktgen_random() | | ||
1890 | pkt_dev->min_in6_daddr.s6_addr32[i]) & | ||
1891 | pkt_dev->max_in6_daddr.s6_addr32[i]); | ||
1892 | } | ||
1893 | } | ||
1894 | } | ||
1895 | |||
1896 | if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { | ||
1897 | __u32 t; | ||
1898 | if (pkt_dev->flags & F_TXSIZE_RND) { | ||
1899 | t = ((pktgen_random() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)) | ||
1900 | + pkt_dev->min_pkt_size); | ||
1901 | } | ||
1902 | else { | ||
1903 | t = pkt_dev->cur_pkt_size + 1; | ||
1904 | if (t > pkt_dev->max_pkt_size) | ||
1905 | t = pkt_dev->min_pkt_size; | ||
1906 | } | ||
1907 | pkt_dev->cur_pkt_size = t; | ||
1908 | } | ||
1909 | |||
1910 | pkt_dev->flows[flow].count++; | ||
1911 | } | ||
1912 | |||
1913 | |||
1914 | static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | ||
1915 | struct pktgen_dev *pkt_dev) | ||
1916 | { | ||
1917 | struct sk_buff *skb = NULL; | ||
1918 | __u8 *eth; | ||
1919 | struct udphdr *udph; | ||
1920 | int datalen, iplen; | ||
1921 | struct iphdr *iph; | ||
1922 | struct pktgen_hdr *pgh = NULL; | ||
1923 | |||
1924 | skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); | ||
1925 | if (!skb) { | ||
1926 | sprintf(pkt_dev->result, "No memory"); | ||
1927 | return NULL; | ||
1928 | } | ||
1929 | |||
1930 | skb_reserve(skb, 16); | ||
1931 | |||
1932 | /* Reserve for ethernet and IP header */ | ||
1933 | eth = (__u8 *) skb_push(skb, 14); | ||
1934 | iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); | ||
1935 | udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); | ||
1936 | |||
1937 | /* Update the current values of any fields we are incrementing | ||
1938 | * or randomizing. | ||
1939 | */ | ||
1940 | mod_cur_headers(pkt_dev); | ||
1941 | |||
1942 | memcpy(eth, pkt_dev->hh, 12); | ||
1943 | *(u16*)ð[12] = __constant_htons(ETH_P_IP); | ||
1944 | |||
1945 | datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */ | ||
1946 | if (datalen < sizeof(struct pktgen_hdr)) | ||
1947 | datalen = sizeof(struct pktgen_hdr); | ||
1948 | |||
1949 | udph->source = htons(pkt_dev->cur_udp_src); | ||
1950 | udph->dest = htons(pkt_dev->cur_udp_dst); | ||
1951 | udph->len = htons(datalen + 8); /* DATA + udphdr */ | ||
1952 | udph->check = 0; /* No checksum */ | ||
1953 | |||
1954 | iph->ihl = 5; | ||
1955 | iph->version = 4; | ||
1956 | iph->ttl = 32; | ||
1957 | iph->tos = 0; | ||
1958 | iph->protocol = IPPROTO_UDP; /* UDP */ | ||
1959 | iph->saddr = pkt_dev->cur_saddr; | ||
1960 | iph->daddr = pkt_dev->cur_daddr; | ||
1961 | iph->frag_off = 0; | ||
1962 | iplen = 20 + 8 + datalen; | ||
1963 | iph->tot_len = htons(iplen); | ||
1964 | iph->check = 0; | ||
1965 | iph->check = ip_fast_csum((void *) iph, iph->ihl); | ||
1966 | skb->protocol = __constant_htons(ETH_P_IP); | ||
1967 | skb->mac.raw = ((u8 *)iph) - 14; | ||
1968 | skb->dev = odev; | ||
1969 | skb->pkt_type = PACKET_HOST; | ||
1970 | |||
1971 | if (pkt_dev->nfrags <= 0) | ||
1972 | pgh = (struct pktgen_hdr *)skb_put(skb, datalen); | ||
1973 | else { | ||
1974 | int frags = pkt_dev->nfrags; | ||
1975 | int i; | ||
1976 | |||
1977 | pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8); | ||
1978 | |||
1979 | if (frags > MAX_SKB_FRAGS) | ||
1980 | frags = MAX_SKB_FRAGS; | ||
1981 | if (datalen > frags*PAGE_SIZE) { | ||
1982 | skb_put(skb, datalen-frags*PAGE_SIZE); | ||
1983 | datalen = frags*PAGE_SIZE; | ||
1984 | } | ||
1985 | |||
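| /* Attach fresh pages one at a time until the payload is consumed. */ | ||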
1986 | i = 0; | ||
1987 | while (datalen > 0) { | ||
1988 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
1989 | skb_shinfo(skb)->frags[i].page = page; | ||
1990 | skb_shinfo(skb)->frags[i].page_offset = 0; | ||
1991 | skb_shinfo(skb)->frags[i].size = | ||
1992 | (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); | ||
1993 | datalen -= skb_shinfo(skb)->frags[i].size; | ||
1994 | skb->len += skb_shinfo(skb)->frags[i].size; | ||
1995 | skb->data_len += skb_shinfo(skb)->frags[i].size; | ||
1996 | i++; | ||
1997 | skb_shinfo(skb)->nr_frags = i; | ||
1998 | } | ||
1999 | |||
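| /* Reach the requested frag count by repeatedly halving the last | ||
| * frag; both halves reference the same page at adjacent offsets. | ||
| */ | ||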
2000 | while (i < frags) { | ||
2001 | int rem; | ||
2002 | |||
2003 | if (i == 0) | ||
2004 | break; | ||
2005 | |||
2006 | rem = skb_shinfo(skb)->frags[i - 1].size / 2; | ||
2007 | if (rem == 0) | ||
2008 | break; | ||
2009 | |||
2010 | skb_shinfo(skb)->frags[i - 1].size -= rem; | ||
2011 | |||
2012 | skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1]; | ||
2013 | get_page(skb_shinfo(skb)->frags[i].page); | ||
2014 | skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page; | ||
2015 | skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size; | ||
2016 | skb_shinfo(skb)->frags[i].size = rem; | ||
2017 | i++; | ||
2018 | skb_shinfo(skb)->nr_frags = i; | ||
2019 | } | ||
2020 | } | ||
2021 | |||
2022 | /* Stamp the time and sequence number, converting them to network byte order */ | ||
2023 | |||
2024 | if (pgh) { | ||
2025 | struct timeval timestamp; | ||
2026 | |||
2027 | pgh->pgh_magic = htonl(PKTGEN_MAGIC); | ||
2028 | pgh->seq_num = htonl(pkt_dev->seq_num); | ||
2029 | |||
2030 | do_gettimeofday(×tamp); | ||
2031 | pgh->tv_sec = htonl(timestamp.tv_sec); | ||
2032 | pgh->tv_usec = htonl(timestamp.tv_usec); | ||
2033 | } | ||
2034 | pkt_dev->seq_num++; | ||
2035 | |||
2036 | return skb; | ||
2037 | } | ||
2038 | |||
2039 | /* | ||
2040 | * scan_ip6, fmt_ip taken from dietlibc-0.21 | ||
2041 | * Author Felix von Leitner <felix-dietlibc@fefe.de> | ||
2042 | * | ||
2043 | * Slightly modified for the kernel. | ||
2044 | * Should be a candidate for net/ipv4/utils.c. | ||
2045 | * --ro | ||
2046 | */ | ||
2047 | |||
2048 | static unsigned int scan_ip6(const char *s,char ip[16]) | ||
2049 | { | ||
2050 | unsigned int i; | ||
2051 | unsigned int len=0; | ||
2052 | unsigned long u; | ||
2053 | char suffix[16]; | ||
2054 | unsigned int prefixlen=0; | ||
2055 | unsigned int suffixlen=0; | ||
2056 | __u32 tmp; | ||
2057 | |||
2058 | for (i=0; i<16; i++) ip[i]=0; | ||
2059 | |||
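| /* Part 1: parse 16-bit groups until "::" or the address ends. */ | ||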
2060 | for (;;) { | ||
2061 | if (*s == ':') { | ||
2062 | len++; | ||
2063 | if (s[1] == ':') { /* Found "::", skip to part 2 */ | ||
2064 | s+=2; | ||
2065 | len++; | ||
2066 | break; | ||
2067 | } | ||
2068 | s++; | ||
2069 | } | ||
2070 | { | ||
2071 | char *tmp; | ||
2072 | u=simple_strtoul(s,&tmp,16); | ||
2073 | i=tmp-s; | ||
2074 | } | ||
2075 | |||
2076 | if (!i) return 0; | ||
2077 | if (prefixlen==12 && s[i]=='.') { | ||
2078 | |||
2079 | /* the last 4 bytes may be written as IPv4 address */ | ||
2080 | |||
2081 | tmp = in_aton(s); | ||
2082 | memcpy((struct in_addr*)(ip+12), &tmp, sizeof(tmp)); | ||
2083 | return i+len; | ||
2084 | } | ||
2085 | ip[prefixlen++] = (u >> 8); | ||
2086 | ip[prefixlen++] = (u & 255); | ||
2087 | s += i; len += i; | ||
2088 | if (prefixlen==16) | ||
2089 | return len; | ||
2090 | } | ||
2091 | |||
2092 | /* part 2, after "::" */ | ||
2093 | for (;;) { | ||
2094 | if (*s == ':') { | ||
2095 | if (suffixlen==0) | ||
2096 | break; | ||
2097 | s++; | ||
2098 | len++; | ||
2099 | } else if (suffixlen!=0) | ||
2100 | break; | ||
2101 | { | ||
2102 | char *tmp; | ||
2103 | u=simple_strtol(s,&tmp,16); | ||
2104 | i=tmp-s; | ||
2105 | } | ||
2106 | if (!i) { | ||
2107 | if (*s) len--; | ||
2108 | break; | ||
2109 | } | ||
2110 | if (suffixlen+prefixlen<=12 && s[i]=='.') { | ||
2111 | tmp = in_aton(s); | ||
2112 | memcpy((struct in_addr*)(suffix+suffixlen), &tmp, sizeof(tmp)); | ||
2113 | suffixlen+=4; | ||
2114 | len+=strlen(s); | ||
2115 | break; | ||
2116 | } | ||
2117 | suffix[suffixlen++] = (u >> 8); | ||
2118 | suffix[suffixlen++] = (u & 255); | ||
2119 | s += i; len += i; | ||
2120 | if (prefixlen+suffixlen==16) | ||
2121 | break; | ||
2122 | } | ||
2123 | for (i=0; i<suffixlen; i++) | ||
2124 | ip[16-suffixlen+i] = suffix[i]; | ||
2125 | return len; | ||
2126 | } | ||
2127 | |||
2128 | static char tohex(char hexdigit) { | ||
2129 | return hexdigit>9?hexdigit+'a'-10:hexdigit+'0'; | ||
2130 | } | ||
2131 | |||
2132 | static int fmt_xlong(char* s,unsigned int i) { | ||
2133 | char* bak=s; | ||
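| /* Emit up to four hex digits; the write pointer only advances | ||
| * once a nonzero digit has appeared, suppressing leading zeros | ||
| * (the final digit is always kept). | ||
| */ | ||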
2134 | *s=tohex((i>>12)&0xf); if (s!=bak || *s!='0') ++s; | ||
2135 | *s=tohex((i>>8)&0xf); if (s!=bak || *s!='0') ++s; | ||
2136 | *s=tohex((i>>4)&0xf); if (s!=bak || *s!='0') ++s; | ||
2137 | *s=tohex(i&0xf); | ||
2138 | return s-bak+1; | ||
2139 | } | ||
2140 | |||
2141 | static unsigned int fmt_ip6(char *s,const char ip[16]) { | ||
2142 | unsigned int len; | ||
2143 | unsigned int i; | ||
2144 | unsigned int temp; | ||
2145 | unsigned int compressing; | ||
2146 | int j; | ||
2147 | |||
2148 | len = 0; compressing = 0; | ||
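| /* Walk the address in 16-bit groups, collapsing each run of zero | ||
| * groups into "::" while it lasts (tracked by "compressing"). | ||
| */ | ||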
2149 | for (j=0; j<16; j+=2) { | ||
2150 | |||
2151 | #ifdef V4MAPPEDPREFIX | ||
2152 | if (j==12 && !memcmp(ip,V4mappedprefix,12)) { | ||
2153 | inet_ntoa_r(*(struct in_addr*)(ip+12),s); | ||
2154 | temp=strlen(s); | ||
2155 | return len+temp; | ||
2156 | } | ||
2157 | #endif | ||
2158 | temp = ((unsigned long) (unsigned char) ip[j] << 8) + | ||
2159 | (unsigned long) (unsigned char) ip[j+1]; | ||
2160 | if (temp == 0) { | ||
2161 | if (!compressing) { | ||
2162 | compressing=1; | ||
2163 | if (j==0) { | ||
2164 | *s++=':'; ++len; | ||
2165 | } | ||
2166 | } | ||
2167 | } else { | ||
2168 | if (compressing) { | ||
2169 | compressing=0; | ||
2170 | *s++=':'; ++len; | ||
2171 | } | ||
2172 | i = fmt_xlong(s,temp); len += i; s += i; | ||
2173 | if (j<14) { | ||
2174 | *s++ = ':'; | ||
2175 | ++len; | ||
2176 | } | ||
2177 | } | ||
2178 | } | ||
2179 | if (compressing) { | ||
2180 | *s++=':'; ++len; | ||
2181 | } | ||
2182 | *s=0; | ||
2183 | return len; | ||
2184 | } | ||
2185 | |||
2186 | static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | ||
2187 | struct pktgen_dev *pkt_dev) | ||
2188 | { | ||
2189 | struct sk_buff *skb = NULL; | ||
2190 | __u8 *eth; | ||
2191 | struct udphdr *udph; | ||
2192 | int datalen; | ||
2193 | struct ipv6hdr *iph; | ||
2194 | struct pktgen_hdr *pgh = NULL; | ||
2195 | |||
2196 | skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); | ||
2197 | if (!skb) { | ||
2198 | sprintf(pkt_dev->result, "No memory"); | ||
2199 | return NULL; | ||
2200 | } | ||
2201 | |||
2202 | skb_reserve(skb, 16); | ||
2203 | |||
2204 | /* Reserve for ethernet and IP header */ | ||
2205 | eth = (__u8 *) skb_push(skb, 14); | ||
2206 | iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); | ||
2207 | udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); | ||
2208 | |||
2209 | |||
2210 | /* Update the current values of any fields we are incrementing | ||
2211 | * or randomizing. | ||
2212 | */ | ||
2213 | mod_cur_headers(pkt_dev); | ||
2214 | |||
2215 | |||
2216 | memcpy(eth, pkt_dev->hh, 12); | ||
2217 | *(u16*)ð[12] = __constant_htons(ETH_P_IPV6); | ||
2218 | |||
2219 | |||
2220 | datalen = pkt_dev->cur_pkt_size-14- | ||
2221 | sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */ | ||
2222 | |||
2223 | if (datalen < sizeof(struct pktgen_hdr)) { | ||
2224 | datalen = sizeof(struct pktgen_hdr); | ||
2225 | if (net_ratelimit()) | ||
2226 | printk(KERN_INFO "pktgen: increased datalen to %d\n", datalen); | ||
2227 | } | ||
2228 | |||
2229 | udph->source = htons(pkt_dev->cur_udp_src); | ||
2230 | udph->dest = htons(pkt_dev->cur_udp_dst); | ||
2231 | udph->len = htons(datalen + sizeof(struct udphdr)); | ||
2232 | udph->check = 0; /* No checksum */ | ||
2233 | |||
2234 | *(u32*)iph = __constant_htonl(0x60000000); /* Version + flow */ | ||
2235 | |||
2236 | iph->hop_limit = 32; | ||
2237 | |||
2238 | iph->payload_len = htons(sizeof(struct udphdr) + datalen); | ||
2239 | iph->nexthdr = IPPROTO_UDP; | ||
2240 | |||
2241 | ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); | ||
2242 | ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); | ||
2243 | |||
2244 | skb->mac.raw = ((u8 *)iph) - 14; | ||
2245 | skb->protocol = __constant_htons(ETH_P_IPV6); | ||
2246 | skb->dev = odev; | ||
2247 | skb->pkt_type = PACKET_HOST; | ||
2248 | |||
2249 | if (pkt_dev->nfrags <= 0) | ||
2250 | pgh = (struct pktgen_hdr *)skb_put(skb, datalen); | ||
2251 | else { | ||
2252 | int frags = pkt_dev->nfrags; | ||
2253 | int i; | ||
2254 | |||
2255 | pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8); | ||
2256 | |||
2257 | if (frags > MAX_SKB_FRAGS) | ||
2258 | frags = MAX_SKB_FRAGS; | ||
2259 | if (datalen > frags*PAGE_SIZE) { | ||
2260 | skb_put(skb, datalen-frags*PAGE_SIZE); | ||
2261 | datalen = frags*PAGE_SIZE; | ||
2262 | } | ||
2263 | |||
2264 | i = 0; | ||
2265 | while (datalen > 0) { | ||
2266 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
2267 | skb_shinfo(skb)->frags[i].page = page; | ||
2268 | skb_shinfo(skb)->frags[i].page_offset = 0; | ||
2269 | skb_shinfo(skb)->frags[i].size = | ||
2270 | (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); | ||
2271 | datalen -= skb_shinfo(skb)->frags[i].size; | ||
2272 | skb->len += skb_shinfo(skb)->frags[i].size; | ||
2273 | skb->data_len += skb_shinfo(skb)->frags[i].size; | ||
2274 | i++; | ||
2275 | skb_shinfo(skb)->nr_frags = i; | ||
2276 | } | ||
2277 | |||
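| /* As in the IPv4 path: split the last frag to reach the requested count. */ | ||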
2278 | while (i < frags) { | ||
2279 | int rem; | ||
2280 | |||
2281 | if (i == 0) | ||
2282 | break; | ||
2283 | |||
2284 | rem = skb_shinfo(skb)->frags[i - 1].size / 2; | ||
2285 | if (rem == 0) | ||
2286 | break; | ||
2287 | |||
2288 | skb_shinfo(skb)->frags[i - 1].size -= rem; | ||
2289 | |||
2290 | skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1]; | ||
2291 | get_page(skb_shinfo(skb)->frags[i].page); | ||
2292 | skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page; | ||
2293 | skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size; | ||
2294 | skb_shinfo(skb)->frags[i].size = rem; | ||
2295 | i++; | ||
2296 | skb_shinfo(skb)->nr_frags = i; | ||
2297 | } | ||
2298 | } | ||
2299 | |||
2300 | /* Stamp the time and sequence number, converting them to network byte order */ | ||
2301 | /* should we update cloned packets too ? */ | ||
2302 | if (pgh) { | ||
2303 | struct timeval timestamp; | ||
2304 | |||
2305 | pgh->pgh_magic = htonl(PKTGEN_MAGIC); | ||
2306 | pgh->seq_num = htonl(pkt_dev->seq_num); | ||
2307 | |||
2308 | do_gettimeofday(×tamp); | ||
2309 | pgh->tv_sec = htonl(timestamp.tv_sec); | ||
2310 | pgh->tv_usec = htonl(timestamp.tv_usec); | ||
2311 | } | ||
2312 | pkt_dev->seq_num++; | ||
2313 | |||
2314 | return skb; | ||
2315 | } | ||
2316 | |||
2317 | static inline struct sk_buff *fill_packet(struct net_device *odev, | ||
2318 | struct pktgen_dev *pkt_dev) | ||
2319 | { | ||
2320 | if(pkt_dev->flags & F_IPV6) | ||
2321 | return fill_packet_ipv6(odev, pkt_dev); | ||
2322 | else | ||
2323 | return fill_packet_ipv4(odev, pkt_dev); | ||
2324 | } | ||
2325 | |||
2326 | static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) | ||
2327 | { | ||
2328 | pkt_dev->seq_num = 1; | ||
2329 | pkt_dev->idle_acc = 0; | ||
2330 | pkt_dev->sofar = 0; | ||
2331 | pkt_dev->tx_bytes = 0; | ||
2332 | pkt_dev->errors = 0; | ||
2333 | } | ||
2334 | |||
2335 | /* Set up structure for sending pkts, clear counters */ | ||
2336 | |||
2337 | static void pktgen_run(struct pktgen_thread *t) | ||
2338 | { | ||
2339 | struct pktgen_dev *pkt_dev = NULL; | ||
2340 | int started = 0; | ||
2341 | |||
2342 | PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t)); | ||
2343 | |||
2344 | if_lock(t); | ||
2345 | for (pkt_dev = t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { | ||
2346 | |||
2347 | /* | ||
2348 | * setup odev and create initial packet. | ||
2349 | */ | ||
2350 | pktgen_setup_inject(pkt_dev); | ||
2351 | |||
2352 | if(pkt_dev->odev) { | ||
2353 | pktgen_clear_counters(pkt_dev); | ||
2354 | pkt_dev->running = 1; /* Cranke yeself! */ | ||
2355 | pkt_dev->skb = NULL; | ||
2356 | pkt_dev->started_at = getCurUs(); | ||
2357 | pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ | ||
2358 | pkt_dev->next_tx_ns = 0; | ||
2359 | |||
2360 | strcpy(pkt_dev->result, "Starting"); | ||
2361 | started++; | ||
2362 | } | ||
2363 | else | ||
2364 | strcpy(pkt_dev->result, "Error starting"); | ||
2365 | } | ||
2366 | if_unlock(t); | ||
2367 | if(started) t->control &= ~(T_STOP); | ||
2368 | } | ||
2369 | |||
2370 | static void pktgen_stop_all_threads_ifs(void) | ||
2371 | { | ||
2372 | struct pktgen_thread *t = pktgen_threads; | ||
2373 | |||
2374 | PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads.\n")); | ||
2375 | |||
2376 | thread_lock(); | ||
2377 | while(t) { | ||
2378 | pktgen_stop(t); | ||
2379 | t = t->next; | ||
2380 | } | ||
2381 | thread_unlock(); | ||
2382 | } | ||
2383 | |||
2384 | static int thread_is_running(struct pktgen_thread *t ) | ||
2385 | { | ||
2386 | struct pktgen_dev *next; | ||
2387 | int res = 0; | ||
2388 | |||
2389 | for(next=t->if_list; next; next=next->next) { | ||
2390 | if(next->running) { | ||
2391 | res = 1; | ||
2392 | break; | ||
2393 | } | ||
2394 | } | ||
2395 | return res; | ||
2396 | } | ||
2397 | |||
2398 | static int pktgen_wait_thread_run(struct pktgen_thread *t ) | ||
2399 | { | ||
2400 | if_lock(t); | ||
2401 | |||
2402 | while(thread_is_running(t)) { | ||
2403 | |||
2404 | if_unlock(t); | ||
2405 | |||
2406 | msleep_interruptible(100); | ||
2407 | |||
2408 | if (signal_pending(current)) | ||
2409 | goto signal; | ||
2410 | if_lock(t); | ||
2411 | } | ||
2412 | if_unlock(t); | ||
2413 | return 1; | ||
2414 | signal: | ||
2415 | return 0; | ||
2416 | } | ||
2417 | |||
2418 | static int pktgen_wait_all_threads_run(void) | ||
2419 | { | ||
2420 | struct pktgen_thread *t = pktgen_threads; | ||
2421 | int sig = 1; | ||
2422 | |||
2423 | while (t) { | ||
2424 | sig = pktgen_wait_thread_run(t); | ||
2425 | if( sig == 0 ) break; | ||
2426 | thread_lock(); | ||
2427 | t=t->next; | ||
2428 | thread_unlock(); | ||
2429 | } | ||
2430 | if(sig == 0) { | ||
2431 | thread_lock(); | ||
2432 | while (t) { | ||
2433 | t->control |= (T_STOP); | ||
2434 | t=t->next; | ||
2435 | } | ||
2436 | thread_unlock(); | ||
2437 | } | ||
2438 | return sig; | ||
2439 | } | ||
2440 | |||
2441 | static void pktgen_run_all_threads(void) | ||
2442 | { | ||
2443 | struct pktgen_thread *t = pktgen_threads; | ||
2444 | |||
2445 | PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n")); | ||
2446 | |||
2447 | thread_lock(); | ||
2448 | |||
2449 | while(t) { | ||
2450 | t->control |= (T_RUN); | ||
2451 | t = t->next; | ||
2452 | } | ||
2453 | thread_unlock(); | ||
2454 | |||
2455 | current->state = TASK_INTERRUPTIBLE; | ||
2456 | schedule_timeout(HZ/8); /* Propagate thread->control */ | ||
2457 | |||
2458 | pktgen_wait_all_threads_run(); | ||
2459 | } | ||
2460 | |||
2461 | |||
2462 | static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) | ||
2463 | { | ||
2464 | __u64 total_us, bps, mbps, pps, idle; | ||
2465 | char *p = pkt_dev->result; | ||
2466 | |||
2467 | total_us = pkt_dev->stopped_at - pkt_dev->started_at; | ||
2468 | |||
2469 | idle = pkt_dev->idle_acc; | ||
2470 | |||
2471 | p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", | ||
2472 | (unsigned long long) total_us, | ||
2473 | (unsigned long long)(total_us - idle), | ||
2474 | (unsigned long long) idle, | ||
2475 | (unsigned long long) pkt_dev->sofar, | ||
2476 | pkt_dev->cur_pkt_size, nr_frags); | ||
2477 | |||
2478 | pps = pkt_dev->sofar * USEC_PER_SEC; | ||
2479 | |||
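| /* Scale both terms down until total_us fits in 32 bits, as do_div() | ||
| * needs a 32-bit divisor; the pps/total_us ratio is preserved. | ||
| */ | ||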
2480 | while ((total_us >> 32) != 0) { | ||
2481 | pps >>= 1; | ||
2482 | total_us >>= 1; | ||
2483 | } | ||
2484 | |||
2485 | do_div(pps, total_us); | ||
2486 | |||
2487 | bps = pps * 8 * pkt_dev->cur_pkt_size; | ||
2488 | |||
2489 | mbps = bps; | ||
2490 | do_div(mbps, 1000000); | ||
2491 | p += sprintf(p, " %llupps %lluMb/sec (%llubps) errors: %llu", | ||
2492 | (unsigned long long) pps, | ||
2493 | (unsigned long long) mbps, | ||
2494 | (unsigned long long) bps, | ||
2495 | (unsigned long long) pkt_dev->errors); | ||
2496 | } | ||
2497 | |||
2498 | |||
2499 | /* Set the stopped-at timestamp, mark the device stopped, and report counters & statistics */ | ||
2500 | |||
2501 | static int pktgen_stop_device(struct pktgen_dev *pkt_dev) | ||
2502 | { | ||
2503 | |||
2504 | if (!pkt_dev->running) { | ||
2505 | printk("pktgen: interface: %s is already stopped\n", pkt_dev->ifname); | ||
2506 | return -EINVAL; | ||
2507 | } | ||
2508 | |||
2509 | pkt_dev->stopped_at = getCurUs(); | ||
2510 | pkt_dev->running = 0; | ||
2511 | |||
2512 | show_results(pkt_dev, skb_shinfo(pkt_dev->skb)->nr_frags); | ||
2513 | |||
2514 | if (pkt_dev->skb) | ||
2515 | kfree_skb(pkt_dev->skb); | ||
2516 | |||
2517 | pkt_dev->skb = NULL; | ||
2518 | |||
2519 | return 0; | ||
2520 | } | ||
2521 | |||
2522 | static struct pktgen_dev *next_to_run(struct pktgen_thread *t ) | ||
2523 | { | ||
2524 | struct pktgen_dev *next, *best = NULL; | ||
2525 | |||
2526 | if_lock(t); | ||
2527 | |||
2528 | for(next=t->if_list; next ; next=next->next) { | ||
2529 | if(!next->running) continue; | ||
2530 | if(best == NULL) best=next; | ||
2531 | else if ( next->next_tx_us < best->next_tx_us) | ||
2532 | best = next; | ||
2533 | } | ||
2534 | if_unlock(t); | ||
2535 | return best; | ||
2536 | } | ||
2537 | |||
2538 | static void pktgen_stop(struct pktgen_thread *t) { | ||
2539 | struct pktgen_dev *next = NULL; | ||
2540 | |||
2541 | PG_DEBUG(printk("pktgen: entering pktgen_stop.\n")); | ||
2542 | |||
2543 | if_lock(t); | ||
2544 | |||
2545 | for(next=t->if_list; next; next=next->next) | ||
2546 | pktgen_stop_device(next); | ||
2547 | |||
2548 | if_unlock(t); | ||
2549 | } | ||
2550 | |||
2551 | static void pktgen_rem_all_ifs(struct pktgen_thread *t) | ||
2552 | { | ||
2553 | struct pktgen_dev *cur, *next = NULL; | ||
2554 | |||
2555 | /* Remove all devices, free mem */ | ||
2556 | |||
2557 | if_lock(t); | ||
2558 | |||
2559 | for(cur=t->if_list; cur; cur=next) { | ||
2560 | next = cur->next; | ||
2561 | pktgen_remove_device(t, cur); | ||
2562 | } | ||
2563 | |||
2564 | if_unlock(t); | ||
2565 | } | ||
2566 | |||
2567 | static void pktgen_rem_thread(struct pktgen_thread *t) | ||
2568 | { | ||
2569 | /* Remove from the thread list */ | ||
2570 | |||
2571 | struct pktgen_thread *tmp = pktgen_threads; | ||
2572 | |||
2573 | if (strlen(t->fname)) | ||
2574 | remove_proc_entry(t->fname, NULL); | ||
2575 | |||
2576 | thread_lock(); | ||
2577 | |||
2578 | if (tmp == t) | ||
2579 | pktgen_threads = tmp->next; | ||
2580 | else { | ||
2581 | while (tmp) { | ||
2582 | if (tmp->next == t) { | ||
2583 | tmp->next = t->next; | ||
2584 | t->next = NULL; | ||
2585 | break; | ||
2586 | } | ||
2587 | tmp = tmp->next; | ||
2588 | } | ||
2589 | } | ||
2590 | thread_unlock(); | ||
2591 | } | ||
2592 | |||
2593 | static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) | ||
2594 | { | ||
2595 | struct net_device *odev = NULL; | ||
2596 | __u64 idle_start = 0; | ||
2597 | int ret; | ||
2598 | |||
2599 | odev = pkt_dev->odev; | ||
2600 | |||
2601 | if (pkt_dev->delay_us || pkt_dev->delay_ns) { | ||
2602 | u64 now; | ||
2603 | |||
2604 | now = getCurUs(); | ||
2605 | if (now < pkt_dev->next_tx_us) | ||
2606 | spin(pkt_dev, pkt_dev->next_tx_us); | ||
2607 | |||
2608 | /* The maximum DELAY value has the special meaning | ||
2609 | * "never transmit". | ||
2610 | */ | ||
2611 | if (pkt_dev->delay_us == 0x7FFFFFFF) { | ||
2612 | pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us; | ||
2613 | pkt_dev->next_tx_ns = pkt_dev->delay_ns; | ||
2614 | goto out; | ||
2615 | } | ||
2616 | } | ||
2617 | |||
2618 | if (netif_queue_stopped(odev) || need_resched()) { | ||
2619 | idle_start = getCurUs(); | ||
2620 | |||
2621 | if (!netif_running(odev)) { | ||
2622 | pktgen_stop_device(pkt_dev); | ||
2623 | goto out; | ||
2624 | } | ||
2625 | if (need_resched()) | ||
2626 | schedule(); | ||
2627 | |||
2628 | pkt_dev->idle_acc += getCurUs() - idle_start; | ||
2629 | |||
2630 | if (netif_queue_stopped(odev)) { | ||
2631 | pkt_dev->next_tx_us = getCurUs(); /* TODO */ | ||
2632 | pkt_dev->next_tx_ns = 0; | ||
2633 | goto out; /* Try the next interface */ | ||
2634 | } | ||
2635 | } | ||
2636 | |||
2637 | if (pkt_dev->last_ok || !pkt_dev->skb) { | ||
2638 | if ((++pkt_dev->clone_count >= pkt_dev->clone_skb ) || (!pkt_dev->skb)) { | ||
2639 | /* build a new pkt */ | ||
2640 | if (pkt_dev->skb) | ||
2641 | kfree_skb(pkt_dev->skb); | ||
2642 | |||
2643 | pkt_dev->skb = fill_packet(odev, pkt_dev); | ||
2644 | if (pkt_dev->skb == NULL) { | ||
2645 | printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n"); | ||
2646 | schedule(); | ||
2647 | pkt_dev->clone_count--; /* back out increment, OOM */ | ||
2648 | goto out; | ||
2649 | } | ||
2650 | pkt_dev->allocated_skbs++; | ||
2651 | pkt_dev->clone_count = 0; /* reset counter */ | ||
2652 | } | ||
2653 | } | ||
2654 | |||
2655 | spin_lock_bh(&odev->xmit_lock); | ||
2656 | if (!netif_queue_stopped(odev)) { | ||
2657 | |||
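| /* Take an extra reference so the skb survives the driver's free | ||
| * and can be retransmitted while clone_skb is in effect. | ||
| */ | ||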
2658 | atomic_inc(&(pkt_dev->skb->users)); | ||
2659 | retry_now: | ||
2660 | ret = odev->hard_start_xmit(pkt_dev->skb, odev); | ||
2661 | if (likely(ret == NETDEV_TX_OK)) { | ||
2662 | pkt_dev->last_ok = 1; | ||
2663 | pkt_dev->sofar++; | ||
2664 | pkt_dev->seq_num++; | ||
2665 | pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; | ||
2666 | |||
2667 | } else if (ret == NETDEV_TX_LOCKED | ||
2668 | && (odev->features & NETIF_F_LLTX)) { | ||
2669 | cpu_relax(); | ||
2670 | goto retry_now; | ||
2671 | } else { /* Retry it next time */ | ||
2672 | |||
2673 | atomic_dec(&(pkt_dev->skb->users)); | ||
2674 | |||
2675 | if (debug && net_ratelimit()) | ||
2676 | printk(KERN_INFO "pktgen: Hard xmit error\n"); | ||
2677 | |||
2678 | pkt_dev->errors++; | ||
2679 | pkt_dev->last_ok = 0; | ||
2680 | } | ||
2681 | |||
2682 | pkt_dev->next_tx_us = getCurUs(); | ||
2683 | pkt_dev->next_tx_ns = 0; | ||
2684 | |||
2685 | pkt_dev->next_tx_us += pkt_dev->delay_us; | ||
2686 | pkt_dev->next_tx_ns += pkt_dev->delay_ns; | ||
2687 | |||
2688 | if (pkt_dev->next_tx_ns > 1000) { | ||
2689 | pkt_dev->next_tx_us++; | ||
2690 | pkt_dev->next_tx_ns -= 1000; | ||
2691 | } | ||
2692 | } | ||
2693 | |||
2694 | else { /* Retry it next time */ | ||
2695 | pkt_dev->last_ok = 0; | ||
2696 | pkt_dev->next_tx_us = getCurUs(); /* TODO */ | ||
2697 | pkt_dev->next_tx_ns = 0; | ||
2698 | } | ||
2699 | |||
2700 | spin_unlock_bh(&odev->xmit_lock); | ||
2701 | |||
2702 | /* If pkt_dev->count is zero, then run forever */ | ||
2703 | if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { | ||
2704 | if (atomic_read(&(pkt_dev->skb->users)) != 1) { | ||
2705 | idle_start = getCurUs(); | ||
2706 | while (atomic_read(&(pkt_dev->skb->users)) != 1) { | ||
2707 | if (signal_pending(current)) { | ||
2708 | break; | ||
2709 | } | ||
2710 | schedule(); | ||
2711 | } | ||
2712 | pkt_dev->idle_acc += getCurUs() - idle_start; | ||
2713 | } | ||
2714 | |||
2715 | /* Done with this */ | ||
2716 | pktgen_stop_device(pkt_dev); | ||
2717 | } | ||
2718 | out:; | ||
2719 | } | ||
2720 | |||
2721 | /* | ||
2722 | * Main loop of the thread goes here | ||
2723 | */ | ||
2724 | |||
2725 | static void pktgen_thread_worker(struct pktgen_thread *t) | ||
2726 | { | ||
2727 | DEFINE_WAIT(wait); | ||
2728 | struct pktgen_dev *pkt_dev = NULL; | ||
2729 | int cpu = t->cpu; | ||
2730 | sigset_t tmpsig; | ||
2731 | u32 max_before_softirq; | ||
2732 | u32 tx_since_softirq = 0; | ||
2733 | |||
2734 | daemonize("pktgen/%d", cpu); | ||
2735 | |||
2736 | /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ | ||
2737 | |||
2738 | spin_lock_irq(¤t->sighand->siglock); | ||
2739 | tmpsig = current->blocked; | ||
2740 | siginitsetinv(¤t->blocked, | ||
2741 | sigmask(SIGKILL) | | ||
2742 | sigmask(SIGSTOP)| | ||
2743 | sigmask(SIGTERM)); | ||
2744 | |||
2745 | recalc_sigpending(); | ||
2746 | spin_unlock_irq(¤t->sighand->siglock); | ||
2747 | |||
2748 | /* Migrate to the right CPU */ | ||
2749 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
2750 | if (smp_processor_id() != cpu) | ||
2751 | BUG(); | ||
2752 | |||
2753 | init_waitqueue_head(&t->queue); | ||
2754 | |||
2755 | t->control &= ~(T_TERMINATE); | ||
2756 | t->control &= ~(T_RUN); | ||
2757 | t->control &= ~(T_STOP); | ||
2758 | t->control &= ~(T_REMDEV); | ||
2759 | |||
2760 | t->pid = current->pid; | ||
2761 | |||
2762 | PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); | ||
2763 | |||
2764 | max_before_softirq = t->max_before_softirq; | ||
2765 | |||
2766 | __set_current_state(TASK_INTERRUPTIBLE); | ||
2767 | mb(); | ||
2768 | |||
2769 | while (1) { | ||
2770 | |||
2771 | __set_current_state(TASK_RUNNING); | ||
2772 | |||
2773 | /* | ||
2774 | * Get next dev to xmit -- if any. | ||
2775 | */ | ||
2776 | |||
2777 | pkt_dev = next_to_run(t); | ||
2778 | |||
2779 | if (pkt_dev) { | ||
2780 | |||
2781 | pktgen_xmit(pkt_dev); | ||
2782 | |||
2783 | /* | ||
2784 | * We like to stay RUNNING but must also give | ||
2785 | * others a fair share. | ||
2786 | */ | ||
2787 | |||
2788 | tx_since_softirq += pkt_dev->last_ok; | ||
2789 | |||
2790 | if (tx_since_softirq > max_before_softirq) { | ||
2791 | if (local_softirq_pending()) | ||
2792 | do_softirq(); | ||
2793 | tx_since_softirq = 0; | ||
2794 | } | ||
2795 | } else { | ||
2796 | prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE); | ||
2797 | schedule_timeout(HZ/10); | ||
2798 | finish_wait(&(t->queue), &wait); | ||
2799 | } | ||
2800 | |||
2801 | /* | ||
2802 | * Back from sleep, either due to the timeout or signal. | ||
2803 | * We check if we have any "posted" work for us. | ||
2804 | */ | ||
2805 | |||
2806 | if (t->control & T_TERMINATE || signal_pending(current)) | ||
2807 | /* we received a request to terminate ourselves */ | ||
2808 | break; | ||
2809 | |||
2810 | |||
2811 | if(t->control & T_STOP) { | ||
2812 | pktgen_stop(t); | ||
2813 | t->control &= ~(T_STOP); | ||
2814 | } | ||
2815 | |||
2816 | if(t->control & T_RUN) { | ||
2817 | pktgen_run(t); | ||
2818 | t->control &= ~(T_RUN); | ||
2819 | } | ||
2820 | |||
2821 | if(t->control & T_REMDEV) { | ||
2822 | pktgen_rem_all_ifs(t); | ||
2823 | t->control &= ~(T_REMDEV); | ||
2824 | } | ||
2825 | |||
2826 | if (need_resched()) | ||
2827 | schedule(); | ||
2828 | } | ||
2829 | |||
2830 | PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name)); | ||
2831 | pktgen_stop(t); | ||
2832 | |||
2833 | PG_DEBUG(printk("pktgen: %s removing all device\n", t->name)); | ||
2834 | pktgen_rem_all_ifs(t); | ||
2835 | |||
2836 | PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name)); | ||
2837 | pktgen_rem_thread(t); | ||
2838 | } | ||
2839 | |||
2840 | static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* ifname) | ||
2841 | { | ||
2842 | struct pktgen_dev *pkt_dev = NULL; | ||
2843 | if_lock(t); | ||
2844 | |||
2845 | for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { | ||
2846 | if (strcmp(pkt_dev->ifname, ifname) == 0) { | ||
2847 | break; | ||
2848 | } | ||
2849 | } | ||
2850 | |||
2851 | if_unlock(t); | ||
2852 | PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname,pkt_dev)); | ||
2853 | return pkt_dev; | ||
2854 | } | ||
2855 | |||
2856 | /* | ||
2857 | * Adds a dev at front of if_list. | ||
2858 | */ | ||
2859 | |||
2860 | static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) | ||
2861 | { | ||
2862 | int rv = 0; | ||
2863 | |||
2864 | if_lock(t); | ||
2865 | |||
2866 | if (pkt_dev->pg_thread) { | ||
2867 | printk("pktgen: ERROR: already assigned to a thread.\n"); | ||
2868 | rv = -EBUSY; | ||
2869 | goto out; | ||
2870 | } | ||
2871 | pkt_dev->next =t->if_list; t->if_list=pkt_dev; | ||
2872 | pkt_dev->pg_thread = t; | ||
2873 | pkt_dev->running = 0; | ||
2874 | |||
2875 | out: | ||
2876 | if_unlock(t); | ||
2877 | return rv; | ||
2878 | } | ||
2879 | |||
2880 | /* Called under thread lock */ | ||
2881 | |||
2882 | static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) | ||
2883 | { | ||
2884 | struct pktgen_dev *pkt_dev; | ||
2885 | |||
2886 | /* We don't allow a device to be on several threads */ | ||
2887 | |||
2888 | if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { | ||
2889 | |||
2890 | pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); | ||
2891 | if (!pkt_dev) | ||
2892 | return -ENOMEM; | ||
2893 | |||
2894 | memset(pkt_dev, 0, sizeof(struct pktgen_dev)); | ||
2895 | |||
2896 | pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); | ||
2897 | if (pkt_dev->flows == NULL) { | ||
2898 | kfree(pkt_dev); | ||
2899 | return -ENOMEM; | ||
2900 | } | ||
2901 | memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); | ||
2902 | |||
2903 | pkt_dev->min_pkt_size = ETH_ZLEN; | ||
2904 | pkt_dev->max_pkt_size = ETH_ZLEN; | ||
2905 | pkt_dev->nfrags = 0; | ||
2906 | pkt_dev->clone_skb = pg_clone_skb_d; | ||
2907 | pkt_dev->delay_us = pg_delay_d / 1000; | ||
2908 | pkt_dev->delay_ns = pg_delay_d % 1000; | ||
2909 | pkt_dev->count = pg_count_d; | ||
2910 | pkt_dev->sofar = 0; | ||
2911 | pkt_dev->udp_src_min = 9; /* sink port */ | ||
2912 | pkt_dev->udp_src_max = 9; | ||
2913 | pkt_dev->udp_dst_min = 9; | ||
2914 | pkt_dev->udp_dst_max = 9; | ||
2915 | |||
2916 | strncpy(pkt_dev->ifname, ifname, 31); | ||
2917 | sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname); | ||
2918 | |||
2919 | if (! pktgen_setup_dev(pkt_dev)) { | ||
2920 | printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); | ||
2921 | if (pkt_dev->flows) | ||
2922 | vfree(pkt_dev->flows); | ||
2923 | kfree(pkt_dev); | ||
2924 | return -ENODEV; | ||
2925 | } | ||
2926 | |||
2927 | pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); | ||
2928 | if (!pkt_dev->proc_ent) { | ||
2929 | printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); | ||
2930 | if (pkt_dev->flows) | ||
2931 | vfree(pkt_dev->flows); | ||
2932 | kfree(pkt_dev); | ||
2933 | return -EINVAL; | ||
2934 | } | ||
2935 | pkt_dev->proc_ent->read_proc = proc_if_read; | ||
2936 | pkt_dev->proc_ent->write_proc = proc_if_write; | ||
2937 | pkt_dev->proc_ent->data = (void*)(pkt_dev); | ||
2938 | pkt_dev->proc_ent->owner = THIS_MODULE; | ||
2939 | |||
2940 | return add_dev_to_thread(t, pkt_dev); | ||
2941 | } | ||
2942 | else { | ||
2943 | printk("pktgen: ERROR: interface already used.\n"); | ||
2944 | return -EBUSY; | ||
2945 | } | ||
2946 | } | ||
2947 | |||
2948 | static struct pktgen_thread *pktgen_find_thread(const char* name) | ||
2949 | { | ||
2950 | struct pktgen_thread *t = NULL; | ||
2951 | |||
2952 | thread_lock(); | ||
2953 | |||
2954 | t = pktgen_threads; | ||
2955 | while (t) { | ||
2956 | if (strcmp(t->name, name) == 0) | ||
2957 | break; | ||
2958 | |||
2959 | t = t->next; | ||
2960 | } | ||
2961 | thread_unlock(); | ||
2962 | return t; | ||
2963 | } | ||
2964 | |||
2965 | static int pktgen_create_thread(const char* name, int cpu) | ||
2966 | { | ||
2967 | struct pktgen_thread *t = NULL; | ||
2968 | |||
2969 | if (strlen(name) > 31) { | ||
2970 | printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); | ||
2971 | return -EINVAL; | ||
2972 | } | ||
2973 | |||
2974 | if (pktgen_find_thread(name)) { | ||
2975 | printk("pktgen: ERROR: thread: %s already exists\n", name); | ||
2976 | return -EINVAL; | ||
2977 | } | ||
2978 | |||
2979 | t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); | ||
2980 | if (!t) { | ||
2981 | printk("pktgen: ERROR: out of memory, can't create new thread.\n"); | ||
2982 | return -ENOMEM; | ||
2983 | } | ||
2984 | |||
2985 | memset(t, 0, sizeof(struct pktgen_thread)); | ||
2986 | strcpy(t->name, name); | ||
2987 | spin_lock_init(&t->if_lock); | ||
2988 | t->cpu = cpu; | ||
2989 | |||
2990 | sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name); | ||
2991 | t->proc_ent = create_proc_entry(t->fname, 0600, NULL); | ||
2992 | if (!t->proc_ent) { | ||
2993 | printk("pktgen: cannot create %s procfs entry.\n", t->fname); | ||
2994 | kfree(t); | ||
2995 | return -EINVAL; | ||
2996 | } | ||
2997 | t->proc_ent->read_proc = proc_thread_read; | ||
2998 | t->proc_ent->write_proc = proc_thread_write; | ||
2999 | t->proc_ent->data = (void*)(t); | ||
3000 | t->proc_ent->owner = THIS_MODULE; | ||
3001 | |||
3002 | t->next = pktgen_threads; | ||
3003 | pktgen_threads = t; | ||
3004 | |||
3005 | if (kernel_thread((void *) pktgen_thread_worker, (void *) t, | ||
3006 | CLONE_FS | CLONE_FILES | CLONE_SIGHAND) < 0) | ||
3007 | printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); | ||
3008 | |||
3009 | return 0; | ||
3010 | } | ||
3011 | |||
3012 | /* | ||
3013 | * Removes a device from the thread if_list. | ||
3014 | */ | ||
3015 | static void _rem_dev_from_if_list(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) | ||
3016 | { | ||
3017 | struct pktgen_dev *i, *prev = NULL; | ||
3018 | |||
3019 | i = t->if_list; | ||
3020 | |||
3021 | while(i) { | ||
3022 | if(i == pkt_dev) { | ||
3023 | if(prev) prev->next = i->next; | ||
3024 | else t->if_list = NULL; | ||
3025 | break; | ||
3026 | } | ||
3027 | prev = i; | ||
3028 | i=i->next; | ||
3029 | } | ||
3030 | } | ||
3031 | |||
3032 | static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) | ||
3033 | { | ||
3034 | |||
3035 | PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev)); | ||
3036 | |||
3037 | if (pkt_dev->running) { | ||
3038 | printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); | ||
3039 | pktgen_stop_device(pkt_dev); | ||
3040 | } | ||
3041 | |||
3042 | /* Dis-associate from the interface */ | ||
3043 | |||
3044 | if (pkt_dev->odev) { | ||
3045 | dev_put(pkt_dev->odev); | ||
3046 | pkt_dev->odev = NULL; | ||
3047 | } | ||
3048 | |||
3049 | /* And update the thread if_list */ | ||
3050 | |||
3051 | _rem_dev_from_if_list(t, pkt_dev); | ||
3052 | |||
3053 | /* Clean up proc file system */ | ||
3054 | |||
3055 | if (strlen(pkt_dev->fname)) | ||
3056 | remove_proc_entry(pkt_dev->fname, NULL); | ||
3057 | |||
3058 | if (pkt_dev->flows) | ||
3059 | vfree(pkt_dev->flows); | ||
3060 | kfree(pkt_dev); | ||
3061 | return 0; | ||
3062 | } | ||
3063 | |||
3064 | static int __init pg_init(void) | ||
3065 | { | ||
3066 | int cpu; | ||
3067 | printk(version); | ||
3068 | |||
3069 | module_fname[0] = 0; | ||
3070 | |||
3071 | create_proc_dir(); | ||
3072 | |||
3073 | sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR); | ||
3074 | module_proc_ent = create_proc_entry(module_fname, 0600, NULL); | ||
3075 | if (!module_proc_ent) { | ||
3076 | printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); | ||
3077 | return -EINVAL; | ||
3078 | } | ||
3079 | |||
3080 | module_proc_ent->proc_fops = &pktgen_fops; | ||
3081 | module_proc_ent->data = NULL; | ||
3082 | |||
3083 | /* Register us to receive netdevice events */ | ||
3084 | register_netdevice_notifier(&pktgen_notifier_block); | ||
3085 | |||
3086 | for (cpu = 0; cpu < NR_CPUS ; cpu++) { | ||
3087 | char buf[30]; | ||
3088 | |||
3089 | if (!cpu_online(cpu)) | ||
3090 | continue; | ||
3091 | |||
3092 | sprintf(buf, "kpktgend_%i", cpu); | ||
3093 | pktgen_create_thread(buf, cpu); | ||
3094 | } | ||
3095 | return 0; | ||
3096 | } | ||
3097 | |||
3098 | static void __exit pg_cleanup(void) | ||
3099 | { | ||
3100 | wait_queue_head_t queue; | ||
3101 | init_waitqueue_head(&queue); | ||
3102 | |||
3103 | /* Stop all interfaces & threads */ | ||
3104 | |||
3105 | while (pktgen_threads) { | ||
3106 | struct pktgen_thread *t = pktgen_threads; | ||
3107 | pktgen_threads->control |= (T_TERMINATE); | ||
3108 | |||
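| /* The worker unlinks itself from pktgen_threads on exit; wait | ||
| * for the list head to change, with a 1 s timeout per wait. | ||
| */ | ||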
3109 | wait_event_interruptible_timeout(queue, (t != pktgen_threads), HZ); | ||
3110 | } | ||
3111 | |||
3112 | /* Un-register us from receiving netdevice events */ | ||
3113 | unregister_netdevice_notifier(&pktgen_notifier_block); | ||
3114 | |||
3115 | /* Clean up proc file system */ | ||
3116 | |||
3117 | remove_proc_entry(module_fname, NULL); | ||
3118 | |||
3119 | remove_proc_dir(); | ||
3120 | } | ||
3121 | |||
3122 | |||
3123 | module_init(pg_init); | ||
3124 | module_exit(pg_cleanup); | ||
3125 | |||
3126 | MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se>"); | ||
3127 | MODULE_DESCRIPTION("Packet Generator tool"); | ||
3128 | MODULE_LICENSE("GPL"); | ||
3129 | module_param(pg_count_d, int, 0); | ||
3130 | module_param(pg_delay_d, int, 0); | ||
3131 | module_param(pg_clone_skb_d, int, 0); | ||
3132 | module_param(debug, int, 0); | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c new file mode 100644 index 000000000000..d69ad90e5811 --- /dev/null +++ b/net/core/rtnetlink.c | |||
@@ -0,0 +1,711 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Routing netlink socket interface: protocol independent part. | ||
7 | * | ||
8 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * Fixes: | ||
16 | * Vitaly E. Lavrov RTA_OK arithmetic was wrong. | ||
17 | */ | ||
18 | |||
19 | #include <linux/config.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/major.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/timer.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/sockios.h> | ||
30 | #include <linux/net.h> | ||
31 | #include <linux/fcntl.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/interrupt.h> | ||
35 | #include <linux/capability.h> | ||
36 | #include <linux/skbuff.h> | ||
37 | #include <linux/init.h> | ||
38 | #include <linux/security.h> | ||
39 | |||
40 | #include <asm/uaccess.h> | ||
41 | #include <asm/system.h> | ||
42 | #include <asm/string.h> | ||
43 | |||
44 | #include <linux/inet.h> | ||
45 | #include <linux/netdevice.h> | ||
46 | #include <net/ip.h> | ||
47 | #include <net/protocol.h> | ||
48 | #include <net/arp.h> | ||
49 | #include <net/route.h> | ||
50 | #include <net/udp.h> | ||
51 | #include <net/sock.h> | ||
52 | #include <net/pkt_sched.h> | ||
53 | |||
54 | DECLARE_MUTEX(rtnl_sem); | ||
55 | |||
56 | void rtnl_lock(void) | ||
57 | { | ||
58 | rtnl_shlock(); | ||
59 | } | ||
60 | |||
61 | int rtnl_lock_interruptible(void) | ||
62 | { | ||
63 | return down_interruptible(&rtnl_sem); | ||
64 | } | ||
65 | |||
66 | void rtnl_unlock(void) | ||
67 | { | ||
68 | rtnl_shunlock(); | ||
69 | |||
70 | netdev_run_todo(); | ||
71 | } | ||
72 | |||
73 | int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) | ||
74 | { | ||
75 | memset(tb, 0, sizeof(struct rtattr*)*maxattr); | ||
76 | |||
77 | while (RTA_OK(rta, len)) { | ||
78 | unsigned flavor = rta->rta_type; | ||
79 | if (flavor && flavor <= maxattr) | ||
80 | tb[flavor-1] = rta; | ||
81 | rta = RTA_NEXT(rta, len); | ||
82 | } | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | struct sock *rtnl; | ||
87 | |||
88 | struct rtnetlink_link * rtnetlink_links[NPROTO]; | ||
89 | |||
90 | static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = | ||
91 | { | ||
92 | NLMSG_LENGTH(sizeof(struct ifinfomsg)), | ||
93 | NLMSG_LENGTH(sizeof(struct ifaddrmsg)), | ||
94 | NLMSG_LENGTH(sizeof(struct rtmsg)), | ||
95 | NLMSG_LENGTH(sizeof(struct ndmsg)), | ||
96 | NLMSG_LENGTH(sizeof(struct rtmsg)), | ||
97 | NLMSG_LENGTH(sizeof(struct tcmsg)), | ||
98 | NLMSG_LENGTH(sizeof(struct tcmsg)), | ||
99 | NLMSG_LENGTH(sizeof(struct tcmsg)), | ||
100 | NLMSG_LENGTH(sizeof(struct tcamsg)) | ||
101 | }; | ||
102 | |||
103 | static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] = | ||
104 | { | ||
105 | IFLA_MAX, | ||
106 | IFA_MAX, | ||
107 | RTA_MAX, | ||
108 | NDA_MAX, | ||
109 | RTA_MAX, | ||
110 | TCA_MAX, | ||
111 | TCA_MAX, | ||
112 | TCA_MAX, | ||
113 | TCAA_MAX | ||
114 | }; | ||
115 | |||
116 | void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) | ||
117 | { | ||
118 | struct rtattr *rta; | ||
119 | int size = RTA_LENGTH(attrlen); | ||
120 | |||
121 | rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); | ||
122 | rta->rta_type = attrtype; | ||
123 | rta->rta_len = size; | ||
124 | memcpy(RTA_DATA(rta), data, attrlen); | ||
125 | } | ||
126 | |||
127 | size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size) | ||
128 | { | ||
129 | size_t ret = RTA_PAYLOAD(rta); | ||
130 | char *src = RTA_DATA(rta); | ||
131 | |||
132 | if (ret > 0 && src[ret - 1] == '\0') | ||
133 | ret--; | ||
134 | if (size > 0) { | ||
135 | size_t len = (ret >= size) ? size - 1 : ret; | ||
136 | memset(dest, 0, size); | ||
137 | memcpy(dest, src, len); | ||
138 | } | ||
139 | return ret; | ||
140 | } | ||
141 | |||
142 | int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) | ||
143 | { | ||
144 | int err = 0; | ||
145 | |||
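| /* When echo is requested, take an extra reference so the skb can | ||
| * also be unicast back to the caller after the broadcast. | ||
| */ | ||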
146 | NETLINK_CB(skb).dst_groups = group; | ||
147 | if (echo) | ||
148 | atomic_inc(&skb->users); | ||
149 | netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); | ||
150 | if (echo) | ||
151 | err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) | ||
156 | { | ||
157 | struct rtattr *mx = (struct rtattr*)skb->tail; | ||
158 | int i; | ||
159 | |||
160 | RTA_PUT(skb, RTA_METRICS, 0, NULL); | ||
161 | for (i=0; i<RTAX_MAX; i++) { | ||
162 | if (metrics[i]) | ||
163 | RTA_PUT(skb, i+1, sizeof(u32), metrics+i); | ||
164 | } | ||
165 | mx->rta_len = skb->tail - (u8*)mx; | ||
166 | if (mx->rta_len == RTA_LENGTH(0)) | ||
167 | skb_trim(skb, (u8*)mx - skb->data); | ||
168 | return 0; | ||
169 | |||
170 | rtattr_failure: | ||
171 | skb_trim(skb, (u8*)mx - skb->data); | ||
172 | return -1; | ||
173 | } | ||
174 | |||
175 | |||
176 | static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | ||
177 | int type, u32 pid, u32 seq, u32 change) | ||
178 | { | ||
179 | struct ifinfomsg *r; | ||
180 | struct nlmsghdr *nlh; | ||
181 | unsigned char *b = skb->tail; | ||
182 | |||
183 | nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r)); | ||
184 | if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; | ||
185 | r = NLMSG_DATA(nlh); | ||
186 | r->ifi_family = AF_UNSPEC; | ||
187 | r->ifi_type = dev->type; | ||
188 | r->ifi_index = dev->ifindex; | ||
189 | r->ifi_flags = dev_get_flags(dev); | ||
190 | r->ifi_change = change; | ||
191 | |||
192 | RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); | ||
193 | |||
194 | if (1) { | ||
195 | u32 txqlen = dev->tx_queue_len; | ||
196 | RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); | ||
197 | } | ||
198 | |||
199 | if (1) { | ||
200 | u32 weight = dev->weight; | ||
201 | RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); | ||
202 | } | ||
203 | |||
204 | if (1) { | ||
205 | struct rtnl_link_ifmap map = { | ||
206 | .mem_start = dev->mem_start, | ||
207 | .mem_end = dev->mem_end, | ||
208 | .base_addr = dev->base_addr, | ||
209 | .irq = dev->irq, | ||
210 | .dma = dev->dma, | ||
211 | .port = dev->if_port, | ||
212 | }; | ||
213 | RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); | ||
214 | } | ||
215 | |||
216 | if (dev->addr_len) { | ||
217 | RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); | ||
218 | RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); | ||
219 | } | ||
220 | |||
221 | if (1) { | ||
222 | u32 mtu = dev->mtu; | ||
223 | RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); | ||
224 | } | ||
225 | |||
226 | if (dev->ifindex != dev->iflink) { | ||
227 | u32 iflink = dev->iflink; | ||
228 | RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink); | ||
229 | } | ||
230 | |||
231 | if (dev->qdisc_sleeping) | ||
232 | RTA_PUT(skb, IFLA_QDISC, | ||
233 | strlen(dev->qdisc_sleeping->ops->id) + 1, | ||
234 | dev->qdisc_sleeping->ops->id); | ||
235 | |||
236 | if (dev->master) { | ||
237 | u32 master = dev->master->ifindex; | ||
238 | RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master); | ||
239 | } | ||
240 | |||
241 | if (dev->get_stats) { | ||
242 | unsigned long *stats = (unsigned long*)dev->get_stats(dev); | ||
243 | if (stats) { | ||
244 | struct rtattr *a; | ||
245 | __u32 *s; | ||
246 | int i; | ||
247 | int n = sizeof(struct rtnl_link_stats)/4; | ||
248 | |||
249 | a = __RTA_PUT(skb, IFLA_STATS, n*4); | ||
250 | s = RTA_DATA(a); | ||
251 | for (i=0; i<n; i++) | ||
252 | s[i] = stats[i]; | ||
253 | } | ||
254 | } | ||
255 | nlh->nlmsg_len = skb->tail - b; | ||
256 | return skb->len; | ||
257 | |||
258 | nlmsg_failure: | ||
259 | rtattr_failure: | ||
260 | skb_trim(skb, b - skb->data); | ||
261 | return -1; | ||
262 | } | ||
263 | |||
264 | static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | ||
265 | { | ||
266 | int idx; | ||
267 | int s_idx = cb->args[0]; | ||
268 | struct net_device *dev; | ||
269 | |||
270 | read_lock(&dev_base_lock); | ||
271 | for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { | ||
272 | if (idx < s_idx) | ||
273 | continue; | ||
274 | if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) | ||
275 | break; | ||
276 | } | ||
277 | read_unlock(&dev_base_lock); | ||
278 | cb->args[0] = idx; | ||
279 | |||
280 | return skb->len; | ||
281 | } | ||
282 | |||
283 | static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
284 | { | ||
285 | struct ifinfomsg *ifm = NLMSG_DATA(nlh); | ||
286 | struct rtattr **ida = arg; | ||
287 | struct net_device *dev; | ||
288 | int err, send_addr_notify = 0; | ||
289 | |||
290 | if (ifm->ifi_index >= 0) | ||
291 | dev = dev_get_by_index(ifm->ifi_index); | ||
292 | else if (ida[IFLA_IFNAME - 1]) { | ||
293 | char ifname[IFNAMSIZ]; | ||
294 | |||
295 | if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], | ||
296 | IFNAMSIZ) >= IFNAMSIZ) | ||
297 | return -EINVAL; | ||
298 | dev = dev_get_by_name(ifname); | ||
299 | } else | ||
300 | return -EINVAL; | ||
301 | |||
302 | if (!dev) | ||
303 | return -ENODEV; | ||
304 | |||
305 | err = -EINVAL; | ||
306 | |||
307 | if (ifm->ifi_flags) | ||
308 | dev_change_flags(dev, ifm->ifi_flags); | ||
309 | |||
310 | if (ida[IFLA_MAP - 1]) { | ||
311 | struct rtnl_link_ifmap *u_map; | ||
312 | struct ifmap k_map; | ||
313 | |||
314 | if (!dev->set_config) { | ||
315 | err = -EOPNOTSUPP; | ||
316 | goto out; | ||
317 | } | ||
318 | |||
319 | if (!netif_device_present(dev)) { | ||
320 | err = -ENODEV; | ||
321 | goto out; | ||
322 | } | ||
323 | |||
324 | if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map))) | ||
325 | goto out; | ||
326 | |||
327 | u_map = RTA_DATA(ida[IFLA_MAP - 1]); | ||
328 | |||
329 | k_map.mem_start = (unsigned long) u_map->mem_start; | ||
330 | k_map.mem_end = (unsigned long) u_map->mem_end; | ||
331 | k_map.base_addr = (unsigned short) u_map->base_addr; | ||
332 | k_map.irq = (unsigned char) u_map->irq; | ||
333 | k_map.dma = (unsigned char) u_map->dma; | ||
334 | k_map.port = (unsigned char) u_map->port; | ||
335 | |||
336 | err = dev->set_config(dev, &k_map); | ||
337 | |||
338 | if (err) | ||
339 | goto out; | ||
340 | } | ||
341 | |||
342 | if (ida[IFLA_ADDRESS - 1]) { | ||
343 | if (!dev->set_mac_address) { | ||
344 | err = -EOPNOTSUPP; | ||
345 | goto out; | ||
346 | } | ||
347 | if (!netif_device_present(dev)) { | ||
348 | err = -ENODEV; | ||
349 | goto out; | ||
350 | } | ||
351 | if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) | ||
352 | goto out; | ||
353 | |||
354 | err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1])); | ||
355 | if (err) | ||
356 | goto out; | ||
357 | send_addr_notify = 1; | ||
358 | } | ||
359 | |||
360 | if (ida[IFLA_BROADCAST - 1]) { | ||
361 | if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) | ||
362 | goto out; | ||
363 | memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), | ||
364 | dev->addr_len); | ||
365 | send_addr_notify = 1; | ||
366 | } | ||
367 | |||
368 | if (ida[IFLA_MTU - 1]) { | ||
369 | if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | ||
370 | goto out; | ||
371 | err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); | ||
372 | |||
373 | if (err) | ||
374 | goto out; | ||
375 | |||
376 | } | ||
377 | |||
378 | if (ida[IFLA_TXQLEN - 1]) { | ||
379 | if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | ||
380 | goto out; | ||
381 | |||
382 | dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); | ||
383 | } | ||
384 | |||
385 | if (ida[IFLA_WEIGHT - 1]) { | ||
386 | if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | ||
387 | goto out; | ||
388 | |||
389 | dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); | ||
390 | } | ||
391 | |||
392 | if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { | ||
393 | char ifname[IFNAMSIZ]; | ||
394 | |||
395 | if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], | ||
396 | IFNAMSIZ) >= IFNAMSIZ) | ||
397 | goto out; | ||
398 | err = dev_change_name(dev, ifname); | ||
399 | if (err) | ||
400 | goto out; | ||
401 | } | ||
402 | |||
403 | err = 0; | ||
404 | |||
405 | out: | ||
406 | if (send_addr_notify) | ||
407 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
408 | |||
409 | dev_put(dev); | ||
410 | return err; | ||
411 | } | ||
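do_setlink() is driven from userspace by an RTM_SETLINK request on a NETLINK_ROUTE socket. A minimal, hedged sketch of such a caller that changes an interface's MTU (error handling elided; set_mtu() and its arguments are illustrative):

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>

	int set_mtu(int ifindex, unsigned int mtu)
	{
		struct {
			struct nlmsghdr  nlh;
			struct ifinfomsg ifi;
			char             attrbuf[RTA_SPACE(sizeof(unsigned int))];
		} req;
		struct rtattr *rta;
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		memset(&req, 0, sizeof(req));
		req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.ifi));
		req.nlh.nlmsg_type  = RTM_SETLINK;
		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		req.ifi.ifi_family  = AF_UNSPEC;
		req.ifi.ifi_index   = ifindex;

		rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
		rta->rta_type = IFLA_MTU;
		rta->rta_len  = RTA_LENGTH(sizeof(mtu));
		memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
		req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));

		send(fd, &req, req.nlh.nlmsg_len, 0);	/* kernel answers with an ACK */
		close(fd);
		return 0;
	}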
412 | |||
413 | static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | ||
414 | { | ||
415 | int idx; | ||
416 | int s_idx = cb->family; | ||
417 | |||
418 | if (s_idx == 0) | ||
419 | s_idx = 1; | ||
420 | for (idx=1; idx<NPROTO; idx++) { | ||
421 | int type = cb->nlh->nlmsg_type-RTM_BASE; | ||
422 | if (idx < s_idx || idx == PF_PACKET) | ||
423 | continue; | ||
424 | if (rtnetlink_links[idx] == NULL || | ||
425 | rtnetlink_links[idx][type].dumpit == NULL) | ||
426 | continue; | ||
427 | if (idx > s_idx) | ||
428 | memset(&cb->args[0], 0, sizeof(cb->args)); | ||
429 | if (rtnetlink_links[idx][type].dumpit(skb, cb)) | ||
430 | break; | ||
431 | } | ||
432 | cb->family = idx; | ||
433 | |||
434 | return skb->len; | ||
435 | } | ||
436 | |||
437 | void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | ||
438 | { | ||
439 | struct sk_buff *skb; | ||
440 | int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + | ||
441 | sizeof(struct rtnl_link_ifmap) + | ||
442 | sizeof(struct rtnl_link_stats) + 128); | ||
443 | |||
444 | skb = alloc_skb(size, GFP_KERNEL); | ||
445 | if (!skb) | ||
446 | return; | ||
447 | |||
448 | if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) { | ||
449 | kfree_skb(skb); | ||
450 | return; | ||
451 | } | ||
452 | NETLINK_CB(skb).dst_groups = RTMGRP_LINK; | ||
453 | netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); | ||
454 | } | ||
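rtmsg_ifinfo() multicasts these notifications to the RTMGRP_LINK group; a userspace monitor subscribes by binding a NETLINK_ROUTE socket to that group. A minimal hedged sketch:

	#include <string.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>

	int open_link_monitor(void)
	{
		struct sockaddr_nl addr;
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		memset(&addr, 0, sizeof(addr));
		addr.nl_family = AF_NETLINK;
		addr.nl_groups = RTMGRP_LINK;	/* RTM_NEWLINK/RTM_DELLINK events */
		bind(fd, (struct sockaddr *)&addr, sizeof(addr));
		return fd;			/* read the messages with recv() */
	}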
455 | |||
456 | static int rtnetlink_done(struct netlink_callback *cb) | ||
457 | { | ||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | /* Protected by the RTNL semaphore. */ | ||
462 | static struct rtattr **rta_buf; | ||
463 | static int rtattr_max; | ||
464 | |||
465 | /* Process one rtnetlink message. */ | ||
466 | |||
467 | static __inline__ int | ||
468 | rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | ||
469 | { | ||
470 | struct rtnetlink_link *link; | ||
471 | struct rtnetlink_link *link_tab; | ||
472 | int sz_idx, kind; | ||
473 | int min_len; | ||
474 | int family; | ||
475 | int type; | ||
476 | int err; | ||
477 | |||
478 | /* Only requests are handled by the kernel for now */ | ||
479 | if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) | ||
480 | return 0; | ||
481 | |||
482 | type = nlh->nlmsg_type; | ||
483 | |||
484 | /* Control messages: ignore them */ | ||
485 | if (type < RTM_BASE) | ||
486 | return 0; | ||
487 | |||
488 | /* Unknown message: reply with EINVAL */ | ||
489 | if (type > RTM_MAX) | ||
490 | goto err_inval; | ||
491 | |||
492 | type -= RTM_BASE; | ||
493 | |||
494 | /* Every message must carry at least one byte of payload (the family) */ | ||
495 | if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) | ||
496 | return 0; | ||
497 | |||
498 | family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; | ||
499 | if (family >= NPROTO) { | ||
500 | *errp = -EAFNOSUPPORT; | ||
501 | return -1; | ||
502 | } | ||
503 | |||
504 | link_tab = rtnetlink_links[family]; | ||
505 | if (link_tab == NULL) | ||
506 | link_tab = rtnetlink_links[PF_UNSPEC]; | ||
507 | link = &link_tab[type]; | ||
508 | |||
509 | sz_idx = type>>2; | ||
510 | kind = type&3; | ||
511 | |||
512 | if (kind != 2 && security_netlink_recv(skb)) { | ||
513 | *errp = -EPERM; | ||
514 | return -1; | ||
515 | } | ||
516 | |||
517 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | ||
518 | u32 rlen; | ||
519 | |||
520 | if (link->dumpit == NULL) | ||
521 | link = &(rtnetlink_links[PF_UNSPEC][type]); | ||
522 | |||
523 | if (link->dumpit == NULL) | ||
524 | goto err_inval; | ||
525 | |||
526 | if ((*errp = netlink_dump_start(rtnl, skb, nlh, | ||
527 | link->dumpit, | ||
528 | rtnetlink_done)) != 0) { | ||
529 | return -1; | ||
530 | } | ||
531 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
532 | if (rlen > skb->len) | ||
533 | rlen = skb->len; | ||
534 | skb_pull(skb, rlen); | ||
535 | return -1; | ||
536 | } | ||
537 | |||
538 | memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); | ||
539 | |||
540 | min_len = rtm_min[sz_idx]; | ||
541 | if (nlh->nlmsg_len < min_len) | ||
542 | goto err_inval; | ||
543 | |||
544 | if (nlh->nlmsg_len > min_len) { | ||
545 | int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); | ||
546 | struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); | ||
547 | |||
548 | while (RTA_OK(attr, attrlen)) { | ||
549 | unsigned flavor = attr->rta_type; | ||
550 | if (flavor) { | ||
551 | if (flavor > rta_max[sz_idx]) | ||
552 | goto err_inval; | ||
553 | rta_buf[flavor-1] = attr; | ||
554 | } | ||
555 | attr = RTA_NEXT(attr, attrlen); | ||
556 | } | ||
557 | } | ||
558 | |||
559 | if (link->doit == NULL) | ||
560 | link = &(rtnetlink_links[PF_UNSPEC][type]); | ||
561 | if (link->doit == NULL) | ||
562 | goto err_inval; | ||
563 | err = link->doit(skb, nlh, (void *)&rta_buf[0]); | ||
564 | |||
565 | *errp = err; | ||
566 | return err; | ||
567 | |||
568 | err_inval: | ||
569 | *errp = -EINVAL; | ||
570 | return -1; | ||
571 | } | ||
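The sz_idx/kind arithmetic above relies on the layout of the RTM_* numbering: each object class (link, address, route, ...) owns four consecutive message types in NEW/DEL/GET/SET order. A hedged illustration of the decoding (rtm_kind()/rtm_class() are hypothetical helpers, not kernel API):

	#include <linux/rtnetlink.h>

	static inline int rtm_kind(int nlmsg_type)
	{
		return (nlmsg_type - RTM_BASE) & 3;	/* 0=NEW 1=DEL 2=GET 3=SET */
	}

	static inline int rtm_class(int nlmsg_type)
	{
		return (nlmsg_type - RTM_BASE) >> 2;	/* index into rtm_min[]/rta_max[] */
	}

	/* rtm_kind(RTM_GETLINK) == 2, which is why kind == 2 selects the dump path. */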
572 | |||
573 | /* | ||
574 | * Process one packet of messages. | ||
575 |  * Malformed skbs with wrong message lengths are discarded silently. | ||
576 | */ | ||
577 | |||
578 | static inline int rtnetlink_rcv_skb(struct sk_buff *skb) | ||
579 | { | ||
580 | int err; | ||
581 | struct nlmsghdr * nlh; | ||
582 | |||
583 | while (skb->len >= NLMSG_SPACE(0)) { | ||
584 | u32 rlen; | ||
585 | |||
586 | nlh = (struct nlmsghdr *)skb->data; | ||
587 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
588 | return 0; | ||
589 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
590 | if (rlen > skb->len) | ||
591 | rlen = skb->len; | ||
592 | if (rtnetlink_rcv_msg(skb, nlh, &err)) { | ||
593 | /* Not an error, but we must interrupt processing here. | ||
594 |  * Note that in this case we do not pull the message | ||
595 |  * from the skb; it will be processed later. | ||
596 | */ | ||
597 | if (err == 0) | ||
598 | return -1; | ||
599 | netlink_ack(skb, nlh, err); | ||
600 | } else if (nlh->nlmsg_flags&NLM_F_ACK) | ||
601 | netlink_ack(skb, nlh, 0); | ||
602 | skb_pull(skb, rlen); | ||
603 | } | ||
604 | |||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * rtnetlink input queue processing routine: | ||
610 |  * - try to acquire the shared lock; if that fails, defer processing. | ||
611 |  * - feed skbs to rtnetlink_rcv_skb until it refuses a message, | ||
612 |  *   which happens when a dump has started and/or acquisition of | ||
613 |  *   the exclusive lock failed. | ||
614 | */ | ||
615 | |||
616 | static void rtnetlink_rcv(struct sock *sk, int len) | ||
617 | { | ||
618 | do { | ||
619 | struct sk_buff *skb; | ||
620 | |||
621 | if (rtnl_shlock_nowait()) | ||
622 | return; | ||
623 | |||
624 | while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { | ||
625 | if (rtnetlink_rcv_skb(skb)) { | ||
626 | if (skb->len) | ||
627 | skb_queue_head(&sk->sk_receive_queue, | ||
628 | skb); | ||
629 | else | ||
630 | kfree_skb(skb); | ||
631 | break; | ||
632 | } | ||
633 | kfree_skb(skb); | ||
634 | } | ||
635 | |||
636 | up(&rtnl_sem); | ||
637 | |||
638 | netdev_run_todo(); | ||
639 | } while (rtnl && rtnl->sk_receive_queue.qlen); | ||
640 | } | ||
641 | |||
642 | static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = | ||
643 | { | ||
644 | [RTM_GETLINK - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo }, | ||
645 | [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, | ||
646 | [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, | ||
647 | [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, | ||
648 | [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, | ||
649 | [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, | ||
650 | [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info } | ||
651 | }; | ||
652 | |||
653 | static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) | ||
654 | { | ||
655 | struct net_device *dev = ptr; | ||
656 | switch (event) { | ||
657 | case NETDEV_UNREGISTER: | ||
658 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); | ||
659 | break; | ||
660 | case NETDEV_REGISTER: | ||
661 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); | ||
662 | break; | ||
663 | case NETDEV_UP: | ||
664 | case NETDEV_DOWN: | ||
665 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
666 | break; | ||
667 | case NETDEV_CHANGE: | ||
668 | case NETDEV_GOING_DOWN: | ||
669 | break; | ||
670 | default: | ||
671 | rtmsg_ifinfo(RTM_NEWLINK, dev, 0); | ||
672 | break; | ||
673 | } | ||
674 | return NOTIFY_DONE; | ||
675 | } | ||
676 | |||
677 | static struct notifier_block rtnetlink_dev_notifier = { | ||
678 | .notifier_call = rtnetlink_event, | ||
679 | }; | ||
680 | |||
681 | void __init rtnetlink_init(void) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | rtattr_max = 0; | ||
686 | for (i = 0; i < ARRAY_SIZE(rta_max); i++) | ||
687 | if (rta_max[i] > rtattr_max) | ||
688 | rtattr_max = rta_max[i]; | ||
689 | rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); | ||
690 | if (!rta_buf) | ||
691 | panic("rtnetlink_init: cannot allocate rta_buf\n"); | ||
692 | |||
693 | rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); | ||
694 | if (rtnl == NULL) | ||
695 | panic("rtnetlink_init: cannot initialize rtnetlink\n"); | ||
696 | netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); | ||
697 | register_netdevice_notifier(&rtnetlink_dev_notifier); | ||
698 | rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; | ||
699 | rtnetlink_links[PF_PACKET] = link_rtnetlink_table; | ||
700 | } | ||
701 | |||
702 | EXPORT_SYMBOL(__rta_fill); | ||
703 | EXPORT_SYMBOL(rtattr_strlcpy); | ||
704 | EXPORT_SYMBOL(rtattr_parse); | ||
705 | EXPORT_SYMBOL(rtnetlink_links); | ||
706 | EXPORT_SYMBOL(rtnetlink_put_metrics); | ||
707 | EXPORT_SYMBOL(rtnl); | ||
708 | EXPORT_SYMBOL(rtnl_lock); | ||
709 | EXPORT_SYMBOL(rtnl_lock_interruptible); | ||
710 | EXPORT_SYMBOL(rtnl_sem); | ||
711 | EXPORT_SYMBOL(rtnl_unlock); | ||
diff --git a/net/core/scm.c b/net/core/scm.c new file mode 100644 index 000000000000..a2ebf30f6aa8 --- /dev/null +++ b/net/core/scm.c | |||
@@ -0,0 +1,291 @@ | |||
1 | /* scm.c - Socket level control messages processing. | ||
2 | * | ||
3 | * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
4 | * Alignment and value checking mods by Craig Metz | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/signal.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/major.h> | ||
19 | #include <linux/stat.h> | ||
20 | #include <linux/socket.h> | ||
21 | #include <linux/file.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/net.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/netdevice.h> | ||
26 | #include <linux/security.h> | ||
27 | |||
28 | #include <asm/system.h> | ||
29 | #include <asm/uaccess.h> | ||
30 | |||
31 | #include <net/protocol.h> | ||
32 | #include <linux/skbuff.h> | ||
33 | #include <net/sock.h> | ||
34 | #include <net/compat.h> | ||
35 | #include <net/scm.h> | ||
36 | |||
37 | |||
38 | /* | ||
39 |  * Only allow a user to send credentials that they could have set | ||
40 |  * with setu(g)id. | ||
41 | */ | ||
42 | |||
43 | static __inline__ int scm_check_creds(struct ucred *creds) | ||
44 | { | ||
45 | if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && | ||
46 | ((creds->uid == current->uid || creds->uid == current->euid || | ||
47 | creds->uid == current->suid) || capable(CAP_SETUID)) && | ||
48 | ((creds->gid == current->gid || creds->gid == current->egid || | ||
49 | creds->gid == current->sgid) || capable(CAP_SETGID))) { | ||
50 | return 0; | ||
51 | } | ||
52 | return -EPERM; | ||
53 | } | ||
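On the sending side these credentials arrive as an SCM_CREDENTIALS control message built by userspace. A minimal hedged sketch for an AF_UNIX socket (glibc; _GNU_SOURCE is needed for struct ucred):

	#define _GNU_SOURCE
	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>

	int send_creds(int fd)
	{
		char data = 'x', cbuf[CMSG_SPACE(sizeof(struct ucred))];
		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
		struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
				      .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
		struct ucred creds = { .pid = getpid(), .uid = getuid(), .gid = getgid() };

		cm->cmsg_level = SOL_SOCKET;
		cm->cmsg_type  = SCM_CREDENTIALS;
		cm->cmsg_len   = CMSG_LEN(sizeof(creds));
		memcpy(CMSG_DATA(cm), &creds, sizeof(creds));
		return sendmsg(fd, &msg, 0);	/* scm_check_creds() vets the values */
	}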
54 | |||
55 | static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) | ||
56 | { | ||
57 | int *fdp = (int*)CMSG_DATA(cmsg); | ||
58 | struct scm_fp_list *fpl = *fplp; | ||
59 | struct file **fpp; | ||
60 | int i, num; | ||
61 | |||
62 | num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int); | ||
63 | |||
64 | if (num <= 0) | ||
65 | return 0; | ||
66 | |||
67 | if (num > SCM_MAX_FD) | ||
68 | return -EINVAL; | ||
69 | |||
70 | if (!fpl) | ||
71 | { | ||
72 | fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); | ||
73 | if (!fpl) | ||
74 | return -ENOMEM; | ||
75 | *fplp = fpl; | ||
76 | fpl->count = 0; | ||
77 | } | ||
78 | fpp = &fpl->fp[fpl->count]; | ||
79 | |||
80 | if (fpl->count + num > SCM_MAX_FD) | ||
81 | return -EINVAL; | ||
82 | |||
83 | /* | ||
84 | * Verify the descriptors and increment the usage count. | ||
85 | */ | ||
86 | |||
87 | for (i=0; i< num; i++) | ||
88 | { | ||
89 | int fd = fdp[i]; | ||
90 | struct file *file; | ||
91 | |||
92 | if (fd < 0 || !(file = fget(fd))) | ||
93 | return -EBADF; | ||
94 | *fpp++ = file; | ||
95 | fpl->count++; | ||
96 | } | ||
97 | return num; | ||
98 | } | ||
99 | |||
100 | void __scm_destroy(struct scm_cookie *scm) | ||
101 | { | ||
102 | struct scm_fp_list *fpl = scm->fp; | ||
103 | int i; | ||
104 | |||
105 | if (fpl) { | ||
106 | scm->fp = NULL; | ||
107 | for (i=fpl->count-1; i>=0; i--) | ||
108 | fput(fpl->fp[i]); | ||
109 | kfree(fpl); | ||
110 | } | ||
111 | } | ||
112 | |||
113 | int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | ||
114 | { | ||
115 | struct cmsghdr *cmsg; | ||
116 | int err; | ||
117 | |||
118 | for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) | ||
119 | { | ||
120 | err = -EINVAL; | ||
121 | |||
122 | /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ | ||
123 | /* The first check was omitted in <= 2.2.5. The reasoning was | ||
124 | that the parser checks cmsg_len in any case, so an | ||
125 | additional check would be duplicated work. | ||
126 | But if cmsg_level is not SOL_SOCKET, we do not check | ||
127 | for a too-short ancillary data object at all! Oops. | ||
128 | OK, let's add it... | ||
129 | */ | ||
130 | if (!CMSG_OK(msg, cmsg)) | ||
131 | goto error; | ||
132 | |||
133 | if (cmsg->cmsg_level != SOL_SOCKET) | ||
134 | continue; | ||
135 | |||
136 | switch (cmsg->cmsg_type) | ||
137 | { | ||
138 | case SCM_RIGHTS: | ||
139 | err=scm_fp_copy(cmsg, &p->fp); | ||
140 | if (err<0) | ||
141 | goto error; | ||
142 | break; | ||
143 | case SCM_CREDENTIALS: | ||
144 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) | ||
145 | goto error; | ||
146 | memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); | ||
147 | err = scm_check_creds(&p->creds); | ||
148 | if (err) | ||
149 | goto error; | ||
150 | break; | ||
151 | default: | ||
152 | goto error; | ||
153 | } | ||
154 | } | ||
155 | |||
156 | if (p->fp && !p->fp->count) | ||
157 | { | ||
158 | kfree(p->fp); | ||
159 | p->fp = NULL; | ||
160 | } | ||
161 | return 0; | ||
162 | |||
163 | error: | ||
164 | scm_destroy(p); | ||
165 | return err; | ||
166 | } | ||
167 | |||
168 | int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) | ||
169 | { | ||
170 | struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control; | ||
171 | struct cmsghdr cmhdr; | ||
172 | int cmlen = CMSG_LEN(len); | ||
173 | int err; | ||
174 | |||
175 | if (MSG_CMSG_COMPAT & msg->msg_flags) | ||
176 | return put_cmsg_compat(msg, level, type, len, data); | ||
177 | |||
178 | if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { | ||
179 | msg->msg_flags |= MSG_CTRUNC; | ||
180 | return 0; /* XXX: return error? check spec. */ | ||
181 | } | ||
182 | if (msg->msg_controllen < cmlen) { | ||
183 | msg->msg_flags |= MSG_CTRUNC; | ||
184 | cmlen = msg->msg_controllen; | ||
185 | } | ||
186 | cmhdr.cmsg_level = level; | ||
187 | cmhdr.cmsg_type = type; | ||
188 | cmhdr.cmsg_len = cmlen; | ||
189 | |||
190 | err = -EFAULT; | ||
191 | if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) | ||
192 | goto out; | ||
193 | if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) | ||
194 | goto out; | ||
195 | cmlen = CMSG_SPACE(len); | ||
196 | msg->msg_control += cmlen; | ||
197 | msg->msg_controllen -= cmlen; | ||
198 | err = 0; | ||
199 | out: | ||
200 | return err; | ||
201 | } | ||
202 | |||
203 | void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) | ||
204 | { | ||
205 | struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control; | ||
206 | |||
207 | int fdmax = 0; | ||
208 | int fdnum = scm->fp->count; | ||
209 | struct file **fp = scm->fp->fp; | ||
210 | int __user *cmfptr; | ||
211 | int err = 0, i; | ||
212 | |||
213 | if (MSG_CMSG_COMPAT & msg->msg_flags) { | ||
214 | scm_detach_fds_compat(msg, scm); | ||
215 | return; | ||
216 | } | ||
217 | |||
218 | if (msg->msg_controllen > sizeof(struct cmsghdr)) | ||
219 | fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr)) | ||
220 | / sizeof(int)); | ||
221 | |||
222 | if (fdnum < fdmax) | ||
223 | fdmax = fdnum; | ||
224 | |||
225 | for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) | ||
226 | { | ||
227 | int new_fd; | ||
228 | err = security_file_receive(fp[i]); | ||
229 | if (err) | ||
230 | break; | ||
231 | err = get_unused_fd(); | ||
232 | if (err < 0) | ||
233 | break; | ||
234 | new_fd = err; | ||
235 | err = put_user(new_fd, cmfptr); | ||
236 | if (err) { | ||
237 | put_unused_fd(new_fd); | ||
238 | break; | ||
239 | } | ||
240 | /* Bump the usage count and install the file. */ | ||
241 | get_file(fp[i]); | ||
242 | fd_install(new_fd, fp[i]); | ||
243 | } | ||
244 | |||
245 | if (i > 0) | ||
246 | { | ||
247 | int cmlen = CMSG_LEN(i*sizeof(int)); | ||
248 | if (!err) | ||
249 | err = put_user(SOL_SOCKET, &cm->cmsg_level); | ||
250 | if (!err) | ||
251 | err = put_user(SCM_RIGHTS, &cm->cmsg_type); | ||
252 | if (!err) | ||
253 | err = put_user(cmlen, &cm->cmsg_len); | ||
254 | if (!err) { | ||
255 | cmlen = CMSG_SPACE(i*sizeof(int)); | ||
256 | msg->msg_control += cmlen; | ||
257 | msg->msg_controllen -= cmlen; | ||
258 | } | ||
259 | } | ||
260 | if (i < fdnum || (fdnum && fdmax <= 0)) | ||
261 | msg->msg_flags |= MSG_CTRUNC; | ||
262 | |||
263 | /* | ||
264 | * All of the files that fit in the message have had their | ||
265 | * usage counts incremented, so we just free the list. | ||
266 | */ | ||
267 | __scm_destroy(scm); | ||
268 | } | ||
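On the receiving side, the descriptors installed by scm_detach_fds() appear to userspace as an SCM_RIGHTS control message. A minimal hedged sketch of a receiver for one descriptor over an AF_UNIX socket:

	#include <string.h>
	#include <sys/uio.h>
	#include <sys/socket.h>

	int recv_fd(int sock)
	{
		char data, cbuf[CMSG_SPACE(sizeof(int))];
		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
		struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
				      .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
		struct cmsghdr *cm;
		int fd = -1;

		if (recvmsg(sock, &msg, 0) <= 0)
			return -1;
		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
			if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
				memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
		return fd;	/* the kernel side installed it via fd_install() */
	}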
269 | |||
270 | struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) | ||
271 | { | ||
272 | struct scm_fp_list *new_fpl; | ||
273 | int i; | ||
274 | |||
275 | if (!fpl) | ||
276 | return NULL; | ||
277 | |||
278 | new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); | ||
279 | if (new_fpl) { | ||
280 | for (i=fpl->count-1; i>=0; i--) | ||
281 | get_file(fpl->fp[i]); | ||
282 | memcpy(new_fpl, fpl, sizeof(*fpl)); | ||
283 | } | ||
284 | return new_fpl; | ||
285 | } | ||
286 | |||
287 | EXPORT_SYMBOL(__scm_destroy); | ||
288 | EXPORT_SYMBOL(__scm_send); | ||
289 | EXPORT_SYMBOL(put_cmsg); | ||
290 | EXPORT_SYMBOL(scm_detach_fds); | ||
291 | EXPORT_SYMBOL(scm_fp_dup); | ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c new file mode 100644 index 000000000000..bf02ca9f80ac --- /dev/null +++ b/net/core/skbuff.c | |||
@@ -0,0 +1,1460 @@ | |||
1 | /* | ||
2 | * Routines having to do with the 'struct sk_buff' memory handlers. | ||
3 | * | ||
4 | * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> | ||
5 | * Florian La Roche <rzsfl@rz.uni-sb.de> | ||
6 | * | ||
7 | * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ | ||
8 | * | ||
9 | * Fixes: | ||
10 | * Alan Cox : Fixed the worst of the load | ||
11 | * balancer bugs. | ||
12 | * Dave Platt : Interrupt stacking fix. | ||
13 | * Richard Kooijman : Timestamp fixes. | ||
14 | * Alan Cox : Changed buffer format. | ||
15 | * Alan Cox : destructor hook for AF_UNIX etc. | ||
16 | * Linus Torvalds : Better skb_clone. | ||
17 | * Alan Cox : Added skb_copy. | ||
18 | * Alan Cox : Added all the changed routines Linus | ||
19 | * only put in the headers | ||
20 | * Ray VanTassle : Fixed --skb->lock in free | ||
21 | * Alan Cox : skb_copy copy arp field | ||
22 | * Andi Kleen : slabified it. | ||
23 | * Robert Olsson : Removed skb_head_pool | ||
24 | * | ||
25 | * NOTE: | ||
26 | * The __skb_ routines should be called with interrupts | ||
27 | * disabled, or you better be *real* sure that the operation is atomic | ||
28 | * with respect to whatever list is being frobbed (e.g. via lock_sock() | ||
29 | * or via disabling bottom half handlers, etc). | ||
30 | * | ||
31 | * This program is free software; you can redistribute it and/or | ||
32 | * modify it under the terms of the GNU General Public License | ||
33 | * as published by the Free Software Foundation; either version | ||
34 | * 2 of the License, or (at your option) any later version. | ||
35 | */ | ||
36 | |||
37 | /* | ||
38 | * The functions in this file will not compile correctly with gcc 2.4.x | ||
39 | */ | ||
40 | |||
41 | #include <linux/config.h> | ||
42 | #include <linux/module.h> | ||
43 | #include <linux/types.h> | ||
44 | #include <linux/kernel.h> | ||
45 | #include <linux/sched.h> | ||
46 | #include <linux/mm.h> | ||
47 | #include <linux/interrupt.h> | ||
48 | #include <linux/in.h> | ||
49 | #include <linux/inet.h> | ||
50 | #include <linux/slab.h> | ||
51 | #include <linux/netdevice.h> | ||
52 | #ifdef CONFIG_NET_CLS_ACT | ||
53 | #include <net/pkt_sched.h> | ||
54 | #endif | ||
55 | #include <linux/string.h> | ||
56 | #include <linux/skbuff.h> | ||
57 | #include <linux/cache.h> | ||
58 | #include <linux/rtnetlink.h> | ||
59 | #include <linux/init.h> | ||
60 | #include <linux/highmem.h> | ||
61 | |||
62 | #include <net/protocol.h> | ||
63 | #include <net/dst.h> | ||
64 | #include <net/sock.h> | ||
65 | #include <net/checksum.h> | ||
66 | #include <net/xfrm.h> | ||
67 | |||
68 | #include <asm/uaccess.h> | ||
69 | #include <asm/system.h> | ||
70 | |||
71 | static kmem_cache_t *skbuff_head_cache; | ||
72 | |||
73 | /* | ||
74 | * Keep out-of-line to prevent kernel bloat. | ||
75 | * __builtin_return_address is not used because it is not always | ||
76 | * reliable. | ||
77 | */ | ||
78 | |||
79 | /** | ||
80 | * skb_over_panic - private function | ||
81 | * @skb: buffer | ||
82 | * @sz: size | ||
83 | * @here: address | ||
84 | * | ||
85 | * Out of line support code for skb_put(). Not user callable. | ||
86 | */ | ||
87 | void skb_over_panic(struct sk_buff *skb, int sz, void *here) | ||
88 | { | ||
89 | printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s", | ||
90 | here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); | ||
91 | BUG(); | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | * skb_under_panic - private function | ||
96 | * @skb: buffer | ||
97 | * @sz: size | ||
98 | * @here: address | ||
99 | * | ||
100 | * Out of line support code for skb_push(). Not user callable. | ||
101 | */ | ||
102 | |||
103 | void skb_under_panic(struct sk_buff *skb, int sz, void *here) | ||
104 | { | ||
105 | printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s", | ||
106 | here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); | ||
107 | BUG(); | ||
108 | } | ||
109 | |||
110 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | ||
111 | * 'private' fields and also do memory statistics to find all the | ||
112 | * [BEEP] leaks. | ||
113 | * | ||
114 | */ | ||
115 | |||
116 | /** | ||
117 | * alloc_skb - allocate a network buffer | ||
118 | * @size: size to allocate | ||
119 | * @gfp_mask: allocation mask | ||
120 | * | ||
121 | * Allocate a new &sk_buff. The returned buffer has no headroom and a | ||
122 |  * tail room of @size bytes. The object has a reference count of one. | ||
123 |  * Returns the buffer on success or %NULL on failure. | ||
124 | * | ||
125 | * Buffers may only be allocated from interrupts using a @gfp_mask of | ||
126 | * %GFP_ATOMIC. | ||
127 | */ | ||
128 | struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) | ||
129 | { | ||
130 | struct sk_buff *skb; | ||
131 | u8 *data; | ||
132 | |||
133 | /* Get the HEAD */ | ||
134 | skb = kmem_cache_alloc(skbuff_head_cache, | ||
135 | gfp_mask & ~__GFP_DMA); | ||
136 | if (!skb) | ||
137 | goto out; | ||
138 | |||
139 | /* Get the DATA. Size must match skb_add_mtu(). */ | ||
140 | size = SKB_DATA_ALIGN(size); | ||
141 | data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); | ||
142 | if (!data) | ||
143 | goto nodata; | ||
144 | |||
145 | memset(skb, 0, offsetof(struct sk_buff, truesize)); | ||
146 | skb->truesize = size + sizeof(struct sk_buff); | ||
147 | atomic_set(&skb->users, 1); | ||
148 | skb->head = data; | ||
149 | skb->data = data; | ||
150 | skb->tail = data; | ||
151 | skb->end = data + size; | ||
152 | |||
153 | atomic_set(&(skb_shinfo(skb)->dataref), 1); | ||
154 | skb_shinfo(skb)->nr_frags = 0; | ||
155 | skb_shinfo(skb)->tso_size = 0; | ||
156 | skb_shinfo(skb)->tso_segs = 0; | ||
157 | skb_shinfo(skb)->frag_list = NULL; | ||
158 | out: | ||
159 | return skb; | ||
160 | nodata: | ||
161 | kmem_cache_free(skbuff_head_cache, skb); | ||
162 | skb = NULL; | ||
163 | goto out; | ||
164 | } | ||
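A typical in-kernel calling pattern, as a hedged sketch (dev, payload and payload_len are hypothetical): allocate with room for lower-layer headers, reserve that headroom, then append the payload with skb_put():

	struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + payload_len,
					GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));	/* headroom for headers */
	memcpy(skb_put(skb, payload_len), payload, payload_len);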
165 | |||
166 | /** | ||
167 | * alloc_skb_from_cache - allocate a network buffer | ||
168 | * @cp: kmem_cache from which to allocate the data area | ||
169 | * (object size must be big enough for @size bytes + skb overheads) | ||
170 | * @size: size to allocate | ||
171 | * @gfp_mask: allocation mask | ||
172 | * | ||
173 |  * Allocate a new &sk_buff. The returned buffer has no headroom and a | ||
174 |  * tail room of @size bytes. The object has a reference count of one. | ||
175 |  * Returns the buffer on success or %NULL on failure. | ||
176 | * | ||
177 | * Buffers may only be allocated from interrupts using a @gfp_mask of | ||
178 | * %GFP_ATOMIC. | ||
179 | */ | ||
180 | struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, | ||
181 | unsigned int size, int gfp_mask) | ||
182 | { | ||
183 | struct sk_buff *skb; | ||
184 | u8 *data; | ||
185 | |||
186 | /* Get the HEAD */ | ||
187 | skb = kmem_cache_alloc(skbuff_head_cache, | ||
188 | gfp_mask & ~__GFP_DMA); | ||
189 | if (!skb) | ||
190 | goto out; | ||
191 | |||
192 | /* Get the DATA. */ | ||
193 | size = SKB_DATA_ALIGN(size); | ||
194 | data = kmem_cache_alloc(cp, gfp_mask); | ||
195 | if (!data) | ||
196 | goto nodata; | ||
197 | |||
198 | memset(skb, 0, offsetof(struct sk_buff, truesize)); | ||
199 | skb->truesize = size + sizeof(struct sk_buff); | ||
200 | atomic_set(&skb->users, 1); | ||
201 | skb->head = data; | ||
202 | skb->data = data; | ||
203 | skb->tail = data; | ||
204 | skb->end = data + size; | ||
205 | |||
206 | atomic_set(&(skb_shinfo(skb)->dataref), 1); | ||
207 | skb_shinfo(skb)->nr_frags = 0; | ||
208 | skb_shinfo(skb)->tso_size = 0; | ||
209 | skb_shinfo(skb)->tso_segs = 0; | ||
210 | skb_shinfo(skb)->frag_list = NULL; | ||
211 | out: | ||
212 | return skb; | ||
213 | nodata: | ||
214 | kmem_cache_free(skbuff_head_cache, skb); | ||
215 | skb = NULL; | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | |||
220 | static void skb_drop_fraglist(struct sk_buff *skb) | ||
221 | { | ||
222 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | ||
223 | |||
224 | skb_shinfo(skb)->frag_list = NULL; | ||
225 | |||
226 | do { | ||
227 | struct sk_buff *this = list; | ||
228 | list = list->next; | ||
229 | kfree_skb(this); | ||
230 | } while (list); | ||
231 | } | ||
232 | |||
233 | static void skb_clone_fraglist(struct sk_buff *skb) | ||
234 | { | ||
235 | struct sk_buff *list; | ||
236 | |||
237 | for (list = skb_shinfo(skb)->frag_list; list; list = list->next) | ||
238 | skb_get(list); | ||
239 | } | ||
240 | |||
241 | void skb_release_data(struct sk_buff *skb) | ||
242 | { | ||
243 | if (!skb->cloned || | ||
244 | !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, | ||
245 | &skb_shinfo(skb)->dataref)) { | ||
246 | if (skb_shinfo(skb)->nr_frags) { | ||
247 | int i; | ||
248 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
249 | put_page(skb_shinfo(skb)->frags[i].page); | ||
250 | } | ||
251 | |||
252 | if (skb_shinfo(skb)->frag_list) | ||
253 | skb_drop_fraglist(skb); | ||
254 | |||
255 | kfree(skb->head); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | /* | ||
260 |  * Free an skbuff's memory without cleaning its state. | ||
261 | */ | ||
262 | void kfree_skbmem(struct sk_buff *skb) | ||
263 | { | ||
264 | skb_release_data(skb); | ||
265 | kmem_cache_free(skbuff_head_cache, skb); | ||
266 | } | ||
267 | |||
268 | /** | ||
269 | * __kfree_skb - private function | ||
270 | * @skb: buffer | ||
271 | * | ||
272 | * Free an sk_buff. Release anything attached to the buffer. | ||
273 | * Clean the state. This is an internal helper function. Users should | ||
274 |  * always call kfree_skb(). | ||
275 | */ | ||
276 | |||
277 | void __kfree_skb(struct sk_buff *skb) | ||
278 | { | ||
279 | if (skb->list) { | ||
280 | printk(KERN_WARNING "Warning: kfree_skb passed an skb still " | ||
281 | "on a list (from %p).\n", NET_CALLER(skb)); | ||
282 | BUG(); | ||
283 | } | ||
284 | |||
285 | dst_release(skb->dst); | ||
286 | #ifdef CONFIG_XFRM | ||
287 | secpath_put(skb->sp); | ||
288 | #endif | ||
289 | if(skb->destructor) { | ||
290 | if (in_irq()) | ||
291 | printk(KERN_WARNING "Warning: kfree_skb on " | ||
292 | "hard IRQ %p\n", NET_CALLER(skb)); | ||
293 | skb->destructor(skb); | ||
294 | } | ||
295 | #ifdef CONFIG_NETFILTER | ||
296 | nf_conntrack_put(skb->nfct); | ||
297 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
298 | nf_bridge_put(skb->nf_bridge); | ||
299 | #endif | ||
300 | #endif | ||
301 | /* XXX: Is this still necessary? - JHS */ | ||
302 | #ifdef CONFIG_NET_SCHED | ||
303 | skb->tc_index = 0; | ||
304 | #ifdef CONFIG_NET_CLS_ACT | ||
305 | skb->tc_verd = 0; | ||
306 | skb->tc_classid = 0; | ||
307 | #endif | ||
308 | #endif | ||
309 | |||
310 | kfree_skbmem(skb); | ||
311 | } | ||
312 | |||
313 | /** | ||
314 | * skb_clone - duplicate an sk_buff | ||
315 | * @skb: buffer to clone | ||
316 | * @gfp_mask: allocation priority | ||
317 | * | ||
318 | * Duplicate an &sk_buff. The new one is not owned by a socket. Both | ||
319 | * copies share the same packet data but not structure. The new | ||
320 | * buffer has a reference count of 1. If the allocation fails the | ||
321 | * function returns %NULL otherwise the new buffer is returned. | ||
322 | * | ||
323 | * If this function is called from an interrupt gfp_mask() must be | ||
324 | * %GFP_ATOMIC. | ||
325 | */ | ||
326 | |||
327 | struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) | ||
328 | { | ||
329 | struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | ||
330 | |||
331 | if (!n) | ||
332 | return NULL; | ||
333 | |||
334 | #define C(x) n->x = skb->x | ||
335 | |||
336 | n->next = n->prev = NULL; | ||
337 | n->list = NULL; | ||
338 | n->sk = NULL; | ||
339 | C(stamp); | ||
340 | C(dev); | ||
341 | C(real_dev); | ||
342 | C(h); | ||
343 | C(nh); | ||
344 | C(mac); | ||
345 | C(dst); | ||
346 | dst_clone(skb->dst); | ||
347 | C(sp); | ||
348 | #ifdef CONFIG_INET | ||
349 | secpath_get(skb->sp); | ||
350 | #endif | ||
351 | memcpy(n->cb, skb->cb, sizeof(skb->cb)); | ||
352 | C(len); | ||
353 | C(data_len); | ||
354 | C(csum); | ||
355 | C(local_df); | ||
356 | n->cloned = 1; | ||
357 | n->nohdr = 0; | ||
358 | C(pkt_type); | ||
359 | C(ip_summed); | ||
360 | C(priority); | ||
361 | C(protocol); | ||
362 | C(security); | ||
363 | n->destructor = NULL; | ||
364 | #ifdef CONFIG_NETFILTER | ||
365 | C(nfmark); | ||
366 | C(nfcache); | ||
367 | C(nfct); | ||
368 | nf_conntrack_get(skb->nfct); | ||
369 | C(nfctinfo); | ||
370 | #ifdef CONFIG_NETFILTER_DEBUG | ||
371 | C(nf_debug); | ||
372 | #endif | ||
373 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
374 | C(nf_bridge); | ||
375 | nf_bridge_get(skb->nf_bridge); | ||
376 | #endif | ||
377 | #endif /*CONFIG_NETFILTER*/ | ||
378 | #if defined(CONFIG_HIPPI) | ||
379 | C(private); | ||
380 | #endif | ||
381 | #ifdef CONFIG_NET_SCHED | ||
382 | C(tc_index); | ||
383 | #ifdef CONFIG_NET_CLS_ACT | ||
384 | n->tc_verd = SET_TC_VERD(skb->tc_verd,0); | ||
385 | n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); | ||
386 | n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); | ||
387 | C(input_dev); | ||
388 | C(tc_classid); | ||
389 | #endif | ||
390 | |||
391 | #endif | ||
392 | C(truesize); | ||
393 | atomic_set(&n->users, 1); | ||
394 | C(head); | ||
395 | C(data); | ||
396 | C(tail); | ||
397 | C(end); | ||
398 | |||
399 | atomic_inc(&(skb_shinfo(skb)->dataref)); | ||
400 | skb->cloned = 1; | ||
401 | |||
402 | return n; | ||
403 | } | ||
404 | |||
405 | static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | ||
406 | { | ||
407 | /* | ||
408 | * Shift between the two data areas in bytes | ||
409 | */ | ||
410 | unsigned long offset = new->data - old->data; | ||
411 | |||
412 | new->list = NULL; | ||
413 | new->sk = NULL; | ||
414 | new->dev = old->dev; | ||
415 | new->real_dev = old->real_dev; | ||
416 | new->priority = old->priority; | ||
417 | new->protocol = old->protocol; | ||
418 | new->dst = dst_clone(old->dst); | ||
419 | #ifdef CONFIG_INET | ||
420 | new->sp = secpath_get(old->sp); | ||
421 | #endif | ||
422 | new->h.raw = old->h.raw + offset; | ||
423 | new->nh.raw = old->nh.raw + offset; | ||
424 | new->mac.raw = old->mac.raw + offset; | ||
425 | memcpy(new->cb, old->cb, sizeof(old->cb)); | ||
426 | new->local_df = old->local_df; | ||
427 | new->pkt_type = old->pkt_type; | ||
428 | new->stamp = old->stamp; | ||
429 | new->destructor = NULL; | ||
430 | new->security = old->security; | ||
431 | #ifdef CONFIG_NETFILTER | ||
432 | new->nfmark = old->nfmark; | ||
433 | new->nfcache = old->nfcache; | ||
434 | new->nfct = old->nfct; | ||
435 | nf_conntrack_get(old->nfct); | ||
436 | new->nfctinfo = old->nfctinfo; | ||
437 | #ifdef CONFIG_NETFILTER_DEBUG | ||
438 | new->nf_debug = old->nf_debug; | ||
439 | #endif | ||
440 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
441 | new->nf_bridge = old->nf_bridge; | ||
442 | nf_bridge_get(old->nf_bridge); | ||
443 | #endif | ||
444 | #endif | ||
445 | #ifdef CONFIG_NET_SCHED | ||
446 | #ifdef CONFIG_NET_CLS_ACT | ||
447 | new->tc_verd = old->tc_verd; | ||
448 | #endif | ||
449 | new->tc_index = old->tc_index; | ||
450 | #endif | ||
451 | atomic_set(&new->users, 1); | ||
452 | skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; | ||
453 | skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * skb_copy - create private copy of an sk_buff | ||
458 | * @skb: buffer to copy | ||
459 | * @gfp_mask: allocation priority | ||
460 | * | ||
461 | * Make a copy of both an &sk_buff and its data. This is used when the | ||
462 | * caller wishes to modify the data and needs a private copy of the | ||
463 | * data to alter. Returns %NULL on failure or the pointer to the buffer | ||
464 | * on success. The returned buffer has a reference count of 1. | ||
465 | * | ||
466 |  * As a by-product this function converts a non-linear &sk_buff into a | ||
467 |  * linear one, so the &sk_buff becomes completely private and the caller | ||
468 |  * may modify all the data of the returned buffer. This means the | ||
469 |  * function is not recommended when only the header is going to be | ||
470 |  * modified. Use pskb_copy() instead. | ||
471 | */ | ||
472 | |||
473 | struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) | ||
474 | { | ||
475 | int headerlen = skb->data - skb->head; | ||
476 | /* | ||
477 | * Allocate the copy buffer | ||
478 | */ | ||
479 | struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, | ||
480 | gfp_mask); | ||
481 | if (!n) | ||
482 | return NULL; | ||
483 | |||
484 | /* Set the data pointer */ | ||
485 | skb_reserve(n, headerlen); | ||
486 | /* Set the tail pointer and length */ | ||
487 | skb_put(n, skb->len); | ||
488 | n->csum = skb->csum; | ||
489 | n->ip_summed = skb->ip_summed; | ||
490 | |||
491 | if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) | ||
492 | BUG(); | ||
493 | |||
494 | copy_skb_header(n, skb); | ||
495 | return n; | ||
496 | } | ||
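A short hedged sketch of the usual choice between the two: skb_clone() when the data will only be read, skb_copy() when it is about to be written:

	/* Hand the packet to a second, read-only consumer: share the data. */
	struct sk_buff *ro = skb_clone(skb, GFP_ATOMIC);

	/* About to rewrite payload bytes: take a fully private, linear copy. */
	struct sk_buff *rw = skb_copy(skb, GFP_ATOMIC);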
497 | |||
498 | |||
499 | /** | ||
500 | * pskb_copy - create copy of an sk_buff with private head. | ||
501 | * @skb: buffer to copy | ||
502 | * @gfp_mask: allocation priority | ||
503 | * | ||
504 |  * Make a copy of both an &sk_buff and the part of its data located | ||
505 |  * in the header. Fragmented data remain shared. This is used when | ||
506 |  * the caller wishes to modify only the header of an &sk_buff and needs | ||
507 |  * a private copy of the header to alter. Returns %NULL on failure | ||
508 |  * or the pointer to the buffer on success. | ||
509 | * The returned buffer has a reference count of 1. | ||
510 | */ | ||
511 | |||
512 | struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) | ||
513 | { | ||
514 | /* | ||
515 | * Allocate the copy buffer | ||
516 | */ | ||
517 | struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); | ||
518 | |||
519 | if (!n) | ||
520 | goto out; | ||
521 | |||
522 | /* Set the data pointer */ | ||
523 | skb_reserve(n, skb->data - skb->head); | ||
524 | /* Set the tail pointer and length */ | ||
525 | skb_put(n, skb_headlen(skb)); | ||
526 | /* Copy the bytes */ | ||
527 | memcpy(n->data, skb->data, n->len); | ||
528 | n->csum = skb->csum; | ||
529 | n->ip_summed = skb->ip_summed; | ||
530 | |||
531 | n->data_len = skb->data_len; | ||
532 | n->len = skb->len; | ||
533 | |||
534 | if (skb_shinfo(skb)->nr_frags) { | ||
535 | int i; | ||
536 | |||
537 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
538 | skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; | ||
539 | get_page(skb_shinfo(n)->frags[i].page); | ||
540 | } | ||
541 | skb_shinfo(n)->nr_frags = i; | ||
542 | } | ||
543 | |||
544 | if (skb_shinfo(skb)->frag_list) { | ||
545 | skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; | ||
546 | skb_clone_fraglist(n); | ||
547 | } | ||
548 | |||
549 | copy_skb_header(n, skb); | ||
550 | out: | ||
551 | return n; | ||
552 | } | ||
553 | |||
554 | /** | ||
555 | * pskb_expand_head - reallocate header of &sk_buff | ||
556 | * @skb: buffer to reallocate | ||
557 | * @nhead: room to add at head | ||
558 | * @ntail: room to add at tail | ||
559 | * @gfp_mask: allocation priority | ||
560 | * | ||
561 |  * Expands (or creates an identical copy, if @nhead and @ntail are zero) | ||
562 |  * the header of the skb. The &sk_buff itself is not changed and MUST | ||
563 |  * have a reference count of 1. Returns zero on success, or a negative | ||
564 |  * error code if expansion failed (in which case the skb is unchanged). | ||
565 | * | ||
566 | * All the pointers pointing into skb header may change and must be | ||
567 | * reloaded after call to this function. | ||
568 | */ | ||
569 | |||
570 | int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) | ||
571 | { | ||
572 | int i; | ||
573 | u8 *data; | ||
574 | int size = nhead + (skb->end - skb->head) + ntail; | ||
575 | long off; | ||
576 | |||
577 | if (skb_shared(skb)) | ||
578 | BUG(); | ||
579 | |||
580 | size = SKB_DATA_ALIGN(size); | ||
581 | |||
582 | data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); | ||
583 | if (!data) | ||
584 | goto nodata; | ||
585 | |||
586 | /* Copy only real data... and, alas, header. This should be | ||
587 |  * optimized for the case when the header is empty. */ | ||
588 | memcpy(data + nhead, skb->head, skb->tail - skb->head); | ||
589 | memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); | ||
590 | |||
591 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
592 | get_page(skb_shinfo(skb)->frags[i].page); | ||
593 | |||
594 | if (skb_shinfo(skb)->frag_list) | ||
595 | skb_clone_fraglist(skb); | ||
596 | |||
597 | skb_release_data(skb); | ||
598 | |||
599 | off = (data + nhead) - skb->head; | ||
600 | |||
601 | skb->head = data; | ||
602 | skb->end = data + size; | ||
603 | skb->data += off; | ||
604 | skb->tail += off; | ||
605 | skb->mac.raw += off; | ||
606 | skb->h.raw += off; | ||
607 | skb->nh.raw += off; | ||
608 | skb->cloned = 0; | ||
609 | skb->nohdr = 0; | ||
610 | atomic_set(&skb_shinfo(skb)->dataref, 1); | ||
611 | return 0; | ||
612 | |||
613 | nodata: | ||
614 | return -ENOMEM; | ||
615 | } | ||
616 | |||
617 | /* Make private copy of skb with writable head and some headroom */ | ||
618 | |||
619 | struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) | ||
620 | { | ||
621 | struct sk_buff *skb2; | ||
622 | int delta = headroom - skb_headroom(skb); | ||
623 | |||
624 | if (delta <= 0) | ||
625 | skb2 = pskb_copy(skb, GFP_ATOMIC); | ||
626 | else { | ||
627 | skb2 = skb_clone(skb, GFP_ATOMIC); | ||
628 | if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, | ||
629 | GFP_ATOMIC)) { | ||
630 | kfree_skb(skb2); | ||
631 | skb2 = NULL; | ||
632 | } | ||
633 | } | ||
634 | return skb2; | ||
635 | } | ||
636 | |||
637 | |||
638 | /** | ||
639 | * skb_copy_expand - copy and expand sk_buff | ||
640 | * @skb: buffer to copy | ||
641 | * @newheadroom: new free bytes at head | ||
642 | * @newtailroom: new free bytes at tail | ||
643 | * @gfp_mask: allocation priority | ||
644 | * | ||
645 | * Make a copy of both an &sk_buff and its data and while doing so | ||
646 | * allocate additional space. | ||
647 | * | ||
648 | * This is used when the caller wishes to modify the data and needs a | ||
649 | * private copy of the data to alter as well as more space for new fields. | ||
650 | * Returns %NULL on failure or the pointer to the buffer | ||
651 | * on success. The returned buffer has a reference count of 1. | ||
652 | * | ||
653 | * You must pass %GFP_ATOMIC as the allocation priority if this function | ||
654 | * is called from an interrupt. | ||
655 | * | ||
656 | * BUG ALERT: ip_summed is not copied. Why does this work? Is it used | ||
657 | * only by netfilter in the cases when checksum is recalculated? --ANK | ||
658 | */ | ||
659 | struct sk_buff *skb_copy_expand(const struct sk_buff *skb, | ||
660 | int newheadroom, int newtailroom, int gfp_mask) | ||
661 | { | ||
662 | /* | ||
663 | * Allocate the copy buffer | ||
664 | */ | ||
665 | struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, | ||
666 | gfp_mask); | ||
667 | int head_copy_len, head_copy_off; | ||
668 | |||
669 | if (!n) | ||
670 | return NULL; | ||
671 | |||
672 | skb_reserve(n, newheadroom); | ||
673 | |||
674 | /* Set the tail pointer and length */ | ||
675 | skb_put(n, skb->len); | ||
676 | |||
677 | head_copy_len = skb_headroom(skb); | ||
678 | head_copy_off = 0; | ||
679 | if (newheadroom <= head_copy_len) | ||
680 | head_copy_len = newheadroom; | ||
681 | else | ||
682 | head_copy_off = newheadroom - head_copy_len; | ||
683 | |||
684 | /* Copy the linear header and data. */ | ||
685 | if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, | ||
686 | skb->len + head_copy_len)) | ||
687 | BUG(); | ||
688 | |||
689 | copy_skb_header(n, skb); | ||
690 | |||
691 | return n; | ||
692 | } | ||
693 | |||
694 | /** | ||
695 | * skb_pad - zero pad the tail of an skb | ||
696 | * @skb: buffer to pad | ||
697 | * @pad: space to pad | ||
698 | * | ||
699 | * Ensure that a buffer is followed by a padding area that is zero | ||
700 | * filled. Used by network drivers which may DMA or transfer data | ||
701 | * beyond the buffer end onto the wire. | ||
702 | * | ||
703 |  * May return %NULL in out-of-memory cases (the original is then freed). | ||
704 | */ | ||
705 | |||
706 | struct sk_buff *skb_pad(struct sk_buff *skb, int pad) | ||
707 | { | ||
708 | struct sk_buff *nskb; | ||
709 | |||
710 | /* If the skbuff is non-linear, tailroom is always zero. */ | ||
711 | if (skb_tailroom(skb) >= pad) { | ||
712 | memset(skb->data+skb->len, 0, pad); | ||
713 | return skb; | ||
714 | } | ||
715 | |||
716 | nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); | ||
717 | kfree_skb(skb); | ||
718 | if (nskb) | ||
719 | memset(nskb->data+nskb->len, 0, pad); | ||
720 | return nskb; | ||
721 | } | ||
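Note that whenever a copy is required, this version frees the original buffer, so the caller may use only the return value. A hedged sketch of the classic driver use, padding runt Ethernet frames to the 60-byte ETH_ZLEN minimum (hypothetical transmit path):

	if (skb->len < ETH_ZLEN) {
		skb = skb_pad(skb, ETH_ZLEN - skb->len);
		if (!skb)
			return 0;	/* out of memory; skb_pad() freed the original */
	}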
722 | |||
723 | /* Trims the skb to length len. It can change skb pointers if "realloc" is 1. | ||
724 |  * If realloc == 0 and trimming is impossible without changing the data, | ||
725 |  * it is a BUG(). | ||
726 | */ | ||
727 | |||
728 | int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) | ||
729 | { | ||
730 | int offset = skb_headlen(skb); | ||
731 | int nfrags = skb_shinfo(skb)->nr_frags; | ||
732 | int i; | ||
733 | |||
734 | for (i = 0; i < nfrags; i++) { | ||
735 | int end = offset + skb_shinfo(skb)->frags[i].size; | ||
736 | if (end > len) { | ||
737 | if (skb_cloned(skb)) { | ||
738 | if (!realloc) | ||
739 | BUG(); | ||
740 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | ||
741 | return -ENOMEM; | ||
742 | } | ||
743 | if (len <= offset) { | ||
744 | put_page(skb_shinfo(skb)->frags[i].page); | ||
745 | skb_shinfo(skb)->nr_frags--; | ||
746 | } else { | ||
747 | skb_shinfo(skb)->frags[i].size = len - offset; | ||
748 | } | ||
749 | } | ||
750 | offset = end; | ||
751 | } | ||
752 | |||
753 | if (offset < len) { | ||
754 | skb->data_len -= skb->len - len; | ||
755 | skb->len = len; | ||
756 | } else { | ||
757 | if (len <= skb_headlen(skb)) { | ||
758 | skb->len = len; | ||
759 | skb->data_len = 0; | ||
760 | skb->tail = skb->data + len; | ||
761 | if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) | ||
762 | skb_drop_fraglist(skb); | ||
763 | } else { | ||
764 | skb->data_len -= skb->len - len; | ||
765 | skb->len = len; | ||
766 | } | ||
767 | } | ||
768 | |||
769 | return 0; | ||
770 | } | ||
771 | |||
772 | /** | ||
773 | * __pskb_pull_tail - advance tail of skb header | ||
774 | * @skb: buffer to reallocate | ||
775 | * @delta: number of bytes to advance tail | ||
776 | * | ||
777 |  * The function makes sense only on a fragmented &sk_buff; | ||
778 |  * it expands the header, moving its tail forward and copying the | ||
779 |  * necessary data from the fragmented part. | ||
780 | * | ||
781 | * &sk_buff MUST have reference count of 1. | ||
782 | * | ||
783 |  * Returns %NULL (and the &sk_buff does not change) if the pull failed, | ||
784 |  * or the value of the new tail of the skb on success. | ||
785 | * | ||
786 | * All the pointers pointing into skb header may change and must be | ||
787 | * reloaded after call to this function. | ||
788 | */ | ||
789 | |||
790 | /* Moves the tail of the skb head forward, copying data from the | ||
791 |  * fragmented part when necessary. | ||
792 | * 1. It may fail due to malloc failure. | ||
793 | * 2. It may change skb pointers. | ||
794 | * | ||
795 | * It is pretty complicated. Luckily, it is called only in exceptional cases. | ||
796 | */ | ||
797 | unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) | ||
798 | { | ||
799 | /* If the skb does not have enough free space at the tail, get a new | ||
800 |  * data area plus 128 bytes for future expansion. If there is enough | ||
801 |  * room at the tail, reallocate without expansion only if the skb is cloned. | ||
802 | */ | ||
803 | int i, k, eat = (skb->tail + delta) - skb->end; | ||
804 | |||
805 | if (eat > 0 || skb_cloned(skb)) { | ||
806 | if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, | ||
807 | GFP_ATOMIC)) | ||
808 | return NULL; | ||
809 | } | ||
810 | |||
811 | if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) | ||
812 | BUG(); | ||
813 | |||
814 | /* Optimization: no fragments, no reason to pre-estimate the | ||
815 |  * size of the pulled pages. Superb. | ||
816 | */ | ||
817 | if (!skb_shinfo(skb)->frag_list) | ||
818 | goto pull_pages; | ||
819 | |||
820 | /* Estimate size of pulled pages. */ | ||
821 | eat = delta; | ||
822 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
823 | if (skb_shinfo(skb)->frags[i].size >= eat) | ||
824 | goto pull_pages; | ||
825 | eat -= skb_shinfo(skb)->frags[i].size; | ||
826 | } | ||
827 | |||
828 | /* If we need to update the frag list, we are in trouble.
829 |  * Certainly, it is possible to add an offset to the skb data, | ||
830 |  * but taking into account that pulling is expected to | ||
831 |  * be a very rare operation, it is worth fighting against | ||
832 |  * further bloating of the skb head and crucifying ourselves here instead. | ||
833 |  * Pure masochism, indeed. 8)8) | ||
834 | */ | ||
835 | if (eat) { | ||
836 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | ||
837 | struct sk_buff *clone = NULL; | ||
838 | struct sk_buff *insp = NULL; | ||
839 | |||
840 | do { | ||
841 | if (!list) | ||
842 | BUG(); | ||
843 | |||
844 | if (list->len <= eat) { | ||
845 | /* Eaten as whole. */ | ||
846 | eat -= list->len; | ||
847 | list = list->next; | ||
848 | insp = list; | ||
849 | } else { | ||
850 | /* Eaten partially. */ | ||
851 | |||
852 | if (skb_shared(list)) { | ||
853 | /* Sucks! We need to fork list. :-( */ | ||
854 | clone = skb_clone(list, GFP_ATOMIC); | ||
855 | if (!clone) | ||
856 | return NULL; | ||
857 | insp = list->next; | ||
858 | list = clone; | ||
859 | } else { | ||
860 | /* This may be pulled without | ||
861 | * problems. */ | ||
862 | insp = list; | ||
863 | } | ||
864 | if (!pskb_pull(list, eat)) { | ||
865 | if (clone) | ||
866 | kfree_skb(clone); | ||
867 | return NULL; | ||
868 | } | ||
869 | break; | ||
870 | } | ||
871 | } while (eat); | ||
872 | |||
873 | /* Free pulled out fragments. */ | ||
874 | while ((list = skb_shinfo(skb)->frag_list) != insp) { | ||
875 | skb_shinfo(skb)->frag_list = list->next; | ||
876 | kfree_skb(list); | ||
877 | } | ||
878 | /* And insert new clone at head. */ | ||
879 | if (clone) { | ||
880 | clone->next = list; | ||
881 | skb_shinfo(skb)->frag_list = clone; | ||
882 | } | ||
883 | } | ||
884 | /* Success! Now we may commit changes to skb data. */ | ||
885 | |||
886 | pull_pages: | ||
887 | eat = delta; | ||
888 | k = 0; | ||
889 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
890 | if (skb_shinfo(skb)->frags[i].size <= eat) { | ||
891 | put_page(skb_shinfo(skb)->frags[i].page); | ||
892 | eat -= skb_shinfo(skb)->frags[i].size; | ||
893 | } else { | ||
894 | skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; | ||
895 | if (eat) { | ||
896 | skb_shinfo(skb)->frags[k].page_offset += eat; | ||
897 | skb_shinfo(skb)->frags[k].size -= eat; | ||
898 | eat = 0; | ||
899 | } | ||
900 | k++; | ||
901 | } | ||
902 | } | ||
903 | skb_shinfo(skb)->nr_frags = k; | ||
904 | |||
905 | skb->tail += delta; | ||
906 | skb->data_len -= delta; | ||
907 | |||
908 | return skb->tail; | ||
909 | } | ||
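/* Usage sketch (hedged): callers normally reach the slow path above
 * through a helper along the lines of the pskb_may_pull() inline in
 * include/linux/skbuff.h, which only pulls when the linear head is
 * too short; 'example_may_pull' is an illustrative name:
 */
static inline int example_may_pull(struct sk_buff *skb, unsigned int len)
{
	if (likely(len <= skb_headlen(skb)))
		return 1;		/* header already linear */
	if (unlikely(len > skb->len))
		return 0;		/* request exceeds the packet */
	return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
}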
910 | |||
911 | /* Copy some data bits from skb to kernel buffer. */ | ||
912 | |||
913 | int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) | ||
914 | { | ||
915 | int i, copy; | ||
916 | int start = skb_headlen(skb); | ||
917 | |||
918 | if (offset > (int)skb->len - len) | ||
919 | goto fault; | ||
920 | |||
921 | /* Copy header. */ | ||
922 | if ((copy = start - offset) > 0) { | ||
923 | if (copy > len) | ||
924 | copy = len; | ||
925 | memcpy(to, skb->data + offset, copy); | ||
926 | if ((len -= copy) == 0) | ||
927 | return 0; | ||
928 | offset += copy; | ||
929 | to += copy; | ||
930 | } | ||
931 | |||
932 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
933 | int end; | ||
934 | |||
935 | BUG_TRAP(start <= offset + len); | ||
936 | |||
937 | end = start + skb_shinfo(skb)->frags[i].size; | ||
938 | if ((copy = end - offset) > 0) { | ||
939 | u8 *vaddr; | ||
940 | |||
941 | if (copy > len) | ||
942 | copy = len; | ||
943 | |||
944 | vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); | ||
945 | memcpy(to, | ||
946 | vaddr + skb_shinfo(skb)->frags[i].page_offset+ | ||
947 | offset - start, copy); | ||
948 | kunmap_skb_frag(vaddr); | ||
949 | |||
950 | if ((len -= copy) == 0) | ||
951 | return 0; | ||
952 | offset += copy; | ||
953 | to += copy; | ||
954 | } | ||
955 | start = end; | ||
956 | } | ||
957 | |||
958 | if (skb_shinfo(skb)->frag_list) { | ||
959 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | ||
960 | |||
961 | for (; list; list = list->next) { | ||
962 | int end; | ||
963 | |||
964 | BUG_TRAP(start <= offset + len); | ||
965 | |||
966 | end = start + list->len; | ||
967 | if ((copy = end - offset) > 0) { | ||
968 | if (copy > len) | ||
969 | copy = len; | ||
970 | if (skb_copy_bits(list, offset - start, | ||
971 | to, copy)) | ||
972 | goto fault; | ||
973 | if ((len -= copy) == 0) | ||
974 | return 0; | ||
975 | offset += copy; | ||
976 | to += copy; | ||
977 | } | ||
978 | start = end; | ||
979 | } | ||
980 | } | ||
981 | if (!len) | ||
982 | return 0; | ||
983 | |||
984 | fault: | ||
985 | return -EFAULT; | ||
986 | } | ||
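/* Usage sketch (hedged): a handler reading a header out of a possibly
 * nonlinear skb can copy it into a stack buffer. 'thoff' (transport
 * header offset) and the function name are illustrative; linux/udp.h
 * is assumed for struct udphdr. */
static int example_peek_udp_len(const struct sk_buff *skb, int thoff)
{
	struct udphdr uh;

	if (skb_copy_bits(skb, thoff, &uh, sizeof(uh)) < 0)
		return -EINVAL;		/* ran past the end of the skb */
	return ntohs(uh.len);
}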
987 | |||
988 | /* Checksum skb data. */ | ||
989 | |||
990 | unsigned int skb_checksum(const struct sk_buff *skb, int offset, | ||
991 | int len, unsigned int csum) | ||
992 | { | ||
993 | int start = skb_headlen(skb); | ||
994 | int i, copy = start - offset; | ||
995 | int pos = 0; | ||
996 | |||
997 | /* Checksum header. */ | ||
998 | if (copy > 0) { | ||
999 | if (copy > len) | ||
1000 | copy = len; | ||
1001 | csum = csum_partial(skb->data + offset, copy, csum); | ||
1002 | if ((len -= copy) == 0) | ||
1003 | return csum; | ||
1004 | offset += copy; | ||
1005 | pos = copy; | ||
1006 | } | ||
1007 | |||
1008 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
1009 | int end; | ||
1010 | |||
1011 | BUG_TRAP(start <= offset + len); | ||
1012 | |||
1013 | end = start + skb_shinfo(skb)->frags[i].size; | ||
1014 | if ((copy = end - offset) > 0) { | ||
1015 | unsigned int csum2; | ||
1016 | u8 *vaddr; | ||
1017 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | ||
1018 | |||
1019 | if (copy > len) | ||
1020 | copy = len; | ||
1021 | vaddr = kmap_skb_frag(frag); | ||
1022 | csum2 = csum_partial(vaddr + frag->page_offset + | ||
1023 | offset - start, copy, 0); | ||
1024 | kunmap_skb_frag(vaddr); | ||
1025 | csum = csum_block_add(csum, csum2, pos); | ||
1026 | if (!(len -= copy)) | ||
1027 | return csum; | ||
1028 | offset += copy; | ||
1029 | pos += copy; | ||
1030 | } | ||
1031 | start = end; | ||
1032 | } | ||
1033 | |||
1034 | if (skb_shinfo(skb)->frag_list) { | ||
1035 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | ||
1036 | |||
1037 | for (; list; list = list->next) { | ||
1038 | int end; | ||
1039 | |||
1040 | BUG_TRAP(start <= offset + len); | ||
1041 | |||
1042 | end = start + list->len; | ||
1043 | if ((copy = end - offset) > 0) { | ||
1044 | unsigned int csum2; | ||
1045 | if (copy > len) | ||
1046 | copy = len; | ||
1047 | csum2 = skb_checksum(list, offset - start, | ||
1048 | copy, 0); | ||
1049 | csum = csum_block_add(csum, csum2, pos); | ||
1050 | if ((len -= copy) == 0) | ||
1051 | return csum; | ||
1052 | offset += copy; | ||
1053 | pos += copy; | ||
1054 | } | ||
1055 | start = end; | ||
1056 | } | ||
1057 | } | ||
1058 | if (len) | ||
1059 | BUG(); | ||
1060 | |||
1061 | return csum; | ||
1062 | } | ||
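/* The block-combining step above leans on RFC 1071 arithmetic: partial
 * ones-complement sums may be computed independently and folded
 * together, byte-swapping any block that began at an odd offset. A
 * self-contained sketch of that property (illustrative names, plain C,
 * not the kernel csum API):
 */
static unsigned int example_fold16(unsigned int sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

static unsigned int example_csum_block_add(unsigned int sum,
					   unsigned int sum2, int offset)
{
	sum2 = example_fold16(sum2);
	if (offset & 1)		/* sub-block started on an odd byte */
		sum2 = ((sum2 & 0xff) << 8) | (sum2 >> 8);
	return example_fold16(sum + sum2);
}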
1063 | |||
1064 | /* Both of the above in one bottle. */ | ||
1065 | |||
1066 | unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, | ||
1067 | u8 *to, int len, unsigned int csum) | ||
1068 | { | ||
1069 | int start = skb_headlen(skb); | ||
1070 | int i, copy = start - offset; | ||
1071 | int pos = 0; | ||
1072 | |||
1073 | /* Copy header. */ | ||
1074 | if (copy > 0) { | ||
1075 | if (copy > len) | ||
1076 | copy = len; | ||
1077 | csum = csum_partial_copy_nocheck(skb->data + offset, to, | ||
1078 | copy, csum); | ||
1079 | if ((len -= copy) == 0) | ||
1080 | return csum; | ||
1081 | offset += copy; | ||
1082 | to += copy; | ||
1083 | pos = copy; | ||
1084 | } | ||
1085 | |||
1086 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
1087 | int end; | ||
1088 | |||
1089 | BUG_TRAP(start <= offset + len); | ||
1090 | |||
1091 | end = start + skb_shinfo(skb)->frags[i].size; | ||
1092 | if ((copy = end - offset) > 0) { | ||
1093 | unsigned int csum2; | ||
1094 | u8 *vaddr; | ||
1095 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | ||
1096 | |||
1097 | if (copy > len) | ||
1098 | copy = len; | ||
1099 | vaddr = kmap_skb_frag(frag); | ||
1100 | csum2 = csum_partial_copy_nocheck(vaddr + | ||
1101 | frag->page_offset + | ||
1102 | offset - start, to, | ||
1103 | copy, 0); | ||
1104 | kunmap_skb_frag(vaddr); | ||
1105 | csum = csum_block_add(csum, csum2, pos); | ||
1106 | if (!(len -= copy)) | ||
1107 | return csum; | ||
1108 | offset += copy; | ||
1109 | to += copy; | ||
1110 | pos += copy; | ||
1111 | } | ||
1112 | start = end; | ||
1113 | } | ||
1114 | |||
1115 | if (skb_shinfo(skb)->frag_list) { | ||
1116 | struct sk_buff *list = skb_shinfo(skb)->frag_list; | ||
1117 | |||
1118 | for (; list; list = list->next) { | ||
1119 | unsigned int csum2; | ||
1120 | int end; | ||
1121 | |||
1122 | BUG_TRAP(start <= offset + len); | ||
1123 | |||
1124 | end = start + list->len; | ||
1125 | if ((copy = end - offset) > 0) { | ||
1126 | if (copy > len) | ||
1127 | copy = len; | ||
1128 | csum2 = skb_copy_and_csum_bits(list, | ||
1129 | offset - start, | ||
1130 | to, copy, 0); | ||
1131 | csum = csum_block_add(csum, csum2, pos); | ||
1132 | if ((len -= copy) == 0) | ||
1133 | return csum; | ||
1134 | offset += copy; | ||
1135 | to += copy; | ||
1136 | pos += copy; | ||
1137 | } | ||
1138 | start = end; | ||
1139 | } | ||
1140 | } | ||
1141 | if (len) | ||
1142 | BUG(); | ||
1143 | return csum; | ||
1144 | } | ||
1145 | |||
1146 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) | ||
1147 | { | ||
1148 | unsigned int csum; | ||
1149 | long csstart; | ||
1150 | |||
1151 | if (skb->ip_summed == CHECKSUM_HW) | ||
1152 | csstart = skb->h.raw - skb->data; | ||
1153 | else | ||
1154 | csstart = skb_headlen(skb); | ||
1155 | |||
1156 | if (csstart > skb_headlen(skb)) | ||
1157 | BUG(); | ||
1158 | |||
1159 | memcpy(to, skb->data, csstart); | ||
1160 | |||
1161 | csum = 0; | ||
1162 | if (csstart != skb->len) | ||
1163 | csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, | ||
1164 | skb->len - csstart, 0); | ||
1165 | |||
1166 | if (skb->ip_summed == CHECKSUM_HW) { | ||
1167 | long csstuff = csstart + skb->csum; | ||
1168 | |||
1169 | *((unsigned short *)(to + csstuff)) = csum_fold(csum); | ||
1170 | } | ||
1171 | } | ||
1172 | |||
1173 | /** | ||
1174 | * skb_dequeue - remove from the head of the queue | ||
1175 | * @list: list to dequeue from | ||
1176 | * | ||
1177 | * Remove the head of the list. The list lock is taken so the function | ||
1178 | * may be used safely with other locking list functions. The head item is | ||
1179 | * returned or %NULL if the list is empty. | ||
1180 | */ | ||
1181 | |||
1182 | struct sk_buff *skb_dequeue(struct sk_buff_head *list) | ||
1183 | { | ||
1184 | unsigned long flags; | ||
1185 | struct sk_buff *result; | ||
1186 | |||
1187 | spin_lock_irqsave(&list->lock, flags); | ||
1188 | result = __skb_dequeue(list); | ||
1189 | spin_unlock_irqrestore(&list->lock, flags); | ||
1190 | return result; | ||
1191 | } | ||
1192 | |||
1193 | /** | ||
1194 | * skb_dequeue_tail - remove from the tail of the queue | ||
1195 | * @list: list to dequeue from | ||
1196 | * | ||
1197 | * Remove the tail of the list. The list lock is taken so the function | ||
1198 | * may be used safely with other locking list functions. The tail item is | ||
1199 | * returned or %NULL if the list is empty. | ||
1200 | */ | ||
1201 | struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) | ||
1202 | { | ||
1203 | unsigned long flags; | ||
1204 | struct sk_buff *result; | ||
1205 | |||
1206 | spin_lock_irqsave(&list->lock, flags); | ||
1207 | result = __skb_dequeue_tail(list); | ||
1208 | spin_unlock_irqrestore(&list->lock, flags); | ||
1209 | return result; | ||
1210 | } | ||
1211 | |||
1212 | /** | ||
1213 | * skb_queue_purge - empty a list | ||
1214 | * @list: list to empty | ||
1215 | * | ||
1216 | * Delete all buffers on an &sk_buff list. Each buffer is removed from | ||
1217 | * the list and one reference dropped. This function takes the list | ||
1218 | * lock and is atomic with respect to other list locking functions. | ||
1219 | */ | ||
1220 | void skb_queue_purge(struct sk_buff_head *list) | ||
1221 | { | ||
1222 | struct sk_buff *skb; | ||
1223 | while ((skb = skb_dequeue(list)) != NULL) | ||
1224 | kfree_skb(skb); | ||
1225 | } | ||
1226 | |||
1227 | /** | ||
1228 | * skb_queue_head - queue a buffer at the list head | ||
1229 | * @list: list to use | ||
1230 | * @newsk: buffer to queue | ||
1231 | * | ||
1232 | * Queue a buffer at the start of the list. This function takes the | ||
1233 | * list lock and can be used safely with other locking &sk_buff | ||
1234 | * functions. | ||
1235 | * | ||
1236 | * A buffer cannot be placed on two lists at the same time. | ||
1237 | */ | ||
1238 | void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) | ||
1239 | { | ||
1240 | unsigned long flags; | ||
1241 | |||
1242 | spin_lock_irqsave(&list->lock, flags); | ||
1243 | __skb_queue_head(list, newsk); | ||
1244 | spin_unlock_irqrestore(&list->lock, flags); | ||
1245 | } | ||
1246 | |||
1247 | /** | ||
1248 | * skb_queue_tail - queue a buffer at the list tail | ||
1249 | * @list: list to use | ||
1250 | * @newsk: buffer to queue | ||
1251 | * | ||
1252 | * Queue a buffer at the tail of the list. This function takes the | ||
1253 | * list lock and can be used safely with other locking &sk_buff | ||
1254 | * functions. | ||
1255 | * | ||
1256 | * A buffer cannot be placed on two lists at the same time. | ||
1257 | */ | ||
1258 | void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) | ||
1259 | { | ||
1260 | unsigned long flags; | ||
1261 | |||
1262 | spin_lock_irqsave(&list->lock, flags); | ||
1263 | __skb_queue_tail(list, newsk); | ||
1264 | spin_unlock_irqrestore(&list->lock, flags); | ||
1265 | } | ||
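/* Usage sketch (hedged): a producer/consumer pair built on the locked
 * queue calls above; 'example_rxq' and both helpers are illustrative.
 * The IRQ-safe lock inside each call is what lets the two sides race
 * safely. */
static struct sk_buff_head example_rxq;	/* skb_queue_head_init() at init */

static void example_rx(struct sk_buff *skb)
{
	skb_queue_tail(&example_rxq, skb);	/* e.g. from interrupt context */
}

static struct sk_buff *example_drain_one(void)
{
	return skb_dequeue(&example_rxq);	/* NULL when the queue is empty */
}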
1266 | /** | ||
1267 | * skb_unlink - remove a buffer from a list | ||
1268 | * @skb: buffer to remove | ||
1269 | * | ||
1270 | * Remove the buffer from whatever list it is on. The list lock is taken | ||
1271 | * and this function is atomic with respect to other list locked calls. | ||
1272 | * | ||
1273 | * Works even without knowing the list it is sitting on, which can be | ||
1274 | * handy at times. It also means that THE LIST MUST EXIST when you | ||
1275 | * unlink. Thus a list must have its contents unlinked before it is | ||
1276 | * destroyed. | ||
1277 | */ | ||
1278 | void skb_unlink(struct sk_buff *skb) | ||
1279 | { | ||
1280 | struct sk_buff_head *list = skb->list; | ||
1281 | |||
1282 | if (list) { | ||
1283 | unsigned long flags; | ||
1284 | |||
1285 | spin_lock_irqsave(&list->lock, flags); | ||
1286 | if (skb->list == list) | ||
1287 | __skb_unlink(skb, skb->list); | ||
1288 | spin_unlock_irqrestore(&list->lock, flags); | ||
1289 | } | ||
1290 | } | ||
1291 | |||
1292 | |||
1293 | /** | ||
1294 | * skb_append - append a buffer | ||
1295 | * @old: buffer to insert after | ||
1296 | * @newsk: buffer to insert | ||
1297 | * | ||
1298 | * Place a packet after a given packet in a list. The list locks are taken | ||
1299 | * and this function is atomic with respect to other list locked calls. | ||
1300 | * A buffer cannot be placed on two lists at the same time. | ||
1301 | */ | ||
1302 | |||
1303 | void skb_append(struct sk_buff *old, struct sk_buff *newsk) | ||
1304 | { | ||
1305 | unsigned long flags; | ||
1306 | |||
1307 | spin_lock_irqsave(&old->list->lock, flags); | ||
1308 | __skb_append(old, newsk); | ||
1309 | spin_unlock_irqrestore(&old->list->lock, flags); | ||
1310 | } | ||
1311 | |||
1312 | |||
1313 | /** | ||
1314 | * skb_insert - insert a buffer | ||
1315 | * @old: buffer to insert before | ||
1316 | * @newsk: buffer to insert | ||
1317 | * | ||
1318 | * Place a packet before a given packet in a list. The list locks are taken | ||
1319 | * and this function is atomic with respect to other list locked calls | ||
1320 | * A buffer cannot be placed on two lists at the same time. | ||
1321 | */ | ||
1322 | |||
1323 | void skb_insert(struct sk_buff *old, struct sk_buff *newsk) | ||
1324 | { | ||
1325 | unsigned long flags; | ||
1326 | |||
1327 | spin_lock_irqsave(&old->list->lock, flags); | ||
1328 | __skb_insert(newsk, old->prev, old, old->list); | ||
1329 | spin_unlock_irqrestore(&old->list->lock, flags); | ||
1330 | } | ||
1331 | |||
1332 | #if 0 | ||
1333 | /* | ||
1334 | * Tune the memory allocator for a new MTU size. | ||
1335 | */ | ||
1336 | void skb_add_mtu(int mtu) | ||
1337 | { | ||
1338 | /* Must match allocation in alloc_skb */ | ||
1339 | mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); | ||
1340 | |||
1341 | kmem_add_cache_size(mtu); | ||
1342 | } | ||
1343 | #endif | ||
1344 | |||
1345 | static inline void skb_split_inside_header(struct sk_buff *skb, | ||
1346 | struct sk_buff* skb1, | ||
1347 | const u32 len, const int pos) | ||
1348 | { | ||
1349 | int i; | ||
1350 | |||
1351 | memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); | ||
1352 | |||
1353 | /* And move data appendix as is. */ | ||
1354 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
1355 | skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; | ||
1356 | |||
1357 | skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; | ||
1358 | skb_shinfo(skb)->nr_frags = 0; | ||
1359 | skb1->data_len = skb->data_len; | ||
1360 | skb1->len += skb1->data_len; | ||
1361 | skb->data_len = 0; | ||
1362 | skb->len = len; | ||
1363 | skb->tail = skb->data + len; | ||
1364 | } | ||
1365 | |||
1366 | static inline void skb_split_no_header(struct sk_buff *skb, | ||
1367 | struct sk_buff* skb1, | ||
1368 | const u32 len, int pos) | ||
1369 | { | ||
1370 | int i, k = 0; | ||
1371 | const int nfrags = skb_shinfo(skb)->nr_frags; | ||
1372 | |||
1373 | skb_shinfo(skb)->nr_frags = 0; | ||
1374 | skb1->len = skb1->data_len = skb->len - len; | ||
1375 | skb->len = len; | ||
1376 | skb->data_len = len - pos; | ||
1377 | |||
1378 | for (i = 0; i < nfrags; i++) { | ||
1379 | int size = skb_shinfo(skb)->frags[i].size; | ||
1380 | |||
1381 | if (pos + size > len) { | ||
1382 | skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; | ||
1383 | |||
1384 | if (pos < len) { | ||
1385 | /* Split the frag. | ||
1386 | * We have two variants in this case: | ||
1387 | * 1. Move the whole frag to the second | ||
1388 | * part, if it is possible. F.e. this | ||
1389 | * approach is mandatory for TUX, | ||
1390 | * where splitting is expensive. | ||
1391 | * 2. Split accurately. We do the latter. | ||
1392 | */ | ||
1393 | get_page(skb_shinfo(skb)->frags[i].page); | ||
1394 | skb_shinfo(skb1)->frags[0].page_offset += len - pos; | ||
1395 | skb_shinfo(skb1)->frags[0].size -= len - pos; | ||
1396 | skb_shinfo(skb)->frags[i].size = len - pos; | ||
1397 | skb_shinfo(skb)->nr_frags++; | ||
1398 | } | ||
1399 | k++; | ||
1400 | } else | ||
1401 | skb_shinfo(skb)->nr_frags++; | ||
1402 | pos += size; | ||
1403 | } | ||
1404 | skb_shinfo(skb1)->nr_frags = k; | ||
1405 | } | ||
1406 | |||
1407 | /** | ||
1408 | * skb_split - Split fragmented skb into two parts at length len. | ||
1409 | * @skb: the buffer to split | ||
1410 | * @skb1: the buffer to receive the second part | ||
1411 | * @len: new length for skb | ||
1412 | */ | ||
1413 | void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) | ||
1414 | { | ||
1415 | int pos = skb_headlen(skb); | ||
1416 | |||
1417 | if (len < pos) /* Split line is inside header. */ | ||
1418 | skb_split_inside_header(skb, skb1, len, pos); | ||
1419 | else /* Second chunk has no header, nothing to copy. */ | ||
1420 | skb_split_no_header(skb, skb1, len, pos); | ||
1421 | } | ||
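/* Usage sketch (hedged): a segmentation path might carve the first
 * 'mss' bytes off an oversized buffer. 'skb1' must be freshly
 * allocated with room for whatever skb_split_inside_header() moves;
 * all names here are illustrative. */
static struct sk_buff *example_carve(struct sk_buff *skb, u32 mss)
{
	struct sk_buff *skb1 = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

	if (skb1)
		skb_split(skb, skb1, mss);	/* skb keeps bytes [0, mss) */
	return skb1;				/* holds everything past mss */
}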
1422 | |||
1423 | void __init skb_init(void) | ||
1424 | { | ||
1425 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", | ||
1426 | sizeof(struct sk_buff), | ||
1427 | 0, | ||
1428 | SLAB_HWCACHE_ALIGN, | ||
1429 | NULL, NULL); | ||
1430 | if (!skbuff_head_cache) | ||
1431 | panic("cannot create skbuff cache"); | ||
1432 | } | ||
1433 | |||
1434 | EXPORT_SYMBOL(___pskb_trim); | ||
1435 | EXPORT_SYMBOL(__kfree_skb); | ||
1436 | EXPORT_SYMBOL(__pskb_pull_tail); | ||
1437 | EXPORT_SYMBOL(alloc_skb); | ||
1438 | EXPORT_SYMBOL(pskb_copy); | ||
1439 | EXPORT_SYMBOL(pskb_expand_head); | ||
1440 | EXPORT_SYMBOL(skb_checksum); | ||
1441 | EXPORT_SYMBOL(skb_clone); | ||
1442 | EXPORT_SYMBOL(skb_clone_fraglist); | ||
1443 | EXPORT_SYMBOL(skb_copy); | ||
1444 | EXPORT_SYMBOL(skb_copy_and_csum_bits); | ||
1445 | EXPORT_SYMBOL(skb_copy_and_csum_dev); | ||
1446 | EXPORT_SYMBOL(skb_copy_bits); | ||
1447 | EXPORT_SYMBOL(skb_copy_expand); | ||
1448 | EXPORT_SYMBOL(skb_over_panic); | ||
1449 | EXPORT_SYMBOL(skb_pad); | ||
1450 | EXPORT_SYMBOL(skb_realloc_headroom); | ||
1451 | EXPORT_SYMBOL(skb_under_panic); | ||
1452 | EXPORT_SYMBOL(skb_dequeue); | ||
1453 | EXPORT_SYMBOL(skb_dequeue_tail); | ||
1454 | EXPORT_SYMBOL(skb_insert); | ||
1455 | EXPORT_SYMBOL(skb_queue_purge); | ||
1456 | EXPORT_SYMBOL(skb_queue_head); | ||
1457 | EXPORT_SYMBOL(skb_queue_tail); | ||
1458 | EXPORT_SYMBOL(skb_unlink); | ||
1459 | EXPORT_SYMBOL(skb_append); | ||
1460 | EXPORT_SYMBOL(skb_split); | ||
diff --git a/net/core/sock.c b/net/core/sock.c new file mode 100644 index 000000000000..629ab4a5b45b --- /dev/null +++ b/net/core/sock.c | |||
@@ -0,0 +1,1565 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Generic socket support routines. Memory allocators, socket lock/release | ||
7 | * handler for protocols to use and generic option handler. | ||
8 | * | ||
9 | * | ||
10 | * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ | ||
11 | * | ||
12 | * Authors: Ross Biro, <bir7@leland.Stanford.Edu> | ||
13 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | ||
14 | * Florian La Roche, <flla@stud.uni-sb.de> | ||
15 | * Alan Cox, <A.Cox@swansea.ac.uk> | ||
16 | * | ||
17 | * Fixes: | ||
18 | * Alan Cox : Numerous verify_area() problems | ||
19 | * Alan Cox : Connecting on a connecting socket | ||
20 | * now returns an error for tcp. | ||
21 | * Alan Cox : sock->protocol is set correctly. | ||
22 | * and is not sometimes left as 0. | ||
23 | * Alan Cox : connect handles icmp errors on a | ||
24 | * connect properly. Unfortunately there | ||
25 | * is a restart syscall nasty there. I | ||
26 | * can't match BSD without hacking the C | ||
27 | * library. Ideas urgently sought! | ||
28 | * Alan Cox : Disallow bind() to addresses that are | ||
29 | * not ours - especially broadcast ones!! | ||
30 | * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) | ||
31 | * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, | ||
32 | * instead they leave that for the DESTROY timer. | ||
33 | * Alan Cox : Clean up error flag in accept | ||
34 | * Alan Cox : TCP ack handling is buggy, the DESTROY timer | ||
35 | * was buggy. Put a remove_sock() in the handler | ||
36 | * for memory when we hit 0. Also altered the timer | ||
37 | * code. The ACK stuff can wait and needs major | ||
38 | * TCP layer surgery. | ||
39 | * Alan Cox : Fixed TCP ack bug, removed remove sock | ||
40 | * and fixed timer/inet_bh race. | ||
41 | * Alan Cox : Added zapped flag for TCP | ||
42 | * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code | ||
43 | * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb | ||
44 | * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources | ||
45 | * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. | ||
46 | * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... | ||
47 | * Rick Sladkey : Relaxed UDP rules for matching packets. | ||
48 | * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support | ||
49 | * Pauline Middelink : identd support | ||
50 | * Alan Cox : Fixed connect() taking signals I think. | ||
51 | * Alan Cox : SO_LINGER supported | ||
52 | * Alan Cox : Error reporting fixes | ||
53 | * Anonymous : inet_create tidied up (sk->reuse setting) | ||
54 | * Alan Cox : inet sockets don't set sk->type! | ||
55 | * Alan Cox : Split socket option code | ||
56 | * Alan Cox : Callbacks | ||
57 | * Alan Cox : Nagle flag for Charles & Johannes stuff | ||
58 | * Alex : Removed restriction on inet fioctl | ||
59 | * Alan Cox : Splitting INET from NET core | ||
60 | * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() | ||
61 | * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code | ||
62 | * Alan Cox : Split IP from generic code | ||
63 | * Alan Cox : New kfree_skbmem() | ||
64 | * Alan Cox : Make SO_DEBUG superuser only. | ||
65 | * Alan Cox : Allow anyone to clear SO_DEBUG | ||
66 | * (compatibility fix) | ||
67 | * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. | ||
68 | * Alan Cox : Allocator for a socket is settable. | ||
69 | * Alan Cox : SO_ERROR includes soft errors. | ||
70 | * Alan Cox : Allow NULL arguments on some SO_ opts | ||
71 | * Alan Cox : Generic socket allocation to make hooks | ||
72 | * easier (suggested by Craig Metz). | ||
73 | * Michael Pall : SO_ERROR returns positive errno again | ||
74 | * Steve Whitehouse: Added default destructor to free | ||
75 | * protocol private data. | ||
76 | * Steve Whitehouse: Added various other default routines | ||
77 | * common to several socket families. | ||
78 | * Chris Evans : Call suser() check last on F_SETOWN | ||
79 | * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. | ||
80 | * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() | ||
81 | * Andi Kleen : Fix write_space callback | ||
82 | * Chris Evans : Security fixes - signedness again | ||
83 | * Arnaldo C. Melo : cleanups, use skb_queue_purge | ||
84 | * | ||
85 | * To Fix: | ||
86 | * | ||
87 | * | ||
88 | * This program is free software; you can redistribute it and/or | ||
89 | * modify it under the terms of the GNU General Public License | ||
90 | * as published by the Free Software Foundation; either version | ||
91 | * 2 of the License, or (at your option) any later version. | ||
92 | */ | ||
93 | |||
94 | #include <linux/config.h> | ||
95 | #include <linux/errno.h> | ||
96 | #include <linux/types.h> | ||
97 | #include <linux/socket.h> | ||
98 | #include <linux/in.h> | ||
99 | #include <linux/kernel.h> | ||
100 | #include <linux/major.h> | ||
101 | #include <linux/module.h> | ||
102 | #include <linux/proc_fs.h> | ||
103 | #include <linux/seq_file.h> | ||
104 | #include <linux/sched.h> | ||
105 | #include <linux/timer.h> | ||
106 | #include <linux/string.h> | ||
107 | #include <linux/sockios.h> | ||
108 | #include <linux/net.h> | ||
109 | #include <linux/mm.h> | ||
110 | #include <linux/slab.h> | ||
111 | #include <linux/interrupt.h> | ||
112 | #include <linux/poll.h> | ||
113 | #include <linux/tcp.h> | ||
114 | #include <linux/init.h> | ||
115 | |||
116 | #include <asm/uaccess.h> | ||
117 | #include <asm/system.h> | ||
118 | |||
119 | #include <linux/netdevice.h> | ||
120 | #include <net/protocol.h> | ||
121 | #include <linux/skbuff.h> | ||
122 | #include <net/sock.h> | ||
123 | #include <net/xfrm.h> | ||
124 | #include <linux/ipsec.h> | ||
125 | |||
126 | #include <linux/filter.h> | ||
127 | |||
128 | #ifdef CONFIG_INET | ||
129 | #include <net/tcp.h> | ||
130 | #endif | ||
131 | |||
132 | /* Take the size of the struct sk_buff overhead into account when | ||
133 | * determining these values, since it is not constant across | ||
134 | * platforms. This keeps socket queueing behavior and performance | ||
135 | * independent of such differences. | ||
136 | */ | ||
137 | #define _SK_MEM_PACKETS 256 | ||
138 | #define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) | ||
139 | #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) | ||
140 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) | ||
141 | |||
142 | /* Run time adjustable parameters. */ | ||
143 | __u32 sysctl_wmem_max = SK_WMEM_MAX; | ||
144 | __u32 sysctl_rmem_max = SK_RMEM_MAX; | ||
145 | __u32 sysctl_wmem_default = SK_WMEM_MAX; | ||
146 | __u32 sysctl_rmem_default = SK_RMEM_MAX; | ||
147 | |||
148 | /* Maximal space eaten by iovec or ancillary data plus some space */ | ||
149 | int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); | ||
150 | |||
151 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | ||
152 | { | ||
153 | struct timeval tv; | ||
154 | |||
155 | if (optlen < sizeof(tv)) | ||
156 | return -EINVAL; | ||
157 | if (copy_from_user(&tv, optval, sizeof(tv))) | ||
158 | return -EFAULT; | ||
159 | |||
160 | *timeo_p = MAX_SCHEDULE_TIMEOUT; | ||
161 | if (tv.tv_sec == 0 && tv.tv_usec == 0) | ||
162 | return 0; | ||
163 | if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) | ||
164 | *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ); | ||
165 | return 0; | ||
166 | } | ||
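/* A worked example of the rounding above, assuming HZ = 1000 (so one
 * tick is 1000000/HZ = 1000 usec): tv_usec is rounded *up*, so a
 * nonzero timeout never truncates to zero jiffies.
 *
 *	tv = {1, 1}   -> 1*1000 + (1 + 999)/1000   = 1001 jiffies
 *	tv = {0, 500} -> 0*1000 + (500 + 999)/1000 = 1 jiffy
 */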
167 | |||
168 | static void sock_warn_obsolete_bsdism(const char *name) | ||
169 | { | ||
170 | static int warned; | ||
171 | static char warncomm[TASK_COMM_LEN]; | ||
172 | if (strcmp(warncomm, current->comm) && warned < 5) { | ||
173 | strcpy(warncomm, current->comm); | ||
174 | printk(KERN_WARNING "process `%s' is using obsolete " | ||
175 | "%s SO_BSDCOMPAT\n", warncomm, name); | ||
176 | warned++; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | static void sock_disable_timestamp(struct sock *sk) | ||
181 | { | ||
182 | if (sock_flag(sk, SOCK_TIMESTAMP)) { | ||
183 | sock_reset_flag(sk, SOCK_TIMESTAMP); | ||
184 | net_disable_timestamp(); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | |||
189 | /* | ||
190 | * This is meant for all protocols to use and covers goings on | ||
191 | * at the socket level. Everything here is generic. | ||
192 | */ | ||
193 | |||
194 | int sock_setsockopt(struct socket *sock, int level, int optname, | ||
195 | char __user *optval, int optlen) | ||
196 | { | ||
197 | struct sock *sk=sock->sk; | ||
198 | struct sk_filter *filter; | ||
199 | int val; | ||
200 | int valbool; | ||
201 | struct linger ling; | ||
202 | int ret = 0; | ||
203 | |||
204 | /* | ||
205 | * Options without arguments | ||
206 | */ | ||
207 | |||
208 | #ifdef SO_DONTLINGER /* Compatibility item... */ | ||
209 | switch (optname) { | ||
210 | case SO_DONTLINGER: | ||
211 | sock_reset_flag(sk, SOCK_LINGER); | ||
212 | return 0; | ||
213 | } | ||
214 | #endif | ||
215 | |||
216 | if(optlen<sizeof(int)) | ||
217 | return(-EINVAL); | ||
218 | |||
219 | if (get_user(val, (int __user *)optval)) | ||
220 | return -EFAULT; | ||
221 | |||
222 | valbool = val?1:0; | ||
223 | |||
224 | lock_sock(sk); | ||
225 | |||
226 | switch(optname) | ||
227 | { | ||
228 | case SO_DEBUG: | ||
229 | if(val && !capable(CAP_NET_ADMIN)) | ||
230 | { | ||
231 | ret = -EACCES; | ||
232 | } | ||
233 | else if (valbool) | ||
234 | sock_set_flag(sk, SOCK_DBG); | ||
235 | else | ||
236 | sock_reset_flag(sk, SOCK_DBG); | ||
237 | break; | ||
238 | case SO_REUSEADDR: | ||
239 | sk->sk_reuse = valbool; | ||
240 | break; | ||
241 | case SO_TYPE: | ||
242 | case SO_ERROR: | ||
243 | ret = -ENOPROTOOPT; | ||
244 | break; | ||
245 | case SO_DONTROUTE: | ||
246 | if (valbool) | ||
247 | sock_set_flag(sk, SOCK_LOCALROUTE); | ||
248 | else | ||
249 | sock_reset_flag(sk, SOCK_LOCALROUTE); | ||
250 | break; | ||
251 | case SO_BROADCAST: | ||
252 | sock_valbool_flag(sk, SOCK_BROADCAST, valbool); | ||
253 | break; | ||
254 | case SO_SNDBUF: | ||
255 | /* Don't return an error on this; BSD doesn't, and if you | ||
256 | think about it, this is right. Otherwise apps have to | ||
257 | play 'guess the biggest size' games. RCVBUF/SNDBUF | ||
258 | are treated in BSD as hints */ | ||
259 | |||
260 | if (val > sysctl_wmem_max) | ||
261 | val = sysctl_wmem_max; | ||
262 | |||
263 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
264 | if ((val * 2) < SOCK_MIN_SNDBUF) | ||
265 | sk->sk_sndbuf = SOCK_MIN_SNDBUF; | ||
266 | else | ||
267 | sk->sk_sndbuf = val * 2; | ||
268 | |||
269 | /* | ||
270 | * Wake up sending tasks if we | ||
271 | * upped the value. | ||
272 | */ | ||
273 | sk->sk_write_space(sk); | ||
274 | break; | ||
275 | |||
276 | case SO_RCVBUF: | ||
277 | /* Don't return an error on this; BSD doesn't, and if you | ||
278 | think about it, this is right. Otherwise apps have to | ||
279 | play 'guess the biggest size' games. RCVBUF/SNDBUF | ||
280 | are treated in BSD as hints */ | ||
281 | |||
282 | if (val > sysctl_rmem_max) | ||
283 | val = sysctl_rmem_max; | ||
284 | |||
285 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
286 | /* FIXME: is this lower bound the right one? */ | ||
287 | if ((val * 2) < SOCK_MIN_RCVBUF) | ||
288 | sk->sk_rcvbuf = SOCK_MIN_RCVBUF; | ||
289 | else | ||
290 | sk->sk_rcvbuf = val * 2; | ||
291 | break; | ||
292 | |||
293 | case SO_KEEPALIVE: | ||
294 | #ifdef CONFIG_INET | ||
295 | if (sk->sk_protocol == IPPROTO_TCP) | ||
296 | tcp_set_keepalive(sk, valbool); | ||
297 | #endif | ||
298 | sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); | ||
299 | break; | ||
300 | |||
301 | case SO_OOBINLINE: | ||
302 | sock_valbool_flag(sk, SOCK_URGINLINE, valbool); | ||
303 | break; | ||
304 | |||
305 | case SO_NO_CHECK: | ||
306 | sk->sk_no_check = valbool; | ||
307 | break; | ||
308 | |||
309 | case SO_PRIORITY: | ||
310 | if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) | ||
311 | sk->sk_priority = val; | ||
312 | else | ||
313 | ret = -EPERM; | ||
314 | break; | ||
315 | |||
316 | case SO_LINGER: | ||
317 | if(optlen<sizeof(ling)) { | ||
318 | ret = -EINVAL; /* 1003.1g */ | ||
319 | break; | ||
320 | } | ||
321 | if (copy_from_user(&ling,optval,sizeof(ling))) { | ||
322 | ret = -EFAULT; | ||
323 | break; | ||
324 | } | ||
325 | if (!ling.l_onoff) | ||
326 | sock_reset_flag(sk, SOCK_LINGER); | ||
327 | else { | ||
328 | #if (BITS_PER_LONG == 32) | ||
329 | if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) | ||
330 | sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; | ||
331 | else | ||
332 | #endif | ||
333 | sk->sk_lingertime = ling.l_linger * HZ; | ||
334 | sock_set_flag(sk, SOCK_LINGER); | ||
335 | } | ||
336 | break; | ||
337 | |||
338 | case SO_BSDCOMPAT: | ||
339 | sock_warn_obsolete_bsdism("setsockopt"); | ||
340 | break; | ||
341 | |||
342 | case SO_PASSCRED: | ||
343 | if (valbool) | ||
344 | set_bit(SOCK_PASSCRED, &sock->flags); | ||
345 | else | ||
346 | clear_bit(SOCK_PASSCRED, &sock->flags); | ||
347 | break; | ||
348 | |||
349 | case SO_TIMESTAMP: | ||
350 | if (valbool) { | ||
351 | sock_set_flag(sk, SOCK_RCVTSTAMP); | ||
352 | sock_enable_timestamp(sk); | ||
353 | } else | ||
354 | sock_reset_flag(sk, SOCK_RCVTSTAMP); | ||
355 | break; | ||
356 | |||
357 | case SO_RCVLOWAT: | ||
358 | if (val < 0) | ||
359 | val = INT_MAX; | ||
360 | sk->sk_rcvlowat = val ? : 1; | ||
361 | break; | ||
362 | |||
363 | case SO_RCVTIMEO: | ||
364 | ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); | ||
365 | break; | ||
366 | |||
367 | case SO_SNDTIMEO: | ||
368 | ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); | ||
369 | break; | ||
370 | |||
371 | #ifdef CONFIG_NETDEVICES | ||
372 | case SO_BINDTODEVICE: | ||
373 | { | ||
374 | char devname[IFNAMSIZ]; | ||
375 | |||
376 | /* Sorry... */ | ||
377 | if (!capable(CAP_NET_RAW)) { | ||
378 | ret = -EPERM; | ||
379 | break; | ||
380 | } | ||
381 | |||
382 | /* Bind this socket to a particular device like "eth0", | ||
383 | * as specified in the passed interface name. If the | ||
384 | * name is "" or the option length is zero the socket | ||
385 | * is not bound. | ||
386 | */ | ||
387 | |||
388 | if (!valbool) { | ||
389 | sk->sk_bound_dev_if = 0; | ||
390 | } else { | ||
391 | if (optlen > IFNAMSIZ) | ||
392 | optlen = IFNAMSIZ; | ||
393 | if (copy_from_user(devname, optval, optlen)) { | ||
394 | ret = -EFAULT; | ||
395 | break; | ||
396 | } | ||
397 | |||
398 | /* Remove any cached route for this socket. */ | ||
399 | sk_dst_reset(sk); | ||
400 | |||
401 | if (devname[0] == '\0') { | ||
402 | sk->sk_bound_dev_if = 0; | ||
403 | } else { | ||
404 | struct net_device *dev = dev_get_by_name(devname); | ||
405 | if (!dev) { | ||
406 | ret = -ENODEV; | ||
407 | break; | ||
408 | } | ||
409 | sk->sk_bound_dev_if = dev->ifindex; | ||
410 | dev_put(dev); | ||
411 | } | ||
412 | } | ||
413 | break; | ||
414 | } | ||
415 | #endif | ||
416 | |||
417 | |||
418 | case SO_ATTACH_FILTER: | ||
419 | ret = -EINVAL; | ||
420 | if (optlen == sizeof(struct sock_fprog)) { | ||
421 | struct sock_fprog fprog; | ||
422 | |||
423 | ret = -EFAULT; | ||
424 | if (copy_from_user(&fprog, optval, sizeof(fprog))) | ||
425 | break; | ||
426 | |||
427 | ret = sk_attach_filter(&fprog, sk); | ||
428 | } | ||
429 | break; | ||
430 | |||
431 | case SO_DETACH_FILTER: | ||
432 | spin_lock_bh(&sk->sk_lock.slock); | ||
433 | filter = sk->sk_filter; | ||
434 | if (filter) { | ||
435 | sk->sk_filter = NULL; | ||
436 | spin_unlock_bh(&sk->sk_lock.slock); | ||
437 | sk_filter_release(sk, filter); | ||
438 | break; | ||
439 | } | ||
440 | spin_unlock_bh(&sk->sk_lock.slock); | ||
441 | ret = -ENONET; | ||
442 | break; | ||
443 | |||
444 | /* We implement SO_SNDLOWAT etc. as not being | ||
445 | settable (1003.1g 5.3) */ | ||
446 | default: | ||
447 | ret = -ENOPROTOOPT; | ||
448 | break; | ||
449 | } | ||
450 | release_sock(sk); | ||
451 | return ret; | ||
452 | } | ||
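/* A hedged userspace view of the SO_SNDBUF handling above: the set
 * value is clamped to sysctl_wmem_max and then doubled to cover
 * struct sk_buff overhead, so getsockopt() reports roughly twice the
 * requested size:
 *
 *	int val = 65536;
 *	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
 *	socklen_t len = sizeof(val);
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, &len);
 *	(val is now about 131072)
 */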
453 | |||
454 | |||
455 | int sock_getsockopt(struct socket *sock, int level, int optname, | ||
456 | char __user *optval, int __user *optlen) | ||
457 | { | ||
458 | struct sock *sk = sock->sk; | ||
459 | |||
460 | union | ||
461 | { | ||
462 | int val; | ||
463 | struct linger ling; | ||
464 | struct timeval tm; | ||
465 | } v; | ||
466 | |||
467 | unsigned int lv = sizeof(int); | ||
468 | int len; | ||
469 | |||
470 | if(get_user(len,optlen)) | ||
471 | return -EFAULT; | ||
472 | if(len < 0) | ||
473 | return -EINVAL; | ||
474 | |||
475 | switch(optname) | ||
476 | { | ||
477 | case SO_DEBUG: | ||
478 | v.val = sock_flag(sk, SOCK_DBG); | ||
479 | break; | ||
480 | |||
481 | case SO_DONTROUTE: | ||
482 | v.val = sock_flag(sk, SOCK_LOCALROUTE); | ||
483 | break; | ||
484 | |||
485 | case SO_BROADCAST: | ||
486 | v.val = !!sock_flag(sk, SOCK_BROADCAST); | ||
487 | break; | ||
488 | |||
489 | case SO_SNDBUF: | ||
490 | v.val = sk->sk_sndbuf; | ||
491 | break; | ||
492 | |||
493 | case SO_RCVBUF: | ||
494 | v.val = sk->sk_rcvbuf; | ||
495 | break; | ||
496 | |||
497 | case SO_REUSEADDR: | ||
498 | v.val = sk->sk_reuse; | ||
499 | break; | ||
500 | |||
501 | case SO_KEEPALIVE: | ||
502 | v.val = !!sock_flag(sk, SOCK_KEEPOPEN); | ||
503 | break; | ||
504 | |||
505 | case SO_TYPE: | ||
506 | v.val = sk->sk_type; | ||
507 | break; | ||
508 | |||
509 | case SO_ERROR: | ||
510 | v.val = -sock_error(sk); | ||
511 | if(v.val==0) | ||
512 | v.val = xchg(&sk->sk_err_soft, 0); | ||
513 | break; | ||
514 | |||
515 | case SO_OOBINLINE: | ||
516 | v.val = !!sock_flag(sk, SOCK_URGINLINE); | ||
517 | break; | ||
518 | |||
519 | case SO_NO_CHECK: | ||
520 | v.val = sk->sk_no_check; | ||
521 | break; | ||
522 | |||
523 | case SO_PRIORITY: | ||
524 | v.val = sk->sk_priority; | ||
525 | break; | ||
526 | |||
527 | case SO_LINGER: | ||
528 | lv = sizeof(v.ling); | ||
529 | v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); | ||
530 | v.ling.l_linger = sk->sk_lingertime / HZ; | ||
531 | break; | ||
532 | |||
533 | case SO_BSDCOMPAT: | ||
534 | sock_warn_obsolete_bsdism("getsockopt"); | ||
535 | break; | ||
536 | |||
537 | case SO_TIMESTAMP: | ||
538 | v.val = sock_flag(sk, SOCK_RCVTSTAMP); | ||
539 | break; | ||
540 | |||
541 | case SO_RCVTIMEO: | ||
542 | lv=sizeof(struct timeval); | ||
543 | if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { | ||
544 | v.tm.tv_sec = 0; | ||
545 | v.tm.tv_usec = 0; | ||
546 | } else { | ||
547 | v.tm.tv_sec = sk->sk_rcvtimeo / HZ; | ||
548 | v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ; | ||
549 | } | ||
550 | break; | ||
551 | |||
552 | case SO_SNDTIMEO: | ||
553 | lv=sizeof(struct timeval); | ||
554 | if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { | ||
555 | v.tm.tv_sec = 0; | ||
556 | v.tm.tv_usec = 0; | ||
557 | } else { | ||
558 | v.tm.tv_sec = sk->sk_sndtimeo / HZ; | ||
559 | v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ; | ||
560 | } | ||
561 | break; | ||
562 | |||
563 | case SO_RCVLOWAT: | ||
564 | v.val = sk->sk_rcvlowat; | ||
565 | break; | ||
566 | |||
567 | case SO_SNDLOWAT: | ||
568 | v.val=1; | ||
569 | break; | ||
570 | |||
571 | case SO_PASSCRED: | ||
572 | v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0; | ||
573 | break; | ||
574 | |||
575 | case SO_PEERCRED: | ||
576 | if (len > sizeof(sk->sk_peercred)) | ||
577 | len = sizeof(sk->sk_peercred); | ||
578 | if (copy_to_user(optval, &sk->sk_peercred, len)) | ||
579 | return -EFAULT; | ||
580 | goto lenout; | ||
581 | |||
582 | case SO_PEERNAME: | ||
583 | { | ||
584 | char address[128]; | ||
585 | |||
586 | if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) | ||
587 | return -ENOTCONN; | ||
588 | if (lv < len) | ||
589 | return -EINVAL; | ||
590 | if (copy_to_user(optval, address, len)) | ||
591 | return -EFAULT; | ||
592 | goto lenout; | ||
593 | } | ||
594 | |||
595 | /* Dubious BSD thing... Probably nobody even uses it, but | ||
596 | * the UNIX standard wants it for whatever reason... -DaveM | ||
597 | */ | ||
598 | case SO_ACCEPTCONN: | ||
599 | v.val = sk->sk_state == TCP_LISTEN; | ||
600 | break; | ||
601 | |||
602 | case SO_PEERSEC: | ||
603 | return security_socket_getpeersec(sock, optval, optlen, len); | ||
604 | |||
605 | default: | ||
606 | return(-ENOPROTOOPT); | ||
607 | } | ||
608 | if (len > lv) | ||
609 | len = lv; | ||
610 | if (copy_to_user(optval, &v, len)) | ||
611 | return -EFAULT; | ||
612 | lenout: | ||
613 | if (put_user(len, optlen)) | ||
614 | return -EFAULT; | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | /** | ||
619 | * sk_alloc - All socket objects are allocated here | ||
620 | * @family: protocol family | ||
621 | * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) | ||
622 | * @prot: struct proto associated with this new sock instance | ||
623 | * @zero_it: if we should zero the newly allocated sock | ||
624 | */ | ||
625 | struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) | ||
626 | { | ||
627 | struct sock *sk = NULL; | ||
628 | kmem_cache_t *slab = prot->slab; | ||
629 | |||
630 | if (slab != NULL) | ||
631 | sk = kmem_cache_alloc(slab, priority); | ||
632 | else | ||
633 | sk = kmalloc(prot->obj_size, priority); | ||
634 | |||
635 | if (sk) { | ||
636 | if (zero_it) { | ||
637 | memset(sk, 0, prot->obj_size); | ||
638 | sk->sk_family = family; | ||
639 | sk->sk_prot = prot; | ||
640 | sock_lock_init(sk); | ||
641 | } | ||
642 | |||
643 | if (security_sk_alloc(sk, family, priority)) { | ||
644 | slab ? kmem_cache_free(slab, sk) : kfree(sk); /* match alloc path */ | ||
645 | sk = NULL; | ||
646 | } else | ||
647 | __module_get(prot->owner); | ||
648 | } | ||
649 | return sk; | ||
650 | } | ||
651 | |||
652 | void sk_free(struct sock *sk) | ||
653 | { | ||
654 | struct sk_filter *filter; | ||
655 | struct module *owner = sk->sk_prot->owner; | ||
656 | |||
657 | if (sk->sk_destruct) | ||
658 | sk->sk_destruct(sk); | ||
659 | |||
660 | filter = sk->sk_filter; | ||
661 | if (filter) { | ||
662 | sk_filter_release(sk, filter); | ||
663 | sk->sk_filter = NULL; | ||
664 | } | ||
665 | |||
666 | sock_disable_timestamp(sk); | ||
667 | |||
668 | if (atomic_read(&sk->sk_omem_alloc)) | ||
669 | printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", | ||
670 | __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); | ||
671 | |||
672 | security_sk_free(sk); | ||
673 | if (sk->sk_prot->slab != NULL) | ||
674 | kmem_cache_free(sk->sk_prot->slab, sk); | ||
675 | else | ||
676 | kfree(sk); | ||
677 | module_put(owner); | ||
678 | } | ||
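/* A hedged lifecycle sketch tying the two routines above together;
 * 'example_proto' is illustrative. A protocol allocates, fills in
 * generic state, and eventually drops its last reference:
 *
 *	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, &example_proto, 1);
 *	if (sk)
 *		sock_init_data(NULL, sk);
 *	...
 *	sk_free(sk);
 */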
679 | |||
680 | void __init sk_init(void) | ||
681 | { | ||
682 | if (num_physpages <= 4096) { | ||
683 | sysctl_wmem_max = 32767; | ||
684 | sysctl_rmem_max = 32767; | ||
685 | sysctl_wmem_default = 32767; | ||
686 | sysctl_rmem_default = 32767; | ||
687 | } else if (num_physpages >= 131072) { | ||
688 | sysctl_wmem_max = 131071; | ||
689 | sysctl_rmem_max = 131071; | ||
690 | } | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Simple resource managers for sockets. | ||
695 | */ | ||
696 | |||
697 | |||
698 | /* | ||
699 | * Write buffer destructor automatically called from kfree_skb. | ||
700 | */ | ||
701 | void sock_wfree(struct sk_buff *skb) | ||
702 | { | ||
703 | struct sock *sk = skb->sk; | ||
704 | |||
705 | /* In case it might be waiting for more memory. */ | ||
706 | atomic_sub(skb->truesize, &sk->sk_wmem_alloc); | ||
707 | if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) | ||
708 | sk->sk_write_space(sk); | ||
709 | sock_put(sk); | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * Read buffer destructor automatically called from kfree_skb. | ||
714 | */ | ||
715 | void sock_rfree(struct sk_buff *skb) | ||
716 | { | ||
717 | struct sock *sk = skb->sk; | ||
718 | |||
719 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); | ||
720 | } | ||
721 | |||
722 | |||
723 | int sock_i_uid(struct sock *sk) | ||
724 | { | ||
725 | int uid; | ||
726 | |||
727 | read_lock(&sk->sk_callback_lock); | ||
728 | uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; | ||
729 | read_unlock(&sk->sk_callback_lock); | ||
730 | return uid; | ||
731 | } | ||
732 | |||
733 | unsigned long sock_i_ino(struct sock *sk) | ||
734 | { | ||
735 | unsigned long ino; | ||
736 | |||
737 | read_lock(&sk->sk_callback_lock); | ||
738 | ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; | ||
739 | read_unlock(&sk->sk_callback_lock); | ||
740 | return ino; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Allocate a skb from the socket's send buffer. | ||
745 | */ | ||
746 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) | ||
747 | { | ||
748 | if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { | ||
749 | struct sk_buff * skb = alloc_skb(size, priority); | ||
750 | if (skb) { | ||
751 | skb_set_owner_w(skb, sk); | ||
752 | return skb; | ||
753 | } | ||
754 | } | ||
755 | return NULL; | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * Allocate a skb from the socket's receive buffer. | ||
760 | */ | ||
761 | struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) | ||
762 | { | ||
763 | if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { | ||
764 | struct sk_buff *skb = alloc_skb(size, priority); | ||
765 | if (skb) { | ||
766 | skb_set_owner_r(skb, sk); | ||
767 | return skb; | ||
768 | } | ||
769 | } | ||
770 | return NULL; | ||
771 | } | ||
772 | |||
773 | /* | ||
774 | * Allocate a memory block from the socket's option memory buffer. | ||
775 | */ | ||
776 | void *sock_kmalloc(struct sock *sk, int size, int priority) | ||
777 | { | ||
778 | if ((unsigned)size <= sysctl_optmem_max && | ||
779 | atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { | ||
780 | void *mem; | ||
781 | /* First do the add, to avoid the race if kmalloc | ||
782 | * might sleep. | ||
783 | */ | ||
784 | atomic_add(size, &sk->sk_omem_alloc); | ||
785 | mem = kmalloc(size, priority); | ||
786 | if (mem) | ||
787 | return mem; | ||
788 | atomic_sub(size, &sk->sk_omem_alloc); | ||
789 | } | ||
790 | return NULL; | ||
791 | } | ||
792 | |||
793 | /* | ||
794 | * Free an option memory block. | ||
795 | */ | ||
796 | void sock_kfree_s(struct sock *sk, void *mem, int size) | ||
797 | { | ||
798 | kfree(mem); | ||
799 | atomic_sub(size, &sk->sk_omem_alloc); | ||
800 | } | ||
801 | |||
802 | /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. | ||
803 | I think these locks should be removed for datagram sockets. | ||
804 | */ | ||
805 | static long sock_wait_for_wmem(struct sock * sk, long timeo) | ||
806 | { | ||
807 | DEFINE_WAIT(wait); | ||
808 | |||
809 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | ||
810 | for (;;) { | ||
811 | if (!timeo) | ||
812 | break; | ||
813 | if (signal_pending(current)) | ||
814 | break; | ||
815 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
816 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | ||
817 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) | ||
818 | break; | ||
819 | if (sk->sk_shutdown & SEND_SHUTDOWN) | ||
820 | break; | ||
821 | if (sk->sk_err) | ||
822 | break; | ||
823 | timeo = schedule_timeout(timeo); | ||
824 | } | ||
825 | finish_wait(sk->sk_sleep, &wait); | ||
826 | return timeo; | ||
827 | } | ||
828 | |||
829 | |||
830 | /* | ||
831 | * Generic send/receive buffer handlers | ||
832 | */ | ||
833 | |||
834 | static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, | ||
835 | unsigned long header_len, | ||
836 | unsigned long data_len, | ||
837 | int noblock, int *errcode) | ||
838 | { | ||
839 | struct sk_buff *skb; | ||
840 | unsigned int gfp_mask; | ||
841 | long timeo; | ||
842 | int err; | ||
843 | |||
844 | gfp_mask = sk->sk_allocation; | ||
845 | if (gfp_mask & __GFP_WAIT) | ||
846 | gfp_mask |= __GFP_REPEAT; | ||
847 | |||
848 | timeo = sock_sndtimeo(sk, noblock); | ||
849 | while (1) { | ||
850 | err = sock_error(sk); | ||
851 | if (err != 0) | ||
852 | goto failure; | ||
853 | |||
854 | err = -EPIPE; | ||
855 | if (sk->sk_shutdown & SEND_SHUTDOWN) | ||
856 | goto failure; | ||
857 | |||
858 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { | ||
859 | skb = alloc_skb(header_len, sk->sk_allocation); | ||
860 | if (skb) { | ||
861 | int npages; | ||
862 | int i; | ||
863 | |||
864 | /* No pages, we're done... */ | ||
865 | if (!data_len) | ||
866 | break; | ||
867 | |||
868 | npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; | ||
869 | skb->truesize += data_len; | ||
870 | skb_shinfo(skb)->nr_frags = npages; | ||
871 | for (i = 0; i < npages; i++) { | ||
872 | struct page *page; | ||
873 | skb_frag_t *frag; | ||
874 | |||
875 | page = alloc_pages(sk->sk_allocation, 0); | ||
876 | if (!page) { | ||
877 | err = -ENOBUFS; | ||
878 | skb_shinfo(skb)->nr_frags = i; | ||
879 | kfree_skb(skb); | ||
880 | goto failure; | ||
881 | } | ||
882 | |||
883 | frag = &skb_shinfo(skb)->frags[i]; | ||
884 | frag->page = page; | ||
885 | frag->page_offset = 0; | ||
886 | frag->size = (data_len >= PAGE_SIZE ? | ||
887 | PAGE_SIZE : | ||
888 | data_len); | ||
889 | data_len -= PAGE_SIZE; | ||
890 | } | ||
891 | |||
892 | /* Full success... */ | ||
893 | break; | ||
894 | } | ||
895 | err = -ENOBUFS; | ||
896 | goto failure; | ||
897 | } | ||
898 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | ||
899 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
900 | err = -EAGAIN; | ||
901 | if (!timeo) | ||
902 | goto failure; | ||
903 | if (signal_pending(current)) | ||
904 | goto interrupted; | ||
905 | timeo = sock_wait_for_wmem(sk, timeo); | ||
906 | } | ||
907 | |||
908 | skb_set_owner_w(skb, sk); | ||
909 | return skb; | ||
910 | |||
911 | interrupted: | ||
912 | err = sock_intr_errno(timeo); | ||
913 | failure: | ||
914 | *errcode = err; | ||
915 | return NULL; | ||
916 | } | ||
917 | |||
918 | struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, | ||
919 | int noblock, int *errcode) | ||
920 | { | ||
921 | return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); | ||
922 | } | ||
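/* A hedged sketch of the usual caller in a datagram sendmsg() path;
 * 'dev', 'err' and the LL_RESERVED_SPACE() headroom are illustrative
 * of what such callers pass:
 *
 *	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out;
 *	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 */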
923 | |||
924 | static void __lock_sock(struct sock *sk) | ||
925 | { | ||
926 | DEFINE_WAIT(wait); | ||
927 | |||
928 | for(;;) { | ||
929 | prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, | ||
930 | TASK_UNINTERRUPTIBLE); | ||
931 | spin_unlock_bh(&sk->sk_lock.slock); | ||
932 | schedule(); | ||
933 | spin_lock_bh(&sk->sk_lock.slock); | ||
934 | if(!sock_owned_by_user(sk)) | ||
935 | break; | ||
936 | } | ||
937 | finish_wait(&sk->sk_lock.wq, &wait); | ||
938 | } | ||
939 | |||
940 | static void __release_sock(struct sock *sk) | ||
941 | { | ||
942 | struct sk_buff *skb = sk->sk_backlog.head; | ||
943 | |||
944 | do { | ||
945 | sk->sk_backlog.head = sk->sk_backlog.tail = NULL; | ||
946 | bh_unlock_sock(sk); | ||
947 | |||
948 | do { | ||
949 | struct sk_buff *next = skb->next; | ||
950 | |||
951 | skb->next = NULL; | ||
952 | sk->sk_backlog_rcv(sk, skb); | ||
953 | |||
954 | /* | ||
955 | * We are in process context here with softirqs | ||
956 | * disabled, use cond_resched_softirq() to preempt. | ||
957 | * This is safe to do because we've taken the backlog | ||
958 | * queue private: | ||
959 | */ | ||
960 | cond_resched_softirq(); | ||
961 | |||
962 | skb = next; | ||
963 | } while (skb != NULL); | ||
964 | |||
965 | bh_lock_sock(sk); | ||
966 | } while((skb = sk->sk_backlog.head) != NULL); | ||
967 | } | ||
968 | |||
969 | /** | ||
970 | * sk_wait_data - wait for data to arrive at sk_receive_queue | ||
971 | * @sk: sock to wait on | ||
972 | * @timeo: for how long | ||
973 | * | ||
974 | * Now socket state including sk->sk_err is changed only under lock, | ||
975 | * hence we may omit checks after joining wait queue. | ||
976 | * We check receive queue before schedule() only as optimization; | ||
977 | * it is very likely that release_sock() added new data. | ||
978 | */ | ||
979 | int sk_wait_data(struct sock *sk, long *timeo) | ||
980 | { | ||
981 | int rc; | ||
982 | DEFINE_WAIT(wait); | ||
983 | |||
984 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | ||
985 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | ||
986 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); | ||
987 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | ||
988 | finish_wait(sk->sk_sleep, &wait); | ||
989 | return rc; | ||
990 | } | ||
991 | |||
992 | EXPORT_SYMBOL(sk_wait_data); | ||
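/* A hedged sketch of the usual caller loop: recvmsg() paths retry
 * around sk_wait_data() until data shows up, the timeout runs out,
 * or a signal arrives:
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */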
993 | |||
994 | /* | ||
995 | * Set of default routines for initialising struct proto_ops when | ||
996 | * the protocol does not support a particular function. In certain | ||
997 | * cases where it makes no sense for a protocol to have a "do nothing" | ||
998 | * function, some default processing is provided. | ||
999 | */ | ||
1000 | |||
1001 | int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) | ||
1002 | { | ||
1003 | return -EOPNOTSUPP; | ||
1004 | } | ||
1005 | |||
1006 | int sock_no_connect(struct socket *sock, struct sockaddr *saddr, | ||
1007 | int len, int flags) | ||
1008 | { | ||
1009 | return -EOPNOTSUPP; | ||
1010 | } | ||
1011 | |||
1012 | int sock_no_socketpair(struct socket *sock1, struct socket *sock2) | ||
1013 | { | ||
1014 | return -EOPNOTSUPP; | ||
1015 | } | ||
1016 | |||
1017 | int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) | ||
1018 | { | ||
1019 | return -EOPNOTSUPP; | ||
1020 | } | ||
1021 | |||
1022 | int sock_no_getname(struct socket *sock, struct sockaddr *saddr, | ||
1023 | int *len, int peer) | ||
1024 | { | ||
1025 | return -EOPNOTSUPP; | ||
1026 | } | ||
1027 | |||
1028 | unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) | ||
1029 | { | ||
1030 | return 0; | ||
1031 | } | ||
1032 | |||
1033 | int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | ||
1034 | { | ||
1035 | return -EOPNOTSUPP; | ||
1036 | } | ||
1037 | |||
1038 | int sock_no_listen(struct socket *sock, int backlog) | ||
1039 | { | ||
1040 | return -EOPNOTSUPP; | ||
1041 | } | ||
1042 | |||
1043 | int sock_no_shutdown(struct socket *sock, int how) | ||
1044 | { | ||
1045 | return -EOPNOTSUPP; | ||
1046 | } | ||
1047 | |||
1048 | int sock_no_setsockopt(struct socket *sock, int level, int optname, | ||
1049 | char __user *optval, int optlen) | ||
1050 | { | ||
1051 | return -EOPNOTSUPP; | ||
1052 | } | ||
1053 | |||
1054 | int sock_no_getsockopt(struct socket *sock, int level, int optname, | ||
1055 | char __user *optval, int __user *optlen) | ||
1056 | { | ||
1057 | return -EOPNOTSUPP; | ||
1058 | } | ||
1059 | |||
1060 | int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, | ||
1061 | size_t len) | ||
1062 | { | ||
1063 | return -EOPNOTSUPP; | ||
1064 | } | ||
1065 | |||
1066 | int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, | ||
1067 | size_t len, int flags) | ||
1068 | { | ||
1069 | return -EOPNOTSUPP; | ||
1070 | } | ||
1071 | |||
1072 | int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) | ||
1073 | { | ||
1074 | /* Mirror missing mmap method error code */ | ||
1075 | return -ENODEV; | ||
1076 | } | ||
1077 | |||
1078 | ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) | ||
1079 | { | ||
1080 | ssize_t res; | ||
1081 | struct msghdr msg = {.msg_flags = flags}; | ||
1082 | struct kvec iov; | ||
1083 | char *kaddr = kmap(page); | ||
1084 | iov.iov_base = kaddr + offset; | ||
1085 | iov.iov_len = size; | ||
1086 | res = kernel_sendmsg(sock, &msg, &iov, 1, size); | ||
1087 | kunmap(page); | ||
1088 | return res; | ||
1089 | } | ||
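/* A hedged sketch of how a protocol family wires these defaults into
 * its proto_ops; PF_EXAMPLE and the example_*() handlers are
 * illustrative, and every operation the family does not support
 * points at a sock_no_*() stub:
 *
 *	static struct proto_ops example_dgram_ops = {
 *		.family   = PF_EXAMPLE,
 *		.bind     = example_bind,
 *		.sendmsg  = example_sendmsg,
 *		.recvmsg  = example_recvmsg,
 *		.connect  = sock_no_connect,
 *		.accept   = sock_no_accept,
 *		.listen   = sock_no_listen,
 *		.shutdown = sock_no_shutdown,
 *		.mmap     = sock_no_mmap,
 *		.sendpage = sock_no_sendpage,
 *	};
 */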
1090 | |||
1091 | /* | ||
1092 | * Default Socket Callbacks | ||
1093 | */ | ||
1094 | |||
1095 | static void sock_def_wakeup(struct sock *sk) | ||
1096 | { | ||
1097 | read_lock(&sk->sk_callback_lock); | ||
1098 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
1099 | wake_up_interruptible_all(sk->sk_sleep); | ||
1100 | read_unlock(&sk->sk_callback_lock); | ||
1101 | } | ||
1102 | |||
1103 | static void sock_def_error_report(struct sock *sk) | ||
1104 | { | ||
1105 | read_lock(&sk->sk_callback_lock); | ||
1106 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
1107 | wake_up_interruptible(sk->sk_sleep); | ||
1108 | sk_wake_async(sk,0,POLL_ERR); | ||
1109 | read_unlock(&sk->sk_callback_lock); | ||
1110 | } | ||
1111 | |||
1112 | static void sock_def_readable(struct sock *sk, int len) | ||
1113 | { | ||
1114 | read_lock(&sk->sk_callback_lock); | ||
1115 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
1116 | wake_up_interruptible(sk->sk_sleep); | ||
1117 | sk_wake_async(sk,1,POLL_IN); | ||
1118 | read_unlock(&sk->sk_callback_lock); | ||
1119 | } | ||
1120 | |||
1121 | static void sock_def_write_space(struct sock *sk) | ||
1122 | { | ||
1123 | read_lock(&sk->sk_callback_lock); | ||
1124 | |||
1125 | /* Do not wake up a writer until he can make "significant" | ||
1126 | * progress. --DaveM | ||
1127 | */ | ||
1128 | if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | ||
1129 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
1130 | wake_up_interruptible(sk->sk_sleep); | ||
1131 | |||
1132 | /* Should agree with poll, otherwise some programs break */ | ||
1133 | if (sock_writeable(sk)) | ||
1134 | sk_wake_async(sk, 2, POLL_OUT); | ||
1135 | } | ||
1136 | |||
1137 | read_unlock(&sk->sk_callback_lock); | ||
1138 | } | ||
1139 | |||
1140 | static void sock_def_destruct(struct sock *sk) | ||
1141 | { | ||
1142 | if (sk->sk_protinfo) | ||
1143 | kfree(sk->sk_protinfo); | ||
1144 | } | ||
1145 | |||
1146 | void sk_send_sigurg(struct sock *sk) | ||
1147 | { | ||
1148 | if (sk->sk_socket && sk->sk_socket->file) | ||
1149 | if (send_sigurg(&sk->sk_socket->file->f_owner)) | ||
1150 | sk_wake_async(sk, 3, POLL_PRI); | ||
1151 | } | ||
1152 | |||
1153 | void sk_reset_timer(struct sock *sk, struct timer_list* timer, | ||
1154 | unsigned long expires) | ||
1155 | { | ||
1156 | if (!mod_timer(timer, expires)) | ||
1157 | sock_hold(sk); | ||
1158 | } | ||
1159 | |||
1160 | EXPORT_SYMBOL(sk_reset_timer); | ||
1161 | |||
1162 | void sk_stop_timer(struct sock *sk, struct timer_list* timer) | ||
1163 | { | ||
1164 | if (timer_pending(timer) && del_timer(timer)) | ||
1165 | __sock_put(sk); | ||
1166 | } | ||
1167 | |||
1168 | EXPORT_SYMBOL(sk_stop_timer); | ||
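
These two helpers tie a timer's lifetime to the sock refcount: arming takes a reference only if the timer was not already pending, and stopping drops it only if the timer was still pending. A minimal hedged sketch of how a protocol might pair them around a retransmit timer; my_proto_start_rtx(), my_proto_stop_rtx() and the half-second timeout are hypothetical, not part of this file:

#include <net/sock.h>

static void my_proto_start_rtx(struct sock *sk)
{
	/* sk_reset_timer() takes a reference on sk only when the timer
	 * was not already pending, so the timer handler may safely
	 * dereference sk and owns the matching sock_put(). */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 2);
}

static void my_proto_stop_rtx(struct sock *sk)
{
	/* Drops the reference iff the timer was still pending. */
	sk_stop_timer(sk, &sk->sk_timer);
}
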
1169 | |||
1170 | void sock_init_data(struct socket *sock, struct sock *sk) | ||
1171 | { | ||
1172 | skb_queue_head_init(&sk->sk_receive_queue); | ||
1173 | skb_queue_head_init(&sk->sk_write_queue); | ||
1174 | skb_queue_head_init(&sk->sk_error_queue); | ||
1175 | |||
1176 | sk->sk_send_head = NULL; | ||
1177 | |||
1178 | init_timer(&sk->sk_timer); | ||
1179 | |||
1180 | sk->sk_allocation = GFP_KERNEL; | ||
1181 | sk->sk_rcvbuf = sysctl_rmem_default; | ||
1182 | sk->sk_sndbuf = sysctl_wmem_default; | ||
1183 | sk->sk_state = TCP_CLOSE; | ||
1184 | sk->sk_socket = sock; | ||
1185 | |||
1186 | sock_set_flag(sk, SOCK_ZAPPED); | ||
1187 | |||
1188 | if(sock) | ||
1189 | { | ||
1190 | sk->sk_type = sock->type; | ||
1191 | sk->sk_sleep = &sock->wait; | ||
1192 | sock->sk = sk; | ||
1193 | } else | ||
1194 | sk->sk_sleep = NULL; | ||
1195 | |||
1196 | rwlock_init(&sk->sk_dst_lock); | ||
1197 | rwlock_init(&sk->sk_callback_lock); | ||
1198 | |||
1199 | sk->sk_state_change = sock_def_wakeup; | ||
1200 | sk->sk_data_ready = sock_def_readable; | ||
1201 | sk->sk_write_space = sock_def_write_space; | ||
1202 | sk->sk_error_report = sock_def_error_report; | ||
1203 | sk->sk_destruct = sock_def_destruct; | ||
1204 | |||
1205 | sk->sk_sndmsg_page = NULL; | ||
1206 | sk->sk_sndmsg_off = 0; | ||
1207 | |||
1208 | sk->sk_peercred.pid = 0; | ||
1209 | sk->sk_peercred.uid = -1; | ||
1210 | sk->sk_peercred.gid = -1; | ||
1211 | sk->sk_write_pending = 0; | ||
1212 | sk->sk_rcvlowat = 1; | ||
1213 | sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; | ||
1214 | sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; | ||
1215 | |||
1216 | sk->sk_stamp.tv_sec = -1L; | ||
1217 | sk->sk_stamp.tv_usec = -1L; | ||
1218 | |||
1219 | atomic_set(&sk->sk_refcnt, 1); | ||
1220 | } | ||
1221 | |||
1222 | void fastcall lock_sock(struct sock *sk) | ||
1223 | { | ||
1224 | might_sleep(); | ||
1225 | spin_lock_bh(&(sk->sk_lock.slock)); | ||
1226 | if (sk->sk_lock.owner) | ||
1227 | __lock_sock(sk); | ||
1228 | sk->sk_lock.owner = (void *)1; | ||
1229 | spin_unlock_bh(&(sk->sk_lock.slock)); | ||
1230 | } | ||
1231 | |||
1232 | EXPORT_SYMBOL(lock_sock); | ||
1233 | |||
1234 | void fastcall release_sock(struct sock *sk) | ||
1235 | { | ||
1236 | spin_lock_bh(&(sk->sk_lock.slock)); | ||
1237 | if (sk->sk_backlog.tail) | ||
1238 | __release_sock(sk); | ||
1239 | sk->sk_lock.owner = NULL; | ||
1240 | if (waitqueue_active(&(sk->sk_lock.wq))) | ||
1241 | wake_up(&(sk->sk_lock.wq)); | ||
1242 | spin_unlock_bh(&(sk->sk_lock.slock)); | ||
1243 | } | ||
1244 | EXPORT_SYMBOL(release_sock); | ||
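
While a process owns the socket via lock_sock(), softirq input is queued to sk->sk_backlog; release_sock() then drains that backlog through __release_sock() and wakes lock waiters. A hedged sketch of the canonical ownership pattern; my_apply_option() is a hypothetical helper:

#include <net/sock.h>

static int my_apply_option(struct sock *sk, int val);

static int my_proto_set_option(struct sock *sk, int val)
{
	int err;

	lock_sock(sk);		/* may sleep; input now goes to backlog */
	err = my_apply_option(sk, val);
	release_sock(sk);	/* drains backlog, wakes lock waiters */
	return err;
}
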
1245 | |||
1246 | int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) | ||
1247 | { | ||
1248 | if (!sock_flag(sk, SOCK_TIMESTAMP)) | ||
1249 | sock_enable_timestamp(sk); | ||
1250 | if (sk->sk_stamp.tv_sec == -1) | ||
1251 | return -ENOENT; | ||
1252 | if (sk->sk_stamp.tv_sec == 0) | ||
1253 | do_gettimeofday(&sk->sk_stamp); | ||
1254 | return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ? | ||
1255 | -EFAULT : 0; | ||
1256 | } | ||
1257 | EXPORT_SYMBOL(sock_get_timestamp); | ||
1258 | |||
1259 | void sock_enable_timestamp(struct sock *sk) | ||
1260 | { | ||
1261 | if (!sock_flag(sk, SOCK_TIMESTAMP)) { | ||
1262 | sock_set_flag(sk, SOCK_TIMESTAMP); | ||
1263 | net_enable_timestamp(); | ||
1264 | } | ||
1265 | } | ||
1266 | EXPORT_SYMBOL(sock_enable_timestamp); | ||
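
sock_get_timestamp() is what protocols call to service the SIOCGSTAMP ioctl, enabling timestamping lazily on first use. A hedged user-space sketch of the consumer side, assuming a socket that has already received a packet:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <linux/sockios.h>

static void print_last_rx_stamp(int sockfd)
{
	struct timeval tv;

	/* SIOCGSTAMP returns the receive time of the last packet. */
	if (ioctl(sockfd, SIOCGSTAMP, &tv) == 0)
		printf("last packet received at %ld.%06ld\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
}
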
1267 | |||
1268 | /* | ||
1269 | * Get a socket option on a socket. | ||
1270 | * | ||
1271 | * FIX: POSIX 1003.1g is very ambiguous here. It states that | ||
1272 | * asynchronous errors should be reported by getsockopt. We assume | ||
1273 | * this means if you specify SO_ERROR (otherwise what's the point of it). | ||
1274 | */ | ||
1275 | int sock_common_getsockopt(struct socket *sock, int level, int optname, | ||
1276 | char __user *optval, int __user *optlen) | ||
1277 | { | ||
1278 | struct sock *sk = sock->sk; | ||
1279 | |||
1280 | return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); | ||
1281 | } | ||
1282 | |||
1283 | EXPORT_SYMBOL(sock_common_getsockopt); | ||
1284 | |||
1285 | int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, | ||
1286 | struct msghdr *msg, size_t size, int flags) | ||
1287 | { | ||
1288 | struct sock *sk = sock->sk; | ||
1289 | int addr_len = 0; | ||
1290 | int err; | ||
1291 | |||
1292 | err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, | ||
1293 | flags & ~MSG_DONTWAIT, &addr_len); | ||
1294 | if (err >= 0) | ||
1295 | msg->msg_namelen = addr_len; | ||
1296 | return err; | ||
1297 | } | ||
1298 | |||
1299 | EXPORT_SYMBOL(sock_common_recvmsg); | ||
1300 | |||
1301 | /* | ||
1302 | * Set socket options on an inet socket. | ||
1303 | */ | ||
1304 | int sock_common_setsockopt(struct socket *sock, int level, int optname, | ||
1305 | char __user *optval, int optlen) | ||
1306 | { | ||
1307 | struct sock *sk = sock->sk; | ||
1308 | |||
1309 | return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); | ||
1310 | } | ||
1311 | |||
1312 | EXPORT_SYMBOL(sock_common_setsockopt); | ||
1313 | |||
1314 | void sk_common_release(struct sock *sk) | ||
1315 | { | ||
1316 | if (sk->sk_prot->destroy) | ||
1317 | sk->sk_prot->destroy(sk); | ||
1318 | |||
1319 | /* | ||
1320 | * Observation: when sock_common_release is called, processes have | ||
1321 | * no access to the socket, but the network stack still does. | ||
1322 | * Step one, detach it from networking: | ||
1323 | * | ||
1324 | * A. Remove from hash tables. | ||
1325 | */ | ||
1326 | |||
1327 | sk->sk_prot->unhash(sk); | ||
1328 | |||
1329 | /* | ||
1330 | * At this point the socket cannot receive new packets, but some may | ||
1331 | * still be in flight: a CPU running the receiver may have done its | ||
1332 | * hash table lookup before we unhashed the socket. Such packets will | ||
1333 | * reach the receive queue and be purged by the socket destructor. | ||
1334 | * | ||
1335 | * Also, we still have packets pending on the receive queue, and | ||
1336 | * probably our own packets waiting in device queues. sock_destroy | ||
1337 | * will drain the receive queue, but transmitted packets delay | ||
1338 | * socket destruction until the last reference is released. | ||
1339 | */ | ||
1340 | |||
1341 | sock_orphan(sk); | ||
1342 | |||
1343 | xfrm_sk_free_policy(sk); | ||
1344 | |||
1345 | #ifdef INET_REFCNT_DEBUG | ||
1346 | if (atomic_read(&sk->sk_refcnt) != 1) | ||
1347 | printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", | ||
1348 | sk, atomic_read(&sk->sk_refcnt)); | ||
1349 | #endif | ||
1350 | sock_put(sk); | ||
1351 | } | ||
1352 | |||
1353 | EXPORT_SYMBOL(sk_common_release); | ||
1354 | |||
1355 | static DEFINE_RWLOCK(proto_list_lock); | ||
1356 | static LIST_HEAD(proto_list); | ||
1357 | |||
1358 | int proto_register(struct proto *prot, int alloc_slab) | ||
1359 | { | ||
1360 | int rc = -ENOBUFS; | ||
1361 | |||
1362 | write_lock(&proto_list_lock); | ||
1363 | |||
1364 | if (alloc_slab) { | ||
1365 | prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, | ||
1366 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
1367 | |||
1368 | if (prot->slab == NULL) { | ||
1369 | printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", | ||
1370 | prot->name); | ||
1371 | goto out_unlock; | ||
1372 | } | ||
1373 | } | ||
1374 | |||
1375 | list_add(&prot->node, &proto_list); | ||
1376 | rc = 0; | ||
1377 | out_unlock: | ||
1378 | write_unlock(&proto_list_lock); | ||
1379 | return rc; | ||
1380 | } | ||
1381 | |||
1382 | EXPORT_SYMBOL(proto_register); | ||
1383 | |||
1384 | void proto_unregister(struct proto *prot) | ||
1385 | { | ||
1386 | write_lock(&proto_list_lock); | ||
1387 | |||
1388 | if (prot->slab != NULL) { | ||
1389 | kmem_cache_destroy(prot->slab); | ||
1390 | prot->slab = NULL; | ||
1391 | } | ||
1392 | |||
1393 | list_del(&prot->node); | ||
1394 | write_unlock(&proto_list_lock); | ||
1395 | } | ||
1396 | |||
1397 | EXPORT_SYMBOL(proto_unregister); | ||
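
A hedged sketch of how a protocol would use this pair: register once at module init with alloc_slab != 0 so its socks come from a dedicated slab cache, and unregister on exit. struct my_proto_sock and "MYPROTO" are illustrative names only:

static int my_proto_state; /* placeholder for real per-proto state */

struct my_proto_sock {
	struct sock	sk;	/* struct sock must come first */
	int		my_state;
};

static struct proto my_proto = {
	.name		= "MYPROTO",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct my_proto_sock),
	/* .close, .connect, .sendmsg, ... filled in as needed */
};

static int __init my_proto_init(void)
{
	return proto_register(&my_proto, 1);	/* 1 => alloc_slab */
}

static void __exit my_proto_exit(void)
{
	proto_unregister(&my_proto);
}
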
1398 | |||
1399 | #ifdef CONFIG_PROC_FS | ||
1400 | static inline struct proto *__proto_head(void) | ||
1401 | { | ||
1402 | return list_entry(proto_list.next, struct proto, node); | ||
1403 | } | ||
1404 | |||
1405 | static inline struct proto *proto_head(void) | ||
1406 | { | ||
1407 | return list_empty(&proto_list) ? NULL : __proto_head(); | ||
1408 | } | ||
1409 | |||
1410 | static inline struct proto *proto_next(struct proto *proto) | ||
1411 | { | ||
1412 | return proto->node.next == &proto_list ? NULL : | ||
1413 | list_entry(proto->node.next, struct proto, node); | ||
1414 | } | ||
1415 | |||
1416 | static inline struct proto *proto_get_idx(loff_t pos) | ||
1417 | { | ||
1418 | struct proto *proto; | ||
1419 | loff_t i = 0; | ||
1420 | |||
1421 | list_for_each_entry(proto, &proto_list, node) | ||
1422 | if (i++ == pos) | ||
1423 | goto out; | ||
1424 | |||
1425 | proto = NULL; | ||
1426 | out: | ||
1427 | return proto; | ||
1428 | } | ||
1429 | |||
1430 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) | ||
1431 | { | ||
1432 | read_lock(&proto_list_lock); | ||
1433 | return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN; | ||
1434 | } | ||
1435 | |||
1436 | static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1437 | { | ||
1438 | ++*pos; | ||
1439 | return v == SEQ_START_TOKEN ? proto_head() : proto_next(v); | ||
1440 | } | ||
1441 | |||
1442 | static void proto_seq_stop(struct seq_file *seq, void *v) | ||
1443 | { | ||
1444 | read_unlock(&proto_list_lock); | ||
1445 | } | ||
1446 | |||
1447 | static char proto_method_implemented(const void *method) | ||
1448 | { | ||
1449 | return method == NULL ? 'n' : 'y'; | ||
1450 | } | ||
1451 | |||
1452 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) | ||
1453 | { | ||
1454 | seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " | ||
1455 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", | ||
1456 | proto->name, | ||
1457 | proto->obj_size, | ||
1458 | proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, | ||
1459 | proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, | ||
1460 | proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", | ||
1461 | proto->max_header, | ||
1462 | proto->slab == NULL ? "no" : "yes", | ||
1463 | module_name(proto->owner), | ||
1464 | proto_method_implemented(proto->close), | ||
1465 | proto_method_implemented(proto->connect), | ||
1466 | proto_method_implemented(proto->disconnect), | ||
1467 | proto_method_implemented(proto->accept), | ||
1468 | proto_method_implemented(proto->ioctl), | ||
1469 | proto_method_implemented(proto->init), | ||
1470 | proto_method_implemented(proto->destroy), | ||
1471 | proto_method_implemented(proto->shutdown), | ||
1472 | proto_method_implemented(proto->setsockopt), | ||
1473 | proto_method_implemented(proto->getsockopt), | ||
1474 | proto_method_implemented(proto->sendmsg), | ||
1475 | proto_method_implemented(proto->recvmsg), | ||
1476 | proto_method_implemented(proto->sendpage), | ||
1477 | proto_method_implemented(proto->bind), | ||
1478 | proto_method_implemented(proto->backlog_rcv), | ||
1479 | proto_method_implemented(proto->hash), | ||
1480 | proto_method_implemented(proto->unhash), | ||
1481 | proto_method_implemented(proto->get_port), | ||
1482 | proto_method_implemented(proto->enter_memory_pressure)); | ||
1483 | } | ||
1484 | |||
1485 | static int proto_seq_show(struct seq_file *seq, void *v) | ||
1486 | { | ||
1487 | if (v == SEQ_START_TOKEN) | ||
1488 | seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", | ||
1489 | "protocol", | ||
1490 | "size", | ||
1491 | "sockets", | ||
1492 | "memory", | ||
1493 | "press", | ||
1494 | "maxhdr", | ||
1495 | "slab", | ||
1496 | "module", | ||
1497 | "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); | ||
1498 | else | ||
1499 | proto_seq_printf(seq, v); | ||
1500 | return 0; | ||
1501 | } | ||
1502 | |||
1503 | static struct seq_operations proto_seq_ops = { | ||
1504 | .start = proto_seq_start, | ||
1505 | .next = proto_seq_next, | ||
1506 | .stop = proto_seq_stop, | ||
1507 | .show = proto_seq_show, | ||
1508 | }; | ||
1509 | |||
1510 | static int proto_seq_open(struct inode *inode, struct file *file) | ||
1511 | { | ||
1512 | return seq_open(file, &proto_seq_ops); | ||
1513 | } | ||
1514 | |||
1515 | static struct file_operations proto_seq_fops = { | ||
1516 | .owner = THIS_MODULE, | ||
1517 | .open = proto_seq_open, | ||
1518 | .read = seq_read, | ||
1519 | .llseek = seq_lseek, | ||
1520 | .release = seq_release, | ||
1521 | }; | ||
1522 | |||
1523 | static int __init proto_init(void) | ||
1524 | { | ||
1525 | /* register /proc/net/protocols */ | ||
1526 | return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; | ||
1527 | } | ||
1528 | |||
1529 | subsys_initcall(proto_init); | ||
1530 | |||
1531 | #endif /* CONFIG_PROC_FS */ | ||
1532 | |||
1533 | EXPORT_SYMBOL(sk_alloc); | ||
1534 | EXPORT_SYMBOL(sk_free); | ||
1535 | EXPORT_SYMBOL(sk_send_sigurg); | ||
1536 | EXPORT_SYMBOL(sock_alloc_send_skb); | ||
1537 | EXPORT_SYMBOL(sock_init_data); | ||
1538 | EXPORT_SYMBOL(sock_kfree_s); | ||
1539 | EXPORT_SYMBOL(sock_kmalloc); | ||
1540 | EXPORT_SYMBOL(sock_no_accept); | ||
1541 | EXPORT_SYMBOL(sock_no_bind); | ||
1542 | EXPORT_SYMBOL(sock_no_connect); | ||
1543 | EXPORT_SYMBOL(sock_no_getname); | ||
1544 | EXPORT_SYMBOL(sock_no_getsockopt); | ||
1545 | EXPORT_SYMBOL(sock_no_ioctl); | ||
1546 | EXPORT_SYMBOL(sock_no_listen); | ||
1547 | EXPORT_SYMBOL(sock_no_mmap); | ||
1548 | EXPORT_SYMBOL(sock_no_poll); | ||
1549 | EXPORT_SYMBOL(sock_no_recvmsg); | ||
1550 | EXPORT_SYMBOL(sock_no_sendmsg); | ||
1551 | EXPORT_SYMBOL(sock_no_sendpage); | ||
1552 | EXPORT_SYMBOL(sock_no_setsockopt); | ||
1553 | EXPORT_SYMBOL(sock_no_shutdown); | ||
1554 | EXPORT_SYMBOL(sock_no_socketpair); | ||
1555 | EXPORT_SYMBOL(sock_rfree); | ||
1556 | EXPORT_SYMBOL(sock_setsockopt); | ||
1557 | EXPORT_SYMBOL(sock_wfree); | ||
1558 | EXPORT_SYMBOL(sock_wmalloc); | ||
1559 | EXPORT_SYMBOL(sock_i_uid); | ||
1560 | EXPORT_SYMBOL(sock_i_ino); | ||
1561 | #ifdef CONFIG_SYSCTL | ||
1562 | EXPORT_SYMBOL(sysctl_optmem_max); | ||
1563 | EXPORT_SYMBOL(sysctl_rmem_max); | ||
1564 | EXPORT_SYMBOL(sysctl_wmem_max); | ||
1565 | #endif | ||
diff --git a/net/core/stream.c b/net/core/stream.c new file mode 100644 index 000000000000..1e27a57b5a97 --- /dev/null +++ b/net/core/stream.c | |||
@@ -0,0 +1,287 @@ | |||
1 | /* | ||
2 | * SUCS NET3: | ||
3 | * | ||
4 | * Generic stream handling routines. These are generic for most | ||
5 | * protocols. Even IP. Tonight 8-). | ||
6 | * This is used because the TCP and LLC layers (and others too) | ||
7 | * have mostly identical sendmsg() and recvmsg() code. | ||
8 | * So we (will) share it here. | ||
9 | * | ||
10 | * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
11 | * (from old tcp.c code) | ||
12 | * Alan Cox <alan@redhat.com> (Borrowed comments 8-)) | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/net.h> | ||
17 | #include <linux/signal.h> | ||
18 | #include <linux/tcp.h> | ||
19 | #include <linux/wait.h> | ||
20 | #include <net/sock.h> | ||
21 | |||
22 | /** | ||
23 | * sk_stream_write_space - stream socket write_space callback. | ||
24 | * @sk: socket | ||
25 | * | ||
26 | * Clears SOCK_NOSPACE and wakes up writers once send buffer space is available. | ||
27 | */ | ||
28 | void sk_stream_write_space(struct sock *sk) | ||
29 | { | ||
30 | struct socket *sock = sk->sk_socket; | ||
31 | |||
32 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { | ||
33 | clear_bit(SOCK_NOSPACE, &sock->flags); | ||
34 | |||
35 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
36 | wake_up_interruptible(sk->sk_sleep); | ||
37 | if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | ||
38 | sock_wake_async(sock, 2, POLL_OUT); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | EXPORT_SYMBOL(sk_stream_write_space); | ||
43 | |||
44 | /** | ||
45 | * sk_stream_wait_connect - Wait for a socket to get into the connected state | ||
46 | * @sk: sock to wait on | ||
47 | * @timeo_p: for how long to wait | ||
48 | * | ||
49 | * Must be called with the socket locked. | ||
50 | */ | ||
51 | int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | ||
52 | { | ||
53 | struct task_struct *tsk = current; | ||
54 | DEFINE_WAIT(wait); | ||
55 | |||
56 | while (1) { | ||
57 | if (sk->sk_err) | ||
58 | return sock_error(sk); | ||
59 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) | ||
60 | return -EPIPE; | ||
61 | if (!*timeo_p) | ||
62 | return -EAGAIN; | ||
63 | if (signal_pending(tsk)) | ||
64 | return sock_intr_errno(*timeo_p); | ||
65 | |||
66 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | ||
67 | sk->sk_write_pending++; | ||
68 | if (sk_wait_event(sk, timeo_p, | ||
69 | !((1 << sk->sk_state) & | ||
70 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))) | ||
71 | break; | ||
72 | finish_wait(sk->sk_sleep, &wait); | ||
73 | sk->sk_write_pending--; | ||
74 | } | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | EXPORT_SYMBOL(sk_stream_wait_connect); | ||
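
A hedged sketch of the usual prologue of a stream protocol's sendmsg() path: if the three-way handshake has not completed, block (up to the send timeout) before queueing any data. my_stream_sendmsg_prologue() is a hypothetical name:

#include <linux/tcp.h>
#include <net/sock.h>

static int my_stream_sendmsg_prologue(struct sock *sk, int flags)
{
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Not yet connected: wait, or fail with -EPIPE, -EAGAIN or a
	 * signal errno as sk_stream_wait_connect() decides. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		return sk_stream_wait_connect(sk, &timeo);
	return 0;
}
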
79 | |||
80 | /** | ||
81 | * sk_stream_closing - Return 1 if we still have things to send in our buffers. | ||
82 | * @sk: socket to verify | ||
83 | */ | ||
84 | static inline int sk_stream_closing(struct sock *sk) | ||
85 | { | ||
86 | return (1 << sk->sk_state) & | ||
87 | (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK); | ||
88 | } | ||
89 | |||
90 | void sk_stream_wait_close(struct sock *sk, long timeout) | ||
91 | { | ||
92 | if (timeout) { | ||
93 | DEFINE_WAIT(wait); | ||
94 | |||
95 | do { | ||
96 | prepare_to_wait(sk->sk_sleep, &wait, | ||
97 | TASK_INTERRUPTIBLE); | ||
98 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) | ||
99 | break; | ||
100 | } while (!signal_pending(current) && timeout); | ||
101 | |||
102 | finish_wait(sk->sk_sleep, &wait); | ||
103 | } | ||
104 | } | ||
105 | |||
106 | EXPORT_SYMBOL(sk_stream_wait_close); | ||
107 | |||
108 | /** | ||
109 | * sk_stream_wait_memory - Wait for more memory for a socket | ||
110 | * @sk: socket to wait for memory | ||
111 | * @timeo_p: for how long | ||
112 | */ | ||
113 | int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | ||
114 | { | ||
115 | int err = 0; | ||
116 | long vm_wait = 0; | ||
117 | long current_timeo = *timeo_p; | ||
118 | DEFINE_WAIT(wait); | ||
119 | |||
120 | if (sk_stream_memory_free(sk)) | ||
121 | current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; | ||
122 | |||
123 | while (1) { | ||
124 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | ||
125 | |||
126 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | ||
127 | |||
128 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | ||
129 | goto do_error; | ||
130 | if (!*timeo_p) | ||
131 | goto do_nonblock; | ||
132 | if (signal_pending(current)) | ||
133 | goto do_interrupted; | ||
134 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | ||
135 | if (sk_stream_memory_free(sk) && !vm_wait) | ||
136 | break; | ||
137 | |||
138 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
139 | sk->sk_write_pending++; | ||
140 | sk_wait_event(sk, ¤t_timeo, sk_stream_memory_free(sk) && | ||
141 | vm_wait); | ||
142 | sk->sk_write_pending--; | ||
143 | |||
144 | if (vm_wait) { | ||
145 | vm_wait -= current_timeo; | ||
146 | current_timeo = *timeo_p; | ||
147 | if (current_timeo != MAX_SCHEDULE_TIMEOUT && | ||
148 | (current_timeo -= vm_wait) < 0) | ||
149 | current_timeo = 0; | ||
150 | vm_wait = 0; | ||
151 | } | ||
152 | *timeo_p = current_timeo; | ||
153 | } | ||
154 | out: | ||
155 | finish_wait(sk->sk_sleep, &wait); | ||
156 | return err; | ||
157 | |||
158 | do_error: | ||
159 | err = -EPIPE; | ||
160 | goto out; | ||
161 | do_nonblock: | ||
162 | err = -EAGAIN; | ||
163 | goto out; | ||
164 | do_interrupted: | ||
165 | err = sock_intr_errno(*timeo_p); | ||
166 | goto out; | ||
167 | } | ||
168 | |||
169 | EXPORT_SYMBOL(sk_stream_wait_memory); | ||
170 | |||
171 | void sk_stream_rfree(struct sk_buff *skb) | ||
172 | { | ||
173 | struct sock *sk = skb->sk; | ||
174 | |||
175 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); | ||
176 | sk->sk_forward_alloc += skb->truesize; | ||
177 | } | ||
178 | |||
179 | EXPORT_SYMBOL(sk_stream_rfree); | ||
180 | |||
181 | int sk_stream_error(struct sock *sk, int flags, int err) | ||
182 | { | ||
183 | if (err == -EPIPE) | ||
184 | err = sock_error(sk) ? : -EPIPE; | ||
185 | if (err == -EPIPE && !(flags & MSG_NOSIGNAL)) | ||
186 | send_sig(SIGPIPE, current, 0); | ||
187 | return err; | ||
188 | } | ||
189 | |||
190 | EXPORT_SYMBOL(sk_stream_error); | ||
191 | |||
192 | void __sk_stream_mem_reclaim(struct sock *sk) | ||
193 | { | ||
194 | if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) { | ||
195 | atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, | ||
196 | sk->sk_prot->memory_allocated); | ||
197 | sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; | ||
198 | if (*sk->sk_prot->memory_pressure && | ||
199 | (atomic_read(sk->sk_prot->memory_allocated) < | ||
200 | sk->sk_prot->sysctl_mem[0])) | ||
201 | *sk->sk_prot->memory_pressure = 0; | ||
202 | } | ||
203 | } | ||
204 | |||
205 | EXPORT_SYMBOL(__sk_stream_mem_reclaim); | ||
206 | |||
207 | int sk_stream_mem_schedule(struct sock *sk, int size, int kind) | ||
208 | { | ||
209 | int amt = sk_stream_pages(size); | ||
210 | |||
211 | sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM; | ||
212 | atomic_add(amt, sk->sk_prot->memory_allocated); | ||
213 | |||
214 | /* Under limit. */ | ||
215 | if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) { | ||
216 | if (*sk->sk_prot->memory_pressure) | ||
217 | *sk->sk_prot->memory_pressure = 0; | ||
218 | return 1; | ||
219 | } | ||
220 | |||
221 | /* Over hard limit. */ | ||
222 | if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) { | ||
223 | sk->sk_prot->enter_memory_pressure(); | ||
224 | goto suppress_allocation; | ||
225 | } | ||
226 | |||
227 | /* Under pressure. */ | ||
228 | if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1]) | ||
229 | sk->sk_prot->enter_memory_pressure(); | ||
230 | |||
231 | if (kind) { | ||
232 | if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0]) | ||
233 | return 1; | ||
234 | } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0]) | ||
235 | return 1; | ||
236 | |||
237 | if (!*sk->sk_prot->memory_pressure || | ||
238 | sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) * | ||
239 | sk_stream_pages(sk->sk_wmem_queued + | ||
240 | atomic_read(&sk->sk_rmem_alloc) + | ||
241 | sk->sk_forward_alloc)) | ||
242 | return 1; | ||
243 | |||
244 | suppress_allocation: | ||
245 | |||
246 | if (!kind) { | ||
247 | sk_stream_moderate_sndbuf(sk); | ||
248 | |||
249 | /* Fail only if the socket is _under_ its sndbuf. | ||
250 | * In that case we cannot block, so we have to fail. | ||
251 | */ | ||
252 | if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) | ||
253 | return 1; | ||
254 | } | ||
255 | |||
256 | /* Alas. Undo changes. */ | ||
257 | sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM; | ||
258 | atomic_sub(amt, sk->sk_prot->memory_allocated); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | EXPORT_SYMBOL(sk_stream_mem_schedule); | ||
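
A hedged sketch of the pattern this function serves: charge buffer memory (in SK_STREAM_MEM_QUANTUM pages) before queueing an skb, with kind == 1 selecting the receive-side limits. It assumes the sk_stream_set_owner_r() helper from <net/sock.h>, which sets the destructor to sk_stream_rfree above and consumes forward_alloc; my_stream_queue_rcv() is hypothetical:

#include <net/sock.h>

static int my_stream_queue_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Refuse the skb if the protocol is over its memory limits. */
	if (!sk_stream_mem_schedule(sk, skb->truesize, 1))
		return -ENOBUFS;

	sk_stream_set_owner_r(skb, sk);	/* charge against forward_alloc */
	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, skb->len);
	return 0;
}
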
263 | |||
264 | void sk_stream_kill_queues(struct sock *sk) | ||
265 | { | ||
266 | /* First the read buffer. */ | ||
267 | __skb_queue_purge(&sk->sk_receive_queue); | ||
268 | |||
269 | /* Next, the error queue. */ | ||
270 | __skb_queue_purge(&sk->sk_error_queue); | ||
271 | |||
272 | /* Next, the write queue. */ | ||
273 | BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); | ||
274 | |||
275 | /* Account for returned memory. */ | ||
276 | sk_stream_mem_reclaim(sk); | ||
277 | |||
278 | BUG_TRAP(!sk->sk_wmem_queued); | ||
279 | BUG_TRAP(!sk->sk_forward_alloc); | ||
280 | |||
281 | /* It is _impossible_ for the backlog to contain anything | ||
282 | * when we get here. All user references to this socket | ||
283 | * have gone away, only the net layer knows can touch it. | ||
284 | */ | ||
285 | } | ||
286 | |||
287 | EXPORT_SYMBOL(sk_stream_kill_queues); | ||
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c new file mode 100644 index 000000000000..c8be646cb191 --- /dev/null +++ b/net/core/sysctl_net_core.c | |||
@@ -0,0 +1,182 @@ | |||
1 | /* -*- linux-c -*- | ||
2 | * sysctl_net_core.c: sysctl interface to net core subsystem. | ||
3 | * | ||
4 | * Begun April 1, 1996, Mike Shaver. | ||
5 | * Added /proc/sys/net/core directory entry (empty =) ). [MS] | ||
6 | */ | ||
7 | |||
8 | #include <linux/mm.h> | ||
9 | #include <linux/sysctl.h> | ||
10 | #include <linux/config.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #ifdef CONFIG_SYSCTL | ||
14 | |||
15 | extern int netdev_max_backlog; | ||
16 | extern int weight_p; | ||
17 | extern int no_cong_thresh; | ||
18 | extern int no_cong; | ||
19 | extern int lo_cong; | ||
20 | extern int mod_cong; | ||
21 | extern int netdev_fastroute; | ||
22 | extern int net_msg_cost; | ||
23 | extern int net_msg_burst; | ||
24 | |||
25 | extern __u32 sysctl_wmem_max; | ||
26 | extern __u32 sysctl_rmem_max; | ||
27 | extern __u32 sysctl_wmem_default; | ||
28 | extern __u32 sysctl_rmem_default; | ||
29 | |||
30 | extern int sysctl_core_destroy_delay; | ||
31 | extern int sysctl_optmem_max; | ||
32 | extern int sysctl_somaxconn; | ||
33 | |||
34 | #ifdef CONFIG_NET_DIVERT | ||
35 | extern char sysctl_divert_version[]; | ||
36 | #endif /* CONFIG_NET_DIVERT */ | ||
37 | |||
38 | /* | ||
39 | * This strdup() is used for creating copies of network | ||
40 | * device names to be handed over to sysctl. | ||
41 | */ | ||
42 | |||
43 | char *net_sysctl_strdup(const char *s) | ||
44 | { | ||
45 | char *rv = kmalloc(strlen(s)+1, GFP_KERNEL); | ||
46 | if (rv) | ||
47 | strcpy(rv, s); | ||
48 | return rv; | ||
49 | } | ||
50 | |||
51 | ctl_table core_table[] = { | ||
52 | #ifdef CONFIG_NET | ||
53 | { | ||
54 | .ctl_name = NET_CORE_WMEM_MAX, | ||
55 | .procname = "wmem_max", | ||
56 | .data = &sysctl_wmem_max, | ||
57 | .maxlen = sizeof(int), | ||
58 | .mode = 0644, | ||
59 | .proc_handler = &proc_dointvec | ||
60 | }, | ||
61 | { | ||
62 | .ctl_name = NET_CORE_RMEM_MAX, | ||
63 | .procname = "rmem_max", | ||
64 | .data = &sysctl_rmem_max, | ||
65 | .maxlen = sizeof(int), | ||
66 | .mode = 0644, | ||
67 | .proc_handler = &proc_dointvec | ||
68 | }, | ||
69 | { | ||
70 | .ctl_name = NET_CORE_WMEM_DEFAULT, | ||
71 | .procname = "wmem_default", | ||
72 | .data = &sysctl_wmem_default, | ||
73 | .maxlen = sizeof(int), | ||
74 | .mode = 0644, | ||
75 | .proc_handler = &proc_dointvec | ||
76 | }, | ||
77 | { | ||
78 | .ctl_name = NET_CORE_RMEM_DEFAULT, | ||
79 | .procname = "rmem_default", | ||
80 | .data = &sysctl_rmem_default, | ||
81 | .maxlen = sizeof(int), | ||
82 | .mode = 0644, | ||
83 | .proc_handler = &proc_dointvec | ||
84 | }, | ||
85 | { | ||
86 | .ctl_name = NET_CORE_DEV_WEIGHT, | ||
87 | .procname = "dev_weight", | ||
88 | .data = &weight_p, | ||
89 | .maxlen = sizeof(int), | ||
90 | .mode = 0644, | ||
91 | .proc_handler = &proc_dointvec | ||
92 | }, | ||
93 | { | ||
94 | .ctl_name = NET_CORE_MAX_BACKLOG, | ||
95 | .procname = "netdev_max_backlog", | ||
96 | .data = &netdev_max_backlog, | ||
97 | .maxlen = sizeof(int), | ||
98 | .mode = 0644, | ||
99 | .proc_handler = &proc_dointvec | ||
100 | }, | ||
101 | { | ||
102 | .ctl_name = NET_CORE_NO_CONG_THRESH, | ||
103 | .procname = "no_cong_thresh", | ||
104 | .data = &no_cong_thresh, | ||
105 | .maxlen = sizeof(int), | ||
106 | .mode = 0644, | ||
107 | .proc_handler = &proc_dointvec | ||
108 | }, | ||
109 | { | ||
110 | .ctl_name = NET_CORE_NO_CONG, | ||
111 | .procname = "no_cong", | ||
112 | .data = &no_cong, | ||
113 | .maxlen = sizeof(int), | ||
114 | .mode = 0644, | ||
115 | .proc_handler = &proc_dointvec | ||
116 | }, | ||
117 | { | ||
118 | .ctl_name = NET_CORE_LO_CONG, | ||
119 | .procname = "lo_cong", | ||
120 | .data = &lo_cong, | ||
121 | .maxlen = sizeof(int), | ||
122 | .mode = 0644, | ||
123 | .proc_handler = &proc_dointvec | ||
124 | }, | ||
125 | { | ||
126 | .ctl_name = NET_CORE_MOD_CONG, | ||
127 | .procname = "mod_cong", | ||
128 | .data = &mod_cong, | ||
129 | .maxlen = sizeof(int), | ||
130 | .mode = 0644, | ||
131 | .proc_handler = &proc_dointvec | ||
132 | }, | ||
133 | { | ||
134 | .ctl_name = NET_CORE_MSG_COST, | ||
135 | .procname = "message_cost", | ||
136 | .data = &net_msg_cost, | ||
137 | .maxlen = sizeof(int), | ||
138 | .mode = 0644, | ||
139 | .proc_handler = &proc_dointvec_jiffies, | ||
140 | .strategy = &sysctl_jiffies, | ||
141 | }, | ||
142 | { | ||
143 | .ctl_name = NET_CORE_MSG_BURST, | ||
144 | .procname = "message_burst", | ||
145 | .data = &net_msg_burst, | ||
146 | .maxlen = sizeof(int), | ||
147 | .mode = 0644, | ||
148 | .proc_handler = &proc_dointvec, | ||
149 | }, | ||
150 | { | ||
151 | .ctl_name = NET_CORE_OPTMEM_MAX, | ||
152 | .procname = "optmem_max", | ||
153 | .data = &sysctl_optmem_max, | ||
154 | .maxlen = sizeof(int), | ||
155 | .mode = 0644, | ||
156 | .proc_handler = &proc_dointvec | ||
157 | }, | ||
158 | #ifdef CONFIG_NET_DIVERT | ||
159 | { | ||
160 | .ctl_name = NET_CORE_DIVERT_VERSION, | ||
161 | .procname = "divert_version", | ||
162 | .data = (void *)sysctl_divert_version, | ||
163 | .maxlen = 32, | ||
164 | .mode = 0444, | ||
165 | .proc_handler = &proc_dostring | ||
166 | }, | ||
167 | #endif /* CONFIG_NET_DIVERT */ | ||
168 | #endif /* CONFIG_NET */ | ||
169 | { | ||
170 | .ctl_name = NET_CORE_SOMAXCONN, | ||
171 | .procname = "somaxconn", | ||
172 | .data = &sysctl_somaxconn, | ||
173 | .maxlen = sizeof(int), | ||
174 | .mode = 0644, | ||
175 | .proc_handler = &proc_dointvec | ||
176 | }, | ||
177 | { .ctl_name = 0 } | ||
178 | }; | ||
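
Each entry in the table above surfaces as /proc/sys/net/core/<procname> with the given mode. A hedged user-space sketch reading one of them, rmem_max:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/rmem_max", "r");
	int val;

	if (f && fscanf(f, "%d", &val) == 1)
		printf("rmem_max = %d\n", val);
	if (f)
		fclose(f);
	return 0;
}
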
179 | |||
180 | EXPORT_SYMBOL(net_sysctl_strdup); | ||
181 | |||
182 | #endif | ||
diff --git a/net/core/utils.c b/net/core/utils.c new file mode 100644 index 000000000000..e11a8654f363 --- /dev/null +++ b/net/core/utils.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * Generic address resolution entity | ||
3 | * | ||
4 | * Authors: | ||
5 | * net_random Alan Cox | ||
6 | * net_ratelimit Andy Kleen | ||
7 | * | ||
8 | * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/jiffies.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/random.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #include <linux/init.h> | ||
25 | |||
26 | #include <asm/system.h> | ||
27 | #include <asm/uaccess.h> | ||
28 | |||
29 | |||
30 | /* | ||
31 | This is a maximally equidistributed combined Tausworthe generator | ||
32 | based on code from GNU Scientific Library 1.5 (30 Jun 2004) | ||
33 | |||
34 | x_n = (s1_n ^ s2_n ^ s3_n) | ||
35 | |||
36 | s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) | ||
37 | s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) | ||
38 | s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) | ||
39 | |||
40 | The period of this generator is about 2^88. | ||
41 | |||
42 | From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe | ||
43 | Generators", Mathematics of Computation, 65, 213 (1996), 203--213. | ||
44 | |||
45 | This is available on the net from L'Ecuyer's home page, | ||
46 | |||
47 | http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps | ||
48 | ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps | ||
49 | |||
50 | There is an erratum in the paper "Tables of Maximally | ||
51 | Equidistributed Combined LFSR Generators", Mathematics of | ||
52 | Computation, 68, 225 (1999), 261--269: | ||
53 | http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps | ||
54 | |||
55 | ... the k_j most significant bits of z_j must be non- | ||
56 | zero, for each j. (Note: this restriction also applies to the | ||
57 | computer code given in [4], but was mistakenly not mentioned in | ||
58 | that paper.) | ||
59 | |||
60 | This affects the seeding procedure by imposing the requirement | ||
61 | s1 > 1, s2 > 7, s3 > 15. | ||
62 | |||
63 | */ | ||
64 | struct nrnd_state { | ||
65 | u32 s1, s2, s3; | ||
66 | }; | ||
67 | |||
68 | static DEFINE_PER_CPU(struct nrnd_state, net_rand_state); | ||
69 | |||
70 | static u32 __net_random(struct nrnd_state *state) | ||
71 | { | ||
72 | #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) | ||
73 | |||
74 | state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); | ||
75 | state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); | ||
76 | state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); | ||
77 | |||
78 | return (state->s1 ^ state->s2 ^ state->s3); | ||
79 | } | ||
80 | |||
81 | static void __net_srandom(struct nrnd_state *state, unsigned long s) | ||
82 | { | ||
83 | if (s == 0) | ||
84 | s = 1; /* default seed is 1 */ | ||
85 | |||
86 | #define LCG(n) (69069 * n) | ||
87 | state->s1 = LCG(s); | ||
88 | state->s2 = LCG(state->s1); | ||
89 | state->s3 = LCG(state->s2); | ||
90 | |||
91 | /* "warm it up" */ | ||
92 | __net_random(state); | ||
93 | __net_random(state); | ||
94 | __net_random(state); | ||
95 | __net_random(state); | ||
96 | __net_random(state); | ||
97 | __net_random(state); | ||
98 | } | ||
99 | |||
100 | |||
101 | unsigned long net_random(void) | ||
102 | { | ||
103 | unsigned long r; | ||
104 | struct nrnd_state *state = &get_cpu_var(net_rand_state); | ||
105 | r = __net_random(state); | ||
106 | put_cpu_var(state); | ||
107 | return r; | ||
108 | } | ||
109 | |||
110 | |||
111 | void net_srandom(unsigned long entropy) | ||
112 | { | ||
113 | struct nrnd_state *state = &get_cpu_var(net_rand_state); | ||
114 | __net_srandom(state, state->s1^entropy); | ||
115 | put_cpu_var(state); | ||
116 | } | ||
117 | |||
118 | void __init net_random_init(void) | ||
119 | { | ||
120 | int i; | ||
121 | |||
122 | for (i = 0; i < NR_CPUS; i++) { | ||
123 | struct nrnd_state *state = &per_cpu(net_rand_state,i); | ||
124 | __net_srandom(state, i+jiffies); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | static int net_random_reseed(void) | ||
129 | { | ||
130 | int i; | ||
131 | unsigned long seed[NR_CPUS]; | ||
132 | |||
133 | get_random_bytes(seed, sizeof(seed)); | ||
134 | for (i = 0; i < NR_CPUS; i++) { | ||
135 | struct nrnd_state *state = &per_cpu(net_rand_state,i); | ||
136 | __net_srandom(state, seed[i]); | ||
137 | } | ||
138 | return 0; | ||
139 | } | ||
140 | late_initcall(net_random_reseed); | ||
141 | |||
142 | int net_msg_cost = 5*HZ; | ||
143 | int net_msg_burst = 10; | ||
144 | |||
145 | /* | ||
146 | * All net warning printk()s should be guarded by this function. | ||
147 | */ | ||
148 | int net_ratelimit(void) | ||
149 | { | ||
150 | return __printk_ratelimit(net_msg_cost, net_msg_burst); | ||
151 | } | ||
152 | |||
153 | EXPORT_SYMBOL(net_random); | ||
154 | EXPORT_SYMBOL(net_ratelimit); | ||
155 | EXPORT_SYMBOL(net_srandom); | ||
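
A minimal sketch of the guard net_ratelimit() provides, as the comment above prescribes for networking warnings; "mydrv" and the message are illustrative:

#include <linux/kernel.h>

static void my_warn_bad_packet(void)
{
	/* At most net_msg_burst messages per net_msg_cost jiffies. */
	if (net_ratelimit())
		printk(KERN_WARNING "mydrv: malformed packet dropped\n");
}
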
diff --git a/net/core/wireless.c b/net/core/wireless.c new file mode 100644 index 000000000000..750cc5daeb03 --- /dev/null +++ b/net/core/wireless.c | |||
@@ -0,0 +1,1459 @@ | |||
1 | /* | ||
2 | * This file implements the Wireless Extensions APIs. | ||
3 | * | ||
4 | * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> | ||
5 | * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved. | ||
6 | * | ||
7 | * (Like all parts of the Linux kernel, this file is GPL) | ||
8 | */ | ||
9 | |||
10 | /************************** DOCUMENTATION **************************/ | ||
11 | /* | ||
12 | * API definition : | ||
13 | * -------------- | ||
14 | * See <linux/wireless.h> for details of the APIs and the rest. | ||
15 | * | ||
16 | * History : | ||
17 | * ------- | ||
18 | * | ||
19 | * v1 - 5.12.01 - Jean II | ||
20 | * o Created this file. | ||
21 | * | ||
22 | * v2 - 13.12.01 - Jean II | ||
23 | * o Move /proc/net/wireless stuff from net/core/dev.c to here | ||
24 | * o Make Wireless Extension IOCTLs go through here | ||
25 | * o Added iw_handler handling ;-) | ||
26 | * o Added standard ioctl description | ||
27 | * o Initial dumb commit strategy based on orinoco.c | ||
28 | * | ||
29 | * v3 - 19.12.01 - Jean II | ||
30 | * o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call | ||
31 | * o Add event dispatcher function | ||
32 | * o Add event description | ||
33 | * o Propagate events as rtnetlink IFLA_WIRELESS option | ||
34 | * o Generate event on selected SET requests | ||
35 | * | ||
36 | * v4 - 18.04.02 - Jean II | ||
37 | * o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1 | ||
38 | * | ||
39 | * v5 - 21.06.02 - Jean II | ||
40 | * o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup) | ||
41 | * o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes | ||
42 | * o Add IWEVCUSTOM for driver specific event/scanning token | ||
43 | * o Turn on WE_STRICT_WRITE by default + kernel warning | ||
44 | * o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num) | ||
45 | * o Fix off-by-one in test (extra_size <= IFNAMSIZ) | ||
46 | * | ||
47 | * v6 - 9.01.03 - Jean II | ||
48 | * o Add common spy support : iw_handler_set_spy(), wireless_spy_update() | ||
49 | * o Add enhanced spy support : iw_handler_set_thrspy() and event. | ||
50 | * o Add WIRELESS_EXT version display in /proc/net/wireless | ||
51 | * | ||
52 | * v6 - 18.06.04 - Jean II | ||
53 | * o Change get_spydata() method for added safety | ||
54 | * o Remove spy #ifdef, they are always on -> cleaner code | ||
55 | * o Allow any size GET request if user specifies length > max | ||
56 | * and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV | ||
57 | * o Start migrating get_wireless_stats to struct iw_handler_def | ||
58 | * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus | ||
59 | * Based on patch from Pavel Roskin <proski@gnu.org> : | ||
60 | * o Fix kernel data leak to user space in private handler handling | ||
61 | */ | ||
62 | |||
63 | /***************************** INCLUDES *****************************/ | ||
64 | |||
65 | #include <linux/config.h> /* Not needed ??? */ | ||
66 | #include <linux/module.h> | ||
67 | #include <linux/types.h> /* off_t */ | ||
68 | #include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ | ||
69 | #include <linux/proc_fs.h> | ||
70 | #include <linux/rtnetlink.h> /* rtnetlink stuff */ | ||
71 | #include <linux/seq_file.h> | ||
72 | #include <linux/init.h> /* for __init */ | ||
73 | #include <linux/if_arp.h> /* ARPHRD_ETHER */ | ||
74 | |||
75 | #include <linux/wireless.h> /* Pretty obvious */ | ||
76 | #include <net/iw_handler.h> /* New driver API */ | ||
77 | |||
78 | #include <asm/uaccess.h> /* copy_to_user() */ | ||
79 | |||
80 | /**************************** CONSTANTS ****************************/ | ||
81 | |||
82 | /* Debugging stuff */ | ||
83 | #undef WE_IOCTL_DEBUG /* Debug IOCTL API */ | ||
84 | #undef WE_EVENT_DEBUG /* Debug Event dispatcher */ | ||
85 | #undef WE_SPY_DEBUG /* Debug enhanced spy support */ | ||
86 | |||
87 | /* Options */ | ||
88 | #define WE_EVENT_NETLINK /* Propagate events using rtnetlink */ | ||
89 | #define WE_SET_EVENT /* Generate an event on some set commands */ | ||
90 | |||
91 | /************************* GLOBAL VARIABLES *************************/ | ||
92 | /* | ||
93 | * You should not use global variables, because of re-entrancy. | ||
94 | * In our case, it's only const, so it's OK... | ||
95 | */ | ||
96 | /* | ||
97 | * Meta-data about all the standard Wireless Extension requests we | ||
98 | * know about. | ||
99 | */ | ||
100 | static const struct iw_ioctl_description standard_ioctl[] = { | ||
101 | [SIOCSIWCOMMIT - SIOCIWFIRST] = { | ||
102 | .header_type = IW_HEADER_TYPE_NULL, | ||
103 | }, | ||
104 | [SIOCGIWNAME - SIOCIWFIRST] = { | ||
105 | .header_type = IW_HEADER_TYPE_CHAR, | ||
106 | .flags = IW_DESCR_FLAG_DUMP, | ||
107 | }, | ||
108 | [SIOCSIWNWID - SIOCIWFIRST] = { | ||
109 | .header_type = IW_HEADER_TYPE_PARAM, | ||
110 | .flags = IW_DESCR_FLAG_EVENT, | ||
111 | }, | ||
112 | [SIOCGIWNWID - SIOCIWFIRST] = { | ||
113 | .header_type = IW_HEADER_TYPE_PARAM, | ||
114 | .flags = IW_DESCR_FLAG_DUMP, | ||
115 | }, | ||
116 | [SIOCSIWFREQ - SIOCIWFIRST] = { | ||
117 | .header_type = IW_HEADER_TYPE_FREQ, | ||
118 | .flags = IW_DESCR_FLAG_EVENT, | ||
119 | }, | ||
120 | [SIOCGIWFREQ - SIOCIWFIRST] = { | ||
121 | .header_type = IW_HEADER_TYPE_FREQ, | ||
122 | .flags = IW_DESCR_FLAG_DUMP, | ||
123 | }, | ||
124 | [SIOCSIWMODE - SIOCIWFIRST] = { | ||
125 | .header_type = IW_HEADER_TYPE_UINT, | ||
126 | .flags = IW_DESCR_FLAG_EVENT, | ||
127 | }, | ||
128 | [SIOCGIWMODE - SIOCIWFIRST] = { | ||
129 | .header_type = IW_HEADER_TYPE_UINT, | ||
130 | .flags = IW_DESCR_FLAG_DUMP, | ||
131 | }, | ||
132 | [SIOCSIWSENS - SIOCIWFIRST] = { | ||
133 | .header_type = IW_HEADER_TYPE_PARAM, | ||
134 | }, | ||
135 | [SIOCGIWSENS - SIOCIWFIRST] = { | ||
136 | .header_type = IW_HEADER_TYPE_PARAM, | ||
137 | }, | ||
138 | [SIOCSIWRANGE - SIOCIWFIRST] = { | ||
139 | .header_type = IW_HEADER_TYPE_NULL, | ||
140 | }, | ||
141 | [SIOCGIWRANGE - SIOCIWFIRST] = { | ||
142 | .header_type = IW_HEADER_TYPE_POINT, | ||
143 | .token_size = 1, | ||
144 | .max_tokens = sizeof(struct iw_range), | ||
145 | .flags = IW_DESCR_FLAG_DUMP, | ||
146 | }, | ||
147 | [SIOCSIWPRIV - SIOCIWFIRST] = { | ||
148 | .header_type = IW_HEADER_TYPE_NULL, | ||
149 | }, | ||
150 | [SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */ | ||
151 | .header_type = IW_HEADER_TYPE_NULL, | ||
152 | }, | ||
153 | [SIOCSIWSTATS - SIOCIWFIRST] = { | ||
154 | .header_type = IW_HEADER_TYPE_NULL, | ||
155 | }, | ||
156 | [SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */ | ||
157 | .header_type = IW_HEADER_TYPE_NULL, | ||
158 | .flags = IW_DESCR_FLAG_DUMP, | ||
159 | }, | ||
160 | [SIOCSIWSPY - SIOCIWFIRST] = { | ||
161 | .header_type = IW_HEADER_TYPE_POINT, | ||
162 | .token_size = sizeof(struct sockaddr), | ||
163 | .max_tokens = IW_MAX_SPY, | ||
164 | }, | ||
165 | [SIOCGIWSPY - SIOCIWFIRST] = { | ||
166 | .header_type = IW_HEADER_TYPE_POINT, | ||
167 | .token_size = sizeof(struct sockaddr) + | ||
168 | sizeof(struct iw_quality), | ||
169 | .max_tokens = IW_MAX_SPY, | ||
170 | }, | ||
171 | [SIOCSIWTHRSPY - SIOCIWFIRST] = { | ||
172 | .header_type = IW_HEADER_TYPE_POINT, | ||
173 | .token_size = sizeof(struct iw_thrspy), | ||
174 | .min_tokens = 1, | ||
175 | .max_tokens = 1, | ||
176 | }, | ||
177 | [SIOCGIWTHRSPY - SIOCIWFIRST] = { | ||
178 | .header_type = IW_HEADER_TYPE_POINT, | ||
179 | .token_size = sizeof(struct iw_thrspy), | ||
180 | .min_tokens = 1, | ||
181 | .max_tokens = 1, | ||
182 | }, | ||
183 | [SIOCSIWAP - SIOCIWFIRST] = { | ||
184 | .header_type = IW_HEADER_TYPE_ADDR, | ||
185 | }, | ||
186 | [SIOCGIWAP - SIOCIWFIRST] = { | ||
187 | .header_type = IW_HEADER_TYPE_ADDR, | ||
188 | .flags = IW_DESCR_FLAG_DUMP, | ||
189 | }, | ||
190 | [SIOCGIWAPLIST - SIOCIWFIRST] = { | ||
191 | .header_type = IW_HEADER_TYPE_POINT, | ||
192 | .token_size = sizeof(struct sockaddr) + | ||
193 | sizeof(struct iw_quality), | ||
194 | .max_tokens = IW_MAX_AP, | ||
195 | .flags = IW_DESCR_FLAG_NOMAX, | ||
196 | }, | ||
197 | [SIOCSIWSCAN - SIOCIWFIRST] = { | ||
198 | .header_type = IW_HEADER_TYPE_PARAM, | ||
199 | }, | ||
200 | [SIOCGIWSCAN - SIOCIWFIRST] = { | ||
201 | .header_type = IW_HEADER_TYPE_POINT, | ||
202 | .token_size = 1, | ||
203 | .max_tokens = IW_SCAN_MAX_DATA, | ||
204 | .flags = IW_DESCR_FLAG_NOMAX, | ||
205 | }, | ||
206 | [SIOCSIWESSID - SIOCIWFIRST] = { | ||
207 | .header_type = IW_HEADER_TYPE_POINT, | ||
208 | .token_size = 1, | ||
209 | .max_tokens = IW_ESSID_MAX_SIZE + 1, | ||
210 | .flags = IW_DESCR_FLAG_EVENT, | ||
211 | }, | ||
212 | [SIOCGIWESSID - SIOCIWFIRST] = { | ||
213 | .header_type = IW_HEADER_TYPE_POINT, | ||
214 | .token_size = 1, | ||
215 | .max_tokens = IW_ESSID_MAX_SIZE + 1, | ||
216 | .flags = IW_DESCR_FLAG_DUMP, | ||
217 | }, | ||
218 | [SIOCSIWNICKN - SIOCIWFIRST] = { | ||
219 | .header_type = IW_HEADER_TYPE_POINT, | ||
220 | .token_size = 1, | ||
221 | .max_tokens = IW_ESSID_MAX_SIZE + 1, | ||
222 | }, | ||
223 | [SIOCGIWNICKN - SIOCIWFIRST] = { | ||
224 | .header_type = IW_HEADER_TYPE_POINT, | ||
225 | .token_size = 1, | ||
226 | .max_tokens = IW_ESSID_MAX_SIZE + 1, | ||
227 | }, | ||
228 | [SIOCSIWRATE - SIOCIWFIRST] = { | ||
229 | .header_type = IW_HEADER_TYPE_PARAM, | ||
230 | }, | ||
231 | [SIOCGIWRATE - SIOCIWFIRST] = { | ||
232 | .header_type = IW_HEADER_TYPE_PARAM, | ||
233 | }, | ||
234 | [SIOCSIWRTS - SIOCIWFIRST] = { | ||
235 | .header_type = IW_HEADER_TYPE_PARAM, | ||
236 | }, | ||
237 | [SIOCGIWRTS - SIOCIWFIRST] = { | ||
238 | .header_type = IW_HEADER_TYPE_PARAM, | ||
239 | }, | ||
240 | [SIOCSIWFRAG - SIOCIWFIRST] = { | ||
241 | .header_type = IW_HEADER_TYPE_PARAM, | ||
242 | }, | ||
243 | [SIOCGIWFRAG - SIOCIWFIRST] = { | ||
244 | .header_type = IW_HEADER_TYPE_PARAM, | ||
245 | }, | ||
246 | [SIOCSIWTXPOW - SIOCIWFIRST] = { | ||
247 | .header_type = IW_HEADER_TYPE_PARAM, | ||
248 | }, | ||
249 | [SIOCGIWTXPOW - SIOCIWFIRST] = { | ||
250 | .header_type = IW_HEADER_TYPE_PARAM, | ||
251 | }, | ||
252 | [SIOCSIWRETRY - SIOCIWFIRST] = { | ||
253 | .header_type = IW_HEADER_TYPE_PARAM, | ||
254 | }, | ||
255 | [SIOCGIWRETRY - SIOCIWFIRST] = { | ||
256 | .header_type = IW_HEADER_TYPE_PARAM, | ||
257 | }, | ||
258 | [SIOCSIWENCODE - SIOCIWFIRST] = { | ||
259 | .header_type = IW_HEADER_TYPE_POINT, | ||
260 | .token_size = 1, | ||
261 | .max_tokens = IW_ENCODING_TOKEN_MAX, | ||
262 | .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, | ||
263 | }, | ||
264 | [SIOCGIWENCODE - SIOCIWFIRST] = { | ||
265 | .header_type = IW_HEADER_TYPE_POINT, | ||
266 | .token_size = 1, | ||
267 | .max_tokens = IW_ENCODING_TOKEN_MAX, | ||
268 | .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, | ||
269 | }, | ||
270 | [SIOCSIWPOWER - SIOCIWFIRST] = { | ||
271 | .header_type = IW_HEADER_TYPE_PARAM, | ||
272 | }, | ||
273 | [SIOCGIWPOWER - SIOCIWFIRST] = { | ||
274 | .header_type = IW_HEADER_TYPE_PARAM, | ||
275 | }, | ||
276 | }; | ||
277 | static const int standard_ioctl_num = (sizeof(standard_ioctl) / | ||
278 | sizeof(struct iw_ioctl_description)); | ||
279 | |||
280 | /* | ||
281 | * Meta-data about all the additional standard Wireless Extension events | ||
282 | * we know about. | ||
283 | */ | ||
284 | static const struct iw_ioctl_description standard_event[] = { | ||
285 | [IWEVTXDROP - IWEVFIRST] = { | ||
286 | .header_type = IW_HEADER_TYPE_ADDR, | ||
287 | }, | ||
288 | [IWEVQUAL - IWEVFIRST] = { | ||
289 | .header_type = IW_HEADER_TYPE_QUAL, | ||
290 | }, | ||
291 | [IWEVCUSTOM - IWEVFIRST] = { | ||
292 | .header_type = IW_HEADER_TYPE_POINT, | ||
293 | .token_size = 1, | ||
294 | .max_tokens = IW_CUSTOM_MAX, | ||
295 | }, | ||
296 | [IWEVREGISTERED - IWEVFIRST] = { | ||
297 | .header_type = IW_HEADER_TYPE_ADDR, | ||
298 | }, | ||
299 | [IWEVEXPIRED - IWEVFIRST] = { | ||
300 | .header_type = IW_HEADER_TYPE_ADDR, | ||
301 | }, | ||
302 | }; | ||
303 | static const int standard_event_num = (sizeof(standard_event) / | ||
304 | sizeof(struct iw_ioctl_description)); | ||
305 | |||
306 | /* Size (in bytes) of the various private data types */ | ||
307 | static const char iw_priv_type_size[] = { | ||
308 | 0, /* IW_PRIV_TYPE_NONE */ | ||
309 | 1, /* IW_PRIV_TYPE_BYTE */ | ||
310 | 1, /* IW_PRIV_TYPE_CHAR */ | ||
311 | 0, /* Not defined */ | ||
312 | sizeof(__u32), /* IW_PRIV_TYPE_INT */ | ||
313 | sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */ | ||
314 | sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */ | ||
315 | 0, /* Not defined */ | ||
316 | }; | ||
317 | |||
318 | /* Size (in bytes) of various events */ | ||
319 | static const int event_type_size[] = { | ||
320 | IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */ | ||
321 | 0, | ||
322 | IW_EV_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ | ||
323 | 0, | ||
324 | IW_EV_UINT_LEN, /* IW_HEADER_TYPE_UINT */ | ||
325 | IW_EV_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ | ||
326 | IW_EV_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ | ||
327 | 0, | ||
328 | IW_EV_POINT_LEN, /* Without variable payload */ | ||
329 | IW_EV_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ | ||
330 | IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ | ||
331 | }; | ||
332 | |||
333 | /************************ COMMON SUBROUTINES ************************/ | ||
334 | /* | ||
335 | * Stuff that may be used in various places or doesn't fit in one | ||
336 | * of the sections below. | ||
337 | */ | ||
338 | |||
339 | /* ---------------------------------------------------------------- */ | ||
340 | /* | ||
341 | * Return the driver handler associated with a specific Wireless Extension. | ||
342 | * Called from various places, so make sure it remains efficient. | ||
343 | */ | ||
344 | static inline iw_handler get_handler(struct net_device *dev, | ||
345 | unsigned int cmd) | ||
346 | { | ||
347 | /* Don't "optimise" the following variable, it will crash */ | ||
348 | unsigned int index; /* *MUST* be unsigned */ | ||
349 | |||
350 | /* Check if we have some wireless handlers defined */ | ||
351 | if(dev->wireless_handlers == NULL) | ||
352 | return NULL; | ||
353 | |||
354 | /* Try as a standard command */ | ||
355 | index = cmd - SIOCIWFIRST; | ||
356 | if(index < dev->wireless_handlers->num_standard) | ||
357 | return dev->wireless_handlers->standard[index]; | ||
358 | |||
359 | /* Try as a private command */ | ||
360 | index = cmd - SIOCIWFIRSTPRIV; | ||
361 | if(index < dev->wireless_handlers->num_private) | ||
362 | return dev->wireless_handlers->private[index]; | ||
363 | |||
364 | /* Not found */ | ||
365 | return NULL; | ||
366 | } | ||
367 | |||
368 | /* ---------------------------------------------------------------- */ | ||
369 | /* | ||
370 | * Get statistics out of the driver | ||
371 | */ | ||
372 | static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) | ||
373 | { | ||
374 | /* New location */ | ||
375 | if((dev->wireless_handlers != NULL) && | ||
376 | (dev->wireless_handlers->get_wireless_stats != NULL)) | ||
377 | return dev->wireless_handlers->get_wireless_stats(dev); | ||
378 | |||
379 | /* Old location, will be phased out in next WE */ | ||
380 | return (dev->get_wireless_stats ? | ||
381 | dev->get_wireless_stats(dev) : | ||
382 | (struct iw_statistics *) NULL); | ||
383 | } | ||
384 | |||
385 | /* ---------------------------------------------------------------- */ | ||
386 | /* | ||
387 | * Call the commit handler in the driver | ||
388 | * (if it exists and conditions are right) | ||
389 | * | ||
390 | * Note : our commit strategy is currently pretty dumb, | ||
391 | * but we will be able to improve on that... | ||
392 | * The goal is to try to aggregate as many changes as possible | ||
393 | * before doing the commit. Drivers that define a commit handler | ||
394 | * are usually those that need a reset after changing parameters, | ||
395 | * so we want to minimise the number of resets. | ||
396 | * A cool idea is to use a timer : at each "set" command, we re-arm | ||
397 | * the timer; when the timer eventually fires, we call the driver. | ||
398 | * Hopefully, more on that later. | ||
399 | * | ||
400 | * Also, I'm waiting to see how many people will complain about the | ||
401 | * netif_running(dev) test. I'm open on that one... | ||
402 | * Hopefully, the driver will remember to do a commit in "open()" ;-) | ||
403 | */ | ||
404 | static inline int call_commit_handler(struct net_device * dev) | ||
405 | { | ||
406 | if((netif_running(dev)) && | ||
407 | (dev->wireless_handlers->standard[0] != NULL)) { | ||
408 | /* Call the commit handler on the driver */ | ||
409 | return dev->wireless_handlers->standard[0](dev, NULL, | ||
410 | NULL, NULL); | ||
411 | } else | ||
412 | return 0; /* Command completed successfully */ | ||
413 | } | ||
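
A hedged sketch of the timer idea floated in the comment above; none of this exists in the file. Each "set" would re-arm a per-device commit timer, so bursts of closely spaced changes collapse into a single commit (and thus a single driver reset):

static void commit_timer_fire(unsigned long data)
{
	call_commit_handler((struct net_device *)data);
}

static void commit_later(struct timer_list *t)
{
	/* t->function = commit_timer_fire and t->data = (unsigned long)dev
	 * are assumed to have been set up when the device registered. */
	mod_timer(t, jiffies + HZ / 10);	/* debounce window */
}
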
414 | |||
415 | /* ---------------------------------------------------------------- */ | ||
416 | /* | ||
417 | * Calculate size of private arguments | ||
418 | */ | ||
419 | static inline int get_priv_size(__u16 args) | ||
420 | { | ||
421 | int num = args & IW_PRIV_SIZE_MASK; | ||
422 | int type = (args & IW_PRIV_TYPE_MASK) >> 12; | ||
423 | |||
424 | return num * iw_priv_type_size[type]; | ||
425 | } | ||
426 | |||
427 | /* ---------------------------------------------------------------- */ | ||
428 | /* | ||
429 | * Re-calculate the size of private arguments | ||
430 | */ | ||
431 | static inline int adjust_priv_size(__u16 args, | ||
432 | union iwreq_data * wrqu) | ||
433 | { | ||
434 | int num = wrqu->data.length; | ||
435 | int max = args & IW_PRIV_SIZE_MASK; | ||
436 | int type = (args & IW_PRIV_TYPE_MASK) >> 12; | ||
437 | |||
438 | /* Make sure the driver doesn't goof up */ | ||
439 | if (max < num) | ||
440 | num = max; | ||
441 | |||
442 | return num * iw_priv_type_size[type]; | ||
443 | } | ||
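
A hedged worked example of the encoding these two helpers decode: the __u16 args word packs the type in the IW_PRIV_TYPE_MASK bits (shifted down by 12) and the element count in IW_PRIV_SIZE_MASK. For a descriptor declaring up to 16 integers, type = 4, num = 16, so get_priv_size() yields 16 * sizeof(__u32) = 64 bytes:

static const __u16 example_priv_args = IW_PRIV_TYPE_INT | 16;
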
444 | |||
445 | |||
446 | /******************** /proc/net/wireless SUPPORT ********************/ | ||
447 | /* | ||
448 | * The /proc/net/wireless file is a human-readable user-space interface | ||
449 | * exporting various wireless specific statistics from the wireless devices. | ||
450 | * This is the most popular part of the Wireless Extensions ;-) | ||
451 | * | ||
452 | * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). | ||
453 | * The content of the file is basically the content of "struct iw_statistics". | ||
454 | */ | ||
455 | |||
456 | #ifdef CONFIG_PROC_FS | ||
457 | |||
458 | /* ---------------------------------------------------------------- */ | ||
459 | /* | ||
460 | * Print one entry (line) of /proc/net/wireless | ||
461 | */ | ||
462 | static __inline__ void wireless_seq_printf_stats(struct seq_file *seq, | ||
463 | struct net_device *dev) | ||
464 | { | ||
465 | /* Get stats from the driver */ | ||
466 | struct iw_statistics *stats = get_wireless_stats(dev); | ||
467 | |||
468 | if (stats) { | ||
469 | seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " | ||
470 | "%6d %6d %6d\n", | ||
471 | dev->name, stats->status, stats->qual.qual, | ||
472 | stats->qual.updated & IW_QUAL_QUAL_UPDATED | ||
473 | ? '.' : ' ', | ||
474 | ((__u8) stats->qual.level), | ||
475 | stats->qual.updated & IW_QUAL_LEVEL_UPDATED | ||
476 | ? '.' : ' ', | ||
477 | ((__u8) stats->qual.noise), | ||
478 | stats->qual.updated & IW_QUAL_NOISE_UPDATED | ||
479 | ? '.' : ' ', | ||
480 | stats->discard.nwid, stats->discard.code, | ||
481 | stats->discard.fragment, stats->discard.retries, | ||
482 | stats->discard.misc, stats->miss.beacon); | ||
483 | stats->qual.updated = 0; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* ---------------------------------------------------------------- */ | ||
488 | /* | ||
489 | * Print info for /proc/net/wireless (print all entries) | ||
490 | */ | ||
491 | static int wireless_seq_show(struct seq_file *seq, void *v) | ||
492 | { | ||
493 | if (v == SEQ_START_TOKEN) | ||
494 | seq_printf(seq, "Inter-| sta-| Quality | Discarded " | ||
495 | "packets | Missed | WE\n" | ||
496 | " face | tus | link level noise | nwid " | ||
497 | "crypt frag retry misc | beacon | %d\n", | ||
498 | WIRELESS_EXT); | ||
499 | else | ||
500 | wireless_seq_printf_stats(seq, v); | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); | ||
505 | extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); | ||
506 | extern void dev_seq_stop(struct seq_file *seq, void *v); | ||
507 | |||
508 | static struct seq_operations wireless_seq_ops = { | ||
509 | .start = dev_seq_start, | ||
510 | .next = dev_seq_next, | ||
511 | .stop = dev_seq_stop, | ||
512 | .show = wireless_seq_show, | ||
513 | }; | ||
514 | |||
515 | static int wireless_seq_open(struct inode *inode, struct file *file) | ||
516 | { | ||
517 | return seq_open(file, &wireless_seq_ops); | ||
518 | } | ||
519 | |||
520 | static struct file_operations wireless_seq_fops = { | ||
521 | .owner = THIS_MODULE, | ||
522 | .open = wireless_seq_open, | ||
523 | .read = seq_read, | ||
524 | .llseek = seq_lseek, | ||
525 | .release = seq_release, | ||
526 | }; | ||
527 | |||
528 | int __init wireless_proc_init(void) | ||
529 | { | ||
530 | if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) | ||
531 | return -ENOMEM; | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | #endif /* CONFIG_PROC_FS */ | ||
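For completeness, a trivial user-space consumer (a sketch, not part of this file) that simply streams the two header lines plus one line per wireless device:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/net/wireless", "r");
        char line[256];

        if (f == NULL)
            return 1;
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);
        return 0;
    }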
536 | |||
537 | /************************** IOCTL SUPPORT **************************/ | ||
538 | /* | ||
539 | * The original user space API to configure all those Wireless Extensions | ||
540 | * is through IOCTLs. | ||
541 | * Here we check if we need to call the new driver API (iw_handler) | ||
542 | * or just call the driver ioctl handler. | ||
543 | */ | ||
544 | |||
545 | /* ---------------------------------------------------------------- */ | ||
546 | /* | ||
547 | * Allow programmatic access to /proc/net/wireless even if /proc | ||
548 | * doesn't exist... It's also more efficient. | ||
549 | */ | ||
550 | static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr) | ||
551 | { | ||
552 | /* Get stats from the driver */ | ||
553 | struct iw_statistics *stats; | ||
554 | |||
555 | stats = get_wireless_stats(dev); | ||
556 | if (stats != NULL) { | ||
557 | struct iwreq * wrq = (struct iwreq *)ifr; | ||
558 | |||
559 | /* Copy statistics to the user buffer */ | ||
560 | if(copy_to_user(wrq->u.data.pointer, stats, | ||
561 | sizeof(struct iw_statistics))) | ||
562 | return -EFAULT; | ||
563 | |||
564 | /* Check if we need to clear the update flag */ | ||
565 | if(wrq->u.data.flags != 0) | ||
566 | stats->qual.updated = 0; | ||
567 | return 0; | ||
568 | } else | ||
569 | return -EOPNOTSUPP; | ||
570 | } | ||
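The matching user-space request would look roughly like this sketch (illustrative names; skfd is any ordinary socket, e.g. from socket(AF_INET, SOCK_DGRAM, 0), and error handling is omitted):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/wireless.h>

    /* Fetch struct iw_statistics directly, bypassing /proc. */
    int read_iw_stats(int skfd, const char *ifname, struct iw_statistics *stats)
    {
        struct iwreq wrq;

        memset(&wrq, 0, sizeof(wrq));
        strncpy(wrq.ifr_name, ifname, IFNAMSIZ);
        wrq.u.data.pointer = stats;
        wrq.u.data.length = sizeof(*stats);
        wrq.u.data.flags = 1;    /* ask the kernel to clear "updated" */
        return ioctl(skfd, SIOCGIWSTATS, &wrq);
    }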
571 | |||
572 | /* ---------------------------------------------------------------- */ | ||
573 | /* | ||
574 | * Export the driver private handler definitions. | ||
575 | * They will be picked up by tools like iwpriv... | ||
576 | */ | ||
577 | static inline int ioctl_export_private(struct net_device * dev, | ||
578 | struct ifreq * ifr) | ||
579 | { | ||
580 | struct iwreq * iwr = (struct iwreq *) ifr; | ||
581 | |||
582 | /* Check if the driver has something to export */ | ||
583 | if((dev->wireless_handlers->num_private_args == 0) || | ||
584 | (dev->wireless_handlers->private_args == NULL)) | ||
585 | return -EOPNOTSUPP; | ||
586 | |||
587 | /* Check NULL pointer */ | ||
588 | if(iwr->u.data.pointer == NULL) | ||
589 | return -EFAULT; | ||
590 | |||
591 | /* Check if there is enough buffer up there */ | ||
592 | if(iwr->u.data.length < dev->wireless_handlers->num_private_args) { | ||
593 | /* User space can't know in advance how large the buffer | ||
594 | * needs to be. Give it a hint, so that we can support | ||
595 | * any size buffer we want somewhat efficiently... */ | ||
596 | iwr->u.data.length = dev->wireless_handlers->num_private_args; | ||
597 | return -E2BIG; | ||
598 | } | ||
599 | |||
600 | /* Set the number of available ioctls. */ | ||
601 | iwr->u.data.length = dev->wireless_handlers->num_private_args; | ||
602 | |||
603 | /* Copy structure to the user buffer. */ | ||
604 | if (copy_to_user(iwr->u.data.pointer, | ||
605 | dev->wireless_handlers->private_args, | ||
606 | sizeof(struct iw_priv_args) * iwr->u.data.length)) | ||
607 | return -EFAULT; | ||
608 | |||
609 | return 0; | ||
610 | } | ||
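Seen from user space this implies a guess-then-retry pattern; a sketch with illustrative names (the retry on E2BIG, using the length hinted back, is left out for brevity):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/wireless.h>

    int dump_priv_ioctls(int skfd, const char *ifname)
    {
        struct iw_priv_args priv[128];    /* a guess; E2BIG hints the real size */
        struct iwreq wrq;
        int i;

        memset(&wrq, 0, sizeof(wrq));
        strncpy(wrq.ifr_name, ifname, IFNAMSIZ);
        wrq.u.data.pointer = priv;
        wrq.u.data.length = 128;
        if (ioctl(skfd, SIOCGIWPRIV, &wrq) < 0)
            return -errno;
        for (i = 0; i < wrq.u.data.length; i++)
            printf("0x%04X %s\n", priv[i].cmd, priv[i].name);
        return 0;
    }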
611 | |||
612 | /* ---------------------------------------------------------------- */ | ||
613 | /* | ||
614 | * Wrapper to call a standard Wireless Extension handler. | ||
615 | * We do various checks and also take care of moving data between | ||
616 | * user space and kernel space. | ||
617 | */ | ||
618 | static inline int ioctl_standard_call(struct net_device * dev, | ||
619 | struct ifreq * ifr, | ||
620 | unsigned int cmd, | ||
621 | iw_handler handler) | ||
622 | { | ||
623 | struct iwreq * iwr = (struct iwreq *) ifr; | ||
624 | const struct iw_ioctl_description * descr; | ||
625 | struct iw_request_info info; | ||
626 | int ret = -EINVAL; | ||
627 | |||
628 | /* Get the description of the IOCTL */ | ||
629 | if((cmd - SIOCIWFIRST) >= standard_ioctl_num) | ||
630 | return -EOPNOTSUPP; | ||
631 | descr = &(standard_ioctl[cmd - SIOCIWFIRST]); | ||
632 | |||
633 | #ifdef WE_IOCTL_DEBUG | ||
634 | printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n", | ||
635 | ifr->ifr_name, cmd); | ||
636 | printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens); | ||
637 | #endif /* WE_IOCTL_DEBUG */ | ||
638 | |||
639 | /* Prepare the call */ | ||
640 | info.cmd = cmd; | ||
641 | info.flags = 0; | ||
642 | |||
643 | /* Check if we have a pointer to user space data or not */ | ||
644 | if(descr->header_type != IW_HEADER_TYPE_POINT) { | ||
645 | |||
646 | /* No extra arguments. Trivial to handle */ | ||
647 | ret = handler(dev, &info, &(iwr->u), NULL); | ||
648 | |||
649 | #ifdef WE_SET_EVENT | ||
650 | /* Generate an event to notify listeners of the change */ | ||
651 | if((descr->flags & IW_DESCR_FLAG_EVENT) && | ||
652 | ((ret == 0) || (ret == -EIWCOMMIT))) | ||
653 | wireless_send_event(dev, cmd, &(iwr->u), NULL); | ||
654 | #endif /* WE_SET_EVENT */ | ||
655 | } else { | ||
656 | char * extra; | ||
657 | int extra_size; | ||
658 | int user_length = 0; | ||
659 | int err; | ||
660 | |||
661 | /* Calculate space needed by arguments. Always allocate | ||
662 | * for max space. Easier, and won't last long... */ | ||
663 | extra_size = descr->max_tokens * descr->token_size; | ||
664 | |||
665 | /* Check what user space is giving us */ | ||
666 | if(IW_IS_SET(cmd)) { | ||
667 | /* Check NULL pointer */ | ||
668 | if((iwr->u.data.pointer == NULL) && | ||
669 | (iwr->u.data.length != 0)) | ||
670 | return -EFAULT; | ||
671 | /* Check if number of tokens fits within bounds */ | ||
672 | if(iwr->u.data.length > descr->max_tokens) | ||
673 | return -E2BIG; | ||
674 | if(iwr->u.data.length < descr->min_tokens) | ||
675 | return -EINVAL; | ||
676 | } else { | ||
677 | /* Check NULL pointer */ | ||
678 | if(iwr->u.data.pointer == NULL) | ||
679 | return -EFAULT; | ||
680 | /* Save user space buffer size for checking */ | ||
681 | user_length = iwr->u.data.length; | ||
682 | |||
683 | /* Don't check if user_length > max to allow forward | ||
684 | * compatibility. The test user_length < min is | ||
685 | * implied by the test at the end. */ | ||
686 | |||
687 | /* Support for very large requests */ | ||
688 | if((descr->flags & IW_DESCR_FLAG_NOMAX) && | ||
689 | (user_length > descr->max_tokens)) { | ||
690 | /* Allow userspace to GET more than max so | ||
691 | * we can support any size GET requests. | ||
692 | * There is still a limit : -ENOMEM. */ | ||
693 | extra_size = user_length * descr->token_size; | ||
694 | /* Note : user_length is originally a __u16, | ||
695 | * and token_size is controlled by us, | ||
696 | * so extra_size won't get negative and | ||
697 | * won't overflow... */ | ||
698 | } | ||
699 | } | ||
700 | |||
701 | #ifdef WE_IOCTL_DEBUG | ||
702 | printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n", | ||
703 | dev->name, extra_size); | ||
704 | #endif /* WE_IOCTL_DEBUG */ | ||
705 | |||
706 | /* Create the kernel buffer */ | ||
707 | extra = kmalloc(extra_size, GFP_KERNEL); | ||
708 | if (extra == NULL) { | ||
709 | return -ENOMEM; | ||
710 | } | ||
711 | |||
712 | /* If it is a SET, get all the extra data in here */ | ||
713 | if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { | ||
714 | err = copy_from_user(extra, iwr->u.data.pointer, | ||
715 | iwr->u.data.length * | ||
716 | descr->token_size); | ||
717 | if (err) { | ||
718 | kfree(extra); | ||
719 | return -EFAULT; | ||
720 | } | ||
721 | #ifdef WE_IOCTL_DEBUG | ||
722 | printk(KERN_DEBUG "%s (WE) : Got %d bytes\n", | ||
723 | dev->name, | ||
724 | iwr->u.data.length * descr->token_size); | ||
725 | #endif /* WE_IOCTL_DEBUG */ | ||
726 | } | ||
727 | |||
728 | /* Call the handler */ | ||
729 | ret = handler(dev, &info, &(iwr->u), extra); | ||
730 | |||
731 | /* If we have something to return to the user */ | ||
732 | if (!ret && IW_IS_GET(cmd)) { | ||
733 | /* Check if there is enough buffer up there */ | ||
734 | if(user_length < iwr->u.data.length) { | ||
735 | kfree(extra); | ||
736 | return -E2BIG; | ||
737 | } | ||
738 | |||
739 | err = copy_to_user(iwr->u.data.pointer, extra, | ||
740 | iwr->u.data.length * | ||
741 | descr->token_size); | ||
742 | if (err) | ||
743 | ret = -EFAULT; | ||
744 | #ifdef WE_IOCTL_DEBUG | ||
745 | printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n", | ||
746 | dev->name, | ||
747 | iwr->u.data.length * descr->token_size); | ||
748 | #endif /* WE_IOCTL_DEBUG */ | ||
749 | } | ||
750 | |||
751 | #ifdef WE_SET_EVENT | ||
752 | /* Generate an event to notify listeners of the change */ | ||
753 | if((descr->flags & IW_DESCR_FLAG_EVENT) && | ||
754 | ((ret == 0) || (ret == -EIWCOMMIT))) { | ||
755 | if(descr->flags & IW_DESCR_FLAG_RESTRICT) | ||
756 | /* If the event is restricted, don't | ||
757 | * export the payload */ | ||
758 | wireless_send_event(dev, cmd, &(iwr->u), NULL); | ||
759 | else | ||
760 | wireless_send_event(dev, cmd, &(iwr->u), | ||
761 | extra); | ||
762 | } | ||
763 | #endif /* WE_SET_EVENT */ | ||
764 | |||
765 | /* Cleanup - I told you it wasn't that long ;-) */ | ||
766 | kfree(extra); | ||
767 | } | ||
768 | |||
769 | /* Call commit handler if needed and defined */ | ||
770 | if(ret == -EIWCOMMIT) | ||
771 | ret = call_commit_handler(dev); | ||
772 | |||
773 | /* Here, we will generate the appropriate event if needed */ | ||
774 | |||
775 | return ret; | ||
776 | } | ||
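To illustrate the other side of an IW_HEADER_TYPE_POINT exchange, a user-space sketch fetching the ESSID (illustrative names; buf is assumed to hold at least IW_ESSID_MAX_SIZE + 2 bytes):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/wireless.h>

    int get_essid(int skfd, const char *ifname, char *buf)
    {
        struct iwreq wrq;

        memset(&wrq, 0, sizeof(wrq));
        strncpy(wrq.ifr_name, ifname, IFNAMSIZ);
        wrq.u.essid.pointer = buf;
        wrq.u.essid.length = IW_ESSID_MAX_SIZE + 1;    /* user_length above */
        if (ioctl(skfd, SIOCGIWESSID, &wrq) < 0)
            return -1;
        buf[wrq.u.essid.length] = '\0';    /* length returns the token count */
        return 0;
    }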
777 | |||
778 | /* ---------------------------------------------------------------- */ | ||
779 | /* | ||
780 | * Wrapper to call a private Wireless Extension handler. | ||
781 | * We do various checks and also take care of moving data between | ||
782 | * user space and kernel space. | ||
783 | * It's not as nice and slimline as the standard wrapper. The cause | ||
784 | * is struct iw_priv_args, which was not really designed for the | ||
785 | * job we are doing here. | ||
786 | * | ||
787 | * IMPORTANT : This function prevents setting and getting data on the | ||
788 | * same IOCTL and enforces the SET/GET convention. Not doing it would | ||
789 | * be far too hairy... | ||
790 | * If you need to set and get data at the same time, please don't use | ||
791 | * an iw_handler but process it in your ioctl handler (i.e. use the | ||
792 | * old driver API). | ||
793 | */ | ||
794 | static inline int ioctl_private_call(struct net_device * dev, | ||
795 | struct ifreq * ifr, | ||
796 | unsigned int cmd, | ||
797 | iw_handler handler) | ||
798 | { | ||
799 | struct iwreq * iwr = (struct iwreq *) ifr; | ||
800 | const struct iw_priv_args * descr = NULL; | ||
801 | struct iw_request_info info; | ||
802 | int extra_size = 0; | ||
803 | int i; | ||
804 | int ret = -EINVAL; | ||
805 | |||
806 | /* Get the description of the IOCTL */ | ||
807 | for(i = 0; i < dev->wireless_handlers->num_private_args; i++) | ||
808 | if(cmd == dev->wireless_handlers->private_args[i].cmd) { | ||
809 | descr = &(dev->wireless_handlers->private_args[i]); | ||
810 | break; | ||
811 | } | ||
812 | |||
813 | #ifdef WE_IOCTL_DEBUG | ||
814 | printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n", | ||
815 | ifr->ifr_name, cmd); | ||
816 | if(descr) { | ||
817 | printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n", | ||
818 | dev->name, descr->name, | ||
819 | descr->set_args, descr->get_args); | ||
820 | } | ||
821 | #endif /* WE_IOCTL_DEBUG */ | ||
822 | |||
823 | /* Compute the size of the set/get arguments */ | ||
824 | if(descr != NULL) { | ||
825 | if(IW_IS_SET(cmd)) { | ||
826 | int offset = 0; /* For sub-ioctls */ | ||
827 | /* Check for sub-ioctl handler */ | ||
828 | if(descr->name[0] == '\0') | ||
829 | /* Reserve one int for sub-ioctl index */ | ||
830 | offset = sizeof(__u32); | ||
831 | |||
832 | /* Size of set arguments */ | ||
833 | extra_size = get_priv_size(descr->set_args); | ||
834 | |||
835 | /* Does it fit in iwr? */ | ||
836 | if((descr->set_args & IW_PRIV_SIZE_FIXED) && | ||
837 | ((extra_size + offset) <= IFNAMSIZ)) | ||
838 | extra_size = 0; | ||
839 | } else { | ||
840 | /* Size of get arguments */ | ||
841 | extra_size = get_priv_size(descr->get_args); | ||
842 | |||
843 | /* Does it fit in iwr? */ | ||
844 | if((descr->get_args & IW_PRIV_SIZE_FIXED) && | ||
845 | (extra_size <= IFNAMSIZ)) | ||
846 | extra_size = 0; | ||
847 | } | ||
848 | } | ||
849 | |||
850 | /* Prepare the call */ | ||
851 | info.cmd = cmd; | ||
852 | info.flags = 0; | ||
853 | |||
854 | /* Check if we have a pointer to user space data or not. */ | ||
855 | if(extra_size == 0) { | ||
856 | /* No extra arguments. Trivial to handle */ | ||
857 | ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u)); | ||
858 | } else { | ||
859 | char * extra; | ||
860 | int err; | ||
861 | |||
862 | /* Check what user space is giving us */ | ||
863 | if(IW_IS_SET(cmd)) { | ||
864 | /* Check NULL pointer */ | ||
865 | if((iwr->u.data.pointer == NULL) && | ||
866 | (iwr->u.data.length != 0)) | ||
867 | return -EFAULT; | ||
868 | |||
869 | /* Does it fit within bounds? */ | ||
870 | if(iwr->u.data.length > (descr->set_args & | ||
871 | IW_PRIV_SIZE_MASK)) | ||
872 | return -E2BIG; | ||
873 | } else { | ||
874 | /* Check NULL pointer */ | ||
875 | if(iwr->u.data.pointer == NULL) | ||
876 | return -EFAULT; | ||
877 | } | ||
878 | |||
879 | #ifdef WE_IOCTL_DEBUG | ||
880 | printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n", | ||
881 | dev->name, extra_size); | ||
882 | #endif /* WE_IOCTL_DEBUG */ | ||
883 | |||
884 | /* Always allocate for max space. Easier, and won't last | ||
885 | * long... */ | ||
886 | extra = kmalloc(extra_size, GFP_KERNEL); | ||
887 | if (extra == NULL) { | ||
888 | return -ENOMEM; | ||
889 | } | ||
890 | |||
891 | /* If it is a SET, get all the extra data in here */ | ||
892 | if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { | ||
893 | err = copy_from_user(extra, iwr->u.data.pointer, | ||
894 | extra_size); | ||
895 | if (err) { | ||
896 | kfree(extra); | ||
897 | return -EFAULT; | ||
898 | } | ||
899 | #ifdef WE_IOCTL_DEBUG | ||
900 | printk(KERN_DEBUG "%s (WE) : Got %d elem\n", | ||
901 | dev->name, iwr->u.data.length); | ||
902 | #endif /* WE_IOCTL_DEBUG */ | ||
903 | } | ||
904 | |||
905 | /* Call the handler */ | ||
906 | ret = handler(dev, &info, &(iwr->u), extra); | ||
907 | |||
908 | /* If we have something to return to the user */ | ||
909 | if (!ret && IW_IS_GET(cmd)) { | ||
910 | |||
911 | /* Adjust for the actual length if it's variable, | ||
912 | * to avoid leaking kernel bits outside. */ | ||
913 | if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) { | ||
914 | extra_size = adjust_priv_size(descr->get_args, | ||
915 | &(iwr->u)); | ||
916 | } | ||
917 | |||
918 | err = copy_to_user(iwr->u.data.pointer, extra, | ||
919 | extra_size); | ||
920 | if (err) | ||
921 | ret = -EFAULT; | ||
922 | #ifdef WE_IOCTL_DEBUG | ||
923 | printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n", | ||
924 | dev->name, iwr->u.data.length); | ||
925 | #endif /* WE_IOCTL_DEBUG */ | ||
926 | } | ||
927 | |||
928 | /* Cleanup - I told you it wasn't that long ;-) */ | ||
929 | kfree(extra); | ||
930 | } | ||
931 | |||
932 | |||
933 | /* Call commit handler if needed and defined */ | ||
934 | if(ret == -EIWCOMMIT) | ||
935 | ret = call_commit_handler(dev); | ||
936 | |||
937 | return ret; | ||
938 | } | ||
939 | |||
940 | /* ---------------------------------------------------------------- */ | ||
941 | /* | ||
942 | * Main IOCTL dispatcher. Called from the main networking code | ||
943 | * (dev_ioctl() in net/core/dev.c). | ||
944 | * Check the type of IOCTL and call the appropriate wrapper... | ||
945 | */ | ||
946 | int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) | ||
947 | { | ||
948 | struct net_device *dev; | ||
949 | iw_handler handler; | ||
950 | |||
951 | /* Permissions are already checked in dev_ioctl() before calling us. | ||
952 | * The copy_to/from_user() of ifr is also dealt with in there */ | ||
953 | |||
954 | /* Make sure the device exists */ | ||
955 | if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) | ||
956 | return -ENODEV; | ||
957 | |||
958 | /* A bunch of special cases, then the generic case... | ||
959 | * Note that 'cmd' is already filtered in dev_ioctl() with | ||
960 | * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ | ||
961 | switch(cmd) | ||
962 | { | ||
963 | case SIOCGIWSTATS: | ||
964 | /* Get Wireless Stats */ | ||
965 | return dev_iwstats(dev, ifr); | ||
966 | |||
967 | case SIOCGIWPRIV: | ||
968 | /* Check if we have some wireless handlers defined */ | ||
969 | if(dev->wireless_handlers != NULL) { | ||
970 | /* We export to user space the definition of | ||
971 | * the private handler ourselves */ | ||
972 | return ioctl_export_private(dev, ifr); | ||
973 | } | ||
974 | /* ## Fall-through for old API ## */ | ||
975 | default: | ||
976 | /* Generic IOCTL */ | ||
977 | /* Basic check */ | ||
978 | if (!netif_device_present(dev)) | ||
979 | return -ENODEV; | ||
980 | /* New driver API : try to find the handler */ | ||
981 | handler = get_handler(dev, cmd); | ||
982 | if(handler != NULL) { | ||
983 | /* Standard and private are not the same */ | ||
984 | if(cmd < SIOCIWFIRSTPRIV) | ||
985 | return ioctl_standard_call(dev, | ||
986 | ifr, | ||
987 | cmd, | ||
988 | handler); | ||
989 | else | ||
990 | return ioctl_private_call(dev, | ||
991 | ifr, | ||
992 | cmd, | ||
993 | handler); | ||
994 | } | ||
995 | /* Old driver API : call driver ioctl handler */ | ||
996 | if (dev->do_ioctl) { | ||
997 | return dev->do_ioctl(dev, ifr, cmd); | ||
998 | } | ||
999 | return -EOPNOTSUPP; | ||
1000 | } | ||
1001 | /* Not reached */ | ||
1002 | return -EINVAL; | ||
1003 | } | ||
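For orientation, a driver-side sketch of the wiring that lets get_handler() find something to call; every example_* identifier is hypothetical. Standard handlers are indexed by their offset from SIOCIWFIRST:

    static int example_set_mode(struct net_device *dev,
                                struct iw_request_info *info,
                                union iwreq_data *wrqu, char *extra)
    {
        /* ... program the hardware from wrqu->mode ... */
        return 0;    /* or -EIWCOMMIT to request a commit */
    }

    static const iw_handler example_handlers[] = {
        [SIOCSIWMODE - SIOCIWFIRST] = example_set_mode,
    };

    static struct iw_handler_def example_handler_def = {
        .num_standard = ARRAY_SIZE(example_handlers),
        .standard     = example_handlers,
    };

    /* in the driver's init: dev->wireless_handlers = &example_handler_def; */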
1004 | |||
1005 | /************************* EVENT PROCESSING *************************/ | ||
1006 | /* | ||
1007 | * Process events generated by the wireless layer or the driver. | ||
1008 | * Most often, the event will be propagated through rtnetlink | ||
1009 | */ | ||
1010 | |||
1011 | #ifdef WE_EVENT_NETLINK | ||
1012 | /* "rtnl" is defined in net/core/rtnetlink.c, but we need it here. | ||
1013 | * It is declared in <linux/rtnetlink.h> */ | ||
1014 | |||
1015 | /* ---------------------------------------------------------------- */ | ||
1016 | /* | ||
1017 | * Fill a rtnetlink message with our event data. | ||
1018 | * Note that we propagate only the specified event and don't dump the | ||
1019 | * current wireless config. Dumping the wireless config is far too | ||
1020 | * expensive (for each parameter, the driver needs to query the hardware). | ||
1021 | */ | ||
1022 | static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb, | ||
1023 | struct net_device * dev, | ||
1024 | int type, | ||
1025 | char * event, | ||
1026 | int event_len) | ||
1027 | { | ||
1028 | struct ifinfomsg *r; | ||
1029 | struct nlmsghdr *nlh; | ||
1030 | unsigned char *b = skb->tail; | ||
1031 | |||
1032 | nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); | ||
1033 | r = NLMSG_DATA(nlh); | ||
1034 | r->ifi_family = AF_UNSPEC; | ||
1035 | r->ifi_type = dev->type; | ||
1036 | r->ifi_index = dev->ifindex; | ||
1037 | r->ifi_flags = dev->flags; | ||
1038 | r->ifi_change = 0; /* Wireless changes don't affect those flags */ | ||
1039 | |||
1040 | /* Add the wireless events in the netlink packet */ | ||
1041 | RTA_PUT(skb, IFLA_WIRELESS, | ||
1042 | event_len, event); | ||
1043 | |||
1044 | nlh->nlmsg_len = skb->tail - b; | ||
1045 | return skb->len; | ||
1046 | |||
1047 | nlmsg_failure: | ||
1048 | rtattr_failure: | ||
1049 | skb_trim(skb, b - skb->data); | ||
1050 | return -1; | ||
1051 | } | ||
1052 | |||
1053 | /* ---------------------------------------------------------------- */ | ||
1054 | /* | ||
1055 | * Create an event message and send it on the standard rtnetlink socket. | ||
1056 | * This is a pure clone of rtmsg_ifinfo() in net/core/rtnetlink.c. | ||
1057 | * Andrzej Krzysztofowicz mandated that I use an IFLA_XXX field | ||
1058 | * within a RTM_NEWLINK event. | ||
1059 | */ | ||
1060 | static inline void rtmsg_iwinfo(struct net_device * dev, | ||
1061 | char * event, | ||
1062 | int event_len) | ||
1063 | { | ||
1064 | struct sk_buff *skb; | ||
1065 | int size = NLMSG_GOODSIZE; | ||
1066 | |||
1067 | skb = alloc_skb(size, GFP_ATOMIC); | ||
1068 | if (!skb) | ||
1069 | return; | ||
1070 | |||
1071 | if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, | ||
1072 | event, event_len) < 0) { | ||
1073 | kfree_skb(skb); | ||
1074 | return; | ||
1075 | } | ||
1076 | NETLINK_CB(skb).dst_groups = RTMGRP_LINK; | ||
1077 | netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); | ||
1078 | } | ||
1079 | #endif /* WE_EVENT_NETLINK */ | ||
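On the receiving end, a user-space sketch (illustrative name, minimal error handling) of how a monitoring tool picks these events off the rtnetlink socket:

    #include <string.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>
    #include <linux/wireless.h>

    int listen_iw_events(void)
    {
        struct sockaddr_nl snl;
        char buf[4096];
        int fd, len;

        fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        memset(&snl, 0, sizeof(snl));
        snl.nl_family = AF_NETLINK;
        snl.nl_groups = RTMGRP_LINK;    /* the group used above */
        if (fd < 0 || bind(fd, (struct sockaddr *) &snl, sizeof(snl)) < 0)
            return -1;

        while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
            struct nlmsghdr *nlh;

            for (nlh = (struct nlmsghdr *) buf; NLMSG_OK(nlh, len);
                 nlh = NLMSG_NEXT(nlh, len)) {
                struct ifinfomsg *ifi = NLMSG_DATA(nlh);
                struct rtattr *rta = IFLA_RTA(ifi);
                int alen = IFLA_PAYLOAD(nlh);

                if (nlh->nlmsg_type != RTM_NEWLINK)
                    continue;
                for (; RTA_OK(rta, alen); rta = RTA_NEXT(rta, alen))
                    if (rta->rta_type == IFLA_WIRELESS) {
                        /* RTA_DATA(rta) holds raw struct iw_event(s) */
                    }
            }
        }
        return 0;
    }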
1080 | |||
1081 | /* ---------------------------------------------------------------- */ | ||
1082 | /* | ||
1083 | * Main event dispatcher. Called from other parts and drivers. | ||
1084 | * Send the event on the appropriate channels. | ||
1085 | * May be called from interrupt context. | ||
1086 | */ | ||
1087 | void wireless_send_event(struct net_device * dev, | ||
1088 | unsigned int cmd, | ||
1089 | union iwreq_data * wrqu, | ||
1090 | char * extra) | ||
1091 | { | ||
1092 | const struct iw_ioctl_description * descr = NULL; | ||
1093 | int extra_len = 0; | ||
1094 | struct iw_event *event; /* Mallocated whole event */ | ||
1095 | int event_len; /* Its size */ | ||
1096 | int hdr_len; /* Size of the event header */ | ||
1097 | /* Don't "optimise" the following variable, it will crash */ | ||
1098 | unsigned cmd_index; /* *MUST* be unsigned */ | ||
1099 | |||
1100 | /* Get the description of the IOCTL */ | ||
1101 | if(cmd <= SIOCIWLAST) { | ||
1102 | cmd_index = cmd - SIOCIWFIRST; | ||
1103 | if(cmd_index < standard_ioctl_num) | ||
1104 | descr = &(standard_ioctl[cmd_index]); | ||
1105 | } else { | ||
1106 | cmd_index = cmd - IWEVFIRST; | ||
1107 | if(cmd_index < standard_event_num) | ||
1108 | descr = &(standard_event[cmd_index]); | ||
1109 | } | ||
1110 | /* Don't accept unknown events */ | ||
1111 | if(descr == NULL) { | ||
1112 | /* Note : we don't return an error to the driver, because | ||
1113 | * the driver would not know what to do about it. It can't | ||
1114 | * return an error to the user, because the event is not | ||
1115 | * initiated by a user request. | ||
1116 | * The best the driver could do is to log an error message. | ||
1117 | * We will do it ourselves instead... | ||
1118 | */ | ||
1119 | printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", | ||
1120 | dev->name, cmd); | ||
1121 | return; | ||
1122 | } | ||
1123 | #ifdef WE_EVENT_DEBUG | ||
1124 | printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n", | ||
1125 | dev->name, cmd); | ||
1126 | printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens); | ||
1127 | #endif /* WE_EVENT_DEBUG */ | ||
1128 | |||
1129 | /* Check extra parameters and set extra_len */ | ||
1130 | if(descr->header_type == IW_HEADER_TYPE_POINT) { | ||
1131 | /* Check if number of tokens fits within bounds */ | ||
1132 | if(wrqu->data.length > descr->max_tokens) { | ||
1133 | printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); | ||
1134 | return; | ||
1135 | } | ||
1136 | if(wrqu->data.length < descr->min_tokens) { | ||
1137 | printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); | ||
1138 | return; | ||
1139 | } | ||
1140 | /* Calculate extra_len - extra is NULL for restricted events */ | ||
1141 | if(extra != NULL) | ||
1142 | extra_len = wrqu->data.length * descr->token_size; | ||
1143 | #ifdef WE_EVENT_DEBUG | ||
1144 | printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len); | ||
1145 | #endif /* WE_EVENT_DEBUG */ | ||
1146 | } | ||
1147 | |||
1148 | /* Total length of the event */ | ||
1149 | hdr_len = event_type_size[descr->header_type]; | ||
1150 | event_len = hdr_len + extra_len; | ||
1151 | |||
1152 | #ifdef WE_EVENT_DEBUG | ||
1153 | printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len); | ||
1154 | #endif /* WE_EVENT_DEBUG */ | ||
1155 | |||
1156 | /* Create temporary buffer to hold the event */ | ||
1157 | event = kmalloc(event_len, GFP_ATOMIC); | ||
1158 | if(event == NULL) | ||
1159 | return; | ||
1160 | |||
1161 | /* Fill event */ | ||
1162 | event->len = event_len; | ||
1163 | event->cmd = cmd; | ||
1164 | memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN); | ||
1165 | if(extra != NULL) | ||
1166 | memcpy(((char *) event) + hdr_len, extra, extra_len); | ||
1167 | |||
1168 | #ifdef WE_EVENT_NETLINK | ||
1169 | /* rtnetlink event channel */ | ||
1170 | rtmsg_iwinfo(dev, (char *) event, event_len); | ||
1171 | #endif /* WE_EVENT_NETLINK */ | ||
1172 | |||
1173 | /* Cleanup */ | ||
1174 | kfree(event); | ||
1175 | |||
1176 | return; /* Always success, I guess ;-) */ | ||
1177 | } | ||
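From a driver, emitting an event is a one-liner; a sketch with a hypothetical helper reporting a free-form IWEVCUSTOM string:

    #include <linux/string.h>
    #include <net/iw_handler.h>

    static void example_report(struct net_device *dev, const char *msg)
    {
        union iwreq_data wrqu;

        memset(&wrqu, 0, sizeof(wrqu));
        wrqu.data.length = strlen(msg);    /* bounded by IW_CUSTOM_MAX */
        wireless_send_event(dev, IWEVCUSTOM, &wrqu, (char *) msg);
    }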
1178 | |||
1179 | /********************** ENHANCED IWSPY SUPPORT **********************/ | ||
1180 | /* | ||
1181 | * In the old days, the driver was handling spy support all by itself. | ||
1182 | * Now, the driver can delegate this task to Wireless Extensions. | ||
1183 | * It needs to use the standard spy iw_handlers in struct iw_handler_def, | ||
1184 | * push data to us via wireless_spy_update() and include struct iw_spy_data | ||
1185 | * in its private part (and advertise it in iw_handler_def->spy_offset). | ||
1186 | * One of the main advantages of centralising spy support here is that | ||
1187 | * it becomes much easier to improve and extend it without having to touch | ||
1188 | * the drivers. One example is the addition of the Spy-Threshold events. | ||
1189 | */ | ||
1190 | |||
1191 | /* ---------------------------------------------------------------- */ | ||
1192 | /* | ||
1193 | * Return the pointer to the spy data in the driver. | ||
1194 | * Because this is called on the Rx path via wireless_spy_update(), | ||
1195 | * we want it to be efficient... | ||
1196 | */ | ||
1197 | static inline struct iw_spy_data * get_spydata(struct net_device *dev) | ||
1198 | { | ||
1199 | /* This is the new way */ | ||
1200 | if(dev->wireless_data) | ||
1201 | return(dev->wireless_data->spy_data); | ||
1202 | |||
1203 | /* This is the old way. Doesn't work for multi-headed drivers. | ||
1204 | * It will be removed in the next version of WE. */ | ||
1205 | return (dev->priv + dev->wireless_handlers->spy_offset); | ||
1206 | } | ||
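A driver-side sketch of the "new way" this function looks for; the example_priv layout and setup helper are hypothetical:

    #include <net/iw_handler.h>

    struct example_priv {
        struct iw_spy_data spy_data;
        struct iw_public_data wireless_data;
        /* ... hardware state ... */
    };

    static void example_setup_spy(struct net_device *dev)
    {
        struct example_priv *priv = dev->priv;

        priv->wireless_data.spy_data = &priv->spy_data;
        dev->wireless_data = &priv->wireless_data;
    }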
1207 | |||
1208 | /*------------------------------------------------------------------*/ | ||
1209 | /* | ||
1210 | * Standard Wireless Handler : set Spy List | ||
1211 | */ | ||
1212 | int iw_handler_set_spy(struct net_device * dev, | ||
1213 | struct iw_request_info * info, | ||
1214 | union iwreq_data * wrqu, | ||
1215 | char * extra) | ||
1216 | { | ||
1217 | struct iw_spy_data * spydata = get_spydata(dev); | ||
1218 | struct sockaddr * address = (struct sockaddr *) extra; | ||
1219 | |||
1220 | if(!dev->wireless_data) | ||
1221 | /* Help user know that driver needs updating */ | ||
1222 | printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n", | ||
1223 | dev->name); | ||
1224 | /* Make sure driver is not buggy or using the old API */ | ||
1225 | if(!spydata) | ||
1226 | return -EOPNOTSUPP; | ||
1227 | |||
1228 | /* Disable spy collection while we copy the addresses. | ||
1229 | * While we copy addresses, any call to wireless_spy_update() | ||
1230 | * will be a NOP. This is OK, since the addresses are changing anyway. */ | ||
1231 | spydata->spy_number = 0; | ||
1232 | |||
1233 | /* We want to operate without locking, because wireless_spy_update() | ||
1234 | * most likely runs in the interrupt handler, and therefore has its | ||
1235 | * own locking constraints and needs performance. | ||
1236 | * The rtnl_lock() makes sure we don't race with the other iw_handlers. | ||
1237 | * The barrier below makes sure wireless_spy_update() "sees" that the | ||
1238 | * spy list is temporarily disabled. */ | ||
1239 | wmb(); | ||
1240 | |||
1241 | /* Are there addresses to copy? */ | ||
1242 | if(wrqu->data.length > 0) { | ||
1243 | int i; | ||
1244 | |||
1245 | /* Copy addresses */ | ||
1246 | for(i = 0; i < wrqu->data.length; i++) | ||
1247 | memcpy(spydata->spy_address[i], address[i].sa_data, | ||
1248 | ETH_ALEN); | ||
1249 | /* Reset stats */ | ||
1250 | memset(spydata->spy_stat, 0, | ||
1251 | sizeof(struct iw_quality) * IW_MAX_SPY); | ||
1252 | |||
1253 | #ifdef WE_SPY_DEBUG | ||
1254 | printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length); | ||
1255 | for (i = 0; i < wrqu->data.length; i++) | ||
1256 | printk(KERN_DEBUG | ||
1257 | "%02X:%02X:%02X:%02X:%02X:%02X \n", | ||
1258 | spydata->spy_address[i][0], | ||
1259 | spydata->spy_address[i][1], | ||
1260 | spydata->spy_address[i][2], | ||
1261 | spydata->spy_address[i][3], | ||
1262 | spydata->spy_address[i][4], | ||
1263 | spydata->spy_address[i][5]); | ||
1264 | #endif /* WE_SPY_DEBUG */ | ||
1265 | } | ||
1266 | |||
1267 | /* Make sure above is updated before re-enabling */ | ||
1268 | wmb(); | ||
1269 | |||
1270 | /* Enable addresses */ | ||
1271 | spydata->spy_number = wrqu->data.length; | ||
1272 | |||
1273 | return 0; | ||
1274 | } | ||
1275 | |||
1276 | /*------------------------------------------------------------------*/ | ||
1277 | /* | ||
1278 | * Standard Wireless Handler : get Spy List | ||
1279 | */ | ||
1280 | int iw_handler_get_spy(struct net_device * dev, | ||
1281 | struct iw_request_info * info, | ||
1282 | union iwreq_data * wrqu, | ||
1283 | char * extra) | ||
1284 | { | ||
1285 | struct iw_spy_data * spydata = get_spydata(dev); | ||
1286 | struct sockaddr * address = (struct sockaddr *) extra; | ||
1287 | int i; | ||
1288 | |||
1289 | /* Make sure driver is not buggy or using the old API */ | ||
1290 | if(!spydata) | ||
1291 | return -EOPNOTSUPP; | ||
1292 | |||
1293 | wrqu->data.length = spydata->spy_number; | ||
1294 | |||
1295 | /* Copy addresses. */ | ||
1296 | for(i = 0; i < spydata->spy_number; i++) { | ||
1297 | memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); | ||
1298 | address[i].sa_family = AF_UNIX; | ||
1299 | } | ||
1300 | /* Copy stats to the user buffer (just after). */ | ||
1301 | if(spydata->spy_number > 0) | ||
1302 | memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), | ||
1303 | spydata->spy_stat, | ||
1304 | sizeof(struct iw_quality) * spydata->spy_number); | ||
1305 | /* Reset updated flags. */ | ||
1306 | for(i = 0; i < spydata->spy_number; i++) | ||
1307 | spydata->spy_stat[i].updated = 0; | ||
1308 | return 0; | ||
1309 | } | ||
1310 | |||
1311 | /*------------------------------------------------------------------*/ | ||
1312 | /* | ||
1313 | * Standard Wireless Handler : set spy threshold | ||
1314 | */ | ||
1315 | int iw_handler_set_thrspy(struct net_device * dev, | ||
1316 | struct iw_request_info *info, | ||
1317 | union iwreq_data * wrqu, | ||
1318 | char * extra) | ||
1319 | { | ||
1320 | struct iw_spy_data * spydata = get_spydata(dev); | ||
1321 | struct iw_thrspy * threshold = (struct iw_thrspy *) extra; | ||
1322 | |||
1323 | /* Make sure driver is not buggy or using the old API */ | ||
1324 | if(!spydata) | ||
1325 | return -EOPNOTSUPP; | ||
1326 | |||
1327 | /* Just do it */ | ||
1328 | memcpy(&(spydata->spy_thr_low), &(threshold->low), | ||
1329 | 2 * sizeof(struct iw_quality)); | ||
1330 | |||
1331 | /* Clear flag */ | ||
1332 | memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); | ||
1333 | |||
1334 | #ifdef WE_SPY_DEBUG | ||
1335 | printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level); | ||
1336 | #endif /* WE_SPY_DEBUG */ | ||
1337 | |||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | /*------------------------------------------------------------------*/ | ||
1342 | /* | ||
1343 | * Standard Wireless Handler : get spy threshold | ||
1344 | */ | ||
1345 | int iw_handler_get_thrspy(struct net_device * dev, | ||
1346 | struct iw_request_info *info, | ||
1347 | union iwreq_data * wrqu, | ||
1348 | char * extra) | ||
1349 | { | ||
1350 | struct iw_spy_data * spydata = get_spydata(dev); | ||
1351 | struct iw_thrspy * threshold = (struct iw_thrspy *) extra; | ||
1352 | |||
1353 | /* Make sure driver is not buggy or using the old API */ | ||
1354 | if(!spydata) | ||
1355 | return -EOPNOTSUPP; | ||
1356 | |||
1357 | /* Just do it */ | ||
1358 | memcpy(&(threshold->low), &(spydata->spy_thr_low), | ||
1359 | 2 * sizeof(struct iw_quality)); | ||
1360 | |||
1361 | return 0; | ||
1362 | } | ||
1363 | |||
1364 | /*------------------------------------------------------------------*/ | ||
1365 | /* | ||
1366 | * Prepare and send a Spy Threshold event | ||
1367 | */ | ||
1368 | static void iw_send_thrspy_event(struct net_device * dev, | ||
1369 | struct iw_spy_data * spydata, | ||
1370 | unsigned char * address, | ||
1371 | struct iw_quality * wstats) | ||
1372 | { | ||
1373 | union iwreq_data wrqu; | ||
1374 | struct iw_thrspy threshold; | ||
1375 | |||
1376 | /* Init */ | ||
1377 | wrqu.data.length = 1; | ||
1378 | wrqu.data.flags = 0; | ||
1379 | /* Copy address */ | ||
1380 | memcpy(threshold.addr.sa_data, address, ETH_ALEN); | ||
1381 | threshold.addr.sa_family = ARPHRD_ETHER; | ||
1382 | /* Copy stats */ | ||
1383 | memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); | ||
1384 | /* Copy also thresholds */ | ||
1385 | memcpy(&(threshold.low), &(spydata->spy_thr_low), | ||
1386 | 2 * sizeof(struct iw_quality)); | ||
1387 | |||
1388 | #ifdef WE_SPY_DEBUG | ||
1389 | printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n", | ||
1390 | threshold.addr.sa_data[0], | ||
1391 | threshold.addr.sa_data[1], | ||
1392 | threshold.addr.sa_data[2], | ||
1393 | threshold.addr.sa_data[3], | ||
1394 | threshold.addr.sa_data[4], | ||
1395 | threshold.addr.sa_data[5], threshold.qual.level); | ||
1396 | #endif /* WE_SPY_DEBUG */ | ||
1397 | |||
1398 | /* Send event to user space */ | ||
1399 | wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); | ||
1400 | } | ||
1401 | |||
1402 | /* ---------------------------------------------------------------- */ | ||
1403 | /* | ||
1404 | * Call for the driver to update the spy data. | ||
1405 | * For now, the spy data is a simple array. As the size of the array is | ||
1406 | * small, this is good enough. If we wanted to support a larger number | ||
1407 | * of spy addresses, we should use something more efficient... | ||
1408 | */ | ||
1409 | void wireless_spy_update(struct net_device * dev, | ||
1410 | unsigned char * address, | ||
1411 | struct iw_quality * wstats) | ||
1412 | { | ||
1413 | struct iw_spy_data * spydata = get_spydata(dev); | ||
1414 | int i; | ||
1415 | int match = -1; | ||
1416 | |||
1417 | /* Make sure driver is not buggy or using the old API */ | ||
1418 | if(!spydata) | ||
1419 | return; | ||
1420 | |||
1421 | #ifdef WE_SPY_DEBUG | ||
1422 | printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); | ||
1423 | #endif /* WE_SPY_DEBUG */ | ||
1424 | |||
1425 | /* Update all records that match */ | ||
1426 | for(i = 0; i < spydata->spy_number; i++) | ||
1427 | if(!memcmp(address, spydata->spy_address[i], ETH_ALEN)) { | ||
1428 | memcpy(&(spydata->spy_stat[i]), wstats, | ||
1429 | sizeof(struct iw_quality)); | ||
1430 | match = i; | ||
1431 | } | ||
1432 | |||
1433 | /* Generate an event if we cross the spy threshold. | ||
1434 | * To avoid event storms, we have a simple hysteresis : we generate | ||
1435 | * event only when we go under the low threshold or above the | ||
1436 | * high threshold. */ | ||
1437 | if(match >= 0) { | ||
1438 | if(spydata->spy_thr_under[match]) { | ||
1439 | if(wstats->level > spydata->spy_thr_high.level) { | ||
1440 | spydata->spy_thr_under[match] = 0; | ||
1441 | iw_send_thrspy_event(dev, spydata, | ||
1442 | address, wstats); | ||
1443 | } | ||
1444 | } else { | ||
1445 | if(wstats->level < spydata->spy_thr_low.level) { | ||
1446 | spydata->spy_thr_under[match] = 1; | ||
1447 | iw_send_thrspy_event(dev, spydata, | ||
1448 | address, wstats); | ||
1449 | } | ||
1450 | } | ||
1451 | } | ||
1452 | } | ||
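The matching RX-path usage, sketched with a hypothetical helper: the driver reports one iw_quality sample per received frame, and the hysteresis above does the rest:

    #include <net/iw_handler.h>

    static void example_rx_quality(struct net_device *dev, unsigned char *src,
                                   __u8 level, __u8 noise)
    {
        struct iw_quality qual;

        qual.qual = 0;
        qual.level = level;
        qual.noise = noise;
        qual.updated = IW_QUAL_LEVEL_UPDATED | IW_QUAL_NOISE_UPDATED;
        wireless_spy_update(dev, src, &qual);    /* src: 6-byte MAC */
    }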
1453 | |||
1454 | EXPORT_SYMBOL(iw_handler_get_spy); | ||
1455 | EXPORT_SYMBOL(iw_handler_get_thrspy); | ||
1456 | EXPORT_SYMBOL(iw_handler_set_spy); | ||
1457 | EXPORT_SYMBOL(iw_handler_set_thrspy); | ||
1458 | EXPORT_SYMBOL(wireless_send_event); | ||
1459 | EXPORT_SYMBOL(wireless_spy_update); | ||