aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/sock.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/sock.c
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'net/core/sock.c')
-rw-r--r--net/core/sock.c1565
1 files changed, 1565 insertions, 0 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
new file mode 100644
index 000000000000..629ab4a5b45b
--- /dev/null
+++ b/net/core/sock.c
@@ -0,0 +1,1565 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
12 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
37 * code. The ACK stuff can wait and needs major
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 * Alan Cox : Generic socket allocation to make hooks
72 * easier (suggested by Craig Metz).
73 * Michael Pall : SO_ERROR returns positive errno again
74 * Steve Whitehouse: Added default destructor to free
75 * protocol private data.
76 * Steve Whitehouse: Added various other default routines
77 * common to several socket families.
78 * Chris Evans : Call suser() check last on F_SETOWN
79 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
81 * Andi Kleen : Fix write_space callback
82 * Chris Evans : Security fixes - signedness again
83 * Arnaldo C. Melo : cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 * This program is free software; you can redistribute it and/or
89 * modify it under the terms of the GNU General Public License
90 * as published by the Free Software Foundation; either version
91 * 2 of the License, or (at your option) any later version.
92 */
93
94#include <linux/config.h>
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
100#include <linux/major.h>
101#include <linux/module.h>
102#include <linux/proc_fs.h>
103#include <linux/seq_file.h>
104#include <linux/sched.h>
105#include <linux/timer.h>
106#include <linux/string.h>
107#include <linux/sockios.h>
108#include <linux/net.h>
109#include <linux/mm.h>
110#include <linux/slab.h>
111#include <linux/interrupt.h>
112#include <linux/poll.h>
113#include <linux/tcp.h>
114#include <linux/init.h>
115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
122#include <net/sock.h>
123#include <net/xfrm.h>
124#include <linux/ipsec.h>
125
126#include <linux/filter.h>
127
128#ifdef CONFIG_INET
129#include <net/tcp.h>
130#endif
131
132/* Take into consideration the size of the struct sk_buff overhead in the
133 * determination of these values, since that is non-constant across
134 * platforms. This makes socket queueing behavior and performance
135 * not depend upon such differences.
136 */
137#define _SK_MEM_PACKETS 256
138#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
139#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
140#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
141
142/* Run time adjustable parameters. */
143__u32 sysctl_wmem_max = SK_WMEM_MAX;
144__u32 sysctl_rmem_max = SK_RMEM_MAX;
145__u32 sysctl_wmem_default = SK_WMEM_MAX;
146__u32 sysctl_rmem_default = SK_RMEM_MAX;
147
148/* Maximal space eaten by iovec or ancilliary data plus some space */
149int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
150
151static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
152{
153 struct timeval tv;
154
155 if (optlen < sizeof(tv))
156 return -EINVAL;
157 if (copy_from_user(&tv, optval, sizeof(tv)))
158 return -EFAULT;
159
160 *timeo_p = MAX_SCHEDULE_TIMEOUT;
161 if (tv.tv_sec == 0 && tv.tv_usec == 0)
162 return 0;
163 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
164 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
165 return 0;
166}
167
168static void sock_warn_obsolete_bsdism(const char *name)
169{
170 static int warned;
171 static char warncomm[TASK_COMM_LEN];
172 if (strcmp(warncomm, current->comm) && warned < 5) {
173 strcpy(warncomm, current->comm);
174 printk(KERN_WARNING "process `%s' is using obsolete "
175 "%s SO_BSDCOMPAT\n", warncomm, name);
176 warned++;
177 }
178}
179
180static void sock_disable_timestamp(struct sock *sk)
181{
182 if (sock_flag(sk, SOCK_TIMESTAMP)) {
183 sock_reset_flag(sk, SOCK_TIMESTAMP);
184 net_disable_timestamp();
185 }
186}
187
188
189/*
190 * This is meant for all protocols to use and covers goings on
191 * at the socket level. Everything here is generic.
192 */
193
194int sock_setsockopt(struct socket *sock, int level, int optname,
195 char __user *optval, int optlen)
196{
197 struct sock *sk=sock->sk;
198 struct sk_filter *filter;
199 int val;
200 int valbool;
201 struct linger ling;
202 int ret = 0;
203
204 /*
205 * Options without arguments
206 */
207
208#ifdef SO_DONTLINGER /* Compatibility item... */
209 switch (optname) {
210 case SO_DONTLINGER:
211 sock_reset_flag(sk, SOCK_LINGER);
212 return 0;
213 }
214#endif
215
216 if(optlen<sizeof(int))
217 return(-EINVAL);
218
219 if (get_user(val, (int __user *)optval))
220 return -EFAULT;
221
222 valbool = val?1:0;
223
224 lock_sock(sk);
225
226 switch(optname)
227 {
228 case SO_DEBUG:
229 if(val && !capable(CAP_NET_ADMIN))
230 {
231 ret = -EACCES;
232 }
233 else if (valbool)
234 sock_set_flag(sk, SOCK_DBG);
235 else
236 sock_reset_flag(sk, SOCK_DBG);
237 break;
238 case SO_REUSEADDR:
239 sk->sk_reuse = valbool;
240 break;
241 case SO_TYPE:
242 case SO_ERROR:
243 ret = -ENOPROTOOPT;
244 break;
245 case SO_DONTROUTE:
246 if (valbool)
247 sock_set_flag(sk, SOCK_LOCALROUTE);
248 else
249 sock_reset_flag(sk, SOCK_LOCALROUTE);
250 break;
251 case SO_BROADCAST:
252 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
253 break;
254 case SO_SNDBUF:
255 /* Don't error on this BSD doesn't and if you think
256 about it this is right. Otherwise apps have to
257 play 'guess the biggest size' games. RCVBUF/SNDBUF
258 are treated in BSD as hints */
259
260 if (val > sysctl_wmem_max)
261 val = sysctl_wmem_max;
262
263 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
264 if ((val * 2) < SOCK_MIN_SNDBUF)
265 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
266 else
267 sk->sk_sndbuf = val * 2;
268
269 /*
270 * Wake up sending tasks if we
271 * upped the value.
272 */
273 sk->sk_write_space(sk);
274 break;
275
276 case SO_RCVBUF:
277 /* Don't error on this BSD doesn't and if you think
278 about it this is right. Otherwise apps have to
279 play 'guess the biggest size' games. RCVBUF/SNDBUF
280 are treated in BSD as hints */
281
282 if (val > sysctl_rmem_max)
283 val = sysctl_rmem_max;
284
285 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
286 /* FIXME: is this lower bound the right one? */
287 if ((val * 2) < SOCK_MIN_RCVBUF)
288 sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
289 else
290 sk->sk_rcvbuf = val * 2;
291 break;
292
293 case SO_KEEPALIVE:
294#ifdef CONFIG_INET
295 if (sk->sk_protocol == IPPROTO_TCP)
296 tcp_set_keepalive(sk, valbool);
297#endif
298 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
299 break;
300
301 case SO_OOBINLINE:
302 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
303 break;
304
305 case SO_NO_CHECK:
306 sk->sk_no_check = valbool;
307 break;
308
309 case SO_PRIORITY:
310 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
311 sk->sk_priority = val;
312 else
313 ret = -EPERM;
314 break;
315
316 case SO_LINGER:
317 if(optlen<sizeof(ling)) {
318 ret = -EINVAL; /* 1003.1g */
319 break;
320 }
321 if (copy_from_user(&ling,optval,sizeof(ling))) {
322 ret = -EFAULT;
323 break;
324 }
325 if (!ling.l_onoff)
326 sock_reset_flag(sk, SOCK_LINGER);
327 else {
328#if (BITS_PER_LONG == 32)
329 if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
330 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
331 else
332#endif
333 sk->sk_lingertime = ling.l_linger * HZ;
334 sock_set_flag(sk, SOCK_LINGER);
335 }
336 break;
337
338 case SO_BSDCOMPAT:
339 sock_warn_obsolete_bsdism("setsockopt");
340 break;
341
342 case SO_PASSCRED:
343 if (valbool)
344 set_bit(SOCK_PASSCRED, &sock->flags);
345 else
346 clear_bit(SOCK_PASSCRED, &sock->flags);
347 break;
348
349 case SO_TIMESTAMP:
350 if (valbool) {
351 sock_set_flag(sk, SOCK_RCVTSTAMP);
352 sock_enable_timestamp(sk);
353 } else
354 sock_reset_flag(sk, SOCK_RCVTSTAMP);
355 break;
356
357 case SO_RCVLOWAT:
358 if (val < 0)
359 val = INT_MAX;
360 sk->sk_rcvlowat = val ? : 1;
361 break;
362
363 case SO_RCVTIMEO:
364 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
365 break;
366
367 case SO_SNDTIMEO:
368 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
369 break;
370
371#ifdef CONFIG_NETDEVICES
372 case SO_BINDTODEVICE:
373 {
374 char devname[IFNAMSIZ];
375
376 /* Sorry... */
377 if (!capable(CAP_NET_RAW)) {
378 ret = -EPERM;
379 break;
380 }
381
382 /* Bind this socket to a particular device like "eth0",
383 * as specified in the passed interface name. If the
384 * name is "" or the option length is zero the socket
385 * is not bound.
386 */
387
388 if (!valbool) {
389 sk->sk_bound_dev_if = 0;
390 } else {
391 if (optlen > IFNAMSIZ)
392 optlen = IFNAMSIZ;
393 if (copy_from_user(devname, optval, optlen)) {
394 ret = -EFAULT;
395 break;
396 }
397
398 /* Remove any cached route for this socket. */
399 sk_dst_reset(sk);
400
401 if (devname[0] == '\0') {
402 sk->sk_bound_dev_if = 0;
403 } else {
404 struct net_device *dev = dev_get_by_name(devname);
405 if (!dev) {
406 ret = -ENODEV;
407 break;
408 }
409 sk->sk_bound_dev_if = dev->ifindex;
410 dev_put(dev);
411 }
412 }
413 break;
414 }
415#endif
416
417
418 case SO_ATTACH_FILTER:
419 ret = -EINVAL;
420 if (optlen == sizeof(struct sock_fprog)) {
421 struct sock_fprog fprog;
422
423 ret = -EFAULT;
424 if (copy_from_user(&fprog, optval, sizeof(fprog)))
425 break;
426
427 ret = sk_attach_filter(&fprog, sk);
428 }
429 break;
430
431 case SO_DETACH_FILTER:
432 spin_lock_bh(&sk->sk_lock.slock);
433 filter = sk->sk_filter;
434 if (filter) {
435 sk->sk_filter = NULL;
436 spin_unlock_bh(&sk->sk_lock.slock);
437 sk_filter_release(sk, filter);
438 break;
439 }
440 spin_unlock_bh(&sk->sk_lock.slock);
441 ret = -ENONET;
442 break;
443
444 /* We implement the SO_SNDLOWAT etc to
445 not be settable (1003.1g 5.3) */
446 default:
447 ret = -ENOPROTOOPT;
448 break;
449 }
450 release_sock(sk);
451 return ret;
452}
453
454
455int sock_getsockopt(struct socket *sock, int level, int optname,
456 char __user *optval, int __user *optlen)
457{
458 struct sock *sk = sock->sk;
459
460 union
461 {
462 int val;
463 struct linger ling;
464 struct timeval tm;
465 } v;
466
467 unsigned int lv = sizeof(int);
468 int len;
469
470 if(get_user(len,optlen))
471 return -EFAULT;
472 if(len < 0)
473 return -EINVAL;
474
475 switch(optname)
476 {
477 case SO_DEBUG:
478 v.val = sock_flag(sk, SOCK_DBG);
479 break;
480
481 case SO_DONTROUTE:
482 v.val = sock_flag(sk, SOCK_LOCALROUTE);
483 break;
484
485 case SO_BROADCAST:
486 v.val = !!sock_flag(sk, SOCK_BROADCAST);
487 break;
488
489 case SO_SNDBUF:
490 v.val = sk->sk_sndbuf;
491 break;
492
493 case SO_RCVBUF:
494 v.val = sk->sk_rcvbuf;
495 break;
496
497 case SO_REUSEADDR:
498 v.val = sk->sk_reuse;
499 break;
500
501 case SO_KEEPALIVE:
502 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
503 break;
504
505 case SO_TYPE:
506 v.val = sk->sk_type;
507 break;
508
509 case SO_ERROR:
510 v.val = -sock_error(sk);
511 if(v.val==0)
512 v.val = xchg(&sk->sk_err_soft, 0);
513 break;
514
515 case SO_OOBINLINE:
516 v.val = !!sock_flag(sk, SOCK_URGINLINE);
517 break;
518
519 case SO_NO_CHECK:
520 v.val = sk->sk_no_check;
521 break;
522
523 case SO_PRIORITY:
524 v.val = sk->sk_priority;
525 break;
526
527 case SO_LINGER:
528 lv = sizeof(v.ling);
529 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
530 v.ling.l_linger = sk->sk_lingertime / HZ;
531 break;
532
533 case SO_BSDCOMPAT:
534 sock_warn_obsolete_bsdism("getsockopt");
535 break;
536
537 case SO_TIMESTAMP:
538 v.val = sock_flag(sk, SOCK_RCVTSTAMP);
539 break;
540
541 case SO_RCVTIMEO:
542 lv=sizeof(struct timeval);
543 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
544 v.tm.tv_sec = 0;
545 v.tm.tv_usec = 0;
546 } else {
547 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
548 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
549 }
550 break;
551
552 case SO_SNDTIMEO:
553 lv=sizeof(struct timeval);
554 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
555 v.tm.tv_sec = 0;
556 v.tm.tv_usec = 0;
557 } else {
558 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
559 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
560 }
561 break;
562
563 case SO_RCVLOWAT:
564 v.val = sk->sk_rcvlowat;
565 break;
566
567 case SO_SNDLOWAT:
568 v.val=1;
569 break;
570
571 case SO_PASSCRED:
572 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
573 break;
574
575 case SO_PEERCRED:
576 if (len > sizeof(sk->sk_peercred))
577 len = sizeof(sk->sk_peercred);
578 if (copy_to_user(optval, &sk->sk_peercred, len))
579 return -EFAULT;
580 goto lenout;
581
582 case SO_PEERNAME:
583 {
584 char address[128];
585
586 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
587 return -ENOTCONN;
588 if (lv < len)
589 return -EINVAL;
590 if (copy_to_user(optval, address, len))
591 return -EFAULT;
592 goto lenout;
593 }
594
595 /* Dubious BSD thing... Probably nobody even uses it, but
596 * the UNIX standard wants it for whatever reason... -DaveM
597 */
598 case SO_ACCEPTCONN:
599 v.val = sk->sk_state == TCP_LISTEN;
600 break;
601
602 case SO_PEERSEC:
603 return security_socket_getpeersec(sock, optval, optlen, len);
604
605 default:
606 return(-ENOPROTOOPT);
607 }
608 if (len > lv)
609 len = lv;
610 if (copy_to_user(optval, &v, len))
611 return -EFAULT;
612lenout:
613 if (put_user(len, optlen))
614 return -EFAULT;
615 return 0;
616}
617
618/**
619 * sk_alloc - All socket objects are allocated here
620 * @family - protocol family
621 * @priority - for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
622 * @prot - struct proto associated with this new sock instance
623 * @zero_it - if we should zero the newly allocated sock
624 */
625struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
626{
627 struct sock *sk = NULL;
628 kmem_cache_t *slab = prot->slab;
629
630 if (slab != NULL)
631 sk = kmem_cache_alloc(slab, priority);
632 else
633 sk = kmalloc(prot->obj_size, priority);
634
635 if (sk) {
636 if (zero_it) {
637 memset(sk, 0, prot->obj_size);
638 sk->sk_family = family;
639 sk->sk_prot = prot;
640 sock_lock_init(sk);
641 }
642
643 if (security_sk_alloc(sk, family, priority)) {
644 kmem_cache_free(slab, sk);
645 sk = NULL;
646 } else
647 __module_get(prot->owner);
648 }
649 return sk;
650}
651
652void sk_free(struct sock *sk)
653{
654 struct sk_filter *filter;
655 struct module *owner = sk->sk_prot->owner;
656
657 if (sk->sk_destruct)
658 sk->sk_destruct(sk);
659
660 filter = sk->sk_filter;
661 if (filter) {
662 sk_filter_release(sk, filter);
663 sk->sk_filter = NULL;
664 }
665
666 sock_disable_timestamp(sk);
667
668 if (atomic_read(&sk->sk_omem_alloc))
669 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
670 __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
671
672 security_sk_free(sk);
673 if (sk->sk_prot->slab != NULL)
674 kmem_cache_free(sk->sk_prot->slab, sk);
675 else
676 kfree(sk);
677 module_put(owner);
678}
679
680void __init sk_init(void)
681{
682 if (num_physpages <= 4096) {
683 sysctl_wmem_max = 32767;
684 sysctl_rmem_max = 32767;
685 sysctl_wmem_default = 32767;
686 sysctl_rmem_default = 32767;
687 } else if (num_physpages >= 131072) {
688 sysctl_wmem_max = 131071;
689 sysctl_rmem_max = 131071;
690 }
691}
692
693/*
694 * Simple resource managers for sockets.
695 */
696
697
698/*
699 * Write buffer destructor automatically called from kfree_skb.
700 */
701void sock_wfree(struct sk_buff *skb)
702{
703 struct sock *sk = skb->sk;
704
705 /* In case it might be waiting for more memory. */
706 atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
707 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
708 sk->sk_write_space(sk);
709 sock_put(sk);
710}
711
712/*
713 * Read buffer destructor automatically called from kfree_skb.
714 */
715void sock_rfree(struct sk_buff *skb)
716{
717 struct sock *sk = skb->sk;
718
719 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
720}
721
722
723int sock_i_uid(struct sock *sk)
724{
725 int uid;
726
727 read_lock(&sk->sk_callback_lock);
728 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
729 read_unlock(&sk->sk_callback_lock);
730 return uid;
731}
732
733unsigned long sock_i_ino(struct sock *sk)
734{
735 unsigned long ino;
736
737 read_lock(&sk->sk_callback_lock);
738 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
739 read_unlock(&sk->sk_callback_lock);
740 return ino;
741}
742
743/*
744 * Allocate a skb from the socket's send buffer.
745 */
746struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
747{
748 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
749 struct sk_buff * skb = alloc_skb(size, priority);
750 if (skb) {
751 skb_set_owner_w(skb, sk);
752 return skb;
753 }
754 }
755 return NULL;
756}
757
758/*
759 * Allocate a skb from the socket's receive buffer.
760 */
761struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
762{
763 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
764 struct sk_buff *skb = alloc_skb(size, priority);
765 if (skb) {
766 skb_set_owner_r(skb, sk);
767 return skb;
768 }
769 }
770 return NULL;
771}
772
773/*
774 * Allocate a memory block from the socket's option memory buffer.
775 */
776void *sock_kmalloc(struct sock *sk, int size, int priority)
777{
778 if ((unsigned)size <= sysctl_optmem_max &&
779 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
780 void *mem;
781 /* First do the add, to avoid the race if kmalloc
782 * might sleep.
783 */
784 atomic_add(size, &sk->sk_omem_alloc);
785 mem = kmalloc(size, priority);
786 if (mem)
787 return mem;
788 atomic_sub(size, &sk->sk_omem_alloc);
789 }
790 return NULL;
791}
792
793/*
794 * Free an option memory block.
795 */
796void sock_kfree_s(struct sock *sk, void *mem, int size)
797{
798 kfree(mem);
799 atomic_sub(size, &sk->sk_omem_alloc);
800}
801
802/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
803 I think, these locks should be removed for datagram sockets.
804 */
805static long sock_wait_for_wmem(struct sock * sk, long timeo)
806{
807 DEFINE_WAIT(wait);
808
809 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
810 for (;;) {
811 if (!timeo)
812 break;
813 if (signal_pending(current))
814 break;
815 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
816 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
817 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
818 break;
819 if (sk->sk_shutdown & SEND_SHUTDOWN)
820 break;
821 if (sk->sk_err)
822 break;
823 timeo = schedule_timeout(timeo);
824 }
825 finish_wait(sk->sk_sleep, &wait);
826 return timeo;
827}
828
829
830/*
831 * Generic send/receive buffer handlers
832 */
833
834static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
835 unsigned long header_len,
836 unsigned long data_len,
837 int noblock, int *errcode)
838{
839 struct sk_buff *skb;
840 unsigned int gfp_mask;
841 long timeo;
842 int err;
843
844 gfp_mask = sk->sk_allocation;
845 if (gfp_mask & __GFP_WAIT)
846 gfp_mask |= __GFP_REPEAT;
847
848 timeo = sock_sndtimeo(sk, noblock);
849 while (1) {
850 err = sock_error(sk);
851 if (err != 0)
852 goto failure;
853
854 err = -EPIPE;
855 if (sk->sk_shutdown & SEND_SHUTDOWN)
856 goto failure;
857
858 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
859 skb = alloc_skb(header_len, sk->sk_allocation);
860 if (skb) {
861 int npages;
862 int i;
863
864 /* No pages, we're done... */
865 if (!data_len)
866 break;
867
868 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
869 skb->truesize += data_len;
870 skb_shinfo(skb)->nr_frags = npages;
871 for (i = 0; i < npages; i++) {
872 struct page *page;
873 skb_frag_t *frag;
874
875 page = alloc_pages(sk->sk_allocation, 0);
876 if (!page) {
877 err = -ENOBUFS;
878 skb_shinfo(skb)->nr_frags = i;
879 kfree_skb(skb);
880 goto failure;
881 }
882
883 frag = &skb_shinfo(skb)->frags[i];
884 frag->page = page;
885 frag->page_offset = 0;
886 frag->size = (data_len >= PAGE_SIZE ?
887 PAGE_SIZE :
888 data_len);
889 data_len -= PAGE_SIZE;
890 }
891
892 /* Full success... */
893 break;
894 }
895 err = -ENOBUFS;
896 goto failure;
897 }
898 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
899 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
900 err = -EAGAIN;
901 if (!timeo)
902 goto failure;
903 if (signal_pending(current))
904 goto interrupted;
905 timeo = sock_wait_for_wmem(sk, timeo);
906 }
907
908 skb_set_owner_w(skb, sk);
909 return skb;
910
911interrupted:
912 err = sock_intr_errno(timeo);
913failure:
914 *errcode = err;
915 return NULL;
916}
917
/*
 * Allocate a send skb with a linear area of @size bytes and no paged
 * data; a thin wrapper around sock_alloc_send_pskb().
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	const unsigned long no_paged_data = 0;

	return sock_alloc_send_pskb(sk, size, no_paged_data, noblock, errcode);
}
923
924static void __lock_sock(struct sock *sk)
925{
926 DEFINE_WAIT(wait);
927
928 for(;;) {
929 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
930 TASK_UNINTERRUPTIBLE);
931 spin_unlock_bh(&sk->sk_lock.slock);
932 schedule();
933 spin_lock_bh(&sk->sk_lock.slock);
934 if(!sock_owned_by_user(sk))
935 break;
936 }
937 finish_wait(&sk->sk_lock.wq, &wait);
938}
939
940static void __release_sock(struct sock *sk)
941{
942 struct sk_buff *skb = sk->sk_backlog.head;
943
944 do {
945 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
946 bh_unlock_sock(sk);
947
948 do {
949 struct sk_buff *next = skb->next;
950
951 skb->next = NULL;
952 sk->sk_backlog_rcv(sk, skb);
953
954 /*
955 * We are in process context here with softirqs
956 * disabled, use cond_resched_softirq() to preempt.
957 * This is safe to do because we've taken the backlog
958 * queue private:
959 */
960 cond_resched_softirq();
961
962 skb = next;
963 } while (skb != NULL);
964
965 bh_lock_sock(sk);
966 } while((skb = sk->sk_backlog.head) != NULL);
967}
968
969/**
970 * sk_wait_data - wait for data to arrive at sk_receive_queue
971 * sk - sock to wait on
972 * timeo - for how long
973 *
974 * Now socket state including sk->sk_err is changed only under lock,
975 * hence we may omit checks after joining wait queue.
976 * We check receive queue before schedule() only as optimization;
977 * it is very likely that release_sock() added new data.
978 */
979int sk_wait_data(struct sock *sk, long *timeo)
980{
981 int rc;
982 DEFINE_WAIT(wait);
983
984 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
985 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
986 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
987 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
988 finish_wait(sk->sk_sleep, &wait);
989 return rc;
990}
991
992EXPORT_SYMBOL(sk_wait_data);
993
994/*
995 * Set of default routines for initialising struct proto_ops when
996 * the protocol does not support a particular function. In certain
997 * cases where it makes no sense for a protocol to have a "do nothing"
998 * function, some default processing is provided.
999 */
1000
/* Default bind() for protocols without bind support: always -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1005
/* Default connect() for protocols without connect support: always -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1011
/* Default socketpair() for protocols that cannot pair sockets: always -EOPNOTSUPP. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1016
/* Default accept() for protocols without accept support: always -EOPNOTSUPP. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1021
/* Default getname() for protocols without address lookup: always -EOPNOTSUPP. */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1027
1028unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1029{
1030 return 0;
1031}
1032
/* Default ioctl() for protocols without ioctl support: always -EOPNOTSUPP. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1037
/* Default listen() for protocols without listen support: always -EOPNOTSUPP. */
int sock_no_listen(struct socket *sock, int backlog)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1042
/* Default shutdown() for protocols without shutdown support: always -EOPNOTSUPP. */
int sock_no_shutdown(struct socket *sock, int how)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1047
1048int sock_no_setsockopt(struct socket *sock, int level, int optname,
1049 char __user *optval, int optlen)
1050{
1051 return -EOPNOTSUPP;
1052}
1053
1054int sock_no_getsockopt(struct socket *sock, int level, int optname,
1055 char __user *optval, int __user *optlen)
1056{
1057 return -EOPNOTSUPP;
1058}
1059
/* Default sendmsg() for protocols that cannot send: always -EOPNOTSUPP. */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1065
/* Default recvmsg() for protocols that cannot receive: always -EOPNOTSUPP. */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	const int err = -EOPNOTSUPP;

	return err;
}
1071
/* Default mmap() for protocols without mmap support: always -ENODEV. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	const int err = -ENODEV;

	return err;
}
1077
1078ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1079{
1080 ssize_t res;
1081 struct msghdr msg = {.msg_flags = flags};
1082 struct kvec iov;
1083 char *kaddr = kmap(page);
1084 iov.iov_base = kaddr + offset;
1085 iov.iov_len = size;
1086 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1087 kunmap(page);
1088 return res;
1089}
1090
1091/*
1092 * Default Socket Callbacks
1093 */
1094
1095static void sock_def_wakeup(struct sock *sk)
1096{
1097 read_lock(&sk->sk_callback_lock);
1098 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1099 wake_up_interruptible_all(sk->sk_sleep);
1100 read_unlock(&sk->sk_callback_lock);
1101}
1102
1103static void sock_def_error_report(struct sock *sk)
1104{
1105 read_lock(&sk->sk_callback_lock);
1106 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1107 wake_up_interruptible(sk->sk_sleep);
1108 sk_wake_async(sk,0,POLL_ERR);
1109 read_unlock(&sk->sk_callback_lock);
1110}
1111
1112static void sock_def_readable(struct sock *sk, int len)
1113{
1114 read_lock(&sk->sk_callback_lock);
1115 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1116 wake_up_interruptible(sk->sk_sleep);
1117 sk_wake_async(sk,1,POLL_IN);
1118 read_unlock(&sk->sk_callback_lock);
1119}
1120
1121static void sock_def_write_space(struct sock *sk)
1122{
1123 read_lock(&sk->sk_callback_lock);
1124
1125 /* Do not wake up a writer until he can make "significant"
1126 * progress. --DaveM
1127 */
1128 if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1129 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1130 wake_up_interruptible(sk->sk_sleep);
1131
1132 /* Should agree with poll, otherwise some programs break */
1133 if (sock_writeable(sk))
1134 sk_wake_async(sk, 2, POLL_OUT);
1135 }
1136
1137 read_unlock(&sk->sk_callback_lock);
1138}
1139
1140static void sock_def_destruct(struct sock *sk)
1141{
1142 if (sk->sk_protinfo)
1143 kfree(sk->sk_protinfo);
1144}
1145
1146void sk_send_sigurg(struct sock *sk)
1147{
1148 if (sk->sk_socket && sk->sk_socket->file)
1149 if (send_sigurg(&sk->sk_socket->file->f_owner))
1150 sk_wake_async(sk, 3, POLL_PRI);
1151}
1152
/*
 * (Re)arm @timer to fire at @expires.  A sock reference is taken when
 * mod_timer() returns 0.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires) == 0)
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);
1161
/*
 * Cancel @timer.  A sock reference is dropped (without freeing) when the
 * timer was pending and del_timer() returns non-zero.
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (!timer_pending(timer))
		return;

	if (del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
1169
1170void sock_init_data(struct socket *sock, struct sock *sk)
1171{
1172 skb_queue_head_init(&sk->sk_receive_queue);
1173 skb_queue_head_init(&sk->sk_write_queue);
1174 skb_queue_head_init(&sk->sk_error_queue);
1175
1176 sk->sk_send_head = NULL;
1177
1178 init_timer(&sk->sk_timer);
1179
1180 sk->sk_allocation = GFP_KERNEL;
1181 sk->sk_rcvbuf = sysctl_rmem_default;
1182 sk->sk_sndbuf = sysctl_wmem_default;
1183 sk->sk_state = TCP_CLOSE;
1184 sk->sk_socket = sock;
1185
1186 sock_set_flag(sk, SOCK_ZAPPED);
1187
1188 if(sock)
1189 {
1190 sk->sk_type = sock->type;
1191 sk->sk_sleep = &sock->wait;
1192 sock->sk = sk;
1193 } else
1194 sk->sk_sleep = NULL;
1195
1196 rwlock_init(&sk->sk_dst_lock);
1197 rwlock_init(&sk->sk_callback_lock);
1198
1199 sk->sk_state_change = sock_def_wakeup;
1200 sk->sk_data_ready = sock_def_readable;
1201 sk->sk_write_space = sock_def_write_space;
1202 sk->sk_error_report = sock_def_error_report;
1203 sk->sk_destruct = sock_def_destruct;
1204
1205 sk->sk_sndmsg_page = NULL;
1206 sk->sk_sndmsg_off = 0;
1207
1208 sk->sk_peercred.pid = 0;
1209 sk->sk_peercred.uid = -1;
1210 sk->sk_peercred.gid = -1;
1211 sk->sk_write_pending = 0;
1212 sk->sk_rcvlowat = 1;
1213 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1214 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
1215
1216 sk->sk_stamp.tv_sec = -1L;
1217 sk->sk_stamp.tv_usec = -1L;
1218
1219 atomic_set(&sk->sk_refcnt, 1);
1220}
1221
1222void fastcall lock_sock(struct sock *sk)
1223{
1224 might_sleep();
1225 spin_lock_bh(&(sk->sk_lock.slock));
1226 if (sk->sk_lock.owner)
1227 __lock_sock(sk);
1228 sk->sk_lock.owner = (void *)1;
1229 spin_unlock_bh(&(sk->sk_lock.slock));
1230}
1231
1232EXPORT_SYMBOL(lock_sock);
1233
1234void fastcall release_sock(struct sock *sk)
1235{
1236 spin_lock_bh(&(sk->sk_lock.slock));
1237 if (sk->sk_backlog.tail)
1238 __release_sock(sk);
1239 sk->sk_lock.owner = NULL;
1240 if (waitqueue_active(&(sk->sk_lock.wq)))
1241 wake_up(&(sk->sk_lock.wq));
1242 spin_unlock_bh(&(sk->sk_lock.slock));
1243}
1244EXPORT_SYMBOL(release_sock);
1245
1246int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1247{
1248 if (!sock_flag(sk, SOCK_TIMESTAMP))
1249 sock_enable_timestamp(sk);
1250 if (sk->sk_stamp.tv_sec == -1)
1251 return -ENOENT;
1252 if (sk->sk_stamp.tv_sec == 0)
1253 do_gettimeofday(&sk->sk_stamp);
1254 return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
1255 -EFAULT : 0;
1256}
1257EXPORT_SYMBOL(sock_get_timestamp);
1258
1259void sock_enable_timestamp(struct sock *sk)
1260{
1261 if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1262 sock_set_flag(sk, SOCK_TIMESTAMP);
1263 net_enable_timestamp();
1264 }
1265}
1266EXPORT_SYMBOL(sock_enable_timestamp);
1267
1268/*
1269 * Get a socket option on an socket.
1270 *
1271 * FIX: POSIX 1003.1g is very ambiguous here. It states that
1272 * asynchronous errors should be reported by getsockopt. We assume
1273 * this means if you specify SO_ERROR (otherwise whats the point of it).
1274 */
1275int sock_common_getsockopt(struct socket *sock, int level, int optname,
1276 char __user *optval, int __user *optlen)
1277{
1278 struct sock *sk = sock->sk;
1279
1280 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1281}
1282
1283EXPORT_SYMBOL(sock_common_getsockopt);
1284
1285int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1286 struct msghdr *msg, size_t size, int flags)
1287{
1288 struct sock *sk = sock->sk;
1289 int addr_len = 0;
1290 int err;
1291
1292 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1293 flags & ~MSG_DONTWAIT, &addr_len);
1294 if (err >= 0)
1295 msg->msg_namelen = addr_len;
1296 return err;
1297}
1298
1299EXPORT_SYMBOL(sock_common_recvmsg);
1300
1301/*
1302 * Set socket options on an inet socket.
1303 */
1304int sock_common_setsockopt(struct socket *sock, int level, int optname,
1305 char __user *optval, int optlen)
1306{
1307 struct sock *sk = sock->sk;
1308
1309 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1310}
1311
1312EXPORT_SYMBOL(sock_common_setsockopt);
1313
1314void sk_common_release(struct sock *sk)
1315{
1316 if (sk->sk_prot->destroy)
1317 sk->sk_prot->destroy(sk);
1318
1319 /*
1320 * Observation: when sock_common_release is called, processes have
1321 * no access to socket. But net still has.
1322 * Step one, detach it from networking:
1323 *
1324 * A. Remove from hash tables.
1325 */
1326
1327 sk->sk_prot->unhash(sk);
1328
1329 /*
1330 * In this point socket cannot receive new packets, but it is possible
1331 * that some packets are in flight because some CPU runs receiver and
1332 * did hash table lookup before we unhashed socket. They will achieve
1333 * receive queue and will be purged by socket destructor.
1334 *
1335 * Also we still have packets pending on receive queue and probably,
1336 * our own packets waiting in device queues. sock_destroy will drain
1337 * receive queue, but transmitted packets will delay socket destruction
1338 * until the last reference will be released.
1339 */
1340
1341 sock_orphan(sk);
1342
1343 xfrm_sk_free_policy(sk);
1344
1345#ifdef INET_REFCNT_DEBUG
1346 if (atomic_read(&sk->sk_refcnt) != 1)
1347 printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
1348 sk, atomic_read(&sk->sk_refcnt));
1349#endif
1350 sock_put(sk);
1351}
1352
1353EXPORT_SYMBOL(sk_common_release);
1354
1355static DEFINE_RWLOCK(proto_list_lock);
1356static LIST_HEAD(proto_list);
1357
1358int proto_register(struct proto *prot, int alloc_slab)
1359{
1360 int rc = -ENOBUFS;
1361
1362 write_lock(&proto_list_lock);
1363
1364 if (alloc_slab) {
1365 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1366 SLAB_HWCACHE_ALIGN, NULL, NULL);
1367
1368 if (prot->slab == NULL) {
1369 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1370 prot->name);
1371 goto out_unlock;
1372 }
1373 }
1374
1375 list_add(&prot->node, &proto_list);
1376 rc = 0;
1377out_unlock:
1378 write_unlock(&proto_list_lock);
1379 return rc;
1380}
1381
1382EXPORT_SYMBOL(proto_register);
1383
1384void proto_unregister(struct proto *prot)
1385{
1386 write_lock(&proto_list_lock);
1387
1388 if (prot->slab != NULL) {
1389 kmem_cache_destroy(prot->slab);
1390 prot->slab = NULL;
1391 }
1392
1393 list_del(&prot->node);
1394 write_unlock(&proto_list_lock);
1395}
1396
1397EXPORT_SYMBOL(proto_unregister);
1398
1399#ifdef CONFIG_PROC_FS
1400static inline struct proto *__proto_head(void)
1401{
1402 return list_entry(proto_list.next, struct proto, node);
1403}
1404
1405static inline struct proto *proto_head(void)
1406{
1407 return list_empty(&proto_list) ? NULL : __proto_head();
1408}
1409
1410static inline struct proto *proto_next(struct proto *proto)
1411{
1412 return proto->node.next == &proto_list ? NULL :
1413 list_entry(proto->node.next, struct proto, node);
1414}
1415
1416static inline struct proto *proto_get_idx(loff_t pos)
1417{
1418 struct proto *proto;
1419 loff_t i = 0;
1420
1421 list_for_each_entry(proto, &proto_list, node)
1422 if (i++ == pos)
1423 goto out;
1424
1425 proto = NULL;
1426out:
1427 return proto;
1428}
1429
1430static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1431{
1432 read_lock(&proto_list_lock);
1433 return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1434}
1435
1436static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1437{
1438 ++*pos;
1439 return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1440}
1441
1442static void proto_seq_stop(struct seq_file *seq, void *v)
1443{
1444 read_unlock(&proto_list_lock);
1445}
1446
/* Map a method pointer to 'y' (set) or 'n' (NULL) for the proc listing. */
static char proto_method_implemented(const void *method)
{
	if (method != NULL)
		return 'y';

	return 'n';
}
1451
1452static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1453{
1454 seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
1455 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1456 proto->name,
1457 proto->obj_size,
1458 proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1459 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1460 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1461 proto->max_header,
1462 proto->slab == NULL ? "no" : "yes",
1463 module_name(proto->owner),
1464 proto_method_implemented(proto->close),
1465 proto_method_implemented(proto->connect),
1466 proto_method_implemented(proto->disconnect),
1467 proto_method_implemented(proto->accept),
1468 proto_method_implemented(proto->ioctl),
1469 proto_method_implemented(proto->init),
1470 proto_method_implemented(proto->destroy),
1471 proto_method_implemented(proto->shutdown),
1472 proto_method_implemented(proto->setsockopt),
1473 proto_method_implemented(proto->getsockopt),
1474 proto_method_implemented(proto->sendmsg),
1475 proto_method_implemented(proto->recvmsg),
1476 proto_method_implemented(proto->sendpage),
1477 proto_method_implemented(proto->bind),
1478 proto_method_implemented(proto->backlog_rcv),
1479 proto_method_implemented(proto->hash),
1480 proto_method_implemented(proto->unhash),
1481 proto_method_implemented(proto->get_port),
1482 proto_method_implemented(proto->enter_memory_pressure));
1483}
1484
1485static int proto_seq_show(struct seq_file *seq, void *v)
1486{
1487 if (v == SEQ_START_TOKEN)
1488 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1489 "protocol",
1490 "size",
1491 "sockets",
1492 "memory",
1493 "press",
1494 "maxhdr",
1495 "slab",
1496 "module",
1497 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1498 else
1499 proto_seq_printf(seq, v);
1500 return 0;
1501}
1502
1503static struct seq_operations proto_seq_ops = {
1504 .start = proto_seq_start,
1505 .next = proto_seq_next,
1506 .stop = proto_seq_stop,
1507 .show = proto_seq_show,
1508};
1509
1510static int proto_seq_open(struct inode *inode, struct file *file)
1511{
1512 return seq_open(file, &proto_seq_ops);
1513}
1514
1515static struct file_operations proto_seq_fops = {
1516 .owner = THIS_MODULE,
1517 .open = proto_seq_open,
1518 .read = seq_read,
1519 .llseek = seq_lseek,
1520 .release = seq_release,
1521};
1522
1523static int __init proto_init(void)
1524{
1525 /* register /proc/net/protocols */
1526 return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1527}
1528
1529subsys_initcall(proto_init);
1530
1531#endif /* PROC_FS */
1532
/* Symbols exported to modules. */

/* sock allocation, setup and signalling */
EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);

/* sock_no_* handlers */
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);

/* remaining generic helpers */
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);

/* sysctl tunables, present only when sysctl support is configured */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif