Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h | 262
1 file changed, 112 insertions(+), 150 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 6e1542da33a1..902324488d0f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -47,6 +47,7 @@
 #include <linux/module.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/pcounter.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
@@ -56,7 +57,6 @@
 #include <asm/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
-#include <net/net_namespace.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -94,6 +94,7 @@ typedef struct {
 
 struct sock;
 struct proto;
+struct net;
 
 /**
  * struct sock_common - minimal network layer representation of sockets
@@ -145,7 +146,8 @@ struct sock_common {
  * @sk_forward_alloc: space allocated forward
  * @sk_allocation: allocation mode
  * @sk_sndbuf: size of send buffer in bytes
- * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
+ * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
+ *	      %SO_OOBINLINE settings
  * @sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
  * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -153,9 +155,12 @@ struct sock_common {
 * @sk_backlog: always used with the per-socket spinlock held
 * @sk_callback_lock: used with the callbacks in the end of this struct
 * @sk_error_queue: rarely used
- * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
+ * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
+ *		     IPV6_ADDRFORM for instance)
 * @sk_err: last error
- * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
+ * @sk_err_soft: errors that don't cause failure but are the cause of a
+ *		 persistent failure not just 'timed out'
+ * @sk_drops: raw drops counter
 * @sk_ack_backlog: current listen backlog
 * @sk_max_ack_backlog: listen backlog set in listen()
 * @sk_priority: %SO_PRIORITY setting
@@ -239,6 +244,7 @@ struct sock {
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
+	atomic_t		sk_drops;
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
@@ -439,7 +445,7 @@ static inline int sk_acceptq_is_full(struct sock *sk)
  */
 static inline int sk_stream_min_wspace(struct sock *sk)
 {
-	return sk->sk_wmem_queued / 2;
+	return sk->sk_wmem_queued >> 1;
 }
 
 static inline int sk_stream_wspace(struct sock *sk)
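(For the non-negative byte counts involved here, x >> 1 equals x / 2 -- e.g. 4096 >> 1 == 2048 -- and the shift form merely avoids the sign-fixup code a compiler must emit for signed division. The same substitution recurs in sk_stream_moderate_sndbuf() and sock_writeable() below.)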
@@ -454,25 +460,6 @@ static inline int sk_stream_memory_free(struct sock *sk)
 	return sk->sk_wmem_queued < sk->sk_sndbuf;
 }
 
-extern void sk_stream_rfree(struct sk_buff *skb);
-
-static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
-	skb->sk = sk;
-	skb->destructor = sk_stream_rfree;
-	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-	sk->sk_forward_alloc -= skb->truesize;
-}
-
-static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb)
-{
-	skb_truesize_check(skb);
-	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
-	sk->sk_wmem_queued -= skb->truesize;
-	sk->sk_forward_alloc += skb->truesize;
-	__kfree_skb(skb);
-}
-
 /* The per-socket spinlock must be held here. */
 static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
@@ -560,14 +547,11 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 
-#ifdef CONFIG_SMP
 	/* Keeping track of sockets in use */
-	void			(*inuse_add)(struct proto *prot, int inc);
-	int			(*inuse_getval)(const struct proto *prot);
-	int			*inuse_ptr;
-#else
-	int			inuse;
+#ifdef CONFIG_PROC_FS
+	struct pcounter		inuse;
 #endif
+
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(void);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
@@ -575,7 +559,7 @@ struct proto {
 	/*
 	 * Pressure flag: try to collapse.
 	 * Technical note: it is used by multiple contexts non atomically.
-	 * All the sk_stream_mem_schedule() is of this nature: accounting
+	 * All the __sk_mem_schedule() is of this nature: accounting
 	 * is strict, actions are advisory and have some latency.
 	 */
 	int			*memory_pressure;
@@ -602,36 +586,6 @@ struct proto {
 #endif
 };
 
-/*
- * Special macros to let protos use a fast version of inuse{get|add}
- * using a static percpu variable per proto instead of an allocated one,
- * saving one dereference.
- * This might be changed if/when dynamic percpu vars become fast.
- */
-#ifdef CONFIG_SMP
-# define DEFINE_PROTO_INUSE(NAME)				\
-static DEFINE_PER_CPU(int, NAME##_inuse);			\
-static void NAME##_inuse_add(struct proto *prot, int inc)	\
-{								\
-	__get_cpu_var(NAME##_inuse) += inc;			\
-}								\
-								\
-static int NAME##_inuse_getval(const struct proto *prot)	\
-{								\
-	int res = 0, cpu;					\
-								\
-	for_each_possible_cpu(cpu)				\
-		res += per_cpu(NAME##_inuse, cpu);		\
-	return res;						\
-}
-# define REF_PROTO_INUSE(NAME)					\
-	.inuse_add = NAME##_inuse_add,				\
-	.inuse_getval = NAME##_inuse_getval,
-#else
-# define DEFINE_PROTO_INUSE(NAME)
-# define REF_PROTO_INUSE(NAME)
-#endif
-
 extern int proto_register(struct proto *prot, int alloc_slab);
 extern void proto_unregister(struct proto *prot);
 
@@ -660,33 +614,42 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+
+#ifdef CONFIG_PROC_FS
+# define DEFINE_PROTO_INUSE(NAME) DEFINE_PCOUNTER(NAME)
+# define REF_PROTO_INUSE(NAME) PCOUNTER_MEMBER_INITIALIZER(NAME, .inuse)
 /* Called with local bh disabled */
-static __inline__ void sock_prot_inc_use(struct proto *prot)
-{
-#ifdef CONFIG_SMP
-	prot->inuse_add(prot, 1);
-#else
-	prot->inuse++;
-#endif
-}
-
-static __inline__ void sock_prot_dec_use(struct proto *prot)
-{
-#ifdef CONFIG_SMP
-	prot->inuse_add(prot, -1);
-#else
-	prot->inuse--;
-#endif
-}
-
-static __inline__ int sock_prot_inuse(struct proto *proto)
-{
-#ifdef CONFIG_SMP
-	return proto->inuse_getval(proto);
-#else
-	return proto->inuse;
-#endif
-}
+static inline void sock_prot_inuse_add(struct proto *prot, int inc)
+{
+	pcounter_add(&prot->inuse, inc);
+}
+static inline int sock_prot_inuse_init(struct proto *proto)
+{
+	return pcounter_alloc(&proto->inuse);
+}
+static inline int sock_prot_inuse_get(struct proto *proto)
+{
+	return pcounter_getval(&proto->inuse);
+}
+static inline void sock_prot_inuse_free(struct proto *proto)
+{
+	pcounter_free(&proto->inuse);
+}
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+static inline void sock_prot_inuse_add(struct proto *prot, int inc)
+{
+}
+static inline int sock_prot_inuse_init(struct proto *proto)
+{
+	return 0;
+}
+static inline void sock_prot_inuse_free(struct proto *proto)
+{
+}
+#endif
+
 
 /* With per-bucket locks this operation is not-atomic, so that
  * this version is not worse.
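For reference, a protocol would plug into the reworked inuse bookkeeping roughly as follows -- a sketch with a hypothetical example_prot; the macros and helpers are the ones defined in this hunk, and sock_prot_inuse_add() must be called with local bh disabled as noted above:

	DEFINE_PROTO_INUSE(example_prot)	/* per-CPU counter storage */

	struct proto example_prot = {
		.name	= "EXAMPLE",
		/* ... */
		REF_PROTO_INUSE(example_prot)	/* wires up .inuse */
	};

	static void example_hash(struct sock *sk)
	{
		/* bookkeeping when a socket becomes visible */
		sock_prot_inuse_add(sk->sk_prot, 1);
	}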
@@ -750,32 +713,81 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
 }
 
-extern void __sk_stream_mem_reclaim(struct sock *sk);
-extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
-
-#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
-
-static inline int sk_stream_pages(int amt)
-{
-	return DIV_ROUND_UP(amt, SK_STREAM_MEM_QUANTUM);
-}
-
-static inline void sk_stream_mem_reclaim(struct sock *sk)
-{
-	if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM)
-		__sk_stream_mem_reclaim(sk);
-}
-
-static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
-{
-	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, skb->truesize, 1);
-}
-
-static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
-{
-	return size <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, size, 0);
+/*
+ * Functions for memory accounting
+ */
+extern int __sk_mem_schedule(struct sock *sk, int size, int kind);
+extern void __sk_mem_reclaim(struct sock *sk);
+
+#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
+#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
+#define SK_MEM_SEND	0
+#define SK_MEM_RECV	1
+
+static inline int sk_mem_pages(int amt)
+{
+	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
+}
+
+static inline int sk_has_account(struct sock *sk)
+{
+	/* return true if protocol supports memory accounting */
+	return !!sk->sk_prot->memory_allocated;
+}
+
+static inline int sk_wmem_schedule(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return 1;
+	return size <= sk->sk_forward_alloc ||
+		__sk_mem_schedule(sk, size, SK_MEM_SEND);
+}
+
+static inline int sk_rmem_schedule(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return 1;
+	return size <= sk->sk_forward_alloc ||
+		__sk_mem_schedule(sk, size, SK_MEM_RECV);
+}
+
+static inline void sk_mem_reclaim(struct sock *sk)
+{
+	if (!sk_has_account(sk))
+		return;
+	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
+		__sk_mem_reclaim(sk);
+}
+
+static inline void sk_mem_reclaim_partial(struct sock *sk)
+{
+	if (!sk_has_account(sk))
+		return;
+	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
+		__sk_mem_reclaim(sk);
+}
+
+static inline void sk_mem_charge(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return;
+	sk->sk_forward_alloc -= size;
+}
+
+static inline void sk_mem_uncharge(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return;
+	sk->sk_forward_alloc += size;
+}
+
+static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
+{
+	skb_truesize_check(skb);
+	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+	sk->sk_wmem_queued -= skb->truesize;
+	sk_mem_uncharge(sk, skb->truesize);
+	__kfree_skb(skb);
 }
 
 /* Used by processes to "lock" a socket state, so that
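The intended calling pattern for the new helpers, sketched for a hypothetical send path (note that sk_mem_pages() rounds up to whole SK_MEM_QUANTUM units: with a 4096-byte quantum, sk_mem_pages(4097) == 2):

	static int example_queue_for_send(struct sock *sk, struct sk_buff *skb)
	{
		/* reserve accounting room, possibly growing forward_alloc */
		if (!sk_wmem_schedule(sk, skb->truesize))
			return -ENOBUFS;		/* over memory limits */

		sk_mem_charge(sk, skb->truesize);	/* consume forward_alloc */
		sk->sk_wmem_queued += skb->truesize;
		return 0;
	}

	static void example_destroy(struct sock *sk)
	{
		sk_mem_reclaim(sk);	/* hand whole quanta back to the pool */
	}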
@@ -812,14 +824,14 @@ do { \
 	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
 } while (0)
 
-extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass));
+extern void lock_sock_nested(struct sock *sk, int subclass);
 
 static inline void lock_sock(struct sock *sk)
 {
 	lock_sock_nested(sk, 0);
 }
 
-extern void FASTCALL(release_sock(struct sock *sk));
+extern void release_sock(struct sock *sk);
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
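The FASTCALL() annotation disappears here (presumably part of the tree-wide FASTCALL removal of this era; it was a no-op on most architectures), so the calling convention for users is unchanged:

	static void example_locked_op(struct sock *sk)
	{
		lock_sock(sk);		/* may sleep; process context only */
		/* ... modify socket state ... */
		release_sock(sk);	/* also runs the backlog queue */
	}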
@@ -1113,12 +1125,6 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
-static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
-{
-	sk->sk_wmem_queued += skb->truesize;
-	sk->sk_forward_alloc -= skb->truesize;
-}
-
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
@@ -1138,7 +1144,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 	skb->data_len += copy;
 	skb->truesize += copy;
 	sk->sk_wmem_queued += copy;
-	sk->sk_forward_alloc -= copy;
+	sk_mem_charge(sk, copy);
 	return 0;
 }
 
@@ -1164,6 +1170,7 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, skb->truesize);
 }
 
 extern void sk_reset_timer(struct sock *sk, struct timer_list* timer,
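With skb_set_owner_r() now charging receive memory itself, a receive path pairs it with sk_rmem_schedule() from the accounting block above -- a minimal sketch with a hypothetical caller:

	static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
	{
		if (!sk_rmem_schedule(sk, skb->truesize))
			return -ENOBUFS;

		skb_set_owner_r(skb, sk);	/* charges skb->truesize */
		skb_queue_tail(&sk->sk_receive_queue, skb);
		return 0;
	}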
@@ -1225,45 +1232,12 @@ static inline void sk_wake_async(struct sock *sk, int how, int band)
 static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 {
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
-		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued / 2);
+		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
 		sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
 	}
 }
 
-static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
-						   int size, int mem,
-						   gfp_t gfp)
-{
-	struct sk_buff *skb;
-
-	/* The TCP header must be at least 32-bit aligned.  */
-	size = ALIGN(size, 4);
-
-	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
-	if (skb) {
-		skb->truesize += mem;
-		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
-			/*
-			 * Make sure that we have exactly size bytes
-			 * available to the caller, no more, no less.
-			 */
-			skb_reserve(skb, skb_tailroom(skb) - size);
-			return skb;
-		}
-		__kfree_skb(skb);
-	} else {
-		sk->sk_prot->enter_memory_pressure();
-		sk_stream_moderate_sndbuf(sk);
-	}
-	return NULL;
-}
-
-static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk,
-						  int size,
-						  gfp_t gfp)
-{
-	return sk_stream_alloc_pskb(sk, size, 0, gfp);
-}
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
 
 static inline struct page *sk_stream_alloc_page(struct sock *sk)
 {
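sk_stream_alloc_skb() shrinks to an out-of-line prototype; the unused 'mem' parameter of sk_stream_alloc_pskb() is gone, and the memory-pressure fallback presumably moves with the body. Callers keep the same three-argument form:

	static struct sk_buff *example_get_tx_skb(struct sock *sk, int size)
	{
		/* NULL here means the allocator already entered memory
		 * pressure and moderated sk_sndbuf, as the old inline did */
		return sk_stream_alloc_skb(sk, size, sk->sk_allocation);
	}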
@@ -1282,7 +1256,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
  */
 static inline int sock_writeable(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2);
+	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
 }
 
 static inline gfp_t gfp_any(void)
@@ -1391,23 +1365,11 @@ extern int net_msg_warn;
 		lock_sock(sk);	\
 	}
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
 extern void sk_init(void);
 
-#ifdef CONFIG_SYSCTL
-extern struct ctl_table core_table[];
-#endif
-
 extern int sysctl_optmem_max;
 
 extern __u32 sysctl_wmem_default;