path: root/include/net/sock.h
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--   include/net/sock.h   236
1 file changed, 121 insertions(+), 115 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 43fc3fa50d62..e3fb4c047f4c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -47,6 +47,7 @@
 #include <linux/module.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/pcounter.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
@@ -56,7 +57,6 @@
 #include <asm/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
-#include <net/net_namespace.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -94,6 +94,7 @@ typedef struct {
 
 struct sock;
 struct proto;
+struct net;
 
 /**
  * struct sock_common - minimal network layer representation of sockets
@@ -145,7 +146,8 @@ struct sock_common {
  *	@sk_forward_alloc: space allocated forward
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
- *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
+ *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
+ *		   %SO_OOBINLINE settings
  *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -153,9 +155,12 @@ struct sock_common {
  *	@sk_backlog: always used with the per-socket spinlock held
  *	@sk_callback_lock: used with the callbacks in the end of this struct
  *	@sk_error_queue: rarely used
- *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
+ *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
+ *			  IPV6_ADDRFORM for instance)
  *	@sk_err: last error
- *	@sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
+ *	@sk_err_soft: errors that don't cause failure but are the cause of a
+ *			  persistent failure not just 'timed out'
+ *	@sk_drops: raw drops counter
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_priority: %SO_PRIORITY setting
@@ -239,6 +244,7 @@ struct sock {
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
+	atomic_t		sk_drops;
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
@@ -256,6 +262,8 @@ struct sock {
 	__u32			sk_sndmsg_off;
 	int			sk_write_pending;
 	void			*sk_security;
+	__u32			sk_mark;
+	/* XXX 4 bytes hole on 64 bit */
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -439,7 +447,7 @@ static inline int sk_acceptq_is_full(struct sock *sk)
  */
 static inline int sk_stream_min_wspace(struct sock *sk)
 {
-	return sk->sk_wmem_queued / 2;
+	return sk->sk_wmem_queued >> 1;
 }
 
 static inline int sk_stream_wspace(struct sock *sk)
@@ -454,25 +462,6 @@ static inline int sk_stream_memory_free(struct sock *sk)
 	return sk->sk_wmem_queued < sk->sk_sndbuf;
 }
 
-extern void sk_stream_rfree(struct sk_buff *skb);
-
-static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
-	skb->sk = sk;
-	skb->destructor = sk_stream_rfree;
-	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-	sk->sk_forward_alloc -= skb->truesize;
-}
-
-static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb)
-{
-	skb_truesize_check(skb);
-	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
-	sk->sk_wmem_queued -= skb->truesize;
-	sk->sk_forward_alloc += skb->truesize;
-	__kfree_skb(skb);
-}
-
 /* The per-socket spinlock must be held here. */
 static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
@@ -560,6 +549,11 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 
+	/* Keeping track of sockets in use */
+#ifdef CONFIG_PROC_FS
+	struct pcounter		inuse;
+#endif
+
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(void);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
@@ -567,7 +561,7 @@ struct proto {
 	/*
 	 * Pressure flag: try to collapse.
 	 * Technical note: it is used by multiple contexts non atomically.
-	 * All the sk_stream_mem_schedule() is of this nature: accounting
+	 * All the __sk_mem_schedule() is of this nature: accounting
 	 * is strict, actions are advisory and have some latency.
 	 */
 	int			*memory_pressure;
@@ -592,10 +586,6 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-	struct {
-		int inuse;
-		u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
-	} stats[NR_CPUS];
 };
 
 extern int proto_register(struct proto *prot, int alloc_slab);
@@ -626,16 +616,42 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+
+#ifdef CONFIG_PROC_FS
+# define DEFINE_PROTO_INUSE(NAME) DEFINE_PCOUNTER(NAME)
+# define REF_PROTO_INUSE(NAME) PCOUNTER_MEMBER_INITIALIZER(NAME, .inuse)
 /* Called with local bh disabled */
-static __inline__ void sock_prot_inc_use(struct proto *prot)
+static inline void sock_prot_inuse_add(struct proto *prot, int inc)
 {
-	prot->stats[smp_processor_id()].inuse++;
+	pcounter_add(&prot->inuse, inc);
 }
-
-static __inline__ void sock_prot_dec_use(struct proto *prot)
+static inline int sock_prot_inuse_init(struct proto *proto)
+{
+	return pcounter_alloc(&proto->inuse);
+}
+static inline int sock_prot_inuse_get(struct proto *proto)
+{
+	return pcounter_getval(&proto->inuse);
+}
+static inline void sock_prot_inuse_free(struct proto *proto)
+{
+	pcounter_free(&proto->inuse);
+}
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+static void inline sock_prot_inuse_add(struct proto *prot, int inc)
+{
+}
+static int inline sock_prot_inuse_init(struct proto *proto)
 {
-	prot->stats[smp_processor_id()].inuse--;
+	return 0;
 }
+static void inline sock_prot_inuse_free(struct proto *proto)
+{
+}
+#endif
+
 
 /* With per-bucket locks this operation is not-atomic, so that
  * this version is not worse.
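
For context: a minimal sketch of how a protocol might hook into the pcounter-based inuse accounting introduced in the hunk above. The my_proto names are illustrative and not part of this header; the pattern assumes the usual kernel context with this header included.

/* Illustrative only: a made-up protocol wiring up the per-CPU inuse counter. */
DEFINE_PROTO_INUSE(my_proto)			/* backing storage when CONFIG_PROC_FS=y */

struct proto my_proto_prot = {
	.name		= "MYPROTO",
	.obj_size	= sizeof(struct sock),
	REF_PROTO_INUSE(my_proto)		/* initializes .inuse from the pcounter above */
};

/* Called with local bh disabled, as the comment above requires. */
static void my_proto_hash(struct sock *sk)
{
	sock_prot_inuse_add(sk->sk_prot, 1);	/* socket becomes visible/hashed */
}

static void my_proto_unhash(struct sock *sk)
{
	sock_prot_inuse_add(sk->sk_prot, -1);	/* and is dropped again on unhash */
}

A /proc handler would then report sock_prot_inuse_get(&my_proto_prot) instead of summing the old per-CPU stats[] array.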
@@ -699,32 +715,81 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
 }
 
-extern void __sk_stream_mem_reclaim(struct sock *sk);
-extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
+/*
+ * Functions for memory accounting
+ */
+extern int __sk_mem_schedule(struct sock *sk, int size, int kind);
+extern void __sk_mem_reclaim(struct sock *sk);
 
-#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
+#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
+#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
+#define SK_MEM_SEND	0
+#define SK_MEM_RECV	1
 
-static inline int sk_stream_pages(int amt)
+static inline int sk_mem_pages(int amt)
 {
-	return DIV_ROUND_UP(amt, SK_STREAM_MEM_QUANTUM);
+	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
 }
 
-static inline void sk_stream_mem_reclaim(struct sock *sk)
+static inline int sk_has_account(struct sock *sk)
 {
-	if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM)
-		__sk_stream_mem_reclaim(sk);
+	/* return true if protocol supports memory accounting */
+	return !!sk->sk_prot->memory_allocated;
 }
 
-static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
+static inline int sk_wmem_schedule(struct sock *sk, int size)
 {
-	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, skb->truesize, 1);
+	if (!sk_has_account(sk))
+		return 1;
+	return size <= sk->sk_forward_alloc ||
+		__sk_mem_schedule(sk, size, SK_MEM_SEND);
 }
 
-static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
+static inline int sk_rmem_schedule(struct sock *sk, int size)
 {
+	if (!sk_has_account(sk))
+		return 1;
 	return size <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, size, 0);
+		__sk_mem_schedule(sk, size, SK_MEM_RECV);
+}
+
+static inline void sk_mem_reclaim(struct sock *sk)
+{
+	if (!sk_has_account(sk))
+		return;
+	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
+		__sk_mem_reclaim(sk);
+}
+
+static inline void sk_mem_reclaim_partial(struct sock *sk)
+{
+	if (!sk_has_account(sk))
+		return;
+	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
+		__sk_mem_reclaim(sk);
+}
+
+static inline void sk_mem_charge(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return;
+	sk->sk_forward_alloc -= size;
+}
+
+static inline void sk_mem_uncharge(struct sock *sk, int size)
+{
+	if (!sk_has_account(sk))
+		return;
+	sk->sk_forward_alloc += size;
+}
+
+static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
+{
+	skb_truesize_check(skb);
+	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+	sk->sk_wmem_queued -= skb->truesize;
+	sk_mem_uncharge(sk, skb->truesize);
+	__kfree_skb(skb);
 }
 
 /* Used by processes to "lock" a socket state, so that
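
For context: a rough sketch of the charge/uncharge lifecycle the new sk_mem_* helpers above are meant to support, assuming a protocol that sets ->memory_allocated. The example_queue_for_send name is illustrative, not an API from this header.

/* Illustrative only: typical send-side accounting with the new helpers. */
static int example_queue_for_send(struct sock *sk, struct sk_buff *skb)
{
	/* Reserve forward_alloc; may call __sk_mem_schedule() to grab more quanta. */
	if (!sk_wmem_schedule(sk, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_w(skb, sk);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);	/* consume the reservation */
	skb_queue_tail(&sk->sk_write_queue, skb);
	return 0;
}

/* When the skb later leaves the write queue, sk_wmem_free_skb() returns the
 * charge via sk_mem_uncharge(), and sk_mem_reclaim() hands whole SK_MEM_QUANTUM
 * pages back to the protocol's memory_allocated pool, e.g. on socket teardown. */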
@@ -761,14 +826,14 @@ do { \
 	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
 } while (0)
 
-extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass));
+extern void lock_sock_nested(struct sock *sk, int subclass);
 
 static inline void lock_sock(struct sock *sk)
 {
 	lock_sock_nested(sk, 0);
 }
 
-extern void FASTCALL(release_sock(struct sock *sk));
+extern void release_sock(struct sock *sk);
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
@@ -779,7 +844,7 @@ extern void FASTCALL(release_sock(struct sock *sk));
 
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
-					  struct proto *prot, int zero_it);
+					  struct proto *prot);
 extern void			sk_free(struct sock *sk);
 extern struct sock		*sk_clone(const struct sock *sk,
 					  const gfp_t priority);
@@ -893,7 +958,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 		return err;
 
 	rcu_read_lock_bh();
-	filter = sk->sk_filter;
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
 				filter->len);
@@ -993,20 +1058,6 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
-static inline void sock_copy(struct sock *nsk, const struct sock *osk)
-{
-#ifdef CONFIG_SECURITY_NETWORK
-	void *sptr = nsk->sk_security;
-#endif
-
-	memcpy(nsk, osk, osk->sk_prot->obj_size);
-	get_net(nsk->sk_net);
-#ifdef CONFIG_SECURITY_NETWORK
-	nsk->sk_security = sptr;
-	security_sk_clone(osk, nsk);
-#endif
-}
-
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
@@ -1076,12 +1127,6 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
-static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
-{
-	sk->sk_wmem_queued   += skb->truesize;
-	sk->sk_forward_alloc -= skb->truesize;
-}
-
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
@@ -1101,7 +1146,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 	skb->data_len	   += copy;
 	skb->truesize	   += copy;
 	sk->sk_wmem_queued += copy;
-	sk->sk_forward_alloc -= copy;
+	sk_mem_charge(sk, copy);
 	return 0;
 }
 
@@ -1127,6 +1172,7 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, skb->truesize);
 }
 
 extern void sk_reset_timer(struct sock *sk, struct timer_list* timer,
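
For context: since skb_set_owner_r() now charges forward_alloc through sk_mem_charge(), a receive path is expected to reserve the memory with sk_rmem_schedule() first. A minimal sketch (the example_queue_rcv_skb name is illustrative):

/* Illustrative only: receive-queue pattern paired with the change above. */
static int example_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned int)sk->sk_rcvbuf)
		return -ENOMEM;

	if (!sk_rmem_schedule(sk, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_r(skb, sk);	/* charges sk->sk_forward_alloc */
	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, skb->len);
	return 0;
}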
@@ -1188,40 +1234,12 @@ static inline void sk_wake_async(struct sock *sk, int how, int band)
 static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 {
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
-		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued / 2);
+		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
 		sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
 	}
 }
 
-static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
-						   int size, int mem,
-						   gfp_t gfp)
-{
-	struct sk_buff *skb;
-	int hdr_len;
-
-	hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-	skb = alloc_skb_fclone(size + hdr_len, gfp);
-	if (skb) {
-		skb->truesize += mem;
-		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
-			skb_reserve(skb, hdr_len);
-			return skb;
-		}
-		__kfree_skb(skb);
-	} else {
-		sk->sk_prot->enter_memory_pressure();
-		sk_stream_moderate_sndbuf(sk);
-	}
-	return NULL;
-}
-
-static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk,
-						   int size,
-						   gfp_t gfp)
-{
-	return sk_stream_alloc_pskb(sk, size, 0, gfp);
-}
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
 
 static inline struct page *sk_stream_alloc_page(struct sock *sk)
 {
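
For context: with the inline sk_stream_alloc_pskb()/sk_stream_alloc_skb() pair replaced by a single out-of-line declaration, call sites reduce to roughly the shape below. The wrapper name is illustrative; on failure the out-of-line allocator is expected to behave like the removed inline version (enter memory pressure and moderate sk_sndbuf).

/* Illustrative only: the usual call-site shape after this change. */
static struct sk_buff *example_alloc_for_send(struct sock *sk, int size)
{
	struct sk_buff *skb;

	skb = sk_stream_alloc_skb(sk, size, sk->sk_allocation);
	if (!skb)
		return NULL;	/* allocator already handled memory pressure */
	return skb;
}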
@@ -1240,7 +1258,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
  */
 static inline int sock_writeable(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2);
+	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
 }
 
 static inline gfp_t gfp_any(void)
@@ -1349,23 +1367,11 @@ extern int net_msg_warn;
 		lock_sock(sk); \
 	}
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
 extern void sk_init(void);
 
-#ifdef CONFIG_SYSCTL
-extern struct ctl_table core_table[];
-#endif
-
 extern int sysctl_optmem_max;
 
 extern __u32 sysctl_wmem_default;