diff options
author | Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | 2005-08-09 23:09:30 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 18:42:13 -0400 |
commit | 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 (patch) | |
tree | ddd004afe2f7c8295f6fdb94d34f78a42b5961cb /include | |
parent | 33b62231908c58ae04185e4f1063d1e35a7c8576 (diff) |
[INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets
This paves the way to generalise the rest of the sock ID lookup
routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro
kernels (where IPv6 is always built as a module):
[root@qemu ~]# grep tw_sock /proc/slabinfo
tw_sock_TCPv6 0 0 128 31 1
tw_sock_TCP 0 0 96 41 1
[root@qemu ~]#
Now if a protocol wants to use the generic TIME_WAIT infrastructure, it
only has to set the sk_prot->twsk_obj_size field to the size of its
inet_timewait_sock-derived sock, and proto_register will create
sk_prot->twsk_slab. For now it's only for INET sockets, but we can
introduce timewait_sock later if some non-INET transport protocol
wants to use this stuff.
Next changesets will take advantage of this new infrastructure to
generalise even more TCP code.
[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o
/tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o
[acme@toy net-2.6.14]$
Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1
(qemu host)).
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/ipv6.h | 52 | ||||
-rw-r--r-- | include/linux/tcp.h | 15 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 41 | ||||
-rw-r--r-- | include/net/inet_timewait_sock.h | 142 | ||||
-rw-r--r-- | include/net/sock.h | 17 | ||||
-rw-r--r-- | include/net/tcp.h | 202 |
6 files changed, 262 insertions, 207 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade24..98fa32316e40 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h | |||
@@ -308,6 +308,41 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, | |||
308 | 308 | ||
309 | #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) | 309 | #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) |
310 | #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) | 310 | #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) |
311 | |||
312 | #include <linux/tcp.h> | ||
313 | |||
314 | struct tcp6_timewait_sock { | ||
315 | struct tcp_timewait_sock tw_v6_sk; | ||
316 | struct in6_addr tw_v6_daddr; | ||
317 | struct in6_addr tw_v6_rcv_saddr; | ||
318 | }; | ||
319 | |||
320 | static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) | ||
321 | { | ||
322 | return (struct tcp6_timewait_sock *)sk; | ||
323 | } | ||
324 | |||
325 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
326 | { | ||
327 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
328 | &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; | ||
329 | } | ||
330 | |||
331 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
332 | { | ||
333 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
334 | } | ||
335 | |||
336 | static inline int tcp_twsk_ipv6only(const struct sock *sk) | ||
337 | { | ||
338 | return inet_twsk(sk)->tw_ipv6only; | ||
339 | } | ||
340 | |||
341 | static inline int tcp_v6_ipv6only(const struct sock *sk) | ||
342 | { | ||
343 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
344 | ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); | ||
345 | } | ||
311 | #else | 346 | #else |
312 | #define __ipv6_only_sock(sk) 0 | 347 | #define __ipv6_only_sock(sk) 0 |
313 | #define ipv6_only_sock(sk) 0 | 348 | #define ipv6_only_sock(sk) 0 |
@@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) | |||
322 | return NULL; | 357 | return NULL; |
323 | } | 358 | } |
324 | 359 | ||
325 | #endif | 360 | #define __tcp_v6_rcv_saddr(__sk) NULL |
361 | #define tcp_v6_rcv_saddr(__sk) NULL | ||
362 | #define tcp_twsk_ipv6only(__sk) 0 | ||
363 | #define tcp_v6_ipv6only(__sk) 0 | ||
364 | #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ | ||
326 | 365 | ||
327 | #endif | 366 | #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ |
367 | (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
368 | ((__sk)->sk_family == AF_INET6) && \ | ||
369 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
370 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
371 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
328 | 372 | ||
329 | #endif | 373 | #endif /* __KERNEL__ */ |
374 | |||
375 | #endif /* _IPV6_H */ | ||
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b88fe05fdcbf..5d295b1b3de7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -179,6 +179,7 @@ struct tcp_info | |||
179 | #include <linux/skbuff.h> | 179 | #include <linux/skbuff.h> |
180 | #include <linux/ip.h> | 180 | #include <linux/ip.h> |
181 | #include <net/sock.h> | 181 | #include <net/sock.h> |
182 | #include <net/inet_timewait_sock.h> | ||
182 | 183 | ||
183 | /* This defines a selective acknowledgement block. */ | 184 | /* This defines a selective acknowledgement block. */ |
184 | struct tcp_sack_block { | 185 | struct tcp_sack_block { |
@@ -387,6 +388,20 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) | |||
387 | return (struct tcp_sock *)sk; | 388 | return (struct tcp_sock *)sk; |
388 | } | 389 | } |
389 | 390 | ||
391 | struct tcp_timewait_sock { | ||
392 | struct inet_timewait_sock tw_sk; | ||
393 | __u32 tw_rcv_nxt; | ||
394 | __u32 tw_snd_nxt; | ||
395 | __u32 tw_rcv_wnd; | ||
396 | __u32 tw_ts_recent; | ||
397 | long tw_ts_recent_stamp; | ||
398 | }; | ||
399 | |||
400 | static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) | ||
401 | { | ||
402 | return (struct tcp_timewait_sock *)sk; | ||
403 | } | ||
404 | |||
390 | static inline void *tcp_ca(const struct tcp_sock *tp) | 405 | static inline void *tcp_ca(const struct tcp_sock *tp) |
391 | { | 406 | { |
392 | return (void *) tp->ca_priv; | 407 | return (void *) tp->ca_priv; |
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1c4fa0065a8e..c38c637e0734 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #ifndef _INET_HASHTABLES_H | 14 | #ifndef _INET_HASHTABLES_H |
15 | #define _INET_HASHTABLES_H | 15 | #define _INET_HASHTABLES_H |
16 | 16 | ||
17 | #include <linux/config.h> | ||
18 | |||
17 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
18 | #include <linux/ip.h> | 20 | #include <linux/ip.h> |
19 | #include <linux/ipv6.h> | 21 | #include <linux/ipv6.h> |
@@ -310,4 +312,43 @@ sherry_cache: | |||
310 | read_unlock(&hashinfo->lhash_lock); | 312 | read_unlock(&hashinfo->lhash_lock); |
311 | return sk; | 313 | return sk; |
312 | } | 314 | } |
315 | |||
316 | /* Socket demux engine toys. */ | ||
317 | #ifdef __BIG_ENDIAN | ||
318 | #define INET_COMBINED_PORTS(__sport, __dport) \ | ||
319 | (((__u32)(__sport) << 16) | (__u32)(__dport)) | ||
320 | #else /* __LITTLE_ENDIAN */ | ||
321 | #define INET_COMBINED_PORTS(__sport, __dport) \ | ||
322 | (((__u32)(__dport) << 16) | (__u32)(__sport)) | ||
323 | #endif | ||
324 | |||
325 | #if (BITS_PER_LONG == 64) | ||
326 | #ifdef __BIG_ENDIAN | ||
327 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
328 | const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); | ||
329 | #else /* __LITTLE_ENDIAN */ | ||
330 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
331 | const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); | ||
332 | #endif /* __BIG_ENDIAN */ | ||
333 | #define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
334 | (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ | ||
335 | ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
336 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
337 | #define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
338 | (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
339 | ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | ||
340 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
341 | #else /* 32-bit arch */ | ||
342 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) | ||
343 | #define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ | ||
344 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
345 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
346 | ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
347 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
348 | #define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ | ||
349 | ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ | ||
350 | (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
351 | ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | ||
352 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
353 | #endif /* 64-bit arch */ | ||
313 | #endif /* _INET_HASHTABLES_H */ | 354 | #endif /* _INET_HASHTABLES_H */ |
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 000000000000..ce117048f2fd --- /dev/null +++ b/include/net/inet_timewait_sock.h | |||
@@ -0,0 +1,142 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Definitions for a generic INET TIMEWAIT sock | ||
7 | * | ||
8 | * From code originally in net/tcp.h | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | #ifndef _INET_TIMEWAIT_SOCK_ | ||
16 | #define _INET_TIMEWAIT_SOCK_ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | |||
20 | #include <linux/list.h> | ||
21 | #include <linux/types.h> | ||
22 | |||
23 | #include <net/sock.h> | ||
24 | #include <net/tcp_states.h> | ||
25 | |||
26 | #include <asm/atomic.h> | ||
27 | |||
28 | #if (BITS_PER_LONG == 64) | ||
29 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 | ||
30 | #else | ||
31 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 | ||
32 | #endif | ||
33 | |||
34 | struct inet_bind_bucket; | ||
35 | |||
36 | /* | ||
37 | * This is a TIME_WAIT sock. It works around the memory consumption | ||
38 | * problems of sockets in such a state on heavily loaded servers, but | ||
39 | * without violating the protocol specification. | ||
40 | */ | ||
41 | struct inet_timewait_sock { | ||
42 | /* | ||
43 | * Now struct sock also uses sock_common, so please just | ||
44 | * don't add nothing before this first member (__tw_common) --acme | ||
45 | */ | ||
46 | struct sock_common __tw_common; | ||
47 | #define tw_family __tw_common.skc_family | ||
48 | #define tw_state __tw_common.skc_state | ||
49 | #define tw_reuse __tw_common.skc_reuse | ||
50 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
51 | #define tw_node __tw_common.skc_node | ||
52 | #define tw_bind_node __tw_common.skc_bind_node | ||
53 | #define tw_refcnt __tw_common.skc_refcnt | ||
54 | #define tw_prot __tw_common.skc_prot | ||
55 | volatile unsigned char tw_substate; | ||
56 | /* 3 bits hole, try to pack */ | ||
57 | unsigned char tw_rcv_wscale; | ||
58 | /* Socket demultiplex comparisons on incoming packets. */ | ||
59 | /* these five are in inet_sock */ | ||
60 | __u16 tw_sport; | ||
61 | __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); | ||
62 | __u32 tw_rcv_saddr; | ||
63 | __u16 tw_dport; | ||
64 | __u16 tw_num; | ||
65 | /* And these are ours. */ | ||
66 | __u8 tw_ipv6only:1; | ||
67 | /* 31 bits hole, try to pack */ | ||
68 | int tw_hashent; | ||
69 | int tw_timeout; | ||
70 | unsigned long tw_ttd; | ||
71 | struct inet_bind_bucket *tw_tb; | ||
72 | struct hlist_node tw_death_node; | ||
73 | }; | ||
74 | |||
75 | static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, | ||
76 | struct hlist_head *list) | ||
77 | { | ||
78 | hlist_add_head(&tw->tw_node, list); | ||
79 | } | ||
80 | |||
81 | static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, | ||
82 | struct hlist_head *list) | ||
83 | { | ||
84 | hlist_add_head(&tw->tw_bind_node, list); | ||
85 | } | ||
86 | |||
87 | static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) | ||
88 | { | ||
89 | return tw->tw_death_node.pprev != NULL; | ||
90 | } | ||
91 | |||
92 | static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) | ||
93 | { | ||
94 | tw->tw_death_node.pprev = NULL; | ||
95 | } | ||
96 | |||
97 | static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
98 | { | ||
99 | __hlist_del(&tw->tw_death_node); | ||
100 | inet_twsk_dead_node_init(tw); | ||
101 | } | ||
102 | |||
103 | static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
104 | { | ||
105 | if (inet_twsk_dead_hashed(tw)) { | ||
106 | __inet_twsk_del_dead_node(tw); | ||
107 | return 1; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | #define inet_twsk_for_each(tw, node, head) \ | ||
113 | hlist_for_each_entry(tw, node, head, tw_node) | ||
114 | |||
115 | #define inet_twsk_for_each_inmate(tw, node, jail) \ | ||
116 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
117 | |||
118 | #define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ | ||
119 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
120 | |||
121 | static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) | ||
122 | { | ||
123 | return (struct inet_timewait_sock *)sk; | ||
124 | } | ||
125 | |||
126 | static inline u32 inet_rcv_saddr(const struct sock *sk) | ||
127 | { | ||
128 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
129 | inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; | ||
130 | } | ||
131 | |||
132 | static inline void inet_twsk_put(struct inet_timewait_sock *tw) | ||
133 | { | ||
134 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
135 | #ifdef SOCK_REFCNT_DEBUG | ||
136 | printk(KERN_DEBUG "%s timewait_sock %p released\n", | ||
137 | tw->tw_prot->name, tw); | ||
138 | #endif | ||
139 | kmem_cache_free(tw->tw_prot->twsk_slab, tw); | ||
140 | } | ||
141 | } | ||
142 | #endif /* _INET_TIMEWAIT_SOCK_ */ | ||
diff --git a/include/net/sock.h b/include/net/sock.h index 391d00b5b7b4..c902c57bf2b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ | |||
88 | } while(0) | 88 | } while(0) |
89 | 89 | ||
90 | struct sock; | 90 | struct sock; |
91 | struct proto; | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * struct sock_common - minimal network layer representation of sockets | 94 | * struct sock_common - minimal network layer representation of sockets |
@@ -98,10 +99,11 @@ struct sock; | |||
98 | * @skc_node: main hash linkage for various protocol lookup tables | 99 | * @skc_node: main hash linkage for various protocol lookup tables |
99 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
100 | * @skc_refcnt: reference count | 101 | * @skc_refcnt: reference count |
102 | * @skc_prot: protocol handlers inside a network family | ||
101 | * | 103 | * |
102 | * This is the minimal network layer representation of sockets, the header | 104 | * This is the minimal network layer representation of sockets, the header |
103 | * for struct sock and struct tcp_tw_bucket. | 105 | * for struct sock and struct inet_timewait_sock. |
104 | */ | 106 | */ |
105 | struct sock_common { | 107 | struct sock_common { |
106 | unsigned short skc_family; | 108 | unsigned short skc_family; |
107 | volatile unsigned char skc_state; | 109 | volatile unsigned char skc_state; |
@@ -110,11 +112,12 @@ struct sock_common { | |||
110 | struct hlist_node skc_node; | 112 | struct hlist_node skc_node; |
111 | struct hlist_node skc_bind_node; | 113 | struct hlist_node skc_bind_node; |
112 | atomic_t skc_refcnt; | 114 | atomic_t skc_refcnt; |
115 | struct proto *skc_prot; | ||
113 | }; | 116 | }; |
114 | 117 | ||
115 | /** | 118 | /** |
116 | * struct sock - network layer representation of sockets | 119 | * struct sock - network layer representation of sockets |
117 | * @__sk_common: shared layout with tcp_tw_bucket | 120 | * @__sk_common: shared layout with inet_timewait_sock |
118 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN | 121 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN |
119 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 122 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
120 | * @sk_lock: synchronizer | 123 | * @sk_lock: synchronizer |
@@ -140,7 +143,6 @@ struct sock_common { | |||
140 | * @sk_backlog: always used with the per-socket spinlock held | 143 | * @sk_backlog: always used with the per-socket spinlock held |
141 | * @sk_callback_lock: used with the callbacks in the end of this struct | 144 | * @sk_callback_lock: used with the callbacks in the end of this struct |
142 | * @sk_error_queue: rarely used | 145 | * @sk_error_queue: rarely used |
143 | * @sk_prot: protocol handlers inside a network family | ||
144 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) | 146 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) |
145 | * @sk_err: last error | 147 | * @sk_err: last error |
146 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' | 148 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' |
@@ -173,7 +175,7 @@ struct sock_common { | |||
173 | */ | 175 | */ |
174 | struct sock { | 176 | struct sock { |
175 | /* | 177 | /* |
176 | * Now struct tcp_tw_bucket also uses sock_common, so please just | 178 | * Now struct inet_timewait_sock also uses sock_common, so please just |
177 | * don't add nothing before this first member (__sk_common) --acme | 179 | * don't add nothing before this first member (__sk_common) --acme |
178 | */ | 180 | */ |
179 | struct sock_common __sk_common; | 181 | struct sock_common __sk_common; |
@@ -184,6 +186,7 @@ struct sock { | |||
184 | #define sk_node __sk_common.skc_node | 186 | #define sk_node __sk_common.skc_node |
185 | #define sk_bind_node __sk_common.skc_bind_node | 187 | #define sk_bind_node __sk_common.skc_bind_node |
186 | #define sk_refcnt __sk_common.skc_refcnt | 188 | #define sk_refcnt __sk_common.skc_refcnt |
189 | #define sk_prot __sk_common.skc_prot | ||
187 | unsigned char sk_shutdown : 2, | 190 | unsigned char sk_shutdown : 2, |
188 | sk_no_check : 2, | 191 | sk_no_check : 2, |
189 | sk_userlocks : 4; | 192 | sk_userlocks : 4; |
@@ -218,7 +221,6 @@ struct sock { | |||
218 | struct sk_buff *tail; | 221 | struct sk_buff *tail; |
219 | } sk_backlog; | 222 | } sk_backlog; |
220 | struct sk_buff_head sk_error_queue; | 223 | struct sk_buff_head sk_error_queue; |
221 | struct proto *sk_prot; | ||
222 | struct proto *sk_prot_creator; | 224 | struct proto *sk_prot_creator; |
223 | rwlock_t sk_callback_lock; | 225 | rwlock_t sk_callback_lock; |
224 | int sk_err, | 226 | int sk_err, |
@@ -557,6 +559,9 @@ struct proto { | |||
557 | kmem_cache_t *slab; | 559 | kmem_cache_t *slab; |
558 | unsigned int obj_size; | 560 | unsigned int obj_size; |
559 | 561 | ||
562 | kmem_cache_t *twsk_slab; | ||
563 | unsigned int twsk_obj_size; | ||
564 | |||
560 | struct request_sock_ops *rsk_prot; | 565 | struct request_sock_ops *rsk_prot; |
561 | 566 | ||
562 | struct module *owner; | 567 | struct module *owner; |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c8..cf8e664176ad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -38,207 +38,14 @@ | |||
38 | #include <net/ip.h> | 38 | #include <net/ip.h> |
39 | #include <net/tcp_states.h> | 39 | #include <net/tcp_states.h> |
40 | 40 | ||
41 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | ||
42 | #include <linux/ipv6.h> | ||
43 | #endif | ||
44 | #include <linux/seq_file.h> | 41 | #include <linux/seq_file.h> |
45 | 42 | ||
46 | extern struct inet_hashinfo tcp_hashinfo; | 43 | extern struct inet_hashinfo tcp_hashinfo; |
47 | 44 | ||
48 | #if (BITS_PER_LONG == 64) | ||
49 | #define TCP_ADDRCMP_ALIGN_BYTES 8 | ||
50 | #else | ||
51 | #define TCP_ADDRCMP_ALIGN_BYTES 4 | ||
52 | #endif | ||
53 | |||
54 | /* This is a TIME_WAIT bucket. It works around the memory consumption | ||
55 | * problems of sockets in such a state on heavily loaded servers, but | ||
56 | * without violating the protocol specification. | ||
57 | */ | ||
58 | struct tcp_tw_bucket { | ||
59 | /* | ||
60 | * Now struct sock also uses sock_common, so please just | ||
61 | * don't add nothing before this first member (__tw_common) --acme | ||
62 | */ | ||
63 | struct sock_common __tw_common; | ||
64 | #define tw_family __tw_common.skc_family | ||
65 | #define tw_state __tw_common.skc_state | ||
66 | #define tw_reuse __tw_common.skc_reuse | ||
67 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
68 | #define tw_node __tw_common.skc_node | ||
69 | #define tw_bind_node __tw_common.skc_bind_node | ||
70 | #define tw_refcnt __tw_common.skc_refcnt | ||
71 | volatile unsigned char tw_substate; | ||
72 | unsigned char tw_rcv_wscale; | ||
73 | __u16 tw_sport; | ||
74 | /* Socket demultiplex comparisons on incoming packets. */ | ||
75 | /* these five are in inet_sock */ | ||
76 | __u32 tw_daddr | ||
77 | __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); | ||
78 | __u32 tw_rcv_saddr; | ||
79 | __u16 tw_dport; | ||
80 | __u16 tw_num; | ||
81 | /* And these are ours. */ | ||
82 | int tw_hashent; | ||
83 | int tw_timeout; | ||
84 | __u32 tw_rcv_nxt; | ||
85 | __u32 tw_snd_nxt; | ||
86 | __u32 tw_rcv_wnd; | ||
87 | __u32 tw_ts_recent; | ||
88 | long tw_ts_recent_stamp; | ||
89 | unsigned long tw_ttd; | ||
90 | struct inet_bind_bucket *tw_tb; | ||
91 | struct hlist_node tw_death_node; | ||
92 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
93 | struct in6_addr tw_v6_daddr; | ||
94 | struct in6_addr tw_v6_rcv_saddr; | ||
95 | int tw_v6_ipv6only; | ||
96 | #endif | ||
97 | }; | ||
98 | |||
99 | static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, | ||
100 | struct hlist_head *list) | ||
101 | { | ||
102 | hlist_add_head(&tw->tw_node, list); | ||
103 | } | ||
104 | |||
105 | static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, | ||
106 | struct hlist_head *list) | ||
107 | { | ||
108 | hlist_add_head(&tw->tw_bind_node, list); | ||
109 | } | ||
110 | |||
111 | static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) | ||
112 | { | ||
113 | return tw->tw_death_node.pprev != NULL; | ||
114 | } | ||
115 | |||
116 | static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) | ||
117 | { | ||
118 | tw->tw_death_node.pprev = NULL; | ||
119 | } | ||
120 | |||
121 | static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
122 | { | ||
123 | __hlist_del(&tw->tw_death_node); | ||
124 | tw_dead_node_init(tw); | ||
125 | } | ||
126 | |||
127 | static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
128 | { | ||
129 | if (tw_dead_hashed(tw)) { | ||
130 | __tw_del_dead_node(tw); | ||
131 | return 1; | ||
132 | } | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | #define tw_for_each(tw, node, head) \ | ||
137 | hlist_for_each_entry(tw, node, head, tw_node) | ||
138 | |||
139 | #define tw_for_each_inmate(tw, node, jail) \ | ||
140 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
141 | |||
142 | #define tw_for_each_inmate_safe(tw, node, safe, jail) \ | ||
143 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
144 | |||
145 | #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) | ||
146 | |||
147 | static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) | ||
148 | { | ||
149 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
150 | inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; | ||
151 | } | ||
152 | |||
153 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
154 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
155 | { | ||
156 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
157 | &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; | ||
158 | } | ||
159 | |||
160 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
161 | { | ||
162 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
163 | } | ||
164 | |||
165 | #define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) | ||
166 | |||
167 | static inline int tcp_v6_ipv6only(const struct sock *sk) | ||
168 | { | ||
169 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
170 | ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); | ||
171 | } | ||
172 | #else | ||
173 | # define __tcp_v6_rcv_saddr(__sk) NULL | ||
174 | # define tcp_v6_rcv_saddr(__sk) NULL | ||
175 | # define tcptw_sk_ipv6only(__sk) 0 | ||
176 | # define tcp_v6_ipv6only(__sk) 0 | ||
177 | #endif | ||
178 | |||
179 | extern kmem_cache_t *tcp_timewait_cachep; | ||
180 | |||
181 | static inline void tcp_tw_put(struct tcp_tw_bucket *tw) | ||
182 | { | ||
183 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
184 | #ifdef SOCK_REFCNT_DEBUG | ||
185 | printk(KERN_DEBUG "tw_bucket %p released\n", tw); | ||
186 | #endif | ||
187 | kmem_cache_free(tcp_timewait_cachep, tw); | ||
188 | } | ||
189 | } | ||
190 | |||
191 | extern atomic_t tcp_orphan_count; | 45 | extern atomic_t tcp_orphan_count; |
192 | extern int tcp_tw_count; | 46 | extern int tcp_tw_count; |
193 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 47 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
194 | extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); | 48 | extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); |
195 | |||
196 | |||
197 | /* Socket demux engine toys. */ | ||
198 | #ifdef __BIG_ENDIAN | ||
199 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
200 | (((__u32)(__sport)<<16) | (__u32)(__dport)) | ||
201 | #else /* __LITTLE_ENDIAN */ | ||
202 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
203 | (((__u32)(__dport)<<16) | (__u32)(__sport)) | ||
204 | #endif | ||
205 | |||
206 | #if (BITS_PER_LONG == 64) | ||
207 | #ifdef __BIG_ENDIAN | ||
208 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
209 | __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); | ||
210 | #else /* __LITTLE_ENDIAN */ | ||
211 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
212 | __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); | ||
213 | #endif /* __BIG_ENDIAN */ | ||
214 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
215 | (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ | ||
216 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
217 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
218 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
219 | (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
220 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
221 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
222 | #else /* 32-bit arch */ | ||
223 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) | ||
224 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
225 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
226 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
227 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
228 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
229 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
230 | ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ | ||
231 | (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
232 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
233 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
234 | #endif /* 64-bit arch */ | ||
235 | |||
236 | #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ | ||
237 | (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
238 | ((__sk)->sk_family == AF_INET6) && \ | ||
239 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
240 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
241 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
242 | 49 | ||
243 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 50 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
244 | 51 | ||
@@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); | |||
543 | 350 | ||
544 | extern int tcp_v4_remember_stamp(struct sock *sk); | 351 | extern int tcp_v4_remember_stamp(struct sock *sk); |
545 | 352 | ||
546 | extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); | 353 | extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); |
547 | 354 | ||
548 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, | 355 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, |
549 | struct msghdr *msg, size_t size); | 356 | struct msghdr *msg, size_t size); |
@@ -616,10 +423,9 @@ enum tcp_tw_status | |||
616 | }; | 423 | }; |
617 | 424 | ||
618 | 425 | ||
619 | extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, | 426 | extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, |
620 | struct sk_buff *skb, | 427 | struct sk_buff *skb, |
621 | struct tcphdr *th, | 428 | const struct tcphdr *th); |
622 | unsigned len); | ||
623 | 429 | ||
624 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, | 430 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, |
625 | struct request_sock *req, | 431 | struct request_sock *req, |