diff options
author | Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | 2005-08-09 23:09:30 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 18:42:13 -0400 |
commit | 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 (patch) | |
tree | ddd004afe2f7c8295f6fdb94d34f78a42b5961cb /include/net/tcp.h | |
parent | 33b62231908c58ae04185e4f1063d1e35a7c8576 (diff) |
[INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets
This paves the way to generalise the rest of the sock ID lookup
routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro
kernels (where IPv6 is always built as a module):
[root@qemu ~]# grep tw_sock /proc/slabinfo
tw_sock_TCPv6 0 0 128 31 1
tw_sock_TCP 0 0 96 41 1
[root@qemu ~]#
Now if a protocol wants to use the TIME_WAIT generic infrastructure it
only has to set the sk_prot->twsk_obj_size field with the size of its
inet_timewait_sock derived sock and proto_register will create
sk_prot->twsk_slab. For now it's only for INET sockets, but we can
introduce timewait_sock later if some non-INET transport protocol
wants to use this stuff.
Next changesets will take advantage of this new infrastructure to
generalise even more TCP code.
[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o
/tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o
[acme@toy net-2.6.14]$
Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1
(qemu host)).
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- | include/net/tcp.h | 202 |
1 files changed, 4 insertions, 198 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c8..cf8e664176ad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -38,207 +38,14 @@ | |||
38 | #include <net/ip.h> | 38 | #include <net/ip.h> |
39 | #include <net/tcp_states.h> | 39 | #include <net/tcp_states.h> |
40 | 40 | ||
41 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | ||
42 | #include <linux/ipv6.h> | ||
43 | #endif | ||
44 | #include <linux/seq_file.h> | 41 | #include <linux/seq_file.h> |
45 | 42 | ||
46 | extern struct inet_hashinfo tcp_hashinfo; | 43 | extern struct inet_hashinfo tcp_hashinfo; |
47 | 44 | ||
48 | #if (BITS_PER_LONG == 64) | ||
49 | #define TCP_ADDRCMP_ALIGN_BYTES 8 | ||
50 | #else | ||
51 | #define TCP_ADDRCMP_ALIGN_BYTES 4 | ||
52 | #endif | ||
53 | |||
54 | /* This is a TIME_WAIT bucket. It works around the memory consumption | ||
55 | * problems of sockets in such a state on heavily loaded servers, but | ||
56 | * without violating the protocol specification. | ||
57 | */ | ||
58 | struct tcp_tw_bucket { | ||
59 | /* | ||
60 | * Now struct sock also uses sock_common, so please just | ||
61 | * don't add nothing before this first member (__tw_common) --acme | ||
62 | */ | ||
63 | struct sock_common __tw_common; | ||
64 | #define tw_family __tw_common.skc_family | ||
65 | #define tw_state __tw_common.skc_state | ||
66 | #define tw_reuse __tw_common.skc_reuse | ||
67 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
68 | #define tw_node __tw_common.skc_node | ||
69 | #define tw_bind_node __tw_common.skc_bind_node | ||
70 | #define tw_refcnt __tw_common.skc_refcnt | ||
71 | volatile unsigned char tw_substate; | ||
72 | unsigned char tw_rcv_wscale; | ||
73 | __u16 tw_sport; | ||
74 | /* Socket demultiplex comparisons on incoming packets. */ | ||
75 | /* these five are in inet_sock */ | ||
76 | __u32 tw_daddr | ||
77 | __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); | ||
78 | __u32 tw_rcv_saddr; | ||
79 | __u16 tw_dport; | ||
80 | __u16 tw_num; | ||
81 | /* And these are ours. */ | ||
82 | int tw_hashent; | ||
83 | int tw_timeout; | ||
84 | __u32 tw_rcv_nxt; | ||
85 | __u32 tw_snd_nxt; | ||
86 | __u32 tw_rcv_wnd; | ||
87 | __u32 tw_ts_recent; | ||
88 | long tw_ts_recent_stamp; | ||
89 | unsigned long tw_ttd; | ||
90 | struct inet_bind_bucket *tw_tb; | ||
91 | struct hlist_node tw_death_node; | ||
92 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
93 | struct in6_addr tw_v6_daddr; | ||
94 | struct in6_addr tw_v6_rcv_saddr; | ||
95 | int tw_v6_ipv6only; | ||
96 | #endif | ||
97 | }; | ||
98 | |||
99 | static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, | ||
100 | struct hlist_head *list) | ||
101 | { | ||
102 | hlist_add_head(&tw->tw_node, list); | ||
103 | } | ||
104 | |||
105 | static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, | ||
106 | struct hlist_head *list) | ||
107 | { | ||
108 | hlist_add_head(&tw->tw_bind_node, list); | ||
109 | } | ||
110 | |||
111 | static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) | ||
112 | { | ||
113 | return tw->tw_death_node.pprev != NULL; | ||
114 | } | ||
115 | |||
116 | static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) | ||
117 | { | ||
118 | tw->tw_death_node.pprev = NULL; | ||
119 | } | ||
120 | |||
121 | static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
122 | { | ||
123 | __hlist_del(&tw->tw_death_node); | ||
124 | tw_dead_node_init(tw); | ||
125 | } | ||
126 | |||
127 | static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
128 | { | ||
129 | if (tw_dead_hashed(tw)) { | ||
130 | __tw_del_dead_node(tw); | ||
131 | return 1; | ||
132 | } | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | #define tw_for_each(tw, node, head) \ | ||
137 | hlist_for_each_entry(tw, node, head, tw_node) | ||
138 | |||
139 | #define tw_for_each_inmate(tw, node, jail) \ | ||
140 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
141 | |||
142 | #define tw_for_each_inmate_safe(tw, node, safe, jail) \ | ||
143 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
144 | |||
145 | #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) | ||
146 | |||
147 | static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) | ||
148 | { | ||
149 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
150 | inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; | ||
151 | } | ||
152 | |||
153 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
154 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
155 | { | ||
156 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
157 | &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; | ||
158 | } | ||
159 | |||
160 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
161 | { | ||
162 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
163 | } | ||
164 | |||
165 | #define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) | ||
166 | |||
167 | static inline int tcp_v6_ipv6only(const struct sock *sk) | ||
168 | { | ||
169 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
170 | ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); | ||
171 | } | ||
172 | #else | ||
173 | # define __tcp_v6_rcv_saddr(__sk) NULL | ||
174 | # define tcp_v6_rcv_saddr(__sk) NULL | ||
175 | # define tcptw_sk_ipv6only(__sk) 0 | ||
176 | # define tcp_v6_ipv6only(__sk) 0 | ||
177 | #endif | ||
178 | |||
179 | extern kmem_cache_t *tcp_timewait_cachep; | ||
180 | |||
181 | static inline void tcp_tw_put(struct tcp_tw_bucket *tw) | ||
182 | { | ||
183 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
184 | #ifdef SOCK_REFCNT_DEBUG | ||
185 | printk(KERN_DEBUG "tw_bucket %p released\n", tw); | ||
186 | #endif | ||
187 | kmem_cache_free(tcp_timewait_cachep, tw); | ||
188 | } | ||
189 | } | ||
190 | |||
191 | extern atomic_t tcp_orphan_count; | 45 | extern atomic_t tcp_orphan_count; |
192 | extern int tcp_tw_count; | 46 | extern int tcp_tw_count; |
193 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 47 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
194 | extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); | 48 | extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); |
195 | |||
196 | |||
197 | /* Socket demux engine toys. */ | ||
198 | #ifdef __BIG_ENDIAN | ||
199 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
200 | (((__u32)(__sport)<<16) | (__u32)(__dport)) | ||
201 | #else /* __LITTLE_ENDIAN */ | ||
202 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
203 | (((__u32)(__dport)<<16) | (__u32)(__sport)) | ||
204 | #endif | ||
205 | |||
206 | #if (BITS_PER_LONG == 64) | ||
207 | #ifdef __BIG_ENDIAN | ||
208 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
209 | __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); | ||
210 | #else /* __LITTLE_ENDIAN */ | ||
211 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
212 | __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); | ||
213 | #endif /* __BIG_ENDIAN */ | ||
214 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
215 | (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ | ||
216 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
217 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
218 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
219 | (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
220 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
221 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
222 | #else /* 32-bit arch */ | ||
223 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) | ||
224 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
225 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
226 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
227 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
228 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
229 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
230 | ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ | ||
231 | (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
232 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
233 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
234 | #endif /* 64-bit arch */ | ||
235 | |||
236 | #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ | ||
237 | (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
238 | ((__sk)->sk_family == AF_INET6) && \ | ||
239 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
240 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
241 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
242 | 49 | ||
243 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 50 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
244 | 51 | ||
@@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); | |||
543 | 350 | ||
544 | extern int tcp_v4_remember_stamp(struct sock *sk); | 351 | extern int tcp_v4_remember_stamp(struct sock *sk); |
545 | 352 | ||
546 | extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); | 353 | extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); |
547 | 354 | ||
548 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, | 355 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, |
549 | struct msghdr *msg, size_t size); | 356 | struct msghdr *msg, size_t size); |
@@ -616,10 +423,9 @@ enum tcp_tw_status | |||
616 | }; | 423 | }; |
617 | 424 | ||
618 | 425 | ||
619 | extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, | 426 | extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, |
620 | struct sk_buff *skb, | 427 | struct sk_buff *skb, |
621 | struct tcphdr *th, | 428 | const struct tcphdr *th); |
622 | unsigned len); | ||
623 | 429 | ||
624 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, | 430 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, |
625 | struct request_sock *req, | 431 | struct request_sock *req, |