Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 944 |
1 file changed, 163 insertions, 781 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 67c670886c1f..13dfb391cdf1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -64,7 +64,9 @@ | |||
64 | #include <linux/times.h> | 64 | #include <linux/times.h> |
65 | 65 | ||
66 | #include <net/icmp.h> | 66 | #include <net/icmp.h> |
67 | #include <net/inet_hashtables.h> | ||
67 | #include <net/tcp.h> | 68 | #include <net/tcp.h> |
69 | #include <net/transp_v6.h> | ||
68 | #include <net/ipv6.h> | 70 | #include <net/ipv6.h> |
69 | #include <net/inet_common.h> | 71 | #include <net/inet_common.h> |
70 | #include <net/xfrm.h> | 72 | #include <net/xfrm.h> |
@@ -75,7 +77,6 @@ | |||
75 | #include <linux/proc_fs.h> | 77 | #include <linux/proc_fs.h> |
76 | #include <linux/seq_file.h> | 78 | #include <linux/seq_file.h> |
77 | 79 | ||
78 | extern int sysctl_ip_dynaddr; | ||
79 | int sysctl_tcp_tw_reuse; | 80 | int sysctl_tcp_tw_reuse; |
80 | int sysctl_tcp_low_latency; | 81 | int sysctl_tcp_low_latency; |
81 | 82 | ||
@@ -88,463 +89,29 @@ static struct socket *tcp_socket; | |||
88 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, | 89 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, |
89 | struct sk_buff *skb); | 90 | struct sk_buff *skb); |
90 | 91 | ||
91 | struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { | 92 | struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { |
92 | .__tcp_lhash_lock = RW_LOCK_UNLOCKED, | 93 | .lhash_lock = RW_LOCK_UNLOCKED, |
93 | .__tcp_lhash_users = ATOMIC_INIT(0), | 94 | .lhash_users = ATOMIC_INIT(0), |
94 | .__tcp_lhash_wait | 95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), |
95 | = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), | 96 | .portalloc_lock = SPIN_LOCK_UNLOCKED, |
96 | .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED | 97 | .port_rover = 1024 - 1, |
97 | }; | 98 | }; |
98 | 99 | ||
99 | /* | ||
100 | * This array holds the first and last local port number. | ||
101 | * For high-usage systems, use sysctl to change this to | ||
102 | * 32768-61000 | ||
103 | */ | ||
104 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
105 | int tcp_port_rover = 1024 - 1; | ||
106 | |||
107 | static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, | ||
108 | __u32 faddr, __u16 fport) | ||
109 | { | ||
110 | int h = (laddr ^ lport) ^ (faddr ^ fport); | ||
111 | h ^= h >> 16; | ||
112 | h ^= h >> 8; | ||
113 | return h & (tcp_ehash_size - 1); | ||
114 | } | ||
115 | |||
116 | static __inline__ int tcp_sk_hashfn(struct sock *sk) | ||
117 | { | ||
118 | struct inet_sock *inet = inet_sk(sk); | ||
119 | __u32 laddr = inet->rcv_saddr; | ||
120 | __u16 lport = inet->num; | ||
121 | __u32 faddr = inet->daddr; | ||
122 | __u16 fport = inet->dport; | ||
123 | |||
124 | return tcp_hashfn(laddr, lport, faddr, fport); | ||
125 | } | ||
126 | |||
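
For orientation while reading the refactor: the removed tcp_hashfn() above is exactly what the generic inet_ehashfn() now provides. A minimal, standalone user-space sketch of the same mixing step (illustrative only; the table size is assumed to be a power of two, as the kernel guarantees):

#include <stdint.h>
#include <stdio.h>

/* Re-statement of the removed tcp_hashfn(): XOR the local and remote
 * address/port pairs together, fold the high bits down, and mask to the
 * (power-of-two) established-hash table size. */
static unsigned int ehash(uint32_t laddr, uint16_t lport,
                          uint32_t faddr, uint16_t fport,
                          unsigned int ehash_size)
{
	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);

	h ^= h >> 16;
	h ^= h >> 8;
	return h & (ehash_size - 1);
}

int main(void)
{
	/* 10.0.0.1:33000 -> 10.0.0.2:80 with a 512-slot table (made-up values) */
	printf("bucket = %u\n", ehash(0x0a000001, 33000, 0x0a000002, 80, 512));
	return 0;
}
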
127 | /* Allocate and initialize a new TCP local port bind bucket. | ||
128 | * The bindhash mutex for snum's hash chain must be held here. | ||
129 | */ | ||
130 | struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, | ||
131 | unsigned short snum) | ||
132 | { | ||
133 | struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, | ||
134 | SLAB_ATOMIC); | ||
135 | if (tb) { | ||
136 | tb->port = snum; | ||
137 | tb->fastreuse = 0; | ||
138 | INIT_HLIST_HEAD(&tb->owners); | ||
139 | hlist_add_head(&tb->node, &head->chain); | ||
140 | } | ||
141 | return tb; | ||
142 | } | ||
143 | |||
144 | /* Caller must hold hashbucket lock for this tb with local BH disabled */ | ||
145 | void tcp_bucket_destroy(struct tcp_bind_bucket *tb) | ||
146 | { | ||
147 | if (hlist_empty(&tb->owners)) { | ||
148 | __hlist_del(&tb->node); | ||
149 | kmem_cache_free(tcp_bucket_cachep, tb); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | /* Caller must disable local BH processing. */ | ||
154 | static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) | ||
155 | { | ||
156 | struct tcp_bind_hashbucket *head = | ||
157 | &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; | ||
158 | struct tcp_bind_bucket *tb; | ||
159 | |||
160 | spin_lock(&head->lock); | ||
161 | tb = tcp_sk(sk)->bind_hash; | ||
162 | sk_add_bind_node(child, &tb->owners); | ||
163 | tcp_sk(child)->bind_hash = tb; | ||
164 | spin_unlock(&head->lock); | ||
165 | } | ||
166 | |||
167 | inline void tcp_inherit_port(struct sock *sk, struct sock *child) | ||
168 | { | ||
169 | local_bh_disable(); | ||
170 | __tcp_inherit_port(sk, child); | ||
171 | local_bh_enable(); | ||
172 | } | ||
173 | |||
174 | void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, | ||
175 | unsigned short snum) | ||
176 | { | ||
177 | inet_sk(sk)->num = snum; | ||
178 | sk_add_bind_node(sk, &tb->owners); | ||
179 | tcp_sk(sk)->bind_hash = tb; | ||
180 | } | ||
181 | |||
182 | static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) | ||
183 | { | ||
184 | const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); | ||
185 | struct sock *sk2; | ||
186 | struct hlist_node *node; | ||
187 | int reuse = sk->sk_reuse; | ||
188 | |||
189 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
190 | if (sk != sk2 && | ||
191 | !tcp_v6_ipv6only(sk2) && | ||
192 | (!sk->sk_bound_dev_if || | ||
193 | !sk2->sk_bound_dev_if || | ||
194 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
195 | if (!reuse || !sk2->sk_reuse || | ||
196 | sk2->sk_state == TCP_LISTEN) { | ||
197 | const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); | ||
198 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
199 | sk2_rcv_saddr == sk_rcv_saddr) | ||
200 | break; | ||
201 | } | ||
202 | } | ||
203 | } | ||
204 | return node != NULL; | ||
205 | } | ||
206 | |||
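
The conflict test above boils down to three questions: are the two sockets bound to the same (or no) device, does SO_REUSEADDR excuse the clash, and do the bound local addresses overlap. A user-space restatement of that per-pair check (struct bound_sk and binds_conflict() are made-up illustrative names, not kernel types):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy view of a bound socket with just the fields tcp_bind_conflict() reads. */
struct bound_sk {
	uint32_t rcv_saddr;	/* 0 = wildcard (INADDR_ANY)  */
	int      bound_dev_if;	/* 0 = not bound to a device  */
	bool     reuse;		/* SO_REUSEADDR               */
	bool     listening;
};

/* a = socket trying to bind, b = socket already in the bucket. */
static bool binds_conflict(const struct bound_sk *a, const struct bound_sk *b)
{
	if (a->bound_dev_if && b->bound_dev_if &&
	    a->bound_dev_if != b->bound_dev_if)
		return false;			/* different devices: no clash */
	if (a->reuse && b->reuse && !b->listening)
		return false;			/* both reusable, peer not listening */
	return !a->rcv_saddr || !b->rcv_saddr ||
	       a->rcv_saddr == b->rcv_saddr;	/* local addresses overlap */
}

int main(void)
{
	struct bound_sk l = { 0, 0, false, true };		/* 0.0.0.0 listener */
	struct bound_sk c = { 0x7f000001, 0, true, false };	/* 127.0.0.1, reuse */

	printf("conflict=%d\n", binds_conflict(&c, &l));	/* 1: wildcard overlaps */
	return 0;
}
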
207 | /* Obtain a reference to a local port for the given sock, | ||
208 | * if snum is zero it means select any available local port. | ||
209 | */ | ||
210 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 100 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) |
211 | { | 101 | { |
212 | struct tcp_bind_hashbucket *head; | 102 | return inet_csk_get_port(&tcp_hashinfo, sk, snum); |
213 | struct hlist_node *node; | ||
214 | struct tcp_bind_bucket *tb; | ||
215 | int ret; | ||
216 | |||
217 | local_bh_disable(); | ||
218 | if (!snum) { | ||
219 | int low = sysctl_local_port_range[0]; | ||
220 | int high = sysctl_local_port_range[1]; | ||
221 | int remaining = (high - low) + 1; | ||
222 | int rover; | ||
223 | |||
224 | spin_lock(&tcp_portalloc_lock); | ||
225 | if (tcp_port_rover < low) | ||
226 | rover = low; | ||
227 | else | ||
228 | rover = tcp_port_rover; | ||
229 | do { | ||
230 | rover++; | ||
231 | if (rover > high) | ||
232 | rover = low; | ||
233 | head = &tcp_bhash[tcp_bhashfn(rover)]; | ||
234 | spin_lock(&head->lock); | ||
235 | tb_for_each(tb, node, &head->chain) | ||
236 | if (tb->port == rover) | ||
237 | goto next; | ||
238 | break; | ||
239 | next: | ||
240 | spin_unlock(&head->lock); | ||
241 | } while (--remaining > 0); | ||
242 | tcp_port_rover = rover; | ||
243 | spin_unlock(&tcp_portalloc_lock); | ||
244 | |||
245 | /* Exhausted local port range during search? It is not | ||
246 | * possible for us to be holding one of the bind hash | ||
247 | * locks if this test triggers, because if 'remaining' | ||
248 | * drops to zero, we broke out of the do/while loop at | ||
249 | * the top level, not from the 'break;' statement. | ||
250 | */ | ||
251 | ret = 1; | ||
252 | if (unlikely(remaining <= 0)) | ||
253 | goto fail; | ||
254 | |||
255 | /* OK, here is the one we will use. HEAD is | ||
256 | * non-NULL and we hold its mutex. ||
257 | */ | ||
258 | snum = rover; | ||
259 | } else { | ||
260 | head = &tcp_bhash[tcp_bhashfn(snum)]; | ||
261 | spin_lock(&head->lock); | ||
262 | tb_for_each(tb, node, &head->chain) | ||
263 | if (tb->port == snum) | ||
264 | goto tb_found; | ||
265 | } | ||
266 | tb = NULL; | ||
267 | goto tb_not_found; | ||
268 | tb_found: | ||
269 | if (!hlist_empty(&tb->owners)) { | ||
270 | if (sk->sk_reuse > 1) | ||
271 | goto success; | ||
272 | if (tb->fastreuse > 0 && | ||
273 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
274 | goto success; | ||
275 | } else { | ||
276 | ret = 1; | ||
277 | if (tcp_bind_conflict(sk, tb)) | ||
278 | goto fail_unlock; | ||
279 | } | ||
280 | } | ||
281 | tb_not_found: | ||
282 | ret = 1; | ||
283 | if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) | ||
284 | goto fail_unlock; | ||
285 | if (hlist_empty(&tb->owners)) { | ||
286 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
287 | tb->fastreuse = 1; | ||
288 | else | ||
289 | tb->fastreuse = 0; | ||
290 | } else if (tb->fastreuse && | ||
291 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
292 | tb->fastreuse = 0; | ||
293 | success: | ||
294 | if (!tcp_sk(sk)->bind_hash) | ||
295 | tcp_bind_hash(sk, tb, snum); | ||
296 | BUG_TRAP(tcp_sk(sk)->bind_hash == tb); | ||
297 | ret = 0; | ||
298 | |||
299 | fail_unlock: | ||
300 | spin_unlock(&head->lock); | ||
301 | fail: | ||
302 | local_bh_enable(); | ||
303 | return ret; | ||
304 | } | ||
305 | |||
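
The port-selection loop removed here (and re-homed in inet_csk_get_port()) is a simple rover: remember the last port handed out, scan forward with wrap-around, and fail once the whole range has been visited. A compressed user-space sketch, with port_in_use() as a made-up stand-in for the bind-hash walk done under the bucket spinlock:

#include <stdbool.h>
#include <stdio.h>

/* Made-up stand-in for "is this port already in a bind bucket?" */
static bool port_in_use(int port)
{
	return port == 1025 || port == 1026;	/* pretend these are taken */
}

/* Rough shape of the removed rover search in tcp_v4_get_port(): start just
 * past the last port handed out, wrap at the top of the range, and give up
 * after scanning the whole range once. */
static int pick_local_port(int low, int high, int *rover)
{
	int remaining = (high - low) + 1;

	do {
		if (++*rover > high)
			*rover = low;
		if (!port_in_use(*rover))
			return *rover;		/* free port found */
	} while (--remaining > 0);

	return -1;				/* local port range exhausted */
}

int main(void)
{
	int rover = 1024 - 1;	/* the old tcp_port_rover initial value */

	printf("picked %d\n", pick_local_port(1024, 4999, &rover));
	return 0;
}
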
306 | /* Get rid of any references to a local port held by the | ||
307 | * given sock. | ||
308 | */ | ||
309 | static void __tcp_put_port(struct sock *sk) | ||
310 | { | ||
311 | struct inet_sock *inet = inet_sk(sk); | ||
312 | struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; | ||
313 | struct tcp_bind_bucket *tb; | ||
314 | |||
315 | spin_lock(&head->lock); | ||
316 | tb = tcp_sk(sk)->bind_hash; | ||
317 | __sk_del_bind_node(sk); | ||
318 | tcp_sk(sk)->bind_hash = NULL; | ||
319 | inet->num = 0; | ||
320 | tcp_bucket_destroy(tb); | ||
321 | spin_unlock(&head->lock); | ||
322 | } | ||
323 | |||
324 | void tcp_put_port(struct sock *sk) | ||
325 | { | ||
326 | local_bh_disable(); | ||
327 | __tcp_put_port(sk); | ||
328 | local_bh_enable(); | ||
329 | } | ||
330 | |||
331 | /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | ||
332 | * Look, when several writers sleep and reader wakes them up, all but one | ||
333 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
334 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
335 | * exclusive lock release). It should be ifdefed really. | ||
336 | */ | ||
337 | |||
338 | void tcp_listen_wlock(void) | ||
339 | { | ||
340 | write_lock(&tcp_lhash_lock); | ||
341 | |||
342 | if (atomic_read(&tcp_lhash_users)) { | ||
343 | DEFINE_WAIT(wait); | ||
344 | |||
345 | for (;;) { | ||
346 | prepare_to_wait_exclusive(&tcp_lhash_wait, | ||
347 | &wait, TASK_UNINTERRUPTIBLE); | ||
348 | if (!atomic_read(&tcp_lhash_users)) | ||
349 | break; | ||
350 | write_unlock_bh(&tcp_lhash_lock); | ||
351 | schedule(); | ||
352 | write_lock_bh(&tcp_lhash_lock); | ||
353 | } | ||
354 | |||
355 | finish_wait(&tcp_lhash_wait, &wait); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) | ||
360 | { | ||
361 | struct hlist_head *list; | ||
362 | rwlock_t *lock; | ||
363 | |||
364 | BUG_TRAP(sk_unhashed(sk)); | ||
365 | if (listen_possible && sk->sk_state == TCP_LISTEN) { | ||
366 | list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; | ||
367 | lock = &tcp_lhash_lock; | ||
368 | tcp_listen_wlock(); | ||
369 | } else { | ||
370 | list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain; | ||
371 | lock = &tcp_ehash[sk->sk_hashent].lock; | ||
372 | write_lock(lock); | ||
373 | } | ||
374 | __sk_add_node(sk, list); | ||
375 | sock_prot_inc_use(sk->sk_prot); | ||
376 | write_unlock(lock); | ||
377 | if (listen_possible && sk->sk_state == TCP_LISTEN) | ||
378 | wake_up(&tcp_lhash_wait); | ||
379 | } | 103 | } |
380 | 104 | ||
381 | static void tcp_v4_hash(struct sock *sk) | 105 | static void tcp_v4_hash(struct sock *sk) |
382 | { | 106 | { |
383 | if (sk->sk_state != TCP_CLOSE) { | 107 | inet_hash(&tcp_hashinfo, sk); |
384 | local_bh_disable(); | ||
385 | __tcp_v4_hash(sk, 1); | ||
386 | local_bh_enable(); | ||
387 | } | ||
388 | } | 108 | } |
389 | 109 | ||
390 | void tcp_unhash(struct sock *sk) | 110 | void tcp_unhash(struct sock *sk) |
391 | { | 111 | { |
392 | rwlock_t *lock; | 112 | inet_unhash(&tcp_hashinfo, sk); |
393 | |||
394 | if (sk_unhashed(sk)) | ||
395 | goto ende; | ||
396 | |||
397 | if (sk->sk_state == TCP_LISTEN) { | ||
398 | local_bh_disable(); | ||
399 | tcp_listen_wlock(); | ||
400 | lock = &tcp_lhash_lock; | ||
401 | } else { | ||
402 | struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; | ||
403 | lock = &head->lock; | ||
404 | write_lock_bh(&head->lock); | ||
405 | } | ||
406 | |||
407 | if (__sk_del_node_init(sk)) | ||
408 | sock_prot_dec_use(sk->sk_prot); | ||
409 | write_unlock_bh(lock); | ||
410 | |||
411 | ende: | ||
412 | if (sk->sk_state == TCP_LISTEN) | ||
413 | wake_up(&tcp_lhash_wait); | ||
414 | } | ||
415 | |||
416 | /* Don't inline this cruft. Here are some nice properties to | ||
417 | * exploit here. The BSD API does not allow a listening TCP | ||
418 | * to specify the remote port nor the remote address for the | ||
419 | * connection. So always assume those are both wildcarded | ||
420 | * during the search since they can never be otherwise. | ||
421 | */ | ||
422 | static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, | ||
423 | unsigned short hnum, int dif) | ||
424 | { | ||
425 | struct sock *result = NULL, *sk; | ||
426 | struct hlist_node *node; | ||
427 | int score, hiscore; | ||
428 | |||
429 | hiscore=-1; | ||
430 | sk_for_each(sk, node, head) { | ||
431 | struct inet_sock *inet = inet_sk(sk); | ||
432 | |||
433 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | ||
434 | __u32 rcv_saddr = inet->rcv_saddr; | ||
435 | |||
436 | score = (sk->sk_family == PF_INET ? 1 : 0); | ||
437 | if (rcv_saddr) { | ||
438 | if (rcv_saddr != daddr) | ||
439 | continue; | ||
440 | score+=2; | ||
441 | } | ||
442 | if (sk->sk_bound_dev_if) { | ||
443 | if (sk->sk_bound_dev_if != dif) | ||
444 | continue; | ||
445 | score+=2; | ||
446 | } | ||
447 | if (score == 5) | ||
448 | return sk; | ||
449 | if (score > hiscore) { | ||
450 | hiscore = score; | ||
451 | result = sk; | ||
452 | } | ||
453 | } | ||
454 | } | ||
455 | return result; | ||
456 | } | ||
457 | |||
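
The scoring in __tcp_v4_lookup_listener() above favours the most specific listener: 1 point for an exact-family socket, 2 more for a matching bound address, 2 more for a matching bound device, and a perfect 5 short-circuits the walk. A small standalone restatement (struct listener and listener_score() are illustrative names, not kernel types):

#include <stdint.h>
#include <stdio.h>

struct listener {
	int      is_ipv4;	/* sk->sk_family == PF_INET      */
	uint32_t rcv_saddr;	/* 0 means bound to INADDR_ANY   */
	int      bound_dev_if;	/* 0 means not bound to a device */
};

/* -1 disqualifies the socket; otherwise higher means more specific. */
static int listener_score(const struct listener *l, uint32_t daddr, int dif)
{
	int score = l->is_ipv4 ? 1 : 0;

	if (l->rcv_saddr) {
		if (l->rcv_saddr != daddr)
			return -1;		/* bound to a different address */
		score += 2;
	}
	if (l->bound_dev_if) {
		if (l->bound_dev_if != dif)
			return -1;		/* bound to a different interface */
		score += 2;
	}
	return score;
}

int main(void)
{
	struct listener any = { 1, 0, 0 };		/* 0.0.0.0, any device */
	struct listener one = { 1, 0x0a000001, 0 };	/* 10.0.0.1 */

	printf("%d vs %d\n", listener_score(&any, 0x0a000001, 2),
	       listener_score(&one, 0x0a000001, 2));	/* 1 vs 3 */
	return 0;
}
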
458 | /* Optimize the common listener case. */ | ||
459 | static inline struct sock *tcp_v4_lookup_listener(u32 daddr, | ||
460 | unsigned short hnum, int dif) | ||
461 | { | ||
462 | struct sock *sk = NULL; | ||
463 | struct hlist_head *head; | ||
464 | |||
465 | read_lock(&tcp_lhash_lock); | ||
466 | head = &tcp_listening_hash[tcp_lhashfn(hnum)]; | ||
467 | if (!hlist_empty(head)) { | ||
468 | struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | ||
469 | |||
470 | if (inet->num == hnum && !sk->sk_node.next && | ||
471 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | ||
472 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | ||
473 | !sk->sk_bound_dev_if) | ||
474 | goto sherry_cache; | ||
475 | sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); | ||
476 | } | ||
477 | if (sk) { | ||
478 | sherry_cache: | ||
479 | sock_hold(sk); | ||
480 | } | ||
481 | read_unlock(&tcp_lhash_lock); | ||
482 | return sk; | ||
483 | } | 113 | } |
484 | 114 | ||
485 | /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | ||
486 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | ||
487 | * | ||
488 | * Local BH must be disabled here. | ||
489 | */ | ||
490 | |||
491 | static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, | ||
492 | u32 daddr, u16 hnum, | ||
493 | int dif) | ||
494 | { | ||
495 | struct tcp_ehash_bucket *head; | ||
496 | TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) | ||
497 | __u32 ports = TCP_COMBINED_PORTS(sport, hnum); | ||
498 | struct sock *sk; | ||
499 | struct hlist_node *node; | ||
500 | /* Optimize here for direct hit, only listening connections can | ||
501 | * have wildcards anyways. | ||
502 | */ | ||
503 | int hash = tcp_hashfn(daddr, hnum, saddr, sport); | ||
504 | head = &tcp_ehash[hash]; | ||
505 | read_lock(&head->lock); | ||
506 | sk_for_each(sk, node, &head->chain) { | ||
507 | if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
508 | goto hit; /* You sunk my battleship! */ | ||
509 | } | ||
510 | |||
511 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
512 | sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { | ||
513 | if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
514 | goto hit; | ||
515 | } | ||
516 | sk = NULL; | ||
517 | out: | ||
518 | read_unlock(&head->lock); | ||
519 | return sk; | ||
520 | hit: | ||
521 | sock_hold(sk); | ||
522 | goto out; | ||
523 | } | ||
524 | |||
525 | static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, | ||
526 | u32 daddr, u16 hnum, int dif) | ||
527 | { | ||
528 | struct sock *sk = __tcp_v4_lookup_established(saddr, sport, | ||
529 | daddr, hnum, dif); | ||
530 | |||
531 | return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif); | ||
532 | } | ||
533 | |||
534 | inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, | ||
535 | u16 dport, int dif) | ||
536 | { | ||
537 | struct sock *sk; | ||
538 | |||
539 | local_bh_disable(); | ||
540 | sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); | ||
541 | local_bh_enable(); | ||
542 | |||
543 | return sk; | ||
544 | } | ||
545 | |||
546 | EXPORT_SYMBOL_GPL(tcp_v4_lookup); | ||
547 | |||
548 | static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) | 115 | static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) |
549 | { | 116 | { |
550 | return secure_tcp_sequence_number(skb->nh.iph->daddr, | 117 | return secure_tcp_sequence_number(skb->nh.iph->daddr, |
@@ -555,27 +122,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
555 | 122 | ||
556 | /* called with local bh disabled */ | 123 | /* called with local bh disabled */ |
557 | static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | 124 | static int __tcp_v4_check_established(struct sock *sk, __u16 lport, |
558 | struct tcp_tw_bucket **twp) | 125 | struct inet_timewait_sock **twp) |
559 | { | 126 | { |
560 | struct inet_sock *inet = inet_sk(sk); | 127 | struct inet_sock *inet = inet_sk(sk); |
561 | u32 daddr = inet->rcv_saddr; | 128 | u32 daddr = inet->rcv_saddr; |
562 | u32 saddr = inet->daddr; | 129 | u32 saddr = inet->daddr; |
563 | int dif = sk->sk_bound_dev_if; | 130 | int dif = sk->sk_bound_dev_if; |
564 | TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) | 131 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
565 | __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); | 132 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); |
566 | int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); | 133 | const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); |
567 | struct tcp_ehash_bucket *head = &tcp_ehash[hash]; | 134 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; |
568 | struct sock *sk2; | 135 | struct sock *sk2; |
569 | struct hlist_node *node; | 136 | const struct hlist_node *node; |
570 | struct tcp_tw_bucket *tw; | 137 | struct inet_timewait_sock *tw; |
571 | 138 | ||
572 | write_lock(&head->lock); | 139 | write_lock(&head->lock); |
573 | 140 | ||
574 | /* Check TIME-WAIT sockets first. */ | 141 | /* Check TIME-WAIT sockets first. */ |
575 | sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { | 142 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { |
576 | tw = (struct tcp_tw_bucket *)sk2; | 143 | tw = inet_twsk(sk2); |
577 | 144 | ||
578 | if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { | 145 | if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { |
146 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
579 | struct tcp_sock *tp = tcp_sk(sk); | 147 | struct tcp_sock *tp = tcp_sk(sk); |
580 | 148 | ||
581 | /* With PAWS, it is safe from the viewpoint | 149 | /* With PAWS, it is safe from the viewpoint |
@@ -592,15 +160,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | |||
592 | fall back to VJ's scheme and use initial | 160 | fall back to VJ's scheme and use initial |
593 | timestamp retrieved from peer table. | 161 | timestamp retrieved from peer table. |
594 | */ | 162 | */ |
595 | if (tw->tw_ts_recent_stamp && | 163 | if (tcptw->tw_ts_recent_stamp && |
596 | (!twp || (sysctl_tcp_tw_reuse && | 164 | (!twp || (sysctl_tcp_tw_reuse && |
597 | xtime.tv_sec - | 165 | xtime.tv_sec - |
598 | tw->tw_ts_recent_stamp > 1))) { | 166 | tcptw->tw_ts_recent_stamp > 1))) { |
599 | if ((tp->write_seq = | 167 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
600 | tw->tw_snd_nxt + 65535 + 2) == 0) | 168 | if (tp->write_seq == 0) |
601 | tp->write_seq = 1; | 169 | tp->write_seq = 1; |
602 | tp->rx_opt.ts_recent = tw->tw_ts_recent; | 170 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
603 | tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; | 171 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
604 | sock_hold(sk2); | 172 | sock_hold(sk2); |
605 | goto unique; | 173 | goto unique; |
606 | } else | 174 | } else |
@@ -611,7 +179,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | |||
611 | 179 | ||
612 | /* And established part... */ | 180 | /* And established part... */ |
613 | sk_for_each(sk2, node, &head->chain) { | 181 | sk_for_each(sk2, node, &head->chain) { |
614 | if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | 182 | if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) |
615 | goto not_unique; | 183 | goto not_unique; |
616 | } | 184 | } |
617 | 185 | ||
@@ -631,10 +199,10 @@ unique: | |||
631 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 199 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
632 | } else if (tw) { | 200 | } else if (tw) { |
633 | /* Silly. Should hash-dance instead... */ | 201 | /* Silly. Should hash-dance instead... */ |
634 | tcp_tw_deschedule(tw); | 202 | inet_twsk_deschedule(tw, &tcp_death_row); |
635 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 203 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
636 | 204 | ||
637 | tcp_tw_put(tw); | 205 | inet_twsk_put(tw); |
638 | } | 206 | } |
639 | 207 | ||
640 | return 0; | 208 | return 0; |
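
When the timestamp test above allows a TIME-WAIT socket to be reused, the new connection also needs a safe initial write sequence; the rule carried over from the old tw bucket is visible in the hunk: tw_snd_nxt + 65535 + 2, never 0, because tcp_v4_connect() treats a zero write_seq as "no sequence picked yet". A tiny standalone restatement of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Sequence chosen when recycling a TIME-WAIT socket for the same 4-tuple:
 * land past the old connection's snd_nxt plus a full 64K window, and avoid
 * 0 because a zero write_seq means "not chosen yet". */
static uint32_t reuse_write_seq(uint32_t tw_snd_nxt)
{
	uint32_t seq = tw_snd_nxt + 65535 + 2;

	return seq ? seq : 1;
}

int main(void)
{
	printf("%u\n", reuse_write_seq(1000));		/* 66537 */
	printf("%u\n", reuse_write_seq(4294901759u));	/* wraps to 0 -> 1 */
	return 0;
}
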
@@ -657,9 +225,9 @@ static inline u32 connect_port_offset(const struct sock *sk) | |||
657 | */ | 225 | */ |
658 | static inline int tcp_v4_hash_connect(struct sock *sk) | 226 | static inline int tcp_v4_hash_connect(struct sock *sk) |
659 | { | 227 | { |
660 | unsigned short snum = inet_sk(sk)->num; | 228 | const unsigned short snum = inet_sk(sk)->num; |
661 | struct tcp_bind_hashbucket *head; | 229 | struct inet_bind_hashbucket *head; |
662 | struct tcp_bind_bucket *tb; | 230 | struct inet_bind_bucket *tb; |
663 | int ret; | 231 | int ret; |
664 | 232 | ||
665 | if (!snum) { | 233 | if (!snum) { |
@@ -671,19 +239,19 @@ static inline int tcp_v4_hash_connect(struct sock *sk) | |||
671 | static u32 hint; | 239 | static u32 hint; |
672 | u32 offset = hint + connect_port_offset(sk); | 240 | u32 offset = hint + connect_port_offset(sk); |
673 | struct hlist_node *node; | 241 | struct hlist_node *node; |
674 | struct tcp_tw_bucket *tw = NULL; | 242 | struct inet_timewait_sock *tw = NULL; |
675 | 243 | ||
676 | local_bh_disable(); | 244 | local_bh_disable(); |
677 | for (i = 1; i <= range; i++) { | 245 | for (i = 1; i <= range; i++) { |
678 | port = low + (i + offset) % range; | 246 | port = low + (i + offset) % range; |
679 | head = &tcp_bhash[tcp_bhashfn(port)]; | 247 | head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; |
680 | spin_lock(&head->lock); | 248 | spin_lock(&head->lock); |
681 | 249 | ||
682 | /* Does not bother with rcv_saddr checks, | 250 | /* Does not bother with rcv_saddr checks, |
683 | * because the established check is already | 251 | * because the established check is already |
684 | * unique enough. | 252 | * unique enough. |
685 | */ | 253 | */ |
686 | tb_for_each(tb, node, &head->chain) { | 254 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
687 | if (tb->port == port) { | 255 | if (tb->port == port) { |
688 | BUG_TRAP(!hlist_empty(&tb->owners)); | 256 | BUG_TRAP(!hlist_empty(&tb->owners)); |
689 | if (tb->fastreuse >= 0) | 257 | if (tb->fastreuse >= 0) |
@@ -696,7 +264,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) | |||
696 | } | 264 | } |
697 | } | 265 | } |
698 | 266 | ||
699 | tb = tcp_bucket_create(head, port); | 267 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); |
700 | if (!tb) { | 268 | if (!tb) { |
701 | spin_unlock(&head->lock); | 269 | spin_unlock(&head->lock); |
702 | break; | 270 | break; |
@@ -715,27 +283,27 @@ ok: | |||
715 | hint += i; | 283 | hint += i; |
716 | 284 | ||
717 | /* Head lock still held and bh's disabled */ | 285 | /* Head lock still held and bh's disabled */ |
718 | tcp_bind_hash(sk, tb, port); | 286 | inet_bind_hash(sk, tb, port); |
719 | if (sk_unhashed(sk)) { | 287 | if (sk_unhashed(sk)) { |
720 | inet_sk(sk)->sport = htons(port); | 288 | inet_sk(sk)->sport = htons(port); |
721 | __tcp_v4_hash(sk, 0); | 289 | __inet_hash(&tcp_hashinfo, sk, 0); |
722 | } | 290 | } |
723 | spin_unlock(&head->lock); | 291 | spin_unlock(&head->lock); |
724 | 292 | ||
725 | if (tw) { | 293 | if (tw) { |
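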
726 | tcp_tw_deschedule(tw); | 294 | inet_twsk_deschedule(tw, &tcp_death_row); |
727 | tcp_tw_put(tw); | 295 | inet_twsk_put(tw); |
728 | } | 296 | } |
729 | 297 | ||
730 | ret = 0; | 298 | ret = 0; |
731 | goto out; | 299 | goto out; |
732 | } | 300 | } |
733 | 301 | ||
734 | head = &tcp_bhash[tcp_bhashfn(snum)]; | 302 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; |
735 | tb = tcp_sk(sk)->bind_hash; | 303 | tb = inet_csk(sk)->icsk_bind_hash; |
736 | spin_lock_bh(&head->lock); | 304 | spin_lock_bh(&head->lock); |
737 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 305 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
738 | __tcp_v4_hash(sk, 0); | 306 | __inet_hash(&tcp_hashinfo, sk, 0); |
739 | spin_unlock_bh(&head->lock); | 307 | spin_unlock_bh(&head->lock); |
740 | return 0; | 308 | return 0; |
741 | } else { | 309 | } else { |
@@ -798,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
798 | tp->write_seq = 0; | 366 | tp->write_seq = 0; |
799 | } | 367 | } |
800 | 368 | ||
801 | if (sysctl_tcp_tw_recycle && | 369 | if (tcp_death_row.sysctl_tw_recycle && |
802 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { | 370 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { |
803 | struct inet_peer *peer = rt_get_peer(rt); | 371 | struct inet_peer *peer = rt_get_peer(rt); |
804 | 372 | ||
@@ -837,8 +405,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
837 | goto failure; | 405 | goto failure; |
838 | 406 | ||
839 | /* OK, now commit destination to socket. */ | 407 | /* OK, now commit destination to socket. */ |
840 | __sk_dst_set(sk, &rt->u.dst); | 408 | sk_setup_caps(sk, &rt->u.dst); |
841 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
842 | 409 | ||
843 | if (!tp->write_seq) | 410 | if (!tp->write_seq) |
844 | tp->write_seq = secure_tcp_sequence_number(inet->saddr, | 411 | tp->write_seq = secure_tcp_sequence_number(inet->saddr, |
@@ -864,53 +431,6 @@ failure: | |||
864 | return err; | 431 | return err; |
865 | } | 432 | } |
866 | 433 | ||
867 | static __inline__ int tcp_v4_iif(struct sk_buff *skb) | ||
868 | { | ||
869 | return ((struct rtable *)skb->dst)->rt_iif; | ||
870 | } | ||
871 | |||
872 | static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) | ||
873 | { | ||
874 | return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); | ||
875 | } | ||
876 | |||
877 | static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | ||
878 | struct request_sock ***prevp, | ||
879 | __u16 rport, | ||
880 | __u32 raddr, __u32 laddr) | ||
881 | { | ||
882 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | ||
883 | struct request_sock *req, **prev; | ||
884 | |||
885 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; | ||
886 | (req = *prev) != NULL; | ||
887 | prev = &req->dl_next) { | ||
888 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
889 | |||
890 | if (ireq->rmt_port == rport && | ||
891 | ireq->rmt_addr == raddr && | ||
892 | ireq->loc_addr == laddr && | ||
893 | TCP_INET_FAMILY(req->rsk_ops->family)) { | ||
894 | BUG_TRAP(!req->sk); | ||
895 | *prevp = prev; | ||
896 | break; | ||
897 | } | ||
898 | } | ||
899 | |||
900 | return req; | ||
901 | } | ||
902 | |||
903 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) | ||
904 | { | ||
905 | struct tcp_sock *tp = tcp_sk(sk); | ||
906 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | ||
907 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | ||
908 | |||
909 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); | ||
910 | tcp_synq_added(sk); | ||
911 | } | ||
912 | |||
913 | |||
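
The removed tcp_v4_search_req() (its job now done by inet_csk_search_req()) walks one SYN-queue chain while carrying a pointer-to-pointer, so the caller gets back both the match and the slot that points at it and can unlink the request without a second search. A self-contained sketch of that pattern (struct req and its field subset are illustrative, not the real request_sock layout):

#include <stddef.h>
#include <stdio.h>

struct req {
	unsigned short rmt_port;
	unsigned int   rmt_addr, loc_addr;
	struct req    *dl_next;
};

/* Walk the chain; on a hit, also report the "prev" slot for unlinking. */
static struct req *search_req(struct req **chain, struct req ***prevp,
			      unsigned short rport, unsigned int raddr,
			      unsigned int laddr)
{
	struct req **prev, *req;

	for (prev = chain; (req = *prev) != NULL; prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->rmt_addr == raddr &&
		    req->loc_addr == laddr) {
			*prevp = prev;
			return req;
		}
	}
	return NULL;
}

int main(void)
{
	struct req b = { 80, 2, 1, NULL }, a = { 443, 3, 1, &b };
	struct req *chain = &a, **prev = NULL;
	struct req *hit = search_req(&chain, &prev, 80, 2, 1);

	if (hit && prev)
		*prev = hit->dl_next;	/* unlink, as the real caller would */
	printf("found=%p head=%p\n", (void *)hit, (void *)chain);
	return 0;
}
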
914 | /* | 434 | /* |
915 | * This routine does path mtu discovery as defined in RFC1191. | 435 | * This routine does path mtu discovery as defined in RFC1191. |
916 | */ | 436 | */ |
@@ -993,14 +513,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
993 | return; | 513 | return; |
994 | } | 514 | } |
995 | 515 | ||
996 | sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, | 516 | sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, |
997 | th->source, tcp_v4_iif(skb)); | 517 | th->source, inet_iif(skb)); |
998 | if (!sk) { | 518 | if (!sk) { |
999 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 519 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
1000 | return; | 520 | return; |
1001 | } | 521 | } |
1002 | if (sk->sk_state == TCP_TIME_WAIT) { | 522 | if (sk->sk_state == TCP_TIME_WAIT) { |
1003 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 523 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1004 | return; | 524 | return; |
1005 | } | 525 | } |
1006 | 526 | ||
@@ -1054,8 +574,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
1054 | if (sock_owned_by_user(sk)) | 574 | if (sock_owned_by_user(sk)) |
1055 | goto out; | 575 | goto out; |
1056 | 576 | ||
1057 | req = tcp_v4_search_req(tp, &prev, th->dest, | 577 | req = inet_csk_search_req(sk, &prev, th->dest, |
1058 | iph->daddr, iph->saddr); | 578 | iph->daddr, iph->saddr); |
1059 | if (!req) | 579 | if (!req) |
1060 | goto out; | 580 | goto out; |
1061 | 581 | ||
@@ -1075,7 +595,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
1075 | * created socket, and POSIX does not want network | 595 | * created socket, and POSIX does not want network |
1076 | * errors returned from accept(). | 596 | * errors returned from accept(). |
1077 | */ | 597 | */ |
1078 | tcp_synq_drop(sk, req, prev); | 598 | inet_csk_reqsk_queue_drop(sk, req, prev); |
1079 | goto out; | 599 | goto out; |
1080 | 600 | ||
1081 | case TCP_SYN_SENT: | 601 | case TCP_SYN_SENT: |
@@ -1245,12 +765,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
1245 | 765 | ||
1246 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | 766 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) |
1247 | { | 767 | { |
1248 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | 768 | struct inet_timewait_sock *tw = inet_twsk(sk); |
769 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
1249 | 770 | ||
1250 | tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, | 771 | tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, |
1251 | tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); | 772 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); |
1252 | 773 | ||
1253 | tcp_tw_put(tw); | 774 | inet_twsk_put(tw); |
1254 | } | 775 | } |
1255 | 776 | ||
1256 | static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | 777 | static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) |
@@ -1259,36 +780,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | |||
1259 | req->ts_recent); | 780 | req->ts_recent); |
1260 | } | 781 | } |
1261 | 782 | ||
1262 | static struct dst_entry* tcp_v4_route_req(struct sock *sk, | ||
1263 | struct request_sock *req) | ||
1264 | { | ||
1265 | struct rtable *rt; | ||
1266 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
1267 | struct ip_options *opt = inet_rsk(req)->opt; | ||
1268 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
1269 | .nl_u = { .ip4_u = | ||
1270 | { .daddr = ((opt && opt->srr) ? | ||
1271 | opt->faddr : | ||
1272 | ireq->rmt_addr), | ||
1273 | .saddr = ireq->loc_addr, | ||
1274 | .tos = RT_CONN_FLAGS(sk) } }, | ||
1275 | .proto = IPPROTO_TCP, | ||
1276 | .uli_u = { .ports = | ||
1277 | { .sport = inet_sk(sk)->sport, | ||
1278 | .dport = ireq->rmt_port } } }; | ||
1279 | |||
1280 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
1281 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
1282 | return NULL; | ||
1283 | } | ||
1284 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
1285 | ip_rt_put(rt); | ||
1286 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
1287 | return NULL; | ||
1288 | } | ||
1289 | return &rt->u.dst; | ||
1290 | } | ||
1291 | |||
1292 | /* | 783 | /* |
1293 | * Send a SYN-ACK after having received an ACK. | 784 | * Send a SYN-ACK after having received an ACK. |
1294 | * This still operates on a request_sock only, not on a big | 785 | * This still operates on a request_sock only, not on a big |
@@ -1302,7 +793,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
1302 | struct sk_buff * skb; | 793 | struct sk_buff * skb; |
1303 | 794 | ||
1304 | /* First, grab a route. */ | 795 | /* First, grab a route. */ |
1305 | if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) | 796 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
1306 | goto out; | 797 | goto out; |
1307 | 798 | ||
1308 | skb = tcp_make_synack(sk, dst, req); | 799 | skb = tcp_make_synack(sk, dst, req); |
@@ -1404,7 +895,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1404 | * limitations, they conserve resources and peer is | 895 | * limitations, they conserve resources and peer is |
1405 | * evidently real one. | 896 | * evidently real one. |
1406 | */ | 897 | */ |
1407 | if (tcp_synq_is_full(sk) && !isn) { | 898 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
1408 | #ifdef CONFIG_SYN_COOKIES | 899 | #ifdef CONFIG_SYN_COOKIES |
1409 | if (sysctl_tcp_syncookies) { | 900 | if (sysctl_tcp_syncookies) { |
1410 | want_cookie = 1; | 901 | want_cookie = 1; |
@@ -1418,7 +909,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1418 | * clogging syn queue with openreqs with exponentially increasing | 909 | * clogging syn queue with openreqs with exponentially increasing |
1419 | * timeout. | 910 | * timeout. |
1420 | */ | 911 | */ |
1421 | if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) | 912 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) |
1422 | goto drop; | 913 | goto drop; |
1423 | 914 | ||
1424 | req = reqsk_alloc(&tcp_request_sock_ops); | 915 | req = reqsk_alloc(&tcp_request_sock_ops); |
@@ -1474,8 +965,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1474 | * are made in the function processing timewait state. | 965 | * are made in the function processing timewait state. |
1475 | */ | 966 | */ |
1476 | if (tmp_opt.saw_tstamp && | 967 | if (tmp_opt.saw_tstamp && |
1477 | sysctl_tcp_tw_recycle && | 968 | tcp_death_row.sysctl_tw_recycle && |
1478 | (dst = tcp_v4_route_req(sk, req)) != NULL && | 969 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1479 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 970 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1480 | peer->v4daddr == saddr) { | 971 | peer->v4daddr == saddr) { |
1481 | if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && | 972 | if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && |
@@ -1488,7 +979,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1488 | } | 979 | } |
1489 | /* Kill the following clause, if you dislike this way. */ | 980 | /* Kill the following clause, if you dislike this way. */ |
1490 | else if (!sysctl_tcp_syncookies && | 981 | else if (!sysctl_tcp_syncookies && |
1491 | (sysctl_max_syn_backlog - tcp_synq_len(sk) < | 982 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < |
1492 | (sysctl_max_syn_backlog >> 2)) && | 983 | (sysctl_max_syn_backlog >> 2)) && |
1493 | (!peer || !peer->tcp_ts_stamp) && | 984 | (!peer || !peer->tcp_ts_stamp) && |
1494 | (!dst || !dst_metric(dst, RTAX_RTT))) { | 985 | (!dst || !dst_metric(dst, RTAX_RTT))) { |
@@ -1499,11 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1499 | * to destinations, already remembered | 990 | * to destinations, already remembered |
1500 | * to the moment of synflood. | 991 | * to the moment of synflood. |
1501 | */ | 992 | */ |
1502 | LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " | 993 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " |
1503 | "request from %u.%u." | 994 | "request from %u.%u.%u.%u/%u\n", |
1504 | "%u.%u/%u\n", | 995 | NIPQUAD(saddr), |
1505 | NIPQUAD(saddr), | 996 | ntohs(skb->h.th->source)); |
1506 | ntohs(skb->h.th->source))); | ||
1507 | dst_release(dst); | 997 | dst_release(dst); |
1508 | goto drop_and_free; | 998 | goto drop_and_free; |
1509 | } | 999 | } |
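
The drop branch in the hunk above encodes a simple heuristic: without syncookies, once fewer than a quarter of the sysctl_max_syn_backlog slots remain, SYNs from peers we know nothing about (no cached timestamp, no cached RTT metric) are refused rather than queued. Expressed as a standalone predicate (names here are illustrative, not kernel symbols):

#include <stdbool.h>
#include <stdio.h>

static bool drop_open_request(bool syncookies, int max_backlog, int queue_len,
			      bool peer_has_ts, bool dst_has_rtt)
{
	return !syncookies &&
	       (max_backlog - queue_len < (max_backlog >> 2)) &&
	       !peer_has_ts && !dst_has_rtt;
}

int main(void)
{
	/* 256-entry backlog, 200 already queued, unknown peer: dropped. */
	printf("%d\n", drop_open_request(false, 256, 200, false, false));
	return 0;
}
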
@@ -1518,7 +1008,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1518 | if (want_cookie) { | 1008 | if (want_cookie) { |
1519 | reqsk_free(req); | 1009 | reqsk_free(req); |
1520 | } else { | 1010 | } else { |
1521 | tcp_v4_synq_add(sk, req); | 1011 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1522 | } | 1012 | } |
1523 | return 0; | 1013 | return 0; |
1524 | 1014 | ||
@@ -1546,15 +1036,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1546 | if (sk_acceptq_is_full(sk)) | 1036 | if (sk_acceptq_is_full(sk)) |
1547 | goto exit_overflow; | 1037 | goto exit_overflow; |
1548 | 1038 | ||
1549 | if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) | 1039 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
1550 | goto exit; | 1040 | goto exit; |
1551 | 1041 | ||
1552 | newsk = tcp_create_openreq_child(sk, req, skb); | 1042 | newsk = tcp_create_openreq_child(sk, req, skb); |
1553 | if (!newsk) | 1043 | if (!newsk) |
1554 | goto exit; | 1044 | goto exit; |
1555 | 1045 | ||
1556 | newsk->sk_dst_cache = dst; | 1046 | sk_setup_caps(newsk, dst); |
1557 | tcp_v4_setup_caps(newsk, dst); | ||
1558 | 1047 | ||
1559 | newtp = tcp_sk(newsk); | 1048 | newtp = tcp_sk(newsk); |
1560 | newinet = inet_sk(newsk); | 1049 | newinet = inet_sk(newsk); |
@@ -1564,7 +1053,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1564 | newinet->saddr = ireq->loc_addr; | 1053 | newinet->saddr = ireq->loc_addr; |
1565 | newinet->opt = ireq->opt; | 1054 | newinet->opt = ireq->opt; |
1566 | ireq->opt = NULL; | 1055 | ireq->opt = NULL; |
1567 | newinet->mc_index = tcp_v4_iif(skb); | 1056 | newinet->mc_index = inet_iif(skb); |
1568 | newinet->mc_ttl = skb->nh.iph->ttl; | 1057 | newinet->mc_ttl = skb->nh.iph->ttl; |
1569 | newtp->ext_header_len = 0; | 1058 | newtp->ext_header_len = 0; |
1570 | if (newinet->opt) | 1059 | if (newinet->opt) |
@@ -1575,8 +1064,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1575 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1064 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
1576 | tcp_initialize_rcv_mss(newsk); | 1065 | tcp_initialize_rcv_mss(newsk); |
1577 | 1066 | ||
1578 | __tcp_v4_hash(newsk, 0); | 1067 | __inet_hash(&tcp_hashinfo, newsk, 0); |
1579 | __tcp_inherit_port(sk, newsk); | 1068 | __inet_inherit_port(&tcp_hashinfo, sk, newsk); |
1580 | 1069 | ||
1581 | return newsk; | 1070 | return newsk; |
1582 | 1071 | ||
@@ -1592,27 +1081,24 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1592 | { | 1081 | { |
1593 | struct tcphdr *th = skb->h.th; | 1082 | struct tcphdr *th = skb->h.th; |
1594 | struct iphdr *iph = skb->nh.iph; | 1083 | struct iphdr *iph = skb->nh.iph; |
1595 | struct tcp_sock *tp = tcp_sk(sk); | ||
1596 | struct sock *nsk; | 1084 | struct sock *nsk; |
1597 | struct request_sock **prev; | 1085 | struct request_sock **prev; |
1598 | /* Find possible connection requests. */ | 1086 | /* Find possible connection requests. */ |
1599 | struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, | 1087 | struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, |
1600 | iph->saddr, iph->daddr); | 1088 | iph->saddr, iph->daddr); |
1601 | if (req) | 1089 | if (req) |
1602 | return tcp_check_req(sk, skb, req, prev); | 1090 | return tcp_check_req(sk, skb, req, prev); |
1603 | 1091 | ||
1604 | nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, | 1092 | nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, |
1605 | th->source, | 1093 | th->source, skb->nh.iph->daddr, |
1606 | skb->nh.iph->daddr, | 1094 | ntohs(th->dest), inet_iif(skb)); |
1607 | ntohs(th->dest), | ||
1608 | tcp_v4_iif(skb)); | ||
1609 | 1095 | ||
1610 | if (nsk) { | 1096 | if (nsk) { |
1611 | if (nsk->sk_state != TCP_TIME_WAIT) { | 1097 | if (nsk->sk_state != TCP_TIME_WAIT) { |
1612 | bh_lock_sock(nsk); | 1098 | bh_lock_sock(nsk); |
1613 | return nsk; | 1099 | return nsk; |
1614 | } | 1100 | } |
1615 | tcp_tw_put((struct tcp_tw_bucket *)nsk); | 1101 | inet_twsk_put((struct inet_timewait_sock *)nsk); |
1616 | return NULL; | 1102 | return NULL; |
1617 | } | 1103 | } |
1618 | 1104 | ||
@@ -1631,7 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) | |||
1631 | skb->nh.iph->daddr, skb->csum)) | 1117 | skb->nh.iph->daddr, skb->csum)) |
1632 | return 0; | 1118 | return 0; |
1633 | 1119 | ||
1634 | LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); | 1120 | LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); |
1635 | skb->ip_summed = CHECKSUM_NONE; | 1121 | skb->ip_summed = CHECKSUM_NONE; |
1636 | } | 1122 | } |
1637 | if (skb->len <= 76) { | 1123 | if (skb->len <= 76) { |
@@ -1747,9 +1233,9 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1747 | TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; | 1233 | TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; |
1748 | TCP_SKB_CB(skb)->sacked = 0; | 1234 | TCP_SKB_CB(skb)->sacked = 0; |
1749 | 1235 | ||
1750 | sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, | 1236 | sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, |
1751 | skb->nh.iph->daddr, ntohs(th->dest), | 1237 | skb->nh.iph->daddr, ntohs(th->dest), |
1752 | tcp_v4_iif(skb)); | 1238 | inet_iif(skb)); |
1753 | 1239 | ||
1754 | if (!sk) | 1240 | if (!sk) |
1755 | goto no_tcp_socket; | 1241 | goto no_tcp_socket; |
@@ -1801,24 +1287,26 @@ discard_and_relse: | |||
1801 | 1287 | ||
1802 | do_time_wait: | 1288 | do_time_wait: |
1803 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 1289 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
1804 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1290 | inet_twsk_put((struct inet_timewait_sock *) sk); |
1805 | goto discard_it; | 1291 | goto discard_it; |
1806 | } | 1292 | } |
1807 | 1293 | ||
1808 | if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { | 1294 | if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { |
1809 | TCP_INC_STATS_BH(TCP_MIB_INERRS); | 1295 | TCP_INC_STATS_BH(TCP_MIB_INERRS); |
1810 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1296 | inet_twsk_put((struct inet_timewait_sock *) sk); |
1811 | goto discard_it; | 1297 | goto discard_it; |
1812 | } | 1298 | } |
1813 | switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, | 1299 | switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, |
1814 | skb, th, skb->len)) { | 1300 | skb, th)) { |
1815 | case TCP_TW_SYN: { | 1301 | case TCP_TW_SYN: { |
1816 | struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, | 1302 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, |
1817 | ntohs(th->dest), | 1303 | skb->nh.iph->daddr, |
1818 | tcp_v4_iif(skb)); | 1304 | ntohs(th->dest), |
1305 | inet_iif(skb)); | ||
1819 | if (sk2) { | 1306 | if (sk2) { |
1820 | tcp_tw_deschedule((struct tcp_tw_bucket *)sk); | 1307 | inet_twsk_deschedule((struct inet_timewait_sock *)sk, |
1821 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 1308 | &tcp_death_row); |
1309 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
1822 | sk = sk2; | 1310 | sk = sk2; |
1823 | goto process; | 1311 | goto process; |
1824 | } | 1312 | } |
@@ -1834,112 +1322,6 @@ do_time_wait: | |||
1834 | goto discard_it; | 1322 | goto discard_it; |
1835 | } | 1323 | } |
1836 | 1324 | ||
1837 | /* With per-bucket locks this operation is not-atomic, so that | ||
1838 | * this version is not worse. | ||
1839 | */ | ||
1840 | static void __tcp_v4_rehash(struct sock *sk) | ||
1841 | { | ||
1842 | sk->sk_prot->unhash(sk); | ||
1843 | sk->sk_prot->hash(sk); | ||
1844 | } | ||
1845 | |||
1846 | static int tcp_v4_reselect_saddr(struct sock *sk) | ||
1847 | { | ||
1848 | struct inet_sock *inet = inet_sk(sk); | ||
1849 | int err; | ||
1850 | struct rtable *rt; | ||
1851 | __u32 old_saddr = inet->saddr; | ||
1852 | __u32 new_saddr; | ||
1853 | __u32 daddr = inet->daddr; | ||
1854 | |||
1855 | if (inet->opt && inet->opt->srr) | ||
1856 | daddr = inet->opt->faddr; | ||
1857 | |||
1858 | /* Query new route. */ | ||
1859 | err = ip_route_connect(&rt, daddr, 0, | ||
1860 | RT_CONN_FLAGS(sk), | ||
1861 | sk->sk_bound_dev_if, | ||
1862 | IPPROTO_TCP, | ||
1863 | inet->sport, inet->dport, sk); | ||
1864 | if (err) | ||
1865 | return err; | ||
1866 | |||
1867 | __sk_dst_set(sk, &rt->u.dst); | ||
1868 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
1869 | |||
1870 | new_saddr = rt->rt_src; | ||
1871 | |||
1872 | if (new_saddr == old_saddr) | ||
1873 | return 0; | ||
1874 | |||
1875 | if (sysctl_ip_dynaddr > 1) { | ||
1876 | printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" | ||
1877 | "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", | ||
1878 | NIPQUAD(old_saddr), | ||
1879 | NIPQUAD(new_saddr)); | ||
1880 | } | ||
1881 | |||
1882 | inet->saddr = new_saddr; | ||
1883 | inet->rcv_saddr = new_saddr; | ||
1884 | |||
1885 | /* XXX The only one ugly spot where we need to | ||
1886 | * XXX really change the sockets identity after | ||
1887 | * XXX it has entered the hashes. -DaveM | ||
1888 | * | ||
1889 | * Besides that, it does not check for connection | ||
1890 | * uniqueness. Wait for troubles. | ||
1891 | */ | ||
1892 | __tcp_v4_rehash(sk); | ||
1893 | return 0; | ||
1894 | } | ||
1895 | |||
1896 | int tcp_v4_rebuild_header(struct sock *sk) | ||
1897 | { | ||
1898 | struct inet_sock *inet = inet_sk(sk); | ||
1899 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); | ||
1900 | u32 daddr; | ||
1901 | int err; | ||
1902 | |||
1903 | /* Route is OK, nothing to do. */ | ||
1904 | if (rt) | ||
1905 | return 0; | ||
1906 | |||
1907 | /* Reroute. */ | ||
1908 | daddr = inet->daddr; | ||
1909 | if (inet->opt && inet->opt->srr) | ||
1910 | daddr = inet->opt->faddr; | ||
1911 | |||
1912 | { | ||
1913 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
1914 | .nl_u = { .ip4_u = | ||
1915 | { .daddr = daddr, | ||
1916 | .saddr = inet->saddr, | ||
1917 | .tos = RT_CONN_FLAGS(sk) } }, | ||
1918 | .proto = IPPROTO_TCP, | ||
1919 | .uli_u = { .ports = | ||
1920 | { .sport = inet->sport, | ||
1921 | .dport = inet->dport } } }; | ||
1922 | |||
1923 | err = ip_route_output_flow(&rt, &fl, sk, 0); | ||
1924 | } | ||
1925 | if (!err) { | ||
1926 | __sk_dst_set(sk, &rt->u.dst); | ||
1927 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
1928 | return 0; | ||
1929 | } | ||
1930 | |||
1931 | /* Routing failed... */ | ||
1932 | sk->sk_route_caps = 0; | ||
1933 | |||
1934 | if (!sysctl_ip_dynaddr || | ||
1935 | sk->sk_state != TCP_SYN_SENT || | ||
1936 | (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || | ||
1937 | (err = tcp_v4_reselect_saddr(sk)) != 0) | ||
1938 | sk->sk_err_soft = -err; | ||
1939 | |||
1940 | return err; | ||
1941 | } | ||
1942 | |||
1943 | static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) | 1325 | static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) |
1944 | { | 1326 | { |
1945 | struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; | 1327 | struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; |
@@ -1988,18 +1370,18 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1988 | return 0; | 1370 | return 0; |
1989 | } | 1371 | } |
1990 | 1372 | ||
1991 | int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) | 1373 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) |
1992 | { | 1374 | { |
1993 | struct inet_peer *peer = NULL; | 1375 | struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); |
1994 | |||
1995 | peer = inet_getpeer(tw->tw_daddr, 1); | ||
1996 | 1376 | ||
1997 | if (peer) { | 1377 | if (peer) { |
1998 | if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || | 1378 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
1379 | |||
1380 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
1999 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && | 1381 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && |
2000 | peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { | 1382 | peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { |
2001 | peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; | 1383 | peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; |
2002 | peer->tcp_ts = tw->tw_ts_recent; | 1384 | peer->tcp_ts = tcptw->tw_ts_recent; |
2003 | } | 1385 | } |
2004 | inet_putpeer(peer); | 1386 | inet_putpeer(peer); |
2005 | return 1; | 1387 | return 1; |
@@ -2011,7 +1393,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) | |||
2011 | struct tcp_func ipv4_specific = { | 1393 | struct tcp_func ipv4_specific = { |
2012 | .queue_xmit = ip_queue_xmit, | 1394 | .queue_xmit = ip_queue_xmit, |
2013 | .send_check = tcp_v4_send_check, | 1395 | .send_check = tcp_v4_send_check, |
2014 | .rebuild_header = tcp_v4_rebuild_header, | 1396 | .rebuild_header = inet_sk_rebuild_header, |
2015 | .conn_request = tcp_v4_conn_request, | 1397 | .conn_request = tcp_v4_conn_request, |
2016 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1398 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
2017 | .remember_stamp = tcp_v4_remember_stamp, | 1399 | .remember_stamp = tcp_v4_remember_stamp, |
@@ -2027,13 +1409,14 @@ struct tcp_func ipv4_specific = { | |||
2027 | */ | 1409 | */ |
2028 | static int tcp_v4_init_sock(struct sock *sk) | 1410 | static int tcp_v4_init_sock(struct sock *sk) |
2029 | { | 1411 | { |
1412 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2030 | struct tcp_sock *tp = tcp_sk(sk); | 1413 | struct tcp_sock *tp = tcp_sk(sk); |
2031 | 1414 | ||
2032 | skb_queue_head_init(&tp->out_of_order_queue); | 1415 | skb_queue_head_init(&tp->out_of_order_queue); |
2033 | tcp_init_xmit_timers(sk); | 1416 | tcp_init_xmit_timers(sk); |
2034 | tcp_prequeue_init(tp); | 1417 | tcp_prequeue_init(tp); |
2035 | 1418 | ||
2036 | tp->rto = TCP_TIMEOUT_INIT; | 1419 | icsk->icsk_rto = TCP_TIMEOUT_INIT; |
2037 | tp->mdev = TCP_TIMEOUT_INIT; | 1420 | tp->mdev = TCP_TIMEOUT_INIT; |
2038 | 1421 | ||
2039 | /* So many TCP implementations out there (incorrectly) count the | 1422 | /* So many TCP implementations out there (incorrectly) count the |
@@ -2051,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
2051 | tp->mss_cache = 536; | 1434 | tp->mss_cache = 536; |
2052 | 1435 | ||
2053 | tp->reordering = sysctl_tcp_reordering; | 1436 | tp->reordering = sysctl_tcp_reordering; |
2054 | tp->ca_ops = &tcp_init_congestion_ops; | 1437 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; |
2055 | 1438 | ||
2056 | sk->sk_state = TCP_CLOSE; | 1439 | sk->sk_state = TCP_CLOSE; |
2057 | 1440 | ||
@@ -2074,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
2074 | 1457 | ||
2075 | tcp_clear_xmit_timers(sk); | 1458 | tcp_clear_xmit_timers(sk); |
2076 | 1459 | ||
2077 | tcp_cleanup_congestion_control(tp); | 1460 | tcp_cleanup_congestion_control(sk); |
2078 | 1461 | ||
2079 | /* Cleanup up the write buffer. */ | 1462 | /* Cleanup up the write buffer. */ |
2080 | sk_stream_writequeue_purge(sk); | 1463 | sk_stream_writequeue_purge(sk); |
@@ -2086,8 +1469,8 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
2086 | __skb_queue_purge(&tp->ucopy.prequeue); | 1469 | __skb_queue_purge(&tp->ucopy.prequeue); |
2087 | 1470 | ||
2088 | /* Clean up a referenced TCP bind bucket. */ | 1471 | /* Clean up a referenced TCP bind bucket. */ |
2089 | if (tp->bind_hash) | 1472 | if (inet_csk(sk)->icsk_bind_hash) |
2090 | tcp_put_port(sk); | 1473 | inet_put_port(&tcp_hashinfo, sk); |
2091 | 1474 | ||
2092 | /* | 1475 | /* |
2093 | * If sendmsg cached page exists, toss it. | 1476 | * If sendmsg cached page exists, toss it. |
@@ -2107,13 +1490,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); | |||
2107 | #ifdef CONFIG_PROC_FS | 1490 | #ifdef CONFIG_PROC_FS |
2108 | /* Proc filesystem TCP sock list dumping. */ | 1491 | /* Proc filesystem TCP sock list dumping. */ |
2109 | 1492 | ||
2110 | static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) | 1493 | static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) |
2111 | { | 1494 | { |
2112 | return hlist_empty(head) ? NULL : | 1495 | return hlist_empty(head) ? NULL : |
2113 | list_entry(head->first, struct tcp_tw_bucket, tw_node); | 1496 | list_entry(head->first, struct inet_timewait_sock, tw_node); |
2114 | } | 1497 | } |
2115 | 1498 | ||
2116 | static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) | 1499 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) |
2117 | { | 1500 | { |
2118 | return tw->tw_node.next ? | 1501 | return tw->tw_node.next ? |
2119 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1502 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
@@ -2121,14 +1504,14 @@ static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) | |||
2121 | 1504 | ||
2122 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1505 | static void *listening_get_next(struct seq_file *seq, void *cur) |
2123 | { | 1506 | { |
2124 | struct tcp_sock *tp; | 1507 | struct inet_connection_sock *icsk; |
2125 | struct hlist_node *node; | 1508 | struct hlist_node *node; |
2126 | struct sock *sk = cur; | 1509 | struct sock *sk = cur; |
2127 | struct tcp_iter_state* st = seq->private; | 1510 | struct tcp_iter_state* st = seq->private; |
2128 | 1511 | ||
2129 | if (!sk) { | 1512 | if (!sk) { |
2130 | st->bucket = 0; | 1513 | st->bucket = 0; |
2131 | sk = sk_head(&tcp_listening_hash[0]); | 1514 | sk = sk_head(&tcp_hashinfo.listening_hash[0]); |
2132 | goto get_sk; | 1515 | goto get_sk; |
2133 | } | 1516 | } |
2134 | 1517 | ||
@@ -2137,7 +1520,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2137 | if (st->state == TCP_SEQ_STATE_OPENREQ) { | 1520 | if (st->state == TCP_SEQ_STATE_OPENREQ) { |
2138 | struct request_sock *req = cur; | 1521 | struct request_sock *req = cur; |
2139 | 1522 | ||
2140 | tp = tcp_sk(st->syn_wait_sk); | 1523 | icsk = inet_csk(st->syn_wait_sk); |
2141 | req = req->dl_next; | 1524 | req = req->dl_next; |
2142 | while (1) { | 1525 | while (1) { |
2143 | while (req) { | 1526 | while (req) { |
@@ -2150,17 +1533,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2150 | if (++st->sbucket >= TCP_SYNQ_HSIZE) | 1533 | if (++st->sbucket >= TCP_SYNQ_HSIZE) |
2151 | break; | 1534 | break; |
2152 | get_req: | 1535 | get_req: |
2153 | req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; | 1536 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; |
2154 | } | 1537 | } |
2155 | sk = sk_next(st->syn_wait_sk); | 1538 | sk = sk_next(st->syn_wait_sk); |
2156 | st->state = TCP_SEQ_STATE_LISTENING; | 1539 | st->state = TCP_SEQ_STATE_LISTENING; |
2157 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1540 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2158 | } else { | 1541 | } else { |
2159 | tp = tcp_sk(sk); | 1542 | icsk = inet_csk(sk); |
2160 | read_lock_bh(&tp->accept_queue.syn_wait_lock); | 1543 | read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2161 | if (reqsk_queue_len(&tp->accept_queue)) | 1544 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) |
2162 | goto start_req; | 1545 | goto start_req; |
2163 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1546 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2164 | sk = sk_next(sk); | 1547 | sk = sk_next(sk); |
2165 | } | 1548 | } |
2166 | get_sk: | 1549 | get_sk: |
@@ -2169,9 +1552,9 @@ get_sk: | |||
2169 | cur = sk; | 1552 | cur = sk; |
2170 | goto out; | 1553 | goto out; |
2171 | } | 1554 | } |
2172 | tp = tcp_sk(sk); | 1555 | icsk = inet_csk(sk); |
2173 | read_lock_bh(&tp->accept_queue.syn_wait_lock); | 1556 | read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2174 | if (reqsk_queue_len(&tp->accept_queue)) { | 1557 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) { |
2175 | start_req: | 1558 | start_req: |
2176 | st->uid = sock_i_uid(sk); | 1559 | st->uid = sock_i_uid(sk); |
2177 | st->syn_wait_sk = sk; | 1560 | st->syn_wait_sk = sk; |
@@ -2179,10 +1562,10 @@ start_req: | |||
2179 | st->sbucket = 0; | 1562 | st->sbucket = 0; |
2180 | goto get_req; | 1563 | goto get_req; |
2181 | } | 1564 | } |
2182 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1565 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2183 | } | 1566 | } |
2184 | if (++st->bucket < TCP_LHTABLE_SIZE) { | 1567 | if (++st->bucket < INET_LHTABLE_SIZE) { |
2185 | sk = sk_head(&tcp_listening_hash[st->bucket]); | 1568 | sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); |
2186 | goto get_sk; | 1569 | goto get_sk; |
2187 | } | 1570 | } |
2188 | cur = NULL; | 1571 | cur = NULL; |
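The hunks above replace every tcp_sk(sk)->accept_queue reference with inet_csk(sk)->icsk_accept_queue. In isolation, the lock/length pattern used by the iterator looks roughly like this (hypothetical helper listener_pending_requests; only fields and calls already visible in the hunks are used):

        /* Hypothetical: how many request sockets sit in a listener's SYN table,
         * using the same syn_wait_lock discipline as listening_get_next(). */
        static int listener_pending_requests(struct sock *sk)
        {
                struct inet_connection_sock *icsk = inet_csk(sk);
                int len;

                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                len = reqsk_queue_len(&icsk->icsk_accept_queue);
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                return len;
        }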
@@ -2206,16 +1589,16 @@ static void *established_get_first(struct seq_file *seq) | |||
2206 | struct tcp_iter_state* st = seq->private; | 1589 | struct tcp_iter_state* st = seq->private; |
2207 | void *rc = NULL; | 1590 | void *rc = NULL; |
2208 | 1591 | ||
2209 | for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { | 1592 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { |
2210 | struct sock *sk; | 1593 | struct sock *sk; |
2211 | struct hlist_node *node; | 1594 | struct hlist_node *node; |
2212 | struct tcp_tw_bucket *tw; | 1595 | struct inet_timewait_sock *tw; |
2213 | 1596 | ||
2214 | /* We can reschedule _before_ having picked the target: */ | 1597 | /* We can reschedule _before_ having picked the target: */ |
2215 | cond_resched_softirq(); | 1598 | cond_resched_softirq(); |
2216 | 1599 | ||
2217 | read_lock(&tcp_ehash[st->bucket].lock); | 1600 | read_lock(&tcp_hashinfo.ehash[st->bucket].lock); |
2218 | sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { | 1601 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
2219 | if (sk->sk_family != st->family) { | 1602 | if (sk->sk_family != st->family) { |
2220 | continue; | 1603 | continue; |
2221 | } | 1604 | } |
@@ -2223,15 +1606,15 @@ static void *established_get_first(struct seq_file *seq) | |||
2223 | goto out; | 1606 | goto out; |
2224 | } | 1607 | } |
2225 | st->state = TCP_SEQ_STATE_TIME_WAIT; | 1608 | st->state = TCP_SEQ_STATE_TIME_WAIT; |
2226 | tw_for_each(tw, node, | 1609 | inet_twsk_for_each(tw, node, |
2227 | &tcp_ehash[st->bucket + tcp_ehash_size].chain) { | 1610 | &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { |
2228 | if (tw->tw_family != st->family) { | 1611 | if (tw->tw_family != st->family) { |
2229 | continue; | 1612 | continue; |
2230 | } | 1613 | } |
2231 | rc = tw; | 1614 | rc = tw; |
2232 | goto out; | 1615 | goto out; |
2233 | } | 1616 | } |
2234 | read_unlock(&tcp_ehash[st->bucket].lock); | 1617 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2235 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1618 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2236 | } | 1619 | } |
2237 | out: | 1620 | out: |
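established_get_first() depends on the established-hash layout: live connections hash into buckets [0, ehash_size) and their TIME_WAIT counterparts live at the same index plus ehash_size, both guarded by the lower bucket's lock. The indexing, pulled out as a sketch (hypothetical helper tw_chain_for_bucket):

        /* Hypothetical: the chain holding TIME_WAIT sockets that correspond to
         * established-hash bucket 'bucket'. */
        static struct hlist_head *tw_chain_for_bucket(struct inet_hashinfo *hashinfo,
                                                      unsigned int bucket)
        {
                return &hashinfo->ehash[bucket + hashinfo->ehash_size].chain;
        }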
@@ -2241,7 +1624,7 @@ out: | |||
2241 | static void *established_get_next(struct seq_file *seq, void *cur) | 1624 | static void *established_get_next(struct seq_file *seq, void *cur) |
2242 | { | 1625 | { |
2243 | struct sock *sk = cur; | 1626 | struct sock *sk = cur; |
2244 | struct tcp_tw_bucket *tw; | 1627 | struct inet_timewait_sock *tw; |
2245 | struct hlist_node *node; | 1628 | struct hlist_node *node; |
2246 | struct tcp_iter_state* st = seq->private; | 1629 | struct tcp_iter_state* st = seq->private; |
2247 | 1630 | ||
@@ -2258,15 +1641,15 @@ get_tw: | |||
2258 | cur = tw; | 1641 | cur = tw; |
2259 | goto out; | 1642 | goto out; |
2260 | } | 1643 | } |
2261 | read_unlock(&tcp_ehash[st->bucket].lock); | 1644 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2262 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1645 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2263 | 1646 | ||
2264 | /* We can reschedule between buckets: */ | 1647 | /* We can reschedule between buckets: */ |
2265 | cond_resched_softirq(); | 1648 | cond_resched_softirq(); |
2266 | 1649 | ||
2267 | if (++st->bucket < tcp_ehash_size) { | 1650 | if (++st->bucket < tcp_hashinfo.ehash_size) { |
2268 | read_lock(&tcp_ehash[st->bucket].lock); | 1651 | read_lock(&tcp_hashinfo.ehash[st->bucket].lock); |
2269 | sk = sk_head(&tcp_ehash[st->bucket].chain); | 1652 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); |
2270 | } else { | 1653 | } else { |
2271 | cur = NULL; | 1654 | cur = NULL; |
2272 | goto out; | 1655 | goto out; |
@@ -2280,7 +1663,7 @@ get_tw: | |||
2280 | } | 1663 | } |
2281 | 1664 | ||
2282 | st->state = TCP_SEQ_STATE_TIME_WAIT; | 1665 | st->state = TCP_SEQ_STATE_TIME_WAIT; |
2283 | tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); | 1666 | tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); |
2284 | goto get_tw; | 1667 | goto get_tw; |
2285 | found: | 1668 | found: |
2286 | cur = sk; | 1669 | cur = sk; |
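Both established iterators also filter each chain by st->family before returning an entry. The same pattern in standalone form (hypothetical helper first_sk_of_family; caller holds the bucket lock):

        /* Hypothetical: first socket of the requested family on one chain,
         * mirroring the sk_for_each() filtering done by the iterators above. */
        static struct sock *first_sk_of_family(struct hlist_head *chain,
                                               unsigned short family)
        {
                struct sock *sk;
                struct hlist_node *node;

                sk_for_each(sk, node, chain)
                        if (sk->sk_family == family)
                                return sk;
                return NULL;
        }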
@@ -2304,12 +1687,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
2304 | void *rc; | 1687 | void *rc; |
2305 | struct tcp_iter_state* st = seq->private; | 1688 | struct tcp_iter_state* st = seq->private; |
2306 | 1689 | ||
2307 | tcp_listen_lock(); | 1690 | inet_listen_lock(&tcp_hashinfo); |
2308 | st->state = TCP_SEQ_STATE_LISTENING; | 1691 | st->state = TCP_SEQ_STATE_LISTENING; |
2309 | rc = listening_get_idx(seq, &pos); | 1692 | rc = listening_get_idx(seq, &pos); |
2310 | 1693 | ||
2311 | if (!rc) { | 1694 | if (!rc) { |
2312 | tcp_listen_unlock(); | 1695 | inet_listen_unlock(&tcp_hashinfo); |
2313 | local_bh_disable(); | 1696 | local_bh_disable(); |
2314 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1697 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2315 | rc = established_get_idx(seq, pos); | 1698 | rc = established_get_idx(seq, pos); |
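tcp_get_idx() fixes the locking order for the whole dump: the listening hash is covered by inet_listen_lock(), and only after that lock is dropped are bottom halves disabled for the established walk. Stripped of the bookkeeping, the order is (sketch only; the calls are the ones appearing above):

        /* Sketch of the lock ordering used by tcp_get_idx()/tcp_seq_next(). */
        static void tcp_iter_lock_order_sketch(void)
        {
                inet_listen_lock(&tcp_hashinfo);
                /* ... walk tcp_hashinfo.listening_hash[] ... */
                inet_listen_unlock(&tcp_hashinfo);

                local_bh_disable();
                /* ... walk tcp_hashinfo.ehash[], taking ehash[bucket].lock per bucket ... */
                local_bh_enable();
        }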
@@ -2342,7 +1725,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2342 | case TCP_SEQ_STATE_LISTENING: | 1725 | case TCP_SEQ_STATE_LISTENING: |
2343 | rc = listening_get_next(seq, v); | 1726 | rc = listening_get_next(seq, v); |
2344 | if (!rc) { | 1727 | if (!rc) { |
2345 | tcp_listen_unlock(); | 1728 | inet_listen_unlock(&tcp_hashinfo); |
2346 | local_bh_disable(); | 1729 | local_bh_disable(); |
2347 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1730 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2348 | rc = established_get_first(seq); | 1731 | rc = established_get_first(seq); |
@@ -2365,17 +1748,17 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2365 | switch (st->state) { | 1748 | switch (st->state) { |
2366 | case TCP_SEQ_STATE_OPENREQ: | 1749 | case TCP_SEQ_STATE_OPENREQ: |
2367 | if (v) { | 1750 | if (v) { |
2368 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); | 1751 | struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); |
2369 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1752 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2370 | } | 1753 | } |
2371 | case TCP_SEQ_STATE_LISTENING: | 1754 | case TCP_SEQ_STATE_LISTENING: |
2372 | if (v != SEQ_START_TOKEN) | 1755 | if (v != SEQ_START_TOKEN) |
2373 | tcp_listen_unlock(); | 1756 | inet_listen_unlock(&tcp_hashinfo); |
2374 | break; | 1757 | break; |
2375 | case TCP_SEQ_STATE_TIME_WAIT: | 1758 | case TCP_SEQ_STATE_TIME_WAIT: |
2376 | case TCP_SEQ_STATE_ESTABLISHED: | 1759 | case TCP_SEQ_STATE_ESTABLISHED: |
2377 | if (v) | 1760 | if (v) |
2378 | read_unlock(&tcp_ehash[st->bucket].lock); | 1761 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2379 | local_bh_enable(); | 1762 | local_bh_enable(); |
2380 | break; | 1763 | break; |
2381 | } | 1764 | } |
@@ -2472,18 +1855,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2472 | int timer_active; | 1855 | int timer_active; |
2473 | unsigned long timer_expires; | 1856 | unsigned long timer_expires; |
2474 | struct tcp_sock *tp = tcp_sk(sp); | 1857 | struct tcp_sock *tp = tcp_sk(sp); |
1858 | const struct inet_connection_sock *icsk = inet_csk(sp); | ||
2475 | struct inet_sock *inet = inet_sk(sp); | 1859 | struct inet_sock *inet = inet_sk(sp); |
2476 | unsigned int dest = inet->daddr; | 1860 | unsigned int dest = inet->daddr; |
2477 | unsigned int src = inet->rcv_saddr; | 1861 | unsigned int src = inet->rcv_saddr; |
2478 | __u16 destp = ntohs(inet->dport); | 1862 | __u16 destp = ntohs(inet->dport); |
2479 | __u16 srcp = ntohs(inet->sport); | 1863 | __u16 srcp = ntohs(inet->sport); |
2480 | 1864 | ||
2481 | if (tp->pending == TCP_TIME_RETRANS) { | 1865 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { |
2482 | timer_active = 1; | 1866 | timer_active = 1; |
2483 | timer_expires = tp->timeout; | 1867 | timer_expires = icsk->icsk_timeout; |
2484 | } else if (tp->pending == TCP_TIME_PROBE0) { | 1868 | } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { |
2485 | timer_active = 4; | 1869 | timer_active = 4; |
2486 | timer_expires = tp->timeout; | 1870 | timer_expires = icsk->icsk_timeout; |
2487 | } else if (timer_pending(&sp->sk_timer)) { | 1871 | } else if (timer_pending(&sp->sk_timer)) { |
2488 | timer_active = 2; | 1872 | timer_active = 2; |
2489 | timer_expires = sp->sk_timer.expires; | 1873 | timer_expires = sp->sk_timer.expires; |
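The branch above translates the connection-sock timer state into the numeric codes /proc/net/tcp reports (1 retransmit, 2 sk_timer/keepalive, 4 zero-window probe). As a standalone sketch (hypothetical helper tcp4_timer_code, reusing only the icsk fields shown in this hunk):

        /* Hypothetical helper mirroring the decoding above: returns the
         * /proc/net/tcp timer code and fills *expires with the deadline. */
        static int tcp4_timer_code(struct sock *sp, unsigned long *expires)
        {
                const struct inet_connection_sock *icsk = inet_csk(sp);

                if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                        *expires = icsk->icsk_timeout;
                        return 1;       /* retransmit timer */
                }
                if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                        *expires = icsk->icsk_timeout;
                        return 4;       /* zero-window probe timer */
                }
                if (timer_pending(&sp->sk_timer)) {
                        *expires = sp->sk_timer.expires;
                        return 2;       /* sk_timer (keepalive) */
                }
                *expires = 0;
                return 0;               /* no timer pending */
        }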
@@ -2498,17 +1882,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2498 | tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, | 1882 | tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, |
2499 | timer_active, | 1883 | timer_active, |
2500 | jiffies_to_clock_t(timer_expires - jiffies), | 1884 | jiffies_to_clock_t(timer_expires - jiffies), |
2501 | tp->retransmits, | 1885 | icsk->icsk_retransmits, |
2502 | sock_i_uid(sp), | 1886 | sock_i_uid(sp), |
2503 | tp->probes_out, | 1887 | icsk->icsk_probes_out, |
2504 | sock_i_ino(sp), | 1888 | sock_i_ino(sp), |
2505 | atomic_read(&sp->sk_refcnt), sp, | 1889 | atomic_read(&sp->sk_refcnt), sp, |
2506 | tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, | 1890 | icsk->icsk_rto, |
1891 | icsk->icsk_ack.ato, | ||
1892 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, | ||
2507 | tp->snd_cwnd, | 1893 | tp->snd_cwnd, |
2508 | tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); | 1894 | tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); |
2509 | } | 1895 | } |
2510 | 1896 | ||
2511 | static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) | 1897 | static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) |
2512 | { | 1898 | { |
2513 | unsigned int dest, src; | 1899 | unsigned int dest, src; |
2514 | __u16 destp, srcp; | 1900 | __u16 destp, srcp; |
@@ -2588,7 +1974,7 @@ struct proto tcp_prot = { | |||
2588 | .close = tcp_close, | 1974 | .close = tcp_close, |
2589 | .connect = tcp_v4_connect, | 1975 | .connect = tcp_v4_connect, |
2590 | .disconnect = tcp_disconnect, | 1976 | .disconnect = tcp_disconnect, |
2591 | .accept = tcp_accept, | 1977 | .accept = inet_csk_accept, |
2592 | .ioctl = tcp_ioctl, | 1978 | .ioctl = tcp_ioctl, |
2593 | .init = tcp_v4_init_sock, | 1979 | .init = tcp_v4_init_sock, |
2594 | .destroy = tcp_v4_destroy_sock, | 1980 | .destroy = tcp_v4_destroy_sock, |
@@ -2603,6 +1989,7 @@ struct proto tcp_prot = { | |||
2603 | .get_port = tcp_v4_get_port, | 1989 | .get_port = tcp_v4_get_port, |
2604 | .enter_memory_pressure = tcp_enter_memory_pressure, | 1990 | .enter_memory_pressure = tcp_enter_memory_pressure, |
2605 | .sockets_allocated = &tcp_sockets_allocated, | 1991 | .sockets_allocated = &tcp_sockets_allocated, |
1992 | .orphan_count = &tcp_orphan_count, | ||
2606 | .memory_allocated = &tcp_memory_allocated, | 1993 | .memory_allocated = &tcp_memory_allocated, |
2607 | .memory_pressure = &tcp_memory_pressure, | 1994 | .memory_pressure = &tcp_memory_pressure, |
2608 | .sysctl_mem = sysctl_tcp_mem, | 1995 | .sysctl_mem = sysctl_tcp_mem, |
@@ -2610,6 +1997,7 @@ struct proto tcp_prot = { | |||
2610 | .sysctl_rmem = sysctl_tcp_rmem, | 1997 | .sysctl_rmem = sysctl_tcp_rmem, |
2611 | .max_header = MAX_TCP_HEADER, | 1998 | .max_header = MAX_TCP_HEADER, |
2612 | .obj_size = sizeof(struct tcp_sock), | 1999 | .obj_size = sizeof(struct tcp_sock), |
2000 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
2613 | .rsk_prot = &tcp_request_sock_ops, | 2001 | .rsk_prot = &tcp_request_sock_ops, |
2614 | }; | 2002 | }; |
2615 | 2003 | ||
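The .accept slot now points at the generic inet_csk_accept() instead of a TCP-private accept routine. The socket layer reaches it through sk->sk_prot->accept, which a purely illustrative wrapper makes explicit (hypothetical function, not part of the patch):

        /* Hypothetical: the dispatch the AF_INET accept path performs; for
         * tcp_prot this now resolves to inet_csk_accept(). */
        static struct sock *example_proto_accept(struct sock *sk, int flags, int *err)
        {
                return sk->sk_prot->accept(sk, flags, err);
        }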
@@ -2631,19 +2019,13 @@ void __init tcp_v4_init(struct net_proto_family *ops) | |||
2631 | } | 2019 | } |
2632 | 2020 | ||
2633 | EXPORT_SYMBOL(ipv4_specific); | 2021 | EXPORT_SYMBOL(ipv4_specific); |
2634 | EXPORT_SYMBOL(tcp_bind_hash); | 2022 | EXPORT_SYMBOL(inet_bind_bucket_create); |
2635 | EXPORT_SYMBOL(tcp_bucket_create); | ||
2636 | EXPORT_SYMBOL(tcp_hashinfo); | 2023 | EXPORT_SYMBOL(tcp_hashinfo); |
2637 | EXPORT_SYMBOL(tcp_inherit_port); | ||
2638 | EXPORT_SYMBOL(tcp_listen_wlock); | ||
2639 | EXPORT_SYMBOL(tcp_port_rover); | ||
2640 | EXPORT_SYMBOL(tcp_prot); | 2024 | EXPORT_SYMBOL(tcp_prot); |
2641 | EXPORT_SYMBOL(tcp_put_port); | ||
2642 | EXPORT_SYMBOL(tcp_unhash); | 2025 | EXPORT_SYMBOL(tcp_unhash); |
2643 | EXPORT_SYMBOL(tcp_v4_conn_request); | 2026 | EXPORT_SYMBOL(tcp_v4_conn_request); |
2644 | EXPORT_SYMBOL(tcp_v4_connect); | 2027 | EXPORT_SYMBOL(tcp_v4_connect); |
2645 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 2028 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
2646 | EXPORT_SYMBOL(tcp_v4_rebuild_header); | ||
2647 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | 2029 | EXPORT_SYMBOL(tcp_v4_remember_stamp); |
2648 | EXPORT_SYMBOL(tcp_v4_send_check); | 2030 | EXPORT_SYMBOL(tcp_v4_send_check); |
2649 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 2031 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |