aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c944
1 files changed, 163 insertions, 781 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 67c670886c1f..13dfb391cdf1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -64,7 +64,9 @@
64#include <linux/times.h> 64#include <linux/times.h>
65 65
66#include <net/icmp.h> 66#include <net/icmp.h>
67#include <net/inet_hashtables.h>
67#include <net/tcp.h> 68#include <net/tcp.h>
69#include <net/transp_v6.h>
68#include <net/ipv6.h> 70#include <net/ipv6.h>
69#include <net/inet_common.h> 71#include <net/inet_common.h>
70#include <net/xfrm.h> 72#include <net/xfrm.h>
@@ -75,7 +77,6 @@
75#include <linux/proc_fs.h> 77#include <linux/proc_fs.h>
76#include <linux/seq_file.h> 78#include <linux/seq_file.h>
77 79
78extern int sysctl_ip_dynaddr;
79int sysctl_tcp_tw_reuse; 80int sysctl_tcp_tw_reuse;
80int sysctl_tcp_low_latency; 81int sysctl_tcp_low_latency;
81 82
@@ -88,463 +89,29 @@ static struct socket *tcp_socket;
88void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 89void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
89 struct sk_buff *skb); 90 struct sk_buff *skb);
90 91
91struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { 92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
92 .__tcp_lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = RW_LOCK_UNLOCKED,
93 .__tcp_lhash_users = ATOMIC_INIT(0), 94 .lhash_users = ATOMIC_INIT(0),
94 .__tcp_lhash_wait 95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
95 = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), 96 .portalloc_lock = SPIN_LOCK_UNLOCKED,
96 .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED 97 .port_rover = 1024 - 1,
97}; 98};
98 99
99/*
100 * This array holds the first and last local port number.
101 * For high-usage systems, use sysctl to change this to
102 * 32768-61000
103 */
104int sysctl_local_port_range[2] = { 1024, 4999 };
105int tcp_port_rover = 1024 - 1;
106
107static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108 __u32 faddr, __u16 fport)
109{
110 int h = (laddr ^ lport) ^ (faddr ^ fport);
111 h ^= h >> 16;
112 h ^= h >> 8;
113 return h & (tcp_ehash_size - 1);
114}
115
116static __inline__ int tcp_sk_hashfn(struct sock *sk)
117{
118 struct inet_sock *inet = inet_sk(sk);
119 __u32 laddr = inet->rcv_saddr;
120 __u16 lport = inet->num;
121 __u32 faddr = inet->daddr;
122 __u16 fport = inet->dport;
123
124 return tcp_hashfn(laddr, lport, faddr, fport);
125}
126
127/* Allocate and initialize a new TCP local port bind bucket.
128 * The bindhash mutex for snum's hash chain must be held here.
129 */
130struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131 unsigned short snum)
132{
133 struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134 SLAB_ATOMIC);
135 if (tb) {
136 tb->port = snum;
137 tb->fastreuse = 0;
138 INIT_HLIST_HEAD(&tb->owners);
139 hlist_add_head(&tb->node, &head->chain);
140 }
141 return tb;
142}
143
144/* Caller must hold hashbucket lock for this tb with local BH disabled */
145void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146{
147 if (hlist_empty(&tb->owners)) {
148 __hlist_del(&tb->node);
149 kmem_cache_free(tcp_bucket_cachep, tb);
150 }
151}
152
153/* Caller must disable local BH processing. */
154static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155{
156 struct tcp_bind_hashbucket *head =
157 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158 struct tcp_bind_bucket *tb;
159
160 spin_lock(&head->lock);
161 tb = tcp_sk(sk)->bind_hash;
162 sk_add_bind_node(child, &tb->owners);
163 tcp_sk(child)->bind_hash = tb;
164 spin_unlock(&head->lock);
165}
166
167inline void tcp_inherit_port(struct sock *sk, struct sock *child)
168{
169 local_bh_disable();
170 __tcp_inherit_port(sk, child);
171 local_bh_enable();
172}
173
174void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175 unsigned short snum)
176{
177 inet_sk(sk)->num = snum;
178 sk_add_bind_node(sk, &tb->owners);
179 tcp_sk(sk)->bind_hash = tb;
180}
181
182static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183{
184 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185 struct sock *sk2;
186 struct hlist_node *node;
187 int reuse = sk->sk_reuse;
188
189 sk_for_each_bound(sk2, node, &tb->owners) {
190 if (sk != sk2 &&
191 !tcp_v6_ipv6only(sk2) &&
192 (!sk->sk_bound_dev_if ||
193 !sk2->sk_bound_dev_if ||
194 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195 if (!reuse || !sk2->sk_reuse ||
196 sk2->sk_state == TCP_LISTEN) {
197 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199 sk2_rcv_saddr == sk_rcv_saddr)
200 break;
201 }
202 }
203 }
204 return node != NULL;
205}
206
207/* Obtain a reference to a local port for the given sock,
208 * if snum is zero it means select any available local port.
209 */
210static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 100static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211{ 101{
212 struct tcp_bind_hashbucket *head; 102 return inet_csk_get_port(&tcp_hashinfo, sk, snum);
213 struct hlist_node *node;
214 struct tcp_bind_bucket *tb;
215 int ret;
216
217 local_bh_disable();
218 if (!snum) {
219 int low = sysctl_local_port_range[0];
220 int high = sysctl_local_port_range[1];
221 int remaining = (high - low) + 1;
222 int rover;
223
224 spin_lock(&tcp_portalloc_lock);
225 if (tcp_port_rover < low)
226 rover = low;
227 else
228 rover = tcp_port_rover;
229 do {
230 rover++;
231 if (rover > high)
232 rover = low;
233 head = &tcp_bhash[tcp_bhashfn(rover)];
234 spin_lock(&head->lock);
235 tb_for_each(tb, node, &head->chain)
236 if (tb->port == rover)
237 goto next;
238 break;
239 next:
240 spin_unlock(&head->lock);
241 } while (--remaining > 0);
242 tcp_port_rover = rover;
243 spin_unlock(&tcp_portalloc_lock);
244
245 /* Exhausted local port range during search? It is not
246 * possible for us to be holding one of the bind hash
247 * locks if this test triggers, because if 'remaining'
248 * drops to zero, we broke out of the do/while loop at
249 * the top level, not from the 'break;' statement.
250 */
251 ret = 1;
252 if (unlikely(remaining <= 0))
253 goto fail;
254
255 /* OK, here is the one we will use. HEAD is
256 * non-NULL and we hold it's mutex.
257 */
258 snum = rover;
259 } else {
260 head = &tcp_bhash[tcp_bhashfn(snum)];
261 spin_lock(&head->lock);
262 tb_for_each(tb, node, &head->chain)
263 if (tb->port == snum)
264 goto tb_found;
265 }
266 tb = NULL;
267 goto tb_not_found;
268tb_found:
269 if (!hlist_empty(&tb->owners)) {
270 if (sk->sk_reuse > 1)
271 goto success;
272 if (tb->fastreuse > 0 &&
273 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
274 goto success;
275 } else {
276 ret = 1;
277 if (tcp_bind_conflict(sk, tb))
278 goto fail_unlock;
279 }
280 }
281tb_not_found:
282 ret = 1;
283 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
284 goto fail_unlock;
285 if (hlist_empty(&tb->owners)) {
286 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
287 tb->fastreuse = 1;
288 else
289 tb->fastreuse = 0;
290 } else if (tb->fastreuse &&
291 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
292 tb->fastreuse = 0;
293success:
294 if (!tcp_sk(sk)->bind_hash)
295 tcp_bind_hash(sk, tb, snum);
296 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
297 ret = 0;
298
299fail_unlock:
300 spin_unlock(&head->lock);
301fail:
302 local_bh_enable();
303 return ret;
304}
305
306/* Get rid of any references to a local port held by the
307 * given sock.
308 */
309static void __tcp_put_port(struct sock *sk)
310{
311 struct inet_sock *inet = inet_sk(sk);
312 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
313 struct tcp_bind_bucket *tb;
314
315 spin_lock(&head->lock);
316 tb = tcp_sk(sk)->bind_hash;
317 __sk_del_bind_node(sk);
318 tcp_sk(sk)->bind_hash = NULL;
319 inet->num = 0;
320 tcp_bucket_destroy(tb);
321 spin_unlock(&head->lock);
322}
323
324void tcp_put_port(struct sock *sk)
325{
326 local_bh_disable();
327 __tcp_put_port(sk);
328 local_bh_enable();
329}
330
331/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
332 * Look, when several writers sleep and reader wakes them up, all but one
333 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
334 * this, _but_ remember, it adds useless work on UP machines (wake up each
335 * exclusive lock release). It should be ifdefed really.
336 */
337
338void tcp_listen_wlock(void)
339{
340 write_lock(&tcp_lhash_lock);
341
342 if (atomic_read(&tcp_lhash_users)) {
343 DEFINE_WAIT(wait);
344
345 for (;;) {
346 prepare_to_wait_exclusive(&tcp_lhash_wait,
347 &wait, TASK_UNINTERRUPTIBLE);
348 if (!atomic_read(&tcp_lhash_users))
349 break;
350 write_unlock_bh(&tcp_lhash_lock);
351 schedule();
352 write_lock_bh(&tcp_lhash_lock);
353 }
354
355 finish_wait(&tcp_lhash_wait, &wait);
356 }
357}
358
359static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
360{
361 struct hlist_head *list;
362 rwlock_t *lock;
363
364 BUG_TRAP(sk_unhashed(sk));
365 if (listen_possible && sk->sk_state == TCP_LISTEN) {
366 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
367 lock = &tcp_lhash_lock;
368 tcp_listen_wlock();
369 } else {
370 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
371 lock = &tcp_ehash[sk->sk_hashent].lock;
372 write_lock(lock);
373 }
374 __sk_add_node(sk, list);
375 sock_prot_inc_use(sk->sk_prot);
376 write_unlock(lock);
377 if (listen_possible && sk->sk_state == TCP_LISTEN)
378 wake_up(&tcp_lhash_wait);
379} 103}
380 104
381static void tcp_v4_hash(struct sock *sk) 105static void tcp_v4_hash(struct sock *sk)
382{ 106{
383 if (sk->sk_state != TCP_CLOSE) { 107 inet_hash(&tcp_hashinfo, sk);
384 local_bh_disable();
385 __tcp_v4_hash(sk, 1);
386 local_bh_enable();
387 }
388} 108}
389 109
390void tcp_unhash(struct sock *sk) 110void tcp_unhash(struct sock *sk)
391{ 111{
392 rwlock_t *lock; 112 inet_unhash(&tcp_hashinfo, sk);
393
394 if (sk_unhashed(sk))
395 goto ende;
396
397 if (sk->sk_state == TCP_LISTEN) {
398 local_bh_disable();
399 tcp_listen_wlock();
400 lock = &tcp_lhash_lock;
401 } else {
402 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
403 lock = &head->lock;
404 write_lock_bh(&head->lock);
405 }
406
407 if (__sk_del_node_init(sk))
408 sock_prot_dec_use(sk->sk_prot);
409 write_unlock_bh(lock);
410
411 ende:
412 if (sk->sk_state == TCP_LISTEN)
413 wake_up(&tcp_lhash_wait);
414}
415
416/* Don't inline this cruft. Here are some nice properties to
417 * exploit here. The BSD API does not allow a listening TCP
418 * to specify the remote port nor the remote address for the
419 * connection. So always assume those are both wildcarded
420 * during the search since they can never be otherwise.
421 */
422static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
423 unsigned short hnum, int dif)
424{
425 struct sock *result = NULL, *sk;
426 struct hlist_node *node;
427 int score, hiscore;
428
429 hiscore=-1;
430 sk_for_each(sk, node, head) {
431 struct inet_sock *inet = inet_sk(sk);
432
433 if (inet->num == hnum && !ipv6_only_sock(sk)) {
434 __u32 rcv_saddr = inet->rcv_saddr;
435
436 score = (sk->sk_family == PF_INET ? 1 : 0);
437 if (rcv_saddr) {
438 if (rcv_saddr != daddr)
439 continue;
440 score+=2;
441 }
442 if (sk->sk_bound_dev_if) {
443 if (sk->sk_bound_dev_if != dif)
444 continue;
445 score+=2;
446 }
447 if (score == 5)
448 return sk;
449 if (score > hiscore) {
450 hiscore = score;
451 result = sk;
452 }
453 }
454 }
455 return result;
456}
457
458/* Optimize the common listener case. */
459static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
460 unsigned short hnum, int dif)
461{
462 struct sock *sk = NULL;
463 struct hlist_head *head;
464
465 read_lock(&tcp_lhash_lock);
466 head = &tcp_listening_hash[tcp_lhashfn(hnum)];
467 if (!hlist_empty(head)) {
468 struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
469
470 if (inet->num == hnum && !sk->sk_node.next &&
471 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
472 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
473 !sk->sk_bound_dev_if)
474 goto sherry_cache;
475 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
476 }
477 if (sk) {
478sherry_cache:
479 sock_hold(sk);
480 }
481 read_unlock(&tcp_lhash_lock);
482 return sk;
483} 113}
484 114
485/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
486 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
487 *
488 * Local BH must be disabled here.
489 */
490
491static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
492 u32 daddr, u16 hnum,
493 int dif)
494{
495 struct tcp_ehash_bucket *head;
496 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
497 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
498 struct sock *sk;
499 struct hlist_node *node;
500 /* Optimize here for direct hit, only listening connections can
501 * have wildcards anyways.
502 */
503 int hash = tcp_hashfn(daddr, hnum, saddr, sport);
504 head = &tcp_ehash[hash];
505 read_lock(&head->lock);
506 sk_for_each(sk, node, &head->chain) {
507 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
508 goto hit; /* You sunk my battleship! */
509 }
510
511 /* Must check for a TIME_WAIT'er before going to listener hash. */
512 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
513 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
514 goto hit;
515 }
516 sk = NULL;
517out:
518 read_unlock(&head->lock);
519 return sk;
520hit:
521 sock_hold(sk);
522 goto out;
523}
524
525static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
526 u32 daddr, u16 hnum, int dif)
527{
528 struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
529 daddr, hnum, dif);
530
531 return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
532}
533
534inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
535 u16 dport, int dif)
536{
537 struct sock *sk;
538
539 local_bh_disable();
540 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
541 local_bh_enable();
542
543 return sk;
544}
545
546EXPORT_SYMBOL_GPL(tcp_v4_lookup);
547
548static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) 115static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
549{ 116{
550 return secure_tcp_sequence_number(skb->nh.iph->daddr, 117 return secure_tcp_sequence_number(skb->nh.iph->daddr,
@@ -555,27 +122,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
555 122
556/* called with local bh disabled */ 123/* called with local bh disabled */
557static int __tcp_v4_check_established(struct sock *sk, __u16 lport, 124static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
558 struct tcp_tw_bucket **twp) 125 struct inet_timewait_sock **twp)
559{ 126{
560 struct inet_sock *inet = inet_sk(sk); 127 struct inet_sock *inet = inet_sk(sk);
561 u32 daddr = inet->rcv_saddr; 128 u32 daddr = inet->rcv_saddr;
562 u32 saddr = inet->daddr; 129 u32 saddr = inet->daddr;
563 int dif = sk->sk_bound_dev_if; 130 int dif = sk->sk_bound_dev_if;
564 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) 131 INET_ADDR_COOKIE(acookie, saddr, daddr)
565 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); 132 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
566 int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); 133 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
567 struct tcp_ehash_bucket *head = &tcp_ehash[hash]; 134 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
568 struct sock *sk2; 135 struct sock *sk2;
569 struct hlist_node *node; 136 const struct hlist_node *node;
570 struct tcp_tw_bucket *tw; 137 struct inet_timewait_sock *tw;
571 138
572 write_lock(&head->lock); 139 write_lock(&head->lock);
573 140
574 /* Check TIME-WAIT sockets first. */ 141 /* Check TIME-WAIT sockets first. */
575 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { 142 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
576 tw = (struct tcp_tw_bucket *)sk2; 143 tw = inet_twsk(sk2);
577 144
578 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { 145 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
146 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
579 struct tcp_sock *tp = tcp_sk(sk); 147 struct tcp_sock *tp = tcp_sk(sk);
580 148
581 /* With PAWS, it is safe from the viewpoint 149 /* With PAWS, it is safe from the viewpoint
@@ -592,15 +160,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
592 fall back to VJ's scheme and use initial 160 fall back to VJ's scheme and use initial
593 timestamp retrieved from peer table. 161 timestamp retrieved from peer table.
594 */ 162 */
595 if (tw->tw_ts_recent_stamp && 163 if (tcptw->tw_ts_recent_stamp &&
596 (!twp || (sysctl_tcp_tw_reuse && 164 (!twp || (sysctl_tcp_tw_reuse &&
597 xtime.tv_sec - 165 xtime.tv_sec -
598 tw->tw_ts_recent_stamp > 1))) { 166 tcptw->tw_ts_recent_stamp > 1))) {
599 if ((tp->write_seq = 167 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
600 tw->tw_snd_nxt + 65535 + 2) == 0) 168 if (tp->write_seq == 0)
601 tp->write_seq = 1; 169 tp->write_seq = 1;
602 tp->rx_opt.ts_recent = tw->tw_ts_recent; 170 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
603 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; 171 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
604 sock_hold(sk2); 172 sock_hold(sk2);
605 goto unique; 173 goto unique;
606 } else 174 } else
@@ -611,7 +179,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
611 179
612 /* And established part... */ 180 /* And established part... */
613 sk_for_each(sk2, node, &head->chain) { 181 sk_for_each(sk2, node, &head->chain) {
614 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) 182 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
615 goto not_unique; 183 goto not_unique;
616 } 184 }
617 185
@@ -631,10 +199,10 @@ unique:
631 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); 199 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
632 } else if (tw) { 200 } else if (tw) {
633 /* Silly. Should hash-dance instead... */ 201 /* Silly. Should hash-dance instead... */
634 tcp_tw_deschedule(tw); 202 inet_twsk_deschedule(tw, &tcp_death_row);
635 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); 203 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
636 204
637 tcp_tw_put(tw); 205 inet_twsk_put(tw);
638 } 206 }
639 207
640 return 0; 208 return 0;
@@ -657,9 +225,9 @@ static inline u32 connect_port_offset(const struct sock *sk)
657 */ 225 */
658static inline int tcp_v4_hash_connect(struct sock *sk) 226static inline int tcp_v4_hash_connect(struct sock *sk)
659{ 227{
660 unsigned short snum = inet_sk(sk)->num; 228 const unsigned short snum = inet_sk(sk)->num;
661 struct tcp_bind_hashbucket *head; 229 struct inet_bind_hashbucket *head;
662 struct tcp_bind_bucket *tb; 230 struct inet_bind_bucket *tb;
663 int ret; 231 int ret;
664 232
665 if (!snum) { 233 if (!snum) {
@@ -671,19 +239,19 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
671 static u32 hint; 239 static u32 hint;
672 u32 offset = hint + connect_port_offset(sk); 240 u32 offset = hint + connect_port_offset(sk);
673 struct hlist_node *node; 241 struct hlist_node *node;
674 struct tcp_tw_bucket *tw = NULL; 242 struct inet_timewait_sock *tw = NULL;
675 243
676 local_bh_disable(); 244 local_bh_disable();
677 for (i = 1; i <= range; i++) { 245 for (i = 1; i <= range; i++) {
678 port = low + (i + offset) % range; 246 port = low + (i + offset) % range;
679 head = &tcp_bhash[tcp_bhashfn(port)]; 247 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
680 spin_lock(&head->lock); 248 spin_lock(&head->lock);
681 249
682 /* Does not bother with rcv_saddr checks, 250 /* Does not bother with rcv_saddr checks,
683 * because the established check is already 251 * because the established check is already
684 * unique enough. 252 * unique enough.
685 */ 253 */
686 tb_for_each(tb, node, &head->chain) { 254 inet_bind_bucket_for_each(tb, node, &head->chain) {
687 if (tb->port == port) { 255 if (tb->port == port) {
688 BUG_TRAP(!hlist_empty(&tb->owners)); 256 BUG_TRAP(!hlist_empty(&tb->owners));
689 if (tb->fastreuse >= 0) 257 if (tb->fastreuse >= 0)
@@ -696,7 +264,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
696 } 264 }
697 } 265 }
698 266
699 tb = tcp_bucket_create(head, port); 267 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
700 if (!tb) { 268 if (!tb) {
701 spin_unlock(&head->lock); 269 spin_unlock(&head->lock);
702 break; 270 break;
@@ -715,27 +283,27 @@ ok:
715 hint += i; 283 hint += i;
716 284
717 /* Head lock still held and bh's disabled */ 285 /* Head lock still held and bh's disabled */
718 tcp_bind_hash(sk, tb, port); 286 inet_bind_hash(sk, tb, port);
719 if (sk_unhashed(sk)) { 287 if (sk_unhashed(sk)) {
720 inet_sk(sk)->sport = htons(port); 288 inet_sk(sk)->sport = htons(port);
721 __tcp_v4_hash(sk, 0); 289 __inet_hash(&tcp_hashinfo, sk, 0);
722 } 290 }
723 spin_unlock(&head->lock); 291 spin_unlock(&head->lock);
724 292
725 if (tw) { 293 if (tw) {
726 tcp_tw_deschedule(tw); 294 inet_twsk_deschedule(tw, &tcp_death_row);;
727 tcp_tw_put(tw); 295 inet_twsk_put(tw);
728 } 296 }
729 297
730 ret = 0; 298 ret = 0;
731 goto out; 299 goto out;
732 } 300 }
733 301
734 head = &tcp_bhash[tcp_bhashfn(snum)]; 302 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
735 tb = tcp_sk(sk)->bind_hash; 303 tb = inet_csk(sk)->icsk_bind_hash;
736 spin_lock_bh(&head->lock); 304 spin_lock_bh(&head->lock);
737 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 305 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
738 __tcp_v4_hash(sk, 0); 306 __inet_hash(&tcp_hashinfo, sk, 0);
739 spin_unlock_bh(&head->lock); 307 spin_unlock_bh(&head->lock);
740 return 0; 308 return 0;
741 } else { 309 } else {
@@ -798,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
798 tp->write_seq = 0; 366 tp->write_seq = 0;
799 } 367 }
800 368
801 if (sysctl_tcp_tw_recycle && 369 if (tcp_death_row.sysctl_tw_recycle &&
802 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 370 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
803 struct inet_peer *peer = rt_get_peer(rt); 371 struct inet_peer *peer = rt_get_peer(rt);
804 372
@@ -837,8 +405,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
837 goto failure; 405 goto failure;
838 406
839 /* OK, now commit destination to socket. */ 407 /* OK, now commit destination to socket. */
840 __sk_dst_set(sk, &rt->u.dst); 408 sk_setup_caps(sk, &rt->u.dst);
841 tcp_v4_setup_caps(sk, &rt->u.dst);
842 409
843 if (!tp->write_seq) 410 if (!tp->write_seq)
844 tp->write_seq = secure_tcp_sequence_number(inet->saddr, 411 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
@@ -864,53 +431,6 @@ failure:
864 return err; 431 return err;
865} 432}
866 433
867static __inline__ int tcp_v4_iif(struct sk_buff *skb)
868{
869 return ((struct rtable *)skb->dst)->rt_iif;
870}
871
872static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
873{
874 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
875}
876
877static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
878 struct request_sock ***prevp,
879 __u16 rport,
880 __u32 raddr, __u32 laddr)
881{
882 struct listen_sock *lopt = tp->accept_queue.listen_opt;
883 struct request_sock *req, **prev;
884
885 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
886 (req = *prev) != NULL;
887 prev = &req->dl_next) {
888 const struct inet_request_sock *ireq = inet_rsk(req);
889
890 if (ireq->rmt_port == rport &&
891 ireq->rmt_addr == raddr &&
892 ireq->loc_addr == laddr &&
893 TCP_INET_FAMILY(req->rsk_ops->family)) {
894 BUG_TRAP(!req->sk);
895 *prevp = prev;
896 break;
897 }
898 }
899
900 return req;
901}
902
903static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
904{
905 struct tcp_sock *tp = tcp_sk(sk);
906 struct listen_sock *lopt = tp->accept_queue.listen_opt;
907 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
908
909 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
910 tcp_synq_added(sk);
911}
912
913
914/* 434/*
915 * This routine does path mtu discovery as defined in RFC1191. 435 * This routine does path mtu discovery as defined in RFC1191.
916 */ 436 */
@@ -993,14 +513,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
993 return; 513 return;
994 } 514 }
995 515
996 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, 516 sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
997 th->source, tcp_v4_iif(skb)); 517 th->source, inet_iif(skb));
998 if (!sk) { 518 if (!sk) {
999 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 519 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1000 return; 520 return;
1001 } 521 }
1002 if (sk->sk_state == TCP_TIME_WAIT) { 522 if (sk->sk_state == TCP_TIME_WAIT) {
1003 tcp_tw_put((struct tcp_tw_bucket *)sk); 523 inet_twsk_put((struct inet_timewait_sock *)sk);
1004 return; 524 return;
1005 } 525 }
1006 526
@@ -1054,8 +574,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1054 if (sock_owned_by_user(sk)) 574 if (sock_owned_by_user(sk))
1055 goto out; 575 goto out;
1056 576
1057 req = tcp_v4_search_req(tp, &prev, th->dest, 577 req = inet_csk_search_req(sk, &prev, th->dest,
1058 iph->daddr, iph->saddr); 578 iph->daddr, iph->saddr);
1059 if (!req) 579 if (!req)
1060 goto out; 580 goto out;
1061 581
@@ -1075,7 +595,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1075 * created socket, and POSIX does not want network 595 * created socket, and POSIX does not want network
1076 * errors returned from accept(). 596 * errors returned from accept().
1077 */ 597 */
1078 tcp_synq_drop(sk, req, prev); 598 inet_csk_reqsk_queue_drop(sk, req, prev);
1079 goto out; 599 goto out;
1080 600
1081 case TCP_SYN_SENT: 601 case TCP_SYN_SENT:
@@ -1245,12 +765,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1245 765
1246static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 766static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1247{ 767{
1248 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; 768 struct inet_timewait_sock *tw = inet_twsk(sk);
769 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1249 770
1250 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, 771 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1251 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); 772 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
1252 773
1253 tcp_tw_put(tw); 774 inet_twsk_put(tw);
1254} 775}
1255 776
1256static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) 777static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
@@ -1259,36 +780,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1259 req->ts_recent); 780 req->ts_recent);
1260} 781}
1261 782
1262static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1263 struct request_sock *req)
1264{
1265 struct rtable *rt;
1266 const struct inet_request_sock *ireq = inet_rsk(req);
1267 struct ip_options *opt = inet_rsk(req)->opt;
1268 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1269 .nl_u = { .ip4_u =
1270 { .daddr = ((opt && opt->srr) ?
1271 opt->faddr :
1272 ireq->rmt_addr),
1273 .saddr = ireq->loc_addr,
1274 .tos = RT_CONN_FLAGS(sk) } },
1275 .proto = IPPROTO_TCP,
1276 .uli_u = { .ports =
1277 { .sport = inet_sk(sk)->sport,
1278 .dport = ireq->rmt_port } } };
1279
1280 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1281 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1282 return NULL;
1283 }
1284 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1285 ip_rt_put(rt);
1286 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1287 return NULL;
1288 }
1289 return &rt->u.dst;
1290}
1291
1292/* 783/*
1293 * Send a SYN-ACK after having received an ACK. 784 * Send a SYN-ACK after having received an ACK.
1294 * This still operates on a request_sock only, not on a big 785 * This still operates on a request_sock only, not on a big
@@ -1302,7 +793,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
1302 struct sk_buff * skb; 793 struct sk_buff * skb;
1303 794
1304 /* First, grab a route. */ 795 /* First, grab a route. */
1305 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) 796 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1306 goto out; 797 goto out;
1307 798
1308 skb = tcp_make_synack(sk, dst, req); 799 skb = tcp_make_synack(sk, dst, req);
@@ -1404,7 +895,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1404 * limitations, they conserve resources and peer is 895 * limitations, they conserve resources and peer is
1405 * evidently real one. 896 * evidently real one.
1406 */ 897 */
1407 if (tcp_synq_is_full(sk) && !isn) { 898 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1408#ifdef CONFIG_SYN_COOKIES 899#ifdef CONFIG_SYN_COOKIES
1409 if (sysctl_tcp_syncookies) { 900 if (sysctl_tcp_syncookies) {
1410 want_cookie = 1; 901 want_cookie = 1;
@@ -1418,7 +909,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1418 * clogging syn queue with openreqs with exponentially increasing 909 * clogging syn queue with openreqs with exponentially increasing
1419 * timeout. 910 * timeout.
1420 */ 911 */
1421 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 912 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1422 goto drop; 913 goto drop;
1423 914
1424 req = reqsk_alloc(&tcp_request_sock_ops); 915 req = reqsk_alloc(&tcp_request_sock_ops);
@@ -1474,8 +965,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1474 * are made in the function processing timewait state. 965 * are made in the function processing timewait state.
1475 */ 966 */
1476 if (tmp_opt.saw_tstamp && 967 if (tmp_opt.saw_tstamp &&
1477 sysctl_tcp_tw_recycle && 968 tcp_death_row.sysctl_tw_recycle &&
1478 (dst = tcp_v4_route_req(sk, req)) != NULL && 969 (dst = inet_csk_route_req(sk, req)) != NULL &&
1479 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 970 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1480 peer->v4daddr == saddr) { 971 peer->v4daddr == saddr) {
1481 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && 972 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
@@ -1488,7 +979,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1488 } 979 }
1489 /* Kill the following clause, if you dislike this way. */ 980 /* Kill the following clause, if you dislike this way. */
1490 else if (!sysctl_tcp_syncookies && 981 else if (!sysctl_tcp_syncookies &&
1491 (sysctl_max_syn_backlog - tcp_synq_len(sk) < 982 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1492 (sysctl_max_syn_backlog >> 2)) && 983 (sysctl_max_syn_backlog >> 2)) &&
1493 (!peer || !peer->tcp_ts_stamp) && 984 (!peer || !peer->tcp_ts_stamp) &&
1494 (!dst || !dst_metric(dst, RTAX_RTT))) { 985 (!dst || !dst_metric(dst, RTAX_RTT))) {
@@ -1499,11 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1499 * to destinations, already remembered 990 * to destinations, already remembered
1500 * to the moment of synflood. 991 * to the moment of synflood.
1501 */ 992 */
1502 LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " 993 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1503 "request from %u.%u." 994 "request from %u.%u.%u.%u/%u\n",
1504 "%u.%u/%u\n", 995 NIPQUAD(saddr),
1505 NIPQUAD(saddr), 996 ntohs(skb->h.th->source));
1506 ntohs(skb->h.th->source)));
1507 dst_release(dst); 997 dst_release(dst);
1508 goto drop_and_free; 998 goto drop_and_free;
1509 } 999 }
@@ -1518,7 +1008,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1518 if (want_cookie) { 1008 if (want_cookie) {
1519 reqsk_free(req); 1009 reqsk_free(req);
1520 } else { 1010 } else {
1521 tcp_v4_synq_add(sk, req); 1011 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1522 } 1012 }
1523 return 0; 1013 return 0;
1524 1014
@@ -1546,15 +1036,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1546 if (sk_acceptq_is_full(sk)) 1036 if (sk_acceptq_is_full(sk))
1547 goto exit_overflow; 1037 goto exit_overflow;
1548 1038
1549 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) 1039 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1550 goto exit; 1040 goto exit;
1551 1041
1552 newsk = tcp_create_openreq_child(sk, req, skb); 1042 newsk = tcp_create_openreq_child(sk, req, skb);
1553 if (!newsk) 1043 if (!newsk)
1554 goto exit; 1044 goto exit;
1555 1045
1556 newsk->sk_dst_cache = dst; 1046 sk_setup_caps(newsk, dst);
1557 tcp_v4_setup_caps(newsk, dst);
1558 1047
1559 newtp = tcp_sk(newsk); 1048 newtp = tcp_sk(newsk);
1560 newinet = inet_sk(newsk); 1049 newinet = inet_sk(newsk);
@@ -1564,7 +1053,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1564 newinet->saddr = ireq->loc_addr; 1053 newinet->saddr = ireq->loc_addr;
1565 newinet->opt = ireq->opt; 1054 newinet->opt = ireq->opt;
1566 ireq->opt = NULL; 1055 ireq->opt = NULL;
1567 newinet->mc_index = tcp_v4_iif(skb); 1056 newinet->mc_index = inet_iif(skb);
1568 newinet->mc_ttl = skb->nh.iph->ttl; 1057 newinet->mc_ttl = skb->nh.iph->ttl;
1569 newtp->ext_header_len = 0; 1058 newtp->ext_header_len = 0;
1570 if (newinet->opt) 1059 if (newinet->opt)
@@ -1575,8 +1064,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1575 newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 1064 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1576 tcp_initialize_rcv_mss(newsk); 1065 tcp_initialize_rcv_mss(newsk);
1577 1066
1578 __tcp_v4_hash(newsk, 0); 1067 __inet_hash(&tcp_hashinfo, newsk, 0);
1579 __tcp_inherit_port(sk, newsk); 1068 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1580 1069
1581 return newsk; 1070 return newsk;
1582 1071
@@ -1592,27 +1081,24 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1592{ 1081{
1593 struct tcphdr *th = skb->h.th; 1082 struct tcphdr *th = skb->h.th;
1594 struct iphdr *iph = skb->nh.iph; 1083 struct iphdr *iph = skb->nh.iph;
1595 struct tcp_sock *tp = tcp_sk(sk);
1596 struct sock *nsk; 1084 struct sock *nsk;
1597 struct request_sock **prev; 1085 struct request_sock **prev;
1598 /* Find possible connection requests. */ 1086 /* Find possible connection requests. */
1599 struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, 1087 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1600 iph->saddr, iph->daddr); 1088 iph->saddr, iph->daddr);
1601 if (req) 1089 if (req)
1602 return tcp_check_req(sk, skb, req, prev); 1090 return tcp_check_req(sk, skb, req, prev);
1603 1091
1604 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, 1092 nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
1605 th->source, 1093 th->source, skb->nh.iph->daddr,
1606 skb->nh.iph->daddr, 1094 ntohs(th->dest), inet_iif(skb));
1607 ntohs(th->dest),
1608 tcp_v4_iif(skb));
1609 1095
1610 if (nsk) { 1096 if (nsk) {
1611 if (nsk->sk_state != TCP_TIME_WAIT) { 1097 if (nsk->sk_state != TCP_TIME_WAIT) {
1612 bh_lock_sock(nsk); 1098 bh_lock_sock(nsk);
1613 return nsk; 1099 return nsk;
1614 } 1100 }
1615 tcp_tw_put((struct tcp_tw_bucket *)nsk); 1101 inet_twsk_put((struct inet_timewait_sock *)nsk);
1616 return NULL; 1102 return NULL;
1617 } 1103 }
1618 1104
@@ -1631,7 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
1631 skb->nh.iph->daddr, skb->csum)) 1117 skb->nh.iph->daddr, skb->csum))
1632 return 0; 1118 return 0;
1633 1119
1634 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); 1120 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
1635 skb->ip_summed = CHECKSUM_NONE; 1121 skb->ip_summed = CHECKSUM_NONE;
1636 } 1122 }
1637 if (skb->len <= 76) { 1123 if (skb->len <= 76) {
@@ -1747,9 +1233,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
1747 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; 1233 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
1748 TCP_SKB_CB(skb)->sacked = 0; 1234 TCP_SKB_CB(skb)->sacked = 0;
1749 1235
1750 sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, 1236 sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1751 skb->nh.iph->daddr, ntohs(th->dest), 1237 skb->nh.iph->daddr, ntohs(th->dest),
1752 tcp_v4_iif(skb)); 1238 inet_iif(skb));
1753 1239
1754 if (!sk) 1240 if (!sk)
1755 goto no_tcp_socket; 1241 goto no_tcp_socket;
@@ -1801,24 +1287,26 @@ discard_and_relse:
1801 1287
1802do_time_wait: 1288do_time_wait:
1803 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1289 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1804 tcp_tw_put((struct tcp_tw_bucket *) sk); 1290 inet_twsk_put((struct inet_timewait_sock *) sk);
1805 goto discard_it; 1291 goto discard_it;
1806 } 1292 }
1807 1293
1808 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 1294 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1809 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1295 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1810 tcp_tw_put((struct tcp_tw_bucket *) sk); 1296 inet_twsk_put((struct inet_timewait_sock *) sk);
1811 goto discard_it; 1297 goto discard_it;
1812 } 1298 }
1813 switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, 1299 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1814 skb, th, skb->len)) { 1300 skb, th)) {
1815 case TCP_TW_SYN: { 1301 case TCP_TW_SYN: {
1816 struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, 1302 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1817 ntohs(th->dest), 1303 skb->nh.iph->daddr,
1818 tcp_v4_iif(skb)); 1304 ntohs(th->dest),
1305 inet_iif(skb));
1819 if (sk2) { 1306 if (sk2) {
1820 tcp_tw_deschedule((struct tcp_tw_bucket *)sk); 1307 inet_twsk_deschedule((struct inet_timewait_sock *)sk,
1821 tcp_tw_put((struct tcp_tw_bucket *)sk); 1308 &tcp_death_row);
1309 inet_twsk_put((struct inet_timewait_sock *)sk);
1822 sk = sk2; 1310 sk = sk2;
1823 goto process; 1311 goto process;
1824 } 1312 }
@@ -1834,112 +1322,6 @@ do_time_wait:
1834 goto discard_it; 1322 goto discard_it;
1835} 1323}
1836 1324
1837/* With per-bucket locks this operation is not-atomic, so that
1838 * this version is not worse.
1839 */
1840static void __tcp_v4_rehash(struct sock *sk)
1841{
1842 sk->sk_prot->unhash(sk);
1843 sk->sk_prot->hash(sk);
1844}
1845
1846static int tcp_v4_reselect_saddr(struct sock *sk)
1847{
1848 struct inet_sock *inet = inet_sk(sk);
1849 int err;
1850 struct rtable *rt;
1851 __u32 old_saddr = inet->saddr;
1852 __u32 new_saddr;
1853 __u32 daddr = inet->daddr;
1854
1855 if (inet->opt && inet->opt->srr)
1856 daddr = inet->opt->faddr;
1857
1858 /* Query new route. */
1859 err = ip_route_connect(&rt, daddr, 0,
1860 RT_CONN_FLAGS(sk),
1861 sk->sk_bound_dev_if,
1862 IPPROTO_TCP,
1863 inet->sport, inet->dport, sk);
1864 if (err)
1865 return err;
1866
1867 __sk_dst_set(sk, &rt->u.dst);
1868 tcp_v4_setup_caps(sk, &rt->u.dst);
1869
1870 new_saddr = rt->rt_src;
1871
1872 if (new_saddr == old_saddr)
1873 return 0;
1874
1875 if (sysctl_ip_dynaddr > 1) {
1876 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1877 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1878 NIPQUAD(old_saddr),
1879 NIPQUAD(new_saddr));
1880 }
1881
1882 inet->saddr = new_saddr;
1883 inet->rcv_saddr = new_saddr;
1884
1885 /* XXX The only one ugly spot where we need to
1886 * XXX really change the sockets identity after
1887 * XXX it has entered the hashes. -DaveM
1888 *
1889 * Besides that, it does not check for connection
1890 * uniqueness. Wait for troubles.
1891 */
1892 __tcp_v4_rehash(sk);
1893 return 0;
1894}
1895
1896int tcp_v4_rebuild_header(struct sock *sk)
1897{
1898 struct inet_sock *inet = inet_sk(sk);
1899 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1900 u32 daddr;
1901 int err;
1902
1903 /* Route is OK, nothing to do. */
1904 if (rt)
1905 return 0;
1906
1907 /* Reroute. */
1908 daddr = inet->daddr;
1909 if (inet->opt && inet->opt->srr)
1910 daddr = inet->opt->faddr;
1911
1912 {
1913 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1914 .nl_u = { .ip4_u =
1915 { .daddr = daddr,
1916 .saddr = inet->saddr,
1917 .tos = RT_CONN_FLAGS(sk) } },
1918 .proto = IPPROTO_TCP,
1919 .uli_u = { .ports =
1920 { .sport = inet->sport,
1921 .dport = inet->dport } } };
1922
1923 err = ip_route_output_flow(&rt, &fl, sk, 0);
1924 }
1925 if (!err) {
1926 __sk_dst_set(sk, &rt->u.dst);
1927 tcp_v4_setup_caps(sk, &rt->u.dst);
1928 return 0;
1929 }
1930
1931 /* Routing failed... */
1932 sk->sk_route_caps = 0;
1933
1934 if (!sysctl_ip_dynaddr ||
1935 sk->sk_state != TCP_SYN_SENT ||
1936 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1937 (err = tcp_v4_reselect_saddr(sk)) != 0)
1938 sk->sk_err_soft = -err;
1939
1940 return err;
1941}
1942
1943static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) 1325static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1944{ 1326{
1945 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; 1327 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
@@ -1988,18 +1370,18 @@ int tcp_v4_remember_stamp(struct sock *sk)
1988 return 0; 1370 return 0;
1989} 1371}
1990 1372
1991int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) 1373int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1992{ 1374{
1993 struct inet_peer *peer = NULL; 1375 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1994
1995 peer = inet_getpeer(tw->tw_daddr, 1);
1996 1376
1997 if (peer) { 1377 if (peer) {
1998 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || 1378 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1379
1380 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1999 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 1381 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2000 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { 1382 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
2001 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; 1383 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
2002 peer->tcp_ts = tw->tw_ts_recent; 1384 peer->tcp_ts = tcptw->tw_ts_recent;
2003 } 1385 }
2004 inet_putpeer(peer); 1386 inet_putpeer(peer);
2005 return 1; 1387 return 1;
@@ -2011,7 +1393,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2011struct tcp_func ipv4_specific = { 1393struct tcp_func ipv4_specific = {
2012 .queue_xmit = ip_queue_xmit, 1394 .queue_xmit = ip_queue_xmit,
2013 .send_check = tcp_v4_send_check, 1395 .send_check = tcp_v4_send_check,
2014 .rebuild_header = tcp_v4_rebuild_header, 1396 .rebuild_header = inet_sk_rebuild_header,
2015 .conn_request = tcp_v4_conn_request, 1397 .conn_request = tcp_v4_conn_request,
2016 .syn_recv_sock = tcp_v4_syn_recv_sock, 1398 .syn_recv_sock = tcp_v4_syn_recv_sock,
2017 .remember_stamp = tcp_v4_remember_stamp, 1399 .remember_stamp = tcp_v4_remember_stamp,
@@ -2027,13 +1409,14 @@ struct tcp_func ipv4_specific = {
2027 */ 1409 */
2028static int tcp_v4_init_sock(struct sock *sk) 1410static int tcp_v4_init_sock(struct sock *sk)
2029{ 1411{
1412 struct inet_connection_sock *icsk = inet_csk(sk);
2030 struct tcp_sock *tp = tcp_sk(sk); 1413 struct tcp_sock *tp = tcp_sk(sk);
2031 1414
2032 skb_queue_head_init(&tp->out_of_order_queue); 1415 skb_queue_head_init(&tp->out_of_order_queue);
2033 tcp_init_xmit_timers(sk); 1416 tcp_init_xmit_timers(sk);
2034 tcp_prequeue_init(tp); 1417 tcp_prequeue_init(tp);
2035 1418
2036 tp->rto = TCP_TIMEOUT_INIT; 1419 icsk->icsk_rto = TCP_TIMEOUT_INIT;
2037 tp->mdev = TCP_TIMEOUT_INIT; 1420 tp->mdev = TCP_TIMEOUT_INIT;
2038 1421
2039 /* So many TCP implementations out there (incorrectly) count the 1422 /* So many TCP implementations out there (incorrectly) count the
@@ -2051,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2051 tp->mss_cache = 536; 1434 tp->mss_cache = 536;
2052 1435
2053 tp->reordering = sysctl_tcp_reordering; 1436 tp->reordering = sysctl_tcp_reordering;
2054 tp->ca_ops = &tcp_init_congestion_ops; 1437 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
2055 1438
2056 sk->sk_state = TCP_CLOSE; 1439 sk->sk_state = TCP_CLOSE;
2057 1440
@@ -2074,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
2074 1457
2075 tcp_clear_xmit_timers(sk); 1458 tcp_clear_xmit_timers(sk);
2076 1459
2077 tcp_cleanup_congestion_control(tp); 1460 tcp_cleanup_congestion_control(sk);
2078 1461
2079 /* Cleanup up the write buffer. */ 1462 /* Cleanup up the write buffer. */
2080 sk_stream_writequeue_purge(sk); 1463 sk_stream_writequeue_purge(sk);
@@ -2086,8 +1469,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
2086 __skb_queue_purge(&tp->ucopy.prequeue); 1469 __skb_queue_purge(&tp->ucopy.prequeue);
2087 1470
2088 /* Clean up a referenced TCP bind bucket. */ 1471 /* Clean up a referenced TCP bind bucket. */
2089 if (tp->bind_hash) 1472 if (inet_csk(sk)->icsk_bind_hash)
2090 tcp_put_port(sk); 1473 inet_put_port(&tcp_hashinfo, sk);
2091 1474
2092 /* 1475 /*
2093 * If sendmsg cached page exists, toss it. 1476 * If sendmsg cached page exists, toss it.
@@ -2107,13 +1490,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
2107#ifdef CONFIG_PROC_FS 1490#ifdef CONFIG_PROC_FS
2108/* Proc filesystem TCP sock list dumping. */ 1491/* Proc filesystem TCP sock list dumping. */
2109 1492
2110static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) 1493static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
2111{ 1494{
2112 return hlist_empty(head) ? NULL : 1495 return hlist_empty(head) ? NULL :
2113 list_entry(head->first, struct tcp_tw_bucket, tw_node); 1496 list_entry(head->first, struct inet_timewait_sock, tw_node);
2114} 1497}
2115 1498
2116static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) 1499static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2117{ 1500{
2118 return tw->tw_node.next ? 1501 return tw->tw_node.next ?
2119 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1502 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
@@ -2121,14 +1504,14 @@ static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2121 1504
2122static void *listening_get_next(struct seq_file *seq, void *cur) 1505static void *listening_get_next(struct seq_file *seq, void *cur)
2123{ 1506{
2124 struct tcp_sock *tp; 1507 struct inet_connection_sock *icsk;
2125 struct hlist_node *node; 1508 struct hlist_node *node;
2126 struct sock *sk = cur; 1509 struct sock *sk = cur;
2127 struct tcp_iter_state* st = seq->private; 1510 struct tcp_iter_state* st = seq->private;
2128 1511
2129 if (!sk) { 1512 if (!sk) {
2130 st->bucket = 0; 1513 st->bucket = 0;
2131 sk = sk_head(&tcp_listening_hash[0]); 1514 sk = sk_head(&tcp_hashinfo.listening_hash[0]);
2132 goto get_sk; 1515 goto get_sk;
2133 } 1516 }
2134 1517
@@ -2137,7 +1520,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2137 if (st->state == TCP_SEQ_STATE_OPENREQ) { 1520 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2138 struct request_sock *req = cur; 1521 struct request_sock *req = cur;
2139 1522
2140 tp = tcp_sk(st->syn_wait_sk); 1523 icsk = inet_csk(st->syn_wait_sk);
2141 req = req->dl_next; 1524 req = req->dl_next;
2142 while (1) { 1525 while (1) {
2143 while (req) { 1526 while (req) {
@@ -2150,17 +1533,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2150 if (++st->sbucket >= TCP_SYNQ_HSIZE) 1533 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2151 break; 1534 break;
2152get_req: 1535get_req:
2153 req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; 1536 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2154 } 1537 }
2155 sk = sk_next(st->syn_wait_sk); 1538 sk = sk_next(st->syn_wait_sk);
2156 st->state = TCP_SEQ_STATE_LISTENING; 1539 st->state = TCP_SEQ_STATE_LISTENING;
2157 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1540 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2158 } else { 1541 } else {
2159 tp = tcp_sk(sk); 1542 icsk = inet_csk(sk);
2160 read_lock_bh(&tp->accept_queue.syn_wait_lock); 1543 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2161 if (reqsk_queue_len(&tp->accept_queue)) 1544 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2162 goto start_req; 1545 goto start_req;
2163 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1546 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2164 sk = sk_next(sk); 1547 sk = sk_next(sk);
2165 } 1548 }
2166get_sk: 1549get_sk:
@@ -2169,9 +1552,9 @@ get_sk:
2169 cur = sk; 1552 cur = sk;
2170 goto out; 1553 goto out;
2171 } 1554 }
2172 tp = tcp_sk(sk); 1555 icsk = inet_csk(sk);
2173 read_lock_bh(&tp->accept_queue.syn_wait_lock); 1556 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2174 if (reqsk_queue_len(&tp->accept_queue)) { 1557 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2175start_req: 1558start_req:
2176 st->uid = sock_i_uid(sk); 1559 st->uid = sock_i_uid(sk);
2177 st->syn_wait_sk = sk; 1560 st->syn_wait_sk = sk;
@@ -2179,10 +1562,10 @@ start_req:
2179 st->sbucket = 0; 1562 st->sbucket = 0;
2180 goto get_req; 1563 goto get_req;
2181 } 1564 }
2182 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1565 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2183 } 1566 }
2184 if (++st->bucket < TCP_LHTABLE_SIZE) { 1567 if (++st->bucket < INET_LHTABLE_SIZE) {
2185 sk = sk_head(&tcp_listening_hash[st->bucket]); 1568 sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2186 goto get_sk; 1569 goto get_sk;
2187 } 1570 }
2188 cur = NULL; 1571 cur = NULL;
@@ -2206,16 +1589,16 @@ static void *established_get_first(struct seq_file *seq)
2206 struct tcp_iter_state* st = seq->private; 1589 struct tcp_iter_state* st = seq->private;
2207 void *rc = NULL; 1590 void *rc = NULL;
2208 1591
2209 for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { 1592 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2210 struct sock *sk; 1593 struct sock *sk;
2211 struct hlist_node *node; 1594 struct hlist_node *node;
2212 struct tcp_tw_bucket *tw; 1595 struct inet_timewait_sock *tw;
2213 1596
2214 /* We can reschedule _before_ having picked the target: */ 1597 /* We can reschedule _before_ having picked the target: */
2215 cond_resched_softirq(); 1598 cond_resched_softirq();
2216 1599
2217 read_lock(&tcp_ehash[st->bucket].lock); 1600 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2218 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { 1601 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2219 if (sk->sk_family != st->family) { 1602 if (sk->sk_family != st->family) {
2220 continue; 1603 continue;
2221 } 1604 }
@@ -2223,15 +1606,15 @@ static void *established_get_first(struct seq_file *seq)
2223 goto out; 1606 goto out;
2224 } 1607 }
2225 st->state = TCP_SEQ_STATE_TIME_WAIT; 1608 st->state = TCP_SEQ_STATE_TIME_WAIT;
2226 tw_for_each(tw, node, 1609 inet_twsk_for_each(tw, node,
2227 &tcp_ehash[st->bucket + tcp_ehash_size].chain) { 1610 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
2228 if (tw->tw_family != st->family) { 1611 if (tw->tw_family != st->family) {
2229 continue; 1612 continue;
2230 } 1613 }
2231 rc = tw; 1614 rc = tw;
2232 goto out; 1615 goto out;
2233 } 1616 }
2234 read_unlock(&tcp_ehash[st->bucket].lock); 1617 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2235 st->state = TCP_SEQ_STATE_ESTABLISHED; 1618 st->state = TCP_SEQ_STATE_ESTABLISHED;
2236 } 1619 }
2237out: 1620out:
@@ -2241,7 +1624,7 @@ out:
2241static void *established_get_next(struct seq_file *seq, void *cur) 1624static void *established_get_next(struct seq_file *seq, void *cur)
2242{ 1625{
2243 struct sock *sk = cur; 1626 struct sock *sk = cur;
2244 struct tcp_tw_bucket *tw; 1627 struct inet_timewait_sock *tw;
2245 struct hlist_node *node; 1628 struct hlist_node *node;
2246 struct tcp_iter_state* st = seq->private; 1629 struct tcp_iter_state* st = seq->private;
2247 1630
@@ -2258,15 +1641,15 @@ get_tw:
2258 cur = tw; 1641 cur = tw;
2259 goto out; 1642 goto out;
2260 } 1643 }
2261 read_unlock(&tcp_ehash[st->bucket].lock); 1644 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2262 st->state = TCP_SEQ_STATE_ESTABLISHED; 1645 st->state = TCP_SEQ_STATE_ESTABLISHED;
2263 1646
2264 /* We can reschedule between buckets: */ 1647 /* We can reschedule between buckets: */
2265 cond_resched_softirq(); 1648 cond_resched_softirq();
2266 1649
2267 if (++st->bucket < tcp_ehash_size) { 1650 if (++st->bucket < tcp_hashinfo.ehash_size) {
2268 read_lock(&tcp_ehash[st->bucket].lock); 1651 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2269 sk = sk_head(&tcp_ehash[st->bucket].chain); 1652 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2270 } else { 1653 } else {
2271 cur = NULL; 1654 cur = NULL;
2272 goto out; 1655 goto out;
@@ -2280,7 +1663,7 @@ get_tw:
2280 } 1663 }
2281 1664
2282 st->state = TCP_SEQ_STATE_TIME_WAIT; 1665 st->state = TCP_SEQ_STATE_TIME_WAIT;
2283 tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); 1666 tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
2284 goto get_tw; 1667 goto get_tw;
2285found: 1668found:
2286 cur = sk; 1669 cur = sk;
@@ -2304,12 +1687,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2304 void *rc; 1687 void *rc;
2305 struct tcp_iter_state* st = seq->private; 1688 struct tcp_iter_state* st = seq->private;
2306 1689
2307 tcp_listen_lock(); 1690 inet_listen_lock(&tcp_hashinfo);
2308 st->state = TCP_SEQ_STATE_LISTENING; 1691 st->state = TCP_SEQ_STATE_LISTENING;
2309 rc = listening_get_idx(seq, &pos); 1692 rc = listening_get_idx(seq, &pos);
2310 1693
2311 if (!rc) { 1694 if (!rc) {
2312 tcp_listen_unlock(); 1695 inet_listen_unlock(&tcp_hashinfo);
2313 local_bh_disable(); 1696 local_bh_disable();
2314 st->state = TCP_SEQ_STATE_ESTABLISHED; 1697 st->state = TCP_SEQ_STATE_ESTABLISHED;
2315 rc = established_get_idx(seq, pos); 1698 rc = established_get_idx(seq, pos);
@@ -2342,7 +1725,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2342 case TCP_SEQ_STATE_LISTENING: 1725 case TCP_SEQ_STATE_LISTENING:
2343 rc = listening_get_next(seq, v); 1726 rc = listening_get_next(seq, v);
2344 if (!rc) { 1727 if (!rc) {
2345 tcp_listen_unlock(); 1728 inet_listen_unlock(&tcp_hashinfo);
2346 local_bh_disable(); 1729 local_bh_disable();
2347 st->state = TCP_SEQ_STATE_ESTABLISHED; 1730 st->state = TCP_SEQ_STATE_ESTABLISHED;
2348 rc = established_get_first(seq); 1731 rc = established_get_first(seq);
@@ -2365,17 +1748,17 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2365 switch (st->state) { 1748 switch (st->state) {
2366 case TCP_SEQ_STATE_OPENREQ: 1749 case TCP_SEQ_STATE_OPENREQ:
2367 if (v) { 1750 if (v) {
2368 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 1751 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2369 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1752 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2370 } 1753 }
2371 case TCP_SEQ_STATE_LISTENING: 1754 case TCP_SEQ_STATE_LISTENING:
2372 if (v != SEQ_START_TOKEN) 1755 if (v != SEQ_START_TOKEN)
2373 tcp_listen_unlock(); 1756 inet_listen_unlock(&tcp_hashinfo);
2374 break; 1757 break;
2375 case TCP_SEQ_STATE_TIME_WAIT: 1758 case TCP_SEQ_STATE_TIME_WAIT:
2376 case TCP_SEQ_STATE_ESTABLISHED: 1759 case TCP_SEQ_STATE_ESTABLISHED:
2377 if (v) 1760 if (v)
2378 read_unlock(&tcp_ehash[st->bucket].lock); 1761 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2379 local_bh_enable(); 1762 local_bh_enable();
2380 break; 1763 break;
2381 } 1764 }
@@ -2472,18 +1855,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2472 int timer_active; 1855 int timer_active;
2473 unsigned long timer_expires; 1856 unsigned long timer_expires;
2474 struct tcp_sock *tp = tcp_sk(sp); 1857 struct tcp_sock *tp = tcp_sk(sp);
1858 const struct inet_connection_sock *icsk = inet_csk(sp);
2475 struct inet_sock *inet = inet_sk(sp); 1859 struct inet_sock *inet = inet_sk(sp);
2476 unsigned int dest = inet->daddr; 1860 unsigned int dest = inet->daddr;
2477 unsigned int src = inet->rcv_saddr; 1861 unsigned int src = inet->rcv_saddr;
2478 __u16 destp = ntohs(inet->dport); 1862 __u16 destp = ntohs(inet->dport);
2479 __u16 srcp = ntohs(inet->sport); 1863 __u16 srcp = ntohs(inet->sport);
2480 1864
2481 if (tp->pending == TCP_TIME_RETRANS) { 1865 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2482 timer_active = 1; 1866 timer_active = 1;
2483 timer_expires = tp->timeout; 1867 timer_expires = icsk->icsk_timeout;
2484 } else if (tp->pending == TCP_TIME_PROBE0) { 1868 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2485 timer_active = 4; 1869 timer_active = 4;
2486 timer_expires = tp->timeout; 1870 timer_expires = icsk->icsk_timeout;
2487 } else if (timer_pending(&sp->sk_timer)) { 1871 } else if (timer_pending(&sp->sk_timer)) {
2488 timer_active = 2; 1872 timer_active = 2;
2489 timer_expires = sp->sk_timer.expires; 1873 timer_expires = sp->sk_timer.expires;
@@ -2498,17 +1882,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2498 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, 1882 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2499 timer_active, 1883 timer_active,
2500 jiffies_to_clock_t(timer_expires - jiffies), 1884 jiffies_to_clock_t(timer_expires - jiffies),
2501 tp->retransmits, 1885 icsk->icsk_retransmits,
2502 sock_i_uid(sp), 1886 sock_i_uid(sp),
2503 tp->probes_out, 1887 icsk->icsk_probes_out,
2504 sock_i_ino(sp), 1888 sock_i_ino(sp),
2505 atomic_read(&sp->sk_refcnt), sp, 1889 atomic_read(&sp->sk_refcnt), sp,
2506 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, 1890 icsk->icsk_rto,
1891 icsk->icsk_ack.ato,
1892 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2507 tp->snd_cwnd, 1893 tp->snd_cwnd,
2508 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); 1894 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2509} 1895}
2510 1896
2511static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) 1897static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
2512{ 1898{
2513 unsigned int dest, src; 1899 unsigned int dest, src;
2514 __u16 destp, srcp; 1900 __u16 destp, srcp;
@@ -2588,7 +1974,7 @@ struct proto tcp_prot = {
2588 .close = tcp_close, 1974 .close = tcp_close,
2589 .connect = tcp_v4_connect, 1975 .connect = tcp_v4_connect,
2590 .disconnect = tcp_disconnect, 1976 .disconnect = tcp_disconnect,
2591 .accept = tcp_accept, 1977 .accept = inet_csk_accept,
2592 .ioctl = tcp_ioctl, 1978 .ioctl = tcp_ioctl,
2593 .init = tcp_v4_init_sock, 1979 .init = tcp_v4_init_sock,
2594 .destroy = tcp_v4_destroy_sock, 1980 .destroy = tcp_v4_destroy_sock,
@@ -2603,6 +1989,7 @@ struct proto tcp_prot = {
2603 .get_port = tcp_v4_get_port, 1989 .get_port = tcp_v4_get_port,
2604 .enter_memory_pressure = tcp_enter_memory_pressure, 1990 .enter_memory_pressure = tcp_enter_memory_pressure,
2605 .sockets_allocated = &tcp_sockets_allocated, 1991 .sockets_allocated = &tcp_sockets_allocated,
1992 .orphan_count = &tcp_orphan_count,
2606 .memory_allocated = &tcp_memory_allocated, 1993 .memory_allocated = &tcp_memory_allocated,
2607 .memory_pressure = &tcp_memory_pressure, 1994 .memory_pressure = &tcp_memory_pressure,
2608 .sysctl_mem = sysctl_tcp_mem, 1995 .sysctl_mem = sysctl_tcp_mem,
@@ -2610,6 +1997,7 @@ struct proto tcp_prot = {
2610 .sysctl_rmem = sysctl_tcp_rmem, 1997 .sysctl_rmem = sysctl_tcp_rmem,
2611 .max_header = MAX_TCP_HEADER, 1998 .max_header = MAX_TCP_HEADER,
2612 .obj_size = sizeof(struct tcp_sock), 1999 .obj_size = sizeof(struct tcp_sock),
2000 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2613 .rsk_prot = &tcp_request_sock_ops, 2001 .rsk_prot = &tcp_request_sock_ops,
2614}; 2002};
2615 2003
@@ -2631,19 +2019,13 @@ void __init tcp_v4_init(struct net_proto_family *ops)
2631} 2019}
2632 2020
2633EXPORT_SYMBOL(ipv4_specific); 2021EXPORT_SYMBOL(ipv4_specific);
2634EXPORT_SYMBOL(tcp_bind_hash); 2022EXPORT_SYMBOL(inet_bind_bucket_create);
2635EXPORT_SYMBOL(tcp_bucket_create);
2636EXPORT_SYMBOL(tcp_hashinfo); 2023EXPORT_SYMBOL(tcp_hashinfo);
2637EXPORT_SYMBOL(tcp_inherit_port);
2638EXPORT_SYMBOL(tcp_listen_wlock);
2639EXPORT_SYMBOL(tcp_port_rover);
2640EXPORT_SYMBOL(tcp_prot); 2024EXPORT_SYMBOL(tcp_prot);
2641EXPORT_SYMBOL(tcp_put_port);
2642EXPORT_SYMBOL(tcp_unhash); 2025EXPORT_SYMBOL(tcp_unhash);
2643EXPORT_SYMBOL(tcp_v4_conn_request); 2026EXPORT_SYMBOL(tcp_v4_conn_request);
2644EXPORT_SYMBOL(tcp_v4_connect); 2027EXPORT_SYMBOL(tcp_v4_connect);
2645EXPORT_SYMBOL(tcp_v4_do_rcv); 2028EXPORT_SYMBOL(tcp_v4_do_rcv);
2646EXPORT_SYMBOL(tcp_v4_rebuild_header);
2647EXPORT_SYMBOL(tcp_v4_remember_stamp); 2029EXPORT_SYMBOL(tcp_v4_remember_stamp);
2648EXPORT_SYMBOL(tcp_v4_send_check); 2030EXPORT_SYMBOL(tcp_v4_send_check);
2649EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 2031EXPORT_SYMBOL(tcp_v4_syn_recv_sock);