aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
author: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 2005-08-09 23:09:30 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2005-08-29 18:42:13 -0400
commit: 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 (patch)
tree: ddd004afe2f7c8295f6fdb94d34f78a42b5961cb /net/ipv4/tcp_ipv4.c
parent: 33b62231908c58ae04185e4f1063d1e35a7c8576 (diff)
[INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets
This paves the way to generalise the rest of the sock ID lookup routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro kernels (where IPv6 is always built as a module):

[root@qemu ~]# grep tw_sock /proc/slabinfo
tw_sock_TCPv6 0 0 128 31 1
tw_sock_TCP 0 0 96 41 1
[root@qemu ~]#

Now if a protocol wants to use the TIME_WAIT generic infrastructure it only has to set the sk_prot->twsk_obj_size field with the size of its inet_timewait_sock derived sock and proto_register will create sk_prot->twsk_slab; for now it's only for INET sockets, but we can introduce timewait_sock later if some non-INET transport protocol wants to use this stuff.

Next changesets will take advantage of this new infrastructure to generalise even more TCP code.

[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o
/tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o
[acme@toy net-2.6.14]$

Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1 (qemu host)).

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- net/ipv4/tcp_ipv4.c 107
1 files changed, 55 insertions, 52 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a678709b36f..ce423e48ebe 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -106,7 +106,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 };
106 106
107static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) 107static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
108{ 108{
109 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); 109 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
110 struct sock *sk2; 110 struct sock *sk2;
111 struct hlist_node *node; 111 struct hlist_node *node;
112 int reuse = sk->sk_reuse; 112 int reuse = sk->sk_reuse;
@@ -119,7 +119,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb
119 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 119 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
120 if (!reuse || !sk2->sk_reuse || 120 if (!reuse || !sk2->sk_reuse ||
121 sk2->sk_state == TCP_LISTEN) { 121 sk2->sk_state == TCP_LISTEN) {
122 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); 122 const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
123 if (!sk2_rcv_saddr || !sk_rcv_saddr || 123 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
124 sk2_rcv_saddr == sk_rcv_saddr) 124 sk2_rcv_saddr == sk_rcv_saddr)
125 break; 125 break;
@@ -251,10 +251,10 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr,
251 const int dif) 251 const int dif)
252{ 252{
253 struct inet_ehash_bucket *head; 253 struct inet_ehash_bucket *head;
254 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) 254 INET_ADDR_COOKIE(acookie, saddr, daddr)
255 __u32 ports = TCP_COMBINED_PORTS(sport, hnum); 255 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
256 struct sock *sk; 256 struct sock *sk;
257 struct hlist_node *node; 257 const struct hlist_node *node;
258 /* Optimize here for direct hit, only listening connections can 258 /* Optimize here for direct hit, only listening connections can
259 * have wildcards anyways. 259 * have wildcards anyways.
260 */ 260 */
@@ -262,13 +262,13 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr,
262 head = &tcp_hashinfo.ehash[hash]; 262 head = &tcp_hashinfo.ehash[hash];
263 read_lock(&head->lock); 263 read_lock(&head->lock);
264 sk_for_each(sk, node, &head->chain) { 264 sk_for_each(sk, node, &head->chain) {
265 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) 265 if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
266 goto hit; /* You sunk my battleship! */ 266 goto hit; /* You sunk my battleship! */
267 } 267 }
268 268
269 /* Must check for a TIME_WAIT'er before going to listener hash. */ 269 /* Must check for a TIME_WAIT'er before going to listener hash. */
270 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { 270 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
271 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) 271 if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
272 goto hit; 272 goto hit;
273 } 273 }
274 sk = NULL; 274 sk = NULL;
@@ -313,27 +313,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
313 313
314/* called with local bh disabled */ 314/* called with local bh disabled */
315static int __tcp_v4_check_established(struct sock *sk, __u16 lport, 315static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
316 struct tcp_tw_bucket **twp) 316 struct inet_timewait_sock **twp)
317{ 317{
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 u32 daddr = inet->rcv_saddr; 319 u32 daddr = inet->rcv_saddr;
320 u32 saddr = inet->daddr; 320 u32 saddr = inet->daddr;
321 int dif = sk->sk_bound_dev_if; 321 int dif = sk->sk_bound_dev_if;
322 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) 322 INET_ADDR_COOKIE(acookie, saddr, daddr)
323 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); 323 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
324 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); 324 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
325 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; 325 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
326 struct sock *sk2; 326 struct sock *sk2;
327 struct hlist_node *node; 327 const struct hlist_node *node;
328 struct tcp_tw_bucket *tw; 328 struct inet_timewait_sock *tw;
329 329
330 write_lock(&head->lock); 330 write_lock(&head->lock);
331 331
332 /* Check TIME-WAIT sockets first. */ 332 /* Check TIME-WAIT sockets first. */
333 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { 333 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
334 tw = (struct tcp_tw_bucket *)sk2; 334 tw = inet_twsk(sk2);
335 335
336 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { 336 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
337 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
337 struct tcp_sock *tp = tcp_sk(sk); 338 struct tcp_sock *tp = tcp_sk(sk);
338 339
339 /* With PAWS, it is safe from the viewpoint 340 /* With PAWS, it is safe from the viewpoint
@@ -350,15 +351,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
350 fall back to VJ's scheme and use initial 351 fall back to VJ's scheme and use initial
351 timestamp retrieved from peer table. 352 timestamp retrieved from peer table.
352 */ 353 */
353 if (tw->tw_ts_recent_stamp && 354 if (tcptw->tw_ts_recent_stamp &&
354 (!twp || (sysctl_tcp_tw_reuse && 355 (!twp || (sysctl_tcp_tw_reuse &&
355 xtime.tv_sec - 356 xtime.tv_sec -
356 tw->tw_ts_recent_stamp > 1))) { 357 tcptw->tw_ts_recent_stamp > 1))) {
357 if ((tp->write_seq = 358 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
358 tw->tw_snd_nxt + 65535 + 2) == 0) 359 if (tp->write_seq == 0)
359 tp->write_seq = 1; 360 tp->write_seq = 1;
360 tp->rx_opt.ts_recent = tw->tw_ts_recent; 361 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
361 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; 362 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
362 sock_hold(sk2); 363 sock_hold(sk2);
363 goto unique; 364 goto unique;
364 } else 365 } else
@@ -369,7 +370,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
369 370
370 /* And established part... */ 371 /* And established part... */
371 sk_for_each(sk2, node, &head->chain) { 372 sk_for_each(sk2, node, &head->chain) {
372 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) 373 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
373 goto not_unique; 374 goto not_unique;
374 } 375 }
375 376
@@ -392,7 +393,7 @@ unique:
392 tcp_tw_deschedule(tw); 393 tcp_tw_deschedule(tw);
393 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); 394 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
394 395
395 tcp_tw_put(tw); 396 inet_twsk_put(tw);
396 } 397 }
397 398
398 return 0; 399 return 0;
@@ -429,7 +430,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
429 static u32 hint; 430 static u32 hint;
430 u32 offset = hint + connect_port_offset(sk); 431 u32 offset = hint + connect_port_offset(sk);
431 struct hlist_node *node; 432 struct hlist_node *node;
432 struct tcp_tw_bucket *tw = NULL; 433 struct inet_timewait_sock *tw = NULL;
433 434
434 local_bh_disable(); 435 local_bh_disable();
435 for (i = 1; i <= range; i++) { 436 for (i = 1; i <= range; i++) {
@@ -482,7 +483,7 @@ ok:
482 483
483 if (tw) { 484 if (tw) {
484 tcp_tw_deschedule(tw); 485 tcp_tw_deschedule(tw);
485 tcp_tw_put(tw); 486 inet_twsk_put(tw);
486 } 487 }
487 488
488 ret = 0; 489 ret = 0;
@@ -757,7 +758,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
757 return; 758 return;
758 } 759 }
759 if (sk->sk_state == TCP_TIME_WAIT) { 760 if (sk->sk_state == TCP_TIME_WAIT) {
760 tcp_tw_put((struct tcp_tw_bucket *)sk); 761 inet_twsk_put((struct inet_timewait_sock *)sk);
761 return; 762 return;
762 } 763 }
763 764
@@ -1002,12 +1003,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1002 1003
1003static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 1004static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1004{ 1005{
1005 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; 1006 struct inet_timewait_sock *tw = inet_twsk(sk);
1007 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1006 1008
1007 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, 1009 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1008 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); 1010 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
1009 1011
1010 tcp_tw_put(tw); 1012 inet_twsk_put(tw);
1011} 1013}
1012 1014
1013static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) 1015static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
@@ -1368,7 +1370,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1368 bh_lock_sock(nsk); 1370 bh_lock_sock(nsk);
1369 return nsk; 1371 return nsk;
1370 } 1372 }
1371 tcp_tw_put((struct tcp_tw_bucket *)nsk); 1373 inet_twsk_put((struct inet_timewait_sock *)nsk);
1372 return NULL; 1374 return NULL;
1373 } 1375 }
1374 1376
@@ -1557,25 +1559,25 @@ discard_and_relse:
1557 1559
1558do_time_wait: 1560do_time_wait:
1559 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1561 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1560 tcp_tw_put((struct tcp_tw_bucket *) sk); 1562 inet_twsk_put((struct inet_timewait_sock *) sk);
1561 goto discard_it; 1563 goto discard_it;
1562 } 1564 }
1563 1565
1564 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 1566 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1565 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1567 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1566 tcp_tw_put((struct tcp_tw_bucket *) sk); 1568 inet_twsk_put((struct inet_timewait_sock *) sk);
1567 goto discard_it; 1569 goto discard_it;
1568 } 1570 }
1569 switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, 1571 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1570 skb, th, skb->len)) { 1572 skb, th)) {
1571 case TCP_TW_SYN: { 1573 case TCP_TW_SYN: {
1572 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1574 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1573 skb->nh.iph->daddr, 1575 skb->nh.iph->daddr,
1574 ntohs(th->dest), 1576 ntohs(th->dest),
1575 tcp_v4_iif(skb)); 1577 tcp_v4_iif(skb));
1576 if (sk2) { 1578 if (sk2) {
1577 tcp_tw_deschedule((struct tcp_tw_bucket *)sk); 1579 tcp_tw_deschedule((struct inet_timewait_sock *)sk);
1578 tcp_tw_put((struct tcp_tw_bucket *)sk); 1580 inet_twsk_put((struct inet_timewait_sock *)sk);
1579 sk = sk2; 1581 sk = sk2;
1580 goto process; 1582 goto process;
1581 } 1583 }
@@ -1639,18 +1641,18 @@ int tcp_v4_remember_stamp(struct sock *sk)
1639 return 0; 1641 return 0;
1640} 1642}
1641 1643
1642int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) 1644int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1643{ 1645{
1644 struct inet_peer *peer = NULL; 1646 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1645
1646 peer = inet_getpeer(tw->tw_daddr, 1);
1647 1647
1648 if (peer) { 1648 if (peer) {
1649 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || 1649 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1650
1651 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1650 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 1652 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1651 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { 1653 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1652 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; 1654 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1653 peer->tcp_ts = tw->tw_ts_recent; 1655 peer->tcp_ts = tcptw->tw_ts_recent;
1654 } 1656 }
1655 inet_putpeer(peer); 1657 inet_putpeer(peer);
1656 return 1; 1658 return 1;
@@ -1758,13 +1760,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
1758#ifdef CONFIG_PROC_FS 1760#ifdef CONFIG_PROC_FS
1759/* Proc filesystem TCP sock list dumping. */ 1761/* Proc filesystem TCP sock list dumping. */
1760 1762
1761static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) 1763static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1762{ 1764{
1763 return hlist_empty(head) ? NULL : 1765 return hlist_empty(head) ? NULL :
1764 list_entry(head->first, struct tcp_tw_bucket, tw_node); 1766 list_entry(head->first, struct inet_timewait_sock, tw_node);
1765} 1767}
1766 1768
1767static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) 1769static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1768{ 1770{
1769 return tw->tw_node.next ? 1771 return tw->tw_node.next ?
1770 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1772 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
@@ -1860,7 +1862,7 @@ static void *established_get_first(struct seq_file *seq)
1860 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 1862 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
1861 struct sock *sk; 1863 struct sock *sk;
1862 struct hlist_node *node; 1864 struct hlist_node *node;
1863 struct tcp_tw_bucket *tw; 1865 struct inet_timewait_sock *tw;
1864 1866
1865 /* We can reschedule _before_ having picked the target: */ 1867 /* We can reschedule _before_ having picked the target: */
1866 cond_resched_softirq(); 1868 cond_resched_softirq();
@@ -1874,8 +1876,8 @@ static void *established_get_first(struct seq_file *seq)
1874 goto out; 1876 goto out;
1875 } 1877 }
1876 st->state = TCP_SEQ_STATE_TIME_WAIT; 1878 st->state = TCP_SEQ_STATE_TIME_WAIT;
1877 tw_for_each(tw, node, 1879 inet_twsk_for_each(tw, node,
1878 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { 1880 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
1879 if (tw->tw_family != st->family) { 1881 if (tw->tw_family != st->family) {
1880 continue; 1882 continue;
1881 } 1883 }
@@ -1892,7 +1894,7 @@ out:
1892static void *established_get_next(struct seq_file *seq, void *cur) 1894static void *established_get_next(struct seq_file *seq, void *cur)
1893{ 1895{
1894 struct sock *sk = cur; 1896 struct sock *sk = cur;
1895 struct tcp_tw_bucket *tw; 1897 struct inet_timewait_sock *tw;
1896 struct hlist_node *node; 1898 struct hlist_node *node;
1897 struct tcp_iter_state* st = seq->private; 1899 struct tcp_iter_state* st = seq->private;
1898 1900
@@ -2159,7 +2161,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2159 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); 2161 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2160} 2162}
2161 2163
2162static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) 2164static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
2163{ 2165{
2164 unsigned int dest, src; 2166 unsigned int dest, src;
2165 __u16 destp, srcp; 2167 __u16 destp, srcp;
@@ -2261,6 +2263,7 @@ struct proto tcp_prot = {
2261 .sysctl_rmem = sysctl_tcp_rmem, 2263 .sysctl_rmem = sysctl_tcp_rmem,
2262 .max_header = MAX_TCP_HEADER, 2264 .max_header = MAX_TCP_HEADER,
2263 .obj_size = sizeof(struct tcp_sock), 2265 .obj_size = sizeof(struct tcp_sock),
2266 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2264 .rsk_prot = &tcp_request_sock_ops, 2267 .rsk_prot = &tcp_request_sock_ops,
2265}; 2268};
2266 2269