diff options
author | Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | 2005-08-09 23:09:30 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 18:42:13 -0400 |
commit | 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 (patch) | |
tree | ddd004afe2f7c8295f6fdb94d34f78a42b5961cb /net/ipv6 | |
parent | 33b62231908c58ae04185e4f1063d1e35a7c8576 (diff) |
[INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets
This paves the way to generalise the rest of the sock ID lookup
routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro
kernels (where IPv6 is always built as a module):
[root@qemu ~]# grep tw_sock /proc/slabinfo
tw_sock_TCPv6 0 0 128 31 1
tw_sock_TCP 0 0 96 41 1
[root@qemu ~]#
Now if a protocol wants to use the TIME_WAIT generic infrastructure it
only has to set the sk_prot->twsk_obj_size field with the size of its
inet_timewait_sock derived sock and proto_register will create
sk_prot->twsk_slab. For now it's only for INET sockets, but we can
introduce timewait_sock later if some non-INET transport protocol
wants to use this stuff.
Next changesets will take advantage of this new infrastructure to
generalise even more TCP code.
[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o
/tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o
[acme@toy net-2.6.14]$
Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1
(qemu host)).
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 100 |
2 files changed, 54 insertions, 48 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 77004b9456c0..4582d9cf4bbe 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -1041,7 +1041,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) | |||
1041 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; | 1041 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; |
1042 | const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); | 1042 | const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); |
1043 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; | 1043 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; |
1044 | u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); | 1044 | u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); |
1045 | int sk_ipv6only = ipv6_only_sock(sk); | 1045 | int sk_ipv6only = ipv6_only_sock(sk); |
1046 | int sk2_ipv6only = tcp_v6_ipv6only(sk2); | 1046 | int sk2_ipv6only = tcp_v6_ipv6only(sk2); |
1047 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); | 1047 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93a66b9a76e1..af8ad5bb273b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -308,33 +308,32 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u | |||
308 | struct in6_addr *daddr, u16 hnum, | 308 | struct in6_addr *daddr, u16 hnum, |
309 | int dif) | 309 | int dif) |
310 | { | 310 | { |
311 | struct inet_ehash_bucket *head; | ||
312 | struct sock *sk; | 311 | struct sock *sk; |
313 | struct hlist_node *node; | 312 | const struct hlist_node *node; |
314 | __u32 ports = TCP_COMBINED_PORTS(sport, hnum); | 313 | const __u32 ports = INET_COMBINED_PORTS(sport, hnum); |
315 | int hash; | ||
316 | |||
317 | /* Optimize here for direct hit, only listening connections can | 314 | /* Optimize here for direct hit, only listening connections can |
318 | * have wildcards anyways. | 315 | * have wildcards anyways. |
319 | */ | 316 | */ |
320 | hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); | 317 | const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); |
321 | head = &tcp_hashinfo.ehash[hash]; | 318 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; |
319 | |||
322 | read_lock(&head->lock); | 320 | read_lock(&head->lock); |
323 | sk_for_each(sk, node, &head->chain) { | 321 | sk_for_each(sk, node, &head->chain) { |
324 | /* For IPV6 do the cheaper port and family tests first. */ | 322 | /* For IPV6 do the cheaper port and family tests first. */ |
325 | if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) | 323 | if (INET6_MATCH(sk, saddr, daddr, ports, dif)) |
326 | goto hit; /* You sunk my battleship! */ | 324 | goto hit; /* You sunk my battleship! */ |
327 | } | 325 | } |
328 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 326 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
329 | sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { | 327 | sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { |
330 | /* FIXME: acme: check this... */ | 328 | const struct inet_timewait_sock *tw = inet_twsk(sk); |
331 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | ||
332 | 329 | ||
333 | if(*((__u32 *)&(tw->tw_dport)) == ports && | 330 | if(*((__u32 *)&(tw->tw_dport)) == ports && |
334 | sk->sk_family == PF_INET6) { | 331 | sk->sk_family == PF_INET6) { |
335 | if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && | 332 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); |
336 | ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && | 333 | |
337 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | 334 | if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && |
335 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && | ||
336 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | ||
338 | goto hit; | 337 | goto hit; |
339 | } | 338 | } |
340 | } | 339 | } |
@@ -455,43 +454,46 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
455 | } | 454 | } |
456 | 455 | ||
457 | static int __tcp_v6_check_established(struct sock *sk, __u16 lport, | 456 | static int __tcp_v6_check_established(struct sock *sk, __u16 lport, |
458 | struct tcp_tw_bucket **twp) | 457 | struct inet_timewait_sock **twp) |
459 | { | 458 | { |
460 | struct inet_sock *inet = inet_sk(sk); | 459 | struct inet_sock *inet = inet_sk(sk); |
461 | struct ipv6_pinfo *np = inet6_sk(sk); | 460 | struct ipv6_pinfo *np = inet6_sk(sk); |
462 | struct in6_addr *daddr = &np->rcv_saddr; | 461 | struct in6_addr *daddr = &np->rcv_saddr; |
463 | struct in6_addr *saddr = &np->daddr; | 462 | struct in6_addr *saddr = &np->daddr; |
464 | int dif = sk->sk_bound_dev_if; | 463 | int dif = sk->sk_bound_dev_if; |
465 | u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); | 464 | const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); |
466 | int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); | 465 | const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); |
467 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; | 466 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; |
468 | struct sock *sk2; | 467 | struct sock *sk2; |
469 | struct hlist_node *node; | 468 | const struct hlist_node *node; |
470 | struct tcp_tw_bucket *tw; | 469 | struct inet_timewait_sock *tw; |
471 | 470 | ||
472 | write_lock(&head->lock); | 471 | write_lock(&head->lock); |
473 | 472 | ||
474 | /* Check TIME-WAIT sockets first. */ | 473 | /* Check TIME-WAIT sockets first. */ |
475 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { | 474 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { |
476 | tw = (struct tcp_tw_bucket*)sk2; | 475 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); |
476 | |||
477 | tw = inet_twsk(sk2); | ||
477 | 478 | ||
478 | if(*((__u32 *)&(tw->tw_dport)) == ports && | 479 | if(*((__u32 *)&(tw->tw_dport)) == ports && |
479 | sk2->sk_family == PF_INET6 && | 480 | sk2->sk_family == PF_INET6 && |
480 | ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && | 481 | ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && |
481 | ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && | 482 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && |
482 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | 483 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { |
484 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
483 | struct tcp_sock *tp = tcp_sk(sk); | 485 | struct tcp_sock *tp = tcp_sk(sk); |
484 | 486 | ||
485 | if (tw->tw_ts_recent_stamp && | 487 | if (tcptw->tw_ts_recent_stamp && |
486 | (!twp || (sysctl_tcp_tw_reuse && | 488 | (!twp || |
487 | xtime.tv_sec - | 489 | (sysctl_tcp_tw_reuse && |
488 | tw->tw_ts_recent_stamp > 1))) { | 490 | xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { |
489 | /* See comment in tcp_ipv4.c */ | 491 | /* See comment in tcp_ipv4.c */ |
490 | tp->write_seq = tw->tw_snd_nxt + 65535 + 2; | 492 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
491 | if (!tp->write_seq) | 493 | if (!tp->write_seq) |
492 | tp->write_seq = 1; | 494 | tp->write_seq = 1; |
493 | tp->rx_opt.ts_recent = tw->tw_ts_recent; | 495 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
494 | tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; | 496 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
495 | sock_hold(sk2); | 497 | sock_hold(sk2); |
496 | goto unique; | 498 | goto unique; |
497 | } else | 499 | } else |
@@ -502,7 +504,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, | |||
502 | 504 | ||
503 | /* And established part... */ | 505 | /* And established part... */ |
504 | sk_for_each(sk2, node, &head->chain) { | 506 | sk_for_each(sk2, node, &head->chain) { |
505 | if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) | 507 | if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) |
506 | goto not_unique; | 508 | goto not_unique; |
507 | } | 509 | } |
508 | 510 | ||
@@ -521,7 +523,7 @@ unique: | |||
521 | tcp_tw_deschedule(tw); | 523 | tcp_tw_deschedule(tw); |
522 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 524 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
523 | 525 | ||
524 | tcp_tw_put(tw); | 526 | inet_twsk_put(tw); |
525 | } | 527 | } |
526 | return 0; | 528 | return 0; |
527 | 529 | ||
@@ -556,7 +558,7 @@ static int tcp_v6_hash_connect(struct sock *sk) | |||
556 | static u32 hint; | 558 | static u32 hint; |
557 | u32 offset = hint + tcpv6_port_offset(sk); | 559 | u32 offset = hint + tcpv6_port_offset(sk); |
558 | struct hlist_node *node; | 560 | struct hlist_node *node; |
559 | struct tcp_tw_bucket *tw = NULL; | 561 | struct inet_timewait_sock *tw = NULL; |
560 | 562 | ||
561 | local_bh_disable(); | 563 | local_bh_disable(); |
562 | for (i = 1; i <= range; i++) { | 564 | for (i = 1; i <= range; i++) { |
@@ -609,7 +611,7 @@ ok: | |||
609 | 611 | ||
610 | if (tw) { | 612 | if (tw) { |
611 | tcp_tw_deschedule(tw); | 613 | tcp_tw_deschedule(tw); |
612 | tcp_tw_put(tw); | 614 | inet_twsk_put(tw); |
613 | } | 615 | } |
614 | 616 | ||
615 | ret = 0; | 617 | ret = 0; |
@@ -845,7 +847,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
845 | } | 847 | } |
846 | 848 | ||
847 | if (sk->sk_state == TCP_TIME_WAIT) { | 849 | if (sk->sk_state == TCP_TIME_WAIT) { |
848 | tcp_tw_put((struct tcp_tw_bucket*)sk); | 850 | inet_twsk_put((struct inet_timewait_sock *)sk); |
849 | return; | 851 | return; |
850 | } | 852 | } |
851 | 853 | ||
@@ -1223,12 +1225,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 | |||
1223 | 1225 | ||
1224 | static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) | 1226 | static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) |
1225 | { | 1227 | { |
1226 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | 1228 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1229 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
1227 | 1230 | ||
1228 | tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, | 1231 | tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, |
1229 | tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); | 1232 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, |
1233 | tcptw->tw_ts_recent); | ||
1230 | 1234 | ||
1231 | tcp_tw_put(tw); | 1235 | inet_twsk_put(tw); |
1232 | } | 1236 | } |
1233 | 1237 | ||
1234 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | 1238 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) |
@@ -1261,7 +1265,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
1261 | bh_lock_sock(nsk); | 1265 | bh_lock_sock(nsk); |
1262 | return nsk; | 1266 | return nsk; |
1263 | } | 1267 | } |
1264 | tcp_tw_put((struct tcp_tw_bucket*)nsk); | 1268 | inet_twsk_put((struct inet_timewait_sock *)nsk); |
1265 | return NULL; | 1269 | return NULL; |
1266 | } | 1270 | } |
1267 | 1271 | ||
@@ -1798,26 +1802,26 @@ discard_and_relse: | |||
1798 | 1802 | ||
1799 | do_time_wait: | 1803 | do_time_wait: |
1800 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 1804 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
1801 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1805 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1802 | goto discard_it; | 1806 | goto discard_it; |
1803 | } | 1807 | } |
1804 | 1808 | ||
1805 | if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { | 1809 | if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { |
1806 | TCP_INC_STATS_BH(TCP_MIB_INERRS); | 1810 | TCP_INC_STATS_BH(TCP_MIB_INERRS); |
1807 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1811 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1808 | goto discard_it; | 1812 | goto discard_it; |
1809 | } | 1813 | } |
1810 | 1814 | ||
1811 | switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, | 1815 | switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, |
1812 | skb, th, skb->len)) { | 1816 | skb, th)) { |
1813 | case TCP_TW_SYN: | 1817 | case TCP_TW_SYN: |
1814 | { | 1818 | { |
1815 | struct sock *sk2; | 1819 | struct sock *sk2; |
1816 | 1820 | ||
1817 | sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); | 1821 | sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); |
1818 | if (sk2 != NULL) { | 1822 | if (sk2 != NULL) { |
1819 | tcp_tw_deschedule((struct tcp_tw_bucket *)sk); | 1823 | tcp_tw_deschedule((struct inet_timewait_sock *)sk); |
1820 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 1824 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1821 | sk = sk2; | 1825 | sk = sk2; |
1822 | goto process; | 1826 | goto process; |
1823 | } | 1827 | } |
@@ -2137,17 +2141,18 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) | |||
2137 | } | 2141 | } |
2138 | 2142 | ||
2139 | static void get_timewait6_sock(struct seq_file *seq, | 2143 | static void get_timewait6_sock(struct seq_file *seq, |
2140 | struct tcp_tw_bucket *tw, int i) | 2144 | struct inet_timewait_sock *tw, int i) |
2141 | { | 2145 | { |
2142 | struct in6_addr *dest, *src; | 2146 | struct in6_addr *dest, *src; |
2143 | __u16 destp, srcp; | 2147 | __u16 destp, srcp; |
2148 | struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); | ||
2144 | int ttd = tw->tw_ttd - jiffies; | 2149 | int ttd = tw->tw_ttd - jiffies; |
2145 | 2150 | ||
2146 | if (ttd < 0) | 2151 | if (ttd < 0) |
2147 | ttd = 0; | 2152 | ttd = 0; |
2148 | 2153 | ||
2149 | dest = &tw->tw_v6_daddr; | 2154 | dest = &tcp6tw->tw_v6_daddr; |
2150 | src = &tw->tw_v6_rcv_saddr; | 2155 | src = &tcp6tw->tw_v6_rcv_saddr; |
2151 | destp = ntohs(tw->tw_dport); | 2156 | destp = ntohs(tw->tw_dport); |
2152 | srcp = ntohs(tw->tw_sport); | 2157 | srcp = ntohs(tw->tw_sport); |
2153 | 2158 | ||
@@ -2244,6 +2249,7 @@ struct proto tcpv6_prot = { | |||
2244 | .sysctl_rmem = sysctl_tcp_rmem, | 2249 | .sysctl_rmem = sysctl_tcp_rmem, |
2245 | .max_header = MAX_TCP_HEADER, | 2250 | .max_header = MAX_TCP_HEADER, |
2246 | .obj_size = sizeof(struct tcp6_sock), | 2251 | .obj_size = sizeof(struct tcp6_sock), |
2252 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), | ||
2247 | .rsk_prot = &tcp6_request_sock_ops, | 2253 | .rsk_prot = &tcp6_request_sock_ops, |
2248 | }; | 2254 | }; |
2249 | 2255 | ||