diff options
-rw-r--r-- | include/net/ip_vs.h | 2 | ||||
-rw-r--r-- | include/net/netns/ip_vs.h | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto.c | 13 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 |
5 files changed, 79 insertions, 49 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 88d4e40b538a..3c45a00cdc3e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); | |||
807 | 807 | ||
808 | extern const char * ip_vs_state_name(__u16 proto, int state); | 808 | extern const char * ip_vs_state_name(__u16 proto, int state); |
809 | 809 | ||
810 | extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); | 810 | extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); |
811 | extern int ip_vs_check_template(struct ip_vs_conn *ct); | 811 | extern int ip_vs_check_template(struct ip_vs_conn *ct); |
812 | extern void ip_vs_random_dropentry(void); | 812 | extern void ip_vs_random_dropentry(void); |
813 | extern int ip_vs_conn_init(void); | 813 | extern int ip_vs_conn_init(void); |
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 6f4e089b8db2..ac77363647ab 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h | |||
@@ -31,6 +31,14 @@ struct netns_ipvs { | |||
31 | /* ip_vs_proto */ | 31 | /* ip_vs_proto */ |
32 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ | 32 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ |
33 | struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; | 33 | struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; |
34 | /* ip_vs_proto_tcp */ | ||
35 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
36 | #define TCP_APP_TAB_BITS 4 | ||
37 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
38 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
39 | struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
40 | spinlock_t tcp_app_lock; | ||
41 | #endif | ||
34 | 42 | ||
35 | /* ip_vs_lblc */ | 43 | /* ip_vs_lblc */ |
36 | int sysctl_lblc_expiration; | 44 | int sysctl_lblc_expiration; |
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 0e762f322aa3..b38ae941f677 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
157 | int ret = 0; | 157 | int ret = 0; |
158 | enum ip_conntrack_info ctinfo; | 158 | enum ip_conntrack_info ctinfo; |
159 | struct nf_conn *ct; | 159 | struct nf_conn *ct; |
160 | struct net *net; | ||
160 | 161 | ||
161 | #ifdef CONFIG_IP_VS_IPV6 | 162 | #ifdef CONFIG_IP_VS_IPV6 |
162 | /* This application helper doesn't work with IPv6 yet, | 163 | /* This application helper doesn't work with IPv6 yet, |
@@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
257 | * would be adjusted twice. | 258 | * would be adjusted twice. |
258 | */ | 259 | */ |
259 | 260 | ||
261 | net = skb_net(skb); | ||
260 | cp->app_data = NULL; | 262 | cp->app_data = NULL; |
261 | ip_vs_tcp_conn_listen(n_cp); | 263 | ip_vs_tcp_conn_listen(net, n_cp); |
262 | ip_vs_conn_put(n_cp); | 264 | ip_vs_conn_put(n_cp); |
263 | return ret; | 265 | return ret; |
264 | } | 266 | } |
@@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
287 | union nf_inet_addr to; | 289 | union nf_inet_addr to; |
288 | __be16 port; | 290 | __be16 port; |
289 | struct ip_vs_conn *n_cp; | 291 | struct ip_vs_conn *n_cp; |
292 | struct net *net; | ||
290 | 293 | ||
291 | #ifdef CONFIG_IP_VS_IPV6 | 294 | #ifdef CONFIG_IP_VS_IPV6 |
292 | /* This application helper doesn't work with IPv6 yet, | 295 | /* This application helper doesn't work with IPv6 yet, |
@@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
378 | /* | 381 | /* |
379 | * Move tunnel to listen state | 382 | * Move tunnel to listen state |
380 | */ | 383 | */ |
381 | ip_vs_tcp_conn_listen(n_cp); | 384 | net = skb_net(skb); |
385 | ip_vs_tcp_conn_listen(net, n_cp); | ||
382 | ip_vs_conn_put(n_cp); | 386 | ip_vs_conn_put(n_cp); |
383 | 387 | ||
384 | return 1; | 388 | return 1; |
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 576e29648c53..320c6a65f370 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c | |||
@@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, | |||
307 | */ | 307 | */ |
308 | static int __net_init __ip_vs_protocol_init(struct net *net) | 308 | static int __net_init __ip_vs_protocol_init(struct net *net) |
309 | { | 309 | { |
310 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
311 | register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); | ||
312 | #endif | ||
310 | return 0; | 313 | return 0; |
311 | } | 314 | } |
312 | 315 | ||
313 | static void __net_exit __ip_vs_protocol_cleanup(struct net *net) | 316 | static void __net_exit __ip_vs_protocol_cleanup(struct net *net) |
314 | { | 317 | { |
315 | /* empty */ | 318 | struct netns_ipvs *ipvs = net_ipvs(net); |
319 | struct ip_vs_proto_data *pd; | ||
320 | int i; | ||
321 | |||
322 | /* unregister all the ipvs proto data for this netns */ | ||
323 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | ||
324 | while ((pd = ipvs->proto_data_table[i]) != NULL) | ||
325 | unregister_ip_vs_proto_netns(net, pd); | ||
326 | } | ||
316 | } | 327 | } |
317 | 328 | ||
318 | static struct pernet_operations ipvs_proto_ops = { | 329 | static struct pernet_operations ipvs_proto_ops = { |
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c175d3166263..9d9df3d61093 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c | |||
@@ -9,8 +9,12 @@ | |||
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
13 | * | 13 | * |
14 | * Network name space (netns) aware. | ||
15 | * Global data moved to netns, i.e. struct netns_ipvs | ||
16 | * tcp_timeouts table has a copy per netns in a hash table per | ||
17 | * protocol ip_vs_proto_data and is handled by netns | ||
14 | */ | 18 | */ |
15 | 19 | ||
16 | #define KMSG_COMPONENT "IPVS" | 20 | #define KMSG_COMPONENT "IPVS" |
@@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { | |||
345 | /* | 349 | /* |
346 | * Timeout table[state] | 350 | * Timeout table[state] |
347 | */ | 351 | */ |
348 | static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { | 352 | static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { |
349 | [IP_VS_TCP_S_NONE] = 2*HZ, | 353 | [IP_VS_TCP_S_NONE] = 2*HZ, |
350 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, | 354 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, |
351 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, | 355 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, |
@@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | |||
460 | tcp_state_table = (on? tcp_states_dos : tcp_states); | 464 | tcp_state_table = (on? tcp_states_dos : tcp_states); |
461 | } | 465 | } |
462 | 466 | ||
463 | static int | ||
464 | tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
465 | { | ||
466 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, | ||
467 | tcp_state_name_table, sname, to); | ||
468 | } | ||
469 | |||
470 | static inline int tcp_state_idx(struct tcphdr *th) | 467 | static inline int tcp_state_idx(struct tcphdr *th) |
471 | { | 468 | { |
472 | if (th->rst) | 469 | if (th->rst) |
@@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
487 | int state_idx; | 484 | int state_idx; |
488 | int new_state = IP_VS_TCP_S_CLOSE; | 485 | int new_state = IP_VS_TCP_S_CLOSE; |
489 | int state_off = tcp_state_off[direction]; | 486 | int state_off = tcp_state_off[direction]; |
487 | struct ip_vs_proto_data *pd; /* Temp fix */ | ||
490 | 488 | ||
491 | /* | 489 | /* |
492 | * Update state offset to INPUT_ONLY if necessary | 490 | * Update state offset to INPUT_ONLY if necessary |
@@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
542 | } | 540 | } |
543 | } | 541 | } |
544 | 542 | ||
545 | cp->timeout = pp->timeout_table[cp->state = new_state]; | 543 | pd = ip_vs_proto_data_get(&init_net, pp->protocol); |
544 | if (likely(pd)) | ||
545 | cp->timeout = pd->timeout_table[cp->state = new_state]; | ||
546 | else /* What to do ? */ | ||
547 | cp->timeout = tcp_timeouts[cp->state = new_state]; | ||
546 | } | 548 | } |
547 | 549 | ||
548 | |||
549 | /* | 550 | /* |
550 | * Handle state transitions | 551 | * Handle state transitions |
551 | */ | 552 | */ |
@@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, | |||
573 | return 1; | 574 | return 1; |
574 | } | 575 | } |
575 | 576 | ||
576 | |||
577 | /* | ||
578 | * Hash table for TCP application incarnations | ||
579 | */ | ||
580 | #define TCP_APP_TAB_BITS 4 | ||
581 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
582 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
583 | |||
584 | static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
585 | static DEFINE_SPINLOCK(tcp_app_lock); | ||
586 | |||
587 | static inline __u16 tcp_app_hashkey(__be16 port) | 577 | static inline __u16 tcp_app_hashkey(__be16 port) |
588 | { | 578 | { |
589 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) | 579 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) |
@@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc) | |||
597 | __u16 hash; | 587 | __u16 hash; |
598 | __be16 port = inc->port; | 588 | __be16 port = inc->port; |
599 | int ret = 0; | 589 | int ret = 0; |
590 | struct netns_ipvs *ipvs = net_ipvs(&init_net); | ||
591 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); | ||
600 | 592 | ||
601 | hash = tcp_app_hashkey(port); | 593 | hash = tcp_app_hashkey(port); |
602 | 594 | ||
603 | spin_lock_bh(&tcp_app_lock); | 595 | spin_lock_bh(&ipvs->tcp_app_lock); |
604 | list_for_each_entry(i, &tcp_apps[hash], p_list) { | 596 | list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { |
605 | if (i->port == port) { | 597 | if (i->port == port) { |
606 | ret = -EEXIST; | 598 | ret = -EEXIST; |
607 | goto out; | 599 | goto out; |
608 | } | 600 | } |
609 | } | 601 | } |
610 | list_add(&inc->p_list, &tcp_apps[hash]); | 602 | list_add(&inc->p_list, &ipvs->tcp_apps[hash]); |
611 | atomic_inc(&ip_vs_protocol_tcp.appcnt); | 603 | atomic_inc(&pd->pp->appcnt); |
612 | 604 | ||
613 | out: | 605 | out: |
614 | spin_unlock_bh(&tcp_app_lock); | 606 | spin_unlock_bh(&ipvs->tcp_app_lock); |
615 | return ret; | 607 | return ret; |
616 | } | 608 | } |
617 | 609 | ||
@@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc) | |||
619 | static void | 611 | static void |
620 | tcp_unregister_app(struct ip_vs_app *inc) | 612 | tcp_unregister_app(struct ip_vs_app *inc) |
621 | { | 613 | { |
622 | spin_lock_bh(&tcp_app_lock); | 614 | struct netns_ipvs *ipvs = net_ipvs(&init_net); |
623 | atomic_dec(&ip_vs_protocol_tcp.appcnt); | 615 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); |
616 | |||
617 | spin_lock_bh(&ipvs->tcp_app_lock); | ||
618 | atomic_dec(&pd->pp->appcnt); | ||
624 | list_del(&inc->p_list); | 619 | list_del(&inc->p_list); |
625 | spin_unlock_bh(&tcp_app_lock); | 620 | spin_unlock_bh(&ipvs->tcp_app_lock); |
626 | } | 621 | } |
627 | 622 | ||
628 | 623 | ||
629 | static int | 624 | static int |
630 | tcp_app_conn_bind(struct ip_vs_conn *cp) | 625 | tcp_app_conn_bind(struct ip_vs_conn *cp) |
631 | { | 626 | { |
627 | struct netns_ipvs *ipvs = net_ipvs(&init_net); | ||
632 | int hash; | 628 | int hash; |
633 | struct ip_vs_app *inc; | 629 | struct ip_vs_app *inc; |
634 | int result = 0; | 630 | int result = 0; |
@@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
640 | /* Lookup application incarnations and bind the right one */ | 636 | /* Lookup application incarnations and bind the right one */ |
641 | hash = tcp_app_hashkey(cp->vport); | 637 | hash = tcp_app_hashkey(cp->vport); |
642 | 638 | ||
643 | spin_lock(&tcp_app_lock); | 639 | spin_lock(&ipvs->tcp_app_lock); |
644 | list_for_each_entry(inc, &tcp_apps[hash], p_list) { | 640 | list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { |
645 | if (inc->port == cp->vport) { | 641 | if (inc->port == cp->vport) { |
646 | if (unlikely(!ip_vs_app_inc_get(inc))) | 642 | if (unlikely(!ip_vs_app_inc_get(inc))) |
647 | break; | 643 | break; |
648 | spin_unlock(&tcp_app_lock); | 644 | spin_unlock(&ipvs->tcp_app_lock); |
649 | 645 | ||
650 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" | 646 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" |
651 | "%s:%u to app %s on port %u\n", | 647 | "%s:%u to app %s on port %u\n", |
@@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
662 | goto out; | 658 | goto out; |
663 | } | 659 | } |
664 | } | 660 | } |
665 | spin_unlock(&tcp_app_lock); | 661 | spin_unlock(&ipvs->tcp_app_lock); |
666 | 662 | ||
667 | out: | 663 | out: |
668 | return result; | 664 | return result; |
@@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
672 | /* | 668 | /* |
673 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) | 669 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) |
674 | */ | 670 | */ |
675 | void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) | 671 | void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) |
676 | { | 672 | { |
673 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); | ||
674 | |||
677 | spin_lock(&cp->lock); | 675 | spin_lock(&cp->lock); |
678 | cp->state = IP_VS_TCP_S_LISTEN; | 676 | cp->state = IP_VS_TCP_S_LISTEN; |
679 | cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; | 677 | cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] |
678 | : tcp_timeouts[IP_VS_TCP_S_LISTEN]); | ||
680 | spin_unlock(&cp->lock); | 679 | spin_unlock(&cp->lock); |
681 | } | 680 | } |
682 | 681 | ||
683 | 682 | /* --------------------------------------------- | |
684 | static void ip_vs_tcp_init(struct ip_vs_protocol *pp) | 683 | * timeouts are netns related now. |
684 | * --------------------------------------------- | ||
685 | */ | ||
686 | static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) | ||
685 | { | 687 | { |
686 | IP_VS_INIT_HASH_TABLE(tcp_apps); | 688 | struct netns_ipvs *ipvs = net_ipvs(net); |
687 | pp->timeout_table = tcp_timeouts; | ||
688 | } | ||
689 | 689 | ||
690 | ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); | ||
691 | spin_lock_init(&ipvs->tcp_app_lock); | ||
692 | pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, | ||
693 | sizeof(tcp_timeouts)); | ||
694 | } | ||
690 | 695 | ||
691 | static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) | 696 | static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) |
692 | { | 697 | { |
698 | kfree(pd->timeout_table); | ||
693 | } | 699 | } |
694 | 700 | ||
695 | 701 | ||
@@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
699 | .num_states = IP_VS_TCP_S_LAST, | 705 | .num_states = IP_VS_TCP_S_LAST, |
700 | .dont_defrag = 0, | 706 | .dont_defrag = 0, |
701 | .appcnt = ATOMIC_INIT(0), | 707 | .appcnt = ATOMIC_INIT(0), |
702 | .init = ip_vs_tcp_init, | 708 | .init = NULL, |
703 | .exit = ip_vs_tcp_exit, | 709 | .exit = NULL, |
710 | .init_netns = __ip_vs_tcp_init, | ||
711 | .exit_netns = __ip_vs_tcp_exit, | ||
704 | .register_app = tcp_register_app, | 712 | .register_app = tcp_register_app, |
705 | .unregister_app = tcp_unregister_app, | 713 | .unregister_app = tcp_unregister_app, |
706 | .conn_schedule = tcp_conn_schedule, | 714 | .conn_schedule = tcp_conn_schedule, |
@@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
714 | .app_conn_bind = tcp_app_conn_bind, | 722 | .app_conn_bind = tcp_app_conn_bind, |
715 | .debug_packet = ip_vs_tcpudp_debug_packet, | 723 | .debug_packet = ip_vs_tcpudp_debug_packet, |
716 | .timeout_change = tcp_timeout_change, | 724 | .timeout_change = tcp_timeout_change, |
717 | .set_state_timeout = tcp_set_state_timeout, | ||
718 | }; | 725 | }; |