aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/ip_vs.h2
-rw-r--r--include/net/netns/ip_vs.h8
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c97
5 files changed, 79 insertions, 49 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 88d4e40b538a..3c45a00cdc3e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
807 807
808extern const char * ip_vs_state_name(__u16 proto, int state); 808extern const char * ip_vs_state_name(__u16 proto, int state);
809 809
810extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); 810extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
811extern int ip_vs_check_template(struct ip_vs_conn *ct); 811extern int ip_vs_check_template(struct ip_vs_conn *ct);
812extern void ip_vs_random_dropentry(void); 812extern void ip_vs_random_dropentry(void);
813extern int ip_vs_conn_init(void); 813extern int ip_vs_conn_init(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 6f4e089b8db2..ac77363647ab 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -31,6 +31,14 @@ struct netns_ipvs {
31 /* ip_vs_proto */ 31 /* ip_vs_proto */
32 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 32 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
33 struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; 33 struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
34 /* ip_vs_proto_tcp */
35#ifdef CONFIG_IP_VS_PROTO_TCP
36 #define TCP_APP_TAB_BITS 4
37 #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
38 #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
39 struct list_head tcp_apps[TCP_APP_TAB_SIZE];
40 spinlock_t tcp_app_lock;
41#endif
34 42
35 /* ip_vs_lblc */ 43 /* ip_vs_lblc */
36 int sysctl_lblc_expiration; 44 int sysctl_lblc_expiration;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 0e762f322aa3..b38ae941f677 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
157 int ret = 0; 157 int ret = 0;
158 enum ip_conntrack_info ctinfo; 158 enum ip_conntrack_info ctinfo;
159 struct nf_conn *ct; 159 struct nf_conn *ct;
160 struct net *net;
160 161
161#ifdef CONFIG_IP_VS_IPV6 162#ifdef CONFIG_IP_VS_IPV6
162 /* This application helper doesn't work with IPv6 yet, 163 /* This application helper doesn't work with IPv6 yet,
@@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
257 * would be adjusted twice. 258 * would be adjusted twice.
258 */ 259 */
259 260
261 net = skb_net(skb);
260 cp->app_data = NULL; 262 cp->app_data = NULL;
261 ip_vs_tcp_conn_listen(n_cp); 263 ip_vs_tcp_conn_listen(net, n_cp);
262 ip_vs_conn_put(n_cp); 264 ip_vs_conn_put(n_cp);
263 return ret; 265 return ret;
264 } 266 }
@@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
287 union nf_inet_addr to; 289 union nf_inet_addr to;
288 __be16 port; 290 __be16 port;
289 struct ip_vs_conn *n_cp; 291 struct ip_vs_conn *n_cp;
292 struct net *net;
290 293
291#ifdef CONFIG_IP_VS_IPV6 294#ifdef CONFIG_IP_VS_IPV6
292 /* This application helper doesn't work with IPv6 yet, 295 /* This application helper doesn't work with IPv6 yet,
@@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
378 /* 381 /*
379 * Move tunnel to listen state 382 * Move tunnel to listen state
380 */ 383 */
381 ip_vs_tcp_conn_listen(n_cp); 384 net = skb_net(skb);
385 ip_vs_tcp_conn_listen(net, n_cp);
382 ip_vs_conn_put(n_cp); 386 ip_vs_conn_put(n_cp);
383 387
384 return 1; 388 return 1;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 576e29648c53..320c6a65f370 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
307 */ 307 */
308static int __net_init __ip_vs_protocol_init(struct net *net) 308static int __net_init __ip_vs_protocol_init(struct net *net)
309{ 309{
310#ifdef CONFIG_IP_VS_PROTO_TCP
311 register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
312#endif
310 return 0; 313 return 0;
311} 314}
312 315
313static void __net_exit __ip_vs_protocol_cleanup(struct net *net) 316static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
314{ 317{
315 /* empty */ 318 struct netns_ipvs *ipvs = net_ipvs(net);
319 struct ip_vs_proto_data *pd;
320 int i;
321
322 /* unregister all the ipvs proto data for this netns */
323 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
324 while ((pd = ipvs->proto_data_table[i]) != NULL)
325 unregister_ip_vs_proto_netns(net, pd);
326 }
316} 327}
317 328
318static struct pernet_operations ipvs_proto_ops = { 329static struct pernet_operations ipvs_proto_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index c175d3166263..9d9df3d61093 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * 13 *
14 * Network name space (netns) aware.
15 * Global data moved to netns i.e struct netns_ipvs
16 * tcp_timeouts table has copy per netns in a hash table per
17 * protocol ip_vs_proto_data and is handled by netns
14 */ 18 */
15 19
16#define KMSG_COMPONENT "IPVS" 20#define KMSG_COMPONENT "IPVS"
@@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
345/* 349/*
346 * Timeout table[state] 350 * Timeout table[state]
347 */ 351 */
348static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { 352static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
349 [IP_VS_TCP_S_NONE] = 2*HZ, 353 [IP_VS_TCP_S_NONE] = 2*HZ,
350 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, 354 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
351 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, 355 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
460 tcp_state_table = (on? tcp_states_dos : tcp_states); 464 tcp_state_table = (on? tcp_states_dos : tcp_states);
461} 465}
462 466
463static int
464tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
465{
466 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
467 tcp_state_name_table, sname, to);
468}
469
470static inline int tcp_state_idx(struct tcphdr *th) 467static inline int tcp_state_idx(struct tcphdr *th)
471{ 468{
472 if (th->rst) 469 if (th->rst)
@@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
487 int state_idx; 484 int state_idx;
488 int new_state = IP_VS_TCP_S_CLOSE; 485 int new_state = IP_VS_TCP_S_CLOSE;
489 int state_off = tcp_state_off[direction]; 486 int state_off = tcp_state_off[direction];
487 struct ip_vs_proto_data *pd; /* Temp fix */
490 488
491 /* 489 /*
492 * Update state offset to INPUT_ONLY if necessary 490 * Update state offset to INPUT_ONLY if necessary
@@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
542 } 540 }
543 } 541 }
544 542
545 cp->timeout = pp->timeout_table[cp->state = new_state]; 543 pd = ip_vs_proto_data_get(&init_net, pp->protocol);
544 if (likely(pd))
545 cp->timeout = pd->timeout_table[cp->state = new_state];
546 else /* What to do ? */
547 cp->timeout = tcp_timeouts[cp->state = new_state];
546} 548}
547 549
548
549/* 550/*
550 * Handle state transitions 551 * Handle state transitions
551 */ 552 */
@@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
573 return 1; 574 return 1;
574} 575}
575 576
576
577/*
578 * Hash table for TCP application incarnations
579 */
580#define TCP_APP_TAB_BITS 4
581#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
582#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
583
584static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
585static DEFINE_SPINLOCK(tcp_app_lock);
586
587static inline __u16 tcp_app_hashkey(__be16 port) 577static inline __u16 tcp_app_hashkey(__be16 port)
588{ 578{
589 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) 579 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc)
597 __u16 hash; 587 __u16 hash;
598 __be16 port = inc->port; 588 __be16 port = inc->port;
599 int ret = 0; 589 int ret = 0;
590 struct netns_ipvs *ipvs = net_ipvs(&init_net);
591 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
600 592
601 hash = tcp_app_hashkey(port); 593 hash = tcp_app_hashkey(port);
602 594
603 spin_lock_bh(&tcp_app_lock); 595 spin_lock_bh(&ipvs->tcp_app_lock);
604 list_for_each_entry(i, &tcp_apps[hash], p_list) { 596 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
605 if (i->port == port) { 597 if (i->port == port) {
606 ret = -EEXIST; 598 ret = -EEXIST;
607 goto out; 599 goto out;
608 } 600 }
609 } 601 }
610 list_add(&inc->p_list, &tcp_apps[hash]); 602 list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
611 atomic_inc(&ip_vs_protocol_tcp.appcnt); 603 atomic_inc(&pd->pp->appcnt);
612 604
613 out: 605 out:
614 spin_unlock_bh(&tcp_app_lock); 606 spin_unlock_bh(&ipvs->tcp_app_lock);
615 return ret; 607 return ret;
616} 608}
617 609
@@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc)
619static void 611static void
620tcp_unregister_app(struct ip_vs_app *inc) 612tcp_unregister_app(struct ip_vs_app *inc)
621{ 613{
622 spin_lock_bh(&tcp_app_lock); 614 struct netns_ipvs *ipvs = net_ipvs(&init_net);
623 atomic_dec(&ip_vs_protocol_tcp.appcnt); 615 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
616
617 spin_lock_bh(&ipvs->tcp_app_lock);
618 atomic_dec(&pd->pp->appcnt);
624 list_del(&inc->p_list); 619 list_del(&inc->p_list);
625 spin_unlock_bh(&tcp_app_lock); 620 spin_unlock_bh(&ipvs->tcp_app_lock);
626} 621}
627 622
628 623
629static int 624static int
630tcp_app_conn_bind(struct ip_vs_conn *cp) 625tcp_app_conn_bind(struct ip_vs_conn *cp)
631{ 626{
627 struct netns_ipvs *ipvs = net_ipvs(&init_net);
632 int hash; 628 int hash;
633 struct ip_vs_app *inc; 629 struct ip_vs_app *inc;
634 int result = 0; 630 int result = 0;
@@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
640 /* Lookup application incarnations and bind the right one */ 636 /* Lookup application incarnations and bind the right one */
641 hash = tcp_app_hashkey(cp->vport); 637 hash = tcp_app_hashkey(cp->vport);
642 638
643 spin_lock(&tcp_app_lock); 639 spin_lock(&ipvs->tcp_app_lock);
644 list_for_each_entry(inc, &tcp_apps[hash], p_list) { 640 list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
645 if (inc->port == cp->vport) { 641 if (inc->port == cp->vport) {
646 if (unlikely(!ip_vs_app_inc_get(inc))) 642 if (unlikely(!ip_vs_app_inc_get(inc)))
647 break; 643 break;
648 spin_unlock(&tcp_app_lock); 644 spin_unlock(&ipvs->tcp_app_lock);
649 645
650 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 646 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
651 "%s:%u to app %s on port %u\n", 647 "%s:%u to app %s on port %u\n",
@@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
662 goto out; 658 goto out;
663 } 659 }
664 } 660 }
665 spin_unlock(&tcp_app_lock); 661 spin_unlock(&ipvs->tcp_app_lock);
666 662
667 out: 663 out:
668 return result; 664 return result;
@@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
672/* 668/*
673 * Set LISTEN timeout. (ip_vs_conn_put will setup timer) 669 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
674 */ 670 */
675void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) 671void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
676{ 672{
673 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
674
677 spin_lock(&cp->lock); 675 spin_lock(&cp->lock);
678 cp->state = IP_VS_TCP_S_LISTEN; 676 cp->state = IP_VS_TCP_S_LISTEN;
679 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; 677 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
678 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
680 spin_unlock(&cp->lock); 679 spin_unlock(&cp->lock);
681} 680}
682 681
683 682/* ---------------------------------------------
684static void ip_vs_tcp_init(struct ip_vs_protocol *pp) 683 * timeouts is netns related now.
684 * ---------------------------------------------
685 */
686static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
685{ 687{
686 IP_VS_INIT_HASH_TABLE(tcp_apps); 688 struct netns_ipvs *ipvs = net_ipvs(net);
687 pp->timeout_table = tcp_timeouts;
688}
689 689
690 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
691 spin_lock_init(&ipvs->tcp_app_lock);
692 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
693 sizeof(tcp_timeouts));
694}
690 695
691static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) 696static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
692{ 697{
698 kfree(pd->timeout_table);
693} 699}
694 700
695 701
@@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
699 .num_states = IP_VS_TCP_S_LAST, 705 .num_states = IP_VS_TCP_S_LAST,
700 .dont_defrag = 0, 706 .dont_defrag = 0,
701 .appcnt = ATOMIC_INIT(0), 707 .appcnt = ATOMIC_INIT(0),
702 .init = ip_vs_tcp_init, 708 .init = NULL,
703 .exit = ip_vs_tcp_exit, 709 .exit = NULL,
710 .init_netns = __ip_vs_tcp_init,
711 .exit_netns = __ip_vs_tcp_exit,
704 .register_app = tcp_register_app, 712 .register_app = tcp_register_app,
705 .unregister_app = tcp_unregister_app, 713 .unregister_app = tcp_unregister_app,
706 .conn_schedule = tcp_conn_schedule, 714 .conn_schedule = tcp_conn_schedule,
@@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
714 .app_conn_bind = tcp_app_conn_bind, 722 .app_conn_bind = tcp_app_conn_bind,
715 .debug_packet = ip_vs_tcpudp_debug_packet, 723 .debug_packet = ip_vs_tcpudp_debug_packet,
716 .timeout_change = tcp_timeout_change, 724 .timeout_change = tcp_timeout_change,
717 .set_state_timeout = tcp_set_state_timeout,
718}; 725};