about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/ip_vs.h 30
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 30
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 25
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 121
5 files changed, 165 insertions, 48 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 30e43c8c0283..d3a4b934d521 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -504,6 +504,7 @@ struct ip_vs_conn {
504 * state transition triggerd 504 * state transition triggerd
505 * synchronization 505 * synchronization
506 */ 506 */
507 unsigned long sync_endtime; /* jiffies + sent_retries */
507 508
508 /* Control members */ 509 /* Control members */
509 struct ip_vs_conn *control; /* Master control connection */ 510 struct ip_vs_conn *control; /* Master control connection */
@@ -875,6 +876,8 @@ struct netns_ipvs {
875 int sysctl_expire_nodest_conn; 876 int sysctl_expire_nodest_conn;
876 int sysctl_expire_quiescent_template; 877 int sysctl_expire_quiescent_template;
877 int sysctl_sync_threshold[2]; 878 int sysctl_sync_threshold[2];
879 unsigned int sysctl_sync_refresh_period;
880 int sysctl_sync_retries;
878 int sysctl_nat_icmp_send; 881 int sysctl_nat_icmp_send;
879 882
880 /* ip_vs_lblc */ 883 /* ip_vs_lblc */
@@ -916,10 +919,13 @@ struct netns_ipvs {
916#define DEFAULT_SYNC_THRESHOLD 3 919#define DEFAULT_SYNC_THRESHOLD 3
917#define DEFAULT_SYNC_PERIOD 50 920#define DEFAULT_SYNC_PERIOD 50
918#define DEFAULT_SYNC_VER 1 921#define DEFAULT_SYNC_VER 1
922#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ)
923#define DEFAULT_SYNC_RETRIES 0
919#define IPVS_SYNC_WAKEUP_RATE 8 924#define IPVS_SYNC_WAKEUP_RATE 8
920#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4) 925#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4)
921#define IPVS_SYNC_SEND_DELAY (HZ / 50) 926#define IPVS_SYNC_SEND_DELAY (HZ / 50)
922#define IPVS_SYNC_CHECK_PERIOD HZ 927#define IPVS_SYNC_CHECK_PERIOD HZ
928#define IPVS_SYNC_FLUSH_TIME (HZ * 2)
923 929
924#ifdef CONFIG_SYSCTL 930#ifdef CONFIG_SYSCTL
925 931
@@ -930,7 +936,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
930 936
931static inline int sysctl_sync_period(struct netns_ipvs *ipvs) 937static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
932{ 938{
933 return ipvs->sysctl_sync_threshold[1]; 939 return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
940}
941
942static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
943{
944 return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
945}
946
947static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
948{
949 return ipvs->sysctl_sync_retries;
934} 950}
935 951
936static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) 952static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
@@ -960,6 +976,16 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
960 return DEFAULT_SYNC_PERIOD; 976 return DEFAULT_SYNC_PERIOD;
961} 977}
962 978
979static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
980{
981 return DEFAULT_SYNC_REFRESH_PERIOD;
982}
983
984static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
985{
986 return DEFAULT_SYNC_RETRIES & 3;
987}
988
963static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) 989static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
964{ 990{
965 return DEFAULT_SYNC_VER; 991 return DEFAULT_SYNC_VER;
@@ -1248,7 +1274,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1248extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, 1274extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
1249 __u8 syncid); 1275 __u8 syncid);
1250extern int stop_sync_thread(struct net *net, int state); 1276extern int stop_sync_thread(struct net *net, int state);
1251extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); 1277extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
1252 1278
1253 1279
1254/* 1280/*
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index fd74f881d04a..4f3205def28f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -762,7 +762,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
762static void ip_vs_conn_expire(unsigned long data) 762static void ip_vs_conn_expire(unsigned long data)
763{ 763{
764 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 764 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
765 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 765 struct net *net = ip_vs_conn_net(cp);
766 struct netns_ipvs *ipvs = net_ipvs(net);
766 767
767 cp->timeout = 60*HZ; 768 cp->timeout = 60*HZ;
768 769
@@ -827,6 +828,9 @@ static void ip_vs_conn_expire(unsigned long data)
827 atomic_read(&cp->refcnt)-1, 828 atomic_read(&cp->refcnt)-1,
828 atomic_read(&cp->n_control)); 829 atomic_read(&cp->n_control));
829 830
831 if (ipvs->sync_state & IP_VS_STATE_MASTER)
832 ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
833
830 ip_vs_conn_put(cp); 834 ip_vs_conn_put(cp);
831} 835}
832 836
@@ -900,6 +904,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
900 /* Set its state and timeout */ 904 /* Set its state and timeout */
901 cp->state = 0; 905 cp->state = 0;
902 cp->timeout = 3*HZ; 906 cp->timeout = 3*HZ;
907 cp->sync_endtime = jiffies & ~3UL;
903 908
904 /* Bind its packet transmitter */ 909 /* Bind its packet transmitter */
905#ifdef CONFIG_IP_VS_IPV6 910#ifdef CONFIG_IP_VS_IPV6
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c8f36b96f44f..a54b018c6eea 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1613 else 1613 else
1614 pkts = atomic_add_return(1, &cp->in_pkts); 1614 pkts = atomic_add_return(1, &cp->in_pkts);
1615 1615
1616 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1616 if (ipvs->sync_state & IP_VS_STATE_MASTER)
1617 cp->protocol == IPPROTO_SCTP) { 1617 ip_vs_sync_conn(net, cp, pkts);
1618 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1619 (pkts % sysctl_sync_period(ipvs)
1620 == sysctl_sync_threshold(ipvs))) ||
1621 (cp->old_state != cp->state &&
1622 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1623 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1624 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1625 ip_vs_sync_conn(net, cp);
1626 goto out;
1627 }
1628 }
1629
1630 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1631 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1632 (((cp->protocol != IPPROTO_TCP ||
1633 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1634 (pkts % sysctl_sync_period(ipvs)
1635 == sysctl_sync_threshold(ipvs))) ||
1636 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1637 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1638 (cp->state == IP_VS_TCP_S_CLOSE) ||
1639 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1640 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1641 ip_vs_sync_conn(net, cp);
1642out:
1643 cp->old_state = cp->state;
1644 1618
1645 ip_vs_conn_put(cp); 1619 ip_vs_conn_put(cp);
1646 return ret; 1620 return ret;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd3827ec25c9..a77b9bd433aa 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
1599} 1599}
1600 1600
1601#ifdef CONFIG_SYSCTL 1601#ifdef CONFIG_SYSCTL
1602
1603static int zero;
1604static int three = 3;
1605
1602static int 1606static int
1603proc_do_defense_mode(ctl_table *table, int write, 1607proc_do_defense_mode(ctl_table *table, int write,
1604 void __user *buffer, size_t *lenp, loff_t *ppos) 1608 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
1632 memcpy(val, valp, sizeof(val)); 1636 memcpy(val, valp, sizeof(val));
1633 1637
1634 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1638 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1635 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { 1639 if (write && (valp[0] < 0 || valp[1] < 0 ||
1640 (valp[0] >= valp[1] && valp[1]))) {
1636 /* Restore the correct value */ 1641 /* Restore the correct value */
1637 memcpy(valp, val, sizeof(val)); 1642 memcpy(valp, val, sizeof(val));
1638 } 1643 }
@@ -1755,6 +1760,20 @@ static struct ctl_table vs_vars[] = {
1755 .proc_handler = proc_do_sync_threshold, 1760 .proc_handler = proc_do_sync_threshold,
1756 }, 1761 },
1757 { 1762 {
1763 .procname = "sync_refresh_period",
1764 .maxlen = sizeof(int),
1765 .mode = 0644,
1766 .proc_handler = proc_dointvec_jiffies,
1767 },
1768 {
1769 .procname = "sync_retries",
1770 .maxlen = sizeof(int),
1771 .mode = 0644,
1772 .proc_handler = proc_dointvec_minmax,
1773 .extra1 = &zero,
1774 .extra2 = &three,
1775 },
1776 {
1758 .procname = "nat_icmp_send", 1777 .procname = "nat_icmp_send",
1759 .maxlen = sizeof(int), 1778 .maxlen = sizeof(int),
1760 .mode = 0644, 1779 .mode = 0644,
@@ -3678,6 +3697,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3678 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3697 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3679 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3698 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3680 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3699 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3700 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3701 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3702 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3703 tbl[idx++].data = &ipvs->sysctl_sync_retries;
3681 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3704 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3682 3705
3683 3706
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b3235b230139..8d6a4219e904 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -451,11 +451,94 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
451 return sb; 451 return sb;
452} 452}
453 453
454/* Check if conn should be synced.
455 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
456 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
457 * sync_retries times with period of sync_refresh_period/8
458 * - (2) if both sync_refresh_period and sync_period are 0 send sync only
459 * for state changes or only once when pkts matches sync_threshold
460 * - (3) templates: rate can be reduced only with sync_refresh_period or
461 * with (2)
462 */
463static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
464 struct ip_vs_conn *cp, int pkts)
465{
466 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
467 unsigned long now = jiffies;
468 unsigned long n = (now + cp->timeout) & ~3UL;
469 unsigned int sync_refresh_period;
470 int sync_period;
471 int force;
472
473 /* Check if we sync in current state */
474 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
475 force = 0;
476 else if (likely(cp->protocol == IPPROTO_TCP)) {
477 if (!((1 << cp->state) &
478 ((1 << IP_VS_TCP_S_ESTABLISHED) |
479 (1 << IP_VS_TCP_S_FIN_WAIT) |
480 (1 << IP_VS_TCP_S_CLOSE) |
481 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
482 (1 << IP_VS_TCP_S_TIME_WAIT))))
483 return 0;
484 force = cp->state != cp->old_state;
485 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
486 goto set;
487 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
488 if (!((1 << cp->state) &
489 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
490 (1 << IP_VS_SCTP_S_CLOSED) |
491 (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
492 (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
493 return 0;
494 force = cp->state != cp->old_state;
495 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
496 goto set;
497 } else {
498 /* UDP or another protocol with single state */
499 force = 0;
500 }
501
502 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
503 if (sync_refresh_period > 0) {
504 long diff = n - orig;
505 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
506
507 /* Avoid sync if difference is below sync_refresh_period
508 * and below the half timeout.
509 */
510 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
511 int retries = orig & 3;
512
513 if (retries >= sysctl_sync_retries(ipvs))
514 return 0;
515 if (time_before(now, orig - cp->timeout +
516 (sync_refresh_period >> 3)))
517 return 0;
518 n |= retries + 1;
519 }
520 }
521 sync_period = sysctl_sync_period(ipvs);
522 if (sync_period > 0) {
523 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
524 pkts % sync_period != sysctl_sync_threshold(ipvs))
525 return 0;
526 } else if (sync_refresh_period <= 0 &&
527 pkts != sysctl_sync_threshold(ipvs))
528 return 0;
529
530set:
531 cp->old_state = cp->state;
532 n = cmpxchg(&cp->sync_endtime, orig, n);
533 return n == orig || force;
534}
535
454/* 536/*
455 * Version 0 , could be switched in by sys_ctl. 537 * Version 0 , could be switched in by sys_ctl.
456 * Add an ip_vs_conn information into the current sync_buff. 538 * Add an ip_vs_conn information into the current sync_buff.
457 */ 539 */
458void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) 540static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
541 int pkts)
459{ 542{
460 struct netns_ipvs *ipvs = net_ipvs(net); 543 struct netns_ipvs *ipvs = net_ipvs(net);
461 struct ip_vs_sync_mesg_v0 *m; 544 struct ip_vs_sync_mesg_v0 *m;
@@ -468,6 +551,9 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
468 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 551 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
469 return; 552 return;
470 553
554 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
555 return;
556
471 spin_lock(&ipvs->sync_buff_lock); 557 spin_lock(&ipvs->sync_buff_lock);
472 if (!ipvs->sync_buff) { 558 if (!ipvs->sync_buff) {
473 ipvs->sync_buff = 559 ipvs->sync_buff =
@@ -513,8 +599,14 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
513 spin_unlock(&ipvs->sync_buff_lock); 599 spin_unlock(&ipvs->sync_buff_lock);
514 600
515 /* synchronize its controller if it has */ 601 /* synchronize its controller if it has */
516 if (cp->control) 602 cp = cp->control;
517 ip_vs_sync_conn(net, cp->control); 603 if (cp) {
604 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
605 pkts = atomic_add_return(1, &cp->in_pkts);
606 else
607 pkts = sysctl_sync_threshold(ipvs);
608 ip_vs_sync_conn(net, cp->control, pkts);
609 }
518} 610}
519 611
520/* 612/*
@@ -522,7 +614,7 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
522 * Called by ip_vs_in. 614 * Called by ip_vs_in.
523 * Sending Version 1 messages 615 * Sending Version 1 messages
524 */ 616 */
525void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) 617void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
526{ 618{
527 struct netns_ipvs *ipvs = net_ipvs(net); 619 struct netns_ipvs *ipvs = net_ipvs(net);
528 struct ip_vs_sync_mesg *m; 620 struct ip_vs_sync_mesg *m;
@@ -532,13 +624,16 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
532 624
533 /* Handle old version of the protocol */ 625 /* Handle old version of the protocol */
534 if (sysctl_sync_ver(ipvs) == 0) { 626 if (sysctl_sync_ver(ipvs) == 0) {
535 ip_vs_sync_conn_v0(net, cp); 627 ip_vs_sync_conn_v0(net, cp, pkts);
536 return; 628 return;
537 } 629 }
538 /* Do not sync ONE PACKET */ 630 /* Do not sync ONE PACKET */
539 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 631 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
540 goto control; 632 goto control;
541sloop: 633sloop:
634 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
635 goto control;
636
542 /* Sanity checks */ 637 /* Sanity checks */
543 pe_name_len = 0; 638 pe_name_len = 0;
544 if (cp->pe_data_len) { 639 if (cp->pe_data_len) {
@@ -653,16 +748,10 @@ control:
653 cp = cp->control; 748 cp = cp->control;
654 if (!cp) 749 if (!cp)
655 return; 750 return;
656 /* 751 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
657 * Reduce sync rate for templates 752 pkts = atomic_add_return(1, &cp->in_pkts);
658 * i.e only increment in_pkts for Templates. 753 else
659 */ 754 pkts = sysctl_sync_threshold(ipvs);
660 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
661 int pkts = atomic_add_return(1, &cp->in_pkts);
662
663 if (pkts % sysctl_sync_period(ipvs) != 1)
664 return;
665 }
666 goto sloop; 755 goto sloop;
667} 756}
668 757
@@ -1494,7 +1583,7 @@ next_sync_buff(struct netns_ipvs *ipvs)
1494 if (sb) 1583 if (sb)
1495 return sb; 1584 return sb;
1496 /* Do not delay entries in buffer for more than 2 seconds */ 1585 /* Do not delay entries in buffer for more than 2 seconds */
1497 return get_curr_sync_buff(ipvs, 2 * HZ); 1586 return get_curr_sync_buff(ipvs, IPVS_SYNC_FLUSH_TIME);
1498} 1587}
1499 1588
1500static int sync_thread_master(void *data) 1589static int sync_thread_master(void *data)