aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Julian Anastasov <ja@ssi.bg> 2012-04-24 16:46:40 -0400
committer Pablo Neira Ayuso <pablo@netfilter.org> 2012-05-08 13:40:10 -0400
commit 749c42b620a9511782bc38d0a88702a42434529e (patch)
tree 057b15f2bbd7fad96becdada1dc2ee775482e0b6 /net
parent 1c003b1580e20ff9f500846677303a695b1837cc (diff)
ipvs: reduce sync rate with time thresholds
Add two new sysctl vars to control the sync rate. The main idea is to reduce the rate for connection templates, because currently it depends on the packet rate of the controlled connections. This mechanism should also be useful for normal connections with high traffic.

sync_refresh_period: in seconds, the difference in the reported connection timer that triggers a new sync message. It can be used to avoid sync messages for the specified period (or half of the connection timeout if it is lower) if the connection state has not changed since the last sync.

sync_retries: integer, 0..3, defines the number of sync retries, sent with a period of sync_refresh_period/8. Useful to protect against loss of sync messages.

Allow sysctl_sync_threshold to be used with sysctl_sync_period=0, so that only a single sync message is sent if sync_refresh_period is also 0.

Add a new field "sync_endtime" in the connection structure to hold the reported time when the connection expires. The 2 lowest bits represent the retry count.

As sysctl_sync_period can now be 0, use ACCESS_ONCE to avoid division by zero.

Special thanks to Aleksey Chudov for being patient with me, for his extensive reports and for helping in all tests.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Tested-by: Aleksey Chudov <aleksey.chudov@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net')
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 30
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 25
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 121
4 files changed, 137 insertions, 46 deletions
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index fd74f881d04a..4f3205def28f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -762,7 +762,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
762static void ip_vs_conn_expire(unsigned long data) 762static void ip_vs_conn_expire(unsigned long data)
763{ 763{
764 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 764 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
765 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 765 struct net *net = ip_vs_conn_net(cp);
766 struct netns_ipvs *ipvs = net_ipvs(net);
766 767
767 cp->timeout = 60*HZ; 768 cp->timeout = 60*HZ;
768 769
@@ -827,6 +828,9 @@ static void ip_vs_conn_expire(unsigned long data)
827 atomic_read(&cp->refcnt)-1, 828 atomic_read(&cp->refcnt)-1,
828 atomic_read(&cp->n_control)); 829 atomic_read(&cp->n_control));
829 830
831 if (ipvs->sync_state & IP_VS_STATE_MASTER)
832 ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
833
830 ip_vs_conn_put(cp); 834 ip_vs_conn_put(cp);
831} 835}
832 836
@@ -900,6 +904,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
900 /* Set its state and timeout */ 904 /* Set its state and timeout */
901 cp->state = 0; 905 cp->state = 0;
902 cp->timeout = 3*HZ; 906 cp->timeout = 3*HZ;
907 cp->sync_endtime = jiffies & ~3UL;
903 908
904 /* Bind its packet transmitter */ 909 /* Bind its packet transmitter */
905#ifdef CONFIG_IP_VS_IPV6 910#ifdef CONFIG_IP_VS_IPV6
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c8f36b96f44f..a54b018c6eea 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1613 else 1613 else
1614 pkts = atomic_add_return(1, &cp->in_pkts); 1614 pkts = atomic_add_return(1, &cp->in_pkts);
1615 1615
1616 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1616 if (ipvs->sync_state & IP_VS_STATE_MASTER)
1617 cp->protocol == IPPROTO_SCTP) { 1617 ip_vs_sync_conn(net, cp, pkts);
1618 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1619 (pkts % sysctl_sync_period(ipvs)
1620 == sysctl_sync_threshold(ipvs))) ||
1621 (cp->old_state != cp->state &&
1622 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1623 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1624 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1625 ip_vs_sync_conn(net, cp);
1626 goto out;
1627 }
1628 }
1629
1630 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1631 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1632 (((cp->protocol != IPPROTO_TCP ||
1633 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1634 (pkts % sysctl_sync_period(ipvs)
1635 == sysctl_sync_threshold(ipvs))) ||
1636 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1637 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1638 (cp->state == IP_VS_TCP_S_CLOSE) ||
1639 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1640 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1641 ip_vs_sync_conn(net, cp);
1642out:
1643 cp->old_state = cp->state;
1644 1618
1645 ip_vs_conn_put(cp); 1619 ip_vs_conn_put(cp);
1646 return ret; 1620 return ret;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd3827ec25c9..a77b9bd433aa 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
1599} 1599}
1600 1600
1601#ifdef CONFIG_SYSCTL 1601#ifdef CONFIG_SYSCTL
1602
1603static int zero;
1604static int three = 3;
1605
1602static int 1606static int
1603proc_do_defense_mode(ctl_table *table, int write, 1607proc_do_defense_mode(ctl_table *table, int write,
1604 void __user *buffer, size_t *lenp, loff_t *ppos) 1608 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
1632 memcpy(val, valp, sizeof(val)); 1636 memcpy(val, valp, sizeof(val));
1633 1637
1634 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1638 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1635 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { 1639 if (write && (valp[0] < 0 || valp[1] < 0 ||
1640 (valp[0] >= valp[1] && valp[1]))) {
1636 /* Restore the correct value */ 1641 /* Restore the correct value */
1637 memcpy(valp, val, sizeof(val)); 1642 memcpy(valp, val, sizeof(val));
1638 } 1643 }
@@ -1755,6 +1760,20 @@ static struct ctl_table vs_vars[] = {
1755 .proc_handler = proc_do_sync_threshold, 1760 .proc_handler = proc_do_sync_threshold,
1756 }, 1761 },
1757 { 1762 {
1763 .procname = "sync_refresh_period",
1764 .maxlen = sizeof(int),
1765 .mode = 0644,
1766 .proc_handler = proc_dointvec_jiffies,
1767 },
1768 {
1769 .procname = "sync_retries",
1770 .maxlen = sizeof(int),
1771 .mode = 0644,
1772 .proc_handler = proc_dointvec_minmax,
1773 .extra1 = &zero,
1774 .extra2 = &three,
1775 },
1776 {
1758 .procname = "nat_icmp_send", 1777 .procname = "nat_icmp_send",
1759 .maxlen = sizeof(int), 1778 .maxlen = sizeof(int),
1760 .mode = 0644, 1779 .mode = 0644,
@@ -3678,6 +3697,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3678 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3697 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3679 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3698 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3680 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3699 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3700 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3701 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3702 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3703 tbl[idx++].data = &ipvs->sysctl_sync_retries;
3681 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3704 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3682 3705
3683 3706
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b3235b230139..8d6a4219e904 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -451,11 +451,94 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
451 return sb; 451 return sb;
452} 452}
453 453
454/* Check if conn should be synced.
455 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
456 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
457 * sync_retries times with period of sync_refresh_period/8
458 * - (2) if both sync_refresh_period and sync_period are 0 send sync only
459 * for state changes or only once when pkts matches sync_threshold
460 * - (3) templates: rate can be reduced only with sync_refresh_period or
461 * with (2)
 *
 * cp->sync_endtime stores the last reported expiration time with its two
 * lowest bits cleared; those two bits carry the retry counter.
 *
 * Returns non-zero if a sync message should be sent for cp: either this
 * caller won the cmpxchg on cp->sync_endtime, or the connection state
 * differs from cp->old_state (force). Returns 0 otherwise.
 * Side effects on the "send" path: cp->old_state is set to cp->state and
 * cp->sync_endtime is updated (if not changed concurrently).
462 */
463static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
464 struct ip_vs_conn *cp, int pkts)
465{
/* Snapshot of the stored end time; also the expected value for cmpxchg */
466 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
467 unsigned long now = jiffies;
/* Candidate new end time; low 2 bits are kept free for the retry count */
468 unsigned long n = (now + cp->timeout) & ~3UL;
469 unsigned int sync_refresh_period;
470 int sync_period;
471 int force;
472
473 /* Check if we sync in current state */
474 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
475 force = 0;
476 else if (likely(cp->protocol == IPPROTO_TCP)) {
/* TCP conns are synced only in the states listed in this bitmask */
477 if (!((1 << cp->state) &
478 ((1 << IP_VS_TCP_S_ESTABLISHED) |
479 (1 << IP_VS_TCP_S_FIN_WAIT) |
480 (1 << IP_VS_TCP_S_CLOSE) |
481 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
482 (1 << IP_VS_TCP_S_TIME_WAIT))))
483 return 0;
/* A change into a non-ESTABLISHED state skips all rate checks below */
484 force = cp->state != cp->old_state;
485 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
486 goto set;
487 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
/* Same scheme as TCP, with the SCTP state set */
488 if (!((1 << cp->state) &
489 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
490 (1 << IP_VS_SCTP_S_CLOSED) |
491 (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
492 (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
493 return 0;
494 force = cp->state != cp->old_state;
495 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
496 goto set;
497 } else {
498 /* UDP or another protocol with single state */
499 force = 0;
500 }
501
502 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
503 if (sync_refresh_period > 0) {
/* diff: how far the new end time moved from the last reported one */
504 long diff = n - orig;
/* Half of the conn timeout, but never less than 10 seconds */
505 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
506
507 /* Avoid sync if difference is below sync_refresh_period
508 * and below the half timeout.
509 */
510 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
/* Retry count is kept in the two low bits of the stored end time */
511 int retries = orig & 3;
512
513 if (retries >= sysctl_sync_retries(ipvs))
514 return 0;
/* Space retries by sync_refresh_period/8; orig - cp->timeout
 * presumably approximates the time of the last sync (assumes the
 * timeout has not changed since) - TODO confirm
 */
515 if (time_before(now, orig - cp->timeout +
516 (sync_refresh_period >> 3)))
517 return 0;
/* Record one more retry in the value that will be stored */
518 n |= retries + 1;
519 }
520 }
/* Read sync_period once into a local before using it as a divisor */
521 sync_period = sysctl_sync_period(ipvs);
522 if (sync_period > 0) {
/* Packet-count gating applies to non-template conns only */
523 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
524 pkts % sync_period != sysctl_sync_threshold(ipvs))
525 return 0;
/* Both periods disabled: sync only when pkts equals the threshold */
526 } else if (sync_refresh_period <= 0 &&
527 pkts != sysctl_sync_threshold(ipvs))
528 return 0;
529
530set:
531 cp->old_state = cp->state;
/* Publish the new end time only if nobody changed it since the snapshot */
532 n = cmpxchg(&cp->sync_endtime, orig, n);
/* Sync if we won the update, or unconditionally on a state change */
533 return n == orig || force;
534}
535
535
454/* 536/*
455 * Version 0 , could be switched in by sys_ctl. 537 * Version 0 , could be switched in by sys_ctl.
456 * Add an ip_vs_conn information into the current sync_buff. 538 * Add an ip_vs_conn information into the current sync_buff.
457 */ 539 */
458void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) 540static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
541 int pkts)
459{ 542{
460 struct netns_ipvs *ipvs = net_ipvs(net); 543 struct netns_ipvs *ipvs = net_ipvs(net);
461 struct ip_vs_sync_mesg_v0 *m; 544 struct ip_vs_sync_mesg_v0 *m;
@@ -468,6 +551,9 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
468 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 551 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
469 return; 552 return;
470 553
554 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
555 return;
556
471 spin_lock(&ipvs->sync_buff_lock); 557 spin_lock(&ipvs->sync_buff_lock);
472 if (!ipvs->sync_buff) { 558 if (!ipvs->sync_buff) {
473 ipvs->sync_buff = 559 ipvs->sync_buff =
@@ -513,8 +599,14 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
513 spin_unlock(&ipvs->sync_buff_lock); 599 spin_unlock(&ipvs->sync_buff_lock);
514 600
515 /* synchronize its controller if it has */ 601 /* synchronize its controller if it has */
516 if (cp->control) 602 cp = cp->control;
517 ip_vs_sync_conn(net, cp->control); 603 if (cp) {
604 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
605 pkts = atomic_add_return(1, &cp->in_pkts);
606 else
607 pkts = sysctl_sync_threshold(ipvs);
608 ip_vs_sync_conn(net, cp->control, pkts);
609 }
518} 610}
519 611
520/* 612/*
@@ -522,7 +614,7 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
522 * Called by ip_vs_in. 614 * Called by ip_vs_in.
523 * Sending Version 1 messages 615 * Sending Version 1 messages
524 */ 616 */
525void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) 617void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
526{ 618{
527 struct netns_ipvs *ipvs = net_ipvs(net); 619 struct netns_ipvs *ipvs = net_ipvs(net);
528 struct ip_vs_sync_mesg *m; 620 struct ip_vs_sync_mesg *m;
@@ -532,13 +624,16 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
532 624
533 /* Handle old version of the protocol */ 625 /* Handle old version of the protocol */
534 if (sysctl_sync_ver(ipvs) == 0) { 626 if (sysctl_sync_ver(ipvs) == 0) {
535 ip_vs_sync_conn_v0(net, cp); 627 ip_vs_sync_conn_v0(net, cp, pkts);
536 return; 628 return;
537 } 629 }
538 /* Do not sync ONE PACKET */ 630 /* Do not sync ONE PACKET */
539 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 631 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
540 goto control; 632 goto control;
541sloop: 633sloop:
634 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
635 goto control;
636
542 /* Sanity checks */ 637 /* Sanity checks */
543 pe_name_len = 0; 638 pe_name_len = 0;
544 if (cp->pe_data_len) { 639 if (cp->pe_data_len) {
@@ -653,16 +748,10 @@ control:
653 cp = cp->control; 748 cp = cp->control;
654 if (!cp) 749 if (!cp)
655 return; 750 return;
656 /* 751 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
657 * Reduce sync rate for templates 752 pkts = atomic_add_return(1, &cp->in_pkts);
658 * i.e only increment in_pkts for Templates. 753 else
659 */ 754 pkts = sysctl_sync_threshold(ipvs);
660 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
661 int pkts = atomic_add_return(1, &cp->in_pkts);
662
663 if (pkts % sysctl_sync_period(ipvs) != 1)
664 return;
665 }
666 goto sloop; 755 goto sloop;
667} 756}
668 757
@@ -1494,7 +1583,7 @@ next_sync_buff(struct netns_ipvs *ipvs)
1494 if (sb) 1583 if (sb)
1495 return sb; 1584 return sb;
1496 /* Do not delay entries in buffer for more than 2 seconds */ 1585 /* Do not delay entries in buffer for more than 2 seconds */
1497 return get_curr_sync_buff(ipvs, 2 * HZ); 1586 return get_curr_sync_buff(ipvs, IPVS_SYNC_FLUSH_TIME);
1498} 1587}
1499 1588
1500static int sync_thread_master(void *data) 1589static int sync_thread_master(void *data)