aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
author Jiri Bohac <jbohac@suse.cz> 2012-08-30 08:02:47 -0400
committer David S. Miller <davem@davemloft.net> 2012-08-31 16:37:12 -0400
commit da210f559019ba1cd4ebee2a28ad158bfb95bab2 (patch)
tree 9dd79ea671975ded4249dd6643f69de104c5580d /drivers/net/bonding/bond_main.c
parent eb7e0575966f9c84434e92c8a3f69719cc2e7571 (diff)
bonding: add some slack to arp monitoring time limits
Currently, all the time limits in the bonding ARP monitor are in multiples of arp_interval -- the time interval at which the ARP monitor is periodically scheduled.

With a fast network round-trip and a little scheduling latency of the ARP monitor work, a limit of n*delta_in_ticks may effectively mean (n-1)*delta_in_ticks.

This is fatal in case of n==1 (the link will stay down forever) and makes the behaviour non-deterministic in all the other cases.

Add a delta_in_ticks/2 time slack to all the time limits.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- drivers/net/bonding/bond_main.c 31
1 files changed, 20 insertions, 11 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b24ce257ac7b..7858c58df4a3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2811 arp_work.work); 2811 arp_work.work);
2812 struct slave *slave, *oldcurrent; 2812 struct slave *slave, *oldcurrent;
2813 int do_failover = 0; 2813 int do_failover = 0;
2814 int delta_in_ticks; 2814 int delta_in_ticks, extra_ticks;
2815 int i; 2815 int i;
2816 2816
2817 read_lock(&bond->lock); 2817 read_lock(&bond->lock);
2818 2818
2819 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2819 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2820 extra_ticks = delta_in_ticks / 2;
2820 2821
2821 if (bond->slave_cnt == 0) 2822 if (bond->slave_cnt == 0)
2822 goto re_arm; 2823 goto re_arm;
@@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2839 if (slave->link != BOND_LINK_UP) { 2840 if (slave->link != BOND_LINK_UP) {
2840 if (time_in_range(jiffies, 2841 if (time_in_range(jiffies,
2841 trans_start - delta_in_ticks, 2842 trans_start - delta_in_ticks,
2842 trans_start + delta_in_ticks) && 2843 trans_start + delta_in_ticks + extra_ticks) &&
2843 time_in_range(jiffies, 2844 time_in_range(jiffies,
2844 slave->dev->last_rx - delta_in_ticks, 2845 slave->dev->last_rx - delta_in_ticks,
2845 slave->dev->last_rx + delta_in_ticks)) { 2846 slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
2846 2847
2847 slave->link = BOND_LINK_UP; 2848 slave->link = BOND_LINK_UP;
2848 bond_set_active_slave(slave); 2849 bond_set_active_slave(slave);
@@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2872 */ 2873 */
2873 if (!time_in_range(jiffies, 2874 if (!time_in_range(jiffies,
2874 trans_start - delta_in_ticks, 2875 trans_start - delta_in_ticks,
2875 trans_start + 2 * delta_in_ticks) || 2876 trans_start + 2 * delta_in_ticks + extra_ticks) ||
2876 !time_in_range(jiffies, 2877 !time_in_range(jiffies,
2877 slave->dev->last_rx - delta_in_ticks, 2878 slave->dev->last_rx - delta_in_ticks,
2878 slave->dev->last_rx + 2 * delta_in_ticks)) { 2879 slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
2879 2880
2880 slave->link = BOND_LINK_DOWN; 2881 slave->link = BOND_LINK_DOWN;
2881 bond_set_backup_slave(slave); 2882 bond_set_backup_slave(slave);
@@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2933 struct slave *slave; 2934 struct slave *slave;
2934 int i, commit = 0; 2935 int i, commit = 0;
2935 unsigned long trans_start; 2936 unsigned long trans_start;
2937 int extra_ticks;
2938
2939 /* All the time comparisons below need some extra time. Otherwise, on
2940 * fast networks the ARP probe/reply may arrive within the same jiffy
2941 * as it was sent. Then, the next time the ARP monitor is run, one
2942 * arp_interval will already have passed in the comparisons.
2943 */
2944 extra_ticks = delta_in_ticks / 2;
2936 2945
2937 bond_for_each_slave(bond, slave, i) { 2946 bond_for_each_slave(bond, slave, i) {
2938 slave->new_link = BOND_LINK_NOCHANGE; 2947 slave->new_link = BOND_LINK_NOCHANGE;
@@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2940 if (slave->link != BOND_LINK_UP) { 2949 if (slave->link != BOND_LINK_UP) {
2941 if (time_in_range(jiffies, 2950 if (time_in_range(jiffies,
2942 slave_last_rx(bond, slave) - delta_in_ticks, 2951 slave_last_rx(bond, slave) - delta_in_ticks,
2943 slave_last_rx(bond, slave) + delta_in_ticks)) { 2952 slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
2944 2953
2945 slave->new_link = BOND_LINK_UP; 2954 slave->new_link = BOND_LINK_UP;
2946 commit++; 2955 commit++;
@@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2956 */ 2965 */
2957 if (time_in_range(jiffies, 2966 if (time_in_range(jiffies,
2958 slave->jiffies - delta_in_ticks, 2967 slave->jiffies - delta_in_ticks,
2959 slave->jiffies + 2 * delta_in_ticks)) 2968 slave->jiffies + 2 * delta_in_ticks + extra_ticks))
2960 continue; 2969 continue;
2961 2970
2962 /* 2971 /*
@@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2976 !bond->current_arp_slave && 2985 !bond->current_arp_slave &&
2977 !time_in_range(jiffies, 2986 !time_in_range(jiffies,
2978 slave_last_rx(bond, slave) - delta_in_ticks, 2987 slave_last_rx(bond, slave) - delta_in_ticks,
2979 slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { 2988 slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
2980 2989
2981 slave->new_link = BOND_LINK_DOWN; 2990 slave->new_link = BOND_LINK_DOWN;
2982 commit++; 2991 commit++;
@@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2992 if (bond_is_active_slave(slave) && 3001 if (bond_is_active_slave(slave) &&
2993 (!time_in_range(jiffies, 3002 (!time_in_range(jiffies,
2994 trans_start - delta_in_ticks, 3003 trans_start - delta_in_ticks,
2995 trans_start + 2 * delta_in_ticks) || 3004 trans_start + 2 * delta_in_ticks + extra_ticks) ||
2996 !time_in_range(jiffies, 3005 !time_in_range(jiffies,
2997 slave_last_rx(bond, slave) - delta_in_ticks, 3006 slave_last_rx(bond, slave) - delta_in_ticks,
2998 slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { 3007 slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
2999 3008
3000 slave->new_link = BOND_LINK_DOWN; 3009 slave->new_link = BOND_LINK_DOWN;
3001 commit++; 3010 commit++;
@@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
3027 if ((!bond->curr_active_slave && 3036 if ((!bond->curr_active_slave &&
3028 time_in_range(jiffies, 3037 time_in_range(jiffies,
3029 trans_start - delta_in_ticks, 3038 trans_start - delta_in_ticks,
3030 trans_start + delta_in_ticks)) || 3039 trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
3031 bond->curr_active_slave != slave) { 3040 bond->curr_active_slave != slave) {
3032 slave->link = BOND_LINK_UP; 3041 slave->link = BOND_LINK_UP;
3033 if (bond->current_arp_slave) { 3042 if (bond->current_arp_slave) {