aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding
diff options
context:
space:
mode:
authordingtianhong <dingtianhong@huawei.com>2014-02-25 22:05:23 -0500
committerDavid S. Miller <davem@davemloft.net>2014-02-26 16:02:56 -0500
commitb0929915e0356acedf59504521c097ecada88b19 (patch)
treedc6e143976647fb829bd75bdaeeffb9d8b201014 /drivers/net/bonding
parent5e5b066535f0ee58e5de3a2db5fb56fa3cd7e3b1 (diff)
bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for ab arp monitor
Veaceslav has reported and fix this problem by commit f2ebd477f141bc0 (bonding: restructure locking of bond_ab_arp_probe()). According Jay's opinion, the current solution is not very well, because the notification is to indicate that the interface has actually changed state in a meaningful way, but these calls in the ab ARP monitor are internal settings of the flags to allow the ARP monitor to search for a slave to become active when there are no active slaves. The flag setting to active or backup is to permit the ARP monitor's response logic to do the right thing when deciding if the test slave (current_arp_slave) is up or not. So the best way to fix the problem is that we should not send a notification when the slave is in testing state, and check the state at the end of the monitor, if the slave's state recover, avoid to send pointless notification twice. And RTNL is really a big lock, hold it regardless the slave's state changed or not when the current_active_slave is null will loss performance (every 100ms), so we should hold it only when the slave's state changed and need to notify. I revert the old commit and add new modifications. Cc: Jay Vosburgh <fubar@us.ibm.com> Cc: Veaceslav Falico <vfalico@redhat.com> Cc: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r--drivers/net/bonding/bond_3ad.c8
-rw-r--r--drivers/net/bonding/bond_main.c80
-rw-r--r--drivers/net/bonding/bonding.h13
3 files changed, 55 insertions, 46 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 6826e4f61060..dcde56057fe1 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2130,13 +2130,7 @@ re_arm:
2130 read_unlock(&bond->lock); 2130 read_unlock(&bond->lock);
2131 2131
2132 if (should_notify_rtnl && rtnl_trylock()) { 2132 if (should_notify_rtnl && rtnl_trylock()) {
2133 bond_for_each_slave(bond, slave, iter) { 2133 bond_slave_state_notify(bond);
2134 if (slave->should_notify) {
2135 rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0,
2136 GFP_KERNEL);
2137 slave->should_notify = 0;
2138 }
2139 }
2140 rtnl_unlock(); 2134 rtnl_unlock();
2141 } 2135 }
2142 queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); 2136 queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e02029bbf5cc..82b70ff1fd28 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2623,17 +2623,17 @@ do_failover:
2623 2623
2624/* 2624/*
2625 * Send ARP probes for active-backup mode ARP monitor. 2625 * Send ARP probes for active-backup mode ARP monitor.
2626 *
2627 * Called with rcu_read_lock hold.
2626 */ 2628 */
2627static bool bond_ab_arp_probe(struct bonding *bond) 2629static bool bond_ab_arp_probe(struct bonding *bond)
2628{ 2630{
2629 struct slave *slave, *before = NULL, *new_slave = NULL, 2631 struct slave *slave, *before = NULL, *new_slave = NULL,
2630 *curr_arp_slave, *curr_active_slave; 2632 *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
2633 *curr_active_slave = rcu_dereference(bond->curr_active_slave);
2631 struct list_head *iter; 2634 struct list_head *iter;
2632 bool found = false; 2635 bool found = false;
2633 2636 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
2634 rcu_read_lock();
2635 curr_arp_slave = rcu_dereference(bond->current_arp_slave);
2636 curr_active_slave = rcu_dereference(bond->curr_active_slave);
2637 2637
2638 if (curr_arp_slave && curr_active_slave) 2638 if (curr_arp_slave && curr_active_slave)
2639 pr_info("PROBE: c_arp %s && cas %s BAD\n", 2639 pr_info("PROBE: c_arp %s && cas %s BAD\n",
@@ -2642,32 +2642,23 @@ static bool bond_ab_arp_probe(struct bonding *bond)
2642 2642
2643 if (curr_active_slave) { 2643 if (curr_active_slave) {
2644 bond_arp_send_all(bond, curr_active_slave); 2644 bond_arp_send_all(bond, curr_active_slave);
2645 rcu_read_unlock(); 2645 return should_notify_rtnl;
2646 return true;
2647 } 2646 }
2648 rcu_read_unlock();
2649 2647
2650 /* if we don't have a curr_active_slave, search for the next available 2648 /* if we don't have a curr_active_slave, search for the next available
2651 * backup slave from the current_arp_slave and make it the candidate 2649 * backup slave from the current_arp_slave and make it the candidate
2652 * for becoming the curr_active_slave 2650 * for becoming the curr_active_slave
2653 */ 2651 */
2654 2652
2655 if (!rtnl_trylock())
2656 return false;
2657 /* curr_arp_slave might have gone away */
2658 curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave);
2659
2660 if (!curr_arp_slave) { 2653 if (!curr_arp_slave) {
2661 curr_arp_slave = bond_first_slave(bond); 2654 curr_arp_slave = bond_first_slave_rcu(bond);
2662 if (!curr_arp_slave) { 2655 if (!curr_arp_slave)
2663 rtnl_unlock(); 2656 return should_notify_rtnl;
2664 return true;
2665 }
2666 } 2657 }
2667 2658
2668 bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_NOW); 2659 bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER);
2669 2660
2670 bond_for_each_slave(bond, slave, iter) { 2661 bond_for_each_slave_rcu(bond, slave, iter) {
2671 if (!found && !before && IS_UP(slave->dev)) 2662 if (!found && !before && IS_UP(slave->dev))
2672 before = slave; 2663 before = slave;
2673 2664
@@ -2686,7 +2677,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
2686 slave->link_failure_count++; 2677 slave->link_failure_count++;
2687 2678
2688 bond_set_slave_inactive_flags(slave, 2679 bond_set_slave_inactive_flags(slave,
2689 BOND_SLAVE_NOTIFY_NOW); 2680 BOND_SLAVE_NOTIFY_LATER);
2690 2681
2691 pr_info("%s: backup interface %s is now down.\n", 2682 pr_info("%s: backup interface %s is now down.\n",
2692 bond->dev->name, slave->dev->name); 2683 bond->dev->name, slave->dev->name);
@@ -2698,26 +2689,31 @@ static bool bond_ab_arp_probe(struct bonding *bond)
2698 if (!new_slave && before) 2689 if (!new_slave && before)
2699 new_slave = before; 2690 new_slave = before;
2700 2691
2701 if (!new_slave) { 2692 if (!new_slave)
2702 rtnl_unlock(); 2693 goto check_state;
2703 return true;
2704 }
2705 2694
2706 new_slave->link = BOND_LINK_BACK; 2695 new_slave->link = BOND_LINK_BACK;
2707 bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); 2696 bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
2708 bond_arp_send_all(bond, new_slave); 2697 bond_arp_send_all(bond, new_slave);
2709 new_slave->jiffies = jiffies; 2698 new_slave->jiffies = jiffies;
2710 rcu_assign_pointer(bond->current_arp_slave, new_slave); 2699 rcu_assign_pointer(bond->current_arp_slave, new_slave);
2711 rtnl_unlock();
2712 2700
2713 return true; 2701check_state:
2702 bond_for_each_slave_rcu(bond, slave, iter) {
2703 if (slave->should_notify) {
2704 should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW;
2705 break;
2706 }
2707 }
2708 return should_notify_rtnl;
2714} 2709}
2715 2710
2716static void bond_activebackup_arp_mon(struct work_struct *work) 2711static void bond_activebackup_arp_mon(struct work_struct *work)
2717{ 2712{
2718 struct bonding *bond = container_of(work, struct bonding, 2713 struct bonding *bond = container_of(work, struct bonding,
2719 arp_work.work); 2714 arp_work.work);
2720 bool should_notify_peers = false, should_commit = false; 2715 bool should_notify_peers = false;
2716 bool should_notify_rtnl = false;
2721 int delta_in_ticks; 2717 int delta_in_ticks;
2722 2718
2723 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2719 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -2726,11 +2722,12 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
2726 goto re_arm; 2722 goto re_arm;
2727 2723
2728 rcu_read_lock(); 2724 rcu_read_lock();
2725
2729 should_notify_peers = bond_should_notify_peers(bond); 2726 should_notify_peers = bond_should_notify_peers(bond);
2730 should_commit = bond_ab_arp_inspect(bond);
2731 rcu_read_unlock();
2732 2727
2733 if (should_commit) { 2728 if (bond_ab_arp_inspect(bond)) {
2729 rcu_read_unlock();
2730
2734 /* Race avoidance with bond_close flush of workqueue */ 2731 /* Race avoidance with bond_close flush of workqueue */
2735 if (!rtnl_trylock()) { 2732 if (!rtnl_trylock()) {
2736 delta_in_ticks = 1; 2733 delta_in_ticks = 1;
@@ -2739,23 +2736,28 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
2739 } 2736 }
2740 2737
2741 bond_ab_arp_commit(bond); 2738 bond_ab_arp_commit(bond);
2739
2742 rtnl_unlock(); 2740 rtnl_unlock();
2741 rcu_read_lock();
2743 } 2742 }
2744 2743
2745 if (!bond_ab_arp_probe(bond)) { 2744 should_notify_rtnl = bond_ab_arp_probe(bond);
2746 /* rtnl locking failed, re-arm */ 2745 rcu_read_unlock();
2747 delta_in_ticks = 1;
2748 should_notify_peers = false;
2749 }
2750 2746
2751re_arm: 2747re_arm:
2752 if (bond->params.arp_interval) 2748 if (bond->params.arp_interval)
2753 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 2749 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
2754 2750
2755 if (should_notify_peers) { 2751 if (should_notify_peers || should_notify_rtnl) {
2756 if (!rtnl_trylock()) 2752 if (!rtnl_trylock())
2757 return; 2753 return;
2758 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); 2754
2755 if (should_notify_peers)
2756 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
2757 bond->dev);
2758 if (should_notify_rtnl)
2759 bond_slave_state_notify(bond);
2760
2759 rtnl_unlock(); 2761 rtnl_unlock();
2760 } 2762 }
2761} 2763}
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 9b280ac8c454..2b0fdec695f7 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -335,6 +335,19 @@ static inline void bond_slave_state_change(struct bonding *bond)
335 } 335 }
336} 336}
337 337
338static inline void bond_slave_state_notify(struct bonding *bond)
339{
340 struct list_head *iter;
341 struct slave *tmp;
342
343 bond_for_each_slave(bond, tmp, iter) {
344 if (tmp->should_notify) {
345 rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_KERNEL);
346 tmp->should_notify = 0;
347 }
348 }
349}
350
338static inline int bond_slave_state(struct slave *slave) 351static inline int bond_slave_state(struct slave *slave)
339{ 352{
340 return slave->backup; 353 return slave->backup;