aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 21:40:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 21:40:54 -0400
commit35a9ad8af0bb0fa3525e6d0d20e32551d226f38e (patch)
tree15b4b33206818886d9cff371fd2163e073b70568 /drivers/net/bonding
parentd5935b07da53f74726e2a65dd4281d0f2c70e5d4 (diff)
parent64b1f00a0830e1c53874067273a096b228d83d36 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Most notable changes in here: 1) By far the biggest accomplishment, thanks to a large range of contributors, is the addition of multi-send for transmit. This is the result of discussions back in Chicago, and the hard work of several individuals. Now, when the ->ndo_start_xmit() method of a driver sees skb->xmit_more as true, it can choose to defer the doorbell telling the driver to start processing the new TX queue entries. skb->xmit_more means that the generic networking is guaranteed to call the driver immediately with another SKB to send. There is logic added to the qdisc layer to dequeue multiple packets at a time, and the handling of mis-predicted offloads in software is now done with no locks held. Finally, pktgen is extended to have a "burst" parameter that can be used to test a multi-send implementation. Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4, virtio_net Adding support is almost trivial, so expect more drivers to support this optimization soon. I want to thank, in no particular or implied order, Jesper Dangaard Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal Hadi Salim, John Fastabend, Florian Westphal, Daniel Borkmann, David Tat, Hannes Frederic Sowa, and Rusty Russell. 2) PTP and timestamping support in bnx2x, from Michal Kalderon. 3) Allow adjusting the rx_copybreak threshold for a driver via ethtool, and add rx_copybreak support to enic driver. From Govindarajulu Varadarajan. 4) Significant enhancements to the generic PHY layer and the bcm7xxx driver in particular (EEE support, auto power down, etc.) from Florian Fainelli. 5) Allow raw buffers to be used for flow dissection, allowing drivers to determine the optimal "linear pull" size for devices that DMA into pools of pages. The objective is to get exactly the necessary amount of headers into the linear SKB area pre-pulled, but no more. The new interface drivers use is eth_get_headlen(). 
From WANG Cong, with driver conversions (several had their own by-hand duplicated implementations) by Alexander Duyck and Eric Dumazet. 6) Support checksumming more smoothly and efficiently for encapsulations, and add "foo over UDP" facility. From Tom Herbert. 7) Add Broadcom SF2 switch driver to DSA layer, from Florian Fainelli. 8) eBPF now can load programs via a system call and has an extensive testsuite. Alexei Starovoitov and Daniel Borkmann. 9) Major overhaul of the packet scheduler to use RCU in several major areas such as the classifiers and rate estimators. From John Fastabend. 10) Add driver for Intel FM10000 Ethernet Switch, from Alexander Duyck. 11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric Dumazet. 12) Add Datacenter TCP congestion control algorithm support, From Florian Westphal. 13) Reorganize sk_buff so that __copy_skb_header() is significantly faster. From Eric Dumazet" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits) netlabel: directly return netlbl_unlabel_genl_init() net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers net: description of dma_cookie cause make xmldocs warning cxgb4: clean up a type issue cxgb4: potential shift wrapping bug i40e: skb->xmit_more support net: fs_enet: Add NAPI TX net: fs_enet: Remove non NAPI RX r8169:add support for RTL8168EP net_sched: copy exts->type in tcf_exts_change() wimax: convert printk to pr_foo() af_unix: remove 0 assignment on static ipv6: Do not warn for informational ICMP messages, regardless of type. Update Intel Ethernet Driver maintainers list bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING tipc: fix bug in multicast congestion handling net: better IFF_XMIT_DST_RELEASE support net/mlx4_en: remove NETDEV_TX_BUSY 3c59x: fix bad split of cpu_to_le32(pci_map_single()) net: bcmgenet: fix Tx ring priority programming ...
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r--drivers/net/bonding/bond_3ad.c230
-rw-r--r--drivers/net/bonding/bond_3ad.h1
-rw-r--r--drivers/net/bonding/bond_alb.c305
-rw-r--r--drivers/net/bonding/bond_alb.h10
-rw-r--r--drivers/net/bonding/bond_debugfs.c8
-rw-r--r--drivers/net/bonding/bond_main.c629
-rw-r--r--drivers/net/bonding/bond_netlink.c41
-rw-r--r--drivers/net/bonding/bond_options.c39
-rw-r--r--drivers/net/bonding/bond_procfs.c27
-rw-r--r--drivers/net/bonding/bond_sysfs.c11
-rw-r--r--drivers/net/bonding/bonding.h45
11 files changed, 579 insertions, 767 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ee2c73a9de39..2110215f3528 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
102/* ================= main 802.3ad protocol functions ================== */ 102/* ================= main 802.3ad protocol functions ================== */
103static int ad_lacpdu_send(struct port *port); 103static int ad_lacpdu_send(struct port *port);
104static int ad_marker_send(struct port *port, struct bond_marker *marker); 104static int ad_marker_send(struct port *port, struct bond_marker *marker);
105static void ad_mux_machine(struct port *port); 105static void ad_mux_machine(struct port *port, bool *update_slave_arr);
106static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); 106static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
107static void ad_tx_machine(struct port *port); 107static void ad_tx_machine(struct port *port);
108static void ad_periodic_machine(struct port *port); 108static void ad_periodic_machine(struct port *port);
109static void ad_port_selection_logic(struct port *port); 109static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
110static void ad_agg_selection_logic(struct aggregator *aggregator); 110static void ad_agg_selection_logic(struct aggregator *aggregator,
111 bool *update_slave_arr);
111static void ad_clear_agg(struct aggregator *aggregator); 112static void ad_clear_agg(struct aggregator *aggregator);
112static void ad_initialize_agg(struct aggregator *aggregator); 113static void ad_initialize_agg(struct aggregator *aggregator);
113static void ad_initialize_port(struct port *port, int lacp_fast); 114static void ad_initialize_port(struct port *port, int lacp_fast);
114static void ad_enable_collecting_distributing(struct port *port); 115static void ad_enable_collecting_distributing(struct port *port,
115static void ad_disable_collecting_distributing(struct port *port); 116 bool *update_slave_arr);
117static void ad_disable_collecting_distributing(struct port *port,
118 bool *update_slave_arr);
116static void ad_marker_info_received(struct bond_marker *marker_info, 119static void ad_marker_info_received(struct bond_marker *marker_info,
117 struct port *port); 120 struct port *port);
118static void ad_marker_response_received(struct bond_marker *marker, 121static void ad_marker_response_received(struct bond_marker *marker,
@@ -234,24 +237,6 @@ static inline int __check_agg_selection_timer(struct port *port)
234} 237}
235 238
236/** 239/**
237 * __get_state_machine_lock - lock the port's state machines
238 * @port: the port we're looking at
239 */
240static inline void __get_state_machine_lock(struct port *port)
241{
242 spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
243}
244
245/**
246 * __release_state_machine_lock - unlock the port's state machines
247 * @port: the port we're looking at
248 */
249static inline void __release_state_machine_lock(struct port *port)
250{
251 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
252}
253
254/**
255 * __get_link_speed - get a port's speed 240 * __get_link_speed - get a port's speed
256 * @port: the port we're looking at 241 * @port: the port we're looking at
257 * 242 *
@@ -315,15 +300,14 @@ static u16 __get_link_speed(struct port *port)
315static u8 __get_duplex(struct port *port) 300static u8 __get_duplex(struct port *port)
316{ 301{
317 struct slave *slave = port->slave; 302 struct slave *slave = port->slave;
318
319 u8 retval; 303 u8 retval;
320 304
321 /* handling a special case: when the configuration starts with 305 /* handling a special case: when the configuration starts with
322 * link down, it sets the duplex to 0. 306 * link down, it sets the duplex to 0.
323 */ 307 */
324 if (slave->link != BOND_LINK_UP) 308 if (slave->link != BOND_LINK_UP) {
325 retval = 0x0; 309 retval = 0x0;
326 else { 310 } else {
327 switch (slave->duplex) { 311 switch (slave->duplex) {
328 case DUPLEX_FULL: 312 case DUPLEX_FULL:
329 retval = 0x1; 313 retval = 0x1;
@@ -341,16 +325,6 @@ static u8 __get_duplex(struct port *port)
341 return retval; 325 return retval;
342} 326}
343 327
344/**
345 * __initialize_port_locks - initialize a port's STATE machine spinlock
346 * @port: the slave of the port we're looking at
347 */
348static inline void __initialize_port_locks(struct slave *slave)
349{
350 /* make sure it isn't called twice */
351 spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock));
352}
353
354/* Conversions */ 328/* Conversions */
355 329
356/** 330/**
@@ -825,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
825/** 799/**
826 * ad_mux_machine - handle a port's mux state machine 800 * ad_mux_machine - handle a port's mux state machine
827 * @port: the port we're looking at 801 * @port: the port we're looking at
802 * @update_slave_arr: Does slave array need update?
828 */ 803 */
829static void ad_mux_machine(struct port *port) 804static void ad_mux_machine(struct port *port, bool *update_slave_arr)
830{ 805{
831 mux_states_t last_state; 806 mux_states_t last_state;
832 807
@@ -930,7 +905,8 @@ static void ad_mux_machine(struct port *port)
930 switch (port->sm_mux_state) { 905 switch (port->sm_mux_state) {
931 case AD_MUX_DETACHED: 906 case AD_MUX_DETACHED:
932 port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; 907 port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
933 ad_disable_collecting_distributing(port); 908 ad_disable_collecting_distributing(port,
909 update_slave_arr);
934 port->actor_oper_port_state &= ~AD_STATE_COLLECTING; 910 port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
935 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; 911 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
936 port->ntt = true; 912 port->ntt = true;
@@ -942,13 +918,15 @@ static void ad_mux_machine(struct port *port)
942 port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; 918 port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
943 port->actor_oper_port_state &= ~AD_STATE_COLLECTING; 919 port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
944 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; 920 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
945 ad_disable_collecting_distributing(port); 921 ad_disable_collecting_distributing(port,
922 update_slave_arr);
946 port->ntt = true; 923 port->ntt = true;
947 break; 924 break;
948 case AD_MUX_COLLECTING_DISTRIBUTING: 925 case AD_MUX_COLLECTING_DISTRIBUTING:
949 port->actor_oper_port_state |= AD_STATE_COLLECTING; 926 port->actor_oper_port_state |= AD_STATE_COLLECTING;
950 port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; 927 port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
951 ad_enable_collecting_distributing(port); 928 ad_enable_collecting_distributing(port,
929 update_slave_arr);
952 port->ntt = true; 930 port->ntt = true;
953 break; 931 break;
954 default: 932 default:
@@ -1216,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
1216/** 1194/**
1217 * ad_port_selection_logic - select aggregation groups 1195 * ad_port_selection_logic - select aggregation groups
1218 * @port: the port we're looking at 1196 * @port: the port we're looking at
1197 * @update_slave_arr: Does slave array need update?
1219 * 1198 *
1220 * Select aggregation groups, and assign each port for it's aggregetor. The 1199 * Select aggregation groups, and assign each port for it's aggregetor. The
1221 * selection logic is called in the inititalization (after all the handshkes), 1200 * selection logic is called in the inititalization (after all the handshkes),
1222 * and after every lacpdu receive (if selected is off). 1201 * and after every lacpdu receive (if selected is off).
1223 */ 1202 */
1224static void ad_port_selection_logic(struct port *port) 1203static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
1225{ 1204{
1226 struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; 1205 struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
1227 struct port *last_port = NULL, *curr_port; 1206 struct port *last_port = NULL, *curr_port;
@@ -1376,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
1376 __agg_ports_are_ready(port->aggregator)); 1355 __agg_ports_are_ready(port->aggregator));
1377 1356
1378 aggregator = __get_first_agg(port); 1357 aggregator = __get_first_agg(port);
1379 ad_agg_selection_logic(aggregator); 1358 ad_agg_selection_logic(aggregator, update_slave_arr);
1380} 1359}
1381 1360
1382/* Decide if "agg" is a better choice for the new active aggregator that 1361/* Decide if "agg" is a better choice for the new active aggregator that
@@ -1464,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
1464/** 1443/**
1465 * ad_agg_selection_logic - select an aggregation group for a team 1444 * ad_agg_selection_logic - select an aggregation group for a team
1466 * @aggregator: the aggregator we're looking at 1445 * @aggregator: the aggregator we're looking at
1446 * @update_slave_arr: Does slave array need update?
1467 * 1447 *
1468 * It is assumed that only one aggregator may be selected for a team. 1448 * It is assumed that only one aggregator may be selected for a team.
1469 * 1449 *
@@ -1486,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
1486 * __get_active_agg() won't work correctly. This function should be better 1466 * __get_active_agg() won't work correctly. This function should be better
1487 * called with the bond itself, and retrieve the first agg from it. 1467 * called with the bond itself, and retrieve the first agg from it.
1488 */ 1468 */
1489static void ad_agg_selection_logic(struct aggregator *agg) 1469static void ad_agg_selection_logic(struct aggregator *agg,
1470 bool *update_slave_arr)
1490{ 1471{
1491 struct aggregator *best, *active, *origin; 1472 struct aggregator *best, *active, *origin;
1492 struct bonding *bond = agg->slave->bond; 1473 struct bonding *bond = agg->slave->bond;
@@ -1579,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
1579 __disable_port(port); 1560 __disable_port(port);
1580 } 1561 }
1581 } 1562 }
1563 /* Slave array needs update. */
1564 *update_slave_arr = true;
1582 } 1565 }
1583 1566
1584 /* if the selected aggregator is of join individuals 1567 /* if the selected aggregator is of join individuals
@@ -1707,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
1707/** 1690/**
1708 * ad_enable_collecting_distributing - enable a port's transmit/receive 1691 * ad_enable_collecting_distributing - enable a port's transmit/receive
1709 * @port: the port we're looking at 1692 * @port: the port we're looking at
1693 * @update_slave_arr: Does slave array need update?
1710 * 1694 *
1711 * Enable @port if it's in an active aggregator 1695 * Enable @port if it's in an active aggregator
1712 */ 1696 */
1713static void ad_enable_collecting_distributing(struct port *port) 1697static void ad_enable_collecting_distributing(struct port *port,
1698 bool *update_slave_arr)
1714{ 1699{
1715 if (port->aggregator->is_active) { 1700 if (port->aggregator->is_active) {
1716 pr_debug("Enabling port %d(LAG %d)\n", 1701 pr_debug("Enabling port %d(LAG %d)\n",
1717 port->actor_port_number, 1702 port->actor_port_number,
1718 port->aggregator->aggregator_identifier); 1703 port->aggregator->aggregator_identifier);
1719 __enable_port(port); 1704 __enable_port(port);
1705 /* Slave array needs update */
1706 *update_slave_arr = true;
1720 } 1707 }
1721} 1708}
1722 1709
1723/** 1710/**
1724 * ad_disable_collecting_distributing - disable a port's transmit/receive 1711 * ad_disable_collecting_distributing - disable a port's transmit/receive
1725 * @port: the port we're looking at 1712 * @port: the port we're looking at
1713 * @update_slave_arr: Does slave array need update?
1726 */ 1714 */
1727static void ad_disable_collecting_distributing(struct port *port) 1715static void ad_disable_collecting_distributing(struct port *port,
1716 bool *update_slave_arr)
1728{ 1717{
1729 if (port->aggregator && 1718 if (port->aggregator &&
1730 !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), 1719 !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
@@ -1733,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
1733 port->actor_port_number, 1722 port->actor_port_number,
1734 port->aggregator->aggregator_identifier); 1723 port->aggregator->aggregator_identifier);
1735 __disable_port(port); 1724 __disable_port(port);
1725 /* Slave array needs an update */
1726 *update_slave_arr = true;
1736 } 1727 }
1737} 1728}
1738 1729
@@ -1843,7 +1834,6 @@ void bond_3ad_bind_slave(struct slave *slave)
1843 1834
1844 ad_initialize_port(port, bond->params.lacp_fast); 1835 ad_initialize_port(port, bond->params.lacp_fast);
1845 1836
1846 __initialize_port_locks(slave);
1847 port->slave = slave; 1837 port->slave = slave;
1848 port->actor_port_number = SLAVE_AD_INFO(slave)->id; 1838 port->actor_port_number = SLAVE_AD_INFO(slave)->id;
1849 /* key is determined according to the link speed, duplex and user key(which 1839 /* key is determined according to the link speed, duplex and user key(which
@@ -1898,7 +1888,10 @@ void bond_3ad_unbind_slave(struct slave *slave)
1898 struct bonding *bond = slave->bond; 1888 struct bonding *bond = slave->bond;
1899 struct slave *slave_iter; 1889 struct slave *slave_iter;
1900 struct list_head *iter; 1890 struct list_head *iter;
1891 bool dummy_slave_update; /* Ignore this value as caller updates array */
1901 1892
1893 /* Sync against bond_3ad_state_machine_handler() */
1894 spin_lock_bh(&bond->mode_lock);
1902 aggregator = &(SLAVE_AD_INFO(slave)->aggregator); 1895 aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
1903 port = &(SLAVE_AD_INFO(slave)->port); 1896 port = &(SLAVE_AD_INFO(slave)->port);
1904 1897
@@ -1906,7 +1899,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
1906 if (!port->slave) { 1899 if (!port->slave) {
1907 netdev_warn(bond->dev, "Trying to unbind an uninitialized port on %s\n", 1900 netdev_warn(bond->dev, "Trying to unbind an uninitialized port on %s\n",
1908 slave->dev->name); 1901 slave->dev->name);
1909 return; 1902 goto out;
1910 } 1903 }
1911 1904
1912 netdev_dbg(bond->dev, "Unbinding Link Aggregation Group %d\n", 1905 netdev_dbg(bond->dev, "Unbinding Link Aggregation Group %d\n",
@@ -1979,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
1979 ad_clear_agg(aggregator); 1972 ad_clear_agg(aggregator);
1980 1973
1981 if (select_new_active_agg) 1974 if (select_new_active_agg)
1982 ad_agg_selection_logic(__get_first_agg(port)); 1975 ad_agg_selection_logic(__get_first_agg(port),
1976 &dummy_slave_update);
1983 } else { 1977 } else {
1984 netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); 1978 netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
1985 } 1979 }
@@ -1994,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
1994 /* select new active aggregator */ 1988 /* select new active aggregator */
1995 temp_aggregator = __get_first_agg(port); 1989 temp_aggregator = __get_first_agg(port);
1996 if (temp_aggregator) 1990 if (temp_aggregator)
1997 ad_agg_selection_logic(temp_aggregator); 1991 ad_agg_selection_logic(temp_aggregator,
1992 &dummy_slave_update);
1998 } 1993 }
1999 } 1994 }
2000 } 1995 }
@@ -2024,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
2024 if (select_new_active_agg) { 2019 if (select_new_active_agg) {
2025 netdev_info(bond->dev, "Removing an active aggregator\n"); 2020 netdev_info(bond->dev, "Removing an active aggregator\n");
2026 /* select new active aggregator */ 2021 /* select new active aggregator */
2027 ad_agg_selection_logic(__get_first_agg(port)); 2022 ad_agg_selection_logic(__get_first_agg(port),
2023 &dummy_slave_update);
2028 } 2024 }
2029 } 2025 }
2030 break; 2026 break;
@@ -2032,6 +2028,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
2032 } 2028 }
2033 } 2029 }
2034 port->slave = NULL; 2030 port->slave = NULL;
2031
2032out:
2033 spin_unlock_bh(&bond->mode_lock);
2035} 2034}
2036 2035
2037/** 2036/**
@@ -2056,8 +2055,13 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2056 struct slave *slave; 2055 struct slave *slave;
2057 struct port *port; 2056 struct port *port;
2058 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; 2057 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
2058 bool update_slave_arr = false;
2059 2059
2060 read_lock(&bond->lock); 2060 /* Lock to protect data accessed by all (e.g., port->sm_vars) and
2061 * against running with bond_3ad_unbind_slave. ad_rx_machine may run
2062 * concurrently due to incoming LACPDU as well.
2063 */
2064 spin_lock_bh(&bond->mode_lock);
2061 rcu_read_lock(); 2065 rcu_read_lock();
2062 2066
2063 /* check if there are any slaves */ 2067 /* check if there are any slaves */
@@ -2079,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2079 } 2083 }
2080 2084
2081 aggregator = __get_first_agg(port); 2085 aggregator = __get_first_agg(port);
2082 ad_agg_selection_logic(aggregator); 2086 ad_agg_selection_logic(aggregator, &update_slave_arr);
2083 } 2087 }
2084 bond_3ad_set_carrier(bond); 2088 bond_3ad_set_carrier(bond);
2085 } 2089 }
@@ -2093,23 +2097,15 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2093 goto re_arm; 2097 goto re_arm;
2094 } 2098 }
2095 2099
2096 /* Lock around state machines to protect data accessed
2097 * by all (e.g., port->sm_vars). ad_rx_machine may run
2098 * concurrently due to incoming LACPDU.
2099 */
2100 __get_state_machine_lock(port);
2101
2102 ad_rx_machine(NULL, port); 2100 ad_rx_machine(NULL, port);
2103 ad_periodic_machine(port); 2101 ad_periodic_machine(port);
2104 ad_port_selection_logic(port); 2102 ad_port_selection_logic(port, &update_slave_arr);
2105 ad_mux_machine(port); 2103 ad_mux_machine(port, &update_slave_arr);
2106 ad_tx_machine(port); 2104 ad_tx_machine(port);
2107 2105
2108 /* turn off the BEGIN bit, since we already handled it */ 2106 /* turn off the BEGIN bit, since we already handled it */
2109 if (port->sm_vars & AD_PORT_BEGIN) 2107 if (port->sm_vars & AD_PORT_BEGIN)
2110 port->sm_vars &= ~AD_PORT_BEGIN; 2108 port->sm_vars &= ~AD_PORT_BEGIN;
2111
2112 __release_state_machine_lock(port);
2113 } 2109 }
2114 2110
2115re_arm: 2111re_arm:
@@ -2120,7 +2116,10 @@ re_arm:
2120 } 2116 }
2121 } 2117 }
2122 rcu_read_unlock(); 2118 rcu_read_unlock();
2123 read_unlock(&bond->lock); 2119 spin_unlock_bh(&bond->mode_lock);
2120
2121 if (update_slave_arr)
2122 bond_slave_arr_work_rearm(bond, 0);
2124 2123
2125 if (should_notify_rtnl && rtnl_trylock()) { 2124 if (should_notify_rtnl && rtnl_trylock()) {
2126 bond_slave_state_notify(bond); 2125 bond_slave_state_notify(bond);
@@ -2161,9 +2160,9 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave,
2161 netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n", 2160 netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n",
2162 port->actor_port_number); 2161 port->actor_port_number);
2163 /* Protect against concurrent state machines */ 2162 /* Protect against concurrent state machines */
2164 __get_state_machine_lock(port); 2163 spin_lock(&slave->bond->mode_lock);
2165 ad_rx_machine(lacpdu, port); 2164 ad_rx_machine(lacpdu, port);
2166 __release_state_machine_lock(port); 2165 spin_unlock(&slave->bond->mode_lock);
2167 break; 2166 break;
2168 2167
2169 case AD_TYPE_MARKER: 2168 case AD_TYPE_MARKER:
@@ -2213,7 +2212,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)
2213 return; 2212 return;
2214 } 2213 }
2215 2214
2216 __get_state_machine_lock(port); 2215 spin_lock_bh(&slave->bond->mode_lock);
2217 2216
2218 port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; 2217 port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
2219 port->actor_oper_port_key = port->actor_admin_port_key |= 2218 port->actor_oper_port_key = port->actor_admin_port_key |=
@@ -2224,7 +2223,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)
2224 */ 2223 */
2225 port->sm_vars |= AD_PORT_BEGIN; 2224 port->sm_vars |= AD_PORT_BEGIN;
2226 2225
2227 __release_state_machine_lock(port); 2226 spin_unlock_bh(&slave->bond->mode_lock);
2228} 2227}
2229 2228
2230/** 2229/**
@@ -2246,7 +2245,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
2246 return; 2245 return;
2247 } 2246 }
2248 2247
2249 __get_state_machine_lock(port); 2248 spin_lock_bh(&slave->bond->mode_lock);
2250 2249
2251 port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; 2250 port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
2252 port->actor_oper_port_key = port->actor_admin_port_key |= 2251 port->actor_oper_port_key = port->actor_admin_port_key |=
@@ -2257,7 +2256,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
2257 */ 2256 */
2258 port->sm_vars |= AD_PORT_BEGIN; 2257 port->sm_vars |= AD_PORT_BEGIN;
2259 2258
2260 __release_state_machine_lock(port); 2259 spin_unlock_bh(&slave->bond->mode_lock);
2261} 2260}
2262 2261
2263/** 2262/**
@@ -2280,7 +2279,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
2280 return; 2279 return;
2281 } 2280 }
2282 2281
2283 __get_state_machine_lock(port); 2282 spin_lock_bh(&slave->bond->mode_lock);
2284 /* on link down we are zeroing duplex and speed since 2283 /* on link down we are zeroing duplex and speed since
2285 * some of the adaptors(ce1000.lan) report full duplex/speed 2284 * some of the adaptors(ce1000.lan) report full duplex/speed
2286 * instead of N/A(duplex) / 0(speed). 2285 * instead of N/A(duplex) / 0(speed).
@@ -2311,7 +2310,12 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
2311 */ 2310 */
2312 port->sm_vars |= AD_PORT_BEGIN; 2311 port->sm_vars |= AD_PORT_BEGIN;
2313 2312
2314 __release_state_machine_lock(port); 2313 spin_unlock_bh(&slave->bond->mode_lock);
2314
2315 /* RTNL is held and mode_lock is released so it's safe
2316 * to update slave_array here.
2317 */
2318 bond_update_slave_arr(slave->bond, NULL);
2315} 2319}
2316 2320
2317/** 2321/**
@@ -2395,7 +2399,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
2395 return 0; 2399 return 0;
2396} 2400}
2397 2401
2398/* Wrapper used to hold bond->lock so no slave manipulation can occur */
2399int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) 2402int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
2400{ 2403{
2401 int ret; 2404 int ret;
@@ -2407,90 +2410,19 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
2407 return ret; 2410 return ret;
2408} 2411}
2409 2412
2410int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
2411{
2412 struct bonding *bond = netdev_priv(dev);
2413 struct slave *slave, *first_ok_slave;
2414 struct aggregator *agg;
2415 struct ad_info ad_info;
2416 struct list_head *iter;
2417 int slaves_in_agg;
2418 int slave_agg_no;
2419 int agg_id;
2420
2421 if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
2422 netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
2423 goto err_free;
2424 }
2425
2426 slaves_in_agg = ad_info.ports;
2427 agg_id = ad_info.aggregator_id;
2428
2429 if (slaves_in_agg == 0) {
2430 netdev_dbg(dev, "active aggregator is empty\n");
2431 goto err_free;
2432 }
2433
2434 slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
2435 first_ok_slave = NULL;
2436
2437 bond_for_each_slave_rcu(bond, slave, iter) {
2438 agg = SLAVE_AD_INFO(slave)->port.aggregator;
2439 if (!agg || agg->aggregator_identifier != agg_id)
2440 continue;
2441
2442 if (slave_agg_no >= 0) {
2443 if (!first_ok_slave && bond_slave_can_tx(slave))
2444 first_ok_slave = slave;
2445 slave_agg_no--;
2446 continue;
2447 }
2448
2449 if (bond_slave_can_tx(slave)) {
2450 bond_dev_queue_xmit(bond, skb, slave->dev);
2451 goto out;
2452 }
2453 }
2454
2455 if (slave_agg_no >= 0) {
2456 netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
2457 agg_id);
2458 goto err_free;
2459 }
2460
2461 /* we couldn't find any suitable slave after the agg_no, so use the
2462 * first suitable found, if found.
2463 */
2464 if (first_ok_slave)
2465 bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
2466 else
2467 goto err_free;
2468
2469out:
2470 return NETDEV_TX_OK;
2471err_free:
2472 /* no suitable interface, frame not sent */
2473 dev_kfree_skb_any(skb);
2474 goto out;
2475}
2476
2477int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, 2413int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
2478 struct slave *slave) 2414 struct slave *slave)
2479{ 2415{
2480 int ret = RX_HANDLER_ANOTHER;
2481 struct lacpdu *lacpdu, _lacpdu; 2416 struct lacpdu *lacpdu, _lacpdu;
2482 2417
2483 if (skb->protocol != PKT_TYPE_LACPDU) 2418 if (skb->protocol != PKT_TYPE_LACPDU)
2484 return ret; 2419 return RX_HANDLER_ANOTHER;
2485 2420
2486 lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); 2421 lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu);
2487 if (!lacpdu) 2422 if (!lacpdu)
2488 return ret; 2423 return RX_HANDLER_ANOTHER;
2489 2424
2490 read_lock(&bond->lock); 2425 return bond_3ad_rx_indication(lacpdu, slave, skb->len);
2491 ret = bond_3ad_rx_indication(lacpdu, slave, skb->len);
2492 read_unlock(&bond->lock);
2493 return ret;
2494} 2426}
2495 2427
2496/** 2428/**
@@ -2500,7 +2432,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
2500 * When modify lacp_rate parameter via sysfs, 2432 * When modify lacp_rate parameter via sysfs,
2501 * update actor_oper_port_state of each port. 2433 * update actor_oper_port_state of each port.
2502 * 2434 *
2503 * Hold slave->state_machine_lock, 2435 * Hold bond->mode_lock,
2504 * so we can modify port->actor_oper_port_state, 2436 * so we can modify port->actor_oper_port_state,
2505 * no matter bond is up or down. 2437 * no matter bond is up or down.
2506 */ 2438 */
@@ -2512,13 +2444,13 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
2512 int lacp_fast; 2444 int lacp_fast;
2513 2445
2514 lacp_fast = bond->params.lacp_fast; 2446 lacp_fast = bond->params.lacp_fast;
2447 spin_lock_bh(&bond->mode_lock);
2515 bond_for_each_slave(bond, slave, iter) { 2448 bond_for_each_slave(bond, slave, iter) {
2516 port = &(SLAVE_AD_INFO(slave)->port); 2449 port = &(SLAVE_AD_INFO(slave)->port);
2517 __get_state_machine_lock(port);
2518 if (lacp_fast) 2450 if (lacp_fast)
2519 port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; 2451 port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;
2520 else 2452 else
2521 port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT; 2453 port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT;
2522 __release_state_machine_lock(port);
2523 } 2454 }
2455 spin_unlock_bh(&bond->mode_lock);
2524} 2456}
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index bb03b1df2f3e..c5f14ac63f3e 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -259,7 +259,6 @@ struct ad_bond_info {
259struct ad_slave_info { 259struct ad_slave_info {
260 struct aggregator aggregator; /* 802.3ad aggregator structure */ 260 struct aggregator aggregator; /* 802.3ad aggregator structure */
261 struct port port; /* 802.3ad port structure */ 261 struct port port; /* 802.3ad port structure */
262 spinlock_t state_machine_lock; /* mutex state machines vs. incoming LACPDU */
263 u16 id; 262 u16 id;
264}; 263};
265 264
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 95dd1f58c260..d2eadab787c5 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -100,27 +100,6 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
100 100
101/*********************** tlb specific functions ***************************/ 101/*********************** tlb specific functions ***************************/
102 102
103static inline void _lock_tx_hashtbl_bh(struct bonding *bond)
104{
105 spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
106}
107
108static inline void _unlock_tx_hashtbl_bh(struct bonding *bond)
109{
110 spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
111}
112
113static inline void _lock_tx_hashtbl(struct bonding *bond)
114{
115 spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
116}
117
118static inline void _unlock_tx_hashtbl(struct bonding *bond)
119{
120 spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
121}
122
123/* Caller must hold tx_hashtbl lock */
124static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) 103static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
125{ 104{
126 if (save_load) { 105 if (save_load) {
@@ -140,7 +119,6 @@ static inline void tlb_init_slave(struct slave *slave)
140 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; 119 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
141} 120}
142 121
143/* Caller must hold bond lock for read, BH disabled */
144static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, 122static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
145 int save_load) 123 int save_load)
146{ 124{
@@ -163,13 +141,12 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
163 tlb_init_slave(slave); 141 tlb_init_slave(slave);
164} 142}
165 143
166/* Caller must hold bond lock for read */
167static void tlb_clear_slave(struct bonding *bond, struct slave *slave, 144static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
168 int save_load) 145 int save_load)
169{ 146{
170 _lock_tx_hashtbl_bh(bond); 147 spin_lock_bh(&bond->mode_lock);
171 __tlb_clear_slave(bond, slave, save_load); 148 __tlb_clear_slave(bond, slave, save_load);
172 _unlock_tx_hashtbl_bh(bond); 149 spin_unlock_bh(&bond->mode_lock);
173} 150}
174 151
175/* Must be called before starting the monitor timer */ 152/* Must be called before starting the monitor timer */
@@ -184,14 +161,14 @@ static int tlb_initialize(struct bonding *bond)
184 if (!new_hashtbl) 161 if (!new_hashtbl)
185 return -1; 162 return -1;
186 163
187 _lock_tx_hashtbl_bh(bond); 164 spin_lock_bh(&bond->mode_lock);
188 165
189 bond_info->tx_hashtbl = new_hashtbl; 166 bond_info->tx_hashtbl = new_hashtbl;
190 167
191 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) 168 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
192 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); 169 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
193 170
194 _unlock_tx_hashtbl_bh(bond); 171 spin_unlock_bh(&bond->mode_lock);
195 172
196 return 0; 173 return 0;
197} 174}
@@ -200,18 +177,13 @@ static int tlb_initialize(struct bonding *bond)
200static void tlb_deinitialize(struct bonding *bond) 177static void tlb_deinitialize(struct bonding *bond)
201{ 178{
202 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 179 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
203 struct tlb_up_slave *arr;
204 180
205 _lock_tx_hashtbl_bh(bond); 181 spin_lock_bh(&bond->mode_lock);
206 182
207 kfree(bond_info->tx_hashtbl); 183 kfree(bond_info->tx_hashtbl);
208 bond_info->tx_hashtbl = NULL; 184 bond_info->tx_hashtbl = NULL;
209 185
210 _unlock_tx_hashtbl_bh(bond); 186 spin_unlock_bh(&bond->mode_lock);
211
212 arr = rtnl_dereference(bond_info->slave_arr);
213 if (arr)
214 kfree_rcu(arr, rcu);
215} 187}
216 188
217static long long compute_gap(struct slave *slave) 189static long long compute_gap(struct slave *slave)
@@ -220,7 +192,6 @@ static long long compute_gap(struct slave *slave)
220 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ 192 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
221} 193}
222 194
223/* Caller must hold bond lock for read */
224static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) 195static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
225{ 196{
226 struct slave *slave, *least_loaded; 197 struct slave *slave, *least_loaded;
@@ -281,42 +252,23 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
281 return assigned_slave; 252 return assigned_slave;
282} 253}
283 254
284/* Caller must hold bond lock for read */
285static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, 255static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
286 u32 skb_len) 256 u32 skb_len)
287{ 257{
288 struct slave *tx_slave; 258 struct slave *tx_slave;
289 /* 259
290 * We don't need to disable softirq here, becase 260 /* We don't need to disable softirq here, becase
291 * tlb_choose_channel() is only called by bond_alb_xmit() 261 * tlb_choose_channel() is only called by bond_alb_xmit()
292 * which already has softirq disabled. 262 * which already has softirq disabled.
293 */ 263 */
294 _lock_tx_hashtbl(bond); 264 spin_lock(&bond->mode_lock);
295 tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); 265 tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
296 _unlock_tx_hashtbl(bond); 266 spin_unlock(&bond->mode_lock);
267
297 return tx_slave; 268 return tx_slave;
298} 269}
299 270
300/*********************** rlb specific functions ***************************/ 271/*********************** rlb specific functions ***************************/
301static inline void _lock_rx_hashtbl_bh(struct bonding *bond)
302{
303 spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
304}
305
306static inline void _unlock_rx_hashtbl_bh(struct bonding *bond)
307{
308 spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
309}
310
311static inline void _lock_rx_hashtbl(struct bonding *bond)
312{
313 spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
314}
315
316static inline void _unlock_rx_hashtbl(struct bonding *bond)
317{
318 spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
319}
320 272
321/* when an ARP REPLY is received from a client update its info 273/* when an ARP REPLY is received from a client update its info
322 * in the rx_hashtbl 274 * in the rx_hashtbl
@@ -327,7 +279,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
327 struct rlb_client_info *client_info; 279 struct rlb_client_info *client_info;
328 u32 hash_index; 280 u32 hash_index;
329 281
330 _lock_rx_hashtbl_bh(bond); 282 spin_lock_bh(&bond->mode_lock);
331 283
332 hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 284 hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
333 client_info = &(bond_info->rx_hashtbl[hash_index]); 285 client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -342,7 +294,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
342 bond_info->rx_ntt = 1; 294 bond_info->rx_ntt = 1;
343 } 295 }
344 296
345 _unlock_rx_hashtbl_bh(bond); 297 spin_unlock_bh(&bond->mode_lock);
346} 298}
347 299
348static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, 300static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
@@ -378,15 +330,15 @@ out:
378 return RX_HANDLER_ANOTHER; 330 return RX_HANDLER_ANOTHER;
379} 331}
380 332
381/* Caller must hold bond lock for read */ 333/* Caller must hold rcu_read_lock() */
382static struct slave *rlb_next_rx_slave(struct bonding *bond) 334static struct slave *__rlb_next_rx_slave(struct bonding *bond)
383{ 335{
384 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 336 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
385 struct slave *before = NULL, *rx_slave = NULL, *slave; 337 struct slave *before = NULL, *rx_slave = NULL, *slave;
386 struct list_head *iter; 338 struct list_head *iter;
387 bool found = false; 339 bool found = false;
388 340
389 bond_for_each_slave(bond, slave, iter) { 341 bond_for_each_slave_rcu(bond, slave, iter) {
390 if (!bond_slave_can_tx(slave)) 342 if (!bond_slave_can_tx(slave))
391 continue; 343 continue;
392 if (!found) { 344 if (!found) {
@@ -411,35 +363,16 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
411 return rx_slave; 363 return rx_slave;
412} 364}
413 365
414/* Caller must hold rcu_read_lock() for read */ 366/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */
415static struct slave *__rlb_next_rx_slave(struct bonding *bond) 367static struct slave *rlb_next_rx_slave(struct bonding *bond)
416{ 368{
417 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 369 struct slave *rx_slave;
418 struct slave *before = NULL, *rx_slave = NULL, *slave;
419 struct list_head *iter;
420 bool found = false;
421 370
422 bond_for_each_slave_rcu(bond, slave, iter) { 371 ASSERT_RTNL();
423 if (!bond_slave_can_tx(slave))
424 continue;
425 if (!found) {
426 if (!before || before->speed < slave->speed)
427 before = slave;
428 } else {
429 if (!rx_slave || rx_slave->speed < slave->speed)
430 rx_slave = slave;
431 }
432 if (slave == bond_info->rx_slave)
433 found = true;
434 }
435 /* we didn't find anything after the current or we have something
436 * better before and up to the current slave
437 */
438 if (!rx_slave || (before && rx_slave->speed < before->speed))
439 rx_slave = before;
440 372
441 if (rx_slave) 373 rcu_read_lock();
442 bond_info->rx_slave = rx_slave; 374 rx_slave = __rlb_next_rx_slave(bond);
375 rcu_read_unlock();
443 376
444 return rx_slave; 377 return rx_slave;
445} 378}
@@ -447,11 +380,11 @@ static struct slave *__rlb_next_rx_slave(struct bonding *bond)
447/* teach the switch the mac of a disabled slave 380/* teach the switch the mac of a disabled slave
448 * on the primary for fault tolerance 381 * on the primary for fault tolerance
449 * 382 *
450 * Caller must hold bond->curr_slave_lock for write or bond lock for write 383 * Caller must hold RTNL
451 */ 384 */
452static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) 385static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
453{ 386{
454 struct slave *curr_active = bond_deref_active_protected(bond); 387 struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
455 388
456 if (!curr_active) 389 if (!curr_active)
457 return; 390 return;
@@ -479,7 +412,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
479 u32 index, next_index; 412 u32 index, next_index;
480 413
481 /* clear slave from rx_hashtbl */ 414 /* clear slave from rx_hashtbl */
482 _lock_rx_hashtbl_bh(bond); 415 spin_lock_bh(&bond->mode_lock);
483 416
484 rx_hash_table = bond_info->rx_hashtbl; 417 rx_hash_table = bond_info->rx_hashtbl;
485 index = bond_info->rx_hashtbl_used_head; 418 index = bond_info->rx_hashtbl_used_head;
@@ -510,14 +443,10 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
510 } 443 }
511 } 444 }
512 445
513 _unlock_rx_hashtbl_bh(bond); 446 spin_unlock_bh(&bond->mode_lock);
514
515 write_lock_bh(&bond->curr_slave_lock);
516 447
517 if (slave != bond_deref_active_protected(bond)) 448 if (slave != rtnl_dereference(bond->curr_active_slave))
518 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); 449 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
519
520 write_unlock_bh(&bond->curr_slave_lock);
521} 450}
522 451
523static void rlb_update_client(struct rlb_client_info *client_info) 452static void rlb_update_client(struct rlb_client_info *client_info)
@@ -565,7 +494,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
565 struct rlb_client_info *client_info; 494 struct rlb_client_info *client_info;
566 u32 hash_index; 495 u32 hash_index;
567 496
568 _lock_rx_hashtbl_bh(bond); 497 spin_lock_bh(&bond->mode_lock);
569 498
570 hash_index = bond_info->rx_hashtbl_used_head; 499 hash_index = bond_info->rx_hashtbl_used_head;
571 for (; hash_index != RLB_NULL_INDEX; 500 for (; hash_index != RLB_NULL_INDEX;
@@ -583,7 +512,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
583 */ 512 */
584 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; 513 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
585 514
586 _unlock_rx_hashtbl_bh(bond); 515 spin_unlock_bh(&bond->mode_lock);
587} 516}
588 517
589/* The slave was assigned a new mac address - update the clients */ 518/* The slave was assigned a new mac address - update the clients */
@@ -594,7 +523,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
594 int ntt = 0; 523 int ntt = 0;
595 u32 hash_index; 524 u32 hash_index;
596 525
597 _lock_rx_hashtbl_bh(bond); 526 spin_lock_bh(&bond->mode_lock);
598 527
599 hash_index = bond_info->rx_hashtbl_used_head; 528 hash_index = bond_info->rx_hashtbl_used_head;
600 for (; hash_index != RLB_NULL_INDEX; 529 for (; hash_index != RLB_NULL_INDEX;
@@ -615,7 +544,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
615 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; 544 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
616 } 545 }
617 546
618 _unlock_rx_hashtbl_bh(bond); 547 spin_unlock_bh(&bond->mode_lock);
619} 548}
620 549
621/* mark all clients using src_ip to be updated */ 550/* mark all clients using src_ip to be updated */
@@ -625,7 +554,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
625 struct rlb_client_info *client_info; 554 struct rlb_client_info *client_info;
626 u32 hash_index; 555 u32 hash_index;
627 556
628 _lock_rx_hashtbl(bond); 557 spin_lock(&bond->mode_lock);
629 558
630 hash_index = bond_info->rx_hashtbl_used_head; 559 hash_index = bond_info->rx_hashtbl_used_head;
631 for (; hash_index != RLB_NULL_INDEX; 560 for (; hash_index != RLB_NULL_INDEX;
@@ -636,7 +565,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
636 netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); 565 netdev_err(bond->dev, "found a client with no channel in the client's hash table\n");
637 continue; 566 continue;
638 } 567 }
639 /*update all clients using this src_ip, that are not assigned 568 /* update all clients using this src_ip, that are not assigned
640 * to the team's address (curr_active_slave) and have a known 569 * to the team's address (curr_active_slave) and have a known
641 * unicast mac address. 570 * unicast mac address.
642 */ 571 */
@@ -649,10 +578,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
649 } 578 }
650 } 579 }
651 580
652 _unlock_rx_hashtbl(bond); 581 spin_unlock(&bond->mode_lock);
653} 582}
654 583
655/* Caller must hold both bond and ptr locks for read */
656static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) 584static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
657{ 585{
658 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 586 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -661,7 +589,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
661 struct rlb_client_info *client_info; 589 struct rlb_client_info *client_info;
662 u32 hash_index = 0; 590 u32 hash_index = 0;
663 591
664 _lock_rx_hashtbl(bond); 592 spin_lock(&bond->mode_lock);
665 593
666 curr_active_slave = rcu_dereference(bond->curr_active_slave); 594 curr_active_slave = rcu_dereference(bond->curr_active_slave);
667 595
@@ -680,7 +608,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
680 608
681 assigned_slave = client_info->slave; 609 assigned_slave = client_info->slave;
682 if (assigned_slave) { 610 if (assigned_slave) {
683 _unlock_rx_hashtbl(bond); 611 spin_unlock(&bond->mode_lock);
684 return assigned_slave; 612 return assigned_slave;
685 } 613 }
686 } else { 614 } else {
@@ -742,7 +670,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
742 } 670 }
743 } 671 }
744 672
745 _unlock_rx_hashtbl(bond); 673 spin_unlock(&bond->mode_lock);
746 674
747 return assigned_slave; 675 return assigned_slave;
748} 676}
@@ -763,9 +691,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
763 return NULL; 691 return NULL;
764 692
765 if (arp->op_code == htons(ARPOP_REPLY)) { 693 if (arp->op_code == htons(ARPOP_REPLY)) {
766 /* the arp must be sent on the selected 694 /* the arp must be sent on the selected rx channel */
767 * rx channel
768 */
769 tx_slave = rlb_choose_channel(skb, bond); 695 tx_slave = rlb_choose_channel(skb, bond);
770 if (tx_slave) 696 if (tx_slave)
771 ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr); 697 ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr);
@@ -795,7 +721,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
795 return tx_slave; 721 return tx_slave;
796} 722}
797 723
798/* Caller must hold bond lock for read */
799static void rlb_rebalance(struct bonding *bond) 724static void rlb_rebalance(struct bonding *bond)
800{ 725{
801 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 726 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -804,7 +729,7 @@ static void rlb_rebalance(struct bonding *bond)
804 int ntt; 729 int ntt;
805 u32 hash_index; 730 u32 hash_index;
806 731
807 _lock_rx_hashtbl_bh(bond); 732 spin_lock_bh(&bond->mode_lock);
808 733
809 ntt = 0; 734 ntt = 0;
810 hash_index = bond_info->rx_hashtbl_used_head; 735 hash_index = bond_info->rx_hashtbl_used_head;
@@ -822,10 +747,10 @@ static void rlb_rebalance(struct bonding *bond)
822 /* update the team's flag only after the whole iteration */ 747 /* update the team's flag only after the whole iteration */
823 if (ntt) 748 if (ntt)
824 bond_info->rx_ntt = 1; 749 bond_info->rx_ntt = 1;
825 _unlock_rx_hashtbl_bh(bond); 750 spin_unlock_bh(&bond->mode_lock);
826} 751}
827 752
828/* Caller must hold rx_hashtbl lock */ 753/* Caller must hold mode_lock */
829static void rlb_init_table_entry_dst(struct rlb_client_info *entry) 754static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
830{ 755{
831 entry->used_next = RLB_NULL_INDEX; 756 entry->used_next = RLB_NULL_INDEX;
@@ -913,15 +838,16 @@ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
913 bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; 838 bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
914} 839}
915 840
916/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does 841/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
917 * not match arp->mac_src */ 842 * not match arp->mac_src
843 */
918static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) 844static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
919{ 845{
920 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 846 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
921 u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 847 u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
922 u32 index; 848 u32 index;
923 849
924 _lock_rx_hashtbl_bh(bond); 850 spin_lock_bh(&bond->mode_lock);
925 851
926 index = bond_info->rx_hashtbl[ip_src_hash].src_first; 852 index = bond_info->rx_hashtbl[ip_src_hash].src_first;
927 while (index != RLB_NULL_INDEX) { 853 while (index != RLB_NULL_INDEX) {
@@ -932,7 +858,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
932 rlb_delete_table_entry(bond, index); 858 rlb_delete_table_entry(bond, index);
933 index = next_index; 859 index = next_index;
934 } 860 }
935 _unlock_rx_hashtbl_bh(bond); 861 spin_unlock_bh(&bond->mode_lock);
936} 862}
937 863
938static int rlb_initialize(struct bonding *bond) 864static int rlb_initialize(struct bonding *bond)
@@ -946,7 +872,7 @@ static int rlb_initialize(struct bonding *bond)
946 if (!new_hashtbl) 872 if (!new_hashtbl)
947 return -1; 873 return -1;
948 874
949 _lock_rx_hashtbl_bh(bond); 875 spin_lock_bh(&bond->mode_lock);
950 876
951 bond_info->rx_hashtbl = new_hashtbl; 877 bond_info->rx_hashtbl = new_hashtbl;
952 878
@@ -955,7 +881,7 @@ static int rlb_initialize(struct bonding *bond)
955 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) 881 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
956 rlb_init_table_entry(bond_info->rx_hashtbl + i); 882 rlb_init_table_entry(bond_info->rx_hashtbl + i);
957 883
958 _unlock_rx_hashtbl_bh(bond); 884 spin_unlock_bh(&bond->mode_lock);
959 885
960 /* register to receive ARPs */ 886 /* register to receive ARPs */
961 bond->recv_probe = rlb_arp_recv; 887 bond->recv_probe = rlb_arp_recv;
@@ -967,13 +893,13 @@ static void rlb_deinitialize(struct bonding *bond)
967{ 893{
968 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 894 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
969 895
970 _lock_rx_hashtbl_bh(bond); 896 spin_lock_bh(&bond->mode_lock);
971 897
972 kfree(bond_info->rx_hashtbl); 898 kfree(bond_info->rx_hashtbl);
973 bond_info->rx_hashtbl = NULL; 899 bond_info->rx_hashtbl = NULL;
974 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 900 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
975 901
976 _unlock_rx_hashtbl_bh(bond); 902 spin_unlock_bh(&bond->mode_lock);
977} 903}
978 904
979static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 905static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -981,7 +907,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
981 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 907 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
982 u32 curr_index; 908 u32 curr_index;
983 909
984 _lock_rx_hashtbl_bh(bond); 910 spin_lock_bh(&bond->mode_lock);
985 911
986 curr_index = bond_info->rx_hashtbl_used_head; 912 curr_index = bond_info->rx_hashtbl_used_head;
987 while (curr_index != RLB_NULL_INDEX) { 913 while (curr_index != RLB_NULL_INDEX) {
@@ -994,7 +920,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
994 curr_index = next_index; 920 curr_index = next_index;
995 } 921 }
996 922
997 _unlock_rx_hashtbl_bh(bond); 923 spin_unlock_bh(&bond->mode_lock);
998} 924}
999 925
1000/*********************** tlb/rlb shared functions *********************/ 926/*********************** tlb/rlb shared functions *********************/
@@ -1091,8 +1017,9 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
1091 return 0; 1017 return 0;
1092 } 1018 }
1093 1019
1094 /* for rlb each slave must have a unique hw mac addresses so that */ 1020 /* for rlb each slave must have a unique hw mac addresses so that
1095 /* each slave will receive packets destined to a different mac */ 1021 * each slave will receive packets destined to a different mac
1022 */
1096 memcpy(s_addr.sa_data, addr, dev->addr_len); 1023 memcpy(s_addr.sa_data, addr, dev->addr_len);
1097 s_addr.sa_family = dev->type; 1024 s_addr.sa_family = dev->type;
1098 if (dev_set_mac_address(dev, &s_addr)) { 1025 if (dev_set_mac_address(dev, &s_addr)) {
@@ -1103,13 +1030,10 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
1103 return 0; 1030 return 0;
1104} 1031}
1105 1032
1106/* 1033/* Swap MAC addresses between two slaves.
1107 * Swap MAC addresses between two slaves.
1108 * 1034 *
1109 * Called with RTNL held, and no other locks. 1035 * Called with RTNL held, and no other locks.
1110 *
1111 */ 1036 */
1112
1113static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) 1037static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
1114{ 1038{
1115 u8 tmp_mac_addr[ETH_ALEN]; 1039 u8 tmp_mac_addr[ETH_ALEN];
@@ -1120,8 +1044,7 @@ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
1120 1044
1121} 1045}
1122 1046
1123/* 1047/* Send learning packets after MAC address swap.
1124 * Send learning packets after MAC address swap.
1125 * 1048 *
1126 * Called with RTNL and no other locks 1049 * Called with RTNL and no other locks
1127 */ 1050 */
@@ -1194,7 +1117,6 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla
1194 found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); 1117 found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr);
1195 1118
1196 if (found_slave) { 1119 if (found_slave) {
1197 /* locking: needs RTNL and nothing else */
1198 alb_swap_mac_addr(slave, found_slave); 1120 alb_swap_mac_addr(slave, found_slave);
1199 alb_fasten_mac_swap(bond, slave, found_slave); 1121 alb_fasten_mac_swap(bond, slave, found_slave);
1200 } 1122 }
@@ -1243,7 +1165,8 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav
1243 return 0; 1165 return 0;
1244 1166
1245 /* Try setting slave mac to bond address and fall-through 1167 /* Try setting slave mac to bond address and fall-through
1246 to code handling that situation below... */ 1168 * to code handling that situation below...
1169 */
1247 alb_set_slave_mac_addr(slave, bond->dev->dev_addr); 1170 alb_set_slave_mac_addr(slave, bond->dev->dev_addr);
1248 } 1171 }
1249 1172
@@ -1351,7 +1274,6 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
1351 1274
1352 if (rlb_enabled) { 1275 if (rlb_enabled) {
1353 bond->alb_info.rlb_enabled = 1; 1276 bond->alb_info.rlb_enabled = 1;
1354 /* initialize rlb */
1355 res = rlb_initialize(bond); 1277 res = rlb_initialize(bond);
1356 if (res) { 1278 if (res) {
1357 tlb_deinitialize(bond); 1279 tlb_deinitialize(bond);
@@ -1375,7 +1297,7 @@ void bond_alb_deinitialize(struct bonding *bond)
1375} 1297}
1376 1298
1377static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, 1299static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
1378 struct slave *tx_slave) 1300 struct slave *tx_slave)
1379{ 1301{
1380 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1302 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1381 struct ethhdr *eth_data = eth_hdr(skb); 1303 struct ethhdr *eth_data = eth_hdr(skb);
@@ -1388,7 +1310,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
1388 } 1310 }
1389 1311
1390 if (tx_slave && bond_slave_can_tx(tx_slave)) { 1312 if (tx_slave && bond_slave_can_tx(tx_slave)) {
1391 if (tx_slave != rcu_dereference(bond->curr_active_slave)) { 1313 if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
1392 ether_addr_copy(eth_data->h_source, 1314 ether_addr_copy(eth_data->h_source,
1393 tx_slave->dev->dev_addr); 1315 tx_slave->dev->dev_addr);
1394 } 1316 }
@@ -1398,9 +1320,9 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
1398 } 1320 }
1399 1321
1400 if (tx_slave && bond->params.tlb_dynamic_lb) { 1322 if (tx_slave && bond->params.tlb_dynamic_lb) {
1401 _lock_tx_hashtbl(bond); 1323 spin_lock(&bond->mode_lock);
1402 __tlb_clear_slave(bond, tx_slave, 0); 1324 __tlb_clear_slave(bond, tx_slave, 0);
1403 _unlock_tx_hashtbl(bond); 1325 spin_unlock(&bond->mode_lock);
1404 } 1326 }
1405 1327
1406 /* no suitable interface, frame not sent */ 1328 /* no suitable interface, frame not sent */
@@ -1409,39 +1331,9 @@ out:
1409 return NETDEV_TX_OK; 1331 return NETDEV_TX_OK;
1410} 1332}
1411 1333
1412static int bond_tlb_update_slave_arr(struct bonding *bond,
1413 struct slave *skipslave)
1414{
1415 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1416 struct slave *tx_slave;
1417 struct list_head *iter;
1418 struct tlb_up_slave *new_arr, *old_arr;
1419
1420 new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
1421 GFP_ATOMIC);
1422 if (!new_arr)
1423 return -ENOMEM;
1424
1425 bond_for_each_slave(bond, tx_slave, iter) {
1426 if (!bond_slave_can_tx(tx_slave))
1427 continue;
1428 if (skipslave == tx_slave)
1429 continue;
1430 new_arr->arr[new_arr->count++] = tx_slave;
1431 }
1432
1433 old_arr = rtnl_dereference(bond_info->slave_arr);
1434 rcu_assign_pointer(bond_info->slave_arr, new_arr);
1435 if (old_arr)
1436 kfree_rcu(old_arr, rcu);
1437
1438 return 0;
1439}
1440
1441int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1334int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1442{ 1335{
1443 struct bonding *bond = netdev_priv(bond_dev); 1336 struct bonding *bond = netdev_priv(bond_dev);
1444 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1445 struct ethhdr *eth_data; 1337 struct ethhdr *eth_data;
1446 struct slave *tx_slave = NULL; 1338 struct slave *tx_slave = NULL;
1447 u32 hash_index; 1339 u32 hash_index;
@@ -1462,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1462 hash_index & 0xFF, 1354 hash_index & 0xFF,
1463 skb->len); 1355 skb->len);
1464 } else { 1356 } else {
1465 struct tlb_up_slave *slaves; 1357 struct bond_up_slave *slaves;
1358 unsigned int count;
1466 1359
1467 slaves = rcu_dereference(bond_info->slave_arr); 1360 slaves = rcu_dereference(bond->slave_arr);
1468 if (slaves && slaves->count) 1361 count = slaves ? ACCESS_ONCE(slaves->count) : 0;
1362 if (likely(count))
1469 tx_slave = slaves->arr[hash_index % 1363 tx_slave = slaves->arr[hash_index %
1470 slaves->count]; 1364 count];
1471 } 1365 }
1472 break; 1366 break;
1473 } 1367 }
@@ -1595,13 +1489,6 @@ void bond_alb_monitor(struct work_struct *work)
1595 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { 1489 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) {
1596 bool strict_match; 1490 bool strict_match;
1597 1491
1598 /* change of curr_active_slave involves swapping of mac addresses.
1599 * in order to avoid this swapping from happening while
1600 * sending the learning packets, the curr_slave_lock must be held for
1601 * read.
1602 */
1603 read_lock(&bond->curr_slave_lock);
1604
1605 bond_for_each_slave_rcu(bond, slave, iter) { 1492 bond_for_each_slave_rcu(bond, slave, iter) {
1606 /* If updating current_active, use all currently 1493 /* If updating current_active, use all currently
1607 * user mac addreses (!strict_match). Otherwise, only 1494 * user mac addreses (!strict_match). Otherwise, only
@@ -1613,17 +1500,11 @@ void bond_alb_monitor(struct work_struct *work)
1613 alb_send_learning_packets(slave, slave->dev->dev_addr, 1500 alb_send_learning_packets(slave, slave->dev->dev_addr,
1614 strict_match); 1501 strict_match);
1615 } 1502 }
1616
1617 read_unlock(&bond->curr_slave_lock);
1618
1619 bond_info->lp_counter = 0; 1503 bond_info->lp_counter = 0;
1620 } 1504 }
1621 1505
1622 /* rebalance tx traffic */ 1506 /* rebalance tx traffic */
1623 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { 1507 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
1624
1625 read_lock(&bond->curr_slave_lock);
1626
1627 bond_for_each_slave_rcu(bond, slave, iter) { 1508 bond_for_each_slave_rcu(bond, slave, iter) {
1628 tlb_clear_slave(bond, slave, 1); 1509 tlb_clear_slave(bond, slave, 1);
1629 if (slave == rcu_access_pointer(bond->curr_active_slave)) { 1510 if (slave == rcu_access_pointer(bond->curr_active_slave)) {
@@ -1633,19 +1514,14 @@ void bond_alb_monitor(struct work_struct *work)
1633 bond_info->unbalanced_load = 0; 1514 bond_info->unbalanced_load = 0;
1634 } 1515 }
1635 } 1516 }
1636
1637 read_unlock(&bond->curr_slave_lock);
1638
1639 bond_info->tx_rebalance_counter = 0; 1517 bond_info->tx_rebalance_counter = 0;
1640 } 1518 }
1641 1519
1642 /* handle rlb stuff */
1643 if (bond_info->rlb_enabled) { 1520 if (bond_info->rlb_enabled) {
1644 if (bond_info->primary_is_promisc && 1521 if (bond_info->primary_is_promisc &&
1645 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { 1522 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
1646 1523
1647 /* 1524 /* dev_set_promiscuity requires rtnl and
1648 * dev_set_promiscuity requires rtnl and
1649 * nothing else. Avoid race with bond_close. 1525 * nothing else. Avoid race with bond_close.
1650 */ 1526 */
1651 rcu_read_unlock(); 1527 rcu_read_unlock();
@@ -1715,8 +1591,7 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
1715 return 0; 1591 return 0;
1716} 1592}
1717 1593
1718/* 1594/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses
1719 * Remove slave from tlb and rlb hash tables, and fix up MAC addresses
1720 * if necessary. 1595 * if necessary.
1721 * 1596 *
1722 * Caller must hold RTNL and no other locks 1597 * Caller must hold RTNL and no other locks
@@ -1733,13 +1608,8 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
1733 rlb_clear_slave(bond, slave); 1608 rlb_clear_slave(bond, slave);
1734 } 1609 }
1735 1610
1736 if (bond_is_nondyn_tlb(bond))
1737 if (bond_tlb_update_slave_arr(bond, slave))
1738 pr_err("Failed to build slave-array for TLB mode.\n");
1739
1740} 1611}
1741 1612
1742/* Caller must hold bond lock for read */
1743void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1613void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
1744{ 1614{
1745 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1615 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -1762,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
1762 } 1632 }
1763 1633
1764 if (bond_is_nondyn_tlb(bond)) { 1634 if (bond_is_nondyn_tlb(bond)) {
1765 if (bond_tlb_update_slave_arr(bond, NULL)) 1635 if (bond_update_slave_arr(bond, NULL))
1766 pr_err("Failed to build slave-array for TLB mode.\n"); 1636 pr_err("Failed to build slave-array for TLB mode.\n");
1767 } 1637 }
1768} 1638}
@@ -1775,22 +1645,14 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
1775 * Set the bond->curr_active_slave to @new_slave and handle 1645 * Set the bond->curr_active_slave to @new_slave and handle
1776 * mac address swapping and promiscuity changes as needed. 1646 * mac address swapping and promiscuity changes as needed.
1777 * 1647 *
1778 * If new_slave is NULL, caller must hold curr_slave_lock or 1648 * Caller must hold RTNL
1779 * bond->lock for write.
1780 *
1781 * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
1782 * for write. Processing here may sleep, so no other locks may be held.
1783 */ 1649 */
1784void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) 1650void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
1785 __releases(&bond->curr_slave_lock)
1786 __acquires(&bond->curr_slave_lock)
1787{ 1651{
1788 struct slave *swap_slave; 1652 struct slave *swap_slave;
1789 struct slave *curr_active; 1653 struct slave *curr_active;
1790 1654
1791 curr_active = rcu_dereference_protected(bond->curr_active_slave, 1655 curr_active = rtnl_dereference(bond->curr_active_slave);
1792 !new_slave ||
1793 lockdep_is_held(&bond->curr_slave_lock));
1794 if (curr_active == new_slave) 1656 if (curr_active == new_slave)
1795 return; 1657 return;
1796 1658
@@ -1812,8 +1674,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
1812 if (!swap_slave) 1674 if (!swap_slave)
1813 swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); 1675 swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr);
1814 1676
1815 /* 1677 /* Arrange for swap_slave and new_slave to temporarily be
1816 * Arrange for swap_slave and new_slave to temporarily be
1817 * ignored so we can mess with their MAC addresses without 1678 * ignored so we can mess with their MAC addresses without
1818 * fear of interference from transmit activity. 1679 * fear of interference from transmit activity.
1819 */ 1680 */
@@ -1821,10 +1682,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
1821 tlb_clear_slave(bond, swap_slave, 1); 1682 tlb_clear_slave(bond, swap_slave, 1);
1822 tlb_clear_slave(bond, new_slave, 1); 1683 tlb_clear_slave(bond, new_slave, 1);
1823 1684
1824 write_unlock_bh(&bond->curr_slave_lock);
1825
1826 ASSERT_RTNL();
1827
1828 /* in TLB mode, the slave might flip down/up with the old dev_addr, 1685 /* in TLB mode, the slave might flip down/up with the old dev_addr,
1829 * and thus filter bond->dev_addr's packets, so force bond's mac 1686 * and thus filter bond->dev_addr's packets, so force bond's mac
1830 */ 1687 */
@@ -1853,16 +1710,10 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
1853 alb_send_learning_packets(new_slave, bond->dev->dev_addr, 1710 alb_send_learning_packets(new_slave, bond->dev->dev_addr,
1854 false); 1711 false);
1855 } 1712 }
1856
1857 write_lock_bh(&bond->curr_slave_lock);
1858} 1713}
1859 1714
1860/* 1715/* Called with RTNL */
1861 * Called with RTNL
1862 */
1863int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1716int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
1864 __acquires(&bond->lock)
1865 __releases(&bond->lock)
1866{ 1717{
1867 struct bonding *bond = netdev_priv(bond_dev); 1718 struct bonding *bond = netdev_priv(bond_dev);
1868 struct sockaddr *sa = addr; 1719 struct sockaddr *sa = addr;
@@ -1895,14 +1746,12 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
1895 } else { 1746 } else {
1896 alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr); 1747 alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr);
1897 1748
1898 read_lock(&bond->lock);
1899 alb_send_learning_packets(curr_active, 1749 alb_send_learning_packets(curr_active,
1900 bond_dev->dev_addr, false); 1750 bond_dev->dev_addr, false);
1901 if (bond->alb_info.rlb_enabled) { 1751 if (bond->alb_info.rlb_enabled) {
1902 /* inform clients mac address has changed */ 1752 /* inform clients mac address has changed */
1903 rlb_req_update_slave_clients(bond, curr_active); 1753 rlb_req_update_slave_clients(bond, curr_active);
1904 } 1754 }
1905 read_unlock(&bond->lock);
1906 } 1755 }
1907 1756
1908 return 0; 1757 return 0;
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index aaeac61d03cf..1ad473b4ade5 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -139,24 +139,14 @@ struct tlb_slave_info {
139 */ 139 */
140}; 140};
141 141
142struct tlb_up_slave {
143 unsigned int count;
144 struct rcu_head rcu;
145 struct slave *arr[0];
146};
147
148struct alb_bond_info { 142struct alb_bond_info {
149 struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ 143 struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
150 spinlock_t tx_hashtbl_lock;
151 u32 unbalanced_load; 144 u32 unbalanced_load;
152 int tx_rebalance_counter; 145 int tx_rebalance_counter;
153 int lp_counter; 146 int lp_counter;
154 /* -------- non-dynamic tlb mode only ---------*/
155 struct tlb_up_slave __rcu *slave_arr; /* Up slaves */
156 /* -------- rlb parameters -------- */ 147 /* -------- rlb parameters -------- */
157 int rlb_enabled; 148 int rlb_enabled;
158 struct rlb_client_info *rx_hashtbl; /* Receive hash table */ 149 struct rlb_client_info *rx_hashtbl; /* Receive hash table */
159 spinlock_t rx_hashtbl_lock;
160 u32 rx_hashtbl_used_head; 150 u32 rx_hashtbl_used_head;
161 u8 rx_ntt; /* flag - need to transmit 151 u8 rx_ntt; /* flag - need to transmit
162 * to all rx clients 152 * to all rx clients
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 280971b227ea..8f99082f90eb 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -13,9 +13,7 @@
13 13
14static struct dentry *bonding_debug_root; 14static struct dentry *bonding_debug_root;
15 15
16/* 16/* Show RLB hash table */
17 * Show RLB hash table
18 */
19static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) 17static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
20{ 18{
21 struct bonding *bond = m->private; 19 struct bonding *bond = m->private;
@@ -29,7 +27,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
29 seq_printf(m, "SourceIP DestinationIP " 27 seq_printf(m, "SourceIP DestinationIP "
30 "Destination MAC DEV\n"); 28 "Destination MAC DEV\n");
31 29
32 spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 30 spin_lock_bh(&bond->mode_lock);
33 31
34 hash_index = bond_info->rx_hashtbl_used_head; 32 hash_index = bond_info->rx_hashtbl_used_head;
35 for (; hash_index != RLB_NULL_INDEX; 33 for (; hash_index != RLB_NULL_INDEX;
@@ -42,7 +40,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
42 client_info->slave->dev->name); 40 client_info->slave->dev->name);
43 } 41 }
44 42
45 spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 43 spin_unlock_bh(&bond->mode_lock);
46 44
47 return 0; 45 return 0;
48} 46}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 798ae69fb63c..c9ac06cfe6b7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -208,6 +208,9 @@ static int lacp_fast;
208 208
209static int bond_init(struct net_device *bond_dev); 209static int bond_init(struct net_device *bond_dev);
210static void bond_uninit(struct net_device *bond_dev); 210static void bond_uninit(struct net_device *bond_dev);
211static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
212 struct rtnl_link_stats64 *stats);
213static void bond_slave_arr_handler(struct work_struct *work);
211 214
212/*---------------------------- General routines -----------------------------*/ 215/*---------------------------- General routines -----------------------------*/
213 216
@@ -253,8 +256,7 @@ void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
253 dev_queue_xmit(skb); 256 dev_queue_xmit(skb);
254} 257}
255 258
256/* 259/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
257 * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
258 * We don't protect the slave list iteration with a lock because: 260 * We don't protect the slave list iteration with a lock because:
259 * a. This operation is performed in IOCTL context, 261 * a. This operation is performed in IOCTL context,
260 * b. The operation is protected by the RTNL semaphore in the 8021q code, 262 * b. The operation is protected by the RTNL semaphore in the 8021q code,
@@ -326,8 +328,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
326 328
327/*------------------------------- Link status -------------------------------*/ 329/*------------------------------- Link status -------------------------------*/
328 330
329/* 331/* Set the carrier state for the master according to the state of its
330 * Set the carrier state for the master according to the state of its
331 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 332 * slaves. If any slaves are up, the master is up. In 802.3ad mode,
332 * do special 802.3ad magic. 333 * do special 802.3ad magic.
333 * 334 *
@@ -362,8 +363,7 @@ down:
362 return 0; 363 return 0;
363} 364}
364 365
365/* 366/* Get link speed and duplex from the slave's base driver
366 * Get link speed and duplex from the slave's base driver
367 * using ethtool. If for some reason the call fails or the 367 * using ethtool. If for some reason the call fails or the
368 * values are invalid, set speed and duplex to -1, 368 * values are invalid, set speed and duplex to -1,
369 * and return. 369 * and return.
@@ -416,8 +416,7 @@ const char *bond_slave_link_status(s8 link)
416 } 416 }
417} 417}
418 418
419/* 419/* if <dev> supports MII link status reporting, check its link status.
420 * if <dev> supports MII link status reporting, check its link status.
421 * 420 *
422 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 421 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
423 * depending upon the setting of the use_carrier parameter. 422 * depending upon the setting of the use_carrier parameter.
@@ -454,14 +453,14 @@ static int bond_check_dev_link(struct bonding *bond,
454 /* Ethtool can't be used, fallback to MII ioctls. */ 453 /* Ethtool can't be used, fallback to MII ioctls. */
455 ioctl = slave_ops->ndo_do_ioctl; 454 ioctl = slave_ops->ndo_do_ioctl;
456 if (ioctl) { 455 if (ioctl) {
457 /* TODO: set pointer to correct ioctl on a per team member */ 456 /* TODO: set pointer to correct ioctl on a per team member
458 /* bases to make this more efficient. that is, once */ 457 * bases to make this more efficient. that is, once
459 /* we determine the correct ioctl, we will always */ 458 * we determine the correct ioctl, we will always
460 /* call it and not the others for that team */ 459 * call it and not the others for that team
461 /* member. */ 460 * member.
462 461 */
463 /* 462
464 * We cannot assume that SIOCGMIIPHY will also read a 463 /* We cannot assume that SIOCGMIIPHY will also read a
465 * register; not all network drivers (e.g., e100) 464 * register; not all network drivers (e.g., e100)
466 * support that. 465 * support that.
467 */ 466 */
@@ -476,8 +475,7 @@ static int bond_check_dev_link(struct bonding *bond,
476 } 475 }
477 } 476 }
478 477
479 /* 478 /* If reporting, report that either there's no dev->do_ioctl,
480 * If reporting, report that either there's no dev->do_ioctl,
481 * or both SIOCGMIIREG and get_link failed (meaning that we 479 * or both SIOCGMIIREG and get_link failed (meaning that we
482 * cannot report link status). If not reporting, pretend 480 * cannot report link status). If not reporting, pretend
483 * we're ok. 481 * we're ok.
@@ -487,9 +485,7 @@ static int bond_check_dev_link(struct bonding *bond,
487 485
488/*----------------------------- Multicast list ------------------------------*/ 486/*----------------------------- Multicast list ------------------------------*/
489 487
490/* 488/* Push the promiscuity flag down to appropriate slaves */
491 * Push the promiscuity flag down to appropriate slaves
492 */
493static int bond_set_promiscuity(struct bonding *bond, int inc) 489static int bond_set_promiscuity(struct bonding *bond, int inc)
494{ 490{
495 struct list_head *iter; 491 struct list_head *iter;
@@ -512,9 +508,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)
512 return err; 508 return err;
513} 509}
514 510
515/* 511/* Push the allmulti flag down to all slaves */
516 * Push the allmulti flag down to all slaves
517 */
518static int bond_set_allmulti(struct bonding *bond, int inc) 512static int bond_set_allmulti(struct bonding *bond, int inc)
519{ 513{
520 struct list_head *iter; 514 struct list_head *iter;
@@ -537,8 +531,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
537 return err; 531 return err;
538} 532}
539 533
540/* 534/* Retrieve the list of registered multicast addresses for the bonding
541 * Retrieve the list of registered multicast addresses for the bonding
542 * device and retransmit an IGMP JOIN request to the current active 535 * device and retransmit an IGMP JOIN request to the current active
543 * slave. 536 * slave.
544 */ 537 */
@@ -560,8 +553,7 @@ static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
560 rtnl_unlock(); 553 rtnl_unlock();
561} 554}
562 555
563/* Flush bond's hardware addresses from slave 556/* Flush bond's hardware addresses from slave */
564 */
565static void bond_hw_addr_flush(struct net_device *bond_dev, 557static void bond_hw_addr_flush(struct net_device *bond_dev,
566 struct net_device *slave_dev) 558 struct net_device *slave_dev)
567{ 559{
@@ -588,8 +580,6 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,
588static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, 580static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
589 struct slave *old_active) 581 struct slave *old_active)
590{ 582{
591 ASSERT_RTNL();
592
593 if (old_active) { 583 if (old_active) {
594 if (bond->dev->flags & IFF_PROMISC) 584 if (bond->dev->flags & IFF_PROMISC)
595 dev_set_promiscuity(old_active->dev, -1); 585 dev_set_promiscuity(old_active->dev, -1);
@@ -632,18 +622,15 @@ static void bond_set_dev_addr(struct net_device *bond_dev,
632 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); 622 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
633} 623}
634 624
635/* 625/* bond_do_fail_over_mac
636 * bond_do_fail_over_mac
637 * 626 *
638 * Perform special MAC address swapping for fail_over_mac settings 627 * Perform special MAC address swapping for fail_over_mac settings
639 * 628 *
640 * Called with RTNL, curr_slave_lock for write_bh. 629 * Called with RTNL
641 */ 630 */
642static void bond_do_fail_over_mac(struct bonding *bond, 631static void bond_do_fail_over_mac(struct bonding *bond,
643 struct slave *new_active, 632 struct slave *new_active,
644 struct slave *old_active) 633 struct slave *old_active)
645 __releases(&bond->curr_slave_lock)
646 __acquires(&bond->curr_slave_lock)
647{ 634{
648 u8 tmp_mac[ETH_ALEN]; 635 u8 tmp_mac[ETH_ALEN];
649 struct sockaddr saddr; 636 struct sockaddr saddr;
@@ -651,23 +638,17 @@ static void bond_do_fail_over_mac(struct bonding *bond,
651 638
652 switch (bond->params.fail_over_mac) { 639 switch (bond->params.fail_over_mac) {
653 case BOND_FOM_ACTIVE: 640 case BOND_FOM_ACTIVE:
654 if (new_active) { 641 if (new_active)
655 write_unlock_bh(&bond->curr_slave_lock);
656 bond_set_dev_addr(bond->dev, new_active->dev); 642 bond_set_dev_addr(bond->dev, new_active->dev);
657 write_lock_bh(&bond->curr_slave_lock);
658 }
659 break; 643 break;
660 case BOND_FOM_FOLLOW: 644 case BOND_FOM_FOLLOW:
661 /* 645 /* if new_active && old_active, swap them
662 * if new_active && old_active, swap them
663 * if just old_active, do nothing (going to no active slave) 646 * if just old_active, do nothing (going to no active slave)
664 * if just new_active, set new_active to bond's MAC 647 * if just new_active, set new_active to bond's MAC
665 */ 648 */
666 if (!new_active) 649 if (!new_active)
667 return; 650 return;
668 651
669 write_unlock_bh(&bond->curr_slave_lock);
670
671 if (old_active) { 652 if (old_active) {
672 ether_addr_copy(tmp_mac, new_active->dev->dev_addr); 653 ether_addr_copy(tmp_mac, new_active->dev->dev_addr);
673 ether_addr_copy(saddr.sa_data, 654 ether_addr_copy(saddr.sa_data,
@@ -696,7 +677,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,
696 netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", 677 netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
697 -rv, new_active->dev->name); 678 -rv, new_active->dev->name);
698out: 679out:
699 write_lock_bh(&bond->curr_slave_lock);
700 break; 680 break;
701 default: 681 default:
702 netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n", 682 netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
@@ -708,8 +688,8 @@ out:
708 688
709static bool bond_should_change_active(struct bonding *bond) 689static bool bond_should_change_active(struct bonding *bond)
710{ 690{
711 struct slave *prim = bond->primary_slave; 691 struct slave *prim = rtnl_dereference(bond->primary_slave);
712 struct slave *curr = bond_deref_active_protected(bond); 692 struct slave *curr = rtnl_dereference(bond->curr_active_slave);
713 693
714 if (!prim || !curr || curr->link != BOND_LINK_UP) 694 if (!prim || !curr || curr->link != BOND_LINK_UP)
715 return true; 695 return true;
@@ -732,13 +712,14 @@ static bool bond_should_change_active(struct bonding *bond)
732 */ 712 */
733static struct slave *bond_find_best_slave(struct bonding *bond) 713static struct slave *bond_find_best_slave(struct bonding *bond)
734{ 714{
735 struct slave *slave, *bestslave = NULL; 715 struct slave *slave, *bestslave = NULL, *primary;
736 struct list_head *iter; 716 struct list_head *iter;
737 int mintime = bond->params.updelay; 717 int mintime = bond->params.updelay;
738 718
739 if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && 719 primary = rtnl_dereference(bond->primary_slave);
720 if (primary && primary->link == BOND_LINK_UP &&
740 bond_should_change_active(bond)) 721 bond_should_change_active(bond))
741 return bond->primary_slave; 722 return primary;
742 723
743 bond_for_each_slave(bond, slave, iter) { 724 bond_for_each_slave(bond, slave, iter) {
744 if (slave->link == BOND_LINK_UP) 725 if (slave->link == BOND_LINK_UP)
@@ -784,15 +765,15 @@ static bool bond_should_notify_peers(struct bonding *bond)
784 * because it is apparently the best available slave we have, even though its 765 * because it is apparently the best available slave we have, even though its
785 * updelay hasn't timed out yet. 766 * updelay hasn't timed out yet.
786 * 767 *
787 * If new_active is not NULL, caller must hold curr_slave_lock for write_bh. 768 * Caller must hold RTNL.
788 */ 769 */
789void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 770void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
790{ 771{
791 struct slave *old_active; 772 struct slave *old_active;
792 773
793 old_active = rcu_dereference_protected(bond->curr_active_slave, 774 ASSERT_RTNL();
794 !new_active || 775
795 lockdep_is_held(&bond->curr_slave_lock)); 776 old_active = rtnl_dereference(bond->curr_active_slave);
796 777
797 if (old_active == new_active) 778 if (old_active == new_active)
798 return; 779 return;
@@ -860,21 +841,18 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
860 bond_should_notify_peers(bond); 841 bond_should_notify_peers(bond);
861 } 842 }
862 843
863 write_unlock_bh(&bond->curr_slave_lock);
864
865 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); 844 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
866 if (should_notify_peers) 845 if (should_notify_peers)
867 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, 846 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
868 bond->dev); 847 bond->dev);
869
870 write_lock_bh(&bond->curr_slave_lock);
871 } 848 }
872 } 849 }
873 850
874 /* resend IGMP joins since active slave has changed or 851 /* resend IGMP joins since active slave has changed or
875 * all were sent on curr_active_slave. 852 * all were sent on curr_active_slave.
876 * resend only if bond is brought up with the affected 853 * resend only if bond is brought up with the affected
877 * bonding modes and the retransmission is enabled */ 854 * bonding modes and the retransmission is enabled
855 */
878 if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && 856 if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
879 ((bond_uses_primary(bond) && new_active) || 857 ((bond_uses_primary(bond) && new_active) ||
880 BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) { 858 BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
@@ -892,15 +870,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
892 * - The primary_slave has got its link back. 870 * - The primary_slave has got its link back.
893 * - A slave has got its link back and there's no old curr_active_slave. 871 * - A slave has got its link back and there's no old curr_active_slave.
894 * 872 *
895 * Caller must hold curr_slave_lock for write_bh. 873 * Caller must hold RTNL.
896 */ 874 */
897void bond_select_active_slave(struct bonding *bond) 875void bond_select_active_slave(struct bonding *bond)
898{ 876{
899 struct slave *best_slave; 877 struct slave *best_slave;
900 int rv; 878 int rv;
901 879
880 ASSERT_RTNL();
881
902 best_slave = bond_find_best_slave(bond); 882 best_slave = bond_find_best_slave(bond);
903 if (best_slave != bond_deref_active_protected(bond)) { 883 if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
904 bond_change_active_slave(bond, best_slave); 884 bond_change_active_slave(bond, best_slave);
905 rv = bond_set_carrier(bond); 885 rv = bond_set_carrier(bond);
906 if (!rv) 886 if (!rv)
@@ -1022,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
1022 1002
1023static void bond_compute_features(struct bonding *bond) 1003static void bond_compute_features(struct bonding *bond)
1024{ 1004{
1025 unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; 1005 unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
1006 IFF_XMIT_DST_RELEASE_PERM;
1026 netdev_features_t vlan_features = BOND_VLAN_FEATURES; 1007 netdev_features_t vlan_features = BOND_VLAN_FEATURES;
1027 netdev_features_t enc_features = BOND_ENC_FEATURES; 1008 netdev_features_t enc_features = BOND_ENC_FEATURES;
1028 struct net_device *bond_dev = bond->dev; 1009 struct net_device *bond_dev = bond->dev;
@@ -1058,8 +1039,10 @@ done:
1058 bond_dev->gso_max_segs = gso_max_segs; 1039 bond_dev->gso_max_segs = gso_max_segs;
1059 netif_set_gso_max_size(bond_dev, gso_max_size); 1040 netif_set_gso_max_size(bond_dev, gso_max_size);
1060 1041
1061 flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE; 1042 bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1062 bond_dev->priv_flags = flags | dst_release_flag; 1043 if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
1044 dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
1045 bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
1063 1046
1064 netdev_change_features(bond_dev); 1047 netdev_change_features(bond_dev);
1065} 1048}
@@ -1240,8 +1223,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1240 slave_dev->name); 1223 slave_dev->name);
1241 } 1224 }
1242 1225
1243 /* 1226 /* Old ifenslave binaries are no longer supported. These can
1244 * Old ifenslave binaries are no longer supported. These can
1245 * be identified with moderate accuracy by the state of the slave: 1227 * be identified with moderate accuracy by the state of the slave:
1246 * the current ifenslave will set the interface down prior to 1228 * the current ifenslave will set the interface down prior to
1247 * enslaving it; the old ifenslave will not. 1229 * enslaving it; the old ifenslave will not.
@@ -1313,7 +1295,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1313 call_netdevice_notifiers(NETDEV_JOIN, slave_dev); 1295 call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
1314 1296
1315 /* If this is the first slave, then we need to set the master's hardware 1297 /* If this is the first slave, then we need to set the master's hardware
1316 * address to be the same as the slave's. */ 1298 * address to be the same as the slave's.
1299 */
1317 if (!bond_has_slaves(bond) && 1300 if (!bond_has_slaves(bond) &&
1318 bond->dev->addr_assign_type == NET_ADDR_RANDOM) 1301 bond->dev->addr_assign_type == NET_ADDR_RANDOM)
1319 bond_set_dev_addr(bond->dev, slave_dev); 1302 bond_set_dev_addr(bond->dev, slave_dev);
@@ -1326,8 +1309,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1326 1309
1327 new_slave->bond = bond; 1310 new_slave->bond = bond;
1328 new_slave->dev = slave_dev; 1311 new_slave->dev = slave_dev;
1329 /* 1312 /* Set the new_slave's queue_id to be zero. Queue ID mapping
1330 * Set the new_slave's queue_id to be zero. Queue ID mapping
1331 * is set via sysfs or module option if desired. 1313 * is set via sysfs or module option if desired.
1332 */ 1314 */
1333 new_slave->queue_id = 0; 1315 new_slave->queue_id = 0;
@@ -1340,8 +1322,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1340 goto err_free; 1322 goto err_free;
1341 } 1323 }
1342 1324
1343 /* 1325 /* Save slave's original ("permanent") mac address for modes
1344 * Save slave's original ("permanent") mac address for modes
1345 * that need it, and for restoring it upon release, and then 1326 * that need it, and for restoring it upon release, and then
1346 * set it to the master's address 1327 * set it to the master's address
1347 */ 1328 */
@@ -1349,8 +1330,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1349 1330
1350 if (!bond->params.fail_over_mac || 1331 if (!bond->params.fail_over_mac ||
1351 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 1332 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1352 /* 1333 /* Set slave to master's mac address. The application already
1353 * Set slave to master's mac address. The application already
1354 * set the master's mac address to that of the first slave 1334 * set the master's mac address to that of the first slave
1355 */ 1335 */
1356 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1336 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
@@ -1370,6 +1350,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1370 } 1350 }
1371 1351
1372 slave_dev->priv_flags |= IFF_BONDING; 1352 slave_dev->priv_flags |= IFF_BONDING;
1353 /* initialize slave stats */
1354 dev_get_stats(new_slave->dev, &new_slave->slave_stats);
1373 1355
1374 if (bond_is_lb(bond)) { 1356 if (bond_is_lb(bond)) {
1375 /* bond_alb_init_slave() must be called before all other stages since 1357 /* bond_alb_init_slave() must be called before all other stages since
@@ -1436,8 +1418,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1436 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1418 link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1437 1419
1438 if ((link_reporting == -1) && !bond->params.arp_interval) { 1420 if ((link_reporting == -1) && !bond->params.arp_interval) {
1439 /* 1421 /* miimon is set but a bonded network driver
1440 * miimon is set but a bonded network driver
1441 * does not support ETHTOOL/MII and 1422 * does not support ETHTOOL/MII and
1442 * arp_interval is not set. Note: if 1423 * arp_interval is not set. Note: if
1443 * use_carrier is enabled, we will never go 1424 * use_carrier is enabled, we will never go
@@ -1482,7 +1463,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1482 if (bond_uses_primary(bond) && bond->params.primary[0]) { 1463 if (bond_uses_primary(bond) && bond->params.primary[0]) {
1483 /* if there is a primary slave, remember it */ 1464 /* if there is a primary slave, remember it */
1484 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1465 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1485 bond->primary_slave = new_slave; 1466 rcu_assign_pointer(bond->primary_slave, new_slave);
1486 bond->force_primary = true; 1467 bond->force_primary = true;
1487 } 1468 }
1488 } 1469 }
@@ -1570,12 +1551,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1570 1551
1571 if (bond_uses_primary(bond)) { 1552 if (bond_uses_primary(bond)) {
1572 block_netpoll_tx(); 1553 block_netpoll_tx();
1573 write_lock_bh(&bond->curr_slave_lock);
1574 bond_select_active_slave(bond); 1554 bond_select_active_slave(bond);
1575 write_unlock_bh(&bond->curr_slave_lock);
1576 unblock_netpoll_tx(); 1555 unblock_netpoll_tx();
1577 } 1556 }
1578 1557
1558 if (bond_mode_uses_xmit_hash(bond))
1559 bond_update_slave_arr(bond, NULL);
1560
1579 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", 1561 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
1580 slave_dev->name, 1562 slave_dev->name,
1581 bond_is_active_slave(new_slave) ? "an active" : "a backup", 1563 bond_is_active_slave(new_slave) ? "an active" : "a backup",
@@ -1596,16 +1578,16 @@ err_detach:
1596 bond_hw_addr_flush(bond_dev, slave_dev); 1578 bond_hw_addr_flush(bond_dev, slave_dev);
1597 1579
1598 vlan_vids_del_by_dev(slave_dev, bond_dev); 1580 vlan_vids_del_by_dev(slave_dev, bond_dev);
1599 if (bond->primary_slave == new_slave) 1581 if (rcu_access_pointer(bond->primary_slave) == new_slave)
1600 bond->primary_slave = NULL; 1582 RCU_INIT_POINTER(bond->primary_slave, NULL);
1601 if (rcu_access_pointer(bond->curr_active_slave) == new_slave) { 1583 if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
1602 block_netpoll_tx(); 1584 block_netpoll_tx();
1603 write_lock_bh(&bond->curr_slave_lock);
1604 bond_change_active_slave(bond, NULL); 1585 bond_change_active_slave(bond, NULL);
1605 bond_select_active_slave(bond); 1586 bond_select_active_slave(bond);
1606 write_unlock_bh(&bond->curr_slave_lock);
1607 unblock_netpoll_tx(); 1587 unblock_netpoll_tx();
1608 } 1588 }
1589 /* either primary_slave or curr_active_slave might've changed */
1590 synchronize_rcu();
1609 slave_disable_netpoll(new_slave); 1591 slave_disable_netpoll(new_slave);
1610 1592
1611err_close: 1593err_close:
@@ -1639,10 +1621,9 @@ err_undo_flags:
1639 return res; 1621 return res;
1640} 1622}
1641 1623
1642/* 1624/* Try to release the slave device <slave> from the bond device <master>
1643 * Try to release the slave device <slave> from the bond device <master>
1644 * It is legal to access curr_active_slave without a lock because all the function 1625 * It is legal to access curr_active_slave without a lock because all the function
1645 * is write-locked. If "all" is true it means that the function is being called 1626 * is RTNL-locked. If "all" is true it means that the function is being called
1646 * while destroying a bond interface and all slaves are being released. 1627 * while destroying a bond interface and all slaves are being released.
1647 * 1628 *
1648 * The rules for slave state should be: 1629 * The rules for slave state should be:
@@ -1682,18 +1663,20 @@ static int __bond_release_one(struct net_device *bond_dev,
1682 1663
1683 bond_sysfs_slave_del(slave); 1664 bond_sysfs_slave_del(slave);
1684 1665
1666 /* recompute stats just before removing the slave */
1667 bond_get_stats(bond->dev, &bond->bond_stats);
1668
1685 bond_upper_dev_unlink(bond_dev, slave_dev); 1669 bond_upper_dev_unlink(bond_dev, slave_dev);
1686 /* unregister rx_handler early so bond_handle_frame wouldn't be called 1670 /* unregister rx_handler early so bond_handle_frame wouldn't be called
1687 * for this slave anymore. 1671 * for this slave anymore.
1688 */ 1672 */
1689 netdev_rx_handler_unregister(slave_dev); 1673 netdev_rx_handler_unregister(slave_dev);
1690 write_lock_bh(&bond->lock);
1691 1674
1692 /* Inform AD package of unbinding of slave. */
1693 if (BOND_MODE(bond) == BOND_MODE_8023AD) 1675 if (BOND_MODE(bond) == BOND_MODE_8023AD)
1694 bond_3ad_unbind_slave(slave); 1676 bond_3ad_unbind_slave(slave);
1695 1677
1696 write_unlock_bh(&bond->lock); 1678 if (bond_mode_uses_xmit_hash(bond))
1679 bond_update_slave_arr(bond, slave);
1697 1680
1698 netdev_info(bond_dev, "Releasing %s interface %s\n", 1681 netdev_info(bond_dev, "Releasing %s interface %s\n",
1699 bond_is_active_slave(slave) ? "active" : "backup", 1682 bond_is_active_slave(slave) ? "active" : "backup",
@@ -1712,14 +1695,11 @@ static int __bond_release_one(struct net_device *bond_dev,
1712 bond_dev->name, slave_dev->name); 1695 bond_dev->name, slave_dev->name);
1713 } 1696 }
1714 1697
1715 if (bond->primary_slave == slave) 1698 if (rtnl_dereference(bond->primary_slave) == slave)
1716 bond->primary_slave = NULL; 1699 RCU_INIT_POINTER(bond->primary_slave, NULL);
1717 1700
1718 if (oldcurrent == slave) { 1701 if (oldcurrent == slave)
1719 write_lock_bh(&bond->curr_slave_lock);
1720 bond_change_active_slave(bond, NULL); 1702 bond_change_active_slave(bond, NULL);
1721 write_unlock_bh(&bond->curr_slave_lock);
1722 }
1723 1703
1724 if (bond_is_lb(bond)) { 1704 if (bond_is_lb(bond)) {
1725 /* Must be called only after the slave has been 1705 /* Must be called only after the slave has been
@@ -1733,16 +1713,11 @@ static int __bond_release_one(struct net_device *bond_dev,
1733 if (all) { 1713 if (all) {
1734 RCU_INIT_POINTER(bond->curr_active_slave, NULL); 1714 RCU_INIT_POINTER(bond->curr_active_slave, NULL);
1735 } else if (oldcurrent == slave) { 1715 } else if (oldcurrent == slave) {
1736 /* 1716 /* Note that we hold RTNL over this sequence, so there
1737 * Note that we hold RTNL over this sequence, so there
1738 * is no concern that another slave add/remove event 1717 * is no concern that another slave add/remove event
1739 * will interfere. 1718 * will interfere.
1740 */ 1719 */
1741 write_lock_bh(&bond->curr_slave_lock);
1742
1743 bond_select_active_slave(bond); 1720 bond_select_active_slave(bond);
1744
1745 write_unlock_bh(&bond->curr_slave_lock);
1746 } 1721 }
1747 1722
1748 if (!bond_has_slaves(bond)) { 1723 if (!bond_has_slaves(bond)) {
@@ -1765,10 +1740,9 @@ static int __bond_release_one(struct net_device *bond_dev,
1765 netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n", 1740 netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n",
1766 slave_dev->name, bond_dev->name); 1741 slave_dev->name, bond_dev->name);
1767 1742
1768 /* must do this from outside any spinlocks */
1769 vlan_vids_del_by_dev(slave_dev, bond_dev); 1743 vlan_vids_del_by_dev(slave_dev, bond_dev);
1770 1744
1771 /* If the mode uses primary, then this cases was handled above by 1745 /* If the mode uses primary, then this case was handled above by
1772 * bond_change_active_slave(..., NULL) 1746 * bond_change_active_slave(..., NULL)
1773 */ 1747 */
1774 if (!bond_uses_primary(bond)) { 1748 if (!bond_uses_primary(bond)) {
@@ -1808,7 +1782,7 @@ static int __bond_release_one(struct net_device *bond_dev,
1808 1782
1809 bond_free_slave(slave); 1783 bond_free_slave(slave);
1810 1784
1811 return 0; /* deletion OK */ 1785 return 0;
1812} 1786}
1813 1787
1814/* A wrapper used because of ndo_del_link */ 1788/* A wrapper used because of ndo_del_link */
@@ -1817,10 +1791,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1817 return __bond_release_one(bond_dev, slave_dev, false); 1791 return __bond_release_one(bond_dev, slave_dev, false);
1818} 1792}
1819 1793
1820/* 1794/* First release a slave and then destroy the bond if no more slaves are left.
1821* First release a slave and then destroy the bond if no more slaves are left. 1795 * Must be under rtnl_lock when this function is called.
1822* Must be under rtnl_lock when this function is called. 1796 */
1823*/
1824static int bond_release_and_destroy(struct net_device *bond_dev, 1797static int bond_release_and_destroy(struct net_device *bond_dev,
1825 struct net_device *slave_dev) 1798 struct net_device *slave_dev)
1826{ 1799{
@@ -1843,7 +1816,6 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
1843 1816
1844 info->bond_mode = BOND_MODE(bond); 1817 info->bond_mode = BOND_MODE(bond);
1845 info->miimon = bond->params.miimon; 1818 info->miimon = bond->params.miimon;
1846
1847 info->num_slaves = bond->slave_cnt; 1819 info->num_slaves = bond->slave_cnt;
1848 1820
1849 return 0; 1821 return 0;
@@ -1906,9 +1878,7 @@ static int bond_miimon_inspect(struct bonding *bond)
1906 /*FALLTHRU*/ 1878 /*FALLTHRU*/
1907 case BOND_LINK_FAIL: 1879 case BOND_LINK_FAIL:
1908 if (link_state) { 1880 if (link_state) {
1909 /* 1881 /* recovered before downdelay expired */
1910 * recovered before downdelay expired
1911 */
1912 slave->link = BOND_LINK_UP; 1882 slave->link = BOND_LINK_UP;
1913 slave->last_link_up = jiffies; 1883 slave->last_link_up = jiffies;
1914 netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", 1884 netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
@@ -1974,7 +1944,7 @@ static int bond_miimon_inspect(struct bonding *bond)
1974static void bond_miimon_commit(struct bonding *bond) 1944static void bond_miimon_commit(struct bonding *bond)
1975{ 1945{
1976 struct list_head *iter; 1946 struct list_head *iter;
1977 struct slave *slave; 1947 struct slave *slave, *primary;
1978 1948
1979 bond_for_each_slave(bond, slave, iter) { 1949 bond_for_each_slave(bond, slave, iter) {
1980 switch (slave->new_link) { 1950 switch (slave->new_link) {
@@ -1985,13 +1955,14 @@ static void bond_miimon_commit(struct bonding *bond)
1985 slave->link = BOND_LINK_UP; 1955 slave->link = BOND_LINK_UP;
1986 slave->last_link_up = jiffies; 1956 slave->last_link_up = jiffies;
1987 1957
1958 primary = rtnl_dereference(bond->primary_slave);
1988 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 1959 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
1989 /* prevent it from being the active one */ 1960 /* prevent it from being the active one */
1990 bond_set_backup_slave(slave); 1961 bond_set_backup_slave(slave);
1991 } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 1962 } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1992 /* make it immediately active */ 1963 /* make it immediately active */
1993 bond_set_active_slave(slave); 1964 bond_set_active_slave(slave);
1994 } else if (slave != bond->primary_slave) { 1965 } else if (slave != primary) {
1995 /* prevent it from being the active one */ 1966 /* prevent it from being the active one */
1996 bond_set_backup_slave(slave); 1967 bond_set_backup_slave(slave);
1997 } 1968 }
@@ -2009,8 +1980,10 @@ static void bond_miimon_commit(struct bonding *bond)
2009 bond_alb_handle_link_change(bond, slave, 1980 bond_alb_handle_link_change(bond, slave,
2010 BOND_LINK_UP); 1981 BOND_LINK_UP);
2011 1982
2012 if (!bond->curr_active_slave || 1983 if (BOND_MODE(bond) == BOND_MODE_XOR)
2013 (slave == bond->primary_slave)) 1984 bond_update_slave_arr(bond, NULL);
1985
1986 if (!bond->curr_active_slave || slave == primary)
2014 goto do_failover; 1987 goto do_failover;
2015 1988
2016 continue; 1989 continue;
@@ -2037,6 +2010,9 @@ static void bond_miimon_commit(struct bonding *bond)
2037 bond_alb_handle_link_change(bond, slave, 2010 bond_alb_handle_link_change(bond, slave,
2038 BOND_LINK_DOWN); 2011 BOND_LINK_DOWN);
2039 2012
2013 if (BOND_MODE(bond) == BOND_MODE_XOR)
2014 bond_update_slave_arr(bond, NULL);
2015
2040 if (slave == rcu_access_pointer(bond->curr_active_slave)) 2016 if (slave == rcu_access_pointer(bond->curr_active_slave))
2041 goto do_failover; 2017 goto do_failover;
2042 2018
@@ -2051,19 +2027,15 @@ static void bond_miimon_commit(struct bonding *bond)
2051 } 2027 }
2052 2028
2053do_failover: 2029do_failover:
2054 ASSERT_RTNL();
2055 block_netpoll_tx(); 2030 block_netpoll_tx();
2056 write_lock_bh(&bond->curr_slave_lock);
2057 bond_select_active_slave(bond); 2031 bond_select_active_slave(bond);
2058 write_unlock_bh(&bond->curr_slave_lock);
2059 unblock_netpoll_tx(); 2032 unblock_netpoll_tx();
2060 } 2033 }
2061 2034
2062 bond_set_carrier(bond); 2035 bond_set_carrier(bond);
2063} 2036}
2064 2037
2065/* 2038/* bond_mii_monitor
2066 * bond_mii_monitor
2067 * 2039 *
2068 * Really a wrapper that splits the mii monitor into two phases: an 2040 * Really a wrapper that splits the mii monitor into two phases: an
2069 * inspection, then (if inspection indicates something needs to be done) 2041 * inspection, then (if inspection indicates something needs to be done)
@@ -2135,8 +2107,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
2135 return ret; 2107 return ret;
2136} 2108}
2137 2109
2138/* 2110/* We go to the (large) trouble of VLAN tagging ARP frames because
2139 * We go to the (large) trouble of VLAN tagging ARP frames because
2140 * switches in VLAN mode (especially if ports are configured as 2111 * switches in VLAN mode (especially if ports are configured as
2141 * "native" to a VLAN) might not pass non-tagged frames. 2112 * "native" to a VLAN) might not pass non-tagged frames.
2142 */ 2113 */
@@ -2363,8 +2334,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
2363 2334
2364 curr_active_slave = rcu_dereference(bond->curr_active_slave); 2335 curr_active_slave = rcu_dereference(bond->curr_active_slave);
2365 2336
2366 /* 2337 /* Backup slaves won't see the ARP reply, but do come through
2367 * Backup slaves won't see the ARP reply, but do come through
2368 * here for each ARP probe (so we swap the sip/tip to validate 2338 * here for each ARP probe (so we swap the sip/tip to validate
2369 * the probe). In a "redundant switch, common router" type of 2339 * the probe). In a "redundant switch, common router" type of
2370 * configuration, the ARP probe will (hopefully) travel from 2340 * configuration, the ARP probe will (hopefully) travel from
@@ -2404,8 +2374,7 @@ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
2404 last_act + mod * delta_in_ticks + delta_in_ticks/2); 2374 last_act + mod * delta_in_ticks + delta_in_ticks/2);
2405} 2375}
2406 2376
2407/* 2377/* This function is called regularly to monitor each slave's link
2408 * this function is called regularly to monitor each slave's link
2409 * ensuring that traffic is being sent and received when arp monitoring 2378 * ensuring that traffic is being sent and received when arp monitoring
2410 * is used in load-balancing mode. if the adapter has been dormant, then an 2379 * is used in load-balancing mode. if the adapter has been dormant, then an
2411 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2380 * arp is transmitted to generate traffic. see activebackup_arp_monitor for
@@ -2500,16 +2469,11 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
2500 2469
2501 if (slave_state_changed) { 2470 if (slave_state_changed) {
2502 bond_slave_state_change(bond); 2471 bond_slave_state_change(bond);
2472 if (BOND_MODE(bond) == BOND_MODE_XOR)
2473 bond_update_slave_arr(bond, NULL);
2503 } else if (do_failover) { 2474 } else if (do_failover) {
2504 /* the bond_select_active_slave must hold RTNL
2505 * and curr_slave_lock for write.
2506 */
2507 block_netpoll_tx(); 2475 block_netpoll_tx();
2508 write_lock_bh(&bond->curr_slave_lock);
2509
2510 bond_select_active_slave(bond); 2476 bond_select_active_slave(bond);
2511
2512 write_unlock_bh(&bond->curr_slave_lock);
2513 unblock_netpoll_tx(); 2477 unblock_netpoll_tx();
2514 } 2478 }
2515 rtnl_unlock(); 2479 rtnl_unlock();
@@ -2521,13 +2485,12 @@ re_arm:
2521 msecs_to_jiffies(bond->params.arp_interval)); 2485 msecs_to_jiffies(bond->params.arp_interval));
2522} 2486}
2523 2487
2524/* 2488/* Called to inspect slaves for active-backup mode ARP monitor link state
2525 * Called to inspect slaves for active-backup mode ARP monitor link state
2526 * changes. Sets new_link in slaves to specify what action should take 2489 * changes. Sets new_link in slaves to specify what action should take
2527 * place for the slave. Returns 0 if no changes are found, >0 if changes 2490 * place for the slave. Returns 0 if no changes are found, >0 if changes
2528 * to link states must be committed. 2491 * to link states must be committed.
2529 * 2492 *
2530 * Called with rcu_read_lock hold. 2493 * Called with rcu_read_lock held.
2531 */ 2494 */
2532static int bond_ab_arp_inspect(struct bonding *bond) 2495static int bond_ab_arp_inspect(struct bonding *bond)
2533{ 2496{
@@ -2548,16 +2511,14 @@ static int bond_ab_arp_inspect(struct bonding *bond)
2548 continue; 2511 continue;
2549 } 2512 }
2550 2513
2551 /* 2514 /* Give slaves 2*delta after being enslaved or made
2552 * Give slaves 2*delta after being enslaved or made
2553 * active. This avoids bouncing, as the last receive 2515 * active. This avoids bouncing, as the last receive
2554 * times need a full ARP monitor cycle to be updated. 2516 * times need a full ARP monitor cycle to be updated.
2555 */ 2517 */
2556 if (bond_time_in_interval(bond, slave->last_link_up, 2)) 2518 if (bond_time_in_interval(bond, slave->last_link_up, 2))
2557 continue; 2519 continue;
2558 2520
2559 /* 2521 /* Backup slave is down if:
2560 * Backup slave is down if:
2561 * - No current_arp_slave AND 2522 * - No current_arp_slave AND
2562 * - more than 3*delta since last receive AND 2523 * - more than 3*delta since last receive AND
2563 * - the bond has an IP address 2524 * - the bond has an IP address
@@ -2576,8 +2537,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
2576 commit++; 2537 commit++;
2577 } 2538 }
2578 2539
2579 /* 2540 /* Active slave is down if:
2580 * Active slave is down if:
2581 * - more than 2*delta since transmitting OR 2541 * - more than 2*delta since transmitting OR
2582 * - (more than 2*delta since receive AND 2542 * - (more than 2*delta since receive AND
2583 * the bond has an IP address) 2543 * the bond has an IP address)
@@ -2594,8 +2554,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
2594 return commit; 2554 return commit;
2595} 2555}
2596 2556
2597/* 2557/* Called to commit link state changes noted by inspection step of
2598 * Called to commit link state changes noted by inspection step of
2599 * active-backup mode ARP monitor. 2558 * active-backup mode ARP monitor.
2600 * 2559 *
2601 * Called with RTNL hold. 2560 * Called with RTNL hold.
@@ -2631,7 +2590,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
2631 slave->dev->name); 2590 slave->dev->name);
2632 2591
2633 if (!rtnl_dereference(bond->curr_active_slave) || 2592 if (!rtnl_dereference(bond->curr_active_slave) ||
2634 (slave == bond->primary_slave)) 2593 slave == rtnl_dereference(bond->primary_slave))
2635 goto do_failover; 2594 goto do_failover;
2636 2595
2637 } 2596 }
@@ -2663,21 +2622,17 @@ static void bond_ab_arp_commit(struct bonding *bond)
2663 } 2622 }
2664 2623
2665do_failover: 2624do_failover:
2666 ASSERT_RTNL();
2667 block_netpoll_tx(); 2625 block_netpoll_tx();
2668 write_lock_bh(&bond->curr_slave_lock);
2669 bond_select_active_slave(bond); 2626 bond_select_active_slave(bond);
2670 write_unlock_bh(&bond->curr_slave_lock);
2671 unblock_netpoll_tx(); 2627 unblock_netpoll_tx();
2672 } 2628 }
2673 2629
2674 bond_set_carrier(bond); 2630 bond_set_carrier(bond);
2675} 2631}
2676 2632
2677/* 2633/* Send ARP probes for active-backup mode ARP monitor.
2678 * Send ARP probes for active-backup mode ARP monitor.
2679 * 2634 *
2680 * Called with rcu_read_lock hold. 2635 * Called with rcu_read_lock held.
2681 */ 2636 */
2682static bool bond_ab_arp_probe(struct bonding *bond) 2637static bool bond_ab_arp_probe(struct bonding *bond)
2683{ 2638{
@@ -2817,9 +2772,7 @@ re_arm:
2817 2772
2818/*-------------------------- netdev event handling --------------------------*/ 2773/*-------------------------- netdev event handling --------------------------*/
2819 2774
2820/* 2775/* Change device name */
2821 * Change device name
2822 */
2823static int bond_event_changename(struct bonding *bond) 2776static int bond_event_changename(struct bonding *bond)
2824{ 2777{
2825 bond_remove_proc_entry(bond); 2778 bond_remove_proc_entry(bond);
@@ -2858,7 +2811,7 @@ static int bond_master_netdev_event(unsigned long event,
2858static int bond_slave_netdev_event(unsigned long event, 2811static int bond_slave_netdev_event(unsigned long event,
2859 struct net_device *slave_dev) 2812 struct net_device *slave_dev)
2860{ 2813{
2861 struct slave *slave = bond_slave_get_rtnl(slave_dev); 2814 struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
2862 struct bonding *bond; 2815 struct bonding *bond;
2863 struct net_device *bond_dev; 2816 struct net_device *bond_dev;
2864 u32 old_speed; 2817 u32 old_speed;
@@ -2872,6 +2825,7 @@ static int bond_slave_netdev_event(unsigned long event,
2872 return NOTIFY_DONE; 2825 return NOTIFY_DONE;
2873 bond_dev = slave->bond->dev; 2826 bond_dev = slave->bond->dev;
2874 bond = slave->bond; 2827 bond = slave->bond;
2828 primary = rtnl_dereference(bond->primary_slave);
2875 2829
2876 switch (event) { 2830 switch (event) {
2877 case NETDEV_UNREGISTER: 2831 case NETDEV_UNREGISTER:
@@ -2893,15 +2847,23 @@ static int bond_slave_netdev_event(unsigned long event,
2893 if (old_duplex != slave->duplex) 2847 if (old_duplex != slave->duplex)
2894 bond_3ad_adapter_duplex_changed(slave); 2848 bond_3ad_adapter_duplex_changed(slave);
2895 } 2849 }
2850 /* Refresh slave-array if applicable!
2851 * If the setup does not use miimon or arpmon (mode-specific!),
2852 * then these events will not cause the slave-array to be
2853 * refreshed. This will cause xmit to use a slave that is not
2854 * usable. Avoid such situation by refeshing the array at these
2855 * events. If these (miimon/arpmon) parameters are configured
2856 * then array gets refreshed twice and that should be fine!
2857 */
2858 if (bond_mode_uses_xmit_hash(bond))
2859 bond_update_slave_arr(bond, NULL);
2896 break; 2860 break;
2897 case NETDEV_DOWN: 2861 case NETDEV_DOWN:
2898 /* 2862 if (bond_mode_uses_xmit_hash(bond))
2899 * ... Or is it this? 2863 bond_update_slave_arr(bond, NULL);
2900 */
2901 break; 2864 break;
2902 case NETDEV_CHANGEMTU: 2865 case NETDEV_CHANGEMTU:
2903 /* 2866 /* TODO: Should slaves be allowed to
2904 * TODO: Should slaves be allowed to
2905 * independently alter their MTU? For 2867 * independently alter their MTU? For
2906 * an active-backup bond, slaves need 2868 * an active-backup bond, slaves need
2907 * not be the same type of device, so 2869 * not be the same type of device, so
@@ -2919,23 +2881,21 @@ static int bond_slave_netdev_event(unsigned long event,
2919 !bond->params.primary[0]) 2881 !bond->params.primary[0])
2920 break; 2882 break;
2921 2883
2922 if (slave == bond->primary_slave) { 2884 if (slave == primary) {
2923 /* slave's name changed - he's no longer primary */ 2885 /* slave's name changed - he's no longer primary */
2924 bond->primary_slave = NULL; 2886 RCU_INIT_POINTER(bond->primary_slave, NULL);
2925 } else if (!strcmp(slave_dev->name, bond->params.primary)) { 2887 } else if (!strcmp(slave_dev->name, bond->params.primary)) {
2926 /* we have a new primary slave */ 2888 /* we have a new primary slave */
2927 bond->primary_slave = slave; 2889 rcu_assign_pointer(bond->primary_slave, slave);
2928 } else { /* we didn't change primary - exit */ 2890 } else { /* we didn't change primary - exit */
2929 break; 2891 break;
2930 } 2892 }
2931 2893
2932 netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n", 2894 netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
2933 bond->primary_slave ? slave_dev->name : "none"); 2895 primary ? slave_dev->name : "none");
2934 2896
2935 block_netpoll_tx(); 2897 block_netpoll_tx();
2936 write_lock_bh(&bond->curr_slave_lock);
2937 bond_select_active_slave(bond); 2898 bond_select_active_slave(bond);
2938 write_unlock_bh(&bond->curr_slave_lock);
2939 unblock_netpoll_tx(); 2899 unblock_netpoll_tx();
2940 break; 2900 break;
2941 case NETDEV_FEAT_CHANGE: 2901 case NETDEV_FEAT_CHANGE:
@@ -2952,8 +2912,7 @@ static int bond_slave_netdev_event(unsigned long event,
2952 return NOTIFY_DONE; 2912 return NOTIFY_DONE;
2953} 2913}
2954 2914
2955/* 2915/* bond_netdev_event: handle netdev notifier chain events.
2956 * bond_netdev_event: handle netdev notifier chain events.
2957 * 2916 *
2958 * This function receives events for the netdev chain. The caller (an 2917 * This function receives events for the netdev chain. The caller (an
2959 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 2918 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
@@ -3081,6 +3040,7 @@ static void bond_work_init_all(struct bonding *bond)
3081 else 3040 else
3082 INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); 3041 INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
3083 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3042 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3043 INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
3084} 3044}
3085 3045
3086static void bond_work_cancel_all(struct bonding *bond) 3046static void bond_work_cancel_all(struct bonding *bond)
@@ -3090,6 +3050,7 @@ static void bond_work_cancel_all(struct bonding *bond)
3090 cancel_delayed_work_sync(&bond->alb_work); 3050 cancel_delayed_work_sync(&bond->alb_work);
3091 cancel_delayed_work_sync(&bond->ad_work); 3051 cancel_delayed_work_sync(&bond->ad_work);
3092 cancel_delayed_work_sync(&bond->mcast_work); 3052 cancel_delayed_work_sync(&bond->mcast_work);
3053 cancel_delayed_work_sync(&bond->slave_arr_work);
3093} 3054}
3094 3055
3095static int bond_open(struct net_device *bond_dev) 3056static int bond_open(struct net_device *bond_dev)
@@ -3099,9 +3060,7 @@ static int bond_open(struct net_device *bond_dev)
3099 struct slave *slave; 3060 struct slave *slave;
3100 3061
3101 /* reset slave->backup and slave->inactive */ 3062 /* reset slave->backup and slave->inactive */
3102 read_lock(&bond->lock);
3103 if (bond_has_slaves(bond)) { 3063 if (bond_has_slaves(bond)) {
3104 read_lock(&bond->curr_slave_lock);
3105 bond_for_each_slave(bond, slave, iter) { 3064 bond_for_each_slave(bond, slave, iter) {
3106 if (bond_uses_primary(bond) && 3065 if (bond_uses_primary(bond) &&
3107 slave != rcu_access_pointer(bond->curr_active_slave)) { 3066 slave != rcu_access_pointer(bond->curr_active_slave)) {
@@ -3112,9 +3071,7 @@ static int bond_open(struct net_device *bond_dev)
3112 BOND_SLAVE_NOTIFY_NOW); 3071 BOND_SLAVE_NOTIFY_NOW);
3113 } 3072 }
3114 } 3073 }
3115 read_unlock(&bond->curr_slave_lock);
3116 } 3074 }
3117 read_unlock(&bond->lock);
3118 3075
3119 bond_work_init_all(bond); 3076 bond_work_init_all(bond);
3120 3077
@@ -3143,6 +3100,9 @@ static int bond_open(struct net_device *bond_dev)
3143 bond_3ad_initiate_agg_selection(bond, 1); 3100 bond_3ad_initiate_agg_selection(bond, 1);
3144 } 3101 }
3145 3102
3103 if (bond_mode_uses_xmit_hash(bond))
3104 bond_update_slave_arr(bond, NULL);
3105
3146 return 0; 3106 return 0;
3147} 3107}
3148 3108
@@ -3167,40 +3127,43 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
3167 struct list_head *iter; 3127 struct list_head *iter;
3168 struct slave *slave; 3128 struct slave *slave;
3169 3129
3170 memset(stats, 0, sizeof(*stats)); 3130 memcpy(stats, &bond->bond_stats, sizeof(*stats));
3171 3131
3172 read_lock_bh(&bond->lock);
3173 bond_for_each_slave(bond, slave, iter) { 3132 bond_for_each_slave(bond, slave, iter) {
3174 const struct rtnl_link_stats64 *sstats = 3133 const struct rtnl_link_stats64 *sstats =
3175 dev_get_stats(slave->dev, &temp); 3134 dev_get_stats(slave->dev, &temp);
3135 struct rtnl_link_stats64 *pstats = &slave->slave_stats;
3176 3136
3177 stats->rx_packets += sstats->rx_packets; 3137 stats->rx_packets += sstats->rx_packets - pstats->rx_packets;
3178 stats->rx_bytes += sstats->rx_bytes; 3138 stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes;
3179 stats->rx_errors += sstats->rx_errors; 3139 stats->rx_errors += sstats->rx_errors - pstats->rx_errors;
3180 stats->rx_dropped += sstats->rx_dropped; 3140 stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped;
3181 3141
3182 stats->tx_packets += sstats->tx_packets; 3142 stats->tx_packets += sstats->tx_packets - pstats->tx_packets;;
3183 stats->tx_bytes += sstats->tx_bytes; 3143 stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes;
3184 stats->tx_errors += sstats->tx_errors; 3144 stats->tx_errors += sstats->tx_errors - pstats->tx_errors;
3185 stats->tx_dropped += sstats->tx_dropped; 3145 stats->tx_dropped += sstats->tx_dropped - pstats->tx_dropped;
3186 3146
3187 stats->multicast += sstats->multicast; 3147 stats->multicast += sstats->multicast - pstats->multicast;
3188 stats->collisions += sstats->collisions; 3148 stats->collisions += sstats->collisions - pstats->collisions;
3189 3149
3190 stats->rx_length_errors += sstats->rx_length_errors; 3150 stats->rx_length_errors += sstats->rx_length_errors - pstats->rx_length_errors;
3191 stats->rx_over_errors += sstats->rx_over_errors; 3151 stats->rx_over_errors += sstats->rx_over_errors - pstats->rx_over_errors;
3192 stats->rx_crc_errors += sstats->rx_crc_errors; 3152 stats->rx_crc_errors += sstats->rx_crc_errors - pstats->rx_crc_errors;
3193 stats->rx_frame_errors += sstats->rx_frame_errors; 3153 stats->rx_frame_errors += sstats->rx_frame_errors - pstats->rx_frame_errors;
3194 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3154 stats->rx_fifo_errors += sstats->rx_fifo_errors - pstats->rx_fifo_errors;
3195 stats->rx_missed_errors += sstats->rx_missed_errors; 3155 stats->rx_missed_errors += sstats->rx_missed_errors - pstats->rx_missed_errors;
3196 3156
3197 stats->tx_aborted_errors += sstats->tx_aborted_errors; 3157 stats->tx_aborted_errors += sstats->tx_aborted_errors - pstats->tx_aborted_errors;
3198 stats->tx_carrier_errors += sstats->tx_carrier_errors; 3158 stats->tx_carrier_errors += sstats->tx_carrier_errors - pstats->tx_carrier_errors;
3199 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3159 stats->tx_fifo_errors += sstats->tx_fifo_errors - pstats->tx_fifo_errors;
3200 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3160 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors - pstats->tx_heartbeat_errors;
3201 stats->tx_window_errors += sstats->tx_window_errors; 3161 stats->tx_window_errors += sstats->tx_window_errors - pstats->tx_window_errors;
3162
3163 /* save off the slave stats for the next run */
3164 memcpy(pstats, sstats, sizeof(*sstats));
3202 } 3165 }
3203 read_unlock_bh(&bond->lock); 3166 memcpy(&bond->bond_stats, stats, sizeof(*stats));
3204 3167
3205 return stats; 3168 return stats;
3206} 3169}
@@ -3229,24 +3192,17 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
3229 mii->phy_id = 0; 3192 mii->phy_id = 0;
3230 /* Fall Through */ 3193 /* Fall Through */
3231 case SIOCGMIIREG: 3194 case SIOCGMIIREG:
3232 /* 3195 /* We do this again just in case we were called by SIOCGMIIREG
3233 * We do this again just in case we were called by SIOCGMIIREG
3234 * instead of SIOCGMIIPHY. 3196 * instead of SIOCGMIIPHY.
3235 */ 3197 */
3236 mii = if_mii(ifr); 3198 mii = if_mii(ifr);
3237 if (!mii) 3199 if (!mii)
3238 return -EINVAL; 3200 return -EINVAL;
3239 3201
3240
3241 if (mii->reg_num == 1) { 3202 if (mii->reg_num == 1) {
3242 mii->val_out = 0; 3203 mii->val_out = 0;
3243 read_lock(&bond->lock);
3244 read_lock(&bond->curr_slave_lock);
3245 if (netif_carrier_ok(bond->dev)) 3204 if (netif_carrier_ok(bond->dev))
3246 mii->val_out = BMSR_LSTATUS; 3205 mii->val_out = BMSR_LSTATUS;
3247
3248 read_unlock(&bond->curr_slave_lock);
3249 read_unlock(&bond->lock);
3250 } 3206 }
3251 3207
3252 return 0; 3208 return 0;
@@ -3277,7 +3233,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
3277 3233
3278 return res; 3234 return res;
3279 default: 3235 default:
3280 /* Go on */
3281 break; 3236 break;
3282 } 3237 }
3283 3238
@@ -3339,7 +3294,6 @@ static void bond_set_rx_mode(struct net_device *bond_dev)
3339 struct list_head *iter; 3294 struct list_head *iter;
3340 struct slave *slave; 3295 struct slave *slave;
3341 3296
3342
3343 rcu_read_lock(); 3297 rcu_read_lock();
3344 if (bond_uses_primary(bond)) { 3298 if (bond_uses_primary(bond)) {
3345 slave = rcu_dereference(bond->curr_active_slave); 3299 slave = rcu_dereference(bond->curr_active_slave);
@@ -3377,8 +3331,7 @@ static int bond_neigh_init(struct neighbour *n)
3377 if (ret) 3331 if (ret)
3378 return ret; 3332 return ret;
3379 3333
3380 /* 3334 /* Assign slave's neigh_cleanup to neighbour in case cleanup is called
3381 * Assign slave's neigh_cleanup to neighbour in case cleanup is called
3382 * after the last slave has been detached. Assumes that all slaves 3335 * after the last slave has been detached. Assumes that all slaves
3383 * utilize the same neigh_cleanup (true at this writing as only user 3336 * utilize the same neigh_cleanup (true at this writing as only user
3384 * is ipoib). 3337 * is ipoib).
@@ -3391,8 +3344,7 @@ static int bond_neigh_init(struct neighbour *n)
3391 return parms.neigh_setup(n); 3344 return parms.neigh_setup(n);
3392} 3345}
3393 3346
3394/* 3347/* The bonding ndo_neigh_setup is called at init time beofre any
3395 * The bonding ndo_neigh_setup is called at init time beofre any
3396 * slave exists. So we must declare proxy setup function which will 3348 * slave exists. So we must declare proxy setup function which will
3397 * be used at run time to resolve the actual slave neigh param setup. 3349 * be used at run time to resolve the actual slave neigh param setup.
3398 * 3350 *
@@ -3410,9 +3362,7 @@ static int bond_neigh_setup(struct net_device *dev,
3410 return 0; 3362 return 0;
3411} 3363}
3412 3364
3413/* 3365/* Change the MTU of all of a master's slaves to match the master */
3414 * Change the MTU of all of a master's slaves to match the master
3415 */
3416static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3366static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
3417{ 3367{
3418 struct bonding *bond = netdev_priv(bond_dev); 3368 struct bonding *bond = netdev_priv(bond_dev);
@@ -3422,21 +3372,6 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
3422 3372
3423 netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu); 3373 netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);
3424 3374
3425 /* Can't hold bond->lock with bh disabled here since
3426 * some base drivers panic. On the other hand we can't
3427 * hold bond->lock without bh disabled because we'll
3428 * deadlock. The only solution is to rely on the fact
3429 * that we're under rtnl_lock here, and the slaves
3430 * list won't change. This doesn't solve the problem
3431 * of setting the slave's MTU while it is
3432 * transmitting, but the assumption is that the base
3433 * driver can handle that.
3434 *
3435 * TODO: figure out a way to safely iterate the slaves
3436 * list, but without holding a lock around the actual
3437 * call to the base driver.
3438 */
3439
3440 bond_for_each_slave(bond, slave, iter) { 3375 bond_for_each_slave(bond, slave, iter) {
3441 netdev_dbg(bond_dev, "s %p c_m %p\n", 3376 netdev_dbg(bond_dev, "s %p c_m %p\n",
3442 slave, slave->dev->netdev_ops->ndo_change_mtu); 3377 slave, slave->dev->netdev_ops->ndo_change_mtu);
@@ -3480,8 +3415,7 @@ unwind:
3480 return res; 3415 return res;
3481} 3416}
3482 3417
3483/* 3418/* Change HW address
3484 * Change HW address
3485 * 3419 *
3486 * Note that many devices must be down to change the HW address, and 3420 * Note that many devices must be down to change the HW address, and
3487 * downing the master releases all slaves. We can make bonds full of 3421 * downing the master releases all slaves. We can make bonds full of
@@ -3511,21 +3445,6 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
3511 if (!is_valid_ether_addr(sa->sa_data)) 3445 if (!is_valid_ether_addr(sa->sa_data))
3512 return -EADDRNOTAVAIL; 3446 return -EADDRNOTAVAIL;
3513 3447
3514 /* Can't hold bond->lock with bh disabled here since
3515 * some base drivers panic. On the other hand we can't
3516 * hold bond->lock without bh disabled because we'll
3517 * deadlock. The only solution is to rely on the fact
3518 * that we're under rtnl_lock here, and the slaves
3519 * list won't change. This doesn't solve the problem
3520 * of setting the slave's hw address while it is
3521 * transmitting, but the assumption is that the base
3522 * driver can handle that.
3523 *
3524 * TODO: figure out a way to safely iterate the slaves
3525 * list, but without holding a lock around the actual
3526 * call to the base driver.
3527 */
3528
3529 bond_for_each_slave(bond, slave, iter) { 3448 bond_for_each_slave(bond, slave, iter) {
3530 netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name); 3449 netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name);
3531 res = dev_set_mac_address(slave->dev, addr); 3450 res = dev_set_mac_address(slave->dev, addr);
@@ -3654,7 +3573,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
3654 */ 3573 */
3655 if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { 3574 if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
3656 slave = rcu_dereference(bond->curr_active_slave); 3575 slave = rcu_dereference(bond->curr_active_slave);
3657 if (slave && bond_slave_can_tx(slave)) 3576 if (slave)
3658 bond_dev_queue_xmit(bond, skb, slave->dev); 3577 bond_dev_queue_xmit(bond, skb, slave->dev);
3659 else 3578 else
3660 bond_xmit_slave_id(bond, skb, 0); 3579 bond_xmit_slave_id(bond, skb, 0);
@@ -3672,8 +3591,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
3672 return NETDEV_TX_OK; 3591 return NETDEV_TX_OK;
3673} 3592}
3674 3593
3675/* 3594/* In active-backup mode, we know that bond->curr_active_slave is always valid if
3676 * in active-backup mode, we know that bond->curr_active_slave is always valid if
3677 * the bond has a usable interface. 3595 * the bond has a usable interface.
3678 */ 3596 */
3679static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 3597static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
@@ -3690,20 +3608,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
3690 return NETDEV_TX_OK; 3608 return NETDEV_TX_OK;
3691} 3609}
3692 3610
3693/* In bond_xmit_xor() , we determine the output device by using a pre- 3611/* Use this to update slave_array when (a) it's not appropriate to update
3694 * determined xmit_hash_policy(), If the selected device is not enabled, 3612 * slave_array right away (note that update_slave_array() may sleep)
3695 * find the next active slave. 3613 * and / or (b) RTNL is not held.
3696 */ 3614 */
3697static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 3615void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
3698{ 3616{
3699 struct bonding *bond = netdev_priv(bond_dev); 3617 queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
3700 int slave_cnt = ACCESS_ONCE(bond->slave_cnt); 3618}
3701 3619
3702 if (likely(slave_cnt)) 3620/* Slave array work handler. Holds only RTNL */
3703 bond_xmit_slave_id(bond, skb, 3621static void bond_slave_arr_handler(struct work_struct *work)
3704 bond_xmit_hash(bond, skb) % slave_cnt); 3622{
3705 else 3623 struct bonding *bond = container_of(work, struct bonding,
3624 slave_arr_work.work);
3625 int ret;
3626
3627 if (!rtnl_trylock())
3628 goto err;
3629
3630 ret = bond_update_slave_arr(bond, NULL);
3631 rtnl_unlock();
3632 if (ret) {
3633 pr_warn_ratelimited("Failed to update slave array from WT\n");
3634 goto err;
3635 }
3636 return;
3637
3638err:
3639 bond_slave_arr_work_rearm(bond, 1);
3640}
3641
3642/* Build the usable slaves array in control path for modes that use xmit-hash
3643 * to determine the slave interface -
3644 * (a) BOND_MODE_8023AD
3645 * (b) BOND_MODE_XOR
3646 * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
3647 *
3648 * The caller is expected to hold RTNL only and NO other lock!
3649 */
3650int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
3651{
3652 struct slave *slave;
3653 struct list_head *iter;
3654 struct bond_up_slave *new_arr, *old_arr;
3655 int slaves_in_agg;
3656 int agg_id = 0;
3657 int ret = 0;
3658
3659#ifdef CONFIG_LOCKDEP
3660 WARN_ON(lockdep_is_held(&bond->mode_lock));
3661#endif
3662
3663 new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
3664 GFP_KERNEL);
3665 if (!new_arr) {
3666 ret = -ENOMEM;
3667 pr_err("Failed to build slave-array.\n");
3668 goto out;
3669 }
3670 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3671 struct ad_info ad_info;
3672
3673 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3674 pr_debug("bond_3ad_get_active_agg_info failed\n");
3675 kfree_rcu(new_arr, rcu);
3676 /* No active aggragator means it's not safe to use
3677 * the previous array.
3678 */
3679 old_arr = rtnl_dereference(bond->slave_arr);
3680 if (old_arr) {
3681 RCU_INIT_POINTER(bond->slave_arr, NULL);
3682 kfree_rcu(old_arr, rcu);
3683 }
3684 goto out;
3685 }
3686 slaves_in_agg = ad_info.ports;
3687 agg_id = ad_info.aggregator_id;
3688 }
3689 bond_for_each_slave(bond, slave, iter) {
3690 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3691 struct aggregator *agg;
3692
3693 agg = SLAVE_AD_INFO(slave)->port.aggregator;
3694 if (!agg || agg->aggregator_identifier != agg_id)
3695 continue;
3696 }
3697 if (!bond_slave_can_tx(slave))
3698 continue;
3699 if (skipslave == slave)
3700 continue;
3701 new_arr->arr[new_arr->count++] = slave;
3702 }
3703
3704 old_arr = rtnl_dereference(bond->slave_arr);
3705 rcu_assign_pointer(bond->slave_arr, new_arr);
3706 if (old_arr)
3707 kfree_rcu(old_arr, rcu);
3708out:
3709 if (ret != 0 && skipslave) {
3710 int idx;
3711
3712 /* Rare situation where caller has asked to skip a specific
3713 * slave but allocation failed (most likely!). BTW this is
3714 * only possible when the call is initiated from
3715 * __bond_release_one(). In this situation; overwrite the
3716 * skipslave entry in the array with the last entry from the
3717 * array to avoid a situation where the xmit path may choose
3718 * this to-be-skipped slave to send a packet out.
3719 */
3720 old_arr = rtnl_dereference(bond->slave_arr);
3721 for (idx = 0; idx < old_arr->count; idx++) {
3722 if (skipslave == old_arr->arr[idx]) {
3723 old_arr->arr[idx] =
3724 old_arr->arr[old_arr->count-1];
3725 old_arr->count--;
3726 break;
3727 }
3728 }
3729 }
3730 return ret;
3731}
3732
3733/* Use this Xmit function for 3AD as well as XOR modes. The current
3734 * usable slave array is formed in the control path. The xmit function
3735 * just calculates hash and sends the packet out.
3736 */
3737int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
3738{
3739 struct bonding *bond = netdev_priv(dev);
3740 struct slave *slave;
3741 struct bond_up_slave *slaves;
3742 unsigned int count;
3743
3744 slaves = rcu_dereference(bond->slave_arr);
3745 count = slaves ? ACCESS_ONCE(slaves->count) : 0;
3746 if (likely(count)) {
3747 slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
3748 bond_dev_queue_xmit(bond, skb, slave->dev);
3749 } else {
3706 dev_kfree_skb_any(skb); 3750 dev_kfree_skb_any(skb);
3751 atomic_long_inc(&dev->tx_dropped);
3752 }
3707 3753
3708 return NETDEV_TX_OK; 3754 return NETDEV_TX_OK;
3709} 3755}
@@ -3726,7 +3772,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
3726 bond_dev->name, __func__); 3772 bond_dev->name, __func__);
3727 continue; 3773 continue;
3728 } 3774 }
3729 /* bond_dev_queue_xmit always returns 0 */
3730 bond_dev_queue_xmit(bond, skb2, slave->dev); 3775 bond_dev_queue_xmit(bond, skb2, slave->dev);
3731 } 3776 }
3732 } 3777 }
@@ -3740,9 +3785,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
3740 3785
3741/*------------------------- Device initialization ---------------------------*/ 3786/*------------------------- Device initialization ---------------------------*/
3742 3787
3743/* 3788/* Lookup the slave that corresponds to a qid */
3744 * Lookup the slave that corresponds to a qid
3745 */
3746static inline int bond_slave_override(struct bonding *bond, 3789static inline int bond_slave_override(struct bonding *bond,
3747 struct sk_buff *skb) 3790 struct sk_buff *skb)
3748{ 3791{
@@ -3771,17 +3814,14 @@ static inline int bond_slave_override(struct bonding *bond,
3771static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, 3814static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
3772 void *accel_priv, select_queue_fallback_t fallback) 3815 void *accel_priv, select_queue_fallback_t fallback)
3773{ 3816{
3774 /* 3817 /* This helper function exists to help dev_pick_tx get the correct
3775 * This helper function exists to help dev_pick_tx get the correct
3776 * destination queue. Using a helper function skips a call to 3818 * destination queue. Using a helper function skips a call to
3777 * skb_tx_hash and will put the skbs in the queue we expect on their 3819 * skb_tx_hash and will put the skbs in the queue we expect on their
3778 * way down to the bonding driver. 3820 * way down to the bonding driver.
3779 */ 3821 */
3780 u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; 3822 u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
3781 3823
3782 /* 3824 /* Save the original txq to restore before passing to the driver */
3783 * Save the original txq to restore before passing to the driver
3784 */
3785 qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; 3825 qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
3786 3826
3787 if (unlikely(txq >= dev->real_num_tx_queues)) { 3827 if (unlikely(txq >= dev->real_num_tx_queues)) {
@@ -3805,12 +3845,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
3805 return bond_xmit_roundrobin(skb, dev); 3845 return bond_xmit_roundrobin(skb, dev);
3806 case BOND_MODE_ACTIVEBACKUP: 3846 case BOND_MODE_ACTIVEBACKUP:
3807 return bond_xmit_activebackup(skb, dev); 3847 return bond_xmit_activebackup(skb, dev);
3848 case BOND_MODE_8023AD:
3808 case BOND_MODE_XOR: 3849 case BOND_MODE_XOR:
3809 return bond_xmit_xor(skb, dev); 3850 return bond_3ad_xor_xmit(skb, dev);
3810 case BOND_MODE_BROADCAST: 3851 case BOND_MODE_BROADCAST:
3811 return bond_xmit_broadcast(skb, dev); 3852 return bond_xmit_broadcast(skb, dev);
3812 case BOND_MODE_8023AD:
3813 return bond_3ad_xmit_xor(skb, dev);
3814 case BOND_MODE_ALB: 3853 case BOND_MODE_ALB:
3815 return bond_alb_xmit(skb, dev); 3854 return bond_alb_xmit(skb, dev);
3816 case BOND_MODE_TLB: 3855 case BOND_MODE_TLB:
@@ -3829,8 +3868,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
3829 struct bonding *bond = netdev_priv(dev); 3868 struct bonding *bond = netdev_priv(dev);
3830 netdev_tx_t ret = NETDEV_TX_OK; 3869 netdev_tx_t ret = NETDEV_TX_OK;
3831 3870
3832 /* 3871 /* If we risk deadlock from transmitting this in the
3833 * If we risk deadlock from transmitting this in the
3834 * netpoll path, tell netpoll to queue the frame for later tx 3872 * netpoll path, tell netpoll to queue the frame for later tx
3835 */ 3873 */
3836 if (unlikely(is_netpoll_tx_blocked(dev))) 3874 if (unlikely(is_netpoll_tx_blocked(dev)))
@@ -3862,7 +3900,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
3862 * the true receive or transmit bandwidth (not all modes are symmetric) 3900 * the true receive or transmit bandwidth (not all modes are symmetric)
3863 * this is an accurate maximum. 3901 * this is an accurate maximum.
3864 */ 3902 */
3865 read_lock(&bond->lock);
3866 bond_for_each_slave(bond, slave, iter) { 3903 bond_for_each_slave(bond, slave, iter) {
3867 if (bond_slave_can_tx(slave)) { 3904 if (bond_slave_can_tx(slave)) {
3868 if (slave->speed != SPEED_UNKNOWN) 3905 if (slave->speed != SPEED_UNKNOWN)
@@ -3873,7 +3910,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
3873 } 3910 }
3874 } 3911 }
3875 ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); 3912 ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
3876 read_unlock(&bond->lock);
3877 3913
3878 return 0; 3914 return 0;
3879} 3915}
@@ -3935,9 +3971,7 @@ void bond_setup(struct net_device *bond_dev)
3935{ 3971{
3936 struct bonding *bond = netdev_priv(bond_dev); 3972 struct bonding *bond = netdev_priv(bond_dev);
3937 3973
3938 /* initialize rwlocks */ 3974 spin_lock_init(&bond->mode_lock);
3939 rwlock_init(&bond->lock);
3940 rwlock_init(&bond->curr_slave_lock);
3941 bond->params = bonding_defaults; 3975 bond->params = bonding_defaults;
3942 3976
3943 /* Initialize pointers */ 3977 /* Initialize pointers */
@@ -3958,8 +3992,7 @@ void bond_setup(struct net_device *bond_dev)
3958 bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT; 3992 bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
3959 bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); 3993 bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
3960 3994
3961 /* don't acquire bond device's netif_tx_lock when 3995 /* don't acquire bond device's netif_tx_lock when transmitting */
3962 * transmitting */
3963 bond_dev->features |= NETIF_F_LLTX; 3996 bond_dev->features |= NETIF_F_LLTX;
3964 3997
3965 /* By default, we declare the bond to be fully 3998 /* By default, we declare the bond to be fully
@@ -3982,15 +4015,15 @@ void bond_setup(struct net_device *bond_dev)
3982 bond_dev->features |= bond_dev->hw_features; 4015 bond_dev->features |= bond_dev->hw_features;
3983} 4016}
3984 4017
3985/* 4018/* Destroy a bonding device.
3986* Destroy a bonding device. 4019 * Must be under rtnl_lock when this function is called.
3987* Must be under rtnl_lock when this function is called. 4020 */
3988*/
3989static void bond_uninit(struct net_device *bond_dev) 4021static void bond_uninit(struct net_device *bond_dev)
3990{ 4022{
3991 struct bonding *bond = netdev_priv(bond_dev); 4023 struct bonding *bond = netdev_priv(bond_dev);
3992 struct list_head *iter; 4024 struct list_head *iter;
3993 struct slave *slave; 4025 struct slave *slave;
4026 struct bond_up_slave *arr;
3994 4027
3995 bond_netpoll_cleanup(bond_dev); 4028 bond_netpoll_cleanup(bond_dev);
3996 4029
@@ -3999,6 +4032,12 @@ static void bond_uninit(struct net_device *bond_dev)
3999 __bond_release_one(bond_dev, slave->dev, true); 4032 __bond_release_one(bond_dev, slave->dev, true);
4000 netdev_info(bond_dev, "Released all slaves\n"); 4033 netdev_info(bond_dev, "Released all slaves\n");
4001 4034
4035 arr = rtnl_dereference(bond->slave_arr);
4036 if (arr) {
4037 RCU_INIT_POINTER(bond->slave_arr, NULL);
4038 kfree_rcu(arr, rcu);
4039 }
4040
4002 list_del(&bond->bond_list); 4041 list_del(&bond->bond_list);
4003 4042
4004 bond_debug_unregister(bond); 4043 bond_debug_unregister(bond);
@@ -4013,9 +4052,7 @@ static int bond_check_params(struct bond_params *params)
4013 const struct bond_opt_value *valptr; 4052 const struct bond_opt_value *valptr;
4014 int arp_all_targets_value; 4053 int arp_all_targets_value;
4015 4054
4016 /* 4055 /* Convert string parameters. */
4017 * Convert string parameters.
4018 */
4019 if (mode) { 4056 if (mode) {
4020 bond_opt_initstr(&newval, mode); 4057 bond_opt_initstr(&newval, mode);
4021 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); 4058 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval);
@@ -4192,9 +4229,9 @@ static int bond_check_params(struct bond_params *params)
4192 4229
4193 for (arp_ip_count = 0, i = 0; 4230 for (arp_ip_count = 0, i = 0;
4194 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { 4231 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {
4195 /* not complete check, but should be good enough to
4196 catch mistakes */
4197 __be32 ip; 4232 __be32 ip;
4233
4234 /* not a complete check, but good enough to catch mistakes */
4198 if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || 4235 if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
4199 !bond_is_ip_target_ok(ip)) { 4236 !bond_is_ip_target_ok(ip)) {
4200 pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", 4237 pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
@@ -4377,26 +4414,14 @@ static void bond_set_lockdep_class(struct net_device *dev)
4377 dev->qdisc_tx_busylock = &bonding_tx_busylock_key; 4414 dev->qdisc_tx_busylock = &bonding_tx_busylock_key;
4378} 4415}
4379 4416
4380/* 4417/* Called from registration process */
4381 * Called from registration process
4382 */
4383static int bond_init(struct net_device *bond_dev) 4418static int bond_init(struct net_device *bond_dev)
4384{ 4419{
4385 struct bonding *bond = netdev_priv(bond_dev); 4420 struct bonding *bond = netdev_priv(bond_dev);
4386 struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); 4421 struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
4387 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
4388 4422
4389 netdev_dbg(bond_dev, "Begin bond_init\n"); 4423 netdev_dbg(bond_dev, "Begin bond_init\n");
4390 4424
4391 /*
4392 * Initialize locks that may be required during
4393 * en/deslave operations. All of the bond_open work
4394 * (of which this is part) should really be moved to
4395 * a phase prior to dev_open
4396 */
4397 spin_lock_init(&(bond_info->tx_hashtbl_lock));
4398 spin_lock_init(&(bond_info->rx_hashtbl_lock));
4399
4400 bond->wq = create_singlethread_workqueue(bond_dev->name); 4425 bond->wq = create_singlethread_workqueue(bond_dev->name);
4401 if (!bond->wq) 4426 if (!bond->wq)
4402 return -ENOMEM; 4427 return -ENOMEM;
@@ -4543,9 +4568,7 @@ static void __exit bonding_exit(void)
4543 unregister_pernet_subsys(&bond_net_ops); 4568 unregister_pernet_subsys(&bond_net_ops);
4544 4569
4545#ifdef CONFIG_NET_POLL_CONTROLLER 4570#ifdef CONFIG_NET_POLL_CONTROLLER
4546 /* 4571 /* Make sure we don't have an imbalance on our netpoll blocking */
4547 * Make sure we don't have an imbalance on our netpoll blocking
4548 */
4549 WARN_ON(atomic_read(&netpoll_block_tx)); 4572 WARN_ON(atomic_read(&netpoll_block_tx));
4550#endif 4573#endif
4551} 4574}
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index d163e112f04c..c13d83e15ace 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -96,6 +96,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
96 [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, 96 [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED },
97}; 97};
98 98
99static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
100 [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 },
101};
102
99static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) 103static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
100{ 104{
101 if (tb[IFLA_ADDRESS]) { 105 if (tb[IFLA_ADDRESS]) {
@@ -107,6 +111,33 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
107 return 0; 111 return 0;
108} 112}
109 113
114static int bond_slave_changelink(struct net_device *bond_dev,
115 struct net_device *slave_dev,
116 struct nlattr *tb[], struct nlattr *data[])
117{
118 struct bonding *bond = netdev_priv(bond_dev);
119 struct bond_opt_value newval;
120 int err;
121
122 if (!data)
123 return 0;
124
125 if (data[IFLA_BOND_SLAVE_QUEUE_ID]) {
126 u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]);
127 char queue_id_str[IFNAMSIZ + 7];
128
129 /* queue_id option setting expects slave_name:queue_id */
130 snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n",
131 slave_dev->name, queue_id);
132 bond_opt_initstr(&newval, queue_id_str);
133 err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval);
134 if (err)
135 return err;
136 }
137
138 return 0;
139}
140
110static int bond_changelink(struct net_device *bond_dev, 141static int bond_changelink(struct net_device *bond_dev,
111 struct nlattr *tb[], struct nlattr *data[]) 142 struct nlattr *tb[], struct nlattr *data[])
112{ 143{
@@ -412,6 +443,7 @@ static int bond_fill_info(struct sk_buff *skb,
412 unsigned int packets_per_slave; 443 unsigned int packets_per_slave;
413 int ifindex, i, targets_added; 444 int ifindex, i, targets_added;
414 struct nlattr *targets; 445 struct nlattr *targets;
446 struct slave *primary;
415 447
416 if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) 448 if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond)))
417 goto nla_put_failure; 449 goto nla_put_failure;
@@ -461,9 +493,9 @@ static int bond_fill_info(struct sk_buff *skb,
461 bond->params.arp_all_targets)) 493 bond->params.arp_all_targets))
462 goto nla_put_failure; 494 goto nla_put_failure;
463 495
464 if (bond->primary_slave && 496 primary = rtnl_dereference(bond->primary_slave);
465 nla_put_u32(skb, IFLA_BOND_PRIMARY, 497 if (primary &&
466 bond->primary_slave->dev->ifindex)) 498 nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex))
467 goto nla_put_failure; 499 goto nla_put_failure;
468 500
469 if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, 501 if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT,
@@ -562,6 +594,9 @@ struct rtnl_link_ops bond_link_ops __read_mostly = {
562 .get_num_tx_queues = bond_get_num_tx_queues, 594 .get_num_tx_queues = bond_get_num_tx_queues,
563 .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number 595 .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number
564 as for TX queues */ 596 as for TX queues */
597 .slave_maxtype = IFLA_BOND_SLAVE_MAX,
598 .slave_policy = bond_slave_policy,
599 .slave_changelink = bond_slave_changelink,
565 .get_slave_size = bond_get_slave_size, 600 .get_slave_size = bond_get_slave_size,
566 .fill_slave_info = bond_fill_slave_info, 601 .fill_slave_info = bond_fill_slave_info,
567}; 602};
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index dc73463c2c23..b62697f4a3de 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond,
625out: 625out:
626 if (ret) 626 if (ret)
627 bond_opt_error_interpret(bond, opt, ret, val); 627 bond_opt_error_interpret(bond, opt, ret, val);
628 else
629 call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev);
628 630
629 return ret; 631 return ret;
630} 632}
@@ -732,15 +734,13 @@ static int bond_option_active_slave_set(struct bonding *bond,
732 } 734 }
733 735
734 block_netpoll_tx(); 736 block_netpoll_tx();
735 write_lock_bh(&bond->curr_slave_lock);
736
737 /* check to see if we are clearing active */ 737 /* check to see if we are clearing active */
738 if (!slave_dev) { 738 if (!slave_dev) {
739 netdev_info(bond->dev, "Clearing current active slave\n"); 739 netdev_info(bond->dev, "Clearing current active slave\n");
740 RCU_INIT_POINTER(bond->curr_active_slave, NULL); 740 RCU_INIT_POINTER(bond->curr_active_slave, NULL);
741 bond_select_active_slave(bond); 741 bond_select_active_slave(bond);
742 } else { 742 } else {
743 struct slave *old_active = bond_deref_active_protected(bond); 743 struct slave *old_active = rtnl_dereference(bond->curr_active_slave);
744 struct slave *new_active = bond_slave_get_rtnl(slave_dev); 744 struct slave *new_active = bond_slave_get_rtnl(slave_dev);
745 745
746 BUG_ON(!new_active); 746 BUG_ON(!new_active);
@@ -763,8 +763,6 @@ static int bond_option_active_slave_set(struct bonding *bond,
763 } 763 }
764 } 764 }
765 } 765 }
766
767 write_unlock_bh(&bond->curr_slave_lock);
768 unblock_netpoll_tx(); 766 unblock_netpoll_tx();
769 767
770 return ret; 768 return ret;
@@ -953,14 +951,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
953 951
954static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) 952static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
955{ 953{
956 int ret; 954 return _bond_option_arp_ip_target_add(bond, target);
957
958 /* not to race with bond_arp_rcv */
959 write_lock_bh(&bond->lock);
960 ret = _bond_option_arp_ip_target_add(bond, target);
961 write_unlock_bh(&bond->lock);
962
963 return ret;
964} 955}
965 956
966static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) 957static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
@@ -989,9 +980,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
989 980
990 netdev_info(bond->dev, "Removing ARP target %pI4\n", &target); 981 netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
991 982
992 /* not to race with bond_arp_rcv */
993 write_lock_bh(&bond->lock);
994
995 bond_for_each_slave(bond, slave, iter) { 983 bond_for_each_slave(bond, slave, iter) {
996 targets_rx = slave->target_last_arp_rx; 984 targets_rx = slave->target_last_arp_rx;
997 for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) 985 for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++)
@@ -1002,8 +990,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
1002 targets[i] = targets[i+1]; 990 targets[i] = targets[i+1];
1003 targets[i] = 0; 991 targets[i] = 0;
1004 992
1005 write_unlock_bh(&bond->lock);
1006
1007 return 0; 993 return 0;
1008} 994}
1009 995
@@ -1011,11 +997,8 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond)
1011{ 997{
1012 int i; 998 int i;
1013 999
1014 /* not to race with bond_arp_rcv */
1015 write_lock_bh(&bond->lock);
1016 for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) 1000 for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
1017 _bond_options_arp_ip_target_set(bond, i, 0, 0); 1001 _bond_options_arp_ip_target_set(bond, i, 0, 0);
1018 write_unlock_bh(&bond->lock);
1019} 1002}
1020 1003
1021static int bond_option_arp_ip_targets_set(struct bonding *bond, 1004static int bond_option_arp_ip_targets_set(struct bonding *bond,
@@ -1079,8 +1062,6 @@ static int bond_option_primary_set(struct bonding *bond,
1079 struct slave *slave; 1062 struct slave *slave;
1080 1063
1081 block_netpoll_tx(); 1064 block_netpoll_tx();
1082 read_lock(&bond->lock);
1083 write_lock_bh(&bond->curr_slave_lock);
1084 1065
1085 p = strchr(primary, '\n'); 1066 p = strchr(primary, '\n');
1086 if (p) 1067 if (p)
@@ -1088,7 +1069,7 @@ static int bond_option_primary_set(struct bonding *bond,
1088 /* check to see if we are clearing primary */ 1069 /* check to see if we are clearing primary */
1089 if (!strlen(primary)) { 1070 if (!strlen(primary)) {
1090 netdev_info(bond->dev, "Setting primary slave to None\n"); 1071 netdev_info(bond->dev, "Setting primary slave to None\n");
1091 bond->primary_slave = NULL; 1072 RCU_INIT_POINTER(bond->primary_slave, NULL);
1092 memset(bond->params.primary, 0, sizeof(bond->params.primary)); 1073 memset(bond->params.primary, 0, sizeof(bond->params.primary));
1093 bond_select_active_slave(bond); 1074 bond_select_active_slave(bond);
1094 goto out; 1075 goto out;
@@ -1098,16 +1079,16 @@ static int bond_option_primary_set(struct bonding *bond,
1098 if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) { 1079 if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
1099 netdev_info(bond->dev, "Setting %s as primary slave\n", 1080 netdev_info(bond->dev, "Setting %s as primary slave\n",
1100 slave->dev->name); 1081 slave->dev->name);
1101 bond->primary_slave = slave; 1082 rcu_assign_pointer(bond->primary_slave, slave);
1102 strcpy(bond->params.primary, slave->dev->name); 1083 strcpy(bond->params.primary, slave->dev->name);
1103 bond_select_active_slave(bond); 1084 bond_select_active_slave(bond);
1104 goto out; 1085 goto out;
1105 } 1086 }
1106 } 1087 }
1107 1088
1108 if (bond->primary_slave) { 1089 if (rtnl_dereference(bond->primary_slave)) {
1109 netdev_info(bond->dev, "Setting primary slave to None\n"); 1090 netdev_info(bond->dev, "Setting primary slave to None\n");
1110 bond->primary_slave = NULL; 1091 RCU_INIT_POINTER(bond->primary_slave, NULL);
1111 bond_select_active_slave(bond); 1092 bond_select_active_slave(bond);
1112 } 1093 }
1113 strncpy(bond->params.primary, primary, IFNAMSIZ); 1094 strncpy(bond->params.primary, primary, IFNAMSIZ);
@@ -1117,8 +1098,6 @@ static int bond_option_primary_set(struct bonding *bond,
1117 primary, bond->dev->name); 1098 primary, bond->dev->name);
1118 1099
1119out: 1100out:
1120 write_unlock_bh(&bond->curr_slave_lock);
1121 read_unlock(&bond->lock);
1122 unblock_netpoll_tx(); 1101 unblock_netpoll_tx();
1123 1102
1124 return 0; 1103 return 0;
@@ -1132,9 +1111,7 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
1132 bond->params.primary_reselect = newval->value; 1111 bond->params.primary_reselect = newval->value;
1133 1112
1134 block_netpoll_tx(); 1113 block_netpoll_tx();
1135 write_lock_bh(&bond->curr_slave_lock);
1136 bond_select_active_slave(bond); 1114 bond_select_active_slave(bond);
1137 write_unlock_bh(&bond->curr_slave_lock);
1138 unblock_netpoll_tx(); 1115 unblock_netpoll_tx();
1139 1116
1140 return 0; 1117 return 0;
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index de62c0385dfb..a3948f8d1e53 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -7,21 +7,18 @@
7 7
8static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 8static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
9 __acquires(RCU) 9 __acquires(RCU)
10 __acquires(&bond->lock)
11{ 10{
12 struct bonding *bond = seq->private; 11 struct bonding *bond = seq->private;
13 struct list_head *iter; 12 struct list_head *iter;
14 struct slave *slave; 13 struct slave *slave;
15 loff_t off = 0; 14 loff_t off = 0;
16 15
17 /* make sure the bond won't be taken away */
18 rcu_read_lock(); 16 rcu_read_lock();
19 read_lock(&bond->lock);
20 17
21 if (*pos == 0) 18 if (*pos == 0)
22 return SEQ_START_TOKEN; 19 return SEQ_START_TOKEN;
23 20
24 bond_for_each_slave(bond, slave, iter) 21 bond_for_each_slave_rcu(bond, slave, iter)
25 if (++off == *pos) 22 if (++off == *pos)
26 return slave; 23 return slave;
27 24
@@ -37,12 +34,9 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
37 34
38 ++*pos; 35 ++*pos;
39 if (v == SEQ_START_TOKEN) 36 if (v == SEQ_START_TOKEN)
40 return bond_first_slave(bond); 37 return bond_first_slave_rcu(bond);
41 38
42 if (bond_is_last_slave(bond, v)) 39 bond_for_each_slave_rcu(bond, slave, iter) {
43 return NULL;
44
45 bond_for_each_slave(bond, slave, iter) {
46 if (found) 40 if (found)
47 return slave; 41 return slave;
48 if (slave == v) 42 if (slave == v)
@@ -53,12 +47,8 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
53} 47}
54 48
55static void bond_info_seq_stop(struct seq_file *seq, void *v) 49static void bond_info_seq_stop(struct seq_file *seq, void *v)
56 __releases(&bond->lock)
57 __releases(RCU) 50 __releases(RCU)
58{ 51{
59 struct bonding *bond = seq->private;
60
61 read_unlock(&bond->lock);
62 rcu_read_unlock(); 52 rcu_read_unlock();
63} 53}
64 54
@@ -66,7 +56,7 @@ static void bond_info_show_master(struct seq_file *seq)
66{ 56{
67 struct bonding *bond = seq->private; 57 struct bonding *bond = seq->private;
68 const struct bond_opt_value *optval; 58 const struct bond_opt_value *optval;
69 struct slave *curr; 59 struct slave *curr, *primary;
70 int i; 60 int i;
71 61
72 curr = rcu_dereference(bond->curr_active_slave); 62 curr = rcu_dereference(bond->curr_active_slave);
@@ -83,8 +73,7 @@ static void bond_info_show_master(struct seq_file *seq)
83 73
84 seq_printf(seq, "\n"); 74 seq_printf(seq, "\n");
85 75
86 if (BOND_MODE(bond) == BOND_MODE_XOR || 76 if (bond_mode_uses_xmit_hash(bond)) {
87 BOND_MODE(bond) == BOND_MODE_8023AD) {
88 optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, 77 optval = bond_opt_get_val(BOND_OPT_XMIT_HASH,
89 bond->params.xmit_policy); 78 bond->params.xmit_policy);
90 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 79 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
@@ -92,10 +81,10 @@ static void bond_info_show_master(struct seq_file *seq)
92 } 81 }
93 82
94 if (bond_uses_primary(bond)) { 83 if (bond_uses_primary(bond)) {
84 primary = rcu_dereference(bond->primary_slave);
95 seq_printf(seq, "Primary Slave: %s", 85 seq_printf(seq, "Primary Slave: %s",
96 (bond->primary_slave) ? 86 primary ? primary->dev->name : "None");
97 bond->primary_slave->dev->name : "None"); 87 if (primary) {
98 if (bond->primary_slave) {
99 optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, 88 optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT,
100 bond->params.primary_reselect); 89 bond->params.primary_reselect);
101 seq_printf(seq, " (primary_reselect %s)", 90 seq_printf(seq, " (primary_reselect %s)",
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 98db8edd9c75..8ffbafd500fd 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -91,7 +91,6 @@ static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifna
91 * creates and deletes entire bonds. 91 * creates and deletes entire bonds.
92 * 92 *
93 * The class parameter is ignored. 93 * The class parameter is ignored.
94 *
95 */ 94 */
96static ssize_t bonding_store_bonds(struct class *cls, 95static ssize_t bonding_store_bonds(struct class *cls,
97 struct class_attribute *attr, 96 struct class_attribute *attr,
@@ -425,11 +424,15 @@ static ssize_t bonding_show_primary(struct device *d,
425 struct device_attribute *attr, 424 struct device_attribute *attr,
426 char *buf) 425 char *buf)
427{ 426{
428 int count = 0;
429 struct bonding *bond = to_bond(d); 427 struct bonding *bond = to_bond(d);
428 struct slave *primary;
429 int count = 0;
430 430
431 if (bond->primary_slave) 431 rcu_read_lock();
432 count = sprintf(buf, "%s\n", bond->primary_slave->dev->name); 432 primary = rcu_dereference(bond->primary_slave);
433 if (primary)
434 count = sprintf(buf, "%s\n", primary->dev->name);
435 rcu_read_unlock();
433 436
434 return count; 437 return count;
435} 438}
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index aace510d08d1..10920f0686e2 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -24,6 +24,7 @@
24#include <linux/inetdevice.h> 24#include <linux/inetdevice.h>
25#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
26#include <linux/reciprocal_div.h> 26#include <linux/reciprocal_div.h>
27#include <linux/if_link.h>
27 28
28#include "bond_3ad.h" 29#include "bond_3ad.h"
29#include "bond_alb.h" 30#include "bond_alb.h"
@@ -83,7 +84,7 @@
83 * @pos: current slave 84 * @pos: current slave
84 * @iter: list_head * iterator 85 * @iter: list_head * iterator
85 * 86 *
86 * Caller must hold bond->lock 87 * Caller must hold RTNL
87 */ 88 */
88#define bond_for_each_slave(bond, pos, iter) \ 89#define bond_for_each_slave(bond, pos, iter) \
89 netdev_for_each_lower_private((bond)->dev, pos, iter) 90 netdev_for_each_lower_private((bond)->dev, pos, iter)
@@ -175,6 +176,13 @@ struct slave {
175 struct netpoll *np; 176 struct netpoll *np;
176#endif 177#endif
177 struct kobject kobj; 178 struct kobject kobj;
179 struct rtnl_link_stats64 slave_stats;
180};
181
182struct bond_up_slave {
183 unsigned int count;
184 struct rcu_head rcu;
185 struct slave *arr[0];
178}; 186};
179 187
180/* 188/*
@@ -184,24 +192,26 @@ struct slave {
184 192
185/* 193/*
186 * Here are the locking policies for the two bonding locks: 194 * Here are the locking policies for the two bonding locks:
187 * 195 * Get rcu_read_lock when reading or RTNL when writing slave list.
188 * 1) Get bond->lock when reading/writing slave list.
189 * 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave.
190 * (It is unnecessary when the write-lock is put with bond->lock.)
191 * 3) When we lock with bond->curr_slave_lock, we must lock with bond->lock
192 * beforehand.
193 */ 196 */
194struct bonding { 197struct bonding {
195 struct net_device *dev; /* first - useful for panic debug */ 198 struct net_device *dev; /* first - useful for panic debug */
196 struct slave __rcu *curr_active_slave; 199 struct slave __rcu *curr_active_slave;
197 struct slave __rcu *current_arp_slave; 200 struct slave __rcu *current_arp_slave;
198 struct slave *primary_slave; 201 struct slave __rcu *primary_slave;
202 struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
199 bool force_primary; 203 bool force_primary;
200 s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ 204 s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
201 int (*recv_probe)(const struct sk_buff *, struct bonding *, 205 int (*recv_probe)(const struct sk_buff *, struct bonding *,
202 struct slave *); 206 struct slave *);
203 rwlock_t lock; 207 /* mode_lock is used for mode-specific locking needs, currently used by:
204 rwlock_t curr_slave_lock; 208 * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and
209 * bond_3ad_state_machine_handler() concurrently and also
210 * the access to the state machine shared variables.
211 * TLB mode (5) - to sync the use and modifications of its hash table
212 * ALB mode (6) - to sync the use and modifications of its hash table
213 */
214 spinlock_t mode_lock;
205 u8 send_peer_notif; 215 u8 send_peer_notif;
206 u8 igmp_retrans; 216 u8 igmp_retrans;
207#ifdef CONFIG_PROC_FS 217#ifdef CONFIG_PROC_FS
@@ -219,10 +229,12 @@ struct bonding {
219 struct delayed_work alb_work; 229 struct delayed_work alb_work;
220 struct delayed_work ad_work; 230 struct delayed_work ad_work;
221 struct delayed_work mcast_work; 231 struct delayed_work mcast_work;
232 struct delayed_work slave_arr_work;
222#ifdef CONFIG_DEBUG_FS 233#ifdef CONFIG_DEBUG_FS
223 /* debugging support via debugfs */ 234 /* debugging support via debugfs */
224 struct dentry *debug_dir; 235 struct dentry *debug_dir;
225#endif /* CONFIG_DEBUG_FS */ 236#endif /* CONFIG_DEBUG_FS */
237 struct rtnl_link_stats64 bond_stats;
226}; 238};
227 239
228#define bond_slave_get_rcu(dev) \ 240#define bond_slave_get_rcu(dev) \
@@ -231,10 +243,6 @@ struct bonding {
231#define bond_slave_get_rtnl(dev) \ 243#define bond_slave_get_rtnl(dev) \
232 ((struct slave *) rtnl_dereference(dev->rx_handler_data)) 244 ((struct slave *) rtnl_dereference(dev->rx_handler_data))
233 245
234#define bond_deref_active_protected(bond) \
235 rcu_dereference_protected(bond->curr_active_slave, \
236 lockdep_is_held(&bond->curr_slave_lock))
237
238struct bond_vlan_tag { 246struct bond_vlan_tag {
239 __be16 vlan_proto; 247 __be16 vlan_proto;
240 unsigned short vlan_id; 248 unsigned short vlan_id;
@@ -274,6 +282,13 @@ static inline bool bond_is_nondyn_tlb(const struct bonding *bond)
274 (bond->params.tlb_dynamic_lb == 0); 282 (bond->params.tlb_dynamic_lb == 0);
275} 283}
276 284
285static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond)
286{
287 return (BOND_MODE(bond) == BOND_MODE_8023AD ||
288 BOND_MODE(bond) == BOND_MODE_XOR ||
289 bond_is_nondyn_tlb(bond));
290}
291
277static inline bool bond_mode_uses_arp(int mode) 292static inline bool bond_mode_uses_arp(int mode)
278{ 293{
279 return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && 294 return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB &&
@@ -527,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
527struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, 542struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
528 struct net_device *end_dev, 543 struct net_device *end_dev,
529 int level); 544 int level);
545int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
546void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
530 547
531#ifdef CONFIG_PROC_FS 548#ifdef CONFIG_PROC_FS
532void bond_create_proc_entry(struct bonding *bond); 549void bond_create_proc_entry(struct bonding *bond);