about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Mahesh Bandewar <maheshb@google.com> 2014-10-04 20:45:01 -0400
committer David S. Miller <davem@davemloft.net> 2014-10-06 17:13:07 -0400
commit ee6377147409a00c071b2da853059a7d59979fbc (patch)
tree 82df6bc2bc00711351463081a13785c04b45f773
parent d7021325a2ea5aaf4458097341c988f9dc93491f (diff)
bonding: Simplify the xmit function for modes that use xmit_hash
Earlier change to use usable slave array for TLB mode had an additional
performance advantage. So extending the same logic to all other modes
that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
Also consolidating this with the earlier TLB change.

The main idea is to build the usable slaves array in the control path
and use that array for slave selection during xmit operation.

Measured performance in a setup with a bond of 4x1G NICs with 200
instances of netperf for the modes involved (3ad, xor, tlb)
cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5

Mode        TPS-Before   TPS-After
802.3ad   : 468,694      493,101
TLB (lb=0): 392,583      392,965
XOR       : 475,696      484,517

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/bonding/bond_3ad.c   140
-rw-r--r--  drivers/net/bonding/bond_alb.c    51
-rw-r--r--  drivers/net/bonding/bond_alb.h     8
-rw-r--r--  drivers/net/bonding/bond_main.c  192
-rw-r--r--  drivers/net/bonding/bonding.h     10
5 files changed, 249 insertions, 152 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7e9e522fd476..2110215f3528 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
102/* ================= main 802.3ad protocol functions ================== */ 102/* ================= main 802.3ad protocol functions ================== */
103static int ad_lacpdu_send(struct port *port); 103static int ad_lacpdu_send(struct port *port);
104static int ad_marker_send(struct port *port, struct bond_marker *marker); 104static int ad_marker_send(struct port *port, struct bond_marker *marker);
105static void ad_mux_machine(struct port *port); 105static void ad_mux_machine(struct port *port, bool *update_slave_arr);
106static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); 106static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
107static void ad_tx_machine(struct port *port); 107static void ad_tx_machine(struct port *port);
108static void ad_periodic_machine(struct port *port); 108static void ad_periodic_machine(struct port *port);
109static void ad_port_selection_logic(struct port *port); 109static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
110static void ad_agg_selection_logic(struct aggregator *aggregator); 110static void ad_agg_selection_logic(struct aggregator *aggregator,
111 bool *update_slave_arr);
111static void ad_clear_agg(struct aggregator *aggregator); 112static void ad_clear_agg(struct aggregator *aggregator);
112static void ad_initialize_agg(struct aggregator *aggregator); 113static void ad_initialize_agg(struct aggregator *aggregator);
113static void ad_initialize_port(struct port *port, int lacp_fast); 114static void ad_initialize_port(struct port *port, int lacp_fast);
114static void ad_enable_collecting_distributing(struct port *port); 115static void ad_enable_collecting_distributing(struct port *port,
115static void ad_disable_collecting_distributing(struct port *port); 116 bool *update_slave_arr);
117static void ad_disable_collecting_distributing(struct port *port,
118 bool *update_slave_arr);
116static void ad_marker_info_received(struct bond_marker *marker_info, 119static void ad_marker_info_received(struct bond_marker *marker_info,
117 struct port *port); 120 struct port *port);
118static void ad_marker_response_received(struct bond_marker *marker, 121static void ad_marker_response_received(struct bond_marker *marker,
@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
796/** 799/**
797 * ad_mux_machine - handle a port's mux state machine 800 * ad_mux_machine - handle a port's mux state machine
798 * @port: the port we're looking at 801 * @port: the port we're looking at
802 * @update_slave_arr: Does slave array need update?
799 */ 803 */
800static void ad_mux_machine(struct port *port) 804static void ad_mux_machine(struct port *port, bool *update_slave_arr)
801{ 805{
802 mux_states_t last_state; 806 mux_states_t last_state;
803 807
@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port)
901 switch (port->sm_mux_state) { 905 switch (port->sm_mux_state) {
902 case AD_MUX_DETACHED: 906 case AD_MUX_DETACHED:
903 port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; 907 port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
904 ad_disable_collecting_distributing(port); 908 ad_disable_collecting_distributing(port,
909 update_slave_arr);
905 port->actor_oper_port_state &= ~AD_STATE_COLLECTING; 910 port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
906 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; 911 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
907 port->ntt = true; 912 port->ntt = true;
@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port)
913 port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; 918 port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
914 port->actor_oper_port_state &= ~AD_STATE_COLLECTING; 919 port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
915 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; 920 port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
916 ad_disable_collecting_distributing(port); 921 ad_disable_collecting_distributing(port,
922 update_slave_arr);
917 port->ntt = true; 923 port->ntt = true;
918 break; 924 break;
919 case AD_MUX_COLLECTING_DISTRIBUTING: 925 case AD_MUX_COLLECTING_DISTRIBUTING:
920 port->actor_oper_port_state |= AD_STATE_COLLECTING; 926 port->actor_oper_port_state |= AD_STATE_COLLECTING;
921 port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; 927 port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
922 ad_enable_collecting_distributing(port); 928 ad_enable_collecting_distributing(port,
929 update_slave_arr);
923 port->ntt = true; 930 port->ntt = true;
924 break; 931 break;
925 default: 932 default:
@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
1187/** 1194/**
1188 * ad_port_selection_logic - select aggregation groups 1195 * ad_port_selection_logic - select aggregation groups
1189 * @port: the port we're looking at 1196 * @port: the port we're looking at
1197 * @update_slave_arr: Does slave array need update?
1190 * 1198 *
1191 * Select aggregation groups, and assign each port for it's aggregetor. The 1199 * Select aggregation groups, and assign each port for it's aggregetor. The
1192 * selection logic is called in the inititalization (after all the handshkes), 1200 * selection logic is called in the inititalization (after all the handshkes),
1193 * and after every lacpdu receive (if selected is off). 1201 * and after every lacpdu receive (if selected is off).
1194 */ 1202 */
1195static void ad_port_selection_logic(struct port *port) 1203static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
1196{ 1204{
1197 struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; 1205 struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
1198 struct port *last_port = NULL, *curr_port; 1206 struct port *last_port = NULL, *curr_port;
@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
1347 __agg_ports_are_ready(port->aggregator)); 1355 __agg_ports_are_ready(port->aggregator));
1348 1356
1349 aggregator = __get_first_agg(port); 1357 aggregator = __get_first_agg(port);
1350 ad_agg_selection_logic(aggregator); 1358 ad_agg_selection_logic(aggregator, update_slave_arr);
1351} 1359}
1352 1360
1353/* Decide if "agg" is a better choice for the new active aggregator that 1361/* Decide if "agg" is a better choice for the new active aggregator that
@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
1435/** 1443/**
1436 * ad_agg_selection_logic - select an aggregation group for a team 1444 * ad_agg_selection_logic - select an aggregation group for a team
1437 * @aggregator: the aggregator we're looking at 1445 * @aggregator: the aggregator we're looking at
1446 * @update_slave_arr: Does slave array need update?
1438 * 1447 *
1439 * It is assumed that only one aggregator may be selected for a team. 1448 * It is assumed that only one aggregator may be selected for a team.
1440 * 1449 *
@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
1457 * __get_active_agg() won't work correctly. This function should be better 1466 * __get_active_agg() won't work correctly. This function should be better
1458 * called with the bond itself, and retrieve the first agg from it. 1467 * called with the bond itself, and retrieve the first agg from it.
1459 */ 1468 */
1460static void ad_agg_selection_logic(struct aggregator *agg) 1469static void ad_agg_selection_logic(struct aggregator *agg,
1470 bool *update_slave_arr)
1461{ 1471{
1462 struct aggregator *best, *active, *origin; 1472 struct aggregator *best, *active, *origin;
1463 struct bonding *bond = agg->slave->bond; 1473 struct bonding *bond = agg->slave->bond;
@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
1550 __disable_port(port); 1560 __disable_port(port);
1551 } 1561 }
1552 } 1562 }
1563 /* Slave array needs update. */
1564 *update_slave_arr = true;
1553 } 1565 }
1554 1566
1555 /* if the selected aggregator is of join individuals 1567 /* if the selected aggregator is of join individuals
@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
1678/** 1690/**
1679 * ad_enable_collecting_distributing - enable a port's transmit/receive 1691 * ad_enable_collecting_distributing - enable a port's transmit/receive
1680 * @port: the port we're looking at 1692 * @port: the port we're looking at
1693 * @update_slave_arr: Does slave array need update?
1681 * 1694 *
1682 * Enable @port if it's in an active aggregator 1695 * Enable @port if it's in an active aggregator
1683 */ 1696 */
1684static void ad_enable_collecting_distributing(struct port *port) 1697static void ad_enable_collecting_distributing(struct port *port,
1698 bool *update_slave_arr)
1685{ 1699{
1686 if (port->aggregator->is_active) { 1700 if (port->aggregator->is_active) {
1687 pr_debug("Enabling port %d(LAG %d)\n", 1701 pr_debug("Enabling port %d(LAG %d)\n",
1688 port->actor_port_number, 1702 port->actor_port_number,
1689 port->aggregator->aggregator_identifier); 1703 port->aggregator->aggregator_identifier);
1690 __enable_port(port); 1704 __enable_port(port);
1705 /* Slave array needs update */
1706 *update_slave_arr = true;
1691 } 1707 }
1692} 1708}
1693 1709
1694/** 1710/**
1695 * ad_disable_collecting_distributing - disable a port's transmit/receive 1711 * ad_disable_collecting_distributing - disable a port's transmit/receive
1696 * @port: the port we're looking at 1712 * @port: the port we're looking at
1713 * @update_slave_arr: Does slave array need update?
1697 */ 1714 */
1698static void ad_disable_collecting_distributing(struct port *port) 1715static void ad_disable_collecting_distributing(struct port *port,
1716 bool *update_slave_arr)
1699{ 1717{
1700 if (port->aggregator && 1718 if (port->aggregator &&
1701 !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), 1719 !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
1704 port->actor_port_number, 1722 port->actor_port_number,
1705 port->aggregator->aggregator_identifier); 1723 port->aggregator->aggregator_identifier);
1706 __disable_port(port); 1724 __disable_port(port);
1725 /* Slave array needs an update */
1726 *update_slave_arr = true;
1707 } 1727 }
1708} 1728}
1709 1729
@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
1868 struct bonding *bond = slave->bond; 1888 struct bonding *bond = slave->bond;
1869 struct slave *slave_iter; 1889 struct slave *slave_iter;
1870 struct list_head *iter; 1890 struct list_head *iter;
1891 bool dummy_slave_update; /* Ignore this value as caller updates array */
1871 1892
1872 /* Sync against bond_3ad_state_machine_handler() */ 1893 /* Sync against bond_3ad_state_machine_handler() */
1873 spin_lock_bh(&bond->mode_lock); 1894 spin_lock_bh(&bond->mode_lock);
@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
1951 ad_clear_agg(aggregator); 1972 ad_clear_agg(aggregator);
1952 1973
1953 if (select_new_active_agg) 1974 if (select_new_active_agg)
1954 ad_agg_selection_logic(__get_first_agg(port)); 1975 ad_agg_selection_logic(__get_first_agg(port),
1976 &dummy_slave_update);
1955 } else { 1977 } else {
1956 netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); 1978 netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
1957 } 1979 }
@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
1966 /* select new active aggregator */ 1988 /* select new active aggregator */
1967 temp_aggregator = __get_first_agg(port); 1989 temp_aggregator = __get_first_agg(port);
1968 if (temp_aggregator) 1990 if (temp_aggregator)
1969 ad_agg_selection_logic(temp_aggregator); 1991 ad_agg_selection_logic(temp_aggregator,
1992 &dummy_slave_update);
1970 } 1993 }
1971 } 1994 }
1972 } 1995 }
@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
1996 if (select_new_active_agg) { 2019 if (select_new_active_agg) {
1997 netdev_info(bond->dev, "Removing an active aggregator\n"); 2020 netdev_info(bond->dev, "Removing an active aggregator\n");
1998 /* select new active aggregator */ 2021 /* select new active aggregator */
1999 ad_agg_selection_logic(__get_first_agg(port)); 2022 ad_agg_selection_logic(__get_first_agg(port),
2023 &dummy_slave_update);
2000 } 2024 }
2001 } 2025 }
2002 break; 2026 break;
@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2031 struct slave *slave; 2055 struct slave *slave;
2032 struct port *port; 2056 struct port *port;
2033 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; 2057 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
2058 bool update_slave_arr = false;
2034 2059
2035 /* Lock to protect data accessed by all (e.g., port->sm_vars) and 2060 /* Lock to protect data accessed by all (e.g., port->sm_vars) and
2036 * against running with bond_3ad_unbind_slave. ad_rx_machine may run 2061 * against running with bond_3ad_unbind_slave. ad_rx_machine may run
@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2058 } 2083 }
2059 2084
2060 aggregator = __get_first_agg(port); 2085 aggregator = __get_first_agg(port);
2061 ad_agg_selection_logic(aggregator); 2086 ad_agg_selection_logic(aggregator, &update_slave_arr);
2062 } 2087 }
2063 bond_3ad_set_carrier(bond); 2088 bond_3ad_set_carrier(bond);
2064 } 2089 }
@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2074 2099
2075 ad_rx_machine(NULL, port); 2100 ad_rx_machine(NULL, port);
2076 ad_periodic_machine(port); 2101 ad_periodic_machine(port);
2077 ad_port_selection_logic(port); 2102 ad_port_selection_logic(port, &update_slave_arr);
2078 ad_mux_machine(port); 2103 ad_mux_machine(port, &update_slave_arr);
2079 ad_tx_machine(port); 2104 ad_tx_machine(port);
2080 2105
2081 /* turn off the BEGIN bit, since we already handled it */ 2106 /* turn off the BEGIN bit, since we already handled it */
@@ -2093,6 +2118,9 @@ re_arm:
2093 rcu_read_unlock(); 2118 rcu_read_unlock();
2094 spin_unlock_bh(&bond->mode_lock); 2119 spin_unlock_bh(&bond->mode_lock);
2095 2120
2121 if (update_slave_arr)
2122 bond_slave_arr_work_rearm(bond, 0);
2123
2096 if (should_notify_rtnl && rtnl_trylock()) { 2124 if (should_notify_rtnl && rtnl_trylock()) {
2097 bond_slave_state_notify(bond); 2125 bond_slave_state_notify(bond);
2098 rtnl_unlock(); 2126 rtnl_unlock();
@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
2283 port->sm_vars |= AD_PORT_BEGIN; 2311 port->sm_vars |= AD_PORT_BEGIN;
2284 2312
2285 spin_unlock_bh(&slave->bond->mode_lock); 2313 spin_unlock_bh(&slave->bond->mode_lock);
2314
2315 /* RTNL is held and mode_lock is released so it's safe
2316 * to update slave_array here.
2317 */
2318 bond_update_slave_arr(slave->bond, NULL);
2286} 2319}
2287 2320
2288/** 2321/**
@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
2377 return ret; 2410 return ret;
2378} 2411}
2379 2412
2380int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
2381{
2382 struct bonding *bond = netdev_priv(dev);
2383 struct slave *slave, *first_ok_slave;
2384 struct aggregator *agg;
2385 struct ad_info ad_info;
2386 struct list_head *iter;
2387 int slaves_in_agg;
2388 int slave_agg_no;
2389 int agg_id;
2390
2391 if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
2392 netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
2393 goto err_free;
2394 }
2395
2396 slaves_in_agg = ad_info.ports;
2397 agg_id = ad_info.aggregator_id;
2398
2399 if (slaves_in_agg == 0) {
2400 netdev_dbg(dev, "active aggregator is empty\n");
2401 goto err_free;
2402 }
2403
2404 slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
2405 first_ok_slave = NULL;
2406
2407 bond_for_each_slave_rcu(bond, slave, iter) {
2408 agg = SLAVE_AD_INFO(slave)->port.aggregator;
2409 if (!agg || agg->aggregator_identifier != agg_id)
2410 continue;
2411
2412 if (slave_agg_no >= 0) {
2413 if (!first_ok_slave && bond_slave_can_tx(slave))
2414 first_ok_slave = slave;
2415 slave_agg_no--;
2416 continue;
2417 }
2418
2419 if (bond_slave_can_tx(slave)) {
2420 bond_dev_queue_xmit(bond, skb, slave->dev);
2421 goto out;
2422 }
2423 }
2424
2425 if (slave_agg_no >= 0) {
2426 netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
2427 agg_id);
2428 goto err_free;
2429 }
2430
2431 /* we couldn't find any suitable slave after the agg_no, so use the
2432 * first suitable found, if found.
2433 */
2434 if (first_ok_slave)
2435 bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
2436 else
2437 goto err_free;
2438
2439out:
2440 return NETDEV_TX_OK;
2441err_free:
2442 /* no suitable interface, frame not sent */
2443 dev_kfree_skb_any(skb);
2444 goto out;
2445}
2446
2447int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, 2413int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
2448 struct slave *slave) 2414 struct slave *slave)
2449{ 2415{
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 615f3bebd019..d2eadab787c5 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond)
177static void tlb_deinitialize(struct bonding *bond) 177static void tlb_deinitialize(struct bonding *bond)
178{ 178{
179 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 179 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
180 struct tlb_up_slave *arr;
181 180
182 spin_lock_bh(&bond->mode_lock); 181 spin_lock_bh(&bond->mode_lock);
183 182
@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond)
185 bond_info->tx_hashtbl = NULL; 184 bond_info->tx_hashtbl = NULL;
186 185
187 spin_unlock_bh(&bond->mode_lock); 186 spin_unlock_bh(&bond->mode_lock);
188
189 arr = rtnl_dereference(bond_info->slave_arr);
190 if (arr)
191 kfree_rcu(arr, rcu);
192} 187}
193 188
194static long long compute_gap(struct slave *slave) 189static long long compute_gap(struct slave *slave)
@@ -1336,39 +1331,9 @@ out:
1336 return NETDEV_TX_OK; 1331 return NETDEV_TX_OK;
1337} 1332}
1338 1333
1339static int bond_tlb_update_slave_arr(struct bonding *bond,
1340 struct slave *skipslave)
1341{
1342 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1343 struct slave *tx_slave;
1344 struct list_head *iter;
1345 struct tlb_up_slave *new_arr, *old_arr;
1346
1347 new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
1348 GFP_ATOMIC);
1349 if (!new_arr)
1350 return -ENOMEM;
1351
1352 bond_for_each_slave(bond, tx_slave, iter) {
1353 if (!bond_slave_can_tx(tx_slave))
1354 continue;
1355 if (skipslave == tx_slave)
1356 continue;
1357 new_arr->arr[new_arr->count++] = tx_slave;
1358 }
1359
1360 old_arr = rtnl_dereference(bond_info->slave_arr);
1361 rcu_assign_pointer(bond_info->slave_arr, new_arr);
1362 if (old_arr)
1363 kfree_rcu(old_arr, rcu);
1364
1365 return 0;
1366}
1367
1368int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1334int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1369{ 1335{
1370 struct bonding *bond = netdev_priv(bond_dev); 1336 struct bonding *bond = netdev_priv(bond_dev);
1371 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1372 struct ethhdr *eth_data; 1337 struct ethhdr *eth_data;
1373 struct slave *tx_slave = NULL; 1338 struct slave *tx_slave = NULL;
1374 u32 hash_index; 1339 u32 hash_index;
@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1389 hash_index & 0xFF, 1354 hash_index & 0xFF,
1390 skb->len); 1355 skb->len);
1391 } else { 1356 } else {
1392 struct tlb_up_slave *slaves; 1357 struct bond_up_slave *slaves;
1358 unsigned int count;
1393 1359
1394 slaves = rcu_dereference(bond_info->slave_arr); 1360 slaves = rcu_dereference(bond->slave_arr);
1395 if (slaves && slaves->count) 1361 count = slaves ? ACCESS_ONCE(slaves->count) : 0;
1362 if (likely(count))
1396 tx_slave = slaves->arr[hash_index % 1363 tx_slave = slaves->arr[hash_index %
1397 slaves->count]; 1364 count];
1398 } 1365 }
1399 break; 1366 break;
1400 } 1367 }
@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
1641 rlb_clear_slave(bond, slave); 1608 rlb_clear_slave(bond, slave);
1642 } 1609 }
1643 1610
1644 if (bond_is_nondyn_tlb(bond))
1645 if (bond_tlb_update_slave_arr(bond, slave))
1646 pr_err("Failed to build slave-array for TLB mode.\n");
1647
1648} 1611}
1649 1612
1650void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1613void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
1669 } 1632 }
1670 1633
1671 if (bond_is_nondyn_tlb(bond)) { 1634 if (bond_is_nondyn_tlb(bond)) {
1672 if (bond_tlb_update_slave_arr(bond, NULL)) 1635 if (bond_update_slave_arr(bond, NULL))
1673 pr_err("Failed to build slave-array for TLB mode.\n"); 1636 pr_err("Failed to build slave-array for TLB mode.\n");
1674 } 1637 }
1675} 1638}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 3c6a7ff974d7..1ad473b4ade5 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -139,19 +139,11 @@ struct tlb_slave_info {
139 */ 139 */
140}; 140};
141 141
142struct tlb_up_slave {
143 unsigned int count;
144 struct rcu_head rcu;
145 struct slave *arr[0];
146};
147
148struct alb_bond_info { 142struct alb_bond_info {
149 struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ 143 struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
150 u32 unbalanced_load; 144 u32 unbalanced_load;
151 int tx_rebalance_counter; 145 int tx_rebalance_counter;
152 int lp_counter; 146 int lp_counter;
153 /* -------- non-dynamic tlb mode only ---------*/
154 struct tlb_up_slave __rcu *slave_arr; /* Up slaves */
155 /* -------- rlb parameters -------- */ 147 /* -------- rlb parameters -------- */
156 int rlb_enabled; 148 int rlb_enabled;
157 struct rlb_client_info *rx_hashtbl; /* Receive hash table */ 149 struct rlb_client_info *rx_hashtbl; /* Receive hash table */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c2adc2755ff6..3ad5413d4f57 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev);
210static void bond_uninit(struct net_device *bond_dev); 210static void bond_uninit(struct net_device *bond_dev);
211static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, 211static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
212 struct rtnl_link_stats64 *stats); 212 struct rtnl_link_stats64 *stats);
213static void bond_slave_arr_handler(struct work_struct *work);
213 214
214/*---------------------------- General routines -----------------------------*/ 215/*---------------------------- General routines -----------------------------*/
215 216
@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1551 unblock_netpoll_tx(); 1552 unblock_netpoll_tx();
1552 } 1553 }
1553 1554
1555 if (bond_mode_uses_xmit_hash(bond))
1556 bond_update_slave_arr(bond, NULL);
1557
1554 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", 1558 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
1555 slave_dev->name, 1559 slave_dev->name,
1556 bond_is_active_slave(new_slave) ? "an active" : "a backup", 1560 bond_is_active_slave(new_slave) ? "an active" : "a backup",
@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev,
1668 if (BOND_MODE(bond) == BOND_MODE_8023AD) 1672 if (BOND_MODE(bond) == BOND_MODE_8023AD)
1669 bond_3ad_unbind_slave(slave); 1673 bond_3ad_unbind_slave(slave);
1670 1674
1675 if (bond_mode_uses_xmit_hash(bond))
1676 bond_update_slave_arr(bond, slave);
1677
1671 netdev_info(bond_dev, "Releasing %s interface %s\n", 1678 netdev_info(bond_dev, "Releasing %s interface %s\n",
1672 bond_is_active_slave(slave) ? "active" : "backup", 1679 bond_is_active_slave(slave) ? "active" : "backup",
1673 slave_dev->name); 1680 slave_dev->name);
@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond)
1970 bond_alb_handle_link_change(bond, slave, 1977 bond_alb_handle_link_change(bond, slave,
1971 BOND_LINK_UP); 1978 BOND_LINK_UP);
1972 1979
1980 if (BOND_MODE(bond) == BOND_MODE_XOR)
1981 bond_update_slave_arr(bond, NULL);
1982
1973 if (!bond->curr_active_slave || slave == primary) 1983 if (!bond->curr_active_slave || slave == primary)
1974 goto do_failover; 1984 goto do_failover;
1975 1985
@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond)
1997 bond_alb_handle_link_change(bond, slave, 2007 bond_alb_handle_link_change(bond, slave,
1998 BOND_LINK_DOWN); 2008 BOND_LINK_DOWN);
1999 2009
2010 if (BOND_MODE(bond) == BOND_MODE_XOR)
2011 bond_update_slave_arr(bond, NULL);
2012
2000 if (slave == rcu_access_pointer(bond->curr_active_slave)) 2013 if (slave == rcu_access_pointer(bond->curr_active_slave))
2001 goto do_failover; 2014 goto do_failover;
2002 2015
@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
2453 2466
2454 if (slave_state_changed) { 2467 if (slave_state_changed) {
2455 bond_slave_state_change(bond); 2468 bond_slave_state_change(bond);
2469 if (BOND_MODE(bond) == BOND_MODE_XOR)
2470 bond_update_slave_arr(bond, NULL);
2456 } else if (do_failover) { 2471 } else if (do_failover) {
2457 block_netpoll_tx(); 2472 block_netpoll_tx();
2458 bond_select_active_slave(bond); 2473 bond_select_active_slave(bond);
@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event,
2829 if (old_duplex != slave->duplex) 2844 if (old_duplex != slave->duplex)
2830 bond_3ad_adapter_duplex_changed(slave); 2845 bond_3ad_adapter_duplex_changed(slave);
2831 } 2846 }
2847 /* Refresh slave-array if applicable!
2848 * If the setup does not use miimon or arpmon (mode-specific!),
2849 * then these events will not cause the slave-array to be
2850 * refreshed. This will cause xmit to use a slave that is not
2851 * usable. Avoid such situation by refeshing the array at these
2852 * events. If these (miimon/arpmon) parameters are configured
2853 * then array gets refreshed twice and that should be fine!
2854 */
2855 if (bond_mode_uses_xmit_hash(bond))
2856 bond_update_slave_arr(bond, NULL);
2832 break; 2857 break;
2833 case NETDEV_DOWN: 2858 case NETDEV_DOWN:
2859 if (bond_mode_uses_xmit_hash(bond))
2860 bond_update_slave_arr(bond, NULL);
2834 break; 2861 break;
2835 case NETDEV_CHANGEMTU: 2862 case NETDEV_CHANGEMTU:
2836 /* TODO: Should slaves be allowed to 2863 /* TODO: Should slaves be allowed to
@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond)
3010 else 3037 else
3011 INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); 3038 INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
3012 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3039 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3040 INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
3013} 3041}
3014 3042
3015static void bond_work_cancel_all(struct bonding *bond) 3043static void bond_work_cancel_all(struct bonding *bond)
@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond)
3019 cancel_delayed_work_sync(&bond->alb_work); 3047 cancel_delayed_work_sync(&bond->alb_work);
3020 cancel_delayed_work_sync(&bond->ad_work); 3048 cancel_delayed_work_sync(&bond->ad_work);
3021 cancel_delayed_work_sync(&bond->mcast_work); 3049 cancel_delayed_work_sync(&bond->mcast_work);
3050 cancel_delayed_work_sync(&bond->slave_arr_work);
3022} 3051}
3023 3052
3024static int bond_open(struct net_device *bond_dev) 3053static int bond_open(struct net_device *bond_dev)
@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev)
3068 bond_3ad_initiate_agg_selection(bond, 1); 3097 bond_3ad_initiate_agg_selection(bond, 1);
3069 } 3098 }
3070 3099
3100 if (bond_mode_uses_xmit_hash(bond))
3101 bond_update_slave_arr(bond, NULL);
3102
3071 return 0; 3103 return 0;
3072} 3104}
3073 3105
@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
3573 return NETDEV_TX_OK; 3605 return NETDEV_TX_OK;
3574} 3606}
3575 3607
3576/* In bond_xmit_xor() , we determine the output device by using a pre- 3608/* Use this to update slave_array when (a) it's not appropriate to update
3577 * determined xmit_hash_policy(), If the selected device is not enabled, 3609 * slave_array right away (note that bond_update_slave_arr() may sleep)
3578 * find the next active slave. 3610 * and / or (b) RTNL is not held.
3579 */ 3611 */
3580static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 3612void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
3581{ 3613{
3582 struct bonding *bond = netdev_priv(bond_dev); 3614 queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
3583 int slave_cnt = ACCESS_ONCE(bond->slave_cnt); 3615}
3584 3616
3585 if (likely(slave_cnt)) 3617/* Slave array work handler. Holds only RTNL */
3586 bond_xmit_slave_id(bond, skb, 3618static void bond_slave_arr_handler(struct work_struct *work)
3587 bond_xmit_hash(bond, skb) % slave_cnt); 3619{
3588 else 3620 struct bonding *bond = container_of(work, struct bonding,
3621 slave_arr_work.work);
3622 int ret;
3623
3624 if (!rtnl_trylock())
3625 goto err;
3626
3627 ret = bond_update_slave_arr(bond, NULL);
3628 rtnl_unlock();
3629 if (ret) {
3630 pr_warn_ratelimited("Failed to update slave array from WT\n");
3631 goto err;
3632 }
3633 return;
3634
3635err:
3636 bond_slave_arr_work_rearm(bond, 1);
3637}
3638
3639/* Build the usable slaves array in control path for modes that use xmit-hash
3640 * to determine the slave interface -
3641 * (a) BOND_MODE_8023AD
3642 * (b) BOND_MODE_XOR
3643 * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
3644 *
3645 * The caller is expected to hold RTNL only and NO other lock!
3646 */
3647int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
3648{
3649 struct slave *slave;
3650 struct list_head *iter;
3651 struct bond_up_slave *new_arr, *old_arr;
3652 int slaves_in_agg;
3653 int agg_id = 0;
3654 int ret = 0;
3655
3656#ifdef CONFIG_LOCKDEP
3657 WARN_ON(lockdep_is_held(&bond->mode_lock));
3658#endif
3659
3660 new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
3661 GFP_KERNEL);
3662 if (!new_arr) {
3663 ret = -ENOMEM;
3664 pr_err("Failed to build slave-array.\n");
3665 goto out;
3666 }
3667 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3668 struct ad_info ad_info;
3669
3670 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3671 pr_debug("bond_3ad_get_active_agg_info failed\n");
3672 kfree_rcu(new_arr, rcu);
3673 /* No active aggregator means it's not safe to use
3674 * the previous array.
3675 */
3676 old_arr = rtnl_dereference(bond->slave_arr);
3677 if (old_arr) {
3678 RCU_INIT_POINTER(bond->slave_arr, NULL);
3679 kfree_rcu(old_arr, rcu);
3680 }
3681 goto out;
3682 }
3683 slaves_in_agg = ad_info.ports;
3684 agg_id = ad_info.aggregator_id;
3685 }
3686 bond_for_each_slave(bond, slave, iter) {
3687 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3688 struct aggregator *agg;
3689
3690 agg = SLAVE_AD_INFO(slave)->port.aggregator;
3691 if (!agg || agg->aggregator_identifier != agg_id)
3692 continue;
3693 }
3694 if (!bond_slave_can_tx(slave))
3695 continue;
3696 if (skipslave == slave)
3697 continue;
3698 new_arr->arr[new_arr->count++] = slave;
3699 }
3700
3701 old_arr = rtnl_dereference(bond->slave_arr);
3702 rcu_assign_pointer(bond->slave_arr, new_arr);
3703 if (old_arr)
3704 kfree_rcu(old_arr, rcu);
3705out:
3706 if (ret != 0 && skipslave) {
3707 int idx;
3708
3709 /* Rare situation where caller has asked to skip a specific
3710 * slave but allocation failed (most likely!). BTW this is
3711 * only possible when the call is initiated from
3712 * __bond_release_one(). In this situation; overwrite the
3713 * skipslave entry in the array with the last entry from the
3714 * array to avoid a situation where the xmit path may choose
3715 * this to-be-skipped slave to send a packet out.
3716 */
3717 old_arr = rtnl_dereference(bond->slave_arr);
3718 for (idx = 0; idx < old_arr->count; idx++) {
3719 if (skipslave == old_arr->arr[idx]) {
3720 old_arr->arr[idx] =
3721 old_arr->arr[old_arr->count-1];
3722 old_arr->count--;
3723 break;
3724 }
3725 }
3726 }
3727 return ret;
3728}
3729
3730/* Use this Xmit function for 3AD as well as XOR modes. The current
3731 * usable slave array is formed in the control path. The xmit function
3732 * just calculates hash and sends the packet out.
3733 */
3734int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
3735{
3736 struct bonding *bond = netdev_priv(dev);
3737 struct slave *slave;
3738 struct bond_up_slave *slaves;
3739 unsigned int count;
3740
3741 slaves = rcu_dereference(bond->slave_arr);
3742 count = slaves ? ACCESS_ONCE(slaves->count) : 0;
3743 if (likely(count)) {
3744 slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
3745 bond_dev_queue_xmit(bond, skb, slave->dev);
3746 } else {
3589 dev_kfree_skb_any(skb); 3747 dev_kfree_skb_any(skb);
3748 atomic_long_inc(&dev->tx_dropped);
3749 }
3590 3750
3591 return NETDEV_TX_OK; 3751 return NETDEV_TX_OK;
3592} 3752}
@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
3682 return bond_xmit_roundrobin(skb, dev); 3842 return bond_xmit_roundrobin(skb, dev);
3683 case BOND_MODE_ACTIVEBACKUP: 3843 case BOND_MODE_ACTIVEBACKUP:
3684 return bond_xmit_activebackup(skb, dev); 3844 return bond_xmit_activebackup(skb, dev);
3845 case BOND_MODE_8023AD:
3685 case BOND_MODE_XOR: 3846 case BOND_MODE_XOR:
3686 return bond_xmit_xor(skb, dev); 3847 return bond_3ad_xor_xmit(skb, dev);
3687 case BOND_MODE_BROADCAST: 3848 case BOND_MODE_BROADCAST:
3688 return bond_xmit_broadcast(skb, dev); 3849 return bond_xmit_broadcast(skb, dev);
3689 case BOND_MODE_8023AD:
3690 return bond_3ad_xmit_xor(skb, dev);
3691 case BOND_MODE_ALB: 3850 case BOND_MODE_ALB:
3692 return bond_alb_xmit(skb, dev); 3851 return bond_alb_xmit(skb, dev);
3693 case BOND_MODE_TLB: 3852 case BOND_MODE_TLB:
@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev)
3861 struct bonding *bond = netdev_priv(bond_dev); 4020 struct bonding *bond = netdev_priv(bond_dev);
3862 struct list_head *iter; 4021 struct list_head *iter;
3863 struct slave *slave; 4022 struct slave *slave;
4023 struct bond_up_slave *arr;
3864 4024
3865 bond_netpoll_cleanup(bond_dev); 4025 bond_netpoll_cleanup(bond_dev);
3866 4026
@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev)
3869 __bond_release_one(bond_dev, slave->dev, true); 4029 __bond_release_one(bond_dev, slave->dev, true);
3870 netdev_info(bond_dev, "Released all slaves\n"); 4030 netdev_info(bond_dev, "Released all slaves\n");
3871 4031
4032 arr = rtnl_dereference(bond->slave_arr);
4033 if (arr) {
4034 RCU_INIT_POINTER(bond->slave_arr, NULL);
4035 kfree_rcu(arr, rcu);
4036 }
4037
3872 list_del(&bond->bond_list); 4038 list_del(&bond->bond_list);
3873 4039
3874 bond_debug_unregister(bond); 4040 bond_debug_unregister(bond);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 5b022da9cad2..10920f0686e2 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -179,6 +179,12 @@ struct slave {
179 struct rtnl_link_stats64 slave_stats; 179 struct rtnl_link_stats64 slave_stats;
180}; 180};
181 181
182struct bond_up_slave {
183 unsigned int count;
184 struct rcu_head rcu;
185 struct slave *arr[0];
186};
187
182/* 188/*
183 * Link pseudo-state only used internally by monitors 189 * Link pseudo-state only used internally by monitors
184 */ 190 */
@@ -193,6 +199,7 @@ struct bonding {
193 struct slave __rcu *curr_active_slave; 199 struct slave __rcu *curr_active_slave;
194 struct slave __rcu *current_arp_slave; 200 struct slave __rcu *current_arp_slave;
195 struct slave __rcu *primary_slave; 201 struct slave __rcu *primary_slave;
202 struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
196 bool force_primary; 203 bool force_primary;
197 s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ 204 s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
198 int (*recv_probe)(const struct sk_buff *, struct bonding *, 205 int (*recv_probe)(const struct sk_buff *, struct bonding *,
@@ -222,6 +229,7 @@ struct bonding {
222 struct delayed_work alb_work; 229 struct delayed_work alb_work;
223 struct delayed_work ad_work; 230 struct delayed_work ad_work;
224 struct delayed_work mcast_work; 231 struct delayed_work mcast_work;
232 struct delayed_work slave_arr_work;
225#ifdef CONFIG_DEBUG_FS 233#ifdef CONFIG_DEBUG_FS
226 /* debugging support via debugfs */ 234 /* debugging support via debugfs */
227 struct dentry *debug_dir; 235 struct dentry *debug_dir;
@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
534struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, 542struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
535 struct net_device *end_dev, 543 struct net_device *end_dev,
536 int level); 544 int level);
545int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
546void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
537 547
538#ifdef CONFIG_PROC_FS 548#ifdef CONFIG_PROC_FS
539void bond_create_proc_entry(struct bonding *bond); 549void bond_create_proc_entry(struct bonding *bond);