aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Gospodarek <andy@greyhouse.net>2010-06-02 04:40:18 -0400
committerDavid S. Miller <davem@davemloft.net>2010-06-05 05:23:17 -0400
commitbb1d912323d5dd50e1079e389f4e964be14f0ae3 (patch)
tree7d20fff2e63bc6add251a56625110257d3ccc45f
parentebd8e4977a87cb81d93c62a9bff0102a9713722f (diff)
bonding: allow user-controlled output slave selection
v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/bonding.txt84
-rw-r--r--drivers/net/bonding/bond_main.c75
-rw-r--r--drivers/net/bonding/bond_sysfs.c116
-rw-r--r--drivers/net/bonding/bonding.h9
-rw-r--r--include/linux/if_bonding.h1
5 files changed, 278 insertions, 7 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 61f516b135b4..d0914781830e 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -49,6 +49,7 @@ Table of Contents
493.3 Configuring Bonding Manually with Ifenslave 493.3 Configuring Bonding Manually with Ifenslave
503.3.1 Configuring Multiple Bonds Manually 503.3.1 Configuring Multiple Bonds Manually
513.4 Configuring Bonding Manually via Sysfs 513.4 Configuring Bonding Manually via Sysfs
523.5 Overriding Configuration for Special Cases
52 53
534. Querying Bonding Configuration 544. Querying Bonding Configuration
544.1 Bonding Configuration 554.1 Bonding Configuration
@@ -1318,8 +1319,87 @@ echo 2000 > /sys/class/net/bond1/bonding/arp_interval
1318echo +eth2 > /sys/class/net/bond1/bonding/slaves 1319echo +eth2 > /sys/class/net/bond1/bonding/slaves
1319echo +eth3 > /sys/class/net/bond1/bonding/slaves 1320echo +eth3 > /sys/class/net/bond1/bonding/slaves
1320 1321
1321 13223.5 Overriding Configuration for Special Cases
13224. Querying Bonding Configuration 1323----------------------------------------------
1324When using the bonding driver, the physical port which transmits a frame is
1325typically selected by the bonding driver, and is not relevant to the user or
1326system administrator. The output port is simply selected using the policies of
1327the selected bonding mode. On occasion however, it is helpful to direct certain
1328classes of traffic to certain physical interfaces on output to implement
1329slightly more complex policies. For example, to reach a web server over a
1330bonded interface in which eth0 connects to a private network, while eth1
1331connects via a public network, it may be desirous to bias the bond to send said
1332traffic over eth0 first, using eth1 only as a fall back, while all other traffic
1333can safely be sent over either interface. Such configurations may be achieved
1334using the traffic control utilities inherent in linux.
1335
1336By default the bonding driver is multiqueue aware and 16 queues are created
1337when the driver initializes (see Documentation/networking/multiqueue.txt
1338for details). If more or less queues are desired the module parameter
1339tx_queues can be used to change this value. There is no sysfs parameter
1340available as the allocation is done at module init time.
1341
1342The output of the file /proc/net/bonding/bondX has changed so the output Queue
1343ID is now printed for each slave:
1344
1345Bonding Mode: fault-tolerance (active-backup)
1346Primary Slave: None
1347Currently Active Slave: eth0
1348MII Status: up
1349MII Polling Interval (ms): 0
1350Up Delay (ms): 0
1351Down Delay (ms): 0
1352
1353Slave Interface: eth0
1354MII Status: up
1355Link Failure Count: 0
1356Permanent HW addr: 00:1a:a0:12:8f:cb
1357Slave queue ID: 0
1358
1359Slave Interface: eth1
1360MII Status: up
1361Link Failure Count: 0
1362Permanent HW addr: 00:1a:a0:12:8f:cc
1363Slave queue ID: 2
1364
1365The queue_id for a slave can be set using the command:
1366
1367# echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id
1368
1369Any interface that needs a queue_id set should set it with multiple calls
1370like the one above until proper priorities are set for all interfaces. On
1371distributions that allow configuration via initscripts, multiple 'queue_id'
1372arguments can be added to BONDING_OPTS to set all needed slave queues.
1373
1374These queue id's can be used in conjunction with the tc utility to configure
1375a multiqueue qdisc and filters to bias certain traffic to transmit on certain
1376slave devices. For instance, say we wanted, in the above configuration to
1377force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output
1378device. The following commands would accomplish this:
1379
1380# tc qdisc add dev bond0 handle 1 root multiq
1381
1382# tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip dst \
1383 192.168.1.100 action skbedit queue_mapping 2
1384
1385These commands tell the kernel to attach a multiqueue queue discipline to the
1386bond0 interface and filter traffic enqueued to it, such that packets with a dst
1387ip of 192.168.1.100 have their output queue mapping value overwritten to 2.
1388This value is then passed into the driver, causing the normal output path
1389selection policy to be overridden, selecting instead qid 2, which maps to eth1.
1390
1391Note that qid values begin at 1. Qid 0 is reserved to initiate to the driver
1392that normal output policy selection should take place. One benefit to simply
1393leaving the qid for a slave to 0 is the multiqueue awareness in the bonding
1394driver that is now present. This awareness allows tc filters to be placed on
1395slave devices as well as bond devices and the bonding driver will simply act as
1396a pass-through for selecting output queues on the slave device rather than
1397output port selection.
1398
1399This feature first appeared in bonding driver version 3.7.0 and support for
1400output slave selection was limited to round-robin and active-backup modes.
1401
14024 Querying Bonding Configuration
1323================================= 1403=================================
1324 1404
13254.1 Bonding Configuration 14054.1 Bonding Configuration
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f22f6bf43858..1b19276cff12 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -90,6 +90,7 @@
90#define BOND_LINK_ARP_INTERV 0 90#define BOND_LINK_ARP_INTERV 0
91 91
92static int max_bonds = BOND_DEFAULT_MAX_BONDS; 92static int max_bonds = BOND_DEFAULT_MAX_BONDS;
93static int tx_queues = BOND_DEFAULT_TX_QUEUES;
93static int num_grat_arp = 1; 94static int num_grat_arp = 1;
94static int num_unsol_na = 1; 95static int num_unsol_na = 1;
95static int miimon = BOND_LINK_MON_INTERV; 96static int miimon = BOND_LINK_MON_INTERV;
@@ -111,6 +112,8 @@ static struct bond_params bonding_defaults;
111 112
112module_param(max_bonds, int, 0); 113module_param(max_bonds, int, 0);
113MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 114MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
115module_param(tx_queues, int, 0);
116MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
114module_param(num_grat_arp, int, 0644); 117module_param(num_grat_arp, int, 0644);
115MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); 118MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
116module_param(num_unsol_na, int, 0644); 119module_param(num_unsol_na, int, 0644);
@@ -1540,6 +1543,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1540 goto err_undo_flags; 1543 goto err_undo_flags;
1541 } 1544 }
1542 1545
1546 /*
1547 * Set the new_slave's queue_id to be zero. Queue ID mapping
1548 * is set via sysfs or module option if desired.
1549 */
1550 new_slave->queue_id = 0;
1551
1543 /* Save slave's original mtu and then set it to match the bond */ 1552 /* Save slave's original mtu and then set it to match the bond */
1544 new_slave->original_mtu = slave_dev->mtu; 1553 new_slave->original_mtu = slave_dev->mtu;
1545 res = dev_set_mtu(slave_dev, bond->dev->mtu); 1554 res = dev_set_mtu(slave_dev, bond->dev->mtu);
@@ -3285,6 +3294,7 @@ static void bond_info_show_slave(struct seq_file *seq,
3285 else 3294 else
3286 seq_puts(seq, "Aggregator ID: N/A\n"); 3295 seq_puts(seq, "Aggregator ID: N/A\n");
3287 } 3296 }
3297 seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id);
3288} 3298}
3289 3299
3290static int bond_info_seq_show(struct seq_file *seq, void *v) 3300static int bond_info_seq_show(struct seq_file *seq, void *v)
@@ -4421,9 +4431,59 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
4421 } 4431 }
4422} 4432}
4423 4433
4434/*
4435 * Lookup the slave that corresponds to a qid
4436 */
4437static inline int bond_slave_override(struct bonding *bond,
4438 struct sk_buff *skb)
4439{
4440 int i, res = 1;
4441 struct slave *slave = NULL;
4442 struct slave *check_slave;
4443
4444 read_lock(&bond->lock);
4445
4446 if (!BOND_IS_OK(bond) || !skb->queue_mapping)
4447 goto out;
4448
4449 /* Find out if any slaves have the same mapping as this skb. */
4450 bond_for_each_slave(bond, check_slave, i) {
4451 if (check_slave->queue_id == skb->queue_mapping) {
4452 slave = check_slave;
4453 break;
4454 }
4455 }
4456
4457 /* If the slave isn't UP, use default transmit policy. */
4458 if (slave && slave->queue_id && IS_UP(slave->dev) &&
4459 (slave->link == BOND_LINK_UP)) {
4460 res = bond_dev_queue_xmit(bond, skb, slave->dev);
4461 }
4462
4463out:
4464 read_unlock(&bond->lock);
4465 return res;
4466}
4467
4468static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
4469{
4470 /*
4471 * This helper function exists to help dev_pick_tx get the correct
4472 * destination queue. Using a helper function skips the a call to
4473 * skb_tx_hash and will put the skbs in the queue we expect on their
4474 * way down to the bonding driver.
4475 */
4476 return skb->queue_mapping;
4477}
4478
4424static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 4479static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
4425{ 4480{
4426 const struct bonding *bond = netdev_priv(dev); 4481 struct bonding *bond = netdev_priv(dev);
4482
4483 if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
4484 if (!bond_slave_override(bond, skb))
4485 return NETDEV_TX_OK;
4486 }
4427 4487
4428 switch (bond->params.mode) { 4488 switch (bond->params.mode) {
4429 case BOND_MODE_ROUNDROBIN: 4489 case BOND_MODE_ROUNDROBIN:
@@ -4508,6 +4568,7 @@ static const struct net_device_ops bond_netdev_ops = {
4508 .ndo_open = bond_open, 4568 .ndo_open = bond_open,
4509 .ndo_stop = bond_close, 4569 .ndo_stop = bond_close,
4510 .ndo_start_xmit = bond_start_xmit, 4570 .ndo_start_xmit = bond_start_xmit,
4571 .ndo_select_queue = bond_select_queue,
4511 .ndo_get_stats = bond_get_stats, 4572 .ndo_get_stats = bond_get_stats,
4512 .ndo_do_ioctl = bond_do_ioctl, 4573 .ndo_do_ioctl = bond_do_ioctl,
4513 .ndo_set_multicast_list = bond_set_multicast_list, 4574 .ndo_set_multicast_list = bond_set_multicast_list,
@@ -4776,6 +4837,13 @@ static int bond_check_params(struct bond_params *params)
4776 } 4837 }
4777 } 4838 }
4778 4839
4840 if (tx_queues < 1 || tx_queues > 255) {
4841 pr_warning("Warning: tx_queues (%d) should be between "
4842 "1 and 255, resetting to %d\n",
4843 tx_queues, BOND_DEFAULT_TX_QUEUES);
4844 tx_queues = BOND_DEFAULT_TX_QUEUES;
4845 }
4846
4779 if ((all_slaves_active != 0) && (all_slaves_active != 1)) { 4847 if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
4780 pr_warning("Warning: all_slaves_active module parameter (%d), " 4848 pr_warning("Warning: all_slaves_active module parameter (%d), "
4781 "not of valid value (0/1), so it was set to " 4849 "not of valid value (0/1), so it was set to "
@@ -4953,6 +5021,7 @@ static int bond_check_params(struct bond_params *params)
4953 params->primary[0] = 0; 5021 params->primary[0] = 0;
4954 params->primary_reselect = primary_reselect_value; 5022 params->primary_reselect = primary_reselect_value;
4955 params->fail_over_mac = fail_over_mac_value; 5023 params->fail_over_mac = fail_over_mac_value;
5024 params->tx_queues = tx_queues;
4956 params->all_slaves_active = all_slaves_active; 5025 params->all_slaves_active = all_slaves_active;
4957 5026
4958 if (primary) { 5027 if (primary) {
@@ -5040,8 +5109,8 @@ int bond_create(struct net *net, const char *name)
5040 5109
5041 rtnl_lock(); 5110 rtnl_lock();
5042 5111
5043 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", 5112 bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? name : "",
5044 bond_setup); 5113 bond_setup, tx_queues);
5045 if (!bond_dev) { 5114 if (!bond_dev) {
5046 pr_err("%s: eek! can't alloc netdev!\n", name); 5115 pr_err("%s: eek! can't alloc netdev!\n", name);
5047 rtnl_unlock(); 5116 rtnl_unlock();
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 066311a5e084..f9a034361a8e 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1412,6 +1412,121 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
1412static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL); 1412static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
1413 1413
1414/* 1414/*
1415 * Show the queue_ids of the slaves in the current bond.
1416 */
1417static ssize_t bonding_show_queue_id(struct device *d,
1418 struct device_attribute *attr,
1419 char *buf)
1420{
1421 struct slave *slave;
1422 int i, res = 0;
1423 struct bonding *bond = to_bond(d);
1424
1425 if (!rtnl_trylock())
1426 return restart_syscall();
1427
1428 read_lock(&bond->lock);
1429 bond_for_each_slave(bond, slave, i) {
1430 if (res > (PAGE_SIZE - 6)) {
1431 /* not enough space for another interface name */
1432 if ((PAGE_SIZE - res) > 10)
1433 res = PAGE_SIZE - 10;
1434 res += sprintf(buf + res, "++more++ ");
1435 break;
1436 }
1437 res += sprintf(buf + res, "%s:%d ",
1438 slave->dev->name, slave->queue_id);
1439 }
1440 read_unlock(&bond->lock);
1441 if (res)
1442 buf[res-1] = '\n'; /* eat the leftover space */
1443 rtnl_unlock();
1444 return res;
1445}
1446
1447/*
1448 * Set the queue_ids of the slaves in the current bond. The bond
1449 * interface must be enslaved for this to work.
1450 */
1451static ssize_t bonding_store_queue_id(struct device *d,
1452 struct device_attribute *attr,
1453 const char *buffer, size_t count)
1454{
1455 struct slave *slave, *update_slave;
1456 struct bonding *bond = to_bond(d);
1457 u16 qid;
1458 int i, ret = count;
1459 char *delim;
1460 struct net_device *sdev = NULL;
1461
1462 if (!rtnl_trylock())
1463 return restart_syscall();
1464
1465 /* delim will point to queue id if successful */
1466 delim = strchr(buffer, ':');
1467 if (!delim)
1468 goto err_no_cmd;
1469
1470 /*
1471 * Terminate string that points to device name and bump it
1472 * up one, so we can read the queue id there.
1473 */
1474 *delim = '\0';
1475 if (sscanf(++delim, "%hd\n", &qid) != 1)
1476 goto err_no_cmd;
1477
1478 /* Check buffer length, valid ifname and queue id */
1479 if (strlen(buffer) > IFNAMSIZ ||
1480 !dev_valid_name(buffer) ||
1481 qid > bond->params.tx_queues)
1482 goto err_no_cmd;
1483
1484 /* Get the pointer to that interface if it exists */
1485 sdev = __dev_get_by_name(dev_net(bond->dev), buffer);
1486 if (!sdev)
1487 goto err_no_cmd;
1488
1489 read_lock(&bond->lock);
1490
1491 /* Search for thes slave and check for duplicate qids */
1492 update_slave = NULL;
1493 bond_for_each_slave(bond, slave, i) {
1494 if (sdev == slave->dev)
1495 /*
1496 * We don't need to check the matching
1497 * slave for dups, since we're overwriting it
1498 */
1499 update_slave = slave;
1500 else if (qid && qid == slave->queue_id) {
1501 goto err_no_cmd_unlock;
1502 }
1503 }
1504
1505 if (!update_slave)
1506 goto err_no_cmd_unlock;
1507
1508 /* Actually set the qids for the slave */
1509 update_slave->queue_id = qid;
1510
1511 read_unlock(&bond->lock);
1512out:
1513 rtnl_unlock();
1514 return ret;
1515
1516err_no_cmd_unlock:
1517 read_unlock(&bond->lock);
1518err_no_cmd:
1519 pr_info("invalid input for queue_id set for %s.\n",
1520 bond->dev->name);
1521 ret = -EPERM;
1522 goto out;
1523}
1524
1525static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
1526 bonding_store_queue_id);
1527
1528
1529/*
1415 * Show and set the all_slaves_active flag. 1530 * Show and set the all_slaves_active flag.
1416 */ 1531 */
1417static ssize_t bonding_show_slaves_active(struct device *d, 1532static ssize_t bonding_show_slaves_active(struct device *d,
@@ -1489,6 +1604,7 @@ static struct attribute *per_bond_attrs[] = {
1489 &dev_attr_ad_actor_key.attr, 1604 &dev_attr_ad_actor_key.attr,
1490 &dev_attr_ad_partner_key.attr, 1605 &dev_attr_ad_partner_key.attr,
1491 &dev_attr_ad_partner_mac.attr, 1606 &dev_attr_ad_partner_mac.attr,
1607 &dev_attr_queue_id.attr,
1492 &dev_attr_all_slaves_active.attr, 1608 &dev_attr_all_slaves_active.attr,
1493 NULL, 1609 NULL,
1494}; 1610};
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index cecdea2a629f..c6fdd851579a 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -23,8 +23,8 @@
23#include "bond_3ad.h" 23#include "bond_3ad.h"
24#include "bond_alb.h" 24#include "bond_alb.h"
25 25
26#define DRV_VERSION "3.6.0" 26#define DRV_VERSION "3.7.0"
27#define DRV_RELDATE "September 26, 2009" 27#define DRV_RELDATE "June 2, 2010"
28#define DRV_NAME "bonding" 28#define DRV_NAME "bonding"
29#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" 29#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
30 30
@@ -60,6 +60,9 @@
60 ((mode) == BOND_MODE_TLB) || \ 60 ((mode) == BOND_MODE_TLB) || \
61 ((mode) == BOND_MODE_ALB)) 61 ((mode) == BOND_MODE_ALB))
62 62
63#define TX_QUEUE_OVERRIDE(mode) \
64 (((mode) == BOND_MODE_ACTIVEBACKUP) || \
65 ((mode) == BOND_MODE_ROUNDROBIN))
63/* 66/*
64 * Less bad way to call ioctl from within the kernel; this needs to be 67 * Less bad way to call ioctl from within the kernel; this needs to be
65 * done some other way to get the call out of interrupt context. 68 * done some other way to get the call out of interrupt context.
@@ -131,6 +134,7 @@ struct bond_params {
131 char primary[IFNAMSIZ]; 134 char primary[IFNAMSIZ];
132 int primary_reselect; 135 int primary_reselect;
133 __be32 arp_targets[BOND_MAX_ARP_TARGETS]; 136 __be32 arp_targets[BOND_MAX_ARP_TARGETS];
137 int tx_queues;
134 int all_slaves_active; 138 int all_slaves_active;
135}; 139};
136 140
@@ -165,6 +169,7 @@ struct slave {
165 u8 perm_hwaddr[ETH_ALEN]; 169 u8 perm_hwaddr[ETH_ALEN];
166 u16 speed; 170 u16 speed;
167 u8 duplex; 171 u8 duplex;
172 u16 queue_id;
168 struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */ 173 struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
169 struct tlb_slave_info tlb_info; 174 struct tlb_slave_info tlb_info;
170}; 175};
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index cd525fae3c98..2c7994372bde 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -83,6 +83,7 @@
83 83
84#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ 84#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */
85 85
86#define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */
86/* hashing types */ 87/* hashing types */
87#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ 88#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */
88#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ 89#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */