diff options
author | nikolay@redhat.com <nikolay@redhat.com> | 2013-08-01 10:54:51 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-08-01 19:42:02 -0400 |
commit | 278b20837511776dc9d5f6ee1c7fabd5479838bb (patch) | |
tree | 6be201bb67d28154444b1860d6c1247d02adab23 /drivers | |
parent | 15077228cab68e5e8c3cbf26a7f6ebacfac4c829 (diff) |
bonding: initial RCU conversion
This patch does the initial bonding conversion to RCU. After it the
following modes are protected by RCU alone: roundrobin, active-backup,
broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for
reading, and will be dealt with later. curr_active_slave needs to be
dereferenced via rcu in the converted modes because the only thing
protecting the slave after this patch is rcu_read_lock, so we need the
proper barrier for weakly ordered archs and to make sure we don't have
stale pointer. It's not tagged with __rcu yet because there's still work
to be done to remove the curr_slave_lock, so sparse will complain when
rcu_assign_pointer and rcu_dereference are used, but the alternative to use
rcu_dereference_protected would've created much bigger code churn which is
more difficult to test and review. That will be converted in time.
1. Active-backup mode
1.1 Perf recording while doing iperf -P 4
- old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU
in bonding
- new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU
in bonding
1.2. Bandwidth measurements
- old bonding: 16.1 gbps consistently
- new bonding: 17.5 gbps consistently
2. Round-robin mode
2.1 Perf recording while doing iperf -P 4
- old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU
in bonding
- new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU
in bonding
2.2 Bandwidth measurements
- old bonding: 8 gbps (variable due to packet reorderings)
- new bonding: 10 gbps (variable due to packet reorderings)
Of course the latency has improved in all converted modes, and moreover
while
doing enslave/release (since it doesn't affect tx anymore).
Also I've stress tested all modes doing enslave/release in a loop while
transmitting traffic.
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 2 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 6 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 30 | ||||
-rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 19 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 4 |
5 files changed, 32 insertions, 29 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 7d46fa832c1f..90102652c82a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c | |||
@@ -2426,6 +2426,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) | |||
2426 | struct ad_info ad_info; | 2426 | struct ad_info ad_info; |
2427 | int res = 1; | 2427 | int res = 1; |
2428 | 2428 | ||
2429 | read_lock(&bond->lock); | ||
2429 | if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { | 2430 | if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { |
2430 | pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", | 2431 | pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", |
2431 | dev->name); | 2432 | dev->name); |
@@ -2475,6 +2476,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) | |||
2475 | } | 2476 | } |
2476 | 2477 | ||
2477 | out: | 2478 | out: |
2479 | read_unlock(&bond->lock); | ||
2478 | if (res) { | 2480 | if (res) { |
2479 | /* no suitable interface, frame not sent */ | 2481 | /* no suitable interface, frame not sent */ |
2480 | kfree_skb(skb); | 2482 | kfree_skb(skb); |
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4d35196b4b5a..3a5db7b1df68 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c | |||
@@ -1337,6 +1337,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) | |||
1337 | 1337 | ||
1338 | /* make sure that the curr_active_slave do not change during tx | 1338 | /* make sure that the curr_active_slave do not change during tx |
1339 | */ | 1339 | */ |
1340 | read_lock(&bond->lock); | ||
1340 | read_lock(&bond->curr_slave_lock); | 1341 | read_lock(&bond->curr_slave_lock); |
1341 | 1342 | ||
1342 | switch (ntohs(skb->protocol)) { | 1343 | switch (ntohs(skb->protocol)) { |
@@ -1441,11 +1442,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) | |||
1441 | } | 1442 | } |
1442 | 1443 | ||
1443 | read_unlock(&bond->curr_slave_lock); | 1444 | read_unlock(&bond->curr_slave_lock); |
1444 | 1445 | read_unlock(&bond->lock); | |
1445 | if (res) { | 1446 | if (res) { |
1446 | /* no suitable interface, frame not sent */ | 1447 | /* no suitable interface, frame not sent */ |
1447 | kfree_skb(skb); | 1448 | kfree_skb(skb); |
1448 | } | 1449 | } |
1450 | |||
1449 | return NETDEV_TX_OK; | 1451 | return NETDEV_TX_OK; |
1450 | } | 1452 | } |
1451 | 1453 | ||
@@ -1663,7 +1665,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave | |||
1663 | } | 1665 | } |
1664 | 1666 | ||
1665 | swap_slave = bond->curr_active_slave; | 1667 | swap_slave = bond->curr_active_slave; |
1666 | bond->curr_active_slave = new_slave; | 1668 | rcu_assign_pointer(bond->curr_active_slave, new_slave); |
1667 | 1669 | ||
1668 | if (!new_slave || list_empty(&bond->slave_list)) | 1670 | if (!new_slave || list_empty(&bond->slave_list)) |
1669 | return; | 1671 | return; |
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5eee95c3b172..1d37a9657e0d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c | |||
@@ -77,6 +77,7 @@ | |||
77 | #include <net/net_namespace.h> | 77 | #include <net/net_namespace.h> |
78 | #include <net/netns/generic.h> | 78 | #include <net/netns/generic.h> |
79 | #include <net/pkt_sched.h> | 79 | #include <net/pkt_sched.h> |
80 | #include <linux/rculist.h> | ||
80 | #include "bonding.h" | 81 | #include "bonding.h" |
81 | #include "bond_3ad.h" | 82 | #include "bond_3ad.h" |
82 | #include "bond_alb.h" | 83 | #include "bond_alb.h" |
@@ -1037,7 +1038,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) | |||
1037 | if (new_active) | 1038 | if (new_active) |
1038 | bond_set_slave_active_flags(new_active); | 1039 | bond_set_slave_active_flags(new_active); |
1039 | } else { | 1040 | } else { |
1040 | bond->curr_active_slave = new_active; | 1041 | rcu_assign_pointer(bond->curr_active_slave, new_active); |
1041 | } | 1042 | } |
1042 | 1043 | ||
1043 | if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { | 1044 | if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { |
@@ -1127,7 +1128,7 @@ void bond_select_active_slave(struct bonding *bond) | |||
1127 | */ | 1128 | */ |
1128 | static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) | 1129 | static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) |
1129 | { | 1130 | { |
1130 | list_add_tail(&new_slave->list, &bond->slave_list); | 1131 | list_add_tail_rcu(&new_slave->list, &bond->slave_list); |
1131 | bond->slave_cnt++; | 1132 | bond->slave_cnt++; |
1132 | } | 1133 | } |
1133 | 1134 | ||
@@ -1143,7 +1144,7 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) | |||
1143 | */ | 1144 | */ |
1144 | static void bond_detach_slave(struct bonding *bond, struct slave *slave) | 1145 | static void bond_detach_slave(struct bonding *bond, struct slave *slave) |
1145 | { | 1146 | { |
1146 | list_del(&slave->list); | 1147 | list_del_rcu(&slave->list); |
1147 | bond->slave_cnt--; | 1148 | bond->slave_cnt--; |
1148 | } | 1149 | } |
1149 | 1150 | ||
@@ -1751,7 +1752,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) | |||
1751 | * so we can change it without calling change_active_interface() | 1752 | * so we can change it without calling change_active_interface() |
1752 | */ | 1753 | */ |
1753 | if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) | 1754 | if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) |
1754 | bond->curr_active_slave = new_slave; | 1755 | rcu_assign_pointer(bond->curr_active_slave, new_slave); |
1755 | 1756 | ||
1756 | break; | 1757 | break; |
1757 | } /* switch(bond_mode) */ | 1758 | } /* switch(bond_mode) */ |
@@ -1951,7 +1952,7 @@ static int __bond_release_one(struct net_device *bond_dev, | |||
1951 | } | 1952 | } |
1952 | 1953 | ||
1953 | if (all) { | 1954 | if (all) { |
1954 | bond->curr_active_slave = NULL; | 1955 | rcu_assign_pointer(bond->curr_active_slave, NULL); |
1955 | } else if (oldcurrent == slave) { | 1956 | } else if (oldcurrent == slave) { |
1956 | /* | 1957 | /* |
1957 | * Note that we hold RTNL over this sequence, so there | 1958 | * Note that we hold RTNL over this sequence, so there |
@@ -1983,6 +1984,7 @@ static int __bond_release_one(struct net_device *bond_dev, | |||
1983 | 1984 | ||
1984 | write_unlock_bh(&bond->lock); | 1985 | write_unlock_bh(&bond->lock); |
1985 | unblock_netpoll_tx(); | 1986 | unblock_netpoll_tx(); |
1987 | synchronize_rcu(); | ||
1986 | 1988 | ||
1987 | if (list_empty(&bond->slave_list)) { | 1989 | if (list_empty(&bond->slave_list)) { |
1988 | call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); | 1990 | call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); |
@@ -3811,7 +3813,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) | |||
3811 | int i = slave_id; | 3813 | int i = slave_id; |
3812 | 3814 | ||
3813 | /* Here we start from the slave with slave_id */ | 3815 | /* Here we start from the slave with slave_id */ |
3814 | bond_for_each_slave(bond, slave) { | 3816 | bond_for_each_slave_rcu(bond, slave) { |
3815 | if (--i < 0) { | 3817 | if (--i < 0) { |
3816 | if (slave_can_tx(slave)) { | 3818 | if (slave_can_tx(slave)) { |
3817 | bond_dev_queue_xmit(bond, skb, slave->dev); | 3819 | bond_dev_queue_xmit(bond, skb, slave->dev); |
@@ -3822,7 +3824,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) | |||
3822 | 3824 | ||
3823 | /* Here we start from the first slave up to slave_id */ | 3825 | /* Here we start from the first slave up to slave_id */ |
3824 | i = slave_id; | 3826 | i = slave_id; |
3825 | bond_for_each_slave(bond, slave) { | 3827 | bond_for_each_slave_rcu(bond, slave) { |
3826 | if (--i < 0) | 3828 | if (--i < 0) |
3827 | break; | 3829 | break; |
3828 | if (slave_can_tx(slave)) { | 3830 | if (slave_can_tx(slave)) { |
@@ -3848,7 +3850,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev | |||
3848 | * will send all of this type of traffic. | 3850 | * will send all of this type of traffic. |
3849 | */ | 3851 | */ |
3850 | if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { | 3852 | if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { |
3851 | slave = bond->curr_active_slave; | 3853 | slave = rcu_dereference(bond->curr_active_slave); |
3852 | if (slave && slave_can_tx(slave)) | 3854 | if (slave && slave_can_tx(slave)) |
3853 | bond_dev_queue_xmit(bond, skb, slave->dev); | 3855 | bond_dev_queue_xmit(bond, skb, slave->dev); |
3854 | else | 3856 | else |
@@ -3870,7 +3872,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d | |||
3870 | struct bonding *bond = netdev_priv(bond_dev); | 3872 | struct bonding *bond = netdev_priv(bond_dev); |
3871 | struct slave *slave; | 3873 | struct slave *slave; |
3872 | 3874 | ||
3873 | slave = bond->curr_active_slave; | 3875 | slave = rcu_dereference(bond->curr_active_slave); |
3874 | if (slave) | 3876 | if (slave) |
3875 | bond_dev_queue_xmit(bond, skb, slave->dev); | 3877 | bond_dev_queue_xmit(bond, skb, slave->dev); |
3876 | else | 3878 | else |
@@ -3900,7 +3902,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) | |||
3900 | struct bonding *bond = netdev_priv(bond_dev); | 3902 | struct bonding *bond = netdev_priv(bond_dev); |
3901 | struct slave *slave = NULL; | 3903 | struct slave *slave = NULL; |
3902 | 3904 | ||
3903 | bond_for_each_slave(bond, slave) { | 3905 | bond_for_each_slave_rcu(bond, slave) { |
3904 | if (bond_is_last_slave(bond, slave)) | 3906 | if (bond_is_last_slave(bond, slave)) |
3905 | break; | 3907 | break; |
3906 | if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { | 3908 | if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { |
@@ -3955,7 +3957,7 @@ static inline int bond_slave_override(struct bonding *bond, | |||
3955 | return 1; | 3957 | return 1; |
3956 | 3958 | ||
3957 | /* Find out if any slaves have the same mapping as this skb. */ | 3959 | /* Find out if any slaves have the same mapping as this skb. */ |
3958 | bond_for_each_slave(bond, check_slave) { | 3960 | bond_for_each_slave_rcu(bond, check_slave) { |
3959 | if (check_slave->queue_id == skb->queue_mapping) { | 3961 | if (check_slave->queue_id == skb->queue_mapping) { |
3960 | slave = check_slave; | 3962 | slave = check_slave; |
3961 | break; | 3963 | break; |
@@ -4040,14 +4042,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
4040 | if (is_netpoll_tx_blocked(dev)) | 4042 | if (is_netpoll_tx_blocked(dev)) |
4041 | return NETDEV_TX_BUSY; | 4043 | return NETDEV_TX_BUSY; |
4042 | 4044 | ||
4043 | read_lock(&bond->lock); | 4045 | rcu_read_lock(); |
4044 | |||
4045 | if (!list_empty(&bond->slave_list)) | 4046 | if (!list_empty(&bond->slave_list)) |
4046 | ret = __bond_start_xmit(skb, dev); | 4047 | ret = __bond_start_xmit(skb, dev); |
4047 | else | 4048 | else |
4048 | kfree_skb(skb); | 4049 | kfree_skb(skb); |
4049 | 4050 | rcu_read_unlock(); | |
4050 | read_unlock(&bond->lock); | ||
4051 | 4051 | ||
4052 | return ret; | 4052 | return ret; |
4053 | } | 4053 | } |
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 0702e917d478..0f539de640dc 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c | |||
@@ -1243,16 +1243,16 @@ static ssize_t bonding_show_active_slave(struct device *d, | |||
1243 | struct device_attribute *attr, | 1243 | struct device_attribute *attr, |
1244 | char *buf) | 1244 | char *buf) |
1245 | { | 1245 | { |
1246 | struct slave *curr; | ||
1247 | struct bonding *bond = to_bond(d); | 1246 | struct bonding *bond = to_bond(d); |
1247 | struct slave *curr; | ||
1248 | int count = 0; | 1248 | int count = 0; |
1249 | 1249 | ||
1250 | read_lock(&bond->curr_slave_lock); | 1250 | rcu_read_lock(); |
1251 | curr = bond->curr_active_slave; | 1251 | curr = rcu_dereference(bond->curr_active_slave); |
1252 | read_unlock(&bond->curr_slave_lock); | ||
1253 | |||
1254 | if (USES_PRIMARY(bond->params.mode) && curr) | 1252 | if (USES_PRIMARY(bond->params.mode) && curr) |
1255 | count = sprintf(buf, "%s\n", curr->dev->name); | 1253 | count = sprintf(buf, "%s\n", curr->dev->name); |
1254 | rcu_read_unlock(); | ||
1255 | |||
1256 | return count; | 1256 | return count; |
1257 | } | 1257 | } |
1258 | 1258 | ||
@@ -1284,7 +1284,7 @@ static ssize_t bonding_store_active_slave(struct device *d, | |||
1284 | if (!strlen(ifname) || buf[0] == '\n') { | 1284 | if (!strlen(ifname) || buf[0] == '\n') { |
1285 | pr_info("%s: Clearing current active slave.\n", | 1285 | pr_info("%s: Clearing current active slave.\n", |
1286 | bond->dev->name); | 1286 | bond->dev->name); |
1287 | bond->curr_active_slave = NULL; | 1287 | rcu_assign_pointer(bond->curr_active_slave, NULL); |
1288 | bond_select_active_slave(bond); | 1288 | bond_select_active_slave(bond); |
1289 | goto out; | 1289 | goto out; |
1290 | } | 1290 | } |
@@ -1347,14 +1347,9 @@ static ssize_t bonding_show_mii_status(struct device *d, | |||
1347 | struct device_attribute *attr, | 1347 | struct device_attribute *attr, |
1348 | char *buf) | 1348 | char *buf) |
1349 | { | 1349 | { |
1350 | struct slave *curr; | ||
1351 | struct bonding *bond = to_bond(d); | 1350 | struct bonding *bond = to_bond(d); |
1352 | 1351 | ||
1353 | read_lock(&bond->curr_slave_lock); | 1352 | return sprintf(buf, "%s\n", bond->curr_active_slave ? "up" : "down"); |
1354 | curr = bond->curr_active_slave; | ||
1355 | read_unlock(&bond->curr_slave_lock); | ||
1356 | |||
1357 | return sprintf(buf, "%s\n", curr ? "up" : "down"); | ||
1358 | } | 1353 | } |
1359 | static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL); | 1354 | static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL); |
1360 | 1355 | ||
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 0995974c71ac..4bf52d5f637e 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h | |||
@@ -116,6 +116,10 @@ | |||
116 | #define bond_for_each_slave(bond, pos) \ | 116 | #define bond_for_each_slave(bond, pos) \ |
117 | list_for_each_entry(pos, &(bond)->slave_list, list) | 117 | list_for_each_entry(pos, &(bond)->slave_list, list) |
118 | 118 | ||
119 | /* Caller must have rcu_read_lock */ | ||
120 | #define bond_for_each_slave_rcu(bond, pos) \ | ||
121 | list_for_each_entry_rcu(pos, &(bond)->slave_list, list) | ||
122 | |||
119 | /** | 123 | /** |
120 | * bond_for_each_slave_reverse - iterate in reverse from a given position | 124 | * bond_for_each_slave_reverse - iterate in reverse from a given position |
121 | * @bond: the bond holding this list | 125 | * @bond: the bond holding this list |