aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Gospodarek <andy@greyhouse.net>2010-03-25 10:49:05 -0400
committerDavid S. Miller <davem@davemloft.net>2010-03-27 19:39:15 -0400
commita2fd940f4cff74b932728bd6ca12848da21a0234 (patch)
treea0d065e07acd61a1c4ab52f5e67af60d5b31a31d
parent44ebb95290afcc687511ad3f7fd6434e867c270a (diff)
bonding: fix broken multicast with round-robin mode
Round-robin (mode 0) does nothing to ensure that any multicast traffic originally destined for the host will continue to arrive at the host when the link that sent the IGMP join or membership report goes down. One of the benefits of absolute round-robin transmit. Keeping track of subscribed multicast groups for each slave did not seem like a good use of resources, so I decided to simply send on the curr_active slave of the bond (typically the first enslaved device that is up). This makes failover management simple as IGMP membership reports only need to be sent when the curr_active_slave changes. I tested this patch and it appears to work as expected. Originally reported by Lon Hohberger <lhh@redhat.com>. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> CC: Lon Hohberger <lhh@redhat.com> CC: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/bonding/bond_main.c40
1 files changed, 32 insertions, 8 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 430c02267d7e..5b92fbff431d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1235,6 +1235,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1235 write_lock_bh(&bond->curr_slave_lock); 1235 write_lock_bh(&bond->curr_slave_lock);
1236 } 1236 }
1237 } 1237 }
1238
1239 /* resend IGMP joins since all were sent on curr_active_slave */
1240 if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
1241 bond_resend_igmp_join_requests(bond);
1242 }
1238} 1243}
1239 1244
1240/** 1245/**
@@ -4138,22 +4143,41 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
4138 struct bonding *bond = netdev_priv(bond_dev); 4143 struct bonding *bond = netdev_priv(bond_dev);
4139 struct slave *slave, *start_at; 4144 struct slave *slave, *start_at;
4140 int i, slave_no, res = 1; 4145 int i, slave_no, res = 1;
4146 struct iphdr *iph = ip_hdr(skb);
4141 4147
4142 read_lock(&bond->lock); 4148 read_lock(&bond->lock);
4143 4149
4144 if (!BOND_IS_OK(bond)) 4150 if (!BOND_IS_OK(bond))
4145 goto out; 4151 goto out;
4146
4147 /* 4152 /*
4148 * Concurrent TX may collide on rr_tx_counter; we accept that 4153 * Start with the curr_active_slave that joined the bond as the
4149 * as being rare enough not to justify using an atomic op here 4154 * default for sending IGMP traffic. For failover purposes one
4155 * needs to maintain some consistency for the interface that will
4156 * send the join/membership reports. The curr_active_slave found
4157 * will send all of this type of traffic.
4150 */ 4158 */
4151 slave_no = bond->rr_tx_counter++ % bond->slave_cnt; 4159 if ((iph->protocol == htons(IPPROTO_IGMP)) &&
4160 (skb->protocol == htons(ETH_P_IP))) {
4152 4161
4153 bond_for_each_slave(bond, slave, i) { 4162 read_lock(&bond->curr_slave_lock);
4154 slave_no--; 4163 slave = bond->curr_active_slave;
4155 if (slave_no < 0) 4164 read_unlock(&bond->curr_slave_lock);
4156 break; 4165
4166 if (!slave)
4167 goto out;
4168 } else {
4169 /*
4170 * Concurrent TX may collide on rr_tx_counter; we accept
4171 * that as being rare enough not to justify using an
4172 * atomic op here.
4173 */
4174 slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
4175
4176 bond_for_each_slave(bond, slave, i) {
4177 slave_no--;
4178 if (slave_no < 0)
4179 break;
4180 }
4157 } 4181 }
4158 4182
4159 start_at = slave; 4183 start_at = slave;