/*
* originally based on the dummy device.
*
* Copyright 1999, Thomas Davis, tadavis@lbl.gov.
* Licensed under the GPL. Based on dummy.c, and eql.c devices.
*
* bonding.c: an Ethernet Bonding driver
*
* This is useful to talk to a Cisco EtherChannel compatible equipment:
* Cisco 5500
* Sun Trunking (Solaris)
* Alteon AceDirector Trunks
* Linux Bonding
* and probably many L2 switches ...
*
* How it works:
* ifconfig bond0 ipaddress netmask up
* will setup a network device, with an ip address. No mac address
* will be assigned at this time. The hw mac address will come from
* the first slave bonded to the channel. All slaves will then use
* this hw mac address.
*
* ifconfig bond0 down
* will release all slaves, marking them as down.
*
* ifenslave bond0 eth0
* will attach eth0 to bond0 as a slave. eth0 hw mac address will either
* a: be used as initial mac address
* b: if a hw mac address already is there, eth0's hw mac address
* will then be set from bond0.
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/bitops.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <linux/jiffies.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
/*---------------------------- Module parameters ----------------------------*/
/* monitor all links that often (in milliseconds). <=0 disables monitoring */
#define BOND_LINK_MON_INTERV 0
#define BOND_LINK_ARP_INTERV 0
static int max_bonds = BOND_DEFAULT_MAX_BONDS;
static int num_grat_arp = 1;
static int num_unsol_na = 1;
static int miimon = BOND_LINK_MON_INTERV;
static int updelay = 0;
static int downdelay = 0;
static int use_carrier = 1;
static char *mode = NULL;
static char *primary = NULL;
static char *lacp_rate = NULL;
static char *ad_select = NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
static char *arp_validate = NULL;
static char *fail_over_mac = NULL;
struct bond_params bonding_defaults;
module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(num_grat_arp, int, 0644);
MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
module_param(num_unsol_na, int, 0644);
MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
"in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
"0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
"1 for active-backup, 2 for balance-xor, "
"3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
"6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
"(slow/fast)");
module_param(ad_select, charp, 0);
MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)");
module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
", 1 for layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. none (default), active or follow");
/*----------------------------- Global variables ----------------------------*/
static const char * const version =
DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
LIST_HEAD(bond_dev_list);
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *bond_proc_dir = NULL;
#endif
static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
static int arp_ip_count = 0;
static int bond_mode = BOND_MODE_ROUNDROBIN;
static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
static int lacp_fast = 0;
const struct bond_parm_tbl bond_lacp_tbl[] = {
{ "slow", AD_LACP_SLOW},
{ "fast", AD_LACP_FAST},
{ NULL, -1},
};
const struct bond_parm_tbl bond_mode_tbl[] = {
{ "balance-rr", BOND_MODE_ROUNDROBIN},
{ "active-backup", BOND_MODE_ACTIVEBACKUP},
{ "balance-xor", BOND_MODE_XOR},
{ "broadcast", BOND_MODE_BROADCAST},
{ "802.3ad", BOND_MODE_8023AD},
{ "balance-tlb", BOND_MODE_TLB},
{ "balance-alb", BOND_MODE_ALB},
{ NULL, -1},
};
const struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ "layer2", BOND_XMIT_POLICY_LAYER2},
{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
{ "layer2+3", BOND_XMIT_POLICY_LAYER23},
{ NULL, -1},
};
const struct bond_parm_tbl arp_validate_tbl[] = {
{ "none", BOND_ARP_VALIDATE_NONE},
{ "active", BOND_ARP_VALIDATE_ACTIVE},
{ "backup", BOND_ARP_VALIDATE_BACKUP},
{ "all", BOND_ARP_VALIDATE_ALL},
{ NULL, -1},
};
const struct bond_parm_tbl fail_over_mac_tbl[] = {
{ "none", BOND_FOM_NONE},
{ "active", BOND_FOM_ACTIVE},
{ "follow", BOND_FOM_FOLLOW},
{ NULL, -1},
};
struct bond_parm_tbl ad_select_tbl[] = {
{ "stable", BOND_AD_STABLE},
{ "bandwidth", BOND_AD_BANDWIDTH},
{ "count", BOND_AD_COUNT},
{ NULL, -1},
};
/*-------------------------- Forward declarations ---------------------------*/
static void bond_send_gratuitous_arp(struct bonding *bond);
static void bond_deinit(struct net_device *bond_dev);
/*---------------------------- General routines -----------------------------*/
static const char *bond_mode_name(int mode)
{
static const char *names[] = {
[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
[BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
[BOND_MODE_XOR] = "load balancing (xor)",
[BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
[BOND_MODE_8023AD]= "IEEE 802.3ad Dynamic link aggregation",
[BOND_MODE_TLB] = "transmit load balancing",
[BOND_MODE_ALB] = "adaptive load balancing",
};
if (mode < 0 || mode > BOND_MODE_ALB)
return "unknown";
return names[mode];
}
/*---------------------------------- VLAN -----------------------------------*/
/**
* bond_add_vlan - add a new vlan id on bond
* @bond: bond that got the notification
* @vlan_id: the vlan id to add
*
* Returns -ENOMEM if allocation failed.
*/
static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
{
struct vlan_entry *vlan;
pr_debug("bond: %s, vlan id %d\n",
(bond ? bond->dev->name: "None"), vlan_id);
vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
if (!vlan) {
return -ENOMEM;
}
INIT_LIST_HEAD(&vlan->vlan_list);
vlan->vlan_id = vlan_id;
write_lock_bh(&bond->lock);
list_add_tail(&vlan->vlan_list, &bond->vlan_list);
write_unlock_bh(&bond->lock);
pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
return 0;
}
/**
* bond_del_vlan - delete a vlan id from bond
* @bond: bond that got the notification
* @vlan_id: the vlan id to delete
*
* returns -ENODEV if @vlan_id was not found in @bond.
*/
static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
{
struct vlan_entry *vlan;
int res = -ENODEV;
pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
write_lock_bh(&bond->lock);
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
if (vlan->vlan_id == vlan_id) {
list_del(&vlan->vlan_list);
if (bond_is_lb(bond))
bond_alb_clear_vlan(bond, vlan_id);
pr_debug("removed VLAN ID %d from bond %s\n", vlan_id,
bond->dev->name);
kfree(vlan);
if (list_empty(&bond->vlan_list) &&
(bond->slave_cnt == 0)) {
/* Last VLAN removed and no slaves, so
* restore block on adding VLANs. This will
* be removed once new slaves that are not
* VLAN challenged will be added.
*/
bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
}
res = 0;
goto out;
}
}
pr_debug("couldn't find VLAN ID %d in bond %s\n", vlan_id,
bond->dev->name);
out:
write_unlock_bh(&bond->lock);
return res;
}
/**
* bond_has_challenged_slaves
* @bond: the bond we're working on
*
* Searches the slave list. Returns 1 if a vlan challenged slave
* was found, 0 otherwise.
*
* Assumes bond->lock is held.
*/
static int bond_has_challenged_slaves(struct bonding *bond)
{
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
pr_debug("found VLAN challenged slave - %s\n",
slave->dev->name);
return 1;
}
}
pr_debug("no VLAN challenged slaves found\n");
return 0;
}
/**
* bond_next_vlan - safely skip to the next item in the vlans list.
* @bond: the bond we're working on
* @curr: item we're advancing from
*
* Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
* or @curr->next otherwise (even if it is @curr itself again).
*
* Caller must hold bond->lock
*/
struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
{
struct vlan_entry *next, *last;
if (list_empty(&bond->vlan_list)) {
return NULL;
}
if (!curr) {
next = list_entry(bond->vlan_list.next,
struct vlan_entry, vlan_list);
} else {
last = list_entry(bond->vlan_list.prev,
struct vlan_entry, vlan_list);
if (last == curr) {
next = list_entry(bond->vlan_list.next,
struct vlan_entry, vlan_list);
} else {
next = list_entry(curr->vlan_list.next,
struct vlan_entry, vlan_list);
}
}
return next;
}
/**
* bond_dev_queue_xmit - Prepare skb for xmit.
*
* @bond: bond device that got this skb for tx.
* @skb: hw accel VLAN tagged skb to transmit
* @slave_dev: slave that is supposed to xmit this skbuff
*
* When the bond gets an skb to transmit that is
* already hardware accelerated VLAN tagged, and it
* needs to relay this skb to a slave that is not
* hw accel capable, the skb needs to be "unaccelerated",
* i.e. strip the hwaccel tag and re-insert it as part
* of the payload.
*/
int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev)
{
unsigned short uninitialized_var(vlan_id);
if (!list_empty(&bond->vlan_list) &&
!(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
vlan_get_tag(skb, &vlan_id) == 0) {
skb->dev = slave_dev;
skb = vlan_put_tag(skb, vlan_id);
if (!skb) {
/* vlan_put_tag() frees the skb in case of error,
* so return success here so the calling functions
* won't attempt to free is again.
*/
return 0;
}
} else {
skb->dev = slave_dev;
}
skb->priority = 1;
dev_queue_xmit(skb);
return 0;
}
/*
* In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
* and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
* lock because:
* a. This operation is performed in IOCTL context,
* b. The operation is protected by the RTNL semaphore in the 8021q code,
* c. Holding a lock with BH disabled while directly calling a base driver
* entry point is generally a BAD idea.
*
* The design of synchronization/protection for this operation in the 8021q
* module is good for one or more VLAN devices over a single physical device
* and cannot be extended for a teaming solution like bonding, so there is a
* potential race condition here where a net device from the vlan group might
* be referenced (either by a base driver or the 8021q code) while it is being
* removed from the system. However, it turns out we're not making matters
* worse, and if it works for regular VLAN usage it will work here too.
*/
/**
* bond_vlan_rx_register - Propagates registration to slaves
* @bond_dev: bonding net device that got called
* @grp: vlan group being registered
*/
static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
int i;
bond->vlgrp = grp;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_ops->ndo_vlan_rx_register) {
slave_ops->ndo_vlan_rx_register(slave_dev, grp);
}
}
}
/**
* bond_vlan_rx_add_vid - Propagates adding an id to slaves
* @bond_dev: bonding net device that got called
* @vid: vlan id being added
*/
static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
int i, res;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
slave_ops->ndo_vlan_rx_add_vid) {
slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid);
}
}
res = bond_add_vlan(bond, vid);
if (res) {
printk(KERN_ERR DRV_NAME
": %s: Error: Failed to add vlan id %d\n",
bond_dev->name, vid);
}
}
/**
* bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
* @bond_dev: bonding net device that got called
* @vid: vlan id being removed
*/
static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
struct net_device *vlan_dev;
int i, res;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
slave_ops->ndo_vlan_rx_kill_vid) {
/* Save and then restore vlan_dev in the grp array,
* since the slave's driver might clear it.
*/
vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
}
}
res = bond_del_vlan(bond, vid);
if (res) {
printk(KERN_ERR DRV_NAME
": %s: Error: Failed to remove vlan id %d\n",
bond_dev->name, vid);
}
}
static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
{
struct vlan_entry *vlan;
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
write_lock_bh(&bond->lock);
if (list_empty(&bond->vlan_list))
goto out;
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_ops->ndo_vlan_rx_register)
slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp);
if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
!(slave_ops->ndo_vlan_rx_add_vid))
goto out;
list_for_each_entry(vlan, &bond->vlan_list, vlan_list)
slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id);
out:
write_unlock_bh(&bond->lock);
}
static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev)
{
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
struct vlan_entry *vlan;
struct net_device *vlan_dev;
write_lock_bh(&bond->lock);
if (list_empty(&bond->vlan_list))
goto out;
if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
!(slave_ops->ndo_vlan_rx_kill_vid))
goto unreg;
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
/* Save and then restore vlan_dev in the grp array,
* since the slave's driver might clear it.
*/
vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
}
unreg:
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_ops->ndo_vlan_rx_register)
slave_ops->ndo_vlan_rx_register(slave_dev, NULL);
out:
write_unlock_bh(&bond->lock);
}
/*------------------------------- Link status -------------------------------*/
/*
* Set the carrier state for the master according to the state of its
* slaves. If any slaves are up, the master is up. In 802.3ad mode,
* do special 802.3ad magic.
*
* Returns zero if carrier state does not change, nonzero if it does.
*/
static int bond_set_carrier(struct bonding *bond)
{
struct slave *slave;
int i;
if (bond->slave_cnt == 0)
goto down;
if (bond->params.mode == BOND_MODE_8023AD)
return bond_3ad_set_carrier(bond);
bond_for_each_slave(bond, slave, i) {
if (slave->link == BOND_LINK_UP) {
if (!netif_carrier_ok(bond->dev)) {
netif_carrier_on(bond->dev);
return 1;
}
return 0;
}
}
down:
if (netif_carrier_ok(bond->dev)) {
netif_carrier_off(bond->dev);
return 1;
}
return 0;
}
/*
* Get link speed and duplex from the slave's base driver
* using ethtool. If for some reason the call fails or the
* values are invalid, fake speed and duplex to 100/Full
* and return error.
*/
static int bond_update_speed_duplex(struct slave *slave)
{
struct net_device *slave_dev = slave->dev;
struct ethtool_cmd etool;
int res;
/* Fake speed and duplex */
slave->speed = SPEED_100;
slave->duplex = DUPLEX_FULL;
if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
return -1;
res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
if (res < 0)
return -1;
switch (etool.speed) {
case SPEED_10:
case SPEED_100:
case SPEED_1000:
case SPEED_10000:
break;
default:
return -1;
}
switch (etool.duplex) {
case DUPLEX_FULL:
case DUPLEX_HALF:
break;
default:
return -1;
}
slave->speed = etool.speed;
slave->duplex = etool.duplex;
return 0;
}
/*
* if <dev> supports MII link status reporting, check its link status.
*
* We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
* depening upon the setting of the use_carrier parameter.
*
* Return either BMSR_LSTATUS, meaning that the link is up (or we
* can't tell and just pretend it is), or 0, meaning that the link is
* down.
*
* If reporting is non-zero, instead of faking link up, return -1 if
* both ETHTOOL and MII ioctls fail (meaning the device does not
* support them). If use_carrier is set, return whatever it says.
* It'd be nice if there was a good way to tell if a driver supports
* netif_carrier, but there really isn't.
*/
static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting)
{
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
static int (* ioctl)(struct net_device *, struct ifreq *, int);
struct ifreq ifr;
struct mii_ioctl_data *mii;
if (bond->params.use_carrier)
return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
/* Try to get link status using Ethtool first. */
if (slave_dev->ethtool_ops) {
if (slave_dev->ethtool_ops->get_link) {
u32 link;
link = slave_dev->ethtool_ops->get_link(slave_dev);
return link ? BMSR_LSTATUS : 0;
}
}
/* Ethtool can't be used, fallback to MII ioclts. */
ioctl = slave_ops->ndo_do_ioctl;
if (ioctl) {
/* TODO: set pointer to correct ioctl on a per team member */
/* bases to make this more efficient. that is, once */
/* we determine the correct ioctl, we will always */
/* call it and not the others for that team */
/* member. */
/*
* We cannot assume that SIOCGMIIPHY will also read a
* register; not all network drivers (e.g., e100)
* support that.
*/
/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
mii = if_mii(&ifr);
if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
mii->reg_num = MII_BMSR;
if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) {
return (mii->val_out & BMSR_LSTATUS);
}
}
}
/*
* If reporting, report that either there's no dev->do_ioctl,
* or both SIOCGMIIREG and get_link failed (meaning that we
* cannot report link status). If not reporting, pretend
* we're ok.
*/
return (reporting ? -1 : BMSR_LSTATUS);
}
/*----------------------------- Multicast list ------------------------------*/
/*
* Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
*/
static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2)
{
return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
dmi1->dmi_addrlen == dmi2->dmi_addrlen;
}
/*
* returns dmi entry if found, NULL otherwise
*/
static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list)
{
struct dev_mc_list *idmi;
for (idmi = mc_list; idmi; idmi = idmi->next) {
if (bond_is_dmi_same(dmi, idmi)) {
return idmi;
}
}
return NULL;
}
/*
* Push the promiscuity flag down to appropriate slaves
*/
static int bond_set_promiscuity(struct bonding *bond, int inc)
{
int err = 0;
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
err = dev_set_promiscuity(bond->curr_active_slave->dev,
inc);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
err = dev_set_promiscuity(slave->dev, inc);
if (err)
return err;
}
}
return err;
}
/*
* Push the allmulti flag down to all slaves
*/
static int bond_set_allmulti(struct bonding *bond, int inc)
{
int err = 0;
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
err = dev_set_allmulti(bond->curr_active_slave->dev,
inc);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
err = dev_set_allmulti(slave->dev, inc);
if (err)
return err;
}
}
return err;
}
/*
* Add a Multicast address to slaves
* according to mode
*/
static void bond_mc_add(struct bonding *bond, void *addr, int alen)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_mc_add(slave->dev, addr, alen, 0);
}
}
}
/*
* Remove a multicast address from slave
* according to mode
*/
static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_mc_delete(slave->dev, addr, alen, 0);
}
}
}
/*
* Retrieve the list of registered multicast addresses for the bonding
* device and retransmit an IGMP JOIN request to the current active
* slave.
*/
static void bond_resend_igmp_join_requests(struct bonding *bond)
{
struct in_device *in_dev;
struct ip_mc_list *im;
rcu_read_lock();
in_dev = __in_dev_get_rcu(bond->dev);
if (in_dev) {
for (im = in_dev->mc_list; im; im = im->next) {
ip_mc_rejoin_group(im);
}
}
rcu_read_unlock();
}
/*
* Totally destroys the mc_list in bond
*/
static void bond_mc_list_destroy(struct bonding *bond)
{
struct dev_mc_list *dmi;
dmi = bond->mc_list;
while (dmi) {
bond->mc_list = dmi->next;
kfree(dmi);
dmi = bond->mc_list;
}
bond->mc_list = NULL;
}
/*
* Copy all the Multicast addresses from src to the bonding device dst
*/
static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
gfp_t gfp_flag)
{
struct dev_mc_list *dmi, *new_dmi;
for (dmi = mc_list; dmi; dmi = dmi->next) {
new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
if (!new_dmi) {
/* FIXME: Potential memory leak !!! */
return -ENOMEM;
}
new_dmi->next = bond->mc_list;
bond->mc_list = new_dmi;
new_dmi->dmi_addrlen = dmi->dmi_addrlen;
memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
new_dmi->dmi_users = dmi->dmi_users;
new_dmi->dmi_gusers = dmi->dmi_gusers;
}
return 0;
}
/*
* flush all members of flush->mc_list from device dev->mc_list
*/
static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
struct dev_mc_list *dmi;
for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
if (bond->params.mode == BOND_MODE_8023AD) {
/* del lacpdu mc addr from mc list */
u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
}
}
/*--------------------------- Active slave change ---------------------------*/
/*
* Update the mc list and multicast-related flags for the new and
* old active slaves (if any) according to the multicast mode, and
* promiscuous flags unconditionally.
*/
static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active)
{
struct dev_mc_list *dmi;
if (!USES_PRIMARY(bond->params.mode)) {
/* nothing to do - mc list is already up-to-date on
* all slaves
*/
return;
}
if (old_active) {
if (bond->dev->flags & IFF_PROMISC) {
dev_set_promiscuity(old_active->dev, -1);
}
if (bond->dev->flags & IFF_ALLMULTI) {
dev_set_allmulti(old_active->dev, -1);
}
for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
}
if (new_active) {
/* FIXME: Signal errors upstream. */
if (bond->dev->flags & IFF_PROMISC) {
dev_set_promiscuity(new_active->dev, 1);
}
if (bond->dev->flags & IFF_ALLMULTI) {
dev_set_allmulti(new_active->dev, 1);
}
for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
bond_resend_igmp_join_requests(bond);
}
}
/*
* bond_do_fail_over_mac
*
* Perform special MAC address swapping for fail_over_mac settings
*
* Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
*/
static void bond_do_fail_over_mac(struct bonding *bond,
struct slave *new_active,
struct slave *old_active)
__releases(&bond->curr_slave_lock)
__releases(&bond->lock)
__acquires(&bond->lock)
__acquires(&bond->curr_slave_lock)
{
u8 tmp_mac[ETH_ALEN];
struct sockaddr saddr;
int rv;
switch (bond->params.fail_over_mac) {
case BOND_FOM_ACTIVE:
if (new_active)
memcpy(bond->dev->dev_addr, new_active->dev->dev_addr,
new_active->dev->addr_len);
break;
case BOND_FOM_FOLLOW:
/*
* if new_active && old_active, swap them
* if just old_active, do nothing (going to no active slave)
* if just new_active, set new_active to bond's MAC
*/
if (!new_active)
return;
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
if (old_active) {
memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
memcpy(saddr.sa_data, old_active->dev->dev_addr,
ETH_ALEN);
saddr.sa_family = new_active->dev->type;
} else {
memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN);
saddr.sa_family = bond->dev->type;
}
rv = dev_set_mac_address(new_active->dev, &saddr);
if (rv) {
printk(KERN_ERR DRV_NAME
": %s: Error %d setting MAC of slave %s\n",
bond->dev->name, -rv, new_active->dev->name);
goto out;
}
|