/*
* originally based on the dummy device.
*
* Copyright 1999, Thomas Davis, tadavis@lbl.gov.
* Licensed under the GPL. Based on dummy.c, and eql.c devices.
*
* bonding.c: an Ethernet Bonding driver
*
* This is useful to talk to a Cisco EtherChannel compatible equipment:
* Cisco 5500
* Sun Trunking (Solaris)
* Alteon AceDirector Trunks
* Linux Bonding
* and probably many L2 switches ...
*
* How it works:
* ifconfig bond0 ipaddress netmask up
* will setup a network device, with an ip address. No mac address
* will be assigned at this time. The hw mac address will come from
* the first slave bonded to the channel. All slaves will then use
* this hw mac address.
*
* ifconfig bond0 down
* will release all slaves, marking them as down.
*
* ifenslave bond0 eth0
* will attach eth0 to bond0 as a slave. eth0 hw mac address will either
* a: be used as initial mac address
* b: if a hw mac address already is there, eth0's hw mac address
* will then be set from bond0.
*
*/
//#define BONDING_DEBUG 1
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/bitops.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <net/route.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
/*---------------------------- Module parameters ----------------------------*/
/* monitor all links that often (in milliseconds). <=0 disables monitoring */
#define BOND_LINK_MON_INTERV 0
#define BOND_LINK_ARP_INTERV 0
static int max_bonds = BOND_DEFAULT_MAX_BONDS;
static int miimon = BOND_LINK_MON_INTERV;
static int updelay = 0;
static int downdelay = 0;
static int use_carrier = 1;
static char *mode = NULL;
static char *primary = NULL;
static char *lacp_rate = NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
struct bond_params bonding_defaults;
module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
"in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
"0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
"1 for active-backup, 2 for balance-xor, "
"3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
"6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
"(slow/fast)");
module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
", 1 for layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
/*----------------------------- Global variables ----------------------------*/
static const char *version =
DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
LIST_HEAD(bond_dev_list);
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *bond_proc_dir = NULL;
#endif
extern struct rw_semaphore bonding_rwsem;
static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
static int arp_ip_count = 0;
static int bond_mode = BOND_MODE_ROUNDROBIN;
static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
static int lacp_fast = 0;
struct bond_parm_tbl bond_lacp_tbl[] = {
{ "slow", AD_LACP_SLOW},
{ "fast", AD_LACP_FAST},
{ NULL, -1},
};
struct bond_parm_tbl bond_mode_tbl[] = {
{ "balance-rr", BOND_MODE_ROUNDROBIN},
{ "active-backup", BOND_MODE_ACTIVEBACKUP},
{ "balance-xor", BOND_MODE_XOR},
{ "broadcast", BOND_MODE_BROADCAST},
{ "802.3ad", BOND_MODE_8023AD},
{ "balance-tlb", BOND_MODE_TLB},
{ "balance-alb", BOND_MODE_ALB},
{ NULL, -1},
};
struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ "layer2", BOND_XMIT_POLICY_LAYER2},
{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
{ NULL, -1},
};
/*-------------------------- Forward declarations ---------------------------*/
static void bond_send_gratuitous_arp(struct bonding *bond);
/*---------------------------- General routines -----------------------------*/
const char *bond_mode_name(int mode)
{
switch (mode) {
case BOND_MODE_ROUNDROBIN :
return "load balancing (round-robin)";
case BOND_MODE_ACTIVEBACKUP :
return "fault-tolerance (active-backup)";
case BOND_MODE_XOR :
return "load balancing (xor)";
case BOND_MODE_BROADCAST :
return "fault-tolerance (broadcast)";
case BOND_MODE_8023AD:
return "IEEE 802.3ad Dynamic link aggregation";
case BOND_MODE_TLB:
return "transmit load balancing";
case BOND_MODE_ALB:
return "adaptive load balancing";
default:
return "unknown";
}
}
/*---------------------------------- VLAN -----------------------------------*/
/**
* bond_add_vlan - add a new vlan id on bond
* @bond: bond that got the notification
* @vlan_id: the vlan id to add
*
* Returns -ENOMEM if allocation failed.
*/
static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
{
struct vlan_entry *vlan;
dprintk("bond: %s, vlan id %d\n",
(bond ? bond->dev->name: "None"), vlan_id);
vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL);
if (!vlan) {
return -ENOMEM;
}
INIT_LIST_HEAD(&vlan->vlan_list);
vlan->vlan_id = vlan_id;
vlan->vlan_ip = 0;
write_lock_bh(&bond->lock);
list_add_tail(&vlan->vlan_list, &bond->vlan_list);
write_unlock_bh(&bond->lock);
dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
return 0;
}
/**
* bond_del_vlan - delete a vlan id from bond
* @bond: bond that got the notification
* @vlan_id: the vlan id to delete
*
* returns -ENODEV if @vlan_id was not found in @bond.
*/
static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
{
struct vlan_entry *vlan, *next;
int res = -ENODEV;
dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
write_lock_bh(&bond->lock);
list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) {
if (vlan->vlan_id == vlan_id) {
list_del(&vlan->vlan_list);
if ((bond->params.mode == BOND_MODE_TLB) ||
(bond->params.mode == BOND_MODE_ALB)) {
bond_alb_clear_vlan(bond, vlan_id);
}
dprintk("removed VLAN ID %d from bond %s\n", vlan_id,
bond->dev->name);
kfree(vlan);
if (list_empty(&bond->vlan_list) &&
(bond->slave_cnt == 0)) {
/* Last VLAN removed and no slaves, so
* restore block on adding VLANs. This will
* be removed once new slaves that are not
* VLAN challenged will be added.
*/
bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
}
res = 0;
goto out;
}
}
dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id,
bond->dev->name);
out:
write_unlock_bh(&bond->lock);
return res;
}
/**
* bond_has_challenged_slaves
* @bond: the bond we're working on
*
* Searches the slave list. Returns 1 if a vlan challenged slave
* was found, 0 otherwise.
*
* Assumes bond->lock is held.
*/
static int bond_has_challenged_slaves(struct bonding *bond)
{
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
dprintk("found VLAN challenged slave - %s\n",
slave->dev->name);
return 1;
}
}
dprintk("no VLAN challenged slaves found\n");
return 0;
}
/**
* bond_next_vlan - safely skip to the next item in the vlans list.
* @bond: the bond we're working on
* @curr: item we're advancing from
*
* Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
* or @curr->next otherwise (even if it is @curr itself again).
*
* Caller must hold bond->lock
*/
struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
{
struct vlan_entry *next, *last;
if (list_empty(&bond->vlan_list)) {
return NULL;
}
if (!curr) {
next = list_entry(bond->vlan_list.next,
struct vlan_entry, vlan_list);
} else {
last = list_entry(bond->vlan_list.prev,
struct vlan_entry, vlan_list);
if (last == curr) {
next = list_entry(bond->vlan_list.next,
struct vlan_entry, vlan_list);
} else {
next = list_entry(curr->vlan_list.next,
struct vlan_entry, vlan_list);
}
}
return next;
}
/**
* bond_dev_queue_xmit - Prepare skb for xmit.
*
* @bond: bond device that got this skb for tx.
* @skb: hw accel VLAN tagged skb to transmit
* @slave_dev: slave that is supposed to xmit this skbuff
*
* When the bond gets an skb to transmit that is
* already hardware accelerated VLAN tagged, and it
* needs to relay this skb to a slave that is not
* hw accel capable, the skb needs to be "unaccelerated",
* i.e. strip the hwaccel tag and re-insert it as part
* of the payload.
*/
int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev)
{
unsigned short vlan_id;
if (!list_empty(&bond->vlan_list) &&
!(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
vlan_get_tag(skb, &vlan_id) == 0) {
skb->dev = slave_dev;
skb = vlan_put_tag(skb, vlan_id);
if (!skb) {
/* vlan_put_tag() frees the skb in case of error,
* so return success here so the calling functions
* won't attempt to free is again.
*/
return 0;
}
} else {
skb->dev = slave_dev;
}
skb->priority = 1;
dev_queue_xmit(skb);
return 0;
}
/*
* In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
* and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
* lock because:
* a. This operation is performed in IOCTL context,
* b. The operation is protected by the RTNL semaphore in the 8021q code,
* c. Holding a lock with BH disabled while directly calling a base driver
* entry point is generally a BAD idea.
*
* The design of synchronization/protection for this operation in the 8021q
* module is good for one or more VLAN devices over a single physical device
* and cannot be extended for a teaming solution like bonding, so there is a
* potential race condition here where a net device from the vlan group might
* be referenced (either by a base driver or the 8021q code) while it is being
* removed from the system. However, it turns out we're not making matters
* worse, and if it works for regular VLAN usage it will work here too.
*/
/**
* bond_vlan_rx_register - Propagates registration to slaves
* @bond_dev: bonding net device that got called
* @grp: vlan group being registered
*/
static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp)
{
struct bonding *bond = bond_dev->priv;
struct slave *slave;
int i;
bond->vlgrp = grp;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_dev->vlan_rx_register) {
slave_dev->vlan_rx_register(slave_dev, grp);
}
}
}
/**
* bond_vlan_rx_add_vid - Propagates adding an id to slaves
* @bond_dev: bonding net device that got called
* @vid: vlan id being added
*/
static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
{
struct bonding *bond = bond_dev->priv;
struct slave *slave;
int i, res;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
slave_dev->vlan_rx_add_vid) {
slave_dev->vlan_rx_add_vid(slave_dev, vid);
}
}
res = bond_add_vlan(bond, vid);
if (res) {
printk(KERN_ERR DRV_NAME
": %s: Error: Failed to add vlan id %d\n",
bond_dev->name, vid);
}
}
/**
* bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
* @bond_dev: bonding net device that got called
* @vid: vlan id being removed
*/
static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
{
struct bonding *bond = bond_dev->priv;
struct slave *slave;
struct net_device *vlan_dev;
int i, res;
bond_for_each_slave(bond, slave, i) {
struct net_device *slave_dev = slave->dev;
if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
slave_dev->vlan_rx_kill_vid) {
/* Save and then restore vlan_dev in the grp array,
* since the slave's driver might clear it.
*/
vlan_dev = bond->vlgrp->vlan_devices[vid];
slave_dev->vlan_rx_kill_vid(slave_dev, vid);
bond->vlgrp->vlan_devices[vid] = vlan_dev;
}
}
res = bond_del_vlan(bond, vid);
if (res) {
printk(KERN_ERR DRV_NAME
": %s: Error: Failed to remove vlan id %d\n",
bond_dev->name, vid);
}
}
static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
{
struct vlan_entry *vlan;
write_lock_bh(&bond->lock);
if (list_empty(&bond->vlan_list)) {
goto out;
}
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_dev->vlan_rx_register) {
slave_dev->vlan_rx_register(slave_dev, bond->vlgrp);
}
if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
!(slave_dev->vlan_rx_add_vid)) {
goto out;
}
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id);
}
out:
write_unlock_bh(&bond->lock);
}
static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev)
{
struct vlan_entry *vlan;
struct net_device *vlan_dev;
write_lock_bh(&bond->lock);
if (list_empty(&bond->vlan_list)) {
goto out;
}
if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
!(slave_dev->vlan_rx_kill_vid)) {
goto unreg;
}
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
/* Save and then restore vlan_dev in the grp array,
* since the slave's driver might clear it.
*/
vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev;
}
unreg:
if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
slave_dev->vlan_rx_register) {
slave_dev->vlan_rx_register(slave_dev, NULL);
}
out:
write_unlock_bh(&bond->lock);
}
/*------------------------------- Link status -------------------------------*/
/*
* Get link speed and duplex from the slave's base driver
* using ethtool. If for some reason the call fails or the
* values are invalid, fake speed and duplex to 100/Full
* and return error.
*/
static int bond_update_speed_duplex(struct slave *slave)
{
struct net_device *slave_dev = slave->dev;
static int (* ioctl)(struct net_device *, struct ifreq *, int);
struct ifreq ifr;
struct ethtool_cmd etool;
/* Fake speed and duplex */
slave->speed = SPEED_100;
slave->duplex = DUPLEX_FULL;
if (slave_dev->ethtool_ops) {
int res;
if (!slave_dev->ethtool_ops->get_settings) {
return -1;
}
res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
if (res < 0) {
return -1;
}
goto verify;
}
ioctl = slave_dev->do_ioctl;
strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
etool.cmd = ETHTOOL_GSET;
ifr.ifr_data = (char*)&etool;
if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) {
return -1;
}
verify:
switch (etool.speed) {
case SPEED_10:
case SPEED_100:
case SPEED_1000:
break;
default:
return -1;
}
switch (etool.duplex) {
case DUPLEX_FULL:
case DUPLEX_HALF:
break;
default:
return -1;
}
slave->speed = etool.speed;
slave->duplex = etool.duplex;
return 0;
}
/*
* if <dev> supports MII link status reporting, check its link status.
*
* We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
* depening upon the setting of the use_carrier parameter.
*
* Return either BMSR_LSTATUS, meaning that the link is up (or we
* can't tell and just pretend it is), or 0, meaning that the link is
* down.
*
* If reporting is non-zero, instead of faking link up, return -1 if
* both ETHTOOL and MII ioctls fail (meaning the device does not
* support them). If use_carrier is set, return whatever it says.
* It'd be nice if there was a good way to tell if a driver supports
* netif_carrier, but there really isn't.
*/
static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting)
{
static int (* ioctl)(struct net_device *, struct ifreq *, int);
struct ifreq ifr;
struct mii_ioctl_data *mii;
struct ethtool_value etool;
if (bond->params.use_carrier) {
return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
}
ioctl = slave_dev->do_ioctl;
if (ioctl) {
/* TODO: set pointer to correct ioctl on a per team member */
/* bases to make this more efficient. that is, once */
/* we determine the correct ioctl, we will always */
/* call it and not the others for that team */
/* member. */
/*
* We cannot assume that SIOCGMIIPHY will also read a
* register; not all network drivers (e.g., e100)
* support that.
*/
/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
mii = if_mii(&ifr);
if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
mii->reg_num = MII_BMSR;
if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) {
return (mii->val_out & BMSR_LSTATUS);
}
}
}
/* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */
/* for a period of time so we attempt to get link status */
/* from it last if the above MII ioctls fail... */
if (slave_dev->ethtool_ops) {
if (slave_dev->ethtool_ops->get_link) {
u32 link;
link = slave_dev->ethtool_ops->get_link(slave_dev);
return link ? BMSR_LSTATUS : 0;
}
}
if (ioctl) {
strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
etool.cmd = ETHTOOL_GLINK;
ifr.ifr_data = (char*)&etool;
if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) {
if (etool.data == 1) {
return BMSR_LSTATUS;
} else {
dprintk("SIOCETHTOOL shows link down\n");
return 0;
}
}
}
/*
* If reporting, report that either there's no dev->do_ioctl,
* or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we
* cannot report link status). If not reporting, pretend
* we're ok.
*/
return (reporting ? -1 : BMSR_LSTATUS);
}
/*----------------------------- Multicast list ------------------------------*/
/*
* Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
*/
static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2)
{
return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
dmi1->dmi_addrlen == dmi2->dmi_addrlen;
}
/*
* returns dmi entry if found, NULL otherwise
*/
static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list)
{
struct dev_mc_list *idmi;
for (idmi = mc_list; idmi; idmi = idmi->next) {
if (bond_is_dmi_same(dmi, idmi)) {
return idmi;
}
}
return NULL;
}
/*
* Push the promiscuity flag down to appropriate slaves
*/
static void bond_set_promiscuity(struct bonding *bond, int inc)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_set_promiscuity(bond->curr_active_slave->dev, inc);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_set_promiscuity(slave->dev, inc);
}
}
}
/*
* Push the allmulti flag down to all slaves
*/
static void bond_set_allmulti(struct bonding *bond, int inc)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_set_allmulti(bond->curr_active_slave->dev, inc);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_set_allmulti(slave->dev, inc);
}
}
}
/*
* Add a Multicast address to slaves
* according to mode
*/
static void bond_mc_add(struct bonding *bond, void *addr, int alen)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_mc_add(slave->dev, addr, alen, 0);
}
}
}
/*
* Remove a multicast address from slave
* according to mode
*/
static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
{
if (USES_PRIMARY(bond->params.mode)) {
/* write lock already acquired */
if (bond->curr_active_slave) {
dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0);
}
} else {
struct slave *slave;
int i;
bond_for_each_slave(bond, slave, i) {
dev_mc_delete(slave->dev, addr, alen, 0);
}
}
}
/*
* Totally destroys the mc_list in bond
*/
static void bond_mc_list_destroy(struct bonding *bond)
{
struct dev_mc_list *dmi;
dmi = bond->mc_list;
while (dmi) {
bond->mc_list = dmi->next;
kfree(dmi);
dmi = bond->mc_list;
}
}
/*
* Copy all the Multicast addresses from src to the bonding device dst
*/
static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
gfp_t gfp_flag)
{
struct dev_mc_list *dmi, *new_dmi;
for (dmi = mc_list; dmi; dmi = dmi->next) {
new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
if (!new_dmi) {
/* FIXME: Potential memory leak !!! */
return -ENOMEM;
}
new_dmi->next = bond->mc_list;
bond->mc_list = new_dmi;
new_dmi->dmi_addrlen = dmi->dmi_addrlen;
memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
new_dmi->dmi_users = dmi->dmi_users;
new_dmi->dmi_gusers = dmi->dmi_gusers;
}
return 0;
}
/*
* flush all members of flush->mc_list from device dev->mc_list
*/
static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev)
{
struct bonding *bond = bond_dev->priv;
struct dev_mc_list *dmi;
for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
if (bond->params.mode == BOND_MODE_8023AD) {
/* del lacpdu mc addr from mc list */
u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
}
}
/*--------------------------- Active slave change ---------------------------*/
/*
* Update the mc list and multicast-related flags for the new and
* old active slaves (if any) according to the multicast mode, and
* promiscuous flags unconditionally.
*/
static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active)
{
struct dev_mc_list *dmi;
if (!USES_PRIMARY(bond->params.mode)) {
/* nothing to do - mc list is already up-to-date on
* all slaves
*/
return;
}
if (old_active) {
if (bond->dev->flags & IFF_PROMISC) {
dev_set_promiscuity(old_active->dev, -1);
}
if (bond->dev->flags & IFF_ALLMULTI) {
dev_set_allmulti(old_active->dev, -1);
}
for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
}
if (new_active) {
if (bond->dev->flags & IFF_PROMISC) {
dev_set_promiscuity(new_active->dev, 1);
}
if (bond->dev->flags & IFF_ALLMULTI) {
dev_set_allmulti(new_active->dev, 1);
}
for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
}
}
/**
* find_best_interface - select the best available slave to be the active one
* @bond: our bonding struct
*
* Warning: Caller must hold curr_slave_lock for writing.
*/
static struct slave *bond_find_best_slave(struct bonding *bond)
{
struct slave *new_active, *old_active;
struct slave *bestslave = NULL;
int mintime = bond->params.updelay;
int i;
new_active = old_active = bond->curr_active_slave;
if (!new_active) { /* there were no active slaves left */
if (bond->slave_cnt > 0) { /* found one slave */
new_active = bond->first_slave;
} else {
return NULL; /* still no slave, return NULL */
}
}
/* first try the primary link; if arping, a link must tx/rx traffic
* before it can be considered the curr_active_slave - also, we would skip
* slaves between the curr_active_slave and primary_slave that may be up
* and able to arp
*/
if ((bond->primary_slave) &&
(!bond->params.arp_interval) &&
(IS_UP(bond->primary_slave->dev))) {
new_active = bond->primary_slave;
}
/* remember where to stop iterating over the slaves */
old_active = new_active;
bond_for_each_slave_from(bond, new_active, i, old_active) {
if (IS_UP(new_active->dev)) {
if (new_active->link == BOND_LINK_UP) {
return new_active;
} else if (new_active->link == BOND_LINK_BACK) {
/* link up, but waiting for stabilization */
if (new_active->delay < mintime) {
mintime = new_active->delay;
bestslave = new_active;
}
}
}
}
return bestslave;
}
/**
* change_active_interface - change the active slave into the specified one
* @bond: our bonding struct
* @new: the new slave to make the active one
*
* Set the new slave to the bond's settings and unset them on the old
* curr_active_slave.
* Setting include flags, mc-list, promiscuity, allmulti, etc.
*
* If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP,
* because it is apparently the best available slave we have, even though its
* updelay hasn't timed out yet.
*
* Warning: Caller must hold curr_slave_lock for writing.
*/
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
struct slave *old_active = bond->curr_active_slave;
if (old_active == new_active) {
return;
}
if (new_active) {
if (new_active->link == BOND_LINK_BACK) {
if (USES_PRIMARY(bond->params.mode)) {
printk(KERN_INFO DRV_NAME
": %s: making interface %s the new "
"active one %d ms earlier.\n",
bond->dev->name, new_active->dev->name,
(bond->params.updelay - new_active->delay) * bond->params.miimon);
}
new_active->delay = 0;
new_active->link = BOND_LINK_UP;
new_active->jiffies = jiffies;
if (bond->params.mode == BOND_MODE_8023AD) {
bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
}
if ((bond->params.mode == BOND_MODE_TLB) ||
(bond->params.mode == BOND_MODE_ALB)) {
bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
}
} else {
if (USES_PRIMARY(bond->params.mode)) {
printk(KERN_INFO DRV_NAME
": %s: making interface %s the new "
"active one.\n",
bond->dev->name, new_active->dev->name);
}
}
}
if (USES_PRIMARY(bond->params.mode)) {
bond_mc_swap(bond, new_active, old_active);
}
if ((bond->params.mode == BOND_MODE_TLB) ||
(bond->params.mode == BOND_MODE_ALB)) {
bond_alb_handle_active_change(bond, new_active);
} else {
bond->curr_active_slave = new_active;
}
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
if (old_active) {
bond_set_slave_inactive_flags(old_active);
}
if (new_active) {
bond_set_slave_active_flags(new_active);
}
bond_send_gratuitous_arp(bond);
}
}
/**
* bond_select_active_slave - select a new active slave, if needed
* @bond: our bonding struct
*
* This functions shoud be called when one of the following occurs:
* - The old curr_active_slave has been released or lost its link.
* - The primary_slave has got its link back.
* - A slave has got its link back and there's no old curr_active_slave.
*
* Warning: Caller must hold curr_slave_lock for writing.
*/
void bond_select_active_slave(struct bonding *bond)
{
struct slave *best_slave;
best_slave = bond_find_best_slave(bond);
if (best_slave != bond->curr_active_slave) {
bond_change_active_slave(bond, best_slave);
}
}
/*--------------------------- slave list handling ---------------------------*/
/*
* This function attaches the slave to the end of list.
*
* bond->lock held for writing by caller.
*/
static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
{
if (bond->first_slave == NULL) { /* attaching the first slave */
new_slave->next = new_slave;
new_slave->prev = new_slave;
bond->first_slave = new_slave;
} else {
new_slave->next = bond->first_slave;
new_slave->prev = bond->first_slave->prev;
new_slave->next->prev = new_slave;
new_slave->prev->next = new_slave;
}
bond->slave_cnt++;
}
/*
* This function detaches the slave from the list.
* WARNING: no check is made to verify if the slave effectively
* belongs to <bond>.
* Nothing is freed on return, structures are just unchained.
* If any slave pointer in bond was pointing to <slave>,
* it should be changed by the calling function.
*
* bond->lock held for writing by caller.
*/
static void bond_detach_slave(struct bonding *bond, struct slave *slave)
{
if (slave->next) {
slave->next->prev = slave->prev;
}
if (slave->prev) {
slave->prev->next = slave->next;
}
if (bond->first_slave == slave) { /* slave is the first slave */
if (bond->slave_cnt > 1) { /* there are more slave */
bond->first_slave = slave->next;
} else {
bond->first_slave = NULL; /* slave was the last one */
}
}
slave->next = NULL;
slave->prev = NULL;
bond->slave_cnt--;
}
/*---------------------------------- IOCTL ----------------------------------*/
int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev)
{
dprintk("bond_dev=%p\n", bond_dev);
dprintk("slave_dev=%p\n", slave_dev);
dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len);
memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
return 0;
}
#define BOND_INTERSECT_FEATURES \
(NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
/*
* Compute the common dev->feature set available to all slaves. Some
* feature bits are managed elsewhere, so preserve feature bits set on
* master device that are not part of the examined set.
*/
static int bond_compute_features(struct bonding *bond)
{
unsigned long features = BOND_INTERSECT_FEATURES;
struct slave *slave;
struct net_device *bond_dev = bond->dev;
int i;
bond_for_each_slave(bond, slave, i)
features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
if ((features & NETIF_F_SG) &&
!(features & (NETIF_F_IP_CSUM |
NETIF_F_NO_CSUM |
NETIF_F_HW_CSUM)))
features &= ~NETIF_F_SG;
features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES);
bond_dev->features = features;
return 0;
}
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
struct bonding *bond = bond_dev->priv;
struct slave *new_slave = NULL;
struct dev_mc_list *dmi;
struct sockaddr addr;
int link_reporting;
int old_features = bond_dev->features;
int res = 0;
if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
slave_dev->do_ioctl == NULL) {
printk(KERN_WARNING DRV_NAME
": %s: Warning: no link monitoring support for %s\n",
bond_dev->name, slave_dev->name);
}
/* bond must be initialized by bond_open() before enslaving */
if (!(bond_dev->flags & IFF_UP)) {
dprintk("Error, master_dev is not up\n");
return -EPERM;
}
/* already enslaved */
if (slave_dev->flags & IFF_SLAVE) {
dprintk("Error, Device was already enslaved\n");
return -EBUSY;
}
/* vlan challenged mutual exclusion */
/* no need to lock since we're protected by rtnl_lock */
if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
if (!list_empty(&bond->vlan_list)) {
printk(KERN_ERR DRV_NAME
": %s: Error: cannot enslave VLAN "
"challenged slave %s on VLAN enabled "
"bond %s\n", bond_dev->name, slave_dev->name,
bond_dev->name);
return -EPERM;
} else {
printk(KERN_WARNING DRV_NAME
": %s: Warning: enslaved VLAN challenged "
"slave %s. Adding VLANs will be blocked as "
"long as %s is part of bond %s\n",
bond_dev->name, slave_dev->name, slave_dev->name,
bond_dev->name);
bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
}
} else {
dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
if (bond->slave_cnt == 0) {
/* First slave, and it is not VLAN challenged,
* so remove the block of adding VLANs over the bond.
*/
bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
}
}
/*
* Old ifenslave binaries are no longer supported. These can
* be identified with moderate accurary by the state of the slave:
* the current ifenslave will set the interface down prior to
* enslaving it; the old ifenslave will not.
*/
if ((slave_dev->flags & IFF_UP)) {
printk(KERN_ERR DRV_NAME ": %s is up. "
"This may be due to an out of date ifenslave.\n",
slave_dev->name);
res = -EPERM;
goto err_undo_flags;
}
if (slave_dev->set_mac_address == NULL) {
printk(KERN_ERR DRV_NAME
": %s: Error: The slave device you specified does "
"not support setting the MAC address. "
"Your kernel likely does not support slave "
"devices.\n", bond_dev->name);
res = -EOPNOTSUPP;
goto err_undo_flags;
}
new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL);
if (!new_slave) {
res = -ENOMEM;
goto err_undo_flags;
}
memset(new_slave, 0, sizeof(struct slave));
/* save slave's original flags before calling
* netdev_set_master and dev_open
*/
new_slave->original_flags = slave_dev->flags;
/*
* Save slave's original ("permanent") mac address for modes
* that need it, and for restoring it upon release, and then
* set it to the master's address