author    David S. Miller <davem@davemloft.net>  2008-05-29 06:31:03 -0400
committer David S. Miller <davem@davemloft.net>  2008-05-29 06:31:03 -0400
commit    a5b17df04c4ad8f25fc598fce37fccb4b387c94c (patch)
tree      2d0084f6db86362eb067b617ff8470f255ba37e7
parent    b79eeeb9e48457579cb742cd02e162fcd673c4a3 (diff)
parent    c03571a3e22b821e5be7bda7b166c4554770f489 (diff)
Merge branch 'upstream-next-davem' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6
-rw-r--r--  Documentation/networking/bonding.txt | 96
-rw-r--r--  drivers/net/3c509.c | 15
-rw-r--r--  drivers/net/3c515.c | 4
-rw-r--r--  drivers/net/Kconfig | 1
-rw-r--r--  drivers/net/atlx/atl1.c | 5
-rw-r--r--  drivers/net/bonding/bond_main.c | 706
-rw-r--r--  drivers/net/bonding/bond_sysfs.c | 81
-rw-r--r--  drivers/net/bonding/bonding.h | 13
-rw-r--r--  drivers/net/cxgb3/adapter.h | 18
-rw-r--r--  drivers/net/cxgb3/common.h | 1
-rw-r--r--  drivers/net/cxgb3/cxgb3_ioctl.h | 1
-rw-r--r--  drivers/net/cxgb3/cxgb3_main.c | 19
-rw-r--r--  drivers/net/cxgb3/sge.c | 391
-rw-r--r--  drivers/net/cxgb3/t3_cpl.h | 11
-rw-r--r--  drivers/net/dl2k.c | 8
-rw-r--r--  drivers/net/hamachi.c | 12
-rw-r--r--  drivers/net/ixp2000/ixpdev.c | 4
-rw-r--r--  drivers/net/phy/Kconfig | 3
-rw-r--r--  drivers/net/phy/broadcom.c | 201
-rw-r--r--  drivers/net/tg3.c | 32
-rw-r--r--  drivers/net/usb/catc.c | 5
-rw-r--r--  drivers/net/usb/rndis_host.c | 4
-rw-r--r--  drivers/net/via-velocity.c | 25
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_mac.c | 2
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_usb.c | 2
25 files changed, 1203 insertions, 457 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index a0cda062bc33..8e6b8d3c7410 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -289,35 +289,73 @@ downdelay
289fail_over_mac 289fail_over_mac
290 290
291 Specifies whether active-backup mode should set all slaves to 291 Specifies whether active-backup mode should set all slaves to
292 the same MAC address (the traditional behavior), or, when 292 the same MAC address at enslavement (the traditional
293 enabled, change the bond's MAC address when changing the 293 behavior), or, when enabled, perform special handling of the
294 active interface (i.e., fail over the MAC address itself). 294 bond's MAC address in accordance with the selected policy.
295 295
296 Fail over MAC is useful for devices that cannot ever alter 296 Possible values are:
297 their MAC address, or for devices that refuse incoming 297
298 broadcasts with their own source MAC (which interferes with 298 none or 0
299 the ARP monitor). 299
300 300 This setting disables fail_over_mac, and causes
301 The down side of fail over MAC is that every device on the 301 bonding to set all slaves of an active-backup bond to
302 network must be updated via gratuitous ARP, vs. just updating 302 the same MAC address at enslavement time. This is the
303 a switch or set of switches (which often takes place for any 303 default.
304 traffic, not just ARP traffic, if the switch snoops incoming 304
305 traffic to update its tables) for the traditional method. If 305 active or 1
306 the gratuitous ARP is lost, communication may be disrupted. 306
307 307 The "active" fail_over_mac policy indicates that the
 308 When fail over MAC is used in conjunction with the mii monitor, 308 The "active" fail_over_mac policy indicates that the
 309 devices which assert link up prior to being able to actually 309 address of the bond should always be the MAC
 310 transmit and receive are particularly susceptible to loss of 310 address of the slaves is not changed; instead, the MAC
311 the gratuitous ARP, and an appropriate updelay setting may be 311 address of the bond changes during a failover.
312 required. 312
313 313 This policy is useful for devices that cannot ever
314 A value of 0 disables fail over MAC, and is the default. A 314 alter their MAC address, or for devices that refuse
315 value of 1 enables fail over MAC. This option is enabled 315 incoming broadcasts with their own source MAC (which
316 automatically if the first slave added cannot change its MAC 316 interferes with the ARP monitor).
317 address. This option may be modified via sysfs only when no 317
318 slaves are present in the bond. 318 The down side of this policy is that every device on
319 319 the network must be updated via gratuitous ARP,
320 This option was added in bonding version 3.2.0. 320 vs. just updating a switch or set of switches (which
321 often takes place for any traffic, not just ARP
322 traffic, if the switch snoops incoming traffic to
323 update its tables) for the traditional method. If the
324 gratuitous ARP is lost, communication may be
325 disrupted.
326
 327 When this policy is used in conjunction with the mii
 328 monitor, devices which assert link up prior to being
 329 able to actually transmit and receive are particularly
 330 susceptible to loss of the gratuitous ARP, and an
331 appropriate updelay setting may be required.
332
333 follow or 2
334
335 The "follow" fail_over_mac policy causes the MAC
336 address of the bond to be selected normally (normally
337 the MAC address of the first slave added to the bond).
338 However, the second and subsequent slaves are not set
339 to this MAC address while they are in a backup role; a
340 slave is programmed with the bond's MAC address at
341 failover time (and the formerly active slave receives
342 the newly active slave's MAC address).
343
344 This policy is useful for multiport devices that
345 either become confused or incur a performance penalty
346 when multiple ports are programmed with the same MAC
347 address.
348
349
350 The default policy is none, unless the first slave cannot
351 change its MAC address, in which case the active policy is
352 selected by default.
353
354 This option may be modified via sysfs only when no slaves are
355 present in the bond.
356
357 This option was added in bonding version 3.2.0. The "follow"
358 policy was added in bonding version 3.3.0.
321 359
322lacp_rate 360lacp_rate
323 361
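
For readers applying the documented sysfs interface above, a minimal userspace sketch follows; the bond name bond0 and the stdio-based write are illustrative assumptions, not part of this patch, and per the documentation the write only succeeds while no slaves are present in the bond:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical example: pick the new "follow" policy by name.
         * Valid values after this patch are "none", "active" and "follow"
         * (or their numeric equivalents 0, 1 and 2). */
        FILE *f = fopen("/sys/class/net/bond0/bonding/fail_over_mac", "w");

        if (!f) {
            perror("fopen");
            return 1;
        }
        fputs("follow", f);
        fclose(f);
        return 0;
    }
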
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index fe6d84105e55..b9d097c9f6bb 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -413,7 +413,7 @@ static int __devinit el3_pnp_probe(struct pnp_dev *pdev,
413{ 413{
414 short i; 414 short i;
415 int ioaddr, irq, if_port; 415 int ioaddr, irq, if_port;
416 u16 phys_addr[3]; 416 __be16 phys_addr[3];
417 struct net_device *dev = NULL; 417 struct net_device *dev = NULL;
418 int err; 418 int err;
419 419
@@ -605,7 +605,7 @@ static int __init el3_mca_probe(struct device *device)
605 605
606 short i; 606 short i;
607 int ioaddr, irq, if_port; 607 int ioaddr, irq, if_port;
608 u16 phys_addr[3]; 608 __be16 phys_addr[3];
609 struct net_device *dev = NULL; 609 struct net_device *dev = NULL;
610 u_char pos4, pos5; 610 u_char pos4, pos5;
611 struct mca_device *mdev = to_mca_device(device); 611 struct mca_device *mdev = to_mca_device(device);
@@ -635,14 +635,13 @@ static int __init el3_mca_probe(struct device *device)
635 printk(KERN_DEBUG "3c529: irq %d ioaddr 0x%x ifport %d\n", irq, ioaddr, if_port); 635 printk(KERN_DEBUG "3c529: irq %d ioaddr 0x%x ifport %d\n", irq, ioaddr, if_port);
636 } 636 }
637 EL3WINDOW(0); 637 EL3WINDOW(0);
638 for (i = 0; i < 3; i++) { 638 for (i = 0; i < 3; i++)
639 phys_addr[i] = htons(read_eeprom(ioaddr, i)); 639 phys_addr[i] = htons(read_eeprom(ioaddr, i));
640 }
641 640
642 dev = alloc_etherdev(sizeof (struct el3_private)); 641 dev = alloc_etherdev(sizeof (struct el3_private));
643 if (dev == NULL) { 642 if (dev == NULL) {
644 release_region(ioaddr, EL3_IO_EXTENT); 643 release_region(ioaddr, EL3_IO_EXTENT);
645 return -ENOMEM; 644 return -ENOMEM;
646 } 645 }
647 646
648 netdev_boot_setup_check(dev); 647 netdev_boot_setup_check(dev);
@@ -668,7 +667,7 @@ static int __init el3_eisa_probe (struct device *device)
668{ 667{
669 short i; 668 short i;
670 int ioaddr, irq, if_port; 669 int ioaddr, irq, if_port;
671 u16 phys_addr[3]; 670 __be16 phys_addr[3];
672 struct net_device *dev = NULL; 671 struct net_device *dev = NULL;
673 struct eisa_device *edev; 672 struct eisa_device *edev;
674 int err; 673 int err;
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c
index 105a8c7ca7e9..e4e3241628d6 100644
--- a/drivers/net/3c515.c
+++ b/drivers/net/3c515.c
@@ -572,12 +572,16 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr,
572 int irq; 572 int irq;
573 DECLARE_MAC_BUF(mac); 573 DECLARE_MAC_BUF(mac);
574 574
575#ifdef __ISAPNP__
575 if (idev) { 576 if (idev) {
576 irq = pnp_irq(idev, 0); 577 irq = pnp_irq(idev, 0);
577 vp->dev = &idev->dev; 578 vp->dev = &idev->dev;
578 } else { 579 } else {
579 irq = inw(ioaddr + 0x2002) & 15; 580 irq = inw(ioaddr + 0x2002) & 15;
580 } 581 }
582#else
583 irq = inw(ioaddr + 0x2002) & 15;
584#endif
581 585
582 dev->base_addr = ioaddr; 586 dev->base_addr = ioaddr;
583 dev->irq = irq; 587 dev->irq = irq;
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 78cc9495fd46..8178a4dfd09c 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2410,6 +2410,7 @@ config CHELSIO_T3
2410 tristate "Chelsio Communications T3 10Gb Ethernet support" 2410 tristate "Chelsio Communications T3 10Gb Ethernet support"
2411 depends on PCI 2411 depends on PCI
2412 select FW_LOADER 2412 select FW_LOADER
2413 select INET_LRO
2413 help 2414 help
2414 This driver supports Chelsio T3-based gigabit and 10Gb Ethernet 2415 This driver supports Chelsio T3-based gigabit and 10Gb Ethernet
2415 adapters. 2416 adapters.
diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 9c2394d49428..db04bfb3460f 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -1876,7 +1876,8 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
1876 1876
1877 rfd_desc = ATL1_RFD_DESC(rfd_ring, rfd_next_to_use); 1877 rfd_desc = ATL1_RFD_DESC(rfd_ring, rfd_next_to_use);
1878 1878
1879 skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN); 1879 skb = netdev_alloc_skb(adapter->netdev,
1880 adapter->rx_buffer_len + NET_IP_ALIGN);
1880 if (unlikely(!skb)) { 1881 if (unlikely(!skb)) {
1881 /* Better luck next round */ 1882 /* Better luck next round */
1882 adapter->net_stats.rx_dropped++; 1883 adapter->net_stats.rx_dropped++;
@@ -2135,7 +2136,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
2135 return -1; 2136 return -1;
2136 } 2137 }
2137 2138
2138 if (skb->protocol == ntohs(ETH_P_IP)) { 2139 if (skb->protocol == htons(ETH_P_IP)) {
2139 struct iphdr *iph = ip_hdr(skb); 2140 struct iphdr *iph = ip_hdr(skb);
2140 2141
2141 real_len = (((unsigned char *)iph - skb->data) + 2142 real_len = (((unsigned char *)iph - skb->data) +
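
A note on the atl1 comparison change just above: skb->protocol is a big-endian (__be16) field, while ETH_P_IP is a host-order constant, so htons() is the semantically correct direction for the conversion. ntohs() produces the same bytes for this value, so behaviour does not change, but htons() matches the __be16 annotation that sparse checks. A small userspace illustration, assuming only that 0x0800 is the ETH_P_IP value:

    #include <arpa/inet.h>
    #include <stdio.h>

    int main(void)
    {
        /* A frame's protocol is carried in network byte order, exactly as
         * skb->protocol stores it in the kernel. */
        unsigned short proto_on_wire = htons(0x0800);

        /* Compare by converting the host-order constant; both helpers swap
         * the same bytes here, so the result is identical either way. */
        printf("ntohs: 0x%04x  htons: 0x%04x  match: %d\n",
               ntohs(0x0800), htons(0x0800), proto_on_wire == htons(0x0800));
        return 0;
    }
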
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 50a40e433154..5b4af3cc2a44 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -88,6 +88,7 @@
88#define BOND_LINK_ARP_INTERV 0 88#define BOND_LINK_ARP_INTERV 0
89 89
90static int max_bonds = BOND_DEFAULT_MAX_BONDS; 90static int max_bonds = BOND_DEFAULT_MAX_BONDS;
91static int num_grat_arp = 1;
91static int miimon = BOND_LINK_MON_INTERV; 92static int miimon = BOND_LINK_MON_INTERV;
92static int updelay = 0; 93static int updelay = 0;
93static int downdelay = 0; 94static int downdelay = 0;
@@ -99,11 +100,13 @@ static char *xmit_hash_policy = NULL;
99static int arp_interval = BOND_LINK_ARP_INTERV; 100static int arp_interval = BOND_LINK_ARP_INTERV;
100static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 101static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
101static char *arp_validate = NULL; 102static char *arp_validate = NULL;
102static int fail_over_mac = 0; 103static char *fail_over_mac = NULL;
103struct bond_params bonding_defaults; 104struct bond_params bonding_defaults;
104 105
105module_param(max_bonds, int, 0); 106module_param(max_bonds, int, 0);
106MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 107MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
108module_param(num_grat_arp, int, 0644);
109MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
107module_param(miimon, int, 0); 110module_param(miimon, int, 0);
108MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 111MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
109module_param(updelay, int, 0); 112module_param(updelay, int, 0);
@@ -133,8 +136,8 @@ module_param_array(arp_ip_target, charp, NULL, 0);
133MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 136MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
134module_param(arp_validate, charp, 0); 137module_param(arp_validate, charp, 0);
135MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); 138MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
136module_param(fail_over_mac, int, 0); 139module_param(fail_over_mac, charp, 0);
137MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); 140MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. none (default), active or follow");
138 141
139/*----------------------------- Global variables ----------------------------*/ 142/*----------------------------- Global variables ----------------------------*/
140 143
@@ -187,6 +190,13 @@ struct bond_parm_tbl arp_validate_tbl[] = {
187{ NULL, -1}, 190{ NULL, -1},
188}; 191};
189 192
193struct bond_parm_tbl fail_over_mac_tbl[] = {
194{ "none", BOND_FOM_NONE},
195{ "active", BOND_FOM_ACTIVE},
196{ "follow", BOND_FOM_FOLLOW},
197{ NULL, -1},
198};
199
190/*-------------------------- Forward declarations ---------------------------*/ 200/*-------------------------- Forward declarations ---------------------------*/
191 201
192static void bond_send_gratuitous_arp(struct bonding *bond); 202static void bond_send_gratuitous_arp(struct bonding *bond);
@@ -261,14 +271,14 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
261 */ 271 */
262static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 272static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
263{ 273{
264 struct vlan_entry *vlan, *next; 274 struct vlan_entry *vlan;
265 int res = -ENODEV; 275 int res = -ENODEV;
266 276
267 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 277 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
268 278
269 write_lock_bh(&bond->lock); 279 write_lock_bh(&bond->lock);
270 280
271 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 281 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
272 if (vlan->vlan_id == vlan_id) { 282 if (vlan->vlan_id == vlan_id) {
273 list_del(&vlan->vlan_list); 283 list_del(&vlan->vlan_list);
274 284
@@ -970,6 +980,82 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct
970 } 980 }
971} 981}
972 982
983/*
984 * bond_do_fail_over_mac
985 *
986 * Perform special MAC address swapping for fail_over_mac settings
987 *
988 * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
989 */
990static void bond_do_fail_over_mac(struct bonding *bond,
991 struct slave *new_active,
992 struct slave *old_active)
993{
994 u8 tmp_mac[ETH_ALEN];
995 struct sockaddr saddr;
996 int rv;
997
998 switch (bond->params.fail_over_mac) {
999 case BOND_FOM_ACTIVE:
1000 if (new_active)
1001 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr,
1002 new_active->dev->addr_len);
1003 break;
1004 case BOND_FOM_FOLLOW:
1005 /*
1006 * if new_active && old_active, swap them
1007 * if just old_active, do nothing (going to no active slave)
1008 * if just new_active, set new_active to bond's MAC
1009 */
1010 if (!new_active)
1011 return;
1012
1013 write_unlock_bh(&bond->curr_slave_lock);
1014 read_unlock(&bond->lock);
1015
1016 if (old_active) {
1017 memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
1018 memcpy(saddr.sa_data, old_active->dev->dev_addr,
1019 ETH_ALEN);
1020 saddr.sa_family = new_active->dev->type;
1021 } else {
1022 memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN);
1023 saddr.sa_family = bond->dev->type;
1024 }
1025
1026 rv = dev_set_mac_address(new_active->dev, &saddr);
1027 if (rv) {
1028 printk(KERN_ERR DRV_NAME
1029 ": %s: Error %d setting MAC of slave %s\n",
1030 bond->dev->name, -rv, new_active->dev->name);
1031 goto out;
1032 }
1033
1034 if (!old_active)
1035 goto out;
1036
1037 memcpy(saddr.sa_data, tmp_mac, ETH_ALEN);
1038 saddr.sa_family = old_active->dev->type;
1039
1040 rv = dev_set_mac_address(old_active->dev, &saddr);
1041 if (rv)
1042 printk(KERN_ERR DRV_NAME
1043 ": %s: Error %d setting MAC of slave %s\n",
1044 bond->dev->name, -rv, new_active->dev->name);
1045out:
1046 read_lock(&bond->lock);
1047 write_lock_bh(&bond->curr_slave_lock);
1048 break;
1049 default:
1050 printk(KERN_ERR DRV_NAME
1051 ": %s: bond_do_fail_over_mac impossible: bad policy %d\n",
1052 bond->dev->name, bond->params.fail_over_mac);
1053 break;
1054 }
1055
1056}
1057
1058
973/** 1059/**
974 * find_best_interface - select the best available slave to be the active one 1060 * find_best_interface - select the best available slave to be the active one
975 * @bond: our bonding struct 1061 * @bond: our bonding struct
@@ -1037,7 +1123,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
1037 * because it is apparently the best available slave we have, even though its 1123 * because it is apparently the best available slave we have, even though its
1038 * updelay hasn't timed out yet. 1124 * updelay hasn't timed out yet.
1039 * 1125 *
1040 * Warning: Caller must hold curr_slave_lock for writing. 1126 * If new_active is not NULL, caller must hold bond->lock for read and
1127 * curr_slave_lock for write_bh.
1041 */ 1128 */
1042void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1129void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1043{ 1130{
@@ -1048,6 +1135,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1048 } 1135 }
1049 1136
1050 if (new_active) { 1137 if (new_active) {
1138 new_active->jiffies = jiffies;
1139
1051 if (new_active->link == BOND_LINK_BACK) { 1140 if (new_active->link == BOND_LINK_BACK) {
1052 if (USES_PRIMARY(bond->params.mode)) { 1141 if (USES_PRIMARY(bond->params.mode)) {
1053 printk(KERN_INFO DRV_NAME 1142 printk(KERN_INFO DRV_NAME
@@ -1059,7 +1148,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1059 1148
1060 new_active->delay = 0; 1149 new_active->delay = 0;
1061 new_active->link = BOND_LINK_UP; 1150 new_active->link = BOND_LINK_UP;
1062 new_active->jiffies = jiffies;
1063 1151
1064 if (bond->params.mode == BOND_MODE_8023AD) { 1152 if (bond->params.mode == BOND_MODE_8023AD) {
1065 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1153 bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -1103,20 +1191,21 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1103 bond_set_slave_active_flags(new_active); 1191 bond_set_slave_active_flags(new_active);
1104 } 1192 }
1105 1193
1106 /* when bonding does not set the slave MAC address, the bond MAC
1107 * address is the one of the active slave.
1108 */
1109 if (new_active && bond->params.fail_over_mac) 1194 if (new_active && bond->params.fail_over_mac)
1110 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, 1195 bond_do_fail_over_mac(bond, new_active, old_active);
1111 new_active->dev->addr_len); 1196
1197 bond->send_grat_arp = bond->params.num_grat_arp;
1112 if (bond->curr_active_slave && 1198 if (bond->curr_active_slave &&
1113 test_bit(__LINK_STATE_LINKWATCH_PENDING, 1199 test_bit(__LINK_STATE_LINKWATCH_PENDING,
1114 &bond->curr_active_slave->dev->state)) { 1200 &bond->curr_active_slave->dev->state)) {
1115 dprintk("delaying gratuitous arp on %s\n", 1201 dprintk("delaying gratuitous arp on %s\n",
1116 bond->curr_active_slave->dev->name); 1202 bond->curr_active_slave->dev->name);
1117 bond->send_grat_arp = 1; 1203 } else {
1118 } else 1204 if (bond->send_grat_arp > 0) {
1119 bond_send_gratuitous_arp(bond); 1205 bond_send_gratuitous_arp(bond);
1206 bond->send_grat_arp--;
1207 }
1208 }
1120 } 1209 }
1121} 1210}
1122 1211
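
The hunk above reloads send_grat_arp from the new num_grat_arp parameter on every failover and decrements it once per transmission; the remaining sends are drained by the mii monitor, as a later hunk shows. A self-contained sketch of that accounting, using stub types rather than the real struct bonding:

    #include <stdio.h>

    struct bond_stub {
        int num_grat_arp;   /* module parameter, 0..255 */
        int send_grat_arp;  /* countdown reloaded on each failover */
    };

    static void send_gratuitous_arp(struct bond_stub *bond)
    {
        printf("gratuitous ARP (%d left after this one)\n",
               bond->send_grat_arp - 1);
    }

    static void on_failover(struct bond_stub *bond)
    {
        bond->send_grat_arp = bond->num_grat_arp;
    }

    static void on_monitor_tick(struct bond_stub *bond)
    {
        if (bond->send_grat_arp > 0) {
            send_gratuitous_arp(bond);
            bond->send_grat_arp--;
        }
    }

    int main(void)
    {
        struct bond_stub bond = { .num_grat_arp = 3, .send_grat_arp = 0 };
        int i;

        on_failover(&bond);
        for (i = 0; i < 5; i++)
            on_monitor_tick(&bond);  /* sends exactly 3 ARPs, then stops */
        return 0;
    }
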
@@ -1129,7 +1218,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1129 * - The primary_slave has got its link back. 1218 * - The primary_slave has got its link back.
1130 * - A slave has got its link back and there's no old curr_active_slave. 1219 * - A slave has got its link back and there's no old curr_active_slave.
1131 * 1220 *
1132 * Warning: Caller must hold curr_slave_lock for writing. 1221 * Caller must hold bond->lock for read and curr_slave_lock for write_bh.
1133 */ 1222 */
1134void bond_select_active_slave(struct bonding *bond) 1223void bond_select_active_slave(struct bonding *bond)
1135{ 1224{
@@ -1376,14 +1465,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1376 printk(KERN_WARNING DRV_NAME 1465 printk(KERN_WARNING DRV_NAME
1377 ": %s: Warning: The first slave device " 1466 ": %s: Warning: The first slave device "
1378 "specified does not support setting the MAC " 1467 "specified does not support setting the MAC "
1379 "address. Enabling the fail_over_mac option.", 1468 "address. Setting fail_over_mac to active.",
1380 bond_dev->name); 1469 bond_dev->name);
1381 bond->params.fail_over_mac = 1; 1470 bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1382 } else if (!bond->params.fail_over_mac) { 1471 } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1383 printk(KERN_ERR DRV_NAME 1472 printk(KERN_ERR DRV_NAME
1384 ": %s: Error: The slave device specified " 1473 ": %s: Error: The slave device specified "
1385 "does not support setting the MAC address, " 1474 "does not support setting the MAC address, "
1386 "but fail_over_mac is not enabled.\n" 1475 "but fail_over_mac is not set to active.\n"
1387 , bond_dev->name); 1476 , bond_dev->name);
1388 res = -EOPNOTSUPP; 1477 res = -EOPNOTSUPP;
1389 goto err_undo_flags; 1478 goto err_undo_flags;
@@ -1490,6 +1579,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1490 1579
1491 bond_compute_features(bond); 1580 bond_compute_features(bond);
1492 1581
1582 write_unlock_bh(&bond->lock);
1583
1584 read_lock(&bond->lock);
1585
1493 new_slave->last_arp_rx = jiffies; 1586 new_slave->last_arp_rx = jiffies;
1494 1587
1495 if (bond->params.miimon && !bond->params.use_carrier) { 1588 if (bond->params.miimon && !bond->params.use_carrier) {
@@ -1566,6 +1659,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1566 } 1659 }
1567 } 1660 }
1568 1661
1662 write_lock_bh(&bond->curr_slave_lock);
1663
1569 switch (bond->params.mode) { 1664 switch (bond->params.mode) {
1570 case BOND_MODE_ACTIVEBACKUP: 1665 case BOND_MODE_ACTIVEBACKUP:
1571 bond_set_slave_inactive_flags(new_slave); 1666 bond_set_slave_inactive_flags(new_slave);
@@ -1613,9 +1708,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1613 break; 1708 break;
1614 } /* switch(bond_mode) */ 1709 } /* switch(bond_mode) */
1615 1710
1711 write_unlock_bh(&bond->curr_slave_lock);
1712
1616 bond_set_carrier(bond); 1713 bond_set_carrier(bond);
1617 1714
1618 write_unlock_bh(&bond->lock); 1715 read_unlock(&bond->lock);
1619 1716
1620 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1717 res = bond_create_slave_symlinks(bond_dev, slave_dev);
1621 if (res) 1718 if (res)
@@ -1639,6 +1736,10 @@ err_unset_master:
1639 1736
1640err_restore_mac: 1737err_restore_mac:
1641 if (!bond->params.fail_over_mac) { 1738 if (!bond->params.fail_over_mac) {
1739 /* XXX TODO - fom follow mode needs to change master's
1740 * MAC if this slave's MAC is in use by the bond, or at
1741 * least print a warning.
1742 */
1642 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1743 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
1643 addr.sa_family = slave_dev->type; 1744 addr.sa_family = slave_dev->type;
1644 dev_set_mac_address(slave_dev, &addr); 1745 dev_set_mac_address(slave_dev, &addr);
@@ -1693,20 +1794,18 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1693 return -EINVAL; 1794 return -EINVAL;
1694 } 1795 }
1695 1796
1696 mac_addr_differ = memcmp(bond_dev->dev_addr, 1797 if (!bond->params.fail_over_mac) {
1697 slave->perm_hwaddr, 1798 mac_addr_differ = memcmp(bond_dev->dev_addr, slave->perm_hwaddr,
1698 ETH_ALEN); 1799 ETH_ALEN);
1699 if (!mac_addr_differ && (bond->slave_cnt > 1)) { 1800 if (!mac_addr_differ && (bond->slave_cnt > 1))
1700 printk(KERN_WARNING DRV_NAME 1801 printk(KERN_WARNING DRV_NAME
1701 ": %s: Warning: the permanent HWaddr of %s - " 1802 ": %s: Warning: the permanent HWaddr of %s - "
1702 "%s - is still in use by %s. " 1803 "%s - is still in use by %s. "
1703 "Set the HWaddr of %s to a different address " 1804 "Set the HWaddr of %s to a different address "
1704 "to avoid conflicts.\n", 1805 "to avoid conflicts.\n",
1705 bond_dev->name, 1806 bond_dev->name, slave_dev->name,
1706 slave_dev->name, 1807 print_mac(mac, slave->perm_hwaddr),
1707 print_mac(mac, slave->perm_hwaddr), 1808 bond_dev->name, slave_dev->name);
1708 bond_dev->name,
1709 slave_dev->name);
1710 } 1809 }
1711 1810
1712 /* Inform AD package of unbinding of slave. */ 1811 /* Inform AD package of unbinding of slave. */
@@ -1833,7 +1932,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1833 /* close slave before restoring its mac address */ 1932 /* close slave before restoring its mac address */
1834 dev_close(slave_dev); 1933 dev_close(slave_dev);
1835 1934
1836 if (!bond->params.fail_over_mac) { 1935 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1837 /* restore original ("permanent") mac address */ 1936 /* restore original ("permanent") mac address */
1838 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1937 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1839 addr.sa_family = slave_dev->type; 1938 addr.sa_family = slave_dev->type;
@@ -2144,7 +2243,7 @@ static int __bond_mii_monitor(struct bonding *bond, int have_locks)
2144 dprintk("sending delayed gratuitous arp on on %s\n", 2243 dprintk("sending delayed gratuitous arp on on %s\n",
2145 bond->curr_active_slave->dev->name); 2244 bond->curr_active_slave->dev->name);
2146 bond_send_gratuitous_arp(bond); 2245 bond_send_gratuitous_arp(bond);
2147 bond->send_grat_arp = 0; 2246 bond->send_grat_arp--;
2148 } 2247 }
2149 } 2248 }
2150 read_lock(&bond->curr_slave_lock); 2249 read_lock(&bond->curr_slave_lock);
@@ -2397,7 +2496,7 @@ void bond_mii_monitor(struct work_struct *work)
2397 read_lock(&bond->lock); 2496 read_lock(&bond->lock);
2398 } 2497 }
2399 2498
2400 delay = ((bond->params.miimon * HZ) / 1000) ? : 1; 2499 delay = msecs_to_jiffies(bond->params.miimon);
2401 read_unlock(&bond->lock); 2500 read_unlock(&bond->lock);
2402 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2501 queue_delayed_work(bond->wq, &bond->mii_work, delay);
2403} 2502}
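
The re-arm above swaps an open-coded millisecond-to-jiffies conversion for the standard helper. A brief kernel-context sketch of the two forms (not compilable in userspace, since HZ and msecs_to_jiffies() come from <linux/jiffies.h>):

    #include <linux/jiffies.h>

    /* Old form: truncating division, with an explicit floor of one jiffy. */
    static unsigned long delay_open_coded(int miimon)
    {
        return ((miimon * HZ) / 1000) ? : 1;
    }

    /* New form: msecs_to_jiffies() rounds up, so any non-zero interval
     * already yields at least one jiffy, and HZ values that do not divide
     * 1000 evenly are handled correctly. */
    static unsigned long delay_helper(int miimon)
    {
        return msecs_to_jiffies(miimon);
    }
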
@@ -2426,37 +2525,14 @@ out:
2426 return addr; 2525 return addr;
2427} 2526}
2428 2527
2429static int bond_has_ip(struct bonding *bond)
2430{
2431 struct vlan_entry *vlan, *vlan_next;
2432
2433 if (bond->master_ip)
2434 return 1;
2435
2436 if (list_empty(&bond->vlan_list))
2437 return 0;
2438
2439 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
2440 vlan_list) {
2441 if (vlan->vlan_ip)
2442 return 1;
2443 }
2444
2445 return 0;
2446}
2447
2448static int bond_has_this_ip(struct bonding *bond, __be32 ip) 2528static int bond_has_this_ip(struct bonding *bond, __be32 ip)
2449{ 2529{
2450 struct vlan_entry *vlan, *vlan_next; 2530 struct vlan_entry *vlan;
2451 2531
2452 if (ip == bond->master_ip) 2532 if (ip == bond->master_ip)
2453 return 1; 2533 return 1;
2454 2534
2455 if (list_empty(&bond->vlan_list)) 2535 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2456 return 0;
2457
2458 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
2459 vlan_list) {
2460 if (ip == vlan->vlan_ip) 2536 if (ip == vlan->vlan_ip)
2461 return 1; 2537 return 1;
2462 } 2538 }
@@ -2498,7 +2574,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2498{ 2574{
2499 int i, vlan_id, rv; 2575 int i, vlan_id, rv;
2500 __be32 *targets = bond->params.arp_targets; 2576 __be32 *targets = bond->params.arp_targets;
2501 struct vlan_entry *vlan, *vlan_next; 2577 struct vlan_entry *vlan;
2502 struct net_device *vlan_dev; 2578 struct net_device *vlan_dev;
2503 struct flowi fl; 2579 struct flowi fl;
2504 struct rtable *rt; 2580 struct rtable *rt;
@@ -2545,8 +2621,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2545 } 2621 }
2546 2622
2547 vlan_id = 0; 2623 vlan_id = 0;
2548 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2624 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2549 vlan_list) {
2550 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2625 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2551 if (vlan_dev == rt->u.dst.dev) { 2626 if (vlan_dev == rt->u.dst.dev) {
2552 vlan_id = vlan->vlan_id; 2627 vlan_id = vlan->vlan_id;
@@ -2707,7 +2782,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2707 2782
2708 read_lock(&bond->lock); 2783 read_lock(&bond->lock);
2709 2784
2710 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2785 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2711 2786
2712 if (bond->kill_timers) { 2787 if (bond->kill_timers) {
2713 goto out; 2788 goto out;
@@ -2764,8 +2839,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
2764 * if we don't know our ip yet 2839 * if we don't know our ip yet
2765 */ 2840 */
2766 if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || 2841 if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
2767 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) && 2842 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
2768 bond_has_ip(bond))) {
2769 2843
2770 slave->link = BOND_LINK_DOWN; 2844 slave->link = BOND_LINK_DOWN;
2771 slave->state = BOND_STATE_BACKUP; 2845 slave->state = BOND_STATE_BACKUP;
@@ -2813,246 +2887,299 @@ out:
2813} 2887}
2814 2888
2815/* 2889/*
2816 * When using arp monitoring in active-backup mode, this function is 2890 * Called to inspect slaves for active-backup mode ARP monitor link state
2817 * called to determine if any backup slaves have went down or a new 2891 * changes. Sets new_link in slaves to specify what action should take
2818 * current slave needs to be found. 2892 * place for the slave. Returns 0 if no changes are found, >0 if changes
2819 * The backup slaves never generate traffic, they are considered up by merely 2893 * to link states must be committed.
2820 * receiving traffic. If the current slave goes down, each backup slave will 2894 *
2821 * be given the opportunity to tx/rx an arp before being taken down - this 2895 * Called with bond->lock held for read.
2822 * prevents all slaves from being taken down due to the current slave not
2823 * sending any traffic for the backups to receive. The arps are not necessarily
2824 * necessary, any tx and rx traffic will keep the current slave up. While any
2825 * rx traffic will keep the backup slaves up, the current slave is responsible
2826 * for generating traffic to keep them up regardless of any other traffic they
2827 * may have received.
2828 * see loadbalance_arp_monitor for arp monitoring in load balancing mode
2829 */ 2896 */
2830void bond_activebackup_arp_mon(struct work_struct *work) 2897static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2831{ 2898{
2832 struct bonding *bond = container_of(work, struct bonding,
2833 arp_work.work);
2834 struct slave *slave; 2899 struct slave *slave;
2835 int delta_in_ticks; 2900 int i, commit = 0;
2836 int i;
2837 2901
2838 read_lock(&bond->lock); 2902 bond_for_each_slave(bond, slave, i) {
2903 slave->new_link = BOND_LINK_NOCHANGE;
2839 2904
2840 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2905 if (slave->link != BOND_LINK_UP) {
2906 if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
2907 delta_in_ticks)) {
2908 slave->new_link = BOND_LINK_UP;
2909 commit++;
2910 }
2841 2911
2842 if (bond->kill_timers) { 2912 continue;
2843 goto out; 2913 }
2844 }
2845 2914
2846 if (bond->slave_cnt == 0) { 2915 /*
2847 goto re_arm; 2916 * Give slaves 2*delta after being enslaved or made
2917 * active. This avoids bouncing, as the last receive
2918 * times need a full ARP monitor cycle to be updated.
2919 */
2920 if (!time_after_eq(jiffies, slave->jiffies +
2921 2 * delta_in_ticks))
2922 continue;
2923
2924 /*
2925 * Backup slave is down if:
2926 * - No current_arp_slave AND
2927 * - more than 3*delta since last receive AND
2928 * - the bond has an IP address
2929 *
2930 * Note: a non-null current_arp_slave indicates
2931 * the curr_active_slave went down and we are
2932 * searching for a new one; under this condition
2933 * we only take the curr_active_slave down - this
2934 * gives each slave a chance to tx/rx traffic
2935 * before being taken out
2936 */
2937 if (slave->state == BOND_STATE_BACKUP &&
2938 !bond->current_arp_slave &&
2939 time_after(jiffies, slave_last_rx(bond, slave) +
2940 3 * delta_in_ticks)) {
2941 slave->new_link = BOND_LINK_DOWN;
2942 commit++;
2943 }
2944
2945 /*
2946 * Active slave is down if:
2947 * - more than 2*delta since transmitting OR
2948 * - (more than 2*delta since receive AND
2949 * the bond has an IP address)
2950 */
2951 if ((slave->state == BOND_STATE_ACTIVE) &&
2952 (time_after_eq(jiffies, slave->dev->trans_start +
2953 2 * delta_in_ticks) ||
2954 (time_after_eq(jiffies, slave_last_rx(bond, slave)
2955 + 2 * delta_in_ticks)))) {
2956 slave->new_link = BOND_LINK_DOWN;
2957 commit++;
2958 }
2848 } 2959 }
2849 2960
2850 /* determine if any slave has come up or any backup slave has 2961 read_lock(&bond->curr_slave_lock);
2851 * gone down 2962
2852 * TODO: what about up/down delay in arp mode? it wasn't here before 2963 /*
2853 * so it can wait 2964 * Trigger a commit if the primary option setting has changed.
2854 */ 2965 */
2855 bond_for_each_slave(bond, slave, i) { 2966 if (bond->primary_slave &&
2856 if (slave->link != BOND_LINK_UP) { 2967 (bond->primary_slave != bond->curr_active_slave) &&
2857 if (time_before_eq(jiffies, 2968 (bond->primary_slave->link == BOND_LINK_UP))
2858 slave_last_rx(bond, slave) + delta_in_ticks)) { 2969 commit++;
2859 2970
2860 slave->link = BOND_LINK_UP; 2971 read_unlock(&bond->curr_slave_lock);
2861 2972
2862 write_lock_bh(&bond->curr_slave_lock); 2973 return commit;
2974}
2863 2975
2864 if ((!bond->curr_active_slave) && 2976/*
2865 time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { 2977 * Called to commit link state changes noted by inspection step of
2866 bond_change_active_slave(bond, slave); 2978 * active-backup mode ARP monitor.
2867 bond->current_arp_slave = NULL; 2979 *
2868 } else if (bond->curr_active_slave != slave) { 2980 * Called with RTNL and bond->lock for read.
2869 /* this slave has just come up but we 2981 */
2870 * already have a current slave; this 2982static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2871 * can also happen if bond_enslave adds 2983{
2872 * a new slave that is up while we are 2984 struct slave *slave;
2873 * searching for a new slave 2985 int i;
2874 */
2875 bond_set_slave_inactive_flags(slave);
2876 bond->current_arp_slave = NULL;
2877 }
2878 2986
2879 bond_set_carrier(bond); 2987 bond_for_each_slave(bond, slave, i) {
2988 switch (slave->new_link) {
2989 case BOND_LINK_NOCHANGE:
2990 continue;
2880 2991
2881 if (slave == bond->curr_active_slave) { 2992 case BOND_LINK_UP:
2882 printk(KERN_INFO DRV_NAME 2993 write_lock_bh(&bond->curr_slave_lock);
2883 ": %s: %s is up and now the "
2884 "active interface\n",
2885 bond->dev->name,
2886 slave->dev->name);
2887 netif_carrier_on(bond->dev);
2888 } else {
2889 printk(KERN_INFO DRV_NAME
2890 ": %s: backup interface %s is "
2891 "now up\n",
2892 bond->dev->name,
2893 slave->dev->name);
2894 }
2895 2994
2896 write_unlock_bh(&bond->curr_slave_lock); 2995 if (!bond->curr_active_slave &&
2897 } 2996 time_before_eq(jiffies, slave->dev->trans_start +
2898 } else { 2997 delta_in_ticks)) {
2899 read_lock(&bond->curr_slave_lock); 2998 slave->link = BOND_LINK_UP;
2999 bond_change_active_slave(bond, slave);
3000 bond->current_arp_slave = NULL;
2900 3001
2901 if ((slave != bond->curr_active_slave) && 3002 printk(KERN_INFO DRV_NAME
2902 (!bond->current_arp_slave) && 3003 ": %s: %s is up and now the "
2903 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) && 3004 "active interface\n",
2904 bond_has_ip(bond))) { 3005 bond->dev->name, slave->dev->name);
2905 /* a backup slave has gone down; three times 3006
2906 * the delta allows the current slave to be 3007 } else if (bond->curr_active_slave != slave) {
2907 * taken out before the backup slave. 3008 /* this slave has just come up but we
2908 * note: a non-null current_arp_slave indicates 3009 * already have a current slave; this can
2909 * the curr_active_slave went down and we are 3010 * also happen if bond_enslave adds a new
2910 * searching for a new one; under this 3011 * slave that is up while we are searching
2911 * condition we only take the curr_active_slave 3012 * for a new slave
2912 * down - this gives each slave a chance to
2913 * tx/rx traffic before being taken out
2914 */ 3013 */
3014 slave->link = BOND_LINK_UP;
3015 bond_set_slave_inactive_flags(slave);
3016 bond->current_arp_slave = NULL;
2915 3017
2916 read_unlock(&bond->curr_slave_lock); 3018 printk(KERN_INFO DRV_NAME
3019 ": %s: backup interface %s is now up\n",
3020 bond->dev->name, slave->dev->name);
3021 }
2917 3022
2918 slave->link = BOND_LINK_DOWN; 3023 write_unlock_bh(&bond->curr_slave_lock);
2919 3024
2920 if (slave->link_failure_count < UINT_MAX) { 3025 break;
2921 slave->link_failure_count++; 3026
2922 } 3027 case BOND_LINK_DOWN:
3028 if (slave->link_failure_count < UINT_MAX)
3029 slave->link_failure_count++;
3030
3031 slave->link = BOND_LINK_DOWN;
3032
3033 if (slave == bond->curr_active_slave) {
3034 printk(KERN_INFO DRV_NAME
3035 ": %s: link status down for active "
3036 "interface %s, disabling it\n",
3037 bond->dev->name, slave->dev->name);
2923 3038
2924 bond_set_slave_inactive_flags(slave); 3039 bond_set_slave_inactive_flags(slave);
2925 3040
3041 write_lock_bh(&bond->curr_slave_lock);
3042
3043 bond_select_active_slave(bond);
3044 if (bond->curr_active_slave)
3045 bond->curr_active_slave->jiffies =
3046 jiffies;
3047
3048 write_unlock_bh(&bond->curr_slave_lock);
3049
3050 bond->current_arp_slave = NULL;
3051
3052 } else if (slave->state == BOND_STATE_BACKUP) {
2926 printk(KERN_INFO DRV_NAME 3053 printk(KERN_INFO DRV_NAME
2927 ": %s: backup interface %s is now down\n", 3054 ": %s: backup interface %s is now down\n",
2928 bond->dev->name, 3055 bond->dev->name, slave->dev->name);
2929 slave->dev->name); 3056
2930 } else { 3057 bond_set_slave_inactive_flags(slave);
2931 read_unlock(&bond->curr_slave_lock);
2932 } 3058 }
3059 break;
3060
3061 default:
3062 printk(KERN_ERR DRV_NAME
3063 ": %s: impossible: new_link %d on slave %s\n",
3064 bond->dev->name, slave->new_link,
3065 slave->dev->name);
2933 } 3066 }
2934 } 3067 }
2935 3068
2936 read_lock(&bond->curr_slave_lock); 3069 /*
2937 slave = bond->curr_active_slave; 3070 * No race with changes to primary via sysfs, as we hold rtnl.
2938 read_unlock(&bond->curr_slave_lock); 3071 */
2939 3072 if (bond->primary_slave &&
2940 if (slave) { 3073 (bond->primary_slave != bond->curr_active_slave) &&
2941 /* if we have sent traffic in the past 2*arp_intervals but 3074 (bond->primary_slave->link == BOND_LINK_UP)) {
2942 * haven't xmit and rx traffic in that time interval, select 3075 write_lock_bh(&bond->curr_slave_lock);
2943 * a different slave. slave->jiffies is only updated when 3076 bond_change_active_slave(bond, bond->primary_slave);
2944 * a slave first becomes the curr_active_slave - not necessarily 3077 write_unlock_bh(&bond->curr_slave_lock);
2945 * after every arp; this ensures the slave has a full 2*delta 3078 }
2946 * before being taken out. if a primary is being used, check
2947 * if it is up and needs to take over as the curr_active_slave
2948 */
2949 if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
2950 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) &&
2951 bond_has_ip(bond))) &&
2952 time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) {
2953 3079
2954 slave->link = BOND_LINK_DOWN; 3080 bond_set_carrier(bond);
3081}
2955 3082
2956 if (slave->link_failure_count < UINT_MAX) { 3083/*
2957 slave->link_failure_count++; 3084 * Send ARP probes for active-backup mode ARP monitor.
2958 } 3085 *
3086 * Called with bond->lock held for read.
3087 */
3088static void bond_ab_arp_probe(struct bonding *bond)
3089{
3090 struct slave *slave;
3091 int i;
2959 3092
2960 printk(KERN_INFO DRV_NAME 3093 read_lock(&bond->curr_slave_lock);
2961 ": %s: link status down for active interface "
2962 "%s, disabling it\n",
2963 bond->dev->name,
2964 slave->dev->name);
2965 3094
2966 write_lock_bh(&bond->curr_slave_lock); 3095 if (bond->current_arp_slave && bond->curr_active_slave)
3096 printk("PROBE: c_arp %s && cas %s BAD\n",
3097 bond->current_arp_slave->dev->name,
3098 bond->curr_active_slave->dev->name);
2967 3099
2968 bond_select_active_slave(bond); 3100 if (bond->curr_active_slave) {
2969 slave = bond->curr_active_slave; 3101 bond_arp_send_all(bond, bond->curr_active_slave);
3102 read_unlock(&bond->curr_slave_lock);
3103 return;
3104 }
2970 3105
2971 write_unlock_bh(&bond->curr_slave_lock); 3106 read_unlock(&bond->curr_slave_lock);
2972 3107
2973 bond->current_arp_slave = slave; 3108 /* if we don't have a curr_active_slave, search for the next available
3109 * backup slave from the current_arp_slave and make it the candidate
3110 * for becoming the curr_active_slave
3111 */
2974 3112
2975 if (slave) { 3113 if (!bond->current_arp_slave) {
2976 slave->jiffies = jiffies; 3114 bond->current_arp_slave = bond->first_slave;
2977 } 3115 if (!bond->current_arp_slave)
2978 } else if ((bond->primary_slave) && 3116 return;
2979 (bond->primary_slave != slave) && 3117 }
2980 (bond->primary_slave->link == BOND_LINK_UP)) {
2981 /* at this point, slave is the curr_active_slave */
2982 printk(KERN_INFO DRV_NAME
2983 ": %s: changing from interface %s to primary "
2984 "interface %s\n",
2985 bond->dev->name,
2986 slave->dev->name,
2987 bond->primary_slave->dev->name);
2988 3118
2989 /* primary is up so switch to it */ 3119 bond_set_slave_inactive_flags(bond->current_arp_slave);
2990 write_lock_bh(&bond->curr_slave_lock);
2991 bond_change_active_slave(bond, bond->primary_slave);
2992 write_unlock_bh(&bond->curr_slave_lock);
2993 3120
2994 slave = bond->primary_slave; 3121 /* search for next candidate */
3122 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3123 if (IS_UP(slave->dev)) {
3124 slave->link = BOND_LINK_BACK;
3125 bond_set_slave_active_flags(slave);
3126 bond_arp_send_all(bond, slave);
2995 slave->jiffies = jiffies; 3127 slave->jiffies = jiffies;
2996 } else { 3128 bond->current_arp_slave = slave;
2997 bond->current_arp_slave = NULL; 3129 break;
2998 } 3130 }
2999 3131
3000 /* the current slave must tx an arp to ensure backup slaves 3132 /* if the link state is up at this point, we
3001 * rx traffic 3133 * mark it down - this can happen if we have
3134 * simultaneous link failures and
3135 * reselect_active_interface doesn't make this
3136 * one the current slave so it is still marked
3137 * up when it is actually down
3002 */ 3138 */
3003 if (slave && bond_has_ip(bond)) { 3139 if (slave->link == BOND_LINK_UP) {
3004 bond_arp_send_all(bond, slave); 3140 slave->link = BOND_LINK_DOWN;
3141 if (slave->link_failure_count < UINT_MAX)
3142 slave->link_failure_count++;
3143
3144 bond_set_slave_inactive_flags(slave);
3145
3146 printk(KERN_INFO DRV_NAME
3147 ": %s: backup interface %s is now down.\n",
3148 bond->dev->name, slave->dev->name);
3005 } 3149 }
3006 } 3150 }
3151}
3007 3152
3008 /* if we don't have a curr_active_slave, search for the next available 3153void bond_activebackup_arp_mon(struct work_struct *work)
3009 * backup slave from the current_arp_slave and make it the candidate 3154{
3010 * for becoming the curr_active_slave 3155 struct bonding *bond = container_of(work, struct bonding,
3011 */ 3156 arp_work.work);
3012 if (!slave) { 3157 int delta_in_ticks;
3013 if (!bond->current_arp_slave) {
3014 bond->current_arp_slave = bond->first_slave;
3015 }
3016 3158
3017 if (bond->current_arp_slave) { 3159 read_lock(&bond->lock);
3018 bond_set_slave_inactive_flags(bond->current_arp_slave);
3019 3160
3020 /* search for next candidate */ 3161 if (bond->kill_timers)
3021 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 3162 goto out;
3022 if (IS_UP(slave->dev)) {
3023 slave->link = BOND_LINK_BACK;
3024 bond_set_slave_active_flags(slave);
3025 bond_arp_send_all(bond, slave);
3026 slave->jiffies = jiffies;
3027 bond->current_arp_slave = slave;
3028 break;
3029 }
3030 3163
3031 /* if the link state is up at this point, we 3164 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
3032 * mark it down - this can happen if we have
3033 * simultaneous link failures and
3034 * reselect_active_interface doesn't make this
3035 * one the current slave so it is still marked
3036 * up when it is actually down
3037 */
3038 if (slave->link == BOND_LINK_UP) {
3039 slave->link = BOND_LINK_DOWN;
3040 if (slave->link_failure_count < UINT_MAX) {
3041 slave->link_failure_count++;
3042 }
3043 3165
3044 bond_set_slave_inactive_flags(slave); 3166 if (bond->slave_cnt == 0)
3167 goto re_arm;
3045 3168
3046 printk(KERN_INFO DRV_NAME 3169 if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3047 ": %s: backup interface %s is " 3170 read_unlock(&bond->lock);
3048 "now down.\n", 3171 rtnl_lock();
3049 bond->dev->name, 3172 read_lock(&bond->lock);
3050 slave->dev->name); 3173
3051 } 3174 bond_ab_arp_commit(bond, delta_in_ticks);
3052 } 3175
3053 } 3176 read_unlock(&bond->lock);
3177 rtnl_unlock();
3178 read_lock(&bond->lock);
3054 } 3179 }
3055 3180
3181 bond_ab_arp_probe(bond);
3182
3056re_arm: 3183re_arm:
3057 if (bond->params.arp_interval) { 3184 if (bond->params.arp_interval) {
3058 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3185 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
@@ -3128,7 +3255,8 @@ static void bond_info_show_master(struct seq_file *seq)
3128 3255
3129 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && 3256 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
3130 bond->params.fail_over_mac) 3257 bond->params.fail_over_mac)
3131 seq_printf(seq, " (fail_over_mac)"); 3258 seq_printf(seq, " (fail_over_mac %s)",
3259 fail_over_mac_tbl[bond->params.fail_over_mac].modename);
3132 3260
3133 seq_printf(seq, "\n"); 3261 seq_printf(seq, "\n");
3134 3262
@@ -3500,13 +3628,13 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
3500{ 3628{
3501 struct in_ifaddr *ifa = ptr; 3629 struct in_ifaddr *ifa = ptr;
3502 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3630 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
3503 struct bonding *bond, *bond_next; 3631 struct bonding *bond;
3504 struct vlan_entry *vlan, *vlan_next; 3632 struct vlan_entry *vlan;
3505 3633
3506 if (dev_net(ifa->ifa_dev->dev) != &init_net) 3634 if (dev_net(ifa->ifa_dev->dev) != &init_net)
3507 return NOTIFY_DONE; 3635 return NOTIFY_DONE;
3508 3636
3509 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3637 list_for_each_entry(bond, &bond_dev_list, bond_list) {
3510 if (bond->dev == event_dev) { 3638 if (bond->dev == event_dev) {
3511 switch (event) { 3639 switch (event) {
3512 case NETDEV_UP: 3640 case NETDEV_UP:
@@ -3520,11 +3648,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
3520 } 3648 }
3521 } 3649 }
3522 3650
3523 if (list_empty(&bond->vlan_list)) 3651 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
3524 continue;
3525
3526 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
3527 vlan_list) {
3528 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 3652 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
3529 if (vlan_dev == event_dev) { 3653 if (vlan_dev == event_dev) {
3530 switch (event) { 3654 switch (event) {
@@ -4060,10 +4184,10 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
4060 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 4184 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
4061 4185
4062 /* 4186 /*
4063 * If fail_over_mac is enabled, do nothing and return success. 4187 * If fail_over_mac is set to active, do nothing and return
4064 * Returning an error causes ifenslave to fail. 4188 * success. Returning an error causes ifenslave to fail.
4065 */ 4189 */
4066 if (bond->params.fail_over_mac) 4190 if (bond->params.fail_over_mac == BOND_FOM_ACTIVE)
4067 return 0; 4191 return 0;
4068 4192
4069 if (!is_valid_ether_addr(sa->sa_data)) { 4193 if (!is_valid_ether_addr(sa->sa_data)) {
@@ -4568,7 +4692,7 @@ int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl)
4568 4692
4569static int bond_check_params(struct bond_params *params) 4693static int bond_check_params(struct bond_params *params)
4570{ 4694{
4571 int arp_validate_value; 4695 int arp_validate_value, fail_over_mac_value;
4572 4696
4573 /* 4697 /*
4574 * Convert string parameters. 4698 * Convert string parameters.
@@ -4658,6 +4782,13 @@ static int bond_check_params(struct bond_params *params)
4658 use_carrier = 1; 4782 use_carrier = 1;
4659 } 4783 }
4660 4784
4785 if (num_grat_arp < 0 || num_grat_arp > 255) {
4786 printk(KERN_WARNING DRV_NAME
4787 ": Warning: num_grat_arp (%d) not in range 0-255 so it "
4788 "was reset to 1 \n", num_grat_arp);
4789 num_grat_arp = 1;
4790 }
4791
4661 /* reset values for 802.3ad */ 4792 /* reset values for 802.3ad */
4662 if (bond_mode == BOND_MODE_8023AD) { 4793 if (bond_mode == BOND_MODE_8023AD) {
4663 if (!miimon) { 4794 if (!miimon) {
@@ -4836,15 +4967,29 @@ static int bond_check_params(struct bond_params *params)
4836 primary = NULL; 4967 primary = NULL;
4837 } 4968 }
4838 4969
4839 if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) 4970 if (fail_over_mac) {
4840 printk(KERN_WARNING DRV_NAME 4971 fail_over_mac_value = bond_parse_parm(fail_over_mac,
4841 ": Warning: fail_over_mac only affects " 4972 fail_over_mac_tbl);
4842 "active-backup mode.\n"); 4973 if (fail_over_mac_value == -1) {
4974 printk(KERN_ERR DRV_NAME
4975 ": Error: invalid fail_over_mac \"%s\"\n",
 4976 fail_over_mac == NULL ? "NULL" : fail_over_mac);
4977 return -EINVAL;
4978 }
4979
4980 if (bond_mode != BOND_MODE_ACTIVEBACKUP)
4981 printk(KERN_WARNING DRV_NAME
4982 ": Warning: fail_over_mac only affects "
4983 "active-backup mode.\n");
4984 } else {
4985 fail_over_mac_value = BOND_FOM_NONE;
4986 }
4843 4987
4844 /* fill params struct with the proper values */ 4988 /* fill params struct with the proper values */
4845 params->mode = bond_mode; 4989 params->mode = bond_mode;
4846 params->xmit_policy = xmit_hashtype; 4990 params->xmit_policy = xmit_hashtype;
4847 params->miimon = miimon; 4991 params->miimon = miimon;
4992 params->num_grat_arp = num_grat_arp;
4848 params->arp_interval = arp_interval; 4993 params->arp_interval = arp_interval;
4849 params->arp_validate = arp_validate_value; 4994 params->arp_validate = arp_validate_value;
4850 params->updelay = updelay; 4995 params->updelay = updelay;
@@ -4852,7 +4997,7 @@ static int bond_check_params(struct bond_params *params)
4852 params->use_carrier = use_carrier; 4997 params->use_carrier = use_carrier;
4853 params->lacp_fast = lacp_fast; 4998 params->lacp_fast = lacp_fast;
4854 params->primary[0] = 0; 4999 params->primary[0] = 0;
4855 params->fail_over_mac = fail_over_mac; 5000 params->fail_over_mac = fail_over_mac_value;
4856 5001
4857 if (primary) { 5002 if (primary) {
4858 strncpy(params->primary, primary, IFNAMSIZ); 5003 strncpy(params->primary, primary, IFNAMSIZ);
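
bond_check_params() now resolves the fail_over_mac string through bond_parse_parm() and the fail_over_mac_tbl defined earlier in this patch. A standalone sketch of that style of table lookup, with the BOND_FOM_* values taken from the documentation above; this is an illustration only, not the driver's actual bond_parse_parm():

    #include <stdio.h>
    #include <string.h>

    struct parm_tbl { const char *modename; int mode; };

    static const struct parm_tbl fail_over_mac_tbl[] = {
        { "none",   0 },   /* BOND_FOM_NONE */
        { "active", 1 },   /* BOND_FOM_ACTIVE */
        { "follow", 2 },   /* BOND_FOM_FOLLOW */
        { NULL,    -1 },
    };

    /* Match a policy name against the table and return its mode, or -1. */
    static int parse_parm(const char *buf, const struct parm_tbl *tbl)
    {
        int i;

        for (i = 0; tbl[i].modename; i++)
            if (strncmp(buf, tbl[i].modename, strlen(tbl[i].modename)) == 0)
                return tbl[i].mode;
        return -1;
    }

    int main(void)
    {
        printf("%d\n", parse_parm("follow", fail_over_mac_tbl)); /* prints 2 */
        return 0;
    }
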
@@ -4871,10 +5016,10 @@ static struct lock_class_key bonding_netdev_xmit_lock_key;
4871 * Caller must NOT hold rtnl_lock; we need to release it here before we 5016 * Caller must NOT hold rtnl_lock; we need to release it here before we
4872 * set up our sysfs entries. 5017 * set up our sysfs entries.
4873 */ 5018 */
4874int bond_create(char *name, struct bond_params *params, struct bonding **newbond) 5019int bond_create(char *name, struct bond_params *params)
4875{ 5020{
4876 struct net_device *bond_dev; 5021 struct net_device *bond_dev;
4877 struct bonding *bond, *nxt; 5022 struct bonding *bond;
4878 int res; 5023 int res;
4879 5024
4880 rtnl_lock(); 5025 rtnl_lock();
@@ -4882,7 +5027,7 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
4882 5027
4883 /* Check to see if the bond already exists. */ 5028 /* Check to see if the bond already exists. */
4884 if (name) { 5029 if (name) {
4885 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) 5030 list_for_each_entry(bond, &bond_dev_list, bond_list)
4886 if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) { 5031 if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) {
4887 printk(KERN_ERR DRV_NAME 5032 printk(KERN_ERR DRV_NAME
4888 ": cannot add bond %s; it already exists\n", 5033 ": cannot add bond %s; it already exists\n",
@@ -4925,9 +5070,6 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
4925 5070
4926 lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key); 5071 lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key);
4927 5072
4928 if (newbond)
4929 *newbond = bond_dev->priv;
4930
4931 netif_carrier_off(bond_dev); 5073 netif_carrier_off(bond_dev);
4932 5074
4933 up_write(&bonding_rwsem); 5075 up_write(&bonding_rwsem);
@@ -4957,7 +5099,7 @@ static int __init bonding_init(void)
4957{ 5099{
4958 int i; 5100 int i;
4959 int res; 5101 int res;
4960 struct bonding *bond, *nxt; 5102 struct bonding *bond;
4961 5103
4962 printk(KERN_INFO "%s", version); 5104 printk(KERN_INFO "%s", version);
4963 5105
@@ -4973,7 +5115,7 @@ static int __init bonding_init(void)
4973 init_rwsem(&bonding_rwsem); 5115 init_rwsem(&bonding_rwsem);
4974 5116
4975 for (i = 0; i < max_bonds; i++) { 5117 for (i = 0; i < max_bonds; i++) {
4976 res = bond_create(NULL, &bonding_defaults, NULL); 5118 res = bond_create(NULL, &bonding_defaults);
4977 if (res) 5119 if (res)
4978 goto err; 5120 goto err;
4979 } 5121 }
@@ -4987,7 +5129,7 @@ static int __init bonding_init(void)
4987 5129
4988 goto out; 5130 goto out;
4989err: 5131err:
4990 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 5132 list_for_each_entry(bond, &bond_dev_list, bond_list) {
4991 bond_work_cancel_all(bond); 5133 bond_work_cancel_all(bond);
4992 destroy_workqueue(bond->wq); 5134 destroy_workqueue(bond->wq);
4993 } 5135 }
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 08f3d396bcd6..dd265c69b0df 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -50,6 +50,7 @@ extern struct bond_parm_tbl bond_mode_tbl[];
50extern struct bond_parm_tbl bond_lacp_tbl[]; 50extern struct bond_parm_tbl bond_lacp_tbl[];
51extern struct bond_parm_tbl xmit_hashtype_tbl[]; 51extern struct bond_parm_tbl xmit_hashtype_tbl[];
52extern struct bond_parm_tbl arp_validate_tbl[]; 52extern struct bond_parm_tbl arp_validate_tbl[];
53extern struct bond_parm_tbl fail_over_mac_tbl[];
53 54
54static int expected_refcount = -1; 55static int expected_refcount = -1;
55static struct class *netdev_class; 56static struct class *netdev_class;
@@ -111,7 +112,6 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t
111 char *ifname; 112 char *ifname;
112 int rv, res = count; 113 int rv, res = count;
113 struct bonding *bond; 114 struct bonding *bond;
114 struct bonding *nxt;
115 115
116 sscanf(buffer, "%16s", command); /* IFNAMSIZ*/ 116 sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
117 ifname = command + 1; 117 ifname = command + 1;
@@ -122,7 +122,7 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t
122 if (command[0] == '+') { 122 if (command[0] == '+') {
123 printk(KERN_INFO DRV_NAME 123 printk(KERN_INFO DRV_NAME
124 ": %s is being created...\n", ifname); 124 ": %s is being created...\n", ifname);
125 rv = bond_create(ifname, &bonding_defaults, &bond); 125 rv = bond_create(ifname, &bonding_defaults);
126 if (rv) { 126 if (rv) {
127 printk(KERN_INFO DRV_NAME ": Bond creation failed.\n"); 127 printk(KERN_INFO DRV_NAME ": Bond creation failed.\n");
128 res = rv; 128 res = rv;
@@ -134,7 +134,7 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t
134 rtnl_lock(); 134 rtnl_lock();
135 down_write(&bonding_rwsem); 135 down_write(&bonding_rwsem);
136 136
137 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) 137 list_for_each_entry(bond, &bond_dev_list, bond_list)
138 if (strnicmp(bond->dev->name, ifname, IFNAMSIZ) == 0) { 138 if (strnicmp(bond->dev->name, ifname, IFNAMSIZ) == 0) {
139 /* check the ref count on the bond's kobject. 139 /* check the ref count on the bond's kobject.
140 * If it's > expected, then there's a file open, 140 * If it's > expected, then there's a file open,
@@ -548,42 +548,37 @@ static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attrib
548{ 548{
549 struct bonding *bond = to_bond(d); 549 struct bonding *bond = to_bond(d);
550 550
551 return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1; 551 return sprintf(buf, "%s %d\n",
552 fail_over_mac_tbl[bond->params.fail_over_mac].modename,
553 bond->params.fail_over_mac);
552} 554}
553 555
554static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count) 556static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count)
555{ 557{
556 int new_value; 558 int new_value;
557 int ret = count;
558 struct bonding *bond = to_bond(d); 559 struct bonding *bond = to_bond(d);
559 560
560 if (bond->slave_cnt != 0) { 561 if (bond->slave_cnt != 0) {
561 printk(KERN_ERR DRV_NAME 562 printk(KERN_ERR DRV_NAME
562 ": %s: Can't alter fail_over_mac with slaves in bond.\n", 563 ": %s: Can't alter fail_over_mac with slaves in bond.\n",
563 bond->dev->name); 564 bond->dev->name);
564 ret = -EPERM; 565 return -EPERM;
565 goto out;
566 } 566 }
567 567
568 if (sscanf(buf, "%d", &new_value) != 1) { 568 new_value = bond_parse_parm(buf, fail_over_mac_tbl);
569 if (new_value < 0) {
569 printk(KERN_ERR DRV_NAME 570 printk(KERN_ERR DRV_NAME
570 ": %s: no fail_over_mac value specified.\n", 571 ": %s: Ignoring invalid fail_over_mac value %s.\n",
571 bond->dev->name); 572 bond->dev->name, buf);
572 ret = -EINVAL; 573 return -EINVAL;
573 goto out;
574 } 574 }
575 575
576 if ((new_value == 0) || (new_value == 1)) { 576 bond->params.fail_over_mac = new_value;
577 bond->params.fail_over_mac = new_value; 577 printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %s (%d).\n",
578 printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n", 578 bond->dev->name, fail_over_mac_tbl[new_value].modename,
579 bond->dev->name, new_value); 579 new_value);
580 } else { 580
581 printk(KERN_INFO DRV_NAME 581 return count;
582 ": %s: Ignoring invalid fail_over_mac value %d.\n",
583 bond->dev->name, new_value);
584 }
585out:
586 return ret;
587} 582}
588 583
589static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac); 584static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac);
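For a quick sense of how the reworked attribute behaves from user space, a minimal sketch follows (the /sys/class/net/bond0/bonding/fail_over_mac path and the "follow" policy name are assumptions based on the fail_over_mac_tbl and BOND_FOM_FOLLOW additions elsewhere in this patch): writes now accept either a policy name or its number, and reads report both.

#include <stdio.h>

/* Hedged user-space sketch; the path and the policy name are assumptions,
 * not taken verbatim from this patch. */
int main(void)
{
	char line[64];
	FILE *f;

	/* Store: bond_parse_parm() resolves names or numbers against the table. */
	f = fopen("/sys/class/net/bond0/bonding/fail_over_mac", "w");
	if (!f)
		return 1;
	fprintf(f, "follow\n");
	fclose(f);

	/* Show: the handler now prints "<modename> <value>". */
	f = fopen("/sys/class/net/bond0/bonding/fail_over_mac", "r");
	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f))
		printf("%s", line);	/* e.g. "follow 2" */
	fclose(f);
	return 0;
}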
@@ -952,6 +947,45 @@ out:
952static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR, bonding_show_lacp, bonding_store_lacp); 947static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR, bonding_show_lacp, bonding_store_lacp);
953 948
954/* 949/*
950 * Show and set the number of grat ARP to send after a failover event.
951 */
952static ssize_t bonding_show_n_grat_arp(struct device *d,
953 struct device_attribute *attr,
954 char *buf)
955{
956 struct bonding *bond = to_bond(d);
957
958 return sprintf(buf, "%d\n", bond->params.num_grat_arp);
959}
960
961static ssize_t bonding_store_n_grat_arp(struct device *d,
962 struct device_attribute *attr,
963 const char *buf, size_t count)
964{
965 int new_value, ret = count;
966 struct bonding *bond = to_bond(d);
967
968 if (sscanf(buf, "%d", &new_value) != 1) {
969 printk(KERN_ERR DRV_NAME
970 ": %s: no num_grat_arp value specified.\n",
971 bond->dev->name);
972 ret = -EINVAL;
973 goto out;
974 }
975 if (new_value < 0 || new_value > 255) {
976 printk(KERN_ERR DRV_NAME
977 ": %s: Invalid num_grat_arp value %d not in range 0-255; rejected.\n",
978 bond->dev->name, new_value);
979 ret = -EINVAL;
980 goto out;
981 } else {
982 bond->params.num_grat_arp = new_value;
983 }
984out:
985 return ret;
986}
987static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, bonding_show_n_grat_arp, bonding_store_n_grat_arp);
988/*
955 * Show and set the MII monitor interval. There are two tricky bits 989 * Show and set the MII monitor interval. There are two tricky bits
956 * here. First, if MII monitoring is activated, then we must disable 990 * here. First, if MII monitoring is activated, then we must disable
957 * ARP monitoring. Second, if the timer isn't running, we must 991 * ARP monitoring. Second, if the timer isn't running, we must
@@ -1388,6 +1422,7 @@ static struct attribute *per_bond_attrs[] = {
1388 &dev_attr_updelay.attr, 1422 &dev_attr_updelay.attr,
1389 &dev_attr_lacp_rate.attr, 1423 &dev_attr_lacp_rate.attr,
1390 &dev_attr_xmit_hash_policy.attr, 1424 &dev_attr_xmit_hash_policy.attr,
1425 &dev_attr_num_grat_arp.attr,
1391 &dev_attr_miimon.attr, 1426 &dev_attr_miimon.attr,
1392 &dev_attr_primary.attr, 1427 &dev_attr_primary.attr,
1393 &dev_attr_use_carrier.attr, 1428 &dev_attr_use_carrier.attr,
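In the same spirit, a small sketch of driving the num_grat_arp attribute registered above (the sysfs path is an assumption based on the standard bonding layout); the store handler rejects anything outside 0-255.

#include <stdio.h>

/* Hedged user-space sketch; the path is an assumption. */
int main(void)
{
	FILE *f = fopen("/sys/class/net/bond0/bonding/num_grat_arp", "w");

	if (!f)
		return 1;
	fprintf(f, "3\n");	/* ask for three gratuitous ARPs after a failover */
	fclose(f);
	return 0;
}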
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index a3c74e20aa53..89fd9963db7a 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -125,6 +125,7 @@ struct bond_params {
125 int mode; 125 int mode;
126 int xmit_policy; 126 int xmit_policy;
127 int miimon; 127 int miimon;
128 int num_grat_arp;
128 int arp_interval; 129 int arp_interval;
129 int arp_validate; 130 int arp_validate;
130 int use_carrier; 131 int use_carrier;
@@ -157,6 +158,7 @@ struct slave {
157 unsigned long jiffies; 158 unsigned long jiffies;
158 unsigned long last_arp_rx; 159 unsigned long last_arp_rx;
159 s8 link; /* one of BOND_LINK_XXXX */ 160 s8 link; /* one of BOND_LINK_XXXX */
161 s8 new_link;
160 s8 state; /* one of BOND_STATE_XXXX */ 162 s8 state; /* one of BOND_STATE_XXXX */
161 u32 original_flags; 163 u32 original_flags;
162 u32 original_mtu; 164 u32 original_mtu;
@@ -169,6 +171,11 @@ struct slave {
169}; 171};
170 172
171/* 173/*
174 * Link pseudo-state only used internally by monitors
175 */
176#define BOND_LINK_NOCHANGE -1
177
178/*
172 * Here are the locking policies for the two bonding locks: 179 * Here are the locking policies for the two bonding locks:
173 * 180 *
174 * 1) Get bond->lock when reading/writing slave list. 181 * 1) Get bond->lock when reading/writing slave list.
@@ -241,6 +248,10 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
241 return (struct bonding *)slave->dev->master->priv; 248 return (struct bonding *)slave->dev->master->priv;
242} 249}
243 250
251#define BOND_FOM_NONE 0
252#define BOND_FOM_ACTIVE 1
253#define BOND_FOM_FOLLOW 2
254
244#define BOND_ARP_VALIDATE_NONE 0 255#define BOND_ARP_VALIDATE_NONE 0
245#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) 256#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE)
246#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) 257#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
@@ -301,7 +312,7 @@ static inline void bond_unset_master_alb_flags(struct bonding *bond)
301 312
302struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); 313struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
303int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); 314int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
304int bond_create(char *name, struct bond_params *params, struct bonding **newbond); 315int bond_create(char *name, struct bond_params *params);
305void bond_destroy(struct bonding *bond); 316void bond_destroy(struct bonding *bond);
306int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev); 317int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev);
307int bond_create_sysfs(void); 318int bond_create_sysfs(void);
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index acebe431d068..271140433b09 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -42,6 +42,7 @@
42#include <linux/cache.h> 42#include <linux/cache.h>
43#include <linux/mutex.h> 43#include <linux/mutex.h>
44#include <linux/bitops.h> 44#include <linux/bitops.h>
45#include <linux/inet_lro.h>
45#include "t3cdev.h" 46#include "t3cdev.h"
46#include <asm/io.h> 47#include <asm/io.h>
47 48
@@ -92,6 +93,7 @@ struct sge_fl { /* SGE per free-buffer list state */
92 unsigned int gen; /* free list generation */ 93 unsigned int gen; /* free list generation */
93 struct fl_pg_chunk pg_chunk;/* page chunk cache */ 94 struct fl_pg_chunk pg_chunk;/* page chunk cache */
94 unsigned int use_pages; /* whether FL uses pages or sk_buffs */ 95 unsigned int use_pages; /* whether FL uses pages or sk_buffs */
96 unsigned int order; /* order of page allocations */
95 struct rx_desc *desc; /* address of HW Rx descriptor ring */ 97 struct rx_desc *desc; /* address of HW Rx descriptor ring */
96 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ 98 struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
97 dma_addr_t phys_addr; /* physical address of HW ring start */ 99 dma_addr_t phys_addr; /* physical address of HW ring start */
@@ -116,12 +118,15 @@ struct sge_rspq { /* state for an SGE response queue */
116 unsigned int polling; /* is the queue serviced through NAPI? */ 118 unsigned int polling; /* is the queue serviced through NAPI? */
117 unsigned int holdoff_tmr; /* interrupt holdoff timer in 100ns */ 119 unsigned int holdoff_tmr; /* interrupt holdoff timer in 100ns */
118 unsigned int next_holdoff; /* holdoff time for next interrupt */ 120 unsigned int next_holdoff; /* holdoff time for next interrupt */
121 unsigned int rx_recycle_buf; /* whether recycling occurred
122 within current sop-eop */
119 struct rsp_desc *desc; /* address of HW response ring */ 123 struct rsp_desc *desc; /* address of HW response ring */
120 dma_addr_t phys_addr; /* physical address of the ring */ 124 dma_addr_t phys_addr; /* physical address of the ring */
121 unsigned int cntxt_id; /* SGE context id for the response q */ 125 unsigned int cntxt_id; /* SGE context id for the response q */
122 spinlock_t lock; /* guards response processing */ 126 spinlock_t lock; /* guards response processing */
123 struct sk_buff *rx_head; /* offload packet receive queue head */ 127 struct sk_buff *rx_head; /* offload packet receive queue head */
124 struct sk_buff *rx_tail; /* offload packet receive queue tail */ 128 struct sk_buff *rx_tail; /* offload packet receive queue tail */
129 struct sk_buff *pg_skb; /* used to build frag list in napi handler */
125 130
126 unsigned long offload_pkts; 131 unsigned long offload_pkts;
127 unsigned long offload_bundles; 132 unsigned long offload_bundles;
@@ -169,16 +174,29 @@ enum { /* per port SGE statistics */
169 SGE_PSTAT_TX_CSUM, /* # of TX checksum offloads */ 174 SGE_PSTAT_TX_CSUM, /* # of TX checksum offloads */
170 SGE_PSTAT_VLANEX, /* # of VLAN tag extractions */ 175 SGE_PSTAT_VLANEX, /* # of VLAN tag extractions */
171 SGE_PSTAT_VLANINS, /* # of VLAN tag insertions */ 176 SGE_PSTAT_VLANINS, /* # of VLAN tag insertions */
177 SGE_PSTAT_LRO_AGGR, /* # of page chunks added to LRO sessions */
178 SGE_PSTAT_LRO_FLUSHED, /* # of flushed LRO sessions */
179 SGE_PSTAT_LRO_NO_DESC, /* # of overflown LRO sessions */
172 180
173 SGE_PSTAT_MAX /* must be last */ 181 SGE_PSTAT_MAX /* must be last */
174}; 182};
175 183
184#define T3_MAX_LRO_SES 8
185#define T3_MAX_LRO_MAX_PKTS 64
186
176struct sge_qset { /* an SGE queue set */ 187struct sge_qset { /* an SGE queue set */
177 struct adapter *adap; 188 struct adapter *adap;
178 struct napi_struct napi; 189 struct napi_struct napi;
179 struct sge_rspq rspq; 190 struct sge_rspq rspq;
180 struct sge_fl fl[SGE_RXQ_PER_SET]; 191 struct sge_fl fl[SGE_RXQ_PER_SET];
181 struct sge_txq txq[SGE_TXQ_PER_SET]; 192 struct sge_txq txq[SGE_TXQ_PER_SET];
193 struct net_lro_mgr lro_mgr;
194 struct net_lro_desc lro_desc[T3_MAX_LRO_SES];
195 struct skb_frag_struct *lro_frag_tbl;
196 int lro_nfrags;
197 int lro_enabled;
198 int lro_frag_len;
199 void *lro_va;
182 struct net_device *netdev; 200 struct net_device *netdev;
183 unsigned long txq_stopped; /* which Tx queues are stopped */ 201 unsigned long txq_stopped; /* which Tx queues are stopped */
184 struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ 202 struct timer_list tx_reclaim_timer; /* reclaims TX buffers */
diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index 579bee42a5cb..d444f5881f56 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -351,6 +351,7 @@ struct tp_params {
351 351
352struct qset_params { /* SGE queue set parameters */ 352struct qset_params { /* SGE queue set parameters */
353 unsigned int polling; /* polling/interrupt service for rspq */ 353 unsigned int polling; /* polling/interrupt service for rspq */
354 unsigned int lro; /* large receive offload */
354 unsigned int coalesce_usecs; /* irq coalescing timer */ 355 unsigned int coalesce_usecs; /* irq coalescing timer */
355 unsigned int rspq_size; /* # of entries in response queue */ 356 unsigned int rspq_size; /* # of entries in response queue */
356 unsigned int fl_size; /* # of entries in regular free list */ 357 unsigned int fl_size; /* # of entries in regular free list */
diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h
index 0a82fcddf2d8..68200a14065e 100644
--- a/drivers/net/cxgb3/cxgb3_ioctl.h
+++ b/drivers/net/cxgb3/cxgb3_ioctl.h
@@ -90,6 +90,7 @@ struct ch_qset_params {
90 int32_t fl_size[2]; 90 int32_t fl_size[2];
91 int32_t intr_lat; 91 int32_t intr_lat;
92 int32_t polling; 92 int32_t polling;
93 int32_t lro;
93 int32_t cong_thres; 94 int32_t cong_thres;
94}; 95};
95 96
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 3a3127216791..5447f3e60f07 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1212,6 +1212,9 @@ static char stats_strings[][ETH_GSTRING_LEN] = {
1212 "VLANinsertions ", 1212 "VLANinsertions ",
1213 "TxCsumOffload ", 1213 "TxCsumOffload ",
1214 "RxCsumGood ", 1214 "RxCsumGood ",
1215 "LroAggregated ",
1216 "LroFlushed ",
1217 "LroNoDesc ",
1215 "RxDrops ", 1218 "RxDrops ",
1216 1219
1217 "CheckTXEnToggled ", 1220 "CheckTXEnToggled ",
@@ -1340,6 +1343,9 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
1340 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_VLANINS); 1343 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_VLANINS);
1341 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM); 1344 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM);
1342 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD); 1345 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD);
1346 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_AGGR);
1347 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_FLUSHED);
1348 *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_NO_DESC);
1343 *data++ = s->rx_cong_drops; 1349 *data++ = s->rx_cong_drops;
1344 1350
1345 *data++ = s->num_toggled; 1351 *data++ = s->num_toggled;
@@ -1558,6 +1564,13 @@ static int set_rx_csum(struct net_device *dev, u32 data)
1558 struct port_info *p = netdev_priv(dev); 1564 struct port_info *p = netdev_priv(dev);
1559 1565
1560 p->rx_csum_offload = data; 1566 p->rx_csum_offload = data;
1567 if (!data) {
1568 struct adapter *adap = p->adapter;
1569 int i;
1570
1571 for (i = p->first_qset; i < p->first_qset + p->nqsets; i++)
1572 adap->sge.qs[i].lro_enabled = 0;
1573 }
1561 return 0; 1574 return 0;
1562} 1575}
1563 1576
@@ -1830,6 +1843,11 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
1830 } 1843 }
1831 } 1844 }
1832 } 1845 }
1846 if (t.lro >= 0) {
1847 struct sge_qset *qs = &adapter->sge.qs[t.qset_idx];
1848 q->lro = t.lro;
1849 qs->lro_enabled = t.lro;
1850 }
1833 break; 1851 break;
1834 } 1852 }
1835 case CHELSIO_GET_QSET_PARAMS:{ 1853 case CHELSIO_GET_QSET_PARAMS:{
@@ -1849,6 +1867,7 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
1849 t.fl_size[0] = q->fl_size; 1867 t.fl_size[0] = q->fl_size;
1850 t.fl_size[1] = q->jumbo_size; 1868 t.fl_size[1] = q->jumbo_size;
1851 t.polling = q->polling; 1869 t.polling = q->polling;
1870 t.lro = q->lro;
1852 t.intr_lat = q->coalesce_usecs; 1871 t.intr_lat = q->coalesce_usecs;
1853 t.cong_thres = q->cong_thres; 1872 t.cong_thres = q->cong_thres;
1854 1873
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 796eb305cdc3..a96331c875e6 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -55,6 +55,9 @@
55 * directly. 55 * directly.
56 */ 56 */
57#define FL0_PG_CHUNK_SIZE 2048 57#define FL0_PG_CHUNK_SIZE 2048
58#define FL0_PG_ORDER 0
59#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
60#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
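The two FL1 macros above decide how the jumbo free list carves receive buffers out of compound pages. A stand-alone sketch of the resulting geometry for two common page sizes (the arithmetic is taken directly from the macros; nothing else is assumed):

#include <stdio.h>

/* Reproduces the FL1 chunking arithmetic for PAGE_SIZE 4096 and 65536. */
static void fl1_geometry(unsigned long page_size)
{
	unsigned long chunk = page_size > 8192 ? 16384 : 8192;	/* FL1_PG_CHUNK_SIZE */
	unsigned int order = page_size > 8192 ? 0 : 1;		/* FL1_PG_ORDER */
	unsigned long span = page_size << order;		/* bytes per allocation */

	printf("PAGE_SIZE=%lu: chunk=%lu order=%u chunks per allocation=%lu\n",
	       page_size, chunk, order, span / chunk);
}

int main(void)
{
	fl1_geometry(4096);	/* one 8KB chunk per order-1 (8KB) allocation */
	fl1_geometry(65536);	/* four 16KB chunks per order-0 (64KB) page */
	return 0;
}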
58 61
59#define SGE_RX_DROP_THRES 16 62#define SGE_RX_DROP_THRES 16
60 63
@@ -359,7 +362,7 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
359 } 362 }
360 363
361 if (q->pg_chunk.page) { 364 if (q->pg_chunk.page) {
362 __free_page(q->pg_chunk.page); 365 __free_pages(q->pg_chunk.page, q->order);
363 q->pg_chunk.page = NULL; 366 q->pg_chunk.page = NULL;
364 } 367 }
365} 368}
@@ -376,13 +379,16 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
376 * Add a buffer of the given length to the supplied HW and SW Rx 379 * Add a buffer of the given length to the supplied HW and SW Rx
377 * descriptors. 380 * descriptors.
378 */ 381 */
379static inline void add_one_rx_buf(void *va, unsigned int len, 382static inline int add_one_rx_buf(void *va, unsigned int len,
380 struct rx_desc *d, struct rx_sw_desc *sd, 383 struct rx_desc *d, struct rx_sw_desc *sd,
381 unsigned int gen, struct pci_dev *pdev) 384 unsigned int gen, struct pci_dev *pdev)
382{ 385{
383 dma_addr_t mapping; 386 dma_addr_t mapping;
384 387
385 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); 388 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
389 if (unlikely(pci_dma_mapping_error(mapping)))
390 return -ENOMEM;
391
386 pci_unmap_addr_set(sd, dma_addr, mapping); 392 pci_unmap_addr_set(sd, dma_addr, mapping);
387 393
388 d->addr_lo = cpu_to_be32(mapping); 394 d->addr_lo = cpu_to_be32(mapping);
@@ -390,12 +396,14 @@ static inline void add_one_rx_buf(void *va, unsigned int len,
390 wmb(); 396 wmb();
391 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); 397 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
392 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); 398 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
399 return 0;
393} 400}
394 401
395static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp) 402static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
403 unsigned int order)
396{ 404{
397 if (!q->pg_chunk.page) { 405 if (!q->pg_chunk.page) {
398 q->pg_chunk.page = alloc_page(gfp); 406 q->pg_chunk.page = alloc_pages(gfp, order);
399 if (unlikely(!q->pg_chunk.page)) 407 if (unlikely(!q->pg_chunk.page))
400 return -ENOMEM; 408 return -ENOMEM;
401 q->pg_chunk.va = page_address(q->pg_chunk.page); 409 q->pg_chunk.va = page_address(q->pg_chunk.page);
@@ -404,7 +412,7 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
404 sd->pg_chunk = q->pg_chunk; 412 sd->pg_chunk = q->pg_chunk;
405 413
406 q->pg_chunk.offset += q->buf_size; 414 q->pg_chunk.offset += q->buf_size;
407 if (q->pg_chunk.offset == PAGE_SIZE) 415 if (q->pg_chunk.offset == (PAGE_SIZE << order))
408 q->pg_chunk.page = NULL; 416 q->pg_chunk.page = NULL;
409 else { 417 else {
410 q->pg_chunk.va += q->buf_size; 418 q->pg_chunk.va += q->buf_size;
@@ -424,15 +432,18 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
424 * allocated with the supplied gfp flags. The caller must assure that 432 * allocated with the supplied gfp flags. The caller must assure that
425 * @n does not exceed the queue's capacity. 433 * @n does not exceed the queue's capacity.
426 */ 434 */
427static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) 435static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
428{ 436{
429 void *buf_start; 437 void *buf_start;
430 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 438 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
431 struct rx_desc *d = &q->desc[q->pidx]; 439 struct rx_desc *d = &q->desc[q->pidx];
440 unsigned int count = 0;
432 441
433 while (n--) { 442 while (n--) {
443 int err;
444
434 if (q->use_pages) { 445 if (q->use_pages) {
435 if (unlikely(alloc_pg_chunk(q, sd, gfp))) { 446 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
436nomem: q->alloc_failed++; 447nomem: q->alloc_failed++;
437 break; 448 break;
438 } 449 }
@@ -447,8 +458,16 @@ nomem: q->alloc_failed++;
447 buf_start = skb->data; 458 buf_start = skb->data;
448 } 459 }
449 460
450 add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen, 461 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
451 adap->pdev); 462 adap->pdev);
463 if (unlikely(err)) {
464 if (!q->use_pages) {
465 kfree_skb(sd->skb);
466 sd->skb = NULL;
467 }
468 break;
469 }
470
452 d++; 471 d++;
453 sd++; 472 sd++;
454 if (++q->pidx == q->size) { 473 if (++q->pidx == q->size) {
@@ -458,14 +477,19 @@ nomem: q->alloc_failed++;
458 d = q->desc; 477 d = q->desc;
459 } 478 }
460 q->credits++; 479 q->credits++;
480 count++;
461 } 481 }
462 wmb(); 482 wmb();
463 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 483 if (likely(count))
484 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
485
486 return count;
464} 487}
465 488
466static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) 489static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
467{ 490{
468 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC); 491 refill_fl(adap, fl, min(16U, fl->size - fl->credits),
492 GFP_ATOMIC | __GFP_COMP);
469} 493}
470 494
471/** 495/**
@@ -560,6 +584,8 @@ static void t3_reset_qset(struct sge_qset *q)
560 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); 584 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
561 q->txq_stopped = 0; 585 q->txq_stopped = 0;
562 memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer)); 586 memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
587 kfree(q->lro_frag_tbl);
588 q->lro_nfrags = q->lro_frag_len = 0;
563} 589}
564 590
565 591
@@ -740,19 +766,22 @@ use_orig_buf:
740 * that are page chunks rather than sk_buffs. 766 * that are page chunks rather than sk_buffs.
741 */ 767 */
742static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, 768static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
743 unsigned int len, unsigned int drop_thres) 769 struct sge_rspq *q, unsigned int len,
770 unsigned int drop_thres)
744{ 771{
745 struct sk_buff *skb = NULL; 772 struct sk_buff *newskb, *skb;
746 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 773 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
747 774
748 if (len <= SGE_RX_COPY_THRES) { 775 newskb = skb = q->pg_skb;
749 skb = alloc_skb(len, GFP_ATOMIC); 776
750 if (likely(skb != NULL)) { 777 if (!skb && (len <= SGE_RX_COPY_THRES)) {
751 __skb_put(skb, len); 778 newskb = alloc_skb(len, GFP_ATOMIC);
779 if (likely(newskb != NULL)) {
780 __skb_put(newskb, len);
752 pci_dma_sync_single_for_cpu(adap->pdev, 781 pci_dma_sync_single_for_cpu(adap->pdev,
753 pci_unmap_addr(sd, dma_addr), len, 782 pci_unmap_addr(sd, dma_addr), len,
754 PCI_DMA_FROMDEVICE); 783 PCI_DMA_FROMDEVICE);
755 memcpy(skb->data, sd->pg_chunk.va, len); 784 memcpy(newskb->data, sd->pg_chunk.va, len);
756 pci_dma_sync_single_for_device(adap->pdev, 785 pci_dma_sync_single_for_device(adap->pdev,
757 pci_unmap_addr(sd, dma_addr), len, 786 pci_unmap_addr(sd, dma_addr), len,
758 PCI_DMA_FROMDEVICE); 787 PCI_DMA_FROMDEVICE);
@@ -761,14 +790,16 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
761recycle: 790recycle:
762 fl->credits--; 791 fl->credits--;
763 recycle_rx_buf(adap, fl, fl->cidx); 792 recycle_rx_buf(adap, fl, fl->cidx);
764 return skb; 793 q->rx_recycle_buf++;
794 return newskb;
765 } 795 }
766 796
767 if (unlikely(fl->credits <= drop_thres)) 797 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
768 goto recycle; 798 goto recycle;
769 799
770 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); 800 if (!skb)
771 if (unlikely(!skb)) { 801 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
802 if (unlikely(!newskb)) {
772 if (!drop_thres) 803 if (!drop_thres)
773 return NULL; 804 return NULL;
774 goto recycle; 805 goto recycle;
@@ -776,21 +807,29 @@ recycle:
776 807
777 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), 808 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
778 fl->buf_size, PCI_DMA_FROMDEVICE); 809 fl->buf_size, PCI_DMA_FROMDEVICE);
779 __skb_put(skb, SGE_RX_PULL_LEN); 810 if (!skb) {
780 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); 811 __skb_put(newskb, SGE_RX_PULL_LEN);
781 skb_fill_page_desc(skb, 0, sd->pg_chunk.page, 812 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
782 sd->pg_chunk.offset + SGE_RX_PULL_LEN, 813 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
783 len - SGE_RX_PULL_LEN); 814 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
784 skb->len = len; 815 len - SGE_RX_PULL_LEN);
785 skb->data_len = len - SGE_RX_PULL_LEN; 816 newskb->len = len;
786 skb->truesize += skb->data_len; 817 newskb->data_len = len - SGE_RX_PULL_LEN;
818 } else {
819 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
820 sd->pg_chunk.page,
821 sd->pg_chunk.offset, len);
822 newskb->len += len;
823 newskb->data_len += len;
824 }
825 newskb->truesize += newskb->data_len;
787 826
788 fl->credits--; 827 fl->credits--;
789 /* 828 /*
790 * We do not refill FLs here, we let the caller do it to overlap a 829 * We do not refill FLs here, we let the caller do it to overlap a
791 * prefetch. 830 * prefetch.
792 */ 831 */
793 return skb; 832 return newskb;
794} 833}
795 834
796/** 835/**
@@ -1831,9 +1870,10 @@ static void restart_tx(struct sge_qset *qs)
1831 * if it was immediate data in a response. 1870 * if it was immediate data in a response.
1832 */ 1871 */
1833static void rx_eth(struct adapter *adap, struct sge_rspq *rq, 1872static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1834 struct sk_buff *skb, int pad) 1873 struct sk_buff *skb, int pad, int lro)
1835{ 1874{
1836 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad); 1875 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1876 struct sge_qset *qs = rspq_to_qset(rq);
1837 struct port_info *pi; 1877 struct port_info *pi;
1838 1878
1839 skb_pull(skb, sizeof(*p) + pad); 1879 skb_pull(skb, sizeof(*p) + pad);
@@ -1850,18 +1890,202 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1850 if (unlikely(p->vlan_valid)) { 1890 if (unlikely(p->vlan_valid)) {
1851 struct vlan_group *grp = pi->vlan_grp; 1891 struct vlan_group *grp = pi->vlan_grp;
1852 1892
1853 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++; 1893 qs->port_stats[SGE_PSTAT_VLANEX]++;
1854 if (likely(grp)) 1894 if (likely(grp))
1855 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan), 1895 if (lro)
1856 rq->polling); 1896 lro_vlan_hwaccel_receive_skb(&qs->lro_mgr, skb,
1897 grp,
1898 ntohs(p->vlan),
1899 p);
1900 else
1901 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1902 rq->polling);
1857 else 1903 else
1858 dev_kfree_skb_any(skb); 1904 dev_kfree_skb_any(skb);
1859 } else if (rq->polling) 1905 } else if (rq->polling) {
1860 netif_receive_skb(skb); 1906 if (lro)
1861 else 1907 lro_receive_skb(&qs->lro_mgr, skb, p);
1908 else
1909 netif_receive_skb(skb);
1910 } else
1862 netif_rx(skb); 1911 netif_rx(skb);
1863} 1912}
1864 1913
1914static inline int is_eth_tcp(u32 rss)
1915{
1916 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1917}
1918
1919/**
1920 * lro_frame_ok - check if an ingress packet is eligible for LRO
1921 * @p: the CPL header of the packet
1922 *
1923 * Returns true if a received packet is eligible for LRO.
1924 * The following conditions must be true:
1925 * - packet is TCP/IP Ethernet II (checked elsewhere)
1926 * - not an IP fragment
1927 * - no IP options
1928 * - TCP/IP checksums are correct
1929 * - the packet is for this host
1930 */
1931static inline int lro_frame_ok(const struct cpl_rx_pkt *p)
1932{
1933 const struct ethhdr *eh = (struct ethhdr *)(p + 1);
1934 const struct iphdr *ih = (struct iphdr *)(eh + 1);
1935
1936 return (*((u8 *)p + 1) & 0x90) == 0x10 && p->csum == htons(0xffff) &&
1937 eh->h_proto == htons(ETH_P_IP) && ih->ihl == (sizeof(*ih) >> 2);
1938}
1939
1940#define TCP_FLAG_MASK (TCP_FLAG_CWR | TCP_FLAG_ECE | TCP_FLAG_URG |\
1941 TCP_FLAG_ACK | TCP_FLAG_PSH | TCP_FLAG_RST |\
1942 TCP_FLAG_SYN | TCP_FLAG_FIN)
1943#define TSTAMP_WORD ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |\
1944 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)
1945
1946/**
1947 * lro_segment_ok - check if a TCP segment is eligible for LRO
1948 * @tcph: the TCP header of the packet
1949 *
1950 * Returns true if a TCP packet is eligible for LRO. This requires that
1951 * the packet have only the ACK flag set and no TCP options besides
1952 * time stamps.
1953 */
1954static inline int lro_segment_ok(const struct tcphdr *tcph)
1955{
1956 int optlen;
1957
1958 if (unlikely((tcp_flag_word(tcph) & TCP_FLAG_MASK) != TCP_FLAG_ACK))
1959 return 0;
1960
1961 optlen = (tcph->doff << 2) - sizeof(*tcph);
1962 if (optlen) {
1963 const u32 *opt = (const u32 *)(tcph + 1);
1964
1965 if (optlen != TCPOLEN_TSTAMP_ALIGNED ||
1966 *opt != htonl(TSTAMP_WORD) || !opt[2])
1967 return 0;
1968 }
1969 return 1;
1970}
1971
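TSTAMP_WORD above encodes the only option block lro_segment_ok() tolerates: two NOPs padding a 10-byte timestamp option. A stand-alone check of that value, using the standard TCP option constants (the numeric values mirror <net/tcp.h>):

#include <stdio.h>

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

int main(void)
{
	unsigned int tstamp_word = (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP;

	/* NOP, NOP, kind=8, len=10 packs to 0x0101080a */
	printf("TSTAMP_WORD = 0x%08x\n", tstamp_word);
	return 0;
}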
1972static int t3_get_lro_header(void **eh, void **iph, void **tcph,
1973 u64 *hdr_flags, void *priv)
1974{
1975 const struct cpl_rx_pkt *cpl = priv;
1976
1977 if (!lro_frame_ok(cpl))
1978 return -1;
1979
1980 *eh = (struct ethhdr *)(cpl + 1);
1981 *iph = (struct iphdr *)((struct ethhdr *)*eh + 1);
1982 *tcph = (struct tcphdr *)((struct iphdr *)*iph + 1);
1983
1984 if (!lro_segment_ok(*tcph))
1985 return -1;
1986
1987 *hdr_flags = LRO_IPV4 | LRO_TCP;
1988 return 0;
1989}
1990
1991static int t3_get_skb_header(struct sk_buff *skb,
1992 void **iph, void **tcph, u64 *hdr_flags,
1993 void *priv)
1994{
1995 void *eh;
1996
1997 return t3_get_lro_header(&eh, iph, tcph, hdr_flags, priv);
1998}
1999
2000static int t3_get_frag_header(struct skb_frag_struct *frag, void **eh,
2001 void **iph, void **tcph, u64 *hdr_flags,
2002 void *priv)
2003{
2004 return t3_get_lro_header(eh, iph, tcph, hdr_flags, priv);
2005}
2006
2007/**
2008 * lro_add_page - add a page chunk to an LRO session
2009 * @adap: the adapter
2010 * @qs: the associated queue set
2011 * @fl: the free list containing the page chunk to add
2012 * @len: packet length
2013 * @complete: Indicates the last fragment of a frame
2014 *
2015 * Add a received packet contained in a page chunk to an existing LRO
2016 * session.
2017 */
2018static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2019 struct sge_fl *fl, int len, int complete)
2020{
2021 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2022 struct cpl_rx_pkt *cpl;
2023 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl;
2024 int nr_frags = qs->lro_nfrags, frag_len = qs->lro_frag_len;
2025 int offset = 0;
2026
2027 if (!nr_frags) {
2028 offset = 2 + sizeof(struct cpl_rx_pkt);
2029 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2030 }
2031
2032 fl->credits--;
2033
2034 len -= offset;
2035 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2036 fl->buf_size, PCI_DMA_FROMDEVICE);
2037
2038 rx_frag += nr_frags;
2039 rx_frag->page = sd->pg_chunk.page;
2040 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2041 rx_frag->size = len;
2042 frag_len += len;
2043 qs->lro_nfrags++;
2044 qs->lro_frag_len = frag_len;
2045
2046 if (!complete)
2047 return;
2048
2049 qs->lro_nfrags = qs->lro_frag_len = 0;
2050 cpl = qs->lro_va;
2051
2052 if (unlikely(cpl->vlan_valid)) {
2053 struct net_device *dev = qs->netdev;
2054 struct port_info *pi = netdev_priv(dev);
2055 struct vlan_group *grp = pi->vlan_grp;
2056
2057 if (likely(grp != NULL)) {
2058 lro_vlan_hwaccel_receive_frags(&qs->lro_mgr,
2059 qs->lro_frag_tbl,
2060 frag_len, frag_len,
2061 grp, ntohs(cpl->vlan),
2062 cpl, 0);
2063 return;
2064 }
2065 }
2066 lro_receive_frags(&qs->lro_mgr, qs->lro_frag_tbl,
2067 frag_len, frag_len, cpl, 0);
2068}
2069
2070/**
2071 * init_lro_mgr - initialize a LRO manager object
2072 * @lro_mgr: the LRO manager object
2073 */
2074static void init_lro_mgr(struct sge_qset *qs, struct net_lro_mgr *lro_mgr)
2075{
2076 lro_mgr->dev = qs->netdev;
2077 lro_mgr->features = LRO_F_NAPI;
2078 lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;
2079 lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
2080 lro_mgr->max_desc = T3_MAX_LRO_SES;
2081 lro_mgr->lro_arr = qs->lro_desc;
2082 lro_mgr->get_frag_header = t3_get_frag_header;
2083 lro_mgr->get_skb_header = t3_get_skb_header;
2084 lro_mgr->max_aggr = T3_MAX_LRO_MAX_PKTS;
2085 if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
2086 lro_mgr->max_aggr = MAX_SKB_FRAGS;
2087}
2088
1865/** 2089/**
1866 * handle_rsp_cntrl_info - handles control information in a response 2090 * handle_rsp_cntrl_info - handles control information in a response
1867 * @qs: the queue set corresponding to the response 2091 * @qs: the queue set corresponding to the response
@@ -1947,6 +2171,12 @@ static inline int is_new_response(const struct rsp_desc *r,
1947 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2171 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1948} 2172}
1949 2173
2174static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2175{
2176 q->pg_skb = NULL;
2177 q->rx_recycle_buf = 0;
2178}
2179
1950#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2180#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1951#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2181#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1952 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2182 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
@@ -1984,10 +2214,11 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
1984 q->next_holdoff = q->holdoff_tmr; 2214 q->next_holdoff = q->holdoff_tmr;
1985 2215
1986 while (likely(budget_left && is_new_response(r, q))) { 2216 while (likely(budget_left && is_new_response(r, q))) {
1987 int eth, ethpad = 2; 2217 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
1988 struct sk_buff *skb = NULL; 2218 struct sk_buff *skb = NULL;
1989 u32 len, flags = ntohl(r->flags); 2219 u32 len, flags = ntohl(r->flags);
1990 __be32 rss_hi = *(const __be32 *)r, rss_lo = r->rss_hdr.rss_hash_val; 2220 __be32 rss_hi = *(const __be32 *)r,
2221 rss_lo = r->rss_hdr.rss_hash_val;
1991 2222
1992 eth = r->rss_hdr.opcode == CPL_RX_PKT; 2223 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1993 2224
@@ -2015,6 +2246,9 @@ no_mem:
2015 } else if ((len = ntohl(r->len_cq)) != 0) { 2246 } else if ((len = ntohl(r->len_cq)) != 0) {
2016 struct sge_fl *fl; 2247 struct sge_fl *fl;
2017 2248
2249 if (eth)
2250 lro = qs->lro_enabled && is_eth_tcp(rss_hi);
2251
2018 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2252 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2019 if (fl->use_pages) { 2253 if (fl->use_pages) {
2020 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; 2254 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
@@ -2024,9 +2258,18 @@ no_mem:
2024 prefetch(addr + L1_CACHE_BYTES); 2258 prefetch(addr + L1_CACHE_BYTES);
2025#endif 2259#endif
2026 __refill_fl(adap, fl); 2260 __refill_fl(adap, fl);
2261 if (lro > 0) {
2262 lro_add_page(adap, qs, fl,
2263 G_RSPD_LEN(len),
2264 flags & F_RSPD_EOP);
2265 goto next_fl;
2266 }
2027 2267
2028 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len), 2268 skb = get_packet_pg(adap, fl, q,
2029 eth ? SGE_RX_DROP_THRES : 0); 2269 G_RSPD_LEN(len),
2270 eth ?
2271 SGE_RX_DROP_THRES : 0);
2272 q->pg_skb = skb;
2030 } else 2273 } else
2031 skb = get_packet(adap, fl, G_RSPD_LEN(len), 2274 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2032 eth ? SGE_RX_DROP_THRES : 0); 2275 eth ? SGE_RX_DROP_THRES : 0);
@@ -2036,7 +2279,7 @@ no_mem:
2036 q->rx_drops++; 2279 q->rx_drops++;
2037 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT)) 2280 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2038 __skb_pull(skb, 2); 2281 __skb_pull(skb, 2);
2039 2282next_fl:
2040 if (++fl->cidx == fl->size) 2283 if (++fl->cidx == fl->size)
2041 fl->cidx = 0; 2284 fl->cidx = 0;
2042 } else 2285 } else
@@ -2060,9 +2303,13 @@ no_mem:
2060 q->credits = 0; 2303 q->credits = 0;
2061 } 2304 }
2062 2305
2063 if (likely(skb != NULL)) { 2306 packet_complete = flags &
2307 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2308 F_RSPD_ASYNC_NOTIF);
2309
2310 if (skb != NULL && packet_complete) {
2064 if (eth) 2311 if (eth)
2065 rx_eth(adap, q, skb, ethpad); 2312 rx_eth(adap, q, skb, ethpad, lro);
2066 else { 2313 else {
2067 q->offload_pkts++; 2314 q->offload_pkts++;
2068 /* Preserve the RSS info in csum & priority */ 2315 /* Preserve the RSS info in csum & priority */
@@ -2072,11 +2319,19 @@ no_mem:
2072 offload_skbs, 2319 offload_skbs,
2073 ngathered); 2320 ngathered);
2074 } 2321 }
2322
2323 if (flags & F_RSPD_EOP)
2324 clear_rspq_bufstate(q);
2075 } 2325 }
2076 --budget_left; 2326 --budget_left;
2077 } 2327 }
2078 2328
2079 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered); 2329 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2330 lro_flush_all(&qs->lro_mgr);
2331 qs->port_stats[SGE_PSTAT_LRO_AGGR] = qs->lro_mgr.stats.aggregated;
2332 qs->port_stats[SGE_PSTAT_LRO_FLUSHED] = qs->lro_mgr.stats.flushed;
2333 qs->port_stats[SGE_PSTAT_LRO_NO_DESC] = qs->lro_mgr.stats.no_desc;
2334
2080 if (sleeping) 2335 if (sleeping)
2081 check_ring_db(adap, qs, sleeping); 2336 check_ring_db(adap, qs, sleeping);
2082 2337
@@ -2618,8 +2873,9 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2618 int irq_vec_idx, const struct qset_params *p, 2873 int irq_vec_idx, const struct qset_params *p,
2619 int ntxq, struct net_device *dev) 2874 int ntxq, struct net_device *dev)
2620{ 2875{
2621 int i, ret = -ENOMEM; 2876 int i, avail, ret = -ENOMEM;
2622 struct sge_qset *q = &adapter->sge.qs[id]; 2877 struct sge_qset *q = &adapter->sge.qs[id];
2878 struct net_lro_mgr *lro_mgr = &q->lro_mgr;
2623 2879
2624 init_qset_cntxt(q, id); 2880 init_qset_cntxt(q, id);
2625 init_timer(&q->tx_reclaim_timer); 2881 init_timer(&q->tx_reclaim_timer);
@@ -2687,11 +2943,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2687#else 2943#else
2688 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data); 2944 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2689#endif 2945#endif
2690 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0; 2946#if FL1_PG_CHUNK_SIZE > 0
2947 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2948#else
2691 q->fl[1].buf_size = is_offload(adapter) ? 2949 q->fl[1].buf_size = is_offload(adapter) ?
2692 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 2950 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2693 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt); 2951 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2952#endif
2694 2953
2954 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2955 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2956 q->fl[0].order = FL0_PG_ORDER;
2957 q->fl[1].order = FL1_PG_ORDER;
2958
2959 q->lro_frag_tbl = kcalloc(MAX_FRAME_SIZE / FL1_PG_CHUNK_SIZE + 1,
2960 sizeof(struct skb_frag_struct),
2961 GFP_KERNEL);
2962 q->lro_nfrags = q->lro_frag_len = 0;
2695 spin_lock_irq(&adapter->sge.reg_lock); 2963 spin_lock_irq(&adapter->sge.reg_lock);
2696 2964
2697 /* FL threshold comparison uses < */ 2965 /* FL threshold comparison uses < */
@@ -2742,8 +3010,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2742 q->netdev = dev; 3010 q->netdev = dev;
2743 t3_update_qset_coalesce(q, p); 3011 t3_update_qset_coalesce(q, p);
2744 3012
2745 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL); 3013 init_lro_mgr(q, lro_mgr);
2746 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL); 3014
3015 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3016 GFP_KERNEL | __GFP_COMP);
3017 if (!avail) {
3018 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3019 goto err;
3020 }
3021 if (avail < q->fl[0].size)
3022 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3023 avail);
3024
3025 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3026 GFP_KERNEL | __GFP_COMP);
3027 if (avail < q->fl[1].size)
3028 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3029 avail);
2747 refill_rspq(adapter, &q->rspq, q->rspq.size - 1); 3030 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2748 3031
2749 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 3032 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
@@ -2752,9 +3035,9 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2752 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); 3035 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2753 return 0; 3036 return 0;
2754 3037
2755 err_unlock: 3038err_unlock:
2756 spin_unlock_irq(&adapter->sge.reg_lock); 3039 spin_unlock_irq(&adapter->sge.reg_lock);
2757 err: 3040err:
2758 t3_free_qset(adapter, q); 3041 t3_free_qset(adapter, q);
2759 return ret; 3042 return ret;
2760} 3043}
@@ -2876,7 +3159,7 @@ void t3_sge_prep(struct adapter *adap, struct sge_params *p)
2876 q->coalesce_usecs = 5; 3159 q->coalesce_usecs = 5;
2877 q->rspq_size = 1024; 3160 q->rspq_size = 1024;
2878 q->fl_size = 1024; 3161 q->fl_size = 1024;
2879 q->jumbo_size = 512; 3162 q->jumbo_size = 512;
2880 q->txq_size[TXQ_ETH] = 1024; 3163 q->txq_size[TXQ_ETH] = 1024;
2881 q->txq_size[TXQ_OFLD] = 1024; 3164 q->txq_size[TXQ_OFLD] = 1024;
2882 q->txq_size[TXQ_CTRL] = 256; 3165 q->txq_size[TXQ_CTRL] = 256;
diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h
index b7a1a310dfd4..a666c5d51cc0 100644
--- a/drivers/net/cxgb3/t3_cpl.h
+++ b/drivers/net/cxgb3/t3_cpl.h
@@ -174,6 +174,13 @@ enum { /* TCP congestion control algorithms */
174 CONG_ALG_HIGHSPEED 174 CONG_ALG_HIGHSPEED
175}; 175};
176 176
177enum { /* RSS hash type */
178 RSS_HASH_NONE = 0,
179 RSS_HASH_2_TUPLE = 1,
180 RSS_HASH_4_TUPLE = 2,
181 RSS_HASH_TCPV6 = 3
182};
183
177union opcode_tid { 184union opcode_tid {
178 __be32 opcode_tid; 185 __be32 opcode_tid;
179 __u8 opcode; 186 __u8 opcode;
@@ -184,6 +191,10 @@ union opcode_tid {
184#define G_OPCODE(x) (((x) >> S_OPCODE) & 0xFF) 191#define G_OPCODE(x) (((x) >> S_OPCODE) & 0xFF)
185#define G_TID(x) ((x) & 0xFFFFFF) 192#define G_TID(x) ((x) & 0xFFFFFF)
186 193
194#define S_HASHTYPE 22
195#define M_HASHTYPE 0x3
196#define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE)
197
187/* tid is assumed to be 24-bits */ 198/* tid is assumed to be 24-bits */
188#define MK_OPCODE_TID(opcode, tid) (V_OPCODE(opcode) | (tid)) 199#define MK_OPCODE_TID(opcode, tid) (V_OPCODE(opcode) | (tid))
189 200
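The HASHTYPE macros added above are what is_eth_tcp() in sge.c uses to gate LRO: the RSS hash type sits in bits 23:22 of the RSS header word, and RSS_HASH_4_TUPLE marks a TCP/IP packet. A stand-alone check of the extraction (the sample word is made up):

#include <stdio.h>

#define S_HASHTYPE	22
#define M_HASHTYPE	0x3
#define G_HASHTYPE(x)	(((x) >> S_HASHTYPE) & M_HASHTYPE)
#define RSS_HASH_4_TUPLE 2

int main(void)
{
	unsigned int rss = 2u << S_HASHTYPE;	/* pretend hardware reported 4-tuple */

	printf("hash type = %u, eligible for LRO: %s\n", G_HASHTYPE(rss),
	       G_HASHTYPE(rss) == RSS_HASH_4_TUPLE ? "yes" : "no");
	return 0;
}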
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index e233d04a2132..8277e89e552d 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -499,7 +499,7 @@ rio_timer (unsigned long data)
499 entry = np->old_rx % RX_RING_SIZE; 499 entry = np->old_rx % RX_RING_SIZE;
500 /* Dropped packets don't need to re-allocate */ 500 /* Dropped packets don't need to re-allocate */
501 if (np->rx_skbuff[entry] == NULL) { 501 if (np->rx_skbuff[entry] == NULL) {
502 skb = dev_alloc_skb (np->rx_buf_sz); 502 skb = netdev_alloc_skb (dev, np->rx_buf_sz);
503 if (skb == NULL) { 503 if (skb == NULL) {
504 np->rx_ring[entry].fraginfo = 0; 504 np->rx_ring[entry].fraginfo = 0;
505 printk (KERN_INFO 505 printk (KERN_INFO
@@ -570,7 +570,7 @@ alloc_list (struct net_device *dev)
570 /* Allocate the rx buffers */ 570 /* Allocate the rx buffers */
571 for (i = 0; i < RX_RING_SIZE; i++) { 571 for (i = 0; i < RX_RING_SIZE; i++) {
572 /* Allocated fixed size of skbuff */ 572 /* Allocated fixed size of skbuff */
573 struct sk_buff *skb = dev_alloc_skb (np->rx_buf_sz); 573 struct sk_buff *skb = netdev_alloc_skb (dev, np->rx_buf_sz);
574 np->rx_skbuff[i] = skb; 574 np->rx_skbuff[i] = skb;
575 if (skb == NULL) { 575 if (skb == NULL) {
576 printk (KERN_ERR 576 printk (KERN_ERR
@@ -867,7 +867,7 @@ receive_packet (struct net_device *dev)
867 PCI_DMA_FROMDEVICE); 867 PCI_DMA_FROMDEVICE);
868 skb_put (skb = np->rx_skbuff[entry], pkt_len); 868 skb_put (skb = np->rx_skbuff[entry], pkt_len);
869 np->rx_skbuff[entry] = NULL; 869 np->rx_skbuff[entry] = NULL;
870 } else if ((skb = dev_alloc_skb (pkt_len + 2)) != NULL) { 870 } else if ((skb = netdev_alloc_skb(dev, pkt_len + 2))) {
871 pci_dma_sync_single_for_cpu(np->pdev, 871 pci_dma_sync_single_for_cpu(np->pdev,
872 desc_to_dma(desc), 872 desc_to_dma(desc),
873 np->rx_buf_sz, 873 np->rx_buf_sz,
@@ -904,7 +904,7 @@ receive_packet (struct net_device *dev)
904 struct sk_buff *skb; 904 struct sk_buff *skb;
905 /* Dropped packets don't need to re-allocate */ 905 /* Dropped packets don't need to re-allocate */
906 if (np->rx_skbuff[entry] == NULL) { 906 if (np->rx_skbuff[entry] == NULL) {
907 skb = dev_alloc_skb (np->rx_buf_sz); 907 skb = netdev_alloc_skb(dev, np->rx_buf_sz);
908 if (skb == NULL) { 908 if (skb == NULL) {
909 np->rx_ring[entry].fraginfo = 0; 909 np->rx_ring[entry].fraginfo = 0;
910 printk (KERN_INFO 910 printk (KERN_INFO
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index e5c2380f50ca..3199526bcecb 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1140,11 +1140,11 @@ static void hamachi_tx_timeout(struct net_device *dev)
1140 } 1140 }
1141 /* Fill in the Rx buffers. Handle allocation failure gracefully. */ 1141 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
1142 for (i = 0; i < RX_RING_SIZE; i++) { 1142 for (i = 0; i < RX_RING_SIZE; i++) {
1143 struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz); 1143 struct sk_buff *skb = netdev_alloc_skb(dev, hmp->rx_buf_sz);
1144 hmp->rx_skbuff[i] = skb; 1144 hmp->rx_skbuff[i] = skb;
1145 if (skb == NULL) 1145 if (skb == NULL)
1146 break; 1146 break;
1147 skb->dev = dev; /* Mark as being used by this device. */ 1147
1148 skb_reserve(skb, 2); /* 16 byte align the IP header. */ 1148 skb_reserve(skb, 2); /* 16 byte align the IP header. */
1149 hmp->rx_ring[i].addr = cpu_to_leXX(pci_map_single(hmp->pci_dev, 1149 hmp->rx_ring[i].addr = cpu_to_leXX(pci_map_single(hmp->pci_dev,
1150 skb->data, hmp->rx_buf_sz, PCI_DMA_FROMDEVICE)); 1150 skb->data, hmp->rx_buf_sz, PCI_DMA_FROMDEVICE));
@@ -1178,14 +1178,6 @@ static void hamachi_init_ring(struct net_device *dev)
1178 hmp->cur_rx = hmp->cur_tx = 0; 1178 hmp->cur_rx = hmp->cur_tx = 0;
1179 hmp->dirty_rx = hmp->dirty_tx = 0; 1179 hmp->dirty_rx = hmp->dirty_tx = 0;
1180 1180
1181#if 0
1182 /* This is wrong. I'm not sure what the original plan was, but this
1183 * is wrong. An MTU of 1 gets you a buffer of 1536, while an MTU
1184 * of 1501 gets a buffer of 1533? -KDU
1185 */
1186 hmp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
1187#endif
1188 /* My attempt at a reasonable correction */
1189 /* +26 gets the maximum ethernet encapsulation, +7 & ~7 because the 1181 /* +26 gets the maximum ethernet encapsulation, +7 & ~7 because the
1190 * card needs room to do 8 byte alignment, +2 so we can reserve 1182 * card needs room to do 8 byte alignment, +2 so we can reserve
1191 * the first 2 bytes, and +16 gets room for the status word from the 1183 * the first 2 bytes, and +16 gets room for the status word from the
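As a rough check of the sizing that comment describes (a sketch only: the exact expression lives outside this hunk and may group the terms differently), a 1500-byte MTU works out as follows.

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500;
	/* +26 encapsulation, round up to 8 bytes, +2 reserve, +16 status word */
	unsigned int sz = ((mtu + 26 + 7) & ~7u) + 2 + 16;

	printf("rx_buf_sz for MTU %u = %u\n", mtu, sz);	/* 1546 */
	return 0;
}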
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index 484cb2ba717f..7111c65f0b30 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -108,14 +108,14 @@ static int ixpdev_rx(struct net_device *dev, int processed, int budget)
108 if (unlikely(!netif_running(nds[desc->channel]))) 108 if (unlikely(!netif_running(nds[desc->channel])))
109 goto err; 109 goto err;
110 110
111 skb = dev_alloc_skb(desc->pkt_length + 2); 111 skb = netdev_alloc_skb(dev, desc->pkt_length + 2);
112 if (likely(skb != NULL)) { 112 if (likely(skb != NULL)) {
113 skb_reserve(skb, 2); 113 skb_reserve(skb, 2);
114 skb_copy_to_linear_data(skb, buf, desc->pkt_length); 114 skb_copy_to_linear_data(skb, buf, desc->pkt_length);
115 skb_put(skb, desc->pkt_length); 115 skb_put(skb, desc->pkt_length);
116 skb->protocol = eth_type_trans(skb, nds[desc->channel]); 116 skb->protocol = eth_type_trans(skb, nds[desc->channel]);
117 117
118 skb->dev->last_rx = jiffies; 118 dev->last_rx = jiffies;
119 119
120 netif_receive_skb(skb); 120 netif_receive_skb(skb);
121 } 121 }
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 6eb2d31d1e34..284217c12839 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -53,7 +53,8 @@ config SMSC_PHY
53config BROADCOM_PHY 53config BROADCOM_PHY
54 tristate "Drivers for Broadcom PHYs" 54 tristate "Drivers for Broadcom PHYs"
55 ---help--- 55 ---help---
56 Currently supports the BCM5411, BCM5421 and BCM5461 PHYs. 56 Currently supports the BCM5411, BCM5421, BCM5461, BCM5464, BCM5481
57 and BCM5482 PHYs.
57 58
58config ICPLUS_PHY 59config ICPLUS_PHY
59 tristate "Drivers for ICPlus PHYs" 60 tristate "Drivers for ICPlus PHYs"
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 60c5cfe96918..4b4dc98ad165 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -24,6 +24,12 @@
24#define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ 24#define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */
25#define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ 25#define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */
26 26
27#define MII_BCM54XX_EXP_DATA 0x15 /* Expansion register data */
28#define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */
29#define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */
30#define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */
31
32#define MII_BCM54XX_AUX_CTL 0x18 /* Auxiliary control register */
27#define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */ 33#define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */
28#define MII_BCM54XX_IMR 0x1b /* BCM54xx interrupt mask register */ 34#define MII_BCM54XX_IMR 0x1b /* BCM54xx interrupt mask register */
29#define MII_BCM54XX_INT_CRCERR 0x0001 /* CRC error */ 35#define MII_BCM54XX_INT_CRCERR 0x0001 /* CRC error */
@@ -42,10 +48,120 @@
42#define MII_BCM54XX_INT_MDIX 0x2000 /* MDIX status change */ 48#define MII_BCM54XX_INT_MDIX 0x2000 /* MDIX status change */
43#define MII_BCM54XX_INT_PSERR 0x4000 /* Pair swap error */ 49#define MII_BCM54XX_INT_PSERR 0x4000 /* Pair swap error */
44 50
51#define MII_BCM54XX_SHD 0x1c /* 0x1c shadow registers */
52#define MII_BCM54XX_SHD_WRITE 0x8000
53#define MII_BCM54XX_SHD_VAL(x) ((x & 0x1f) << 10)
54#define MII_BCM54XX_SHD_DATA(x) ((x & 0x3ff) << 0)
55
56/*
57 * Broadcom LED source encodings. These are used in BCM5461, BCM5481,
58 * BCM5482, and possibly some others.
59 */
60#define BCM_LED_SRC_LINKSPD1 0x0
61#define BCM_LED_SRC_LINKSPD2 0x1
62#define BCM_LED_SRC_XMITLED 0x2
63#define BCM_LED_SRC_ACTIVITYLED 0x3
64#define BCM_LED_SRC_FDXLED 0x4
65#define BCM_LED_SRC_SLAVE 0x5
66#define BCM_LED_SRC_INTR 0x6
67#define BCM_LED_SRC_QUALITY 0x7
68#define BCM_LED_SRC_RCVLED 0x8
69#define BCM_LED_SRC_MULTICOLOR1 0xa
70#define BCM_LED_SRC_OPENSHORT 0xb
71#define BCM_LED_SRC_OFF 0xe /* Tied high */
72#define BCM_LED_SRC_ON 0xf /* Tied low */
73
74/*
75 * BCM5482: Shadow registers
76 * Shadow values go into bits [14:10] of register 0x1c to select a shadow
77 * register to access.
78 */
79#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */
80 /* LED3 / ~LINKSPD[2] selector */
81#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4)
82 /* LED1 / ~LINKSPD[1] selector */
83#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0)
84#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */
85#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */
86#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */
87#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */
88#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */
89
90/*
91 * BCM5482: Secondary SerDes registers
92 */
93#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */
94#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */
95#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */
96#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */
97#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */
98
99/*
100 * Device flags for PHYs that can be configured for different operating
101 * modes.
102 */
103#define PHY_BCM_FLAGS_VALID 0x80000000
104#define PHY_BCM_FLAGS_INTF_XAUI 0x00000020
105#define PHY_BCM_FLAGS_INTF_SGMII 0x00000010
106#define PHY_BCM_FLAGS_MODE_1000BX 0x00000002
107#define PHY_BCM_FLAGS_MODE_COPPER 0x00000001
108
45MODULE_DESCRIPTION("Broadcom PHY driver"); 109MODULE_DESCRIPTION("Broadcom PHY driver");
46MODULE_AUTHOR("Maciej W. Rozycki"); 110MODULE_AUTHOR("Maciej W. Rozycki");
47MODULE_LICENSE("GPL"); 111MODULE_LICENSE("GPL");
48 112
113/*
114 * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T
115 * 0x1c shadow registers.
116 */
117static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow)
118{
119 phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow));
120 return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD));
121}
122
123static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val)
124{
125 return phy_write(phydev, MII_BCM54XX_SHD,
126 MII_BCM54XX_SHD_WRITE |
127 MII_BCM54XX_SHD_VAL(shadow) |
128 MII_BCM54XX_SHD_DATA(val));
129}
130
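As a worked example of the 0x1c shadow encoding driven by bcm54xx_shadow_write() above (bit 15 is the write strobe, bits 14:10 select the shadow register, bits 9:0 carry the data), the LED selector write that bcm5482_config_init() performs later in this patch resolves to a single MDIO value; the stand-alone sketch below only reproduces that arithmetic.

#include <stdio.h>

/* Constants copied from the definitions added above; the computed value
 * is only a paper check of what the MDIO write would carry. */
#define SHD_WRITE		0x8000
#define SHD_VAL(x)		(((x) & 0x1f) << 10)
#define SHD_DATA(x)		(((x) & 0x3ff) << 0)
#define SHD_LEDS1		0x0d
#define LED_SRC_LINKSPD2	0x1
#define LED_SRC_ACTIVITYLED	0x3
#define LEDS1_LED3(src)		(((src) & 0xf) << 4)
#define LEDS1_LED1(src)		(((src) & 0xf) << 0)

int main(void)
{
	unsigned int data = LEDS1_LED3(LED_SRC_LINKSPD2) |
			    LEDS1_LED1(LED_SRC_ACTIVITYLED);
	unsigned int word = SHD_WRITE | SHD_VAL(SHD_LEDS1) | SHD_DATA(data);

	printf("register 0x1c <- 0x%04x\n", word);	/* 0xb413 */
	return 0;
}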
131/*
132 * Indirect register access functions for the Expansion Registers
133 * and Secondary SerDes registers (when sec_serdes=1).
134 */
135static int bcm54xx_exp_read(struct phy_device *phydev,
136 int sec_serdes, u8 regnum)
137{
138 int val;
139
140 phy_write(phydev, MII_BCM54XX_EXP_SEL,
141 (sec_serdes ? MII_BCM54XX_EXP_SEL_SSD :
142 MII_BCM54XX_EXP_SEL_ER) |
143 regnum);
144 val = phy_read(phydev, MII_BCM54XX_EXP_DATA);
145 phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum);
146
147 return val;
148}
149
150static int bcm54xx_exp_write(struct phy_device *phydev,
151 int sec_serdes, u8 regnum, u16 val)
152{
153 int ret;
154
155 phy_write(phydev, MII_BCM54XX_EXP_SEL,
156 (sec_serdes ? MII_BCM54XX_EXP_SEL_SSD :
157 MII_BCM54XX_EXP_SEL_ER) |
158 regnum);
159 ret = phy_write(phydev, MII_BCM54XX_EXP_DATA, val);
160 phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum);
161
162 return ret;
163}
164
49static int bcm54xx_config_init(struct phy_device *phydev) 165static int bcm54xx_config_init(struct phy_device *phydev)
50{ 166{
51 int reg, err; 167 int reg, err;
@@ -70,6 +186,87 @@ static int bcm54xx_config_init(struct phy_device *phydev)
70 return 0; 186 return 0;
71} 187}
72 188
189static int bcm5482_config_init(struct phy_device *phydev)
190{
191 int err, reg;
192
193 err = bcm54xx_config_init(phydev);
194
195 if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) {
196 /*
197 * Enable secondary SerDes and its use as an LED source
198 */
199 reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_SSD);
200 bcm54xx_shadow_write(phydev, BCM5482_SHD_SSD,
201 reg |
202 BCM5482_SHD_SSD_LEDM |
203 BCM5482_SHD_SSD_EN);
204
205 /*
206 * Enable SGMII slave mode and auto-detection
207 */
208 reg = bcm54xx_exp_read(phydev, 1, BCM5482_SSD_SGMII_SLAVE);
209 bcm54xx_exp_write(phydev, 1, BCM5482_SSD_SGMII_SLAVE,
210 reg |
211 BCM5482_SSD_SGMII_SLAVE_EN |
212 BCM5482_SSD_SGMII_SLAVE_AD);
213
214 /*
215 * Disable secondary SerDes powerdown
216 */
217 reg = bcm54xx_exp_read(phydev, 1, BCM5482_SSD_1000BX_CTL);
218 bcm54xx_exp_write(phydev, 1, BCM5482_SSD_1000BX_CTL,
219 reg & ~BCM5482_SSD_1000BX_CTL_PWRDOWN);
220
221 /*
222 * Select 1000BASE-X register set (primary SerDes)
223 */
224 reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_MODE);
225 bcm54xx_shadow_write(phydev, BCM5482_SHD_MODE,
226 reg | BCM5482_SHD_MODE_1000BX);
227
228 /*
229 * LED1=ACTIVITYLED, LED3=LINKSPD[2]
230 * (Use LED1 as secondary SerDes ACTIVITY LED)
231 */
232 bcm54xx_shadow_write(phydev, BCM5482_SHD_LEDS1,
233 BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_ACTIVITYLED) |
234 BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_LINKSPD2));
235
236 /*
237 * Auto-negotiation doesn't seem to work quite right
238 * in this mode, so we disable it and force it to the
239 * right speed/duplex setting. Only 'link status'
240 * is important.
241 */
242 phydev->autoneg = AUTONEG_DISABLE;
243 phydev->speed = SPEED_1000;
244 phydev->duplex = DUPLEX_FULL;
245 }
246
247 return err;
248}
249
250static int bcm5482_read_status(struct phy_device *phydev)
251{
252 int err;
253
254 err = genphy_read_status(phydev);
255
256 if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) {
257 /*
258 * Only link status matters for 1000Base-X mode, so force
259 * 1000 Mbit/s full-duplex status
260 */
261 if (phydev->link) {
262 phydev->speed = SPEED_1000;
263 phydev->duplex = DUPLEX_FULL;
264 }
265 }
266
267 return err;
268}
269
73static int bcm54xx_ack_interrupt(struct phy_device *phydev) 270static int bcm54xx_ack_interrupt(struct phy_device *phydev)
74{ 271{
75 int reg; 272 int reg;
@@ -210,9 +407,9 @@ static struct phy_driver bcm5482_driver = {
210 .name = "Broadcom BCM5482", 407 .name = "Broadcom BCM5482",
211 .features = PHY_GBIT_FEATURES, 408 .features = PHY_GBIT_FEATURES,
212 .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, 409 .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
213 .config_init = bcm54xx_config_init, 410 .config_init = bcm5482_config_init,
214 .config_aneg = genphy_config_aneg, 411 .config_aneg = genphy_config_aneg,
215 .read_status = genphy_read_status, 412 .read_status = bcm5482_read_status,
216 .ack_interrupt = bcm54xx_ack_interrupt, 413 .ack_interrupt = bcm54xx_ack_interrupt,
217 .config_intr = bcm54xx_config_intr, 414 .config_intr = bcm54xx_config_intr,
218 .driver = { .owner = THIS_MODULE }, 415 .driver = { .owner = THIS_MODULE },
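The new bcm54xx_shadow_read()/bcm54xx_shadow_write() helpers above funnel every access through the single 0x1c register: a write selects the shadow bank (and, with the write bit set, carries the payload), while a read returns the currently selected bank's data. The standalone C sketch below models that decode logic in userspace. The bit layout (write enable in bit 15, selector in bits 14:10, data in bits 9:0) is an assumption that mirrors the MII_BCM54XX_SHD_* macros as used in the hunk, and the "PHY" is just an array rather than real MDIO; the expansion/secondary-SerDes helpers follow the same select-then-access pattern through a separate select/data register pair.

/* Minimal userspace model of the 0x1c shadow-register indirection. */
#include <stdint.h>
#include <stdio.h>

#define MII_SHD			0x1c			/* shadow access register */
#define SHD_WRITE		0x8000			/* bit 15: commit a write */
#define SHD_VAL(x)		(((x) & 0x1f) << 10)	/* bits 14:10: bank selector */
#define SHD_DATA(x)		((x) & 0x3ff)		/* bits 9:0: payload */

static uint16_t shadow_bank[0x20];			/* the 32 banked registers */
static uint16_t shd_sel;				/* currently selected bank */

/* Fake PHY: decode writes to register 0x1c the way the hardware would. */
static void phy_write(int reg, uint16_t val)
{
	if (reg != MII_SHD)
		return;
	shd_sel = (val >> 10) & 0x1f;
	if (val & SHD_WRITE)
		shadow_bank[shd_sel] = SHD_DATA(val);
}

static uint16_t phy_read(int reg)
{
	if (reg != MII_SHD)
		return 0;
	return SHD_VAL(shd_sel) | SHD_DATA(shadow_bank[shd_sel]);
}

/* Same shape as the driver helpers: select the bank, then read the payload. */
static uint16_t shadow_read(uint16_t shadow)
{
	phy_write(MII_SHD, SHD_VAL(shadow));
	return SHD_DATA(phy_read(MII_SHD));
}

static void shadow_write(uint16_t shadow, uint16_t val)
{
	phy_write(MII_SHD, SHD_WRITE | SHD_VAL(shadow) | SHD_DATA(val));
}

int main(void)
{
	/*
	 * Read-modify-write of an arbitrary bank (0x14 picked here),
	 * mirroring how bcm5482_config_init() sets bits in BCM5482_SHD_SSD.
	 */
	uint16_t reg = shadow_read(0x14);

	shadow_write(0x14, reg | 0x0001);
	printf("shadow 0x14 = 0x%03x\n", shadow_read(0x14));
	return 0;
}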
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 20a8e3996407..d9f248f23b97 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -2013,7 +2013,7 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state)
2013 "requested.\n", 2013 "requested.\n",
2014 tp->dev->name, state); 2014 tp->dev->name, state);
2015 return -EINVAL; 2015 return -EINVAL;
2016 }; 2016 }
2017 2017
2018 power_control |= PCI_PM_CTRL_PME_ENABLE; 2018 power_control |= PCI_PM_CTRL_PME_ENABLE;
2019 2019
@@ -2272,7 +2272,7 @@ static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8
2272 *speed = SPEED_INVALID; 2272 *speed = SPEED_INVALID;
2273 *duplex = DUPLEX_INVALID; 2273 *duplex = DUPLEX_INVALID;
2274 break; 2274 break;
2275 }; 2275 }
2276} 2276}
2277 2277
2278static void tg3_phy_copper_begin(struct tg3 *tp) 2278static void tg3_phy_copper_begin(struct tg3 *tp)
@@ -2384,7 +2384,7 @@ static void tg3_phy_copper_begin(struct tg3 *tp)
2384 case SPEED_1000: 2384 case SPEED_1000:
2385 bmcr |= TG3_BMCR_SPEED1000; 2385 bmcr |= TG3_BMCR_SPEED1000;
2386 break; 2386 break;
2387 }; 2387 }
2388 2388
2389 if (tp->link_config.duplex == DUPLEX_FULL) 2389 if (tp->link_config.duplex == DUPLEX_FULL)
2390 bmcr |= BMCR_FULLDPLX; 2390 bmcr |= BMCR_FULLDPLX;
@@ -3082,7 +3082,7 @@ static int tg3_fiber_aneg_smachine(struct tg3 *tp,
3082 default: 3082 default:
3083 ret = ANEG_FAILED; 3083 ret = ANEG_FAILED;
3084 break; 3084 break;
3085 }; 3085 }
3086 3086
3087 return ret; 3087 return ret;
3088} 3088}
@@ -3924,7 +3924,7 @@ static int tg3_alloc_rx_skb(struct tg3 *tp, u32 opaque_key,
3924 3924
3925 default: 3925 default:
3926 return -EINVAL; 3926 return -EINVAL;
3927 }; 3927 }
3928 3928
3929 /* Do not overwrite any of the map or rp information 3929 /* Do not overwrite any of the map or rp information
3930 * until we are sure we can commit to a new buffer. 3930 * until we are sure we can commit to a new buffer.
@@ -3984,7 +3984,7 @@ static void tg3_recycle_rx(struct tg3 *tp, u32 opaque_key,
3984 3984
3985 default: 3985 default:
3986 return; 3986 return;
3987 }; 3987 }
3988 3988
3989 dest_map->skb = src_map->skb; 3989 dest_map->skb = src_map->skb;
3990 pci_unmap_addr_set(dest_map, mapping, 3990 pci_unmap_addr_set(dest_map, mapping,
@@ -5347,7 +5347,7 @@ static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, int
5347 5347
5348 default: 5348 default:
5349 break; 5349 break;
5350 }; 5350 }
5351 } 5351 }
5352 5352
5353 val = tr32(ofs); 5353 val = tr32(ofs);
@@ -5589,7 +5589,7 @@ static void tg3_write_sig_pre_reset(struct tg3 *tp, int kind)
5589 5589
5590 default: 5590 default:
5591 break; 5591 break;
5592 }; 5592 }
5593 } 5593 }
5594 5594
5595 if (kind == RESET_KIND_INIT || 5595 if (kind == RESET_KIND_INIT ||
@@ -5614,7 +5614,7 @@ static void tg3_write_sig_post_reset(struct tg3 *tp, int kind)
5614 5614
5615 default: 5615 default:
5616 break; 5616 break;
5617 }; 5617 }
5618 } 5618 }
5619 5619
5620 if (kind == RESET_KIND_SHUTDOWN) 5620 if (kind == RESET_KIND_SHUTDOWN)
@@ -5643,7 +5643,7 @@ static void tg3_write_sig_legacy(struct tg3 *tp, int kind)
5643 5643
5644 default: 5644 default:
5645 break; 5645 break;
5646 }; 5646 }
5647 } 5647 }
5648} 5648}
5649 5649
@@ -7677,7 +7677,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
7677 7677
7678 default: 7678 default:
7679 break; 7679 break;
7680 }; 7680 }
7681 7681
7682 if (tp->tg3_flags3 & TG3_FLG3_ENABLE_APE) 7682 if (tp->tg3_flags3 & TG3_FLG3_ENABLE_APE)
7683 /* Write our heartbeat update interval to APE. */ 7683 /* Write our heartbeat update interval to APE. */
@@ -11379,7 +11379,7 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
11379 LED_CTRL_MODE_PHY_2); 11379 LED_CTRL_MODE_PHY_2);
11380 break; 11380 break;
11381 11381
11382 }; 11382 }
11383 11383
11384 if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 || 11384 if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
11385 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701) && 11385 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701) &&
@@ -12690,7 +12690,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val)
12690 val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX | 12690 val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX |
12691 DMA_RWCTRL_WRITE_BNDRY_384_PCIX); 12691 DMA_RWCTRL_WRITE_BNDRY_384_PCIX);
12692 break; 12692 break;
12693 }; 12693 }
12694 } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) { 12694 } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
12695 switch (cacheline_size) { 12695 switch (cacheline_size) {
12696 case 16: 12696 case 16:
@@ -12707,7 +12707,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val)
12707 val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE; 12707 val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE;
12708 val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE; 12708 val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE;
12709 break; 12709 break;
12710 }; 12710 }
12711 } else { 12711 } else {
12712 switch (cacheline_size) { 12712 switch (cacheline_size) {
12713 case 16: 12713 case 16:
@@ -12751,7 +12751,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val)
12751 val |= (DMA_RWCTRL_READ_BNDRY_1024 | 12751 val |= (DMA_RWCTRL_READ_BNDRY_1024 |
12752 DMA_RWCTRL_WRITE_BNDRY_1024); 12752 DMA_RWCTRL_WRITE_BNDRY_1024);
12753 break; 12753 break;
12754 }; 12754 }
12755 } 12755 }
12756 12756
12757out: 12757out:
@@ -13111,7 +13111,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp)
13111 case PHY_ID_BCM8002: return "8002/serdes"; 13111 case PHY_ID_BCM8002: return "8002/serdes";
13112 case 0: return "serdes"; 13112 case 0: return "serdes";
13113 default: return "unknown"; 13113 default: return "unknown";
13114 }; 13114 }
13115} 13115}
13116 13116
13117static char * __devinit tg3_bus_string(struct tg3 *tp, char *str) 13117static char * __devinit tg3_bus_string(struct tg3 *tp, char *str)
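Every tg3.c hunk in this patch is the same one-character cleanup: dropping the extra semicolon after a switch statement's closing brace. That semicolon is its own empty statement, not part of the switch, so removing it is purely cosmetic and only silences pedantic compiler/style warnings. A trivial standalone example of the accepted form, for illustration only:

#include <stdio.h>

static const char *speed_str(int speed)
{
	switch (speed) {
	case 10:   return "10Mb/s";
	case 100:  return "100Mb/s";
	case 1000: return "1000Mb/s";
	default:   return "unknown";
	}	/* no trailing ';' -- '};' would add a redundant empty statement */
}

int main(void)
{
	printf("%s\n", speed_str(1000));
	return 0;
}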
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 76752d84a30f..22c17bbacb69 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -423,7 +423,10 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
423 423
424 catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6; 424 catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6;
425 tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr; 425 tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr;
426 *((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len); 426 if (catc->is_f5u011)
427 *(__be16 *)tx_buf = cpu_to_be16(skb->len);
428 else
429 *(__le16 *)tx_buf = cpu_to_le16(skb->len);
427 skb_copy_from_linear_data(skb, tx_buf + 2, skb->len); 430 skb_copy_from_linear_data(skb, tx_buf + 2, skb->len);
428 catc->tx_ptr += skb->len + 2; 431 catc->tx_ptr += skb->len + 2;
429 432
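The catc.c change (and, in the same spirit, the rndis_host.c and zd1211rw hunks further down) replaces a ternary that pushed both big- and little-endian conversions through one plain u16 store with an explicit if/else using properly annotated __be16/__le16 pointers, which lets sparse check the byte order. Below is a rough userspace sketch of the resulting shape, with put_be16()/put_le16() as hand-rolled stand-ins for cpu_to_be16()/cpu_to_le16(); as in the driver, the F5U011 branch takes the big-endian length prefix.

#include <stdint.h>
#include <stdio.h>

static void put_be16(uint8_t *p, uint16_t v)	/* most significant byte first */
{
	p[0] = v >> 8;
	p[1] = v & 0xff;
}

static void put_le16(uint8_t *p, uint16_t v)	/* least significant byte first */
{
	p[0] = v & 0xff;
	p[1] = v >> 8;
}

int main(void)
{
	uint8_t tx_buf[64];
	uint16_t len = 60;
	int is_f5u011 = 1;		/* this model wants a big-endian prefix */

	if (is_f5u011)
		put_be16(tx_buf, len);
	else
		put_le16(tx_buf, len);

	printf("length prefix bytes: %02x %02x\n", tx_buf[0], tx_buf[1]);
	return 0;
}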
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index e1177cca8a76..ae467f182c40 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -283,8 +283,8 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
283 struct rndis_set_c *set_c; 283 struct rndis_set_c *set_c;
284 struct rndis_halt *halt; 284 struct rndis_halt *halt;
285 } u; 285 } u;
286 u32 tmp, phym_unspec; 286 u32 tmp;
287 __le32 *phym; 287 __le32 phym_unspec, *phym;
288 int reply_len; 288 int reply_len;
289 unsigned char *bp; 289 unsigned char *bp;
290 290
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 6b8d882d197b..bcbf2fa9b94a 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1495,24 +1495,18 @@ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb)
1495 * enough. This function returns a negative value if the received 1495 * enough. This function returns a negative value if the received
1496 * packet is too big or if memory is exhausted. 1496 * packet is too big or if memory is exhausted.
1497 */ 1497 */
1498static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size, 1498static int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
1499 struct velocity_info *vptr) 1499 struct velocity_info *vptr)
1500{ 1500{
1501 int ret = -1; 1501 int ret = -1;
1502
1503 if (pkt_size < rx_copybreak) { 1502 if (pkt_size < rx_copybreak) {
1504 struct sk_buff *new_skb; 1503 struct sk_buff *new_skb;
1505 1504
1506 new_skb = dev_alloc_skb(pkt_size + 2); 1505 new_skb = netdev_alloc_skb(vptr->dev, pkt_size + 2);
1507 if (new_skb) { 1506 if (new_skb) {
1508 new_skb->dev = vptr->dev;
1509 new_skb->ip_summed = rx_skb[0]->ip_summed; 1507 new_skb->ip_summed = rx_skb[0]->ip_summed;
1510 1508 skb_reserve(new_skb, 2);
1511 if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) 1509 skb_copy_from_linear_data(*rx_skb, new_skb->data, pkt_size);
1512 skb_reserve(new_skb, 2);
1513
1514 skb_copy_from_linear_data(rx_skb[0], new_skb->data,
1515 pkt_size);
1516 *rx_skb = new_skb; 1510 *rx_skb = new_skb;
1517 ret = 0; 1511 ret = 0;
1518 } 1512 }
@@ -1533,12 +1527,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
1533static inline void velocity_iph_realign(struct velocity_info *vptr, 1527static inline void velocity_iph_realign(struct velocity_info *vptr,
1534 struct sk_buff *skb, int pkt_size) 1528 struct sk_buff *skb, int pkt_size)
1535{ 1529{
1536 /* FIXME - memmove ? */
1537 if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) { 1530 if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) {
1538 int i; 1531 memmove(skb->data + 2, skb->data, pkt_size);
1539
1540 for (i = pkt_size; i >= 0; i--)
1541 *(skb->data + i + 2) = *(skb->data + i);
1542 skb_reserve(skb, 2); 1532 skb_reserve(skb, 2);
1543 } 1533 }
1544} 1534}
@@ -1629,7 +1619,7 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx)
1629 struct rx_desc *rd = &(vptr->rd_ring[idx]); 1619 struct rx_desc *rd = &(vptr->rd_ring[idx]);
1630 struct velocity_rd_info *rd_info = &(vptr->rd_info[idx]); 1620 struct velocity_rd_info *rd_info = &(vptr->rd_info[idx]);
1631 1621
1632 rd_info->skb = dev_alloc_skb(vptr->rx_buf_sz + 64); 1622 rd_info->skb = netdev_alloc_skb(vptr->dev, vptr->rx_buf_sz + 64);
1633 if (rd_info->skb == NULL) 1623 if (rd_info->skb == NULL)
1634 return -ENOMEM; 1624 return -ENOMEM;
1635 1625
@@ -1638,7 +1628,6 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx)
1638 * 64byte alignment. 1628 * 64byte alignment.
1639 */ 1629 */
1640 skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63); 1630 skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63);
1641 rd_info->skb->dev = vptr->dev;
1642 rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE); 1631 rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
1643 1632
1644 /* 1633 /*
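In via-velocity.c, velocity_iph_realign() drops its hand-written backwards byte-copy loop (and the FIXME asking for exactly this) in favour of memmove(), which is defined to handle overlapping source and destination. A toy illustration of the two-byte realignment follows; the buffer contents are made up and only the overlap handling is the point.

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char buf[16] = "EEEEEEDDDDDDDD";	/* fake 6-byte hdr + payload */
	int pkt_size = 14;

	/* Shift the whole packet up by 2 bytes; regions overlap, memmove is safe. */
	memmove(buf + 2, buf, pkt_size);

	printf("%.14s\n", (char *)(buf + 2));
	return 0;
}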
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 0c736735e217..c99d4a4fde05 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -807,7 +807,7 @@ void zd_process_intr(struct work_struct *work)
807 u16 int_status; 807 u16 int_status;
808 struct zd_mac *mac = container_of(work, struct zd_mac, process_intr); 808 struct zd_mac *mac = container_of(work, struct zd_mac, process_intr);
809 809
810 int_status = le16_to_cpu(*(u16 *)(mac->intr_buffer+4)); 810 int_status = le16_to_cpu(*(__le16 *)(mac->intr_buffer+4));
811 if (int_status & INT_CFG_NEXT_BCN) { 811 if (int_status & INT_CFG_NEXT_BCN) {
812 if (net_ratelimit()) 812 if (net_ratelimit())
813 dev_dbg_f(zd_mac_dev(mac), "INT_CFG_NEXT_BCN\n"); 813 dev_dbg_f(zd_mac_dev(mac), "INT_CFG_NEXT_BCN\n");
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 12e24f04dddf..8941f5eb96c2 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -342,7 +342,7 @@ static inline void handle_regs_int(struct urb *urb)
342 ZD_ASSERT(in_interrupt()); 342 ZD_ASSERT(in_interrupt());
343 spin_lock(&intr->lock); 343 spin_lock(&intr->lock);
344 344
345 int_num = le16_to_cpu(*(u16 *)(urb->transfer_buffer+2)); 345 int_num = le16_to_cpu(*(__le16 *)(urb->transfer_buffer+2));
346 if (int_num == CR_INTERRUPT) { 346 if (int_num == CR_INTERRUPT) {
347 struct zd_mac *mac = zd_hw_mac(zd_usb_to_hw(urb->context)); 347 struct zd_mac *mac = zd_hw_mac(zd_usb_to_hw(urb->context));
348 memcpy(&mac->intr_buffer, urb->transfer_buffer, 348 memcpy(&mac->intr_buffer, urb->transfer_buffer,