aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/bonding.txt323
-rw-r--r--drivers/net/e100.c75
-rw-r--r--drivers/net/e1000/e1000_main.c114
3 files changed, 417 insertions, 95 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 8d8b4e5ea184..afac780445cd 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -1,7 +1,7 @@
1 1
2 Linux Ethernet Bonding Driver HOWTO 2 Linux Ethernet Bonding Driver HOWTO
3 3
4 Latest update: 21 June 2005 4 Latest update: 24 April 2006
5 5
6Initial release : Thomas Davis <tadavis at lbl.gov> 6Initial release : Thomas Davis <tadavis at lbl.gov>
7Corrections, HA extensions : 2000/10/03-15 : 7Corrections, HA extensions : 2000/10/03-15 :
@@ -12,6 +12,8 @@ Corrections, HA extensions : 2000/10/03-15 :
12 - Jay Vosburgh <fubar at us dot ibm dot com> 12 - Jay Vosburgh <fubar at us dot ibm dot com>
13 13
14Reorganized and updated Feb 2005 by Jay Vosburgh 14Reorganized and updated Feb 2005 by Jay Vosburgh
15Added Sysfs information: 2006/04/24
16 - Mitch Williams <mitch.a.williams at intel.com>
15 17
16Introduction 18Introduction
17============ 19============
@@ -38,61 +40,62 @@ Table of Contents
382. Bonding Driver Options 402. Bonding Driver Options
39 41
403. Configuring Bonding Devices 423. Configuring Bonding Devices
413.1 Configuration with sysconfig support 433.1 Configuration with Sysconfig Support
423.1.1 Using DHCP with sysconfig 443.1.1 Using DHCP with Sysconfig
433.1.2 Configuring Multiple Bonds with sysconfig 453.1.2 Configuring Multiple Bonds with Sysconfig
443.2 Configuration with initscripts support 463.2 Configuration with Initscripts Support
453.2.1 Using DHCP with initscripts 473.2.1 Using DHCP with Initscripts
463.2.2 Configuring Multiple Bonds with initscripts 483.2.2 Configuring Multiple Bonds with Initscripts
473.3 Configuring Bonding Manually 493.3 Configuring Bonding Manually with Ifenslave
483.3.1 Configuring Multiple Bonds Manually 503.3.1 Configuring Multiple Bonds Manually
513.4 Configuring Bonding Manually via Sysfs
49 52
505. Querying Bonding Configuration 534. Querying Bonding Configuration
515.1 Bonding Configuration 544.1 Bonding Configuration
525.2 Network Configuration 554.2 Network Configuration
53 56
546. Switch Configuration 575. Switch Configuration
55 58
567. 802.1q VLAN Support 596. 802.1q VLAN Support
57 60
588. Link Monitoring 617. Link Monitoring
598.1 ARP Monitor Operation 627.1 ARP Monitor Operation
608.2 Configuring Multiple ARP Targets 637.2 Configuring Multiple ARP Targets
618.3 MII Monitor Operation 647.3 MII Monitor Operation
62 65
639. Potential Trouble Sources 668. Potential Trouble Sources
649.1 Adventures in Routing 678.1 Adventures in Routing
659.2 Ethernet Device Renaming 688.2 Ethernet Device Renaming
669.3 Painfully Slow Or No Failed Link Detection By Miimon 698.3 Painfully Slow Or No Failed Link Detection By Miimon
67 70
6810. SNMP agents 719. SNMP agents
69 72
7011. Promiscuous mode 7310. Promiscuous mode
71 74
7212. Configuring Bonding for High Availability 7511. Configuring Bonding for High Availability
7312.1 High Availability in a Single Switch Topology 7611.1 High Availability in a Single Switch Topology
7412.2 High Availability in a Multiple Switch Topology 7711.2 High Availability in a Multiple Switch Topology
7512.2.1 HA Bonding Mode Selection for Multiple Switch Topology 7811.2.1 HA Bonding Mode Selection for Multiple Switch Topology
7612.2.2 HA Link Monitoring for Multiple Switch Topology 7911.2.2 HA Link Monitoring for Multiple Switch Topology
77 80
7813. Configuring Bonding for Maximum Throughput 8112. Configuring Bonding for Maximum Throughput
7913.1 Maximum Throughput in a Single Switch Topology 8212.1 Maximum Throughput in a Single Switch Topology
8013.1.1 MT Bonding Mode Selection for Single Switch Topology 8312.1.1 MT Bonding Mode Selection for Single Switch Topology
8113.1.2 MT Link Monitoring for Single Switch Topology 8412.1.2 MT Link Monitoring for Single Switch Topology
8213.2 Maximum Throughput in a Multiple Switch Topology 8512.2 Maximum Throughput in a Multiple Switch Topology
8313.2.1 MT Bonding Mode Selection for Multiple Switch Topology 8612.2.1 MT Bonding Mode Selection for Multiple Switch Topology
8413.2.2 MT Link Monitoring for Multiple Switch Topology 8712.2.2 MT Link Monitoring for Multiple Switch Topology
85 88
8614. Switch Behavior Issues 8913. Switch Behavior Issues
8714.1 Link Establishment and Failover Delays 9013.1 Link Establishment and Failover Delays
8814.2 Duplicated Incoming Packets 9113.2 Duplicated Incoming Packets
89 92
9015. Hardware Specific Considerations 9314. Hardware Specific Considerations
9115.1 IBM BladeCenter 9414.1 IBM BladeCenter
92 95
9316. Frequently Asked Questions 9615. Frequently Asked Questions
94 97
9517. Resources and Links 9816. Resources and Links
96 99
97 100
981. Bonding Driver Installation 1011. Bonding Driver Installation
@@ -156,6 +159,9 @@ you're trying to build it for. Some distros (e.g., Red Hat from 7.1
156onwards) do not have /usr/include/linux symbolically linked to the 159onwards) do not have /usr/include/linux symbolically linked to the
157default kernel source include directory. 160default kernel source include directory.
158 161
162SECOND IMPORTANT NOTE:
163 If you plan to configure bonding using sysfs, you do not need
164to use ifenslave.
159 165
1602. Bonding Driver Options 1662. Bonding Driver Options
161========================= 167=========================
@@ -270,7 +276,7 @@ mode
270 In bonding version 2.6.2 or later, when a failover 276 In bonding version 2.6.2 or later, when a failover
271 occurs in active-backup mode, bonding will issue one 277 occurs in active-backup mode, bonding will issue one
272 or more gratuitous ARPs on the newly active slave. 278 or more gratuitous ARPs on the newly active slave.
273 One gratutious ARP is issued for the bonding master 279 One gratuitous ARP is issued for the bonding master
274 interface and each VLAN interfaces configured above 280 interface and each VLAN interfaces configured above
275 it, provided that the interface has at least one IP 281 it, provided that the interface has at least one IP
276 address configured. Gratuitous ARPs issued for VLAN 282 address configured. Gratuitous ARPs issued for VLAN
@@ -377,7 +383,7 @@ mode
377 When a link is reconnected or a new slave joins the 383 When a link is reconnected or a new slave joins the
378 bond the receive traffic is redistributed among all 384 bond the receive traffic is redistributed among all
379 active slaves in the bond by initiating ARP Replies 385 active slaves in the bond by initiating ARP Replies
380 with the selected mac address to each of the 386 with the selected MAC address to each of the
381 clients. The updelay parameter (detailed below) must 387 clients. The updelay parameter (detailed below) must
382 be set to a value equal or greater than the switch's 388 be set to a value equal or greater than the switch's
383 forwarding delay so that the ARP Replies sent to the 389 forwarding delay so that the ARP Replies sent to the
@@ -498,11 +504,12 @@ not exist, and the layer2 policy is the only policy.
4983. Configuring Bonding Devices 5043. Configuring Bonding Devices
499============================== 505==============================
500 506
501 There are, essentially, two methods for configuring bonding: 507 You can configure bonding using either your distro's network
502with support from the distro's network initialization scripts, and 508initialization scripts, or manually using either ifenslave or the
503without. Distros generally use one of two packages for the network 509sysfs interface. Distros generally use one of two packages for the
504initialization scripts: initscripts or sysconfig. Recent versions of 510network initialization scripts: initscripts or sysconfig. Recent
505these packages have support for bonding, while older versions do not. 511versions of these packages have support for bonding, while older
512versions do not.
506 513
507 We will first describe the options for configuring bonding for 514 We will first describe the options for configuring bonding for
508distros using versions of initscripts and sysconfig with full or 515distros using versions of initscripts and sysconfig with full or
@@ -530,7 +537,7 @@ $ grep ifenslave /sbin/ifup
530 If this returns any matches, then your initscripts or 537 If this returns any matches, then your initscripts or
531sysconfig has support for bonding. 538sysconfig has support for bonding.
532 539
5333.1 Configuration with sysconfig support 5403.1 Configuration with Sysconfig Support
534---------------------------------------- 541----------------------------------------
535 542
536 This section applies to distros using a version of sysconfig 543 This section applies to distros using a version of sysconfig
@@ -538,7 +545,7 @@ with bonding support, for example, SuSE Linux Enterprise Server 9.
538 545
539 SuSE SLES 9's networking configuration system does support 546 SuSE SLES 9's networking configuration system does support
540bonding, however, at this writing, the YaST system configuration 547bonding, however, at this writing, the YaST system configuration
541frontend does not provide any means to work with bonding devices. 548front end does not provide any means to work with bonding devices.
542Bonding devices can be managed by hand, however, as follows. 549Bonding devices can be managed by hand, however, as follows.
543 550
544 First, if they have not already been configured, configure the 551 First, if they have not already been configured, configure the
@@ -660,7 +667,7 @@ format can be found in an example ifcfg template file:
660 Note that the template does not document the various BONDING_ 667 Note that the template does not document the various BONDING_
661settings described above, but does describe many of the other options. 668settings described above, but does describe many of the other options.
662 669
6633.1.1 Using DHCP with sysconfig 6703.1.1 Using DHCP with Sysconfig
664------------------------------- 671-------------------------------
665 672
666 Under sysconfig, configuring a device with BOOTPROTO='dhcp' 673 Under sysconfig, configuring a device with BOOTPROTO='dhcp'
@@ -670,7 +677,7 @@ attempt to obtain the device address from DHCP prior to adding any of
670the slave devices. Without active slaves, the DHCP requests are not 677the slave devices. Without active slaves, the DHCP requests are not
671sent to the network. 678sent to the network.
672 679
6733.1.2 Configuring Multiple Bonds with sysconfig 6803.1.2 Configuring Multiple Bonds with Sysconfig
674----------------------------------------------- 681-----------------------------------------------
675 682
676 The sysconfig network initialization system is capable of 683 The sysconfig network initialization system is capable of
@@ -685,7 +692,7 @@ ifcfg-bondX files.
685options in the ifcfg-bondX file, it is not necessary to add them to 692options in the ifcfg-bondX file, it is not necessary to add them to
686the system /etc/modules.conf or /etc/modprobe.conf configuration file. 693the system /etc/modules.conf or /etc/modprobe.conf configuration file.
687 694
6883.2 Configuration with initscripts support 6953.2 Configuration with Initscripts Support
689------------------------------------------ 696------------------------------------------
690 697
691 This section applies to distros using a version of initscripts 698 This section applies to distros using a version of initscripts
@@ -756,7 +763,7 @@ options for your configuration.
756will restart the networking subsystem and your bond link should be now 763will restart the networking subsystem and your bond link should be now
757up and running. 764up and running.
758 765
7593.2.1 Using DHCP with initscripts 7663.2.1 Using DHCP with Initscripts
760--------------------------------- 767---------------------------------
761 768
762 Recent versions of initscripts (the version supplied with 769 Recent versions of initscripts (the version supplied with
@@ -768,7 +775,7 @@ above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
768and add a line consisting of "TYPE=Bonding". Note that the TYPE value 775and add a line consisting of "TYPE=Bonding". Note that the TYPE value
769is case sensitive. 776is case sensitive.
770 777
7713.2.2 Configuring Multiple Bonds with initscripts 7783.2.2 Configuring Multiple Bonds with Initscripts
772------------------------------------------------- 779-------------------------------------------------
773 780
774 At this writing, the initscripts package does not directly 781 At this writing, the initscripts package does not directly
@@ -784,8 +791,8 @@ Fedora Core kernels, and has been seen on RHEL 4 as well. On kernels
784exhibiting this problem, it will be impossible to configure multiple 791exhibiting this problem, it will be impossible to configure multiple
785bonds with differing parameters. 792bonds with differing parameters.
786 793
7873.3 Configuring Bonding Manually 7943.3 Configuring Bonding Manually with Ifenslave
788-------------------------------- 795-----------------------------------------------
789 796
790 This section applies to distros whose network initialization 797 This section applies to distros whose network initialization
791scripts (the sysconfig or initscripts package) do not have specific 798scripts (the sysconfig or initscripts package) do not have specific
@@ -889,11 +896,139 @@ install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
889 This may be repeated any number of times, specifying a new and 896 This may be repeated any number of times, specifying a new and
890unique name in place of bond1 for each subsequent instance. 897unique name in place of bond1 for each subsequent instance.
891 898
8993.4 Configuring Bonding Manually via Sysfs
900------------------------------------------
901
902 Starting with version 3.0, Channel Bonding may be configured
903via the sysfs interface. This interface allows dynamic configuration
904of all bonds in the system without unloading the module. It also
905allows for adding and removing bonds at runtime. Ifenslave is no
906longer required, though it is still supported.
907
908 Use of the sysfs interface allows you to use multiple bonds
909with different configurations without having to reload the module.
910It also allows you to use multiple, differently configured bonds when
911bonding is compiled into the kernel.
912
913 You must have the sysfs filesystem mounted to configure
914bonding this way. The examples in this document assume that you
915are using the standard mount point for sysfs, e.g. /sys. If your
916sysfs filesystem is mounted elsewhere, you will need to adjust the
917example paths accordingly.
918
919Creating and Destroying Bonds
920-----------------------------
921To add a new bond foo:
922# echo +foo > /sys/class/net/bonding_masters
923
924To remove an existing bond bar:
925# echo -bar > /sys/class/net/bonding_masters
926
927To show all existing bonds:
928# cat /sys/class/net/bonding_masters
929
930NOTE: due to the 4K size limitation of sysfs files, this list may be
931truncated if you have more than a few hundred bonds. This is unlikely
932to occur under normal operating conditions.
933
934Adding and Removing Slaves
935--------------------------
936 Interfaces may be enslaved to a bond using the file
937/sys/class/net/<bond>/bonding/slaves. The semantics for this file
938are the same as for the bonding_masters file.
939
940To enslave interface eth0 to bond bond0:
941# ifconfig bond0 up
942# echo +eth0 > /sys/class/net/bond0/bonding/slaves
943
944To free slave eth0 from bond bond0:
945# echo -eth0 > /sys/class/net/bond0/bonding/slaves
946
947 NOTE: The bond must be up before slaves can be added. All
948slaves are freed when the interface is brought down.
949
950 When an interface is enslaved to a bond, symlinks between the
951two are created in the sysfs filesystem. In this case, you would get
952/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
953/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
954
955 This means that you can tell quickly whether or not an
956interface is enslaved by looking for the master symlink. Thus:
957# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
958will free eth0 from whatever bond it is enslaved to, regardless of
959the name of the bond interface.
960
961Changing a Bond's Configuration
962-------------------------------
963 Each bond may be configured individually by manipulating the
964files located in /sys/class/net/<bond name>/bonding
965
966 The names of these files correspond directly with the command-
967line parameters described elsewhere in this file, and, with the
968exception of arp_ip_target, they accept the same values. To see the
969current setting, simply cat the appropriate file.
970
971 A few examples will be given here; for specific usage
972guidelines for each parameter, see the appropriate section in this
973document.
974
975To configure bond0 for balance-alb mode:
976# ifconfig bond0 down
977# echo 6 > /sys/class/net/bond0/bonding/mode
978 - or -
979# echo balance-alb > /sys/class/net/bond0/bonding/mode
980 NOTE: The bond interface must be down before the mode can be
981changed.
982
983To enable MII monitoring on bond0 with a 1 second interval:
984# echo 1000 > /sys/class/net/bond0/bonding/miimon
985 NOTE: If ARP monitoring is enabled, it will be disabled when MII
986monitoring is enabled, and vice-versa.
987
988To add ARP targets:
989# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
990# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
991 NOTE: up to 10 target addresses may be specified.
992
993To remove an ARP target:
994# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
995
996Example Configuration
997---------------------
998 We begin with the same example that is shown in section 3.3,
999executed with sysfs, and without using ifenslave.
1000
1001 To make a simple bond of two e100 devices (presumed to be eth0
1002and eth1), and have it persist across reboots, edit the appropriate
1003file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
1004following:
1005
1006modprobe bonding
1007modprobe e100
1008echo balance-alb > /sys/class/net/bond0/bonding/mode
1009ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
1010echo 100 > /sys/class/net/bond0/bonding/miimon
1011echo +eth0 > /sys/class/net/bond0/bonding/slaves
1012echo +eth1 > /sys/class/net/bond0/bonding/slaves
1013
1014 To add a second bond, with two e1000 interfaces in
1015active-backup mode, using ARP monitoring, add the following lines to
1016your init script:
1017
1018modprobe e1000
1019echo +bond1 > /sys/class/net/bonding_masters
1020echo active-backup > /sys/class/net/bond1/bonding/mode
1021ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
1022echo +192.168.2.100 > /sys/class/net/bond1/bonding/arp_ip_target
1023echo 2000 > /sys/class/net/bond1/bonding/arp_interval
1024echo +eth2 > /sys/class/net/bond1/bonding/slaves
1025echo +eth3 > /sys/class/net/bond1/bonding/slaves
1026
892 1027
8935. Querying Bonding Configuration 10284. Querying Bonding Configuration
894================================= 1029=================================
895 1030
8965.1 Bonding Configuration 10314.1 Bonding Configuration
897------------------------- 1032-------------------------
898 1033
899 Each bonding device has a read-only file residing in the 1034 Each bonding device has a read-only file residing in the
@@ -923,7 +1058,7 @@ generally as follows:
923 The precise format and contents will change depending upon the 1058 The precise format and contents will change depending upon the
924bonding configuration, state, and version of the bonding driver. 1059bonding configuration, state, and version of the bonding driver.
925 1060
9265.2 Network configuration 10614.2 Network configuration
927------------------------- 1062-------------------------
928 1063
929 The network configuration can be inspected using the ifconfig 1064 The network configuration can be inspected using the ifconfig
@@ -958,7 +1093,7 @@ eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
958 collisions:0 txqueuelen:100 1093 collisions:0 txqueuelen:100
959 Interrupt:9 Base address:0x1400 1094 Interrupt:9 Base address:0x1400
960 1095
9616. Switch Configuration 10965. Switch Configuration
962======================= 1097=======================
963 1098
964 For this section, "switch" refers to whatever system the 1099 For this section, "switch" refers to whatever system the
@@ -991,7 +1126,7 @@ transmit policy for an EtherChannel group; all three will interoperate
991with another EtherChannel group. 1126with another EtherChannel group.
992 1127
993 1128
9947. 802.1q VLAN Support 11296. 802.1q VLAN Support
995====================== 1130======================
996 1131
997 It is possible to configure VLAN devices over a bond interface 1132 It is possible to configure VLAN devices over a bond interface
@@ -1042,7 +1177,7 @@ underlying device -- i.e. the bonding interface -- to promiscuous
1042mode, which might not be what you want. 1177mode, which might not be what you want.
1043 1178
1044 1179
10458. Link Monitoring 11807. Link Monitoring
1046================== 1181==================
1047 1182
1048 The bonding driver at present supports two schemes for 1183 The bonding driver at present supports two schemes for
@@ -1053,7 +1188,7 @@ monitor.
1053bonding driver itself, it is not possible to enable both ARP and MII 1188bonding driver itself, it is not possible to enable both ARP and MII
1054monitoring simultaneously. 1189monitoring simultaneously.
1055 1190
10568.1 ARP Monitor Operation 11917.1 ARP Monitor Operation
1057------------------------- 1192-------------------------
1058 1193
1059 The ARP monitor operates as its name suggests: it sends ARP 1194 The ARP monitor operates as its name suggests: it sends ARP
@@ -1071,7 +1206,7 @@ those slaves will stay down. If networking monitoring (tcpdump, etc)
1071shows the ARP requests and replies on the network, then it may be that 1206shows the ARP requests and replies on the network, then it may be that
1072your device driver is not updating last_rx and trans_start. 1207your device driver is not updating last_rx and trans_start.
1073 1208
10748.2 Configuring Multiple ARP Targets 12097.2 Configuring Multiple ARP Targets
1075------------------------------------ 1210------------------------------------
1076 1211
1077 While ARP monitoring can be done with just one target, it can 1212 While ARP monitoring can be done with just one target, it can
@@ -1094,7 +1229,7 @@ alias bond0 bonding
1094options bond0 arp_interval=60 arp_ip_target=192.168.0.100 1229options bond0 arp_interval=60 arp_ip_target=192.168.0.100
1095 1230
1096 1231
10978.3 MII Monitor Operation 12327.3 MII Monitor Operation
1098------------------------- 1233-------------------------
1099 1234
1100 The MII monitor monitors only the carrier state of the local 1235 The MII monitor monitors only the carrier state of the local
@@ -1120,14 +1255,14 @@ does not support or had some error in processing both the MII register
1120and ethtool requests), then the MII monitor will assume the link is 1255and ethtool requests), then the MII monitor will assume the link is
1121up. 1256up.
1122 1257
11239. Potential Sources of Trouble 12588. Potential Sources of Trouble
1124=============================== 1259===============================
1125 1260
11269.1 Adventures in Routing 12618.1 Adventures in Routing
1127------------------------- 1262-------------------------
1128 1263
1129 When bonding is configured, it is important that the slave 1264 When bonding is configured, it is important that the slave
1130devices not have routes that supercede routes of the master (or, 1265devices not have routes that supersede routes of the master (or,
1131generally, not have routes at all). For example, suppose the bonding 1266generally, not have routes at all). For example, suppose the bonding
1132device bond0 has two slaves, eth0 and eth1, and the routing table is 1267device bond0 has two slaves, eth0 and eth1, and the routing table is
1133as follows: 1268as follows:
@@ -1154,11 +1289,11 @@ by the state of the routing table.
1154 1289
1155 The solution here is simply to insure that slaves do not have 1290 The solution here is simply to insure that slaves do not have
1156routes of their own, and if for some reason they must, those routes do 1291routes of their own, and if for some reason they must, those routes do
1157not supercede routes of their master. This should generally be the 1292not supersede routes of their master. This should generally be the
1158case, but unusual configurations or errant manual or automatic static 1293case, but unusual configurations or errant manual or automatic static
1159route additions may cause trouble. 1294route additions may cause trouble.
1160 1295
11619.2 Ethernet Device Renaming 12968.2 Ethernet Device Renaming
1162---------------------------- 1297----------------------------
1163 1298
1164 On systems with network configuration scripts that do not 1299 On systems with network configuration scripts that do not
@@ -1207,7 +1342,7 @@ modprobe with --ignore-install to cause the normal action to then take
1207place. Full documentation on this can be found in the modprobe.conf 1342place. Full documentation on this can be found in the modprobe.conf
1208and modprobe manual pages. 1343and modprobe manual pages.
1209 1344
12109.3. Painfully Slow Or No Failed Link Detection By Miimon 13458.3. Painfully Slow Or No Failed Link Detection By Miimon
1211--------------------------------------------------------- 1346---------------------------------------------------------
1212 1347
1213 By default, bonding enables the use_carrier option, which 1348 By default, bonding enables the use_carrier option, which
@@ -1235,7 +1370,7 @@ carrier state. It has no way to determine the state of devices on or
1235beyond other ports of a switch, or if a switch is refusing to pass 1370beyond other ports of a switch, or if a switch is refusing to pass
1236traffic while still maintaining carrier on. 1371traffic while still maintaining carrier on.
1237 1372
123810. SNMP agents 13739. SNMP agents
1239=============== 1374===============
1240 1375
1241 If running SNMP agents, the bonding driver should be loaded 1376 If running SNMP agents, the bonding driver should be loaded
@@ -1281,7 +1416,7 @@ ifDescr, the association between the IP address and IfIndex remains
1281and SNMP functions such as Interface_Scan_Next will report that 1416and SNMP functions such as Interface_Scan_Next will report that
1282association. 1417association.
1283 1418
128411. Promiscuous mode 141910. Promiscuous mode
1285==================== 1420====================
1286 1421
1287 When running network monitoring tools, e.g., tcpdump, it is 1422 When running network monitoring tools, e.g., tcpdump, it is
@@ -1308,7 +1443,7 @@ sending to peers that are unassigned or if the load is unbalanced.
1308the active slave changes (e.g., due to a link failure), the 1443the active slave changes (e.g., due to a link failure), the
1309promiscuous setting will be propagated to the new active slave. 1444promiscuous setting will be propagated to the new active slave.
1310 1445
131112. Configuring Bonding for High Availability 144611. Configuring Bonding for High Availability
1312============================================= 1447=============================================
1313 1448
1314 High Availability refers to configurations that provide 1449 High Availability refers to configurations that provide
@@ -1318,7 +1453,7 @@ goal is to provide the maximum availability of network connectivity
1318(i.e., the network always works), even though other configurations 1453(i.e., the network always works), even though other configurations
1319could provide higher throughput. 1454could provide higher throughput.
1320 1455
132112.1 High Availability in a Single Switch Topology 145611.1 High Availability in a Single Switch Topology
1322-------------------------------------------------- 1457--------------------------------------------------
1323 1458
1324 If two hosts (or a host and a single switch) are directly 1459 If two hosts (or a host and a single switch) are directly
@@ -1332,7 +1467,7 @@ the load will be rebalanced across the remaining devices.
1332 See Section 13, "Configuring Bonding for Maximum Throughput" 1467 See Section 12, "Configuring Bonding for Maximum Throughput"
1333for information on configuring bonding with one peer device. 1468for information on configuring bonding with one peer device.
1334 1469
133512.2 High Availability in a Multiple Switch Topology 147011.2 High Availability in a Multiple Switch Topology
1336---------------------------------------------------- 1471----------------------------------------------------
1337 1472
1338 With multiple switches, the configuration of bonding and the 1473 With multiple switches, the configuration of bonding and the
@@ -1359,7 +1494,7 @@ switches (ISL, or inter switch link), and multiple ports connecting to
1359the outside world ("port3" on each switch). There is no technical 1494the outside world ("port3" on each switch). There is no technical
1360reason that this could not be extended to a third switch. 1495reason that this could not be extended to a third switch.
1361 1496
136212.2.1 HA Bonding Mode Selection for Multiple Switch Topology 149711.2.1 HA Bonding Mode Selection for Multiple Switch Topology
1363------------------------------------------------------------- 1498-------------------------------------------------------------
1364 1499
1365 In a topology such as the example above, the active-backup and 1500 In a topology such as the example above, the active-backup and
@@ -1381,7 +1516,7 @@ broadcast: This mode is really a special purpose mode, and is suitable
1381 necessary for some specific one-way traffic to reach both 1516 necessary for some specific one-way traffic to reach both
1382 independent networks, then the broadcast mode may be suitable. 1517 independent networks, then the broadcast mode may be suitable.
1383 1518
138412.2.2 HA Link Monitoring Selection for Multiple Switch Topology 151911.2.2 HA Link Monitoring Selection for Multiple Switch Topology
1385---------------------------------------------------------------- 1520----------------------------------------------------------------
1386 1521
1387 The choice of link monitoring ultimately depends upon your 1522 The choice of link monitoring ultimately depends upon your
@@ -1402,10 +1537,10 @@ regardless of which switch is active, the ARP monitor has a suitable
1402target to query. 1537target to query.
1403 1538
1404 1539
140513. Configuring Bonding for Maximum Throughput 154012. Configuring Bonding for Maximum Throughput
1406============================================== 1541==============================================
1407 1542
140813.1 Maximizing Throughput in a Single Switch Topology 154312.1 Maximizing Throughput in a Single Switch Topology
1409------------------------------------------------------ 1544------------------------------------------------------
1410 1545
1411 In a single switch configuration, the best method to maximize 1546 In a single switch configuration, the best method to maximize
@@ -1476,7 +1611,7 @@ destination to make load balancing decisions. The behavior of each
1476mode is described below. 1611mode is described below.
1477 1612
1478 1613
147913.1.1 MT Bonding Mode Selection for Single Switch Topology 161412.1.1 MT Bonding Mode Selection for Single Switch Topology
1480----------------------------------------------------------- 1615-----------------------------------------------------------
1481 1616
1482 This configuration is the easiest to set up and to understand, 1617 This configuration is the easiest to set up and to understand,
@@ -1607,7 +1742,7 @@ balance-alb: This mode is everything that balance-tlb is, and more.
1607 device driver must support changing the hardware address while 1742 device driver must support changing the hardware address while
1608 the device is open. 1743 the device is open.
1609 1744
161013.1.2 MT Link Monitoring for Single Switch Topology 174512.1.2 MT Link Monitoring for Single Switch Topology
1611---------------------------------------------------- 1746----------------------------------------------------
1612 1747
1613 The choice of link monitoring may largely depend upon which 1748 The choice of link monitoring may largely depend upon which
@@ -1616,7 +1751,7 @@ support the use of the ARP monitor, and are thus restricted to using
1616the MII monitor (which does not provide as high a level of end to end 1751the MII monitor (which does not provide as high a level of end to end
1617assurance as the ARP monitor). 1752assurance as the ARP monitor).
1618 1753
161913.2 Maximum Throughput in a Multiple Switch Topology 175412.2 Maximum Throughput in a Multiple Switch Topology
1620----------------------------------------------------- 1755-----------------------------------------------------
1621 1756
1622 Multiple switches may be utilized to optimize for throughput 1757 Multiple switches may be utilized to optimize for throughput
@@ -1651,7 +1786,7 @@ a single 72 port switch.
1651can be equipped with an additional network device connected to an 1786can be equipped with an additional network device connected to an
1652external network; this host then additionally acts as a gateway. 1787external network; this host then additionally acts as a gateway.
1653 1788
165413.2.1 MT Bonding Mode Selection for Multiple Switch Topology 178912.2.1 MT Bonding Mode Selection for Multiple Switch Topology
1655------------------------------------------------------------- 1790-------------------------------------------------------------
1656 1791
1657 In actual practice, the bonding mode typically employed in 1792 In actual practice, the bonding mode typically employed in
@@ -1664,7 +1799,7 @@ packets has arrived). When employed in this fashion, the balance-rr
1664mode allows individual connections between two hosts to effectively 1799mode allows individual connections between two hosts to effectively
1665utilize greater than one interface's bandwidth. 1800utilize greater than one interface's bandwidth.
1666 1801
166713.2.2 MT Link Monitoring for Multiple Switch Topology 180212.2.2 MT Link Monitoring for Multiple Switch Topology
1668------------------------------------------------------ 1803------------------------------------------------------
1669 1804
1670 Again, in actual practice, the MII monitor is most often used 1805 Again, in actual practice, the MII monitor is most often used
@@ -1674,10 +1809,10 @@ advantages over the MII monitor are mitigated by the volume of probes
1674needed as the number of systems involved grows (remember that each 1809needed as the number of systems involved grows (remember that each
1675host in the network is configured with bonding). 1810host in the network is configured with bonding).
1676 1811
167714. Switch Behavior Issues 181213. Switch Behavior Issues
1678========================== 1813==========================
1679 1814
168014.1 Link Establishment and Failover Delays 181513.1 Link Establishment and Failover Delays
1681------------------------------------------- 1816-------------------------------------------
1682 1817
1683 Some switches exhibit undesirable behavior with regard to the 1818 Some switches exhibit undesirable behavior with regard to the
@@ -1712,7 +1847,7 @@ switches take a long time to go into backup mode, it may be desirable
1712to not activate a backup interface immediately after a link goes down. 1847to not activate a backup interface immediately after a link goes down.
1713Failover may be delayed via the downdelay bonding module option. 1848Failover may be delayed via the downdelay bonding module option.
1714 1849
171514.2 Duplicated Incoming Packets 185013.2 Duplicated Incoming Packets
1716-------------------------------- 1851--------------------------------
1717 1852
1718 It is not uncommon to observe a short burst of duplicated 1853 It is not uncommon to observe a short burst of duplicated
@@ -1751,14 +1886,14 @@ behavior, it can be induced by clearing the MAC forwarding table (on
1751most Cisco switches, the privileged command "clear mac address-table 1886most Cisco switches, the privileged command "clear mac address-table
1752dynamic" will accomplish this). 1887dynamic" will accomplish this).
1753 1888
175415. Hardware Specific Considerations 188914. Hardware Specific Considerations
1755==================================== 1890====================================
1756 1891
1757 This section contains additional information for configuring 1892 This section contains additional information for configuring
1758bonding on specific hardware platforms, or for interfacing bonding 1893bonding on specific hardware platforms, or for interfacing bonding
1759with particular switches or other devices. 1894with particular switches or other devices.
1760 1895
176115.1 IBM BladeCenter 189614.1 IBM BladeCenter
1762-------------------- 1897--------------------
1763 1898
1764 This applies to the JS20 and similar systems. 1899 This applies to the JS20 and similar systems.
@@ -1861,7 +1996,7 @@ bonding driver.
1861avoid fail-over delay issues when using bonding. 1996avoid fail-over delay issues when using bonding.
1862 1997
1863 1998
186416. Frequently Asked Questions 199915. Frequently Asked Questions
1865============================== 2000==============================
1866 2001
18671. Is it SMP safe? 20021. Is it SMP safe?
@@ -1925,7 +2060,7 @@ not have special switch requirements, but do need device drivers that
1925support specific features (described in the appropriate section under 2060support specific features (described in the appropriate section under
1926module parameters, above). 2061module parameters, above).
1927 2062
1928 In 802.3ad mode, it works with with systems that support IEEE 2063 In 802.3ad mode, it works with systems that support IEEE
1929802.3ad Dynamic Link Aggregation. Most managed and many unmanaged 2064802.3ad Dynamic Link Aggregation. Most managed and many unmanaged
1930switches currently available support 802.3ad. 2065switches currently available support 802.3ad.
1931 2066
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 31ac001f5517..f37170cc1a37 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2780,6 +2780,80 @@ static void e100_shutdown(struct pci_dev *pdev)
2780 DPRINTK(PROBE,ERR, "Error enabling wake\n"); 2780 DPRINTK(PROBE,ERR, "Error enabling wake\n");
2781} 2781}
2782 2782
2783/* ------------------ PCI Error Recovery infrastructure -------------- */
2784/**
2785 * e100_io_error_detected - called when PCI error is detected.
2786 * @pdev: Pointer to PCI device
2787 * @state: The current pci connection state
2788 */
2789static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
2790{
2791 struct net_device *netdev = pci_get_drvdata(pdev);
2792
2793 /* Similar to calling e100_down(), but avoids adapter I/O. */
2794 netdev->stop(netdev);
2795
2796 /* Detach; put netif into state similar to hotplug unplug. */
2797 netif_poll_enable(netdev);
2798 netif_device_detach(netdev);
2799
2800 /* Request a slot reset. */
2801 return PCI_ERS_RESULT_NEED_RESET;
2802}
2803
2804/**
2805 * e100_io_slot_reset - called after the pci bus has been reset.
2806 * @pdev: Pointer to PCI device
2807 *
2808 * Restart the card from scratch.
2809 */
2810static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
2811{
2812 struct net_device *netdev = pci_get_drvdata(pdev);
2813 struct nic *nic = netdev_priv(netdev);
2814
2815 if (pci_enable_device(pdev)) {
2816 printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
2817 return PCI_ERS_RESULT_DISCONNECT;
2818 }
2819 pci_set_master(pdev);
2820
2821 /* Only one device per card can do a reset */
2822 if (0 != PCI_FUNC(pdev->devfn))
2823 return PCI_ERS_RESULT_RECOVERED;
2824 e100_hw_reset(nic);
2825 e100_phy_init(nic);
2826
2827 return PCI_ERS_RESULT_RECOVERED;
2828}
2829
2830/**
2831 * e100_io_resume - resume normal operations
2832 * @pdev: Pointer to PCI device
2833 *
2834 * Resume normal operations after an error recovery
2835 * sequence has been completed.
2836 */
2837static void e100_io_resume(struct pci_dev *pdev)
2838{
2839 struct net_device *netdev = pci_get_drvdata(pdev);
2840 struct nic *nic = netdev_priv(netdev);
2841
2842 /* ack any pending wake events, disable PME */
2843 pci_enable_wake(pdev, 0, 0);
2844
2845 netif_device_attach(netdev);
2846 if (netif_running(netdev)) {
2847 e100_open(netdev);
2848 mod_timer(&nic->watchdog, jiffies);
2849 }
2850}
2851
2852static struct pci_error_handlers e100_err_handler = {
2853 .error_detected = e100_io_error_detected,
2854 .slot_reset = e100_io_slot_reset,
2855 .resume = e100_io_resume,
2856};
2783 2857
2784static struct pci_driver e100_driver = { 2858static struct pci_driver e100_driver = {
2785 .name = DRV_NAME, 2859 .name = DRV_NAME,
@@ -2791,6 +2865,7 @@ static struct pci_driver e100_driver = {
2791 .resume = e100_resume, 2865 .resume = e100_resume,
2792#endif 2866#endif
2793 .shutdown = e100_shutdown, 2867 .shutdown = e100_shutdown,
2868 .err_handler = &e100_err_handler,
2794}; 2869};
2795 2870
2796static int __init e100_init_module(void) 2871static int __init e100_init_module(void)
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 115eff25d8c1..56c7492e3e91 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -189,6 +189,16 @@ static void e1000_shutdown(struct pci_dev *pdev);
189static void e1000_netpoll (struct net_device *netdev); 189static void e1000_netpoll (struct net_device *netdev);
190#endif 190#endif
191 191
192static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
193 pci_channel_state_t state);
194static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev);
195static void e1000_io_resume(struct pci_dev *pdev);
196
197static struct pci_error_handlers e1000_err_handler = {
198 .error_detected = e1000_io_error_detected,
199 .slot_reset = e1000_io_slot_reset,
200 .resume = e1000_io_resume,
201};
192 202
193static struct pci_driver e1000_driver = { 203static struct pci_driver e1000_driver = {
194 .name = e1000_driver_name, 204 .name = e1000_driver_name,
@@ -200,7 +210,8 @@ static struct pci_driver e1000_driver = {
200 .suspend = e1000_suspend, 210 .suspend = e1000_suspend,
201 .resume = e1000_resume, 211 .resume = e1000_resume,
202#endif 212#endif
203 .shutdown = e1000_shutdown 213 .shutdown = e1000_shutdown,
214 .err_handler = &e1000_err_handler
204}; 215};
205 216
206MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); 217MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -3039,6 +3050,10 @@ e1000_update_stats(struct e1000_adapter *adapter)
3039 3050
3040#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF 3051#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3041 3052
3053 /* Prevent stats update while adapter is being reset */
3054 if (adapter->link_speed == 0)
3055 return;
3056
3042 spin_lock_irqsave(&adapter->stats_lock, flags); 3057 spin_lock_irqsave(&adapter->stats_lock, flags);
3043 3058
3044 /* these counters are modified from e1000_adjust_tbi_stats, 3059 /* these counters are modified from e1000_adjust_tbi_stats,
@@ -4590,4 +4605,101 @@ e1000_netpoll(struct net_device *netdev)
4590} 4605}
4591#endif 4606#endif
4592 4607
4608/**
4609 * e1000_io_error_detected - called when PCI error is detected
4610 * @pdev: Pointer to PCI device
4611 * @state: The current pci connection state
4612 *
4613 * This function is called after a PCI bus error affecting
4614 * this device has been detected.
4615 */
4616static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
4617{
4618 struct net_device *netdev = pci_get_drvdata(pdev);
4619 struct e1000_adapter *adapter = netdev->priv;
4620
4621 netif_device_detach(netdev);
4622
4623 if (netif_running(netdev))
4624 e1000_down(adapter);
4625
4626 /* Request a slot reset. */
4627 return PCI_ERS_RESULT_NEED_RESET;
4628}
4629
4630/**
4631 * e1000_io_slot_reset - called after the pci bus has been reset.
4632 * @pdev: Pointer to PCI device
4633 *
4634 * Restart the card from scratch, as if from a cold-boot. Implementation
4635 * resembles the first-half of the e1000_resume routine.
4636 */
4637static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
4638{
4639 struct net_device *netdev = pci_get_drvdata(pdev);
4640 struct e1000_adapter *adapter = netdev->priv;
4641
4642 if (pci_enable_device(pdev)) {
4643 printk(KERN_ERR "e1000: Cannot re-enable PCI device after reset.\n");
4644 return PCI_ERS_RESULT_DISCONNECT;
4645 }
4646 pci_set_master(pdev);
4647
4648 pci_enable_wake(pdev, 3, 0);
4649 pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
4650
4651 /* Perform card reset only on one instance of the card */
4652 if (PCI_FUNC (pdev->devfn) != 0)
4653 return PCI_ERS_RESULT_RECOVERED;
4654
4655 e1000_reset(adapter);
4656 E1000_WRITE_REG(&adapter->hw, WUS, ~0);
4657
4658 return PCI_ERS_RESULT_RECOVERED;
4659}
4660
4661/**
4662 * e1000_io_resume - called when traffic can start flowing again.
4663 * @pdev: Pointer to PCI device
4664 *
4665 * This callback is called when the error recovery driver tells us that
4666 * it's OK to resume normal operation. Implementation resembles the
4667 * second-half of the e1000_resume routine.
4668 */
4669static void e1000_io_resume(struct pci_dev *pdev)
4670{
4671 struct net_device *netdev = pci_get_drvdata(pdev);
4672 struct e1000_adapter *adapter = netdev->priv;
4673 uint32_t manc, swsm;
4674
4675 if (netif_running(netdev)) {
4676 if (e1000_up(adapter)) {
4677 printk("e1000: can't bring device back up after reset\n");
4678 return;
4679 }
4680 }
4681
4682 netif_device_attach(netdev);
4683
4684 if (adapter->hw.mac_type >= e1000_82540 &&
4685 adapter->hw.media_type == e1000_media_type_copper) {
4686 manc = E1000_READ_REG(&adapter->hw, MANC);
4687 manc &= ~(E1000_MANC_ARP_EN);
4688 E1000_WRITE_REG(&adapter->hw, MANC, manc);
4689 }
4690
4691 switch (adapter->hw.mac_type) {
4692 case e1000_82573:
4693 swsm = E1000_READ_REG(&adapter->hw, SWSM);
4694 E1000_WRITE_REG(&adapter->hw, SWSM,
4695 swsm | E1000_SWSM_DRV_LOAD);
4696 break;
4697 default:
4698 break;
4699 }
4700
4701 if (netif_running(netdev))
4702 mod_timer(&adapter->watchdog_timer, jiffies);
4703}
4704
4593/* e1000_main.c */ 4705/* e1000_main.c */