diff options
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/Makefile | 8 | ||||
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 2451 | ||||
-rw-r--r-- | drivers/net/bonding/bond_3ad.h | 300 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 1696 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.h | 141 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 4708 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 252 |
7 files changed, 9556 insertions, 0 deletions
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile new file mode 100644 index 000000000000..cf50384b469e --- /dev/null +++ b/drivers/net/bonding/Makefile | |||
@@ -0,0 +1,8 @@ | |||
#
# Kbuild makefile for the Ethernet Bonding driver
#

# Add bonding.o to the obj-<CONFIG_BONDING> list.
obj-$(CONFIG_BONDING) += bonding.o

# Object files that make up bonding.o.
bonding-objs := bond_main.o bond_3ad.o bond_alb.o

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c new file mode 100644 index 000000000000..6233c4ffb805 --- /dev/null +++ b/drivers/net/bonding/bond_3ad.c | |||
@@ -0,0 +1,2451 @@ | |||
1 | /* | ||
2 | * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called LICENSE. | ||
20 | * | ||
21 | * | ||
22 | * Changes: | ||
23 | * | ||
24 | * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
25 | * Amir Noam <amir.noam at intel dot com> | ||
26 | * - Added support for lacp_rate module param. | ||
27 | * | ||
28 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
29 | * - Based on discussion on mailing list, changed locking scheme | ||
30 | * to use lock/unlock or lock_bh/unlock_bh appropriately instead | ||
31 | * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing | ||
32 | * hidden bugs and solves system hangs that occurred due to the fact | ||
33 | * that holding lock_irqsave doesn't prevent softirqs from running. | ||
34 | * This also increases total throughput since interrupts are not | ||
35 | * blocked on each transmitted packets or monitor timeout. | ||
36 | * | ||
37 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
38 | * - Renamed bond_3ad_link_status_changed() to | ||
39 | * bond_3ad_handle_link_change() for compatibility with TLB. | ||
40 | * | ||
41 | * 2003/05/20 - Amir Noam <amir.noam at intel dot com> | ||
42 | * - Fix long fail over time when releasing last slave of an active | ||
43 | * aggregator - send LACPDU on unbind of slave to tell partner this | ||
44 | * port is no longer aggregatable. | ||
45 | * | ||
46 | * 2003/06/25 - Tsippy Mendelson <tsippy.mendelson at intel dot com> | ||
47 | * - Send LACPDU as highest priority packet to further fix the above | ||
48 | * problem on very high Tx traffic load where packets may get dropped | ||
49 | * by the slave. | ||
50 | * | ||
51 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
52 | * - Code cleanup and style changes | ||
53 | */ | ||
54 | |||
55 | //#define BONDING_DEBUG 1 | ||
56 | |||
57 | #include <linux/skbuff.h> | ||
58 | #include <linux/if_ether.h> | ||
59 | #include <linux/netdevice.h> | ||
60 | #include <linux/spinlock.h> | ||
61 | #include <linux/ethtool.h> | ||
62 | #include <linux/if_bonding.h> | ||
63 | #include <linux/pkt_sched.h> | ||
64 | #include "bonding.h" | ||
65 | #include "bond_3ad.h" | ||
66 | |||
/* General definitions */
#define AD_SHORT_TIMEOUT		1
#define AD_LONG_TIMEOUT			0
#define AD_STANDBY			0x2
#define AD_MAX_TX_IN_SECOND		3
#define AD_COLLECTOR_MAX_DELAY		0

/*
 * Timer definitions (43.4.4 in the 802.3ad standard).
 * __ad_timer_to_ticks() multiplies these by ad_ticks_per_sec, i.e. the
 * values are expressed in seconds.
 */
#define AD_FAST_PERIODIC_TIME		1
#define AD_SLOW_PERIODIC_TIME		30
#define AD_SHORT_TIMEOUT_TIME		(3*AD_FAST_PERIODIC_TIME)
#define AD_LONG_TIMEOUT_TIME		(3*AD_SLOW_PERIODIC_TIME)
#define AD_CHURN_DETECTION_TIME		60
#define AD_AGGREGATE_WAIT_TIME		2

/* Port state bits (43.4.2.2 in the 802.3ad standard) */
#define AD_STATE_LACP_ACTIVITY		0x1
#define AD_STATE_LACP_TIMEOUT		0x2
#define AD_STATE_AGGREGATION		0x4
#define AD_STATE_SYNCHRONIZATION	0x8
#define AD_STATE_COLLECTING		0x10
#define AD_STATE_DISTRIBUTING		0x20
#define AD_STATE_DEFAULTED		0x40
#define AD_STATE_EXPIRED		0x80

/*
 * Port variable bits used by the state machines (43.4.7 in the 802.3ad
 * standard); these are the flags kept in port->sm_vars.
 */
#define AD_PORT_BEGIN			0x1
#define AD_PORT_LACP_ENABLED		0x2
#define AD_PORT_ACTOR_CHURN		0x4
#define AD_PORT_PARTNER_CHURN		0x8
#define AD_PORT_READY			0x10
#define AD_PORT_READY_N			0x20
#define AD_PORT_MATCHED			0x40
#define AD_PORT_STANDBY			0x80
#define AD_PORT_SELECTED		0x100
#define AD_PORT_MOVED			0x200

/*
 * Port key layout.  The key is built from the link speed, the duplex and a
 * user key (the user key is not supported yet):
 *
 *   bits 15..6  user key	(AD_USER_KEY_BITS)
 *   bits  5..1  speed		(AD_SPEED_KEY_BITS)
 *   bit      0  duplex		(AD_DUPLEX_KEY_BITS)
 */
#define AD_DUPLEX_KEY_BITS		0x1
#define AD_SPEED_KEY_BITS		0x3E
#define AD_USER_KEY_BITS		0xFFC0

/* Link speed bitmask values, as returned by __get_link_speed() */
#define AD_LINK_SPEED_BITMASK_1MBPS	0x1
#define AD_LINK_SPEED_BITMASK_10MBPS	0x2
#define AD_LINK_SPEED_BITMASK_100MBPS	0x4
#define AD_LINK_SPEED_BITMASK_1000MBPS	0x8

/*
 * Compare two MAC addresses of ETH_ALEN bytes.  This is memcmp(): the result
 * is zero when the addresses are EQUAL and non-zero when they differ.
 */
#define MAC_ADDRESS_COMPARE(A, B)	(memcmp((A), (B), ETH_ALEN))
124 | |||
125 | static struct mac_addr null_mac_addr = {{0, 0, 0, 0, 0, 0}}; | ||
126 | static u16 ad_ticks_per_sec; | ||
127 | static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; | ||
128 | |||
129 | // ================= 3AD api to bonding and kernel code ================== | ||
130 | static u16 __get_link_speed(struct port *port); | ||
131 | static u8 __get_duplex(struct port *port); | ||
132 | static inline void __initialize_port_locks(struct port *port); | ||
133 | //conversions | ||
134 | static void __ntohs_lacpdu(struct lacpdu *lacpdu); | ||
135 | static u16 __ad_timer_to_ticks(u16 timer_type, u16 Par); | ||
136 | |||
137 | |||
138 | // ================= ad code helper functions ================== | ||
139 | //needed by ad_rx_machine(...) | ||
140 | static void __record_pdu(struct lacpdu *lacpdu, struct port *port); | ||
141 | static void __record_default(struct port *port); | ||
142 | static void __update_selected(struct lacpdu *lacpdu, struct port *port); | ||
143 | static void __update_default_selected(struct port *port); | ||
144 | static void __choose_matched(struct lacpdu *lacpdu, struct port *port); | ||
145 | static void __update_ntt(struct lacpdu *lacpdu, struct port *port); | ||
146 | |||
147 | //needed for ad_mux_machine(..) | ||
148 | static void __attach_bond_to_agg(struct port *port); | ||
149 | static void __detach_bond_from_agg(struct port *port); | ||
150 | static int __agg_ports_are_ready(struct aggregator *aggregator); | ||
151 | static void __set_agg_ports_ready(struct aggregator *aggregator, int val); | ||
152 | |||
153 | //needed for ad_agg_selection_logic(...) | ||
154 | static u32 __get_agg_bandwidth(struct aggregator *aggregator); | ||
155 | static struct aggregator *__get_active_agg(struct aggregator *aggregator); | ||
156 | |||
157 | |||
158 | // ================= main 802.3ad protocol functions ================== | ||
159 | static int ad_lacpdu_send(struct port *port); | ||
160 | static int ad_marker_send(struct port *port, struct marker *marker); | ||
161 | static void ad_mux_machine(struct port *port); | ||
162 | static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); | ||
163 | static void ad_tx_machine(struct port *port); | ||
164 | static void ad_periodic_machine(struct port *port); | ||
165 | static void ad_port_selection_logic(struct port *port); | ||
166 | static void ad_agg_selection_logic(struct aggregator *aggregator); | ||
167 | static void ad_clear_agg(struct aggregator *aggregator); | ||
168 | static void ad_initialize_agg(struct aggregator *aggregator); | ||
169 | static void ad_initialize_port(struct port *port, int lacp_fast); | ||
170 | static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); | ||
171 | static void ad_enable_collecting_distributing(struct port *port); | ||
172 | static void ad_disable_collecting_distributing(struct port *port); | ||
173 | static void ad_marker_info_received(struct marker *marker_info, struct port *port); | ||
174 | static void ad_marker_response_received(struct marker *marker, struct port *port); | ||
175 | |||
176 | |||
177 | ///////////////////////////////////////////////////////////////////////////////// | ||
178 | // ================= api to bonding and kernel code ================== | ||
179 | ///////////////////////////////////////////////////////////////////////////////// | ||
180 | |||
181 | /** | ||
182 | * __get_bond_by_port - get the port's bonding struct | ||
183 | * @port: the port we're looking at | ||
184 | * | ||
185 | * Return @port's bonding struct, or %NULL if it can't be found. | ||
186 | */ | ||
187 | static inline struct bonding *__get_bond_by_port(struct port *port) | ||
188 | { | ||
189 | if (port->slave == NULL) { | ||
190 | return NULL; | ||
191 | } | ||
192 | |||
193 | return bond_get_bond_by_slave(port->slave); | ||
194 | } | ||
195 | |||
196 | /** | ||
197 | * __get_first_port - get the first port in the bond | ||
198 | * @bond: the bond we're looking at | ||
199 | * | ||
200 | * Return the port of the first slave in @bond, or %NULL if it can't be found. | ||
201 | */ | ||
202 | static inline struct port *__get_first_port(struct bonding *bond) | ||
203 | { | ||
204 | if (bond->slave_cnt == 0) { | ||
205 | return NULL; | ||
206 | } | ||
207 | |||
208 | return &(SLAVE_AD_INFO(bond->first_slave).port); | ||
209 | } | ||
210 | |||
211 | /** | ||
212 | * __get_next_port - get the next port in the bond | ||
213 | * @port: the port we're looking at | ||
214 | * | ||
215 | * Return the port of the slave that is next in line of @port's slave in the | ||
216 | * bond, or %NULL if it can't be found. | ||
217 | */ | ||
218 | static inline struct port *__get_next_port(struct port *port) | ||
219 | { | ||
220 | struct bonding *bond = __get_bond_by_port(port); | ||
221 | struct slave *slave = port->slave; | ||
222 | |||
223 | // If there's no bond for this port, or this is the last slave | ||
224 | if ((bond == NULL) || (slave->next == bond->first_slave)) { | ||
225 | return NULL; | ||
226 | } | ||
227 | |||
228 | return &(SLAVE_AD_INFO(slave->next).port); | ||
229 | } | ||
230 | |||
231 | /** | ||
232 | * __get_first_agg - get the first aggregator in the bond | ||
233 | * @bond: the bond we're looking at | ||
234 | * | ||
235 | * Return the aggregator of the first slave in @bond, or %NULL if it can't be | ||
236 | * found. | ||
237 | */ | ||
238 | static inline struct aggregator *__get_first_agg(struct port *port) | ||
239 | { | ||
240 | struct bonding *bond = __get_bond_by_port(port); | ||
241 | |||
242 | // If there's no bond for this port, or bond has no slaves | ||
243 | if ((bond == NULL) || (bond->slave_cnt == 0)) { | ||
244 | return NULL; | ||
245 | } | ||
246 | |||
247 | return &(SLAVE_AD_INFO(bond->first_slave).aggregator); | ||
248 | } | ||
249 | |||
250 | /** | ||
251 | * __get_next_agg - get the next aggregator in the bond | ||
252 | * @aggregator: the aggregator we're looking at | ||
253 | * | ||
254 | * Return the aggregator of the slave that is next in line of @aggregator's | ||
255 | * slave in the bond, or %NULL if it can't be found. | ||
256 | */ | ||
257 | static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) | ||
258 | { | ||
259 | struct slave *slave = aggregator->slave; | ||
260 | struct bonding *bond = bond_get_bond_by_slave(slave); | ||
261 | |||
262 | // If there's no bond for this aggregator, or this is the last slave | ||
263 | if ((bond == NULL) || (slave->next == bond->first_slave)) { | ||
264 | return NULL; | ||
265 | } | ||
266 | |||
267 | return &(SLAVE_AD_INFO(slave->next).aggregator); | ||
268 | } | ||
269 | |||
270 | /** | ||
271 | * __disable_port - disable the port's slave | ||
272 | * @port: the port we're looking at | ||
273 | * | ||
274 | */ | ||
275 | static inline void __disable_port(struct port *port) | ||
276 | { | ||
277 | bond_set_slave_inactive_flags(port->slave); | ||
278 | } | ||
279 | |||
280 | /** | ||
281 | * __enable_port - enable the port's slave, if it's up | ||
282 | * @port: the port we're looking at | ||
283 | * | ||
284 | */ | ||
285 | static inline void __enable_port(struct port *port) | ||
286 | { | ||
287 | struct slave *slave = port->slave; | ||
288 | |||
289 | if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) { | ||
290 | bond_set_slave_active_flags(slave); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /** | ||
295 | * __port_is_enabled - check if the port's slave is in active state | ||
296 | * @port: the port we're looking at | ||
297 | * | ||
298 | */ | ||
299 | static inline int __port_is_enabled(struct port *port) | ||
300 | { | ||
301 | return(port->slave->state == BOND_STATE_ACTIVE); | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * __get_agg_selection_mode - get the aggregator selection mode | ||
306 | * @port: the port we're looking at | ||
307 | * | ||
308 | * Get the aggregator selection mode. Can be %BANDWIDTH or %COUNT. | ||
309 | */ | ||
310 | static inline u32 __get_agg_selection_mode(struct port *port) | ||
311 | { | ||
312 | struct bonding *bond = __get_bond_by_port(port); | ||
313 | |||
314 | if (bond == NULL) { | ||
315 | return AD_BANDWIDTH; | ||
316 | } | ||
317 | |||
318 | return BOND_AD_INFO(bond).agg_select_mode; | ||
319 | } | ||
320 | |||
321 | /** | ||
322 | * __check_agg_selection_timer - check if the selection timer has expired | ||
323 | * @port: the port we're looking at | ||
324 | * | ||
325 | */ | ||
326 | static inline int __check_agg_selection_timer(struct port *port) | ||
327 | { | ||
328 | struct bonding *bond = __get_bond_by_port(port); | ||
329 | |||
330 | if (bond == NULL) { | ||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0; | ||
335 | } | ||
336 | |||
337 | /** | ||
338 | * __get_rx_machine_lock - lock the port's RX machine | ||
339 | * @port: the port we're looking at | ||
340 | * | ||
341 | */ | ||
342 | static inline void __get_rx_machine_lock(struct port *port) | ||
343 | { | ||
344 | spin_lock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); | ||
345 | } | ||
346 | |||
347 | /** | ||
348 | * __release_rx_machine_lock - unlock the port's RX machine | ||
349 | * @port: the port we're looking at | ||
350 | * | ||
351 | */ | ||
352 | static inline void __release_rx_machine_lock(struct port *port) | ||
353 | { | ||
354 | spin_unlock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); | ||
355 | } | ||
356 | |||
357 | /** | ||
358 | * __get_link_speed - get a port's speed | ||
359 | * @port: the port we're looking at | ||
360 | * | ||
361 | * Return @port's speed in 802.3ad bitmask format. i.e. one of: | ||
362 | * 0, | ||
363 | * %AD_LINK_SPEED_BITMASK_10MBPS, | ||
364 | * %AD_LINK_SPEED_BITMASK_100MBPS, | ||
365 | * %AD_LINK_SPEED_BITMASK_1000MBPS | ||
366 | */ | ||
367 | static u16 __get_link_speed(struct port *port) | ||
368 | { | ||
369 | struct slave *slave = port->slave; | ||
370 | u16 speed; | ||
371 | |||
372 | /* this if covers only a special case: when the configuration starts with | ||
373 | * link down, it sets the speed to 0. | ||
374 | * This is done in spite of the fact that the e100 driver reports 0 to be | ||
375 | * compatible with MVT in the future.*/ | ||
376 | if (slave->link != BOND_LINK_UP) { | ||
377 | speed=0; | ||
378 | } else { | ||
379 | switch (slave->speed) { | ||
380 | case SPEED_10: | ||
381 | speed = AD_LINK_SPEED_BITMASK_10MBPS; | ||
382 | break; | ||
383 | |||
384 | case SPEED_100: | ||
385 | speed = AD_LINK_SPEED_BITMASK_100MBPS; | ||
386 | break; | ||
387 | |||
388 | case SPEED_1000: | ||
389 | speed = AD_LINK_SPEED_BITMASK_1000MBPS; | ||
390 | break; | ||
391 | |||
392 | default: | ||
393 | speed = 0; // unknown speed value from ethtool. shouldn't happen | ||
394 | break; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | dprintk("Port %d Received link speed %d update from adapter\n", port->actor_port_number, speed); | ||
399 | return speed; | ||
400 | } | ||
401 | |||
402 | /** | ||
403 | * __get_duplex - get a port's duplex | ||
404 | * @port: the port we're looking at | ||
405 | * | ||
406 | * Return @port's duplex in 802.3ad bitmask format. i.e.: | ||
407 | * 0x01 if in full duplex | ||
408 | * 0x00 otherwise | ||
409 | */ | ||
410 | static u8 __get_duplex(struct port *port) | ||
411 | { | ||
412 | struct slave *slave = port->slave; | ||
413 | |||
414 | u8 retval; | ||
415 | |||
416 | // handling a special case: when the configuration starts with | ||
417 | // link down, it sets the duplex to 0. | ||
418 | if (slave->link != BOND_LINK_UP) { | ||
419 | retval=0x0; | ||
420 | } else { | ||
421 | switch (slave->duplex) { | ||
422 | case DUPLEX_FULL: | ||
423 | retval=0x1; | ||
424 | dprintk("Port %d Received status full duplex update from adapter\n", port->actor_port_number); | ||
425 | break; | ||
426 | case DUPLEX_HALF: | ||
427 | default: | ||
428 | retval=0x0; | ||
429 | dprintk("Port %d Received status NOT full duplex update from adapter\n", port->actor_port_number); | ||
430 | break; | ||
431 | } | ||
432 | } | ||
433 | return retval; | ||
434 | } | ||
435 | |||
436 | /** | ||
437 | * __initialize_port_locks - initialize a port's RX machine spinlock | ||
438 | * @port: the port we're looking at | ||
439 | * | ||
440 | */ | ||
441 | static inline void __initialize_port_locks(struct port *port) | ||
442 | { | ||
443 | // make sure it isn't called twice | ||
444 | spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); | ||
445 | } | ||
446 | |||
447 | //conversions | ||
448 | /** | ||
449 | * __ntohs_lacpdu - convert the contents of a LACPDU to host byte order | ||
450 | * @lacpdu: the speicifed lacpdu | ||
451 | * | ||
452 | * For each multi-byte field in the lacpdu, convert its content | ||
453 | */ | ||
454 | static void __ntohs_lacpdu(struct lacpdu *lacpdu) | ||
455 | { | ||
456 | if (lacpdu) { | ||
457 | lacpdu->actor_system_priority = ntohs(lacpdu->actor_system_priority); | ||
458 | lacpdu->actor_key = ntohs(lacpdu->actor_key); | ||
459 | lacpdu->actor_port_priority = ntohs(lacpdu->actor_port_priority); | ||
460 | lacpdu->actor_port = ntohs(lacpdu->actor_port); | ||
461 | lacpdu->partner_system_priority = ntohs(lacpdu->partner_system_priority); | ||
462 | lacpdu->partner_key = ntohs(lacpdu->partner_key); | ||
463 | lacpdu->partner_port_priority = ntohs(lacpdu->partner_port_priority); | ||
464 | lacpdu->partner_port = ntohs(lacpdu->partner_port); | ||
465 | lacpdu->collector_max_delay = ntohs(lacpdu->collector_max_delay); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /** | ||
470 | * __ad_timer_to_ticks - convert a given timer type to AD module ticks | ||
471 | * @timer_type: which timer to operate | ||
472 | * @par: timer parameter. see below | ||
473 | * | ||
474 | * If @timer_type is %current_while_timer, @par indicates long/short timer. | ||
475 | * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, | ||
476 | * %SLOW_PERIODIC_TIME. | ||
477 | */ | ||
478 | static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) | ||
479 | { | ||
480 | u16 retval=0; //to silence the compiler | ||
481 | |||
482 | switch (timer_type) { | ||
483 | case AD_CURRENT_WHILE_TIMER: // for rx machine usage | ||
484 | if (par) { // for short or long timeout | ||
485 | retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout | ||
486 | } else { | ||
487 | retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout | ||
488 | } | ||
489 | break; | ||
490 | case AD_ACTOR_CHURN_TIMER: // for local churn machine | ||
491 | retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); | ||
492 | break; | ||
493 | case AD_PERIODIC_TIMER: // for periodic machine | ||
494 | retval = (par*ad_ticks_per_sec); // long timeout | ||
495 | break; | ||
496 | case AD_PARTNER_CHURN_TIMER: // for remote churn machine | ||
497 | retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); | ||
498 | break; | ||
499 | case AD_WAIT_WHILE_TIMER: // for selection machine | ||
500 | retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); | ||
501 | break; | ||
502 | } | ||
503 | return retval; | ||
504 | } | ||
505 | |||
506 | |||
507 | ///////////////////////////////////////////////////////////////////////////////// | ||
508 | // ================= ad_rx_machine helper functions ================== | ||
509 | ///////////////////////////////////////////////////////////////////////////////// | ||
510 | |||
511 | /** | ||
512 | * __record_pdu - record parameters from a received lacpdu | ||
513 | * @lacpdu: the lacpdu we've received | ||
514 | * @port: the port we're looking at | ||
515 | * | ||
516 | * Record the parameter values for the Actor carried in a received lacpdu as | ||
517 | * the current partner operational parameter values and sets | ||
518 | * actor_oper_port_state.defaulted to FALSE. | ||
519 | */ | ||
520 | static void __record_pdu(struct lacpdu *lacpdu, struct port *port) | ||
521 | { | ||
522 | // validate lacpdu and port | ||
523 | if (lacpdu && port) { | ||
524 | // record the new parameter values for the partner operational | ||
525 | port->partner_oper_port_number = lacpdu->actor_port; | ||
526 | port->partner_oper_port_priority = lacpdu->actor_port_priority; | ||
527 | port->partner_oper_system = lacpdu->actor_system; | ||
528 | port->partner_oper_system_priority = lacpdu->actor_system_priority; | ||
529 | port->partner_oper_key = lacpdu->actor_key; | ||
530 | // zero partener's lase states | ||
531 | port->partner_oper_port_state = 0; | ||
532 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_ACTIVITY); | ||
533 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_TIMEOUT); | ||
534 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_AGGREGATION); | ||
535 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION); | ||
536 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_COLLECTING); | ||
537 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DISTRIBUTING); | ||
538 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DEFAULTED); | ||
539 | port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_EXPIRED); | ||
540 | |||
541 | // set actor_oper_port_state.defaulted to FALSE | ||
542 | port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; | ||
543 | |||
544 | // set the partner sync. to on if the partner is sync. and the port is matched | ||
545 | if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) { | ||
546 | port->partner_oper_port_state |= AD_STATE_SYNCHRONIZATION; | ||
547 | } else { | ||
548 | port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; | ||
549 | } | ||
550 | } | ||
551 | } | ||
552 | |||
553 | /** | ||
554 | * __record_default - record default parameters | ||
555 | * @port: the port we're looking at | ||
556 | * | ||
557 | * This function records the default parameter values for the partner carried | ||
558 | * in the Partner Admin parameters as the current partner operational parameter | ||
559 | * values and sets actor_oper_port_state.defaulted to TRUE. | ||
560 | */ | ||
561 | static void __record_default(struct port *port) | ||
562 | { | ||
563 | // validate the port | ||
564 | if (port) { | ||
565 | // record the partner admin parameters | ||
566 | port->partner_oper_port_number = port->partner_admin_port_number; | ||
567 | port->partner_oper_port_priority = port->partner_admin_port_priority; | ||
568 | port->partner_oper_system = port->partner_admin_system; | ||
569 | port->partner_oper_system_priority = port->partner_admin_system_priority; | ||
570 | port->partner_oper_key = port->partner_admin_key; | ||
571 | port->partner_oper_port_state = port->partner_admin_port_state; | ||
572 | |||
573 | // set actor_oper_port_state.defaulted to true | ||
574 | port->actor_oper_port_state |= AD_STATE_DEFAULTED; | ||
575 | } | ||
576 | } | ||
577 | |||
578 | /** | ||
579 | * __update_selected - update a port's Selected variable from a received lacpdu | ||
580 | * @lacpdu: the lacpdu we've received | ||
581 | * @port: the port we're looking at | ||
582 | * | ||
583 | * Update the value of the selected variable, using parameter values from a | ||
584 | * newly received lacpdu. The parameter values for the Actor carried in the | ||
585 | * received PDU are compared with the corresponding operational parameter | ||
586 | * values for the ports partner. If one or more of the comparisons shows that | ||
587 | * the value(s) received in the PDU differ from the current operational values, | ||
588 | * then selected is set to FALSE and actor_oper_port_state.synchronization is | ||
589 | * set to out_of_sync. Otherwise, selected remains unchanged. | ||
590 | */ | ||
591 | static void __update_selected(struct lacpdu *lacpdu, struct port *port) | ||
592 | { | ||
593 | // validate lacpdu and port | ||
594 | if (lacpdu && port) { | ||
595 | // check if any parameter is different | ||
596 | if ((lacpdu->actor_port != port->partner_oper_port_number) || | ||
597 | (lacpdu->actor_port_priority != port->partner_oper_port_priority) || | ||
598 | MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->partner_oper_system)) || | ||
599 | (lacpdu->actor_system_priority != port->partner_oper_system_priority) || | ||
600 | (lacpdu->actor_key != port->partner_oper_key) || | ||
601 | ((lacpdu->actor_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) | ||
602 | ) { | ||
603 | // update the state machine Selected variable | ||
604 | port->sm_vars &= ~AD_PORT_SELECTED; | ||
605 | } | ||
606 | } | ||
607 | } | ||
608 | |||
609 | /** | ||
610 | * __update_default_selected - update a port's Selected variable from Partner | ||
611 | * @port: the port we're looking at | ||
612 | * | ||
613 | * This function updates the value of the selected variable, using the partner | ||
614 | * administrative parameter values. The administrative values are compared with | ||
615 | * the corresponding operational parameter values for the partner. If one or | ||
616 | * more of the comparisons shows that the administrative value(s) differ from | ||
617 | * the current operational values, then Selected is set to FALSE and | ||
618 | * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, | ||
619 | * Selected remains unchanged. | ||
620 | */ | ||
621 | static void __update_default_selected(struct port *port) | ||
622 | { | ||
623 | // validate the port | ||
624 | if (port) { | ||
625 | // check if any parameter is different | ||
626 | if ((port->partner_admin_port_number != port->partner_oper_port_number) || | ||
627 | (port->partner_admin_port_priority != port->partner_oper_port_priority) || | ||
628 | MAC_ADDRESS_COMPARE(&(port->partner_admin_system), &(port->partner_oper_system)) || | ||
629 | (port->partner_admin_system_priority != port->partner_oper_system_priority) || | ||
630 | (port->partner_admin_key != port->partner_oper_key) || | ||
631 | ((port->partner_admin_port_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) | ||
632 | ) { | ||
633 | // update the state machine Selected variable | ||
634 | port->sm_vars &= ~AD_PORT_SELECTED; | ||
635 | } | ||
636 | } | ||
637 | } | ||
638 | |||
639 | /** | ||
640 | * __choose_matched - update a port's matched variable from a received lacpdu | ||
641 | * @lacpdu: the lacpdu we've received | ||
642 | * @port: the port we're looking at | ||
643 | * | ||
644 | * Update the value of the matched variable, using parameter values from a | ||
645 | * newly received lacpdu. Parameter values for the partner carried in the | ||
646 | * received PDU are compared with the corresponding operational parameter | ||
647 | * values for the actor. Matched is set to TRUE if all of these parameters | ||
648 | * match and the PDU parameter partner_state.aggregation has the same value as | ||
649 | * actor_oper_port_state.aggregation and lacp will actively maintain the link | ||
650 | * in the aggregation. Matched is also set to TRUE if the value of | ||
651 | * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates | ||
652 | * an individual link and lacp will actively maintain the link. Otherwise, | ||
653 | * matched is set to FALSE. LACP is considered to be actively maintaining the | ||
654 | * link if either the PDU's actor_state.lacp_activity variable is TRUE or both | ||
655 | * the actor's actor_oper_port_state.lacp_activity and the PDU's | ||
656 | * partner_state.lacp_activity variables are TRUE. | ||
657 | */ | ||
658 | static void __choose_matched(struct lacpdu *lacpdu, struct port *port) | ||
659 | { | ||
660 | // validate lacpdu and port | ||
661 | if (lacpdu && port) { | ||
662 | // check if all parameters are alike | ||
663 | if (((lacpdu->partner_port == port->actor_port_number) && | ||
664 | (lacpdu->partner_port_priority == port->actor_port_priority) && | ||
665 | !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && | ||
666 | (lacpdu->partner_system_priority == port->actor_system_priority) && | ||
667 | (lacpdu->partner_key == port->actor_oper_port_key) && | ||
668 | ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || | ||
669 | // or this is individual link(aggregation == FALSE) | ||
670 | ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) | ||
671 | ) { | ||
672 | // update the state machine Matched variable | ||
673 | port->sm_vars |= AD_PORT_MATCHED; | ||
674 | } else { | ||
675 | port->sm_vars &= ~AD_PORT_MATCHED; | ||
676 | } | ||
677 | } | ||
678 | } | ||
679 | |||
680 | /** | ||
681 | * __update_ntt - update a port's ntt variable from a received lacpdu | ||
682 | * @lacpdu: the lacpdu we've received | ||
683 | * @port: the port we're looking at | ||
684 | * | ||
685 | * Updates the value of the ntt variable, using parameter values from a newly | ||
686 | * received lacpdu. The parameter values for the partner carried in the | ||
687 | * received PDU are compared with the corresponding operational parameter | ||
688 | * values for the Actor. If one or more of the comparisons shows that the | ||
689 | * value(s) received in the PDU differ from the current operational values, | ||
690 | * then ntt is set to TRUE. Otherwise, ntt remains unchanged. | ||
691 | */ | ||
692 | static void __update_ntt(struct lacpdu *lacpdu, struct port *port) | ||
693 | { | ||
694 | // validate lacpdu and port | ||
695 | if (lacpdu && port) { | ||
696 | // check if any parameter is different | ||
697 | if ((lacpdu->partner_port != port->actor_port_number) || | ||
698 | (lacpdu->partner_port_priority != port->actor_port_priority) || | ||
699 | MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) || | ||
700 | (lacpdu->partner_system_priority != port->actor_system_priority) || | ||
701 | (lacpdu->partner_key != port->actor_oper_port_key) || | ||
702 | ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || | ||
703 | ((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) || | ||
704 | ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) || | ||
705 | ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION)) | ||
706 | ) { | ||
707 | // set ntt to be TRUE | ||
708 | port->ntt = 1; | ||
709 | } | ||
710 | } | ||
711 | } | ||
712 | |||
/**
 * __attach_bond_to_agg
 * @port: the port we're looking at
 *
 * Handle the attaching of the port's control parser/multiplexer and the
 * aggregator. This function does nothing since the parser/multiplexer of the
 * receive and the parser/multiplexer of the aggregator are already combined.
 */
static void __attach_bond_to_agg(struct port *port)
{
	// Intentionally empty: the receive parser/multiplexer and the
	// aggregator parser/multiplexer are already combined.
	(void)port; // parameter is unused; keeps the compiler quiet
}
727 | |||
/**
 * __detach_bond_from_agg
 * @port: the port we're looking at
 *
 * Handle the detaching of the port's control parser/multiplexer from the
 * aggregator. This function does nothing since the parser/multiplexer of the
 * receive and the parser/multiplexer of the aggregator are already combined.
 */
static void __detach_bond_from_agg(struct port *port)
{
	// Intentionally empty: the receive parser/multiplexer and the
	// aggregator parser/multiplexer are already combined.
	(void)port; // parameter is unused; keeps the compiler quiet
}
742 | |||
743 | /** | ||
744 | * __agg_ports_are_ready - check if all ports in an aggregator are ready | ||
745 | * @aggregator: the aggregator we're looking at | ||
746 | * | ||
747 | */ | ||
748 | static int __agg_ports_are_ready(struct aggregator *aggregator) | ||
749 | { | ||
750 | struct port *port; | ||
751 | int retval = 1; | ||
752 | |||
753 | if (aggregator) { | ||
754 | // scan all ports in this aggregator to verfy if they are all ready | ||
755 | for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { | ||
756 | if (!(port->sm_vars & AD_PORT_READY_N)) { | ||
757 | retval = 0; | ||
758 | break; | ||
759 | } | ||
760 | } | ||
761 | } | ||
762 | |||
763 | return retval; | ||
764 | } | ||
765 | |||
766 | /** | ||
767 | * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator | ||
768 | * @aggregator: the aggregator we're looking at | ||
769 | * @val: Should the ports' ready bit be set on or off | ||
770 | * | ||
771 | */ | ||
772 | static void __set_agg_ports_ready(struct aggregator *aggregator, int val) | ||
773 | { | ||
774 | struct port *port; | ||
775 | |||
776 | for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { | ||
777 | if (val) { | ||
778 | port->sm_vars |= AD_PORT_READY; | ||
779 | } else { | ||
780 | port->sm_vars &= ~AD_PORT_READY; | ||
781 | } | ||
782 | } | ||
783 | } | ||
784 | |||
785 | /** | ||
786 | * __get_agg_bandwidth - get the total bandwidth of an aggregator | ||
787 | * @aggregator: the aggregator we're looking at | ||
788 | * | ||
789 | */ | ||
790 | static u32 __get_agg_bandwidth(struct aggregator *aggregator) | ||
791 | { | ||
792 | u32 bandwidth=0; | ||
793 | u32 basic_speed; | ||
794 | |||
795 | if (aggregator->num_of_ports) { | ||
796 | basic_speed = __get_link_speed(aggregator->lag_ports); | ||
797 | switch (basic_speed) { | ||
798 | case AD_LINK_SPEED_BITMASK_1MBPS: | ||
799 | bandwidth = aggregator->num_of_ports; | ||
800 | break; | ||
801 | case AD_LINK_SPEED_BITMASK_10MBPS: | ||
802 | bandwidth = aggregator->num_of_ports * 10; | ||
803 | break; | ||
804 | case AD_LINK_SPEED_BITMASK_100MBPS: | ||
805 | bandwidth = aggregator->num_of_ports * 100; | ||
806 | break; | ||
807 | case AD_LINK_SPEED_BITMASK_1000MBPS: | ||
808 | bandwidth = aggregator->num_of_ports * 1000; | ||
809 | break; | ||
810 | default: | ||
811 | bandwidth=0; // to silent the compilor .... | ||
812 | } | ||
813 | } | ||
814 | return bandwidth; | ||
815 | } | ||
816 | |||
817 | /** | ||
818 | * __get_active_agg - get the current active aggregator | ||
819 | * @aggregator: the aggregator we're looking at | ||
820 | * | ||
821 | */ | ||
822 | static struct aggregator *__get_active_agg(struct aggregator *aggregator) | ||
823 | { | ||
824 | struct aggregator *retval = NULL; | ||
825 | |||
826 | for (; aggregator; aggregator = __get_next_agg(aggregator)) { | ||
827 | if (aggregator->is_active) { | ||
828 | retval = aggregator; | ||
829 | break; | ||
830 | } | ||
831 | } | ||
832 | |||
833 | return retval; | ||
834 | } | ||
835 | |||
836 | /** | ||
837 | * __update_lacpdu_from_port - update a port's lacpdu fields | ||
838 | * @port: the port we're looking at | ||
839 | * | ||
840 | */ | ||
841 | static inline void __update_lacpdu_from_port(struct port *port) | ||
842 | { | ||
843 | struct lacpdu *lacpdu = &port->lacpdu; | ||
844 | |||
845 | /* update current actual Actor parameters */ | ||
846 | /* lacpdu->subtype initialized | ||
847 | * lacpdu->version_number initialized | ||
848 | * lacpdu->tlv_type_actor_info initialized | ||
849 | * lacpdu->actor_information_length initialized | ||
850 | */ | ||
851 | |||
852 | lacpdu->actor_system_priority = port->actor_system_priority; | ||
853 | lacpdu->actor_system = port->actor_system; | ||
854 | lacpdu->actor_key = port->actor_oper_port_key; | ||
855 | lacpdu->actor_port_priority = port->actor_port_priority; | ||
856 | lacpdu->actor_port = port->actor_port_number; | ||
857 | lacpdu->actor_state = port->actor_oper_port_state; | ||
858 | |||
859 | /* lacpdu->reserved_3_1 initialized | ||
860 | * lacpdu->tlv_type_partner_info initialized | ||
861 | * lacpdu->partner_information_length initialized | ||
862 | */ | ||
863 | |||
864 | lacpdu->partner_system_priority = port->partner_oper_system_priority; | ||
865 | lacpdu->partner_system = port->partner_oper_system; | ||
866 | lacpdu->partner_key = port->partner_oper_key; | ||
867 | lacpdu->partner_port_priority = port->partner_oper_port_priority; | ||
868 | lacpdu->partner_port = port->partner_oper_port_number; | ||
869 | lacpdu->partner_state = port->partner_oper_port_state; | ||
870 | |||
871 | /* lacpdu->reserved_3_2 initialized | ||
872 | * lacpdu->tlv_type_collector_info initialized | ||
873 | * lacpdu->collector_information_length initialized | ||
874 | * collector_max_delay initialized | ||
875 | * reserved_12[12] initialized | ||
876 | * tlv_type_terminator initialized | ||
877 | * terminator_length initialized | ||
878 | * reserved_50[50] initialized | ||
879 | */ | ||
880 | |||
881 | /* Convert all non u8 parameters to Big Endian for transmit */ | ||
882 | __ntohs_lacpdu(lacpdu); | ||
883 | } | ||
884 | |||
885 | ////////////////////////////////////////////////////////////////////////////////////// | ||
886 | // ================= main 802.3ad protocol code ====================================== | ||
887 | ////////////////////////////////////////////////////////////////////////////////////// | ||
888 | |||
889 | /** | ||
890 | * ad_lacpdu_send - send out a lacpdu packet on a given port | ||
891 | * @port: the port we're looking at | ||
892 | * | ||
893 | * Returns: 0 on success | ||
894 | * < 0 on error | ||
895 | */ | ||
896 | static int ad_lacpdu_send(struct port *port) | ||
897 | { | ||
898 | struct slave *slave = port->slave; | ||
899 | struct sk_buff *skb; | ||
900 | struct lacpdu_header *lacpdu_header; | ||
901 | int length = sizeof(struct lacpdu_header); | ||
902 | struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; | ||
903 | |||
904 | skb = dev_alloc_skb(length); | ||
905 | if (!skb) { | ||
906 | return -ENOMEM; | ||
907 | } | ||
908 | |||
909 | skb->dev = slave->dev; | ||
910 | skb->mac.raw = skb->data; | ||
911 | skb->nh.raw = skb->data + ETH_HLEN; | ||
912 | skb->protocol = PKT_TYPE_LACPDU; | ||
913 | skb->priority = TC_PRIO_CONTROL; | ||
914 | |||
915 | lacpdu_header = (struct lacpdu_header *)skb_put(skb, length); | ||
916 | |||
917 | lacpdu_header->ad_header.destination_address = lacpdu_multicast_address; | ||
918 | /* Note: source addres is set to be the member's PERMANENT address, because we use it | ||
919 | to identify loopback lacpdus in receive. */ | ||
920 | lacpdu_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); | ||
921 | lacpdu_header->ad_header.length_type = PKT_TYPE_LACPDU; | ||
922 | |||
923 | lacpdu_header->lacpdu = port->lacpdu; // struct copy | ||
924 | |||
925 | dev_queue_xmit(skb); | ||
926 | |||
927 | return 0; | ||
928 | } | ||
929 | |||
930 | /** | ||
931 | * ad_marker_send - send marker information/response on a given port | ||
932 | * @port: the port we're looking at | ||
933 | * @marker: marker data to send | ||
934 | * | ||
935 | * Returns: 0 on success | ||
936 | * < 0 on error | ||
937 | */ | ||
938 | static int ad_marker_send(struct port *port, struct marker *marker) | ||
939 | { | ||
940 | struct slave *slave = port->slave; | ||
941 | struct sk_buff *skb; | ||
942 | struct marker_header *marker_header; | ||
943 | int length = sizeof(struct marker_header); | ||
944 | struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; | ||
945 | |||
946 | skb = dev_alloc_skb(length + 16); | ||
947 | if (!skb) { | ||
948 | return -ENOMEM; | ||
949 | } | ||
950 | |||
951 | skb_reserve(skb, 16); | ||
952 | |||
953 | skb->dev = slave->dev; | ||
954 | skb->mac.raw = skb->data; | ||
955 | skb->nh.raw = skb->data + ETH_HLEN; | ||
956 | skb->protocol = PKT_TYPE_LACPDU; | ||
957 | |||
958 | marker_header = (struct marker_header *)skb_put(skb, length); | ||
959 | |||
960 | marker_header->ad_header.destination_address = lacpdu_multicast_address; | ||
961 | /* Note: source addres is set to be the member's PERMANENT address, because we use it | ||
962 | to identify loopback MARKERs in receive. */ | ||
963 | marker_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); | ||
964 | marker_header->ad_header.length_type = PKT_TYPE_LACPDU; | ||
965 | |||
966 | marker_header->marker = *marker; // struct copy | ||
967 | |||
968 | dev_queue_xmit(skb); | ||
969 | |||
970 | return 0; | ||
971 | } | ||
972 | |||
973 | /** | ||
974 | * ad_mux_machine - handle a port's mux state machine | ||
975 | * @port: the port we're looking at | ||
976 | * | ||
977 | */ | ||
978 | static void ad_mux_machine(struct port *port) | ||
979 | { | ||
980 | mux_states_t last_state; | ||
981 | |||
982 | // keep current State Machine state to compare later if it was changed | ||
983 | last_state = port->sm_mux_state; | ||
984 | |||
985 | if (port->sm_vars & AD_PORT_BEGIN) { | ||
986 | port->sm_mux_state = AD_MUX_DETACHED; // next state | ||
987 | } else { | ||
988 | switch (port->sm_mux_state) { | ||
989 | case AD_MUX_DETACHED: | ||
990 | if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if SELECTED or STANDBY | ||
991 | port->sm_mux_state = AD_MUX_WAITING; // next state | ||
992 | } | ||
993 | break; | ||
994 | case AD_MUX_WAITING: | ||
995 | // if SELECTED == FALSE return to DETACH state | ||
996 | if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED | ||
997 | port->sm_vars &= ~AD_PORT_READY_N; | ||
998 | // in order to withhold the Selection Logic to check all ports READY_N value | ||
999 | // every callback cycle to update ready variable, we check READY_N and update READY here | ||
1000 | __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); | ||
1001 | port->sm_mux_state = AD_MUX_DETACHED; // next state | ||
1002 | break; | ||
1003 | } | ||
1004 | |||
1005 | // check if the wait_while_timer expired | ||
1006 | if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) { | ||
1007 | port->sm_vars |= AD_PORT_READY_N; | ||
1008 | } | ||
1009 | |||
1010 | // in order to withhold the selection logic to check all ports READY_N value | ||
1011 | // every callback cycle to update ready variable, we check READY_N and update READY here | ||
1012 | __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); | ||
1013 | |||
1014 | // if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state | ||
1015 | if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) { | ||
1016 | port->sm_mux_state = AD_MUX_ATTACHED; // next state | ||
1017 | } | ||
1018 | break; | ||
1019 | case AD_MUX_ATTACHED: | ||
1020 | // check also if agg_select_timer expired(so the edable port will take place only after this timer) | ||
1021 | if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { | ||
1022 | port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state | ||
1023 | } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if UNSELECTED or STANDBY | ||
1024 | port->sm_vars &= ~AD_PORT_READY_N; | ||
1025 | // in order to withhold the selection logic to check all ports READY_N value | ||
1026 | // every callback cycle to update ready variable, we check READY_N and update READY here | ||
1027 | __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); | ||
1028 | port->sm_mux_state = AD_MUX_DETACHED;// next state | ||
1029 | } | ||
1030 | break; | ||
1031 | case AD_MUX_COLLECTING_DISTRIBUTING: | ||
1032 | if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || | ||
1033 | !(port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) | ||
1034 | ) { | ||
1035 | port->sm_mux_state = AD_MUX_ATTACHED;// next state | ||
1036 | |||
1037 | } else { | ||
1038 | // if port state hasn't changed make | ||
1039 | // sure that a collecting distributing | ||
1040 | // port in an active aggregator is enabled | ||
1041 | if (port->aggregator && | ||
1042 | port->aggregator->is_active && | ||
1043 | !__port_is_enabled(port)) { | ||
1044 | |||
1045 | __enable_port(port); | ||
1046 | } | ||
1047 | } | ||
1048 | break; | ||
1049 | default: //to silence the compiler | ||
1050 | break; | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | // check if the state machine was changed | ||
1055 | if (port->sm_mux_state != last_state) { | ||
1056 | dprintk("Mux Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_mux_state); | ||
1057 | switch (port->sm_mux_state) { | ||
1058 | case AD_MUX_DETACHED: | ||
1059 | __detach_bond_from_agg(port); | ||
1060 | port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; | ||
1061 | ad_disable_collecting_distributing(port); | ||
1062 | port->actor_oper_port_state &= ~AD_STATE_COLLECTING; | ||
1063 | port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; | ||
1064 | port->ntt = 1; | ||
1065 | break; | ||
1066 | case AD_MUX_WAITING: | ||
1067 | port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); | ||
1068 | break; | ||
1069 | case AD_MUX_ATTACHED: | ||
1070 | __attach_bond_to_agg(port); | ||
1071 | port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; | ||
1072 | port->actor_oper_port_state &= ~AD_STATE_COLLECTING; | ||
1073 | port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; | ||
1074 | ad_disable_collecting_distributing(port); | ||
1075 | port->ntt = 1; | ||
1076 | break; | ||
1077 | case AD_MUX_COLLECTING_DISTRIBUTING: | ||
1078 | port->actor_oper_port_state |= AD_STATE_COLLECTING; | ||
1079 | port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; | ||
1080 | ad_enable_collecting_distributing(port); | ||
1081 | port->ntt = 1; | ||
1082 | break; | ||
1083 | default: //to silence the compiler | ||
1084 | break; | ||
1085 | } | ||
1086 | } | ||
1087 | } | ||
1088 | |||
1089 | /** | ||
1090 | * ad_rx_machine - handle a port's rx State Machine | ||
1091 | * @lacpdu: the lacpdu we've received | ||
1092 | * @port: the port we're looking at | ||
1093 | * | ||
1094 | * If lacpdu arrived, stop previous timer (if exists) and set the next state as | ||
1095 | * CURRENT. If timer expired set the state machine in the proper state. | ||
1096 | * In other cases, this function checks if we need to switch to other state. | ||
1097 | */ | ||
1098 | static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) | ||
1099 | { | ||
1100 | rx_states_t last_state; | ||
1101 | |||
1102 | // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback) | ||
1103 | __get_rx_machine_lock(port); | ||
1104 | |||
1105 | // keep current State Machine state to compare later if it was changed | ||
1106 | last_state = port->sm_rx_state; | ||
1107 | |||
1108 | // check if state machine should change state | ||
1109 | // first, check if port was reinitialized | ||
1110 | if (port->sm_vars & AD_PORT_BEGIN) { | ||
1111 | port->sm_rx_state = AD_RX_INITIALIZE; // next state | ||
1112 | } | ||
1113 | // check if port is not enabled | ||
1114 | else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) { | ||
1115 | port->sm_rx_state = AD_RX_PORT_DISABLED; // next state | ||
1116 | } | ||
1117 | // check if new lacpdu arrived | ||
1118 | else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { | ||
1119 | port->sm_rx_timer_counter = 0; // zero timer | ||
1120 | port->sm_rx_state = AD_RX_CURRENT; | ||
1121 | } else { | ||
1122 | // if timer is on, and if it is expired | ||
1123 | if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { | ||
1124 | switch (port->sm_rx_state) { | ||
1125 | case AD_RX_EXPIRED: | ||
1126 | port->sm_rx_state = AD_RX_DEFAULTED; // next state | ||
1127 | break; | ||
1128 | case AD_RX_CURRENT: | ||
1129 | port->sm_rx_state = AD_RX_EXPIRED; // next state | ||
1130 | break; | ||
1131 | default: //to silence the compiler | ||
1132 | break; | ||
1133 | } | ||
1134 | } else { | ||
1135 | // if no lacpdu arrived and no timer is on | ||
1136 | switch (port->sm_rx_state) { | ||
1137 | case AD_RX_PORT_DISABLED: | ||
1138 | if (port->sm_vars & AD_PORT_MOVED) { | ||
1139 | port->sm_rx_state = AD_RX_INITIALIZE; // next state | ||
1140 | } else if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) { | ||
1141 | port->sm_rx_state = AD_RX_EXPIRED; // next state | ||
1142 | } else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) { | ||
1143 | port->sm_rx_state = AD_RX_LACP_DISABLED; // next state | ||
1144 | } | ||
1145 | break; | ||
1146 | default: //to silence the compiler | ||
1147 | break; | ||
1148 | |||
1149 | } | ||
1150 | } | ||
1151 | } | ||
1152 | |||
1153 | // check if the State machine was changed or new lacpdu arrived | ||
1154 | if ((port->sm_rx_state != last_state) || (lacpdu)) { | ||
1155 | dprintk("Rx Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_rx_state); | ||
1156 | switch (port->sm_rx_state) { | ||
1157 | case AD_RX_INITIALIZE: | ||
1158 | if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { | ||
1159 | port->sm_vars &= ~AD_PORT_LACP_ENABLED; | ||
1160 | } else { | ||
1161 | port->sm_vars |= AD_PORT_LACP_ENABLED; | ||
1162 | } | ||
1163 | port->sm_vars &= ~AD_PORT_SELECTED; | ||
1164 | __record_default(port); | ||
1165 | port->actor_oper_port_state &= ~AD_STATE_EXPIRED; | ||
1166 | port->sm_vars &= ~AD_PORT_MOVED; | ||
1167 | port->sm_rx_state = AD_RX_PORT_DISABLED; // next state | ||
1168 | |||
1169 | /*- Fall Through -*/ | ||
1170 | |||
1171 | case AD_RX_PORT_DISABLED: | ||
1172 | port->sm_vars &= ~AD_PORT_MATCHED; | ||
1173 | break; | ||
1174 | case AD_RX_LACP_DISABLED: | ||
1175 | port->sm_vars &= ~AD_PORT_SELECTED; | ||
1176 | __record_default(port); | ||
1177 | port->partner_oper_port_state &= ~AD_STATE_AGGREGATION; | ||
1178 | port->sm_vars |= AD_PORT_MATCHED; | ||
1179 | port->actor_oper_port_state &= ~AD_STATE_EXPIRED; | ||
1180 | break; | ||
1181 | case AD_RX_EXPIRED: | ||
1182 | //Reset of the Synchronization flag. (Standard 43.4.12) | ||
1183 | //This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the | ||
1184 | //mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port. | ||
1185 | port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; | ||
1186 | port->sm_vars &= ~AD_PORT_MATCHED; | ||
1187 | port->partner_oper_port_state |= AD_SHORT_TIMEOUT; | ||
1188 | port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); | ||
1189 | port->actor_oper_port_state |= AD_STATE_EXPIRED; | ||
1190 | break; | ||
1191 | case AD_RX_DEFAULTED: | ||
1192 | __update_default_selected(port); | ||
1193 | __record_default(port); | ||
1194 | port->sm_vars |= AD_PORT_MATCHED; | ||
1195 | port->actor_oper_port_state &= ~AD_STATE_EXPIRED; | ||
1196 | break; | ||
1197 | case AD_RX_CURRENT: | ||
1198 | // detect loopback situation | ||
1199 | if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) { | ||
1200 | // INFO_RECEIVED_LOOPBACK_FRAMES | ||
1201 | printk(KERN_ERR DRV_NAME ": An illegal loopback occurred on adapter (%s)\n", | ||
1202 | port->slave->dev->name); | ||
1203 | printk(KERN_ERR "Check the configuration to verify that all Adapters " | ||
1204 | "are connected to 802.3ad compliant switch ports\n"); | ||
1205 | __release_rx_machine_lock(port); | ||
1206 | return; | ||
1207 | } | ||
1208 | __update_selected(lacpdu, port); | ||
1209 | __update_ntt(lacpdu, port); | ||
1210 | __record_pdu(lacpdu, port); | ||
1211 | __choose_matched(lacpdu, port); | ||
1212 | port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)); | ||
1213 | port->actor_oper_port_state &= ~AD_STATE_EXPIRED; | ||
1214 | // verify that if the aggregator is enabled, the port is enabled too. | ||
1215 | //(because if the link goes down for a short time, the 802.3ad will not | ||
1216 | // catch it, and the port will continue to be disabled) | ||
1217 | if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { | ||
1218 | __enable_port(port); | ||
1219 | } | ||
1220 | break; | ||
1221 | default: //to silence the compiler | ||
1222 | break; | ||
1223 | } | ||
1224 | } | ||
1225 | __release_rx_machine_lock(port); | ||
1226 | } | ||
1227 | |||
1228 | /** | ||
1229 | * ad_tx_machine - handle a port's tx state machine | ||
1230 | * @port: the port we're looking at | ||
1231 | * | ||
1232 | */ | ||
1233 | static void ad_tx_machine(struct port *port) | ||
1234 | { | ||
1235 | // check if tx timer expired, to verify that we do not send more than 3 packets per second | ||
1236 | if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { | ||
1237 | // check if there is something to send | ||
1238 | if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { | ||
1239 | __update_lacpdu_from_port(port); | ||
1240 | // send the lacpdu | ||
1241 | if (ad_lacpdu_send(port) >= 0) { | ||
1242 | dprintk("Sent LACPDU on port %d\n", port->actor_port_number); | ||
1243 | // mark ntt as false, so it will not be sent again until demanded | ||
1244 | port->ntt = 0; | ||
1245 | } | ||
1246 | } | ||
1247 | // restart tx timer(to verify that we will not exceed AD_MAX_TX_IN_SECOND | ||
1248 | port->sm_tx_timer_counter=ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; | ||
1249 | } | ||
1250 | } | ||
1251 | |||
1252 | /** | ||
1253 | * ad_periodic_machine - handle a port's periodic state machine | ||
1254 | * @port: the port we're looking at | ||
1255 | * | ||
1256 | * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. | ||
1257 | */ | ||
1258 | static void ad_periodic_machine(struct port *port) | ||
1259 | { | ||
1260 | periodic_states_t last_state; | ||
1261 | |||
1262 | // keep current state machine state to compare later if it was changed | ||
1263 | last_state = port->sm_periodic_state; | ||
1264 | |||
1265 | // check if port was reinitialized | ||
1266 | if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || | ||
1267 | (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper_port_state & AD_STATE_LACP_ACTIVITY)) | ||
1268 | ) { | ||
1269 | port->sm_periodic_state = AD_NO_PERIODIC; // next state | ||
1270 | } | ||
1271 | // check if state machine should change state | ||
1272 | else if (port->sm_periodic_timer_counter) { | ||
1273 | // check if periodic state machine expired | ||
1274 | if (!(--port->sm_periodic_timer_counter)) { | ||
1275 | // if expired then do tx | ||
1276 | port->sm_periodic_state = AD_PERIODIC_TX; // next state | ||
1277 | } else { | ||
1278 | // If not expired, check if there is some new timeout parameter from the partner state | ||
1279 | switch (port->sm_periodic_state) { | ||
1280 | case AD_FAST_PERIODIC: | ||
1281 | if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { | ||
1282 | port->sm_periodic_state = AD_SLOW_PERIODIC; // next state | ||
1283 | } | ||
1284 | break; | ||
1285 | case AD_SLOW_PERIODIC: | ||
1286 | if ((port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { | ||
1287 | // stop current timer | ||
1288 | port->sm_periodic_timer_counter = 0; | ||
1289 | port->sm_periodic_state = AD_PERIODIC_TX; // next state | ||
1290 | } | ||
1291 | break; | ||
1292 | default: //to silence the compiler | ||
1293 | break; | ||
1294 | } | ||
1295 | } | ||
1296 | } else { | ||
1297 | switch (port->sm_periodic_state) { | ||
1298 | case AD_NO_PERIODIC: | ||
1299 | port->sm_periodic_state = AD_FAST_PERIODIC; // next state | ||
1300 | break; | ||
1301 | case AD_PERIODIC_TX: | ||
1302 | if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { | ||
1303 | port->sm_periodic_state = AD_SLOW_PERIODIC; // next state | ||
1304 | } else { | ||
1305 | port->sm_periodic_state = AD_FAST_PERIODIC; // next state | ||
1306 | } | ||
1307 | break; | ||
1308 | default: //to silence the compiler | ||
1309 | break; | ||
1310 | } | ||
1311 | } | ||
1312 | |||
1313 | // check if the state machine was changed | ||
1314 | if (port->sm_periodic_state != last_state) { | ||
1315 | dprintk("Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_periodic_state); | ||
1316 | switch (port->sm_periodic_state) { | ||
1317 | case AD_NO_PERIODIC: | ||
1318 | port->sm_periodic_timer_counter = 0; // zero timer | ||
1319 | break; | ||
1320 | case AD_FAST_PERIODIC: | ||
1321 | port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle | ||
1322 | break; | ||
1323 | case AD_SLOW_PERIODIC: | ||
1324 | port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle | ||
1325 | break; | ||
1326 | case AD_PERIODIC_TX: | ||
1327 | port->ntt = 1; | ||
1328 | break; | ||
1329 | default: //to silence the compiler | ||
1330 | break; | ||
1331 | } | ||
1332 | } | ||
1333 | } | ||
1334 | |||
1335 | /** | ||
1336 | * ad_port_selection_logic - select aggregation groups | ||
1337 | * @port: the port we're looking at | ||
1338 | * | ||
1339 | * Select aggregation groups, and assign each port for it's aggregetor. The | ||
1340 | * selection logic is called in the inititalization (after all the handshkes), | ||
1341 | * and after every lacpdu receive (if selected is off). | ||
1342 | */ | ||
1343 | static void ad_port_selection_logic(struct port *port) | ||
1344 | { | ||
1345 | struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; | ||
1346 | struct port *last_port = NULL, *curr_port; | ||
1347 | int found = 0; | ||
1348 | |||
1349 | // if the port is already Selected, do nothing | ||
1350 | if (port->sm_vars & AD_PORT_SELECTED) { | ||
1351 | return; | ||
1352 | } | ||
1353 | |||
1354 | // if the port is connected to other aggregator, detach it | ||
1355 | if (port->aggregator) { | ||
1356 | // detach the port from its former aggregator | ||
1357 | temp_aggregator=port->aggregator; | ||
1358 | for (curr_port=temp_aggregator->lag_ports; curr_port; last_port=curr_port, curr_port=curr_port->next_port_in_aggregator) { | ||
1359 | if (curr_port == port) { | ||
1360 | temp_aggregator->num_of_ports--; | ||
1361 | if (!last_port) {// if it is the first port attached to the aggregator | ||
1362 | temp_aggregator->lag_ports=port->next_port_in_aggregator; | ||
1363 | } else {// not the first port attached to the aggregator | ||
1364 | last_port->next_port_in_aggregator=port->next_port_in_aggregator; | ||
1365 | } | ||
1366 | |||
1367 | // clear the port's relations to this aggregator | ||
1368 | port->aggregator = NULL; | ||
1369 | port->next_port_in_aggregator=NULL; | ||
1370 | port->actor_port_aggregator_identifier=0; | ||
1371 | |||
1372 | dprintk("Port %d left LAG %d\n", port->actor_port_number, temp_aggregator->aggregator_identifier); | ||
1373 | // if the aggregator is empty, clear its parameters, and set it ready to be attached | ||
1374 | if (!temp_aggregator->lag_ports) { | ||
1375 | ad_clear_agg(temp_aggregator); | ||
1376 | } | ||
1377 | break; | ||
1378 | } | ||
1379 | } | ||
1380 | if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list | ||
1381 | printk(KERN_WARNING DRV_NAME ": Warning: Port %d (on %s) was " | ||
1382 | "related to aggregator %d but was not on its port list\n", | ||
1383 | port->actor_port_number, port->slave->dev->name, | ||
1384 | port->aggregator->aggregator_identifier); | ||
1385 | } | ||
1386 | } | ||
1387 | // search on all aggregators for a suitable aggregator for this port | ||
1388 | for (aggregator = __get_first_agg(port); aggregator; | ||
1389 | aggregator = __get_next_agg(aggregator)) { | ||
1390 | |||
1391 | // keep a free aggregator for later use(if needed) | ||
1392 | if (!aggregator->lag_ports) { | ||
1393 | if (!free_aggregator) { | ||
1394 | free_aggregator=aggregator; | ||
1395 | } | ||
1396 | continue; | ||
1397 | } | ||
1398 | // check if current aggregator suits us | ||
1399 | if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND | ||
1400 | !MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper_system)) && | ||
1401 | (aggregator->partner_system_priority == port->partner_oper_system_priority) && | ||
1402 | (aggregator->partner_oper_aggregator_key == port->partner_oper_key) | ||
1403 | ) && | ||
1404 | ((MAC_ADDRESS_COMPARE(&(port->partner_oper_system), &(null_mac_addr)) && // partner answers | ||
1405 | !aggregator->is_individual) // but is not individual OR | ||
1406 | ) | ||
1407 | ) { | ||
1408 | // attach to the founded aggregator | ||
1409 | port->aggregator = aggregator; | ||
1410 | port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; | ||
1411 | port->next_port_in_aggregator=aggregator->lag_ports; | ||
1412 | port->aggregator->num_of_ports++; | ||
1413 | aggregator->lag_ports=port; | ||
1414 | dprintk("Port %d joined LAG %d(existing LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); | ||
1415 | |||
1416 | // mark this port as selected | ||
1417 | port->sm_vars |= AD_PORT_SELECTED; | ||
1418 | found = 1; | ||
1419 | break; | ||
1420 | } | ||
1421 | } | ||
1422 | |||
1423 | // the port couldn't find an aggregator - attach it to a new aggregator | ||
1424 | if (!found) { | ||
1425 | if (free_aggregator) { | ||
1426 | // assign port a new aggregator | ||
1427 | port->aggregator = free_aggregator; | ||
1428 | port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; | ||
1429 | |||
1430 | // update the new aggregator's parameters | ||
1431 | // if port was responsed from the end-user | ||
1432 | if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS) {// if port is full duplex | ||
1433 | port->aggregator->is_individual = 0; | ||
1434 | } else { | ||
1435 | port->aggregator->is_individual = 1; | ||
1436 | } | ||
1437 | |||
1438 | port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; | ||
1439 | port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; | ||
1440 | port->aggregator->partner_system=port->partner_oper_system; | ||
1441 | port->aggregator->partner_system_priority = port->partner_oper_system_priority; | ||
1442 | port->aggregator->partner_oper_aggregator_key = port->partner_oper_key; | ||
1443 | port->aggregator->receive_state = 1; | ||
1444 | port->aggregator->transmit_state = 1; | ||
1445 | port->aggregator->lag_ports = port; | ||
1446 | port->aggregator->num_of_ports++; | ||
1447 | |||
1448 | // mark this port as selected | ||
1449 | port->sm_vars |= AD_PORT_SELECTED; | ||
1450 | |||
1451 | dprintk("Port %d joined LAG %d(new LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); | ||
1452 | } else { | ||
1453 | printk(KERN_ERR DRV_NAME ": Port %d (on %s) did not find a suitable aggregator\n", | ||
1454 | port->actor_port_number, port->slave->dev->name); | ||
1455 | } | ||
1456 | } | ||
1457 | // if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports | ||
1458 | // else set ready=FALSE in all aggregator's ports | ||
1459 | __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); | ||
1460 | |||
1461 | if (!__check_agg_selection_timer(port) && (aggregator = __get_first_agg(port))) { | ||
1462 | ad_agg_selection_logic(aggregator); | ||
1463 | } | ||
1464 | } | ||
1465 | |||
1466 | /** | ||
1467 | * ad_agg_selection_logic - select an aggregation group for a team | ||
1468 | * @aggregator: the aggregator we're looking at | ||
1469 | * | ||
1470 | * It is assumed that only one aggregator may be selected for a team. | ||
1471 | * The logic of this function is to select (at first time) the aggregator with | ||
1472 | * the most ports attached to it, and to reselect the active aggregator only if | ||
1473 | * the previous aggregator has no more ports related to it. | ||
1474 | * | ||
1475 | * FIXME: this function MUST be called with the first agg in the bond, or | ||
1476 | * __get_active_agg() won't work correctly. This function should be better | ||
1477 | * called with the bond itself, and retrieve the first agg from it. | ||
1478 | */ | ||
1479 | static void ad_agg_selection_logic(struct aggregator *aggregator) | ||
1480 | { | ||
1481 | struct aggregator *best_aggregator = NULL, *active_aggregator = NULL; | ||
1482 | struct aggregator *last_active_aggregator = NULL, *origin_aggregator; | ||
1483 | struct port *port; | ||
1484 | u16 num_of_aggs=0; | ||
1485 | |||
1486 | origin_aggregator = aggregator; | ||
1487 | |||
1488 | //get current active aggregator | ||
1489 | last_active_aggregator = __get_active_agg(aggregator); | ||
1490 | |||
1491 | // search for the aggregator with the most ports attached to it. | ||
1492 | do { | ||
1493 | // count how many candidate lag's we have | ||
1494 | if (aggregator->lag_ports) { | ||
1495 | num_of_aggs++; | ||
1496 | } | ||
1497 | if (aggregator->is_active && !aggregator->is_individual && // if current aggregator is the active aggregator | ||
1498 | MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr))) { // and partner answers to 802.3ad PDUs | ||
1499 | if (aggregator->num_of_ports) { // if any ports attached to the current aggregator | ||
1500 | best_aggregator=NULL; // disregard the best aggregator that was chosen by now | ||
1501 | break; // stop the selection of other aggregator if there are any ports attached to this active aggregator | ||
1502 | } else { // no ports attached to this active aggregator | ||
1503 | aggregator->is_active = 0; // mark this aggregator as not active anymore | ||
1504 | } | ||
1505 | } | ||
1506 | if (aggregator->num_of_ports) { // if any ports attached | ||
1507 | if (best_aggregator) { // if there is a candidte aggregator | ||
1508 | //The reasons for choosing new best aggregator: | ||
1509 | // 1. if current agg is NOT individual and the best agg chosen so far is individual OR | ||
1510 | // current and best aggs are both individual or both not individual, AND | ||
1511 | // 2a. current agg partner reply but best agg partner do not reply OR | ||
1512 | // 2b. current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply AND | ||
1513 | // current has more ports/bandwidth, or same amount of ports but current has faster ports, THEN | ||
1514 | // current agg become best agg so far | ||
1515 | |||
1516 | //if current agg is NOT individual and the best agg chosen so far is individual change best_aggregator | ||
1517 | if (!aggregator->is_individual && best_aggregator->is_individual) { | ||
1518 | best_aggregator=aggregator; | ||
1519 | } | ||
1520 | // current and best aggs are both individual or both not individual | ||
1521 | else if ((aggregator->is_individual && best_aggregator->is_individual) || | ||
1522 | (!aggregator->is_individual && !best_aggregator->is_individual)) { | ||
1523 | // current and best aggs are both individual or both not individual AND | ||
1524 | // current agg partner reply but best agg partner do not reply | ||
1525 | if ((MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && | ||
1526 | !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { | ||
1527 | best_aggregator=aggregator; | ||
1528 | } | ||
1529 | // current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply | ||
1530 | else if (! (!MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && | ||
1531 | MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { | ||
1532 | if ((__get_agg_selection_mode(aggregator->lag_ports) == AD_BANDWIDTH)&& | ||
1533 | (__get_agg_bandwidth(aggregator) > __get_agg_bandwidth(best_aggregator))) { | ||
1534 | best_aggregator=aggregator; | ||
1535 | } else if (__get_agg_selection_mode(aggregator->lag_ports) == AD_COUNT) { | ||
1536 | if (((aggregator->num_of_ports > best_aggregator->num_of_ports) && | ||
1537 | (aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS))|| | ||
1538 | ((aggregator->num_of_ports == best_aggregator->num_of_ports) && | ||
1539 | ((u16)(aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS) > | ||
1540 | (u16)(best_aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS)))) { | ||
1541 | best_aggregator=aggregator; | ||
1542 | } | ||
1543 | } | ||
1544 | } | ||
1545 | } | ||
1546 | } else { | ||
1547 | best_aggregator=aggregator; | ||
1548 | } | ||
1549 | } | ||
1550 | aggregator->is_active = 0; // mark all aggregators as not active anymore | ||
1551 | } while ((aggregator = __get_next_agg(aggregator))); | ||
1552 | |||
1553 | // if we have new aggregator selected, don't replace the old aggregator if it has an answering partner, | ||
1554 | // or if both old aggregator and new aggregator don't have answering partner | ||
1555 | if (best_aggregator) { | ||
1556 | if (last_active_aggregator && last_active_aggregator->lag_ports && last_active_aggregator->lag_ports->is_enabled && | ||
1557 | (MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) || // partner answers OR | ||
1558 | (!MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) && // both old and new | ||
1559 | !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) // partner do not answer | ||
1560 | ) { | ||
1561 | // if new aggregator has link, and old aggregator does not, replace old aggregator.(do nothing) | ||
1562 | // -> don't replace otherwise. | ||
1563 | if (!(!last_active_aggregator->actor_oper_aggregator_key && best_aggregator->actor_oper_aggregator_key)) { | ||
1564 | best_aggregator=NULL; | ||
1565 | last_active_aggregator->is_active = 1; // don't replace good old aggregator | ||
1566 | |||
1567 | } | ||
1568 | } | ||
1569 | } | ||
1570 | |||
1571 | // if there is new best aggregator, activate it | ||
1572 | if (best_aggregator) { | ||
1573 | for (aggregator = __get_first_agg(best_aggregator->lag_ports); | ||
1574 | aggregator; | ||
1575 | aggregator = __get_next_agg(aggregator)) { | ||
1576 | |||
1577 | dprintk("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d\n", | ||
1578 | aggregator->aggregator_identifier, aggregator->num_of_ports, | ||
1579 | aggregator->actor_oper_aggregator_key, aggregator->partner_oper_aggregator_key, | ||
1580 | aggregator->is_individual, aggregator->is_active); | ||
1581 | } | ||
1582 | |||
1583 | // check if any partner replys | ||
1584 | if (best_aggregator->is_individual) { | ||
1585 | printk(KERN_WARNING DRV_NAME ": Warning: No 802.3ad response from the link partner " | ||
1586 | "for any adapters in the bond\n"); | ||
1587 | } | ||
1588 | |||
1589 | // check if there are more than one aggregator | ||
1590 | if (num_of_aggs > 1) { | ||
1591 | dprintk("Warning: More than one Link Aggregation Group was " | ||
1592 | "found in the bond. Only one group will function in the bond\n"); | ||
1593 | } | ||
1594 | |||
1595 | best_aggregator->is_active = 1; | ||
1596 | dprintk("LAG %d choosed as the active LAG\n", best_aggregator->aggregator_identifier); | ||
1597 | dprintk("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d\n", | ||
1598 | best_aggregator->aggregator_identifier, best_aggregator->num_of_ports, | ||
1599 | best_aggregator->actor_oper_aggregator_key, best_aggregator->partner_oper_aggregator_key, | ||
1600 | best_aggregator->is_individual, best_aggregator->is_active); | ||
1601 | |||
1602 | // disable the ports that were related to the former active_aggregator | ||
1603 | if (last_active_aggregator) { | ||
1604 | for (port=last_active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { | ||
1605 | __disable_port(port); | ||
1606 | } | ||
1607 | } | ||
1608 | } | ||
1609 | |||
1610 | // if the selected aggregator is of join individuals(partner_system is NULL), enable their ports | ||
1611 | active_aggregator = __get_active_agg(origin_aggregator); | ||
1612 | |||
1613 | if (active_aggregator) { | ||
1614 | if (!MAC_ADDRESS_COMPARE(&(active_aggregator->partner_system), &(null_mac_addr))) { | ||
1615 | for (port=active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { | ||
1616 | __enable_port(port); | ||
1617 | } | ||
1618 | } | ||
1619 | } | ||
1620 | } | ||
1621 | |||
1622 | /** | ||
1623 | * ad_clear_agg - clear a given aggregator's parameters | ||
1624 | * @aggregator: the aggregator we're looking at | ||
1625 | * | ||
1626 | */ | ||
1627 | static void ad_clear_agg(struct aggregator *aggregator) | ||
1628 | { | ||
1629 | if (aggregator) { | ||
1630 | aggregator->is_individual = 0; | ||
1631 | aggregator->actor_admin_aggregator_key = 0; | ||
1632 | aggregator->actor_oper_aggregator_key = 0; | ||
1633 | aggregator->partner_system = null_mac_addr; | ||
1634 | aggregator->partner_system_priority = 0; | ||
1635 | aggregator->partner_oper_aggregator_key = 0; | ||
1636 | aggregator->receive_state = 0; | ||
1637 | aggregator->transmit_state = 0; | ||
1638 | aggregator->lag_ports = NULL; | ||
1639 | aggregator->is_active = 0; | ||
1640 | aggregator->num_of_ports = 0; | ||
1641 | dprintk("LAG %d was cleared\n", aggregator->aggregator_identifier); | ||
1642 | } | ||
1643 | } | ||
1644 | |||
1645 | /** | ||
1646 | * ad_initialize_agg - initialize a given aggregator's parameters | ||
1647 | * @aggregator: the aggregator we're looking at | ||
1648 | * | ||
1649 | */ | ||
1650 | static void ad_initialize_agg(struct aggregator *aggregator) | ||
1651 | { | ||
1652 | if (aggregator) { | ||
1653 | ad_clear_agg(aggregator); | ||
1654 | |||
1655 | aggregator->aggregator_mac_address = null_mac_addr; | ||
1656 | aggregator->aggregator_identifier = 0; | ||
1657 | aggregator->slave = NULL; | ||
1658 | } | ||
1659 | } | ||
1660 | |||
1661 | /** | ||
1662 | * ad_initialize_port - initialize a given port's parameters | ||
1663 | * @aggregator: the aggregator we're looking at | ||
1664 | * @lacp_fast: boolean. whether fast periodic should be used | ||
1665 | * | ||
1666 | */ | ||
1667 | static void ad_initialize_port(struct port *port, int lacp_fast) | ||
1668 | { | ||
1669 | if (port) { | ||
1670 | port->actor_port_number = 1; | ||
1671 | port->actor_port_priority = 0xff; | ||
1672 | port->actor_system = null_mac_addr; | ||
1673 | port->actor_system_priority = 0xffff; | ||
1674 | port->actor_port_aggregator_identifier = 0; | ||
1675 | port->ntt = 0; | ||
1676 | port->actor_admin_port_key = 1; | ||
1677 | port->actor_oper_port_key = 1; | ||
1678 | port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; | ||
1679 | port->actor_oper_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; | ||
1680 | |||
1681 | if (lacp_fast) { | ||
1682 | port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; | ||
1683 | } | ||
1684 | |||
1685 | port->partner_admin_system = null_mac_addr; | ||
1686 | port->partner_oper_system = null_mac_addr; | ||
1687 | port->partner_admin_system_priority = 0xffff; | ||
1688 | port->partner_oper_system_priority = 0xffff; | ||
1689 | port->partner_admin_key = 1; | ||
1690 | port->partner_oper_key = 1; | ||
1691 | port->partner_admin_port_number = 1; | ||
1692 | port->partner_oper_port_number = 1; | ||
1693 | port->partner_admin_port_priority = 0xff; | ||
1694 | port->partner_oper_port_priority = 0xff; | ||
1695 | port->partner_admin_port_state = 1; | ||
1696 | port->partner_oper_port_state = 1; | ||
1697 | port->is_enabled = 1; | ||
1698 | // ****** private parameters ****** | ||
1699 | port->sm_vars = 0x3; | ||
1700 | port->sm_rx_state = 0; | ||
1701 | port->sm_rx_timer_counter = 0; | ||
1702 | port->sm_periodic_state = 0; | ||
1703 | port->sm_periodic_timer_counter = 0; | ||
1704 | port->sm_mux_state = 0; | ||
1705 | port->sm_mux_timer_counter = 0; | ||
1706 | port->sm_tx_state = 0; | ||
1707 | port->sm_tx_timer_counter = 0; | ||
1708 | port->slave = NULL; | ||
1709 | port->aggregator = NULL; | ||
1710 | port->next_port_in_aggregator = NULL; | ||
1711 | port->transaction_id = 0; | ||
1712 | |||
1713 | ad_initialize_lacpdu(&(port->lacpdu)); | ||
1714 | } | ||
1715 | } | ||
1716 | |||
1717 | /** | ||
1718 | * ad_enable_collecting_distributing - enable a port's transmit/receive | ||
1719 | * @port: the port we're looking at | ||
1720 | * | ||
1721 | * Enable @port if it's in an active aggregator | ||
1722 | */ | ||
1723 | static void ad_enable_collecting_distributing(struct port *port) | ||
1724 | { | ||
1725 | if (port->aggregator->is_active) { | ||
1726 | dprintk("Enabling port %d(LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); | ||
1727 | __enable_port(port); | ||
1728 | } | ||
1729 | } | ||
1730 | |||
1731 | /** | ||
1732 | * ad_disable_collecting_distributing - disable a port's transmit/receive | ||
1733 | * @port: the port we're looking at | ||
1734 | * | ||
1735 | */ | ||
1736 | static void ad_disable_collecting_distributing(struct port *port) | ||
1737 | { | ||
1738 | if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) { | ||
1739 | dprintk("Disabling port %d(LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); | ||
1740 | __disable_port(port); | ||
1741 | } | ||
1742 | } | ||
1743 | |||
#if 0
/**
 * ad_marker_info_send - send a marker information frame
 * @port: the port we're looking at
 *
 * Builds a Marker Information PDU for @port (bumping the port's transaction
 * id) and hands it to ad_marker_send().
 *
 * This function is compiled out since we decided not to implement send and
 * handle response for marker PDU's, in this stage, but only to respond to
 * marker information.
 */
static void ad_marker_info_send(struct port *port)
{
	struct marker marker;
	u16 index;

	// fill the marker PDU with the appropriate values
	marker.subtype = 0x02;
	marker.version_number = 0x01;
	marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE;
	marker.marker_length = 0x16;
	// byte-swap requester_port (intended as a conversion to Big Endian)
	// NOTE(review): the swap is unconditional, so it presumably assumes a
	// little-endian host - confirm before enabling this code
	marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8));
	marker.requester_system = port->actor_system;
	// Advance the transaction id in a statement of its own: incrementing it
	// inside the expression below while also reading it there is unsequenced
	// and therefore undefined behaviour.
	port->transaction_id++;
	// byte-swap the transaction id (intended as a conversion to Big Endian)
	marker.requester_transaction_id = (((port->transaction_id & 0xFF) << 24) |((port->transaction_id & 0xFF00) << 8) |((port->transaction_id & 0xFF0000) >> 8) |((port->transaction_id & 0xFF000000) >> 24));
	marker.pad = 0;
	marker.tlv_type_terminator = 0x00;
	marker.terminator_length = 0x00;
	for (index=0; index<90; index++) {
		marker.reserved_90[index]=0;
	}

	// send the marker information
	if (ad_marker_send(port, &marker) >= 0) {
		dprintk("Sent Marker Information on port %d\n", port->actor_port_number);
	}
}
#endif
1781 | |||
1782 | /** | ||
1783 | * ad_marker_info_received - handle receive of a Marker information frame | ||
1784 | * @marker_info: Marker info received | ||
1785 | * @port: the port we're looking at | ||
1786 | * | ||
1787 | */ | ||
1788 | static void ad_marker_info_received(struct marker *marker_info,struct port *port) | ||
1789 | { | ||
1790 | struct marker marker; | ||
1791 | |||
1792 | // copy the received marker data to the response marker | ||
1793 | //marker = *marker_info; | ||
1794 | memcpy(&marker, marker_info, sizeof(struct marker)); | ||
1795 | // change the marker subtype to marker response | ||
1796 | marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; | ||
1797 | // send the marker response | ||
1798 | |||
1799 | if (ad_marker_send(port, &marker) >= 0) { | ||
1800 | dprintk("Sent Marker Response on port %d\n", port->actor_port_number); | ||
1801 | } | ||
1802 | } | ||
1803 | |||
/**
 * ad_marker_response_received - handle receive of a marker response frame
 * @marker: marker PDU received
 * @port: the port we're looking at
 *
 * Intentionally a no-op: we decided not to implement send and handle response
 * for marker PDU's, in this stage, but only to respond to marker information.
 */
static void ad_marker_response_received(struct marker *marker, struct port *port)
{
	// reference the parameters only to keep the compiler quiet
	(void)marker;
	(void)port;
	// DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW
}
1819 | |||
1820 | /** | ||
1821 | * ad_initialize_lacpdu - initialize a given lacpdu structure | ||
1822 | * @lacpdu: lacpdu structure to initialize | ||
1823 | * | ||
1824 | */ | ||
1825 | static void ad_initialize_lacpdu(struct lacpdu *lacpdu) | ||
1826 | { | ||
1827 | u16 index; | ||
1828 | |||
1829 | // initialize lacpdu data | ||
1830 | lacpdu->subtype = 0x01; | ||
1831 | lacpdu->version_number = 0x01; | ||
1832 | lacpdu->tlv_type_actor_info = 0x01; | ||
1833 | lacpdu->actor_information_length = 0x14; | ||
1834 | // lacpdu->actor_system_priority updated on send | ||
1835 | // lacpdu->actor_system updated on send | ||
1836 | // lacpdu->actor_key updated on send | ||
1837 | // lacpdu->actor_port_priority updated on send | ||
1838 | // lacpdu->actor_port updated on send | ||
1839 | // lacpdu->actor_state updated on send | ||
1840 | lacpdu->tlv_type_partner_info = 0x02; | ||
1841 | lacpdu->partner_information_length = 0x14; | ||
1842 | for (index=0; index<=2; index++) { | ||
1843 | lacpdu->reserved_3_1[index]=0; | ||
1844 | } | ||
1845 | // lacpdu->partner_system_priority updated on send | ||
1846 | // lacpdu->partner_system updated on send | ||
1847 | // lacpdu->partner_key updated on send | ||
1848 | // lacpdu->partner_port_priority updated on send | ||
1849 | // lacpdu->partner_port updated on send | ||
1850 | // lacpdu->partner_state updated on send | ||
1851 | for (index=0; index<=2; index++) { | ||
1852 | lacpdu->reserved_3_2[index]=0; | ||
1853 | } | ||
1854 | lacpdu->tlv_type_collector_info = 0x03; | ||
1855 | lacpdu->collector_information_length= 0x10; | ||
1856 | lacpdu->collector_max_delay = AD_COLLECTOR_MAX_DELAY; | ||
1857 | for (index=0; index<=11; index++) { | ||
1858 | lacpdu->reserved_12[index]=0; | ||
1859 | } | ||
1860 | lacpdu->tlv_type_terminator = 0x00; | ||
1861 | lacpdu->terminator_length = 0; | ||
1862 | for (index=0; index<=49; index++) { | ||
1863 | lacpdu->reserved_50[index]=0; | ||
1864 | } | ||
1865 | } | ||
1866 | |||
1867 | ////////////////////////////////////////////////////////////////////////////////////// | ||
1868 | // ================= AD exported functions to the main bonding code ================== | ||
1869 | ////////////////////////////////////////////////////////////////////////////////////// | ||
1870 | |||
1871 | // Check aggregators status in team every T seconds | ||
1872 | #define AD_AGGREGATOR_SELECTION_TIMER 8 | ||
1873 | |||
1874 | static u16 aggregator_identifier; | ||
1875 | |||
1876 | /** | ||
1877 | * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures | ||
1878 | * @bond: bonding struct to work on | ||
1879 | * @tick_resolution: tick duration (millisecond resolution) | ||
1880 | * @lacp_fast: boolean. whether fast periodic should be used | ||
1881 | * | ||
1882 | * Can be called only after the mac address of the bond is set. | ||
1883 | */ | ||
1884 | void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast) | ||
1885 | { | ||
1886 | // check that the bond is not initialized yet | ||
1887 | if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), &(bond->dev->dev_addr))) { | ||
1888 | |||
1889 | aggregator_identifier = 0; | ||
1890 | |||
1891 | BOND_AD_INFO(bond).lacp_fast = lacp_fast; | ||
1892 | BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; | ||
1893 | BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); | ||
1894 | |||
1895 | // initialize how many times this module is called in one second(should be about every 100ms) | ||
1896 | ad_ticks_per_sec = tick_resolution; | ||
1897 | |||
1898 | // initialize the aggregator selection timer(to activate an aggregation selection after initialize) | ||
1899 | BOND_AD_INFO(bond).agg_select_timer = (AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); | ||
1900 | BOND_AD_INFO(bond).agg_select_mode = AD_BANDWIDTH; | ||
1901 | } | ||
1902 | } | ||
1903 | |||
1904 | /** | ||
1905 | * bond_3ad_bind_slave - initialize a slave's port | ||
1906 | * @slave: slave struct to work on | ||
1907 | * | ||
1908 | * Returns: 0 on success | ||
1909 | * < 0 on error | ||
1910 | */ | ||
1911 | int bond_3ad_bind_slave(struct slave *slave) | ||
1912 | { | ||
1913 | struct bonding *bond = bond_get_bond_by_slave(slave); | ||
1914 | struct port *port; | ||
1915 | struct aggregator *aggregator; | ||
1916 | |||
1917 | if (bond == NULL) { | ||
1918 | printk(KERN_ERR "The slave %s is not attached to its bond\n", slave->dev->name); | ||
1919 | return -1; | ||
1920 | } | ||
1921 | |||
1922 | //check that the slave has not been intialized yet. | ||
1923 | if (SLAVE_AD_INFO(slave).port.slave != slave) { | ||
1924 | |||
1925 | // port initialization | ||
1926 | port = &(SLAVE_AD_INFO(slave).port); | ||
1927 | |||
1928 | ad_initialize_port(port, BOND_AD_INFO(bond).lacp_fast); | ||
1929 | |||
1930 | port->slave = slave; | ||
1931 | port->actor_port_number = SLAVE_AD_INFO(slave).id; | ||
1932 | // key is determined according to the link speed, duplex and user key(which is yet not supported) | ||
1933 | // ------------------------------------------------------------ | ||
1934 | // Port key : | User key | Speed |Duplex| | ||
1935 | // ------------------------------------------------------------ | ||
1936 | // 16 6 1 0 | ||
1937 | port->actor_admin_port_key = 0; // initialize this parameter | ||
1938 | port->actor_admin_port_key |= __get_duplex(port); | ||
1939 | port->actor_admin_port_key |= (__get_link_speed(port) << 1); | ||
1940 | port->actor_oper_port_key = port->actor_admin_port_key; | ||
1941 | // if the port is not full duplex, then the port should be not lacp Enabled | ||
1942 | if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { | ||
1943 | port->sm_vars &= ~AD_PORT_LACP_ENABLED; | ||
1944 | } | ||
1945 | // actor system is the bond's system | ||
1946 | port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; | ||
1947 | // tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second) | ||
1948 | port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; | ||
1949 | port->aggregator = NULL; | ||
1950 | port->next_port_in_aggregator = NULL; | ||
1951 | |||
1952 | __disable_port(port); | ||
1953 | __initialize_port_locks(port); | ||
1954 | |||
1955 | |||
1956 | // aggregator initialization | ||
1957 | aggregator = &(SLAVE_AD_INFO(slave).aggregator); | ||
1958 | |||
1959 | ad_initialize_agg(aggregator); | ||
1960 | |||
1961 | aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); | ||
1962 | aggregator->aggregator_identifier = (++aggregator_identifier); | ||
1963 | aggregator->slave = slave; | ||
1964 | aggregator->is_active = 0; | ||
1965 | aggregator->num_of_ports = 0; | ||
1966 | } | ||
1967 | |||
1968 | return 0; | ||
1969 | } | ||
1970 | |||
1971 | /** | ||
1972 | * bond_3ad_unbind_slave - deinitialize a slave's port | ||
1973 | * @slave: slave struct to work on | ||
1974 | * | ||
1975 | * Search for the aggregator that is related to this port, remove the | ||
1976 | * aggregator and assign another aggregator for other port related to it | ||
1977 | * (if any), and remove the port. | ||
1978 | */ | ||
1979 | void bond_3ad_unbind_slave(struct slave *slave) | ||
1980 | { | ||
1981 | struct port *port, *prev_port, *temp_port; | ||
1982 | struct aggregator *aggregator, *new_aggregator, *temp_aggregator; | ||
1983 | int select_new_active_agg = 0; | ||
1984 | |||
1985 | // find the aggregator related to this slave | ||
1986 | aggregator = &(SLAVE_AD_INFO(slave).aggregator); | ||
1987 | |||
1988 | // find the port related to this slave | ||
1989 | port = &(SLAVE_AD_INFO(slave).port); | ||
1990 | |||
1991 | // if slave is null, the whole port is not initialized | ||
1992 | if (!port->slave) { | ||
1993 | printk(KERN_WARNING DRV_NAME ": Trying to unbind an uninitialized port on %s\n", slave->dev->name); | ||
1994 | return; | ||
1995 | } | ||
1996 | |||
1997 | dprintk("Unbinding Link Aggregation Group %d\n", aggregator->aggregator_identifier); | ||
1998 | |||
1999 | /* Tell the partner that this port is not suitable for aggregation */ | ||
2000 | port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; | ||
2001 | __update_lacpdu_from_port(port); | ||
2002 | ad_lacpdu_send(port); | ||
2003 | |||
2004 | // check if this aggregator is occupied | ||
2005 | if (aggregator->lag_ports) { | ||
2006 | // check if there are other ports related to this aggregator except | ||
2007 | // the port related to this slave(thats ensure us that there is a | ||
2008 | // reason to search for new aggregator, and that we will find one | ||
2009 | if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { | ||
2010 | // find new aggregator for the related port(s) | ||
2011 | new_aggregator = __get_first_agg(port); | ||
2012 | for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { | ||
2013 | // if the new aggregator is empty, or it connected to to our port only | ||
2014 | if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) { | ||
2015 | break; | ||
2016 | } | ||
2017 | } | ||
2018 | // if new aggregator found, copy the aggregator's parameters | ||
2019 | // and connect the related lag_ports to the new aggregator | ||
2020 | if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { | ||
2021 | dprintk("Some port(s) related to LAG %d - replaceing with LAG %d\n", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier); | ||
2022 | |||
2023 | if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { | ||
2024 | printk(KERN_INFO DRV_NAME ": Removing an active aggregator\n"); | ||
2025 | // select new active aggregator | ||
2026 | select_new_active_agg = 1; | ||
2027 | } | ||
2028 | |||
2029 | new_aggregator->is_individual = aggregator->is_individual; | ||
2030 | new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; | ||
2031 | new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; | ||
2032 | new_aggregator->partner_system = aggregator->partner_system; | ||
2033 | new_aggregator->partner_system_priority = aggregator->partner_system_priority; | ||
2034 | new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; | ||
2035 | new_aggregator->receive_state = aggregator->receive_state; | ||
2036 | new_aggregator->transmit_state = aggregator->transmit_state; | ||
2037 | new_aggregator->lag_ports = aggregator->lag_ports; | ||
2038 | new_aggregator->is_active = aggregator->is_active; | ||
2039 | new_aggregator->num_of_ports = aggregator->num_of_ports; | ||
2040 | |||
2041 | // update the information that is written on the ports about the aggregator | ||
2042 | for (temp_port=aggregator->lag_ports; temp_port; temp_port=temp_port->next_port_in_aggregator) { | ||
2043 | temp_port->aggregator=new_aggregator; | ||
2044 | temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; | ||
2045 | } | ||
2046 | |||
2047 | // clear the aggregator | ||
2048 | ad_clear_agg(aggregator); | ||
2049 | |||
2050 | if (select_new_active_agg) { | ||
2051 | ad_agg_selection_logic(__get_first_agg(port)); | ||
2052 | } | ||
2053 | } else { | ||
2054 | printk(KERN_WARNING DRV_NAME ": Warning: unbinding aggregator, " | ||
2055 | "and could not find a new aggregator for its ports\n"); | ||
2056 | } | ||
2057 | } else { // in case that the only port related to this aggregator is the one we want to remove | ||
2058 | select_new_active_agg = aggregator->is_active; | ||
2059 | // clear the aggregator | ||
2060 | ad_clear_agg(aggregator); | ||
2061 | if (select_new_active_agg) { | ||
2062 | printk(KERN_INFO "Removing an active aggregator\n"); | ||
2063 | // select new active aggregator | ||
2064 | ad_agg_selection_logic(__get_first_agg(port)); | ||
2065 | } | ||
2066 | } | ||
2067 | } | ||
2068 | |||
2069 | dprintk("Unbinding port %d\n", port->actor_port_number); | ||
2070 | // find the aggregator that this port is connected to | ||
2071 | temp_aggregator = __get_first_agg(port); | ||
2072 | for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { | ||
2073 | prev_port = NULL; | ||
2074 | // search the port in the aggregator's related ports | ||
2075 | for (temp_port=temp_aggregator->lag_ports; temp_port; prev_port=temp_port, temp_port=temp_port->next_port_in_aggregator) { | ||
2076 | if (temp_port == port) { // the aggregator found - detach the port from this aggregator | ||
2077 | if (prev_port) { | ||
2078 | prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; | ||
2079 | } else { | ||
2080 | temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; | ||
2081 | } | ||
2082 | temp_aggregator->num_of_ports--; | ||
2083 | if (temp_aggregator->num_of_ports==0) { | ||
2084 | select_new_active_agg = temp_aggregator->is_active; | ||
2085 | // clear the aggregator | ||
2086 | ad_clear_agg(temp_aggregator); | ||
2087 | if (select_new_active_agg) { | ||
2088 | printk(KERN_INFO "Removing an active aggregator\n"); | ||
2089 | // select new active aggregator | ||
2090 | ad_agg_selection_logic(__get_first_agg(port)); | ||
2091 | } | ||
2092 | } | ||
2093 | break; | ||
2094 | } | ||
2095 | } | ||
2096 | } | ||
2097 | port->slave=NULL; | ||
2098 | } | ||
2099 | |||
2100 | /** | ||
2101 | * bond_3ad_state_machine_handler - handle state machines timeout | ||
2102 | * @bond: bonding struct to work on | ||
2103 | * | ||
2104 | * The state machine handling concept in this module is to check every tick | ||
2105 | * which state machine should operate any function. The execution order is | ||
2106 | * round robin, so when we have an interaction between state machines, the | ||
2107 | * reply of one to each other might be delayed until next tick. | ||
2108 | * | ||
2109 | * This function also complete the initialization when the agg_select_timer | ||
2110 | * times out, and it selects an aggregator for the ports that are yet not | ||
2111 | * related to any aggregator, and selects the active aggregator for a bond. | ||
2112 | */ | ||
2113 | void bond_3ad_state_machine_handler(struct bonding *bond) | ||
2114 | { | ||
2115 | struct port *port; | ||
2116 | struct aggregator *aggregator; | ||
2117 | |||
2118 | read_lock(&bond->lock); | ||
2119 | |||
2120 | if (bond->kill_timers) { | ||
2121 | goto out; | ||
2122 | } | ||
2123 | |||
2124 | //check if there are any slaves | ||
2125 | if (bond->slave_cnt == 0) { | ||
2126 | goto re_arm; | ||
2127 | } | ||
2128 | |||
2129 | // check if agg_select_timer timer after initialize is timed out | ||
2130 | if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { | ||
2131 | // select the active aggregator for the bond | ||
2132 | if ((port = __get_first_port(bond))) { | ||
2133 | if (!port->slave) { | ||
2134 | printk(KERN_WARNING DRV_NAME ": Warning: bond's first port is uninitialized\n"); | ||
2135 | goto re_arm; | ||
2136 | } | ||
2137 | |||
2138 | aggregator = __get_first_agg(port); | ||
2139 | ad_agg_selection_logic(aggregator); | ||
2140 | } | ||
2141 | } | ||
2142 | |||
2143 | // for each port run the state machines | ||
2144 | for (port = __get_first_port(bond); port; port = __get_next_port(port)) { | ||
2145 | if (!port->slave) { | ||
2146 | printk(KERN_WARNING DRV_NAME ": Warning: Found an uninitialized port\n"); | ||
2147 | goto re_arm; | ||
2148 | } | ||
2149 | |||
2150 | ad_rx_machine(NULL, port); | ||
2151 | ad_periodic_machine(port); | ||
2152 | ad_port_selection_logic(port); | ||
2153 | ad_mux_machine(port); | ||
2154 | ad_tx_machine(port); | ||
2155 | |||
2156 | // turn off the BEGIN bit, since we already handled it | ||
2157 | if (port->sm_vars & AD_PORT_BEGIN) { | ||
2158 | port->sm_vars &= ~AD_PORT_BEGIN; | ||
2159 | } | ||
2160 | } | ||
2161 | |||
2162 | re_arm: | ||
2163 | mod_timer(&(BOND_AD_INFO(bond).ad_timer), jiffies + ad_delta_in_ticks); | ||
2164 | out: | ||
2165 | read_unlock(&bond->lock); | ||
2166 | } | ||
2167 | |||
2168 | /** | ||
2169 | * bond_3ad_rx_indication - handle a received frame | ||
2170 | * @lacpdu: received lacpdu | ||
2171 | * @slave: slave struct to work on | ||
2172 | * @length: length of the data received | ||
2173 | * | ||
2174 | * It is assumed that frames that were sent on this NIC don't returned as new | ||
2175 | * received frames (loopback). Since only the payload is given to this | ||
2176 | * function, it check for loopback. | ||
2177 | */ | ||
2178 | static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length) | ||
2179 | { | ||
2180 | struct port *port; | ||
2181 | |||
2182 | if (length >= sizeof(struct lacpdu)) { | ||
2183 | |||
2184 | port = &(SLAVE_AD_INFO(slave).port); | ||
2185 | |||
2186 | if (!port->slave) { | ||
2187 | printk(KERN_WARNING DRV_NAME ": Warning: port of slave %s is uninitialized\n", slave->dev->name); | ||
2188 | return; | ||
2189 | } | ||
2190 | |||
2191 | switch (lacpdu->subtype) { | ||
2192 | case AD_TYPE_LACPDU: | ||
2193 | __ntohs_lacpdu(lacpdu); | ||
2194 | dprintk("Received LACPDU on port %d\n", port->actor_port_number); | ||
2195 | ad_rx_machine(lacpdu, port); | ||
2196 | break; | ||
2197 | |||
2198 | case AD_TYPE_MARKER: | ||
2199 | // No need to convert fields to Little Endian since we don't use the marker's fields. | ||
2200 | |||
2201 | switch (((struct marker *)lacpdu)->tlv_type) { | ||
2202 | case AD_MARKER_INFORMATION_SUBTYPE: | ||
2203 | dprintk("Received Marker Information on port %d\n", port->actor_port_number); | ||
2204 | ad_marker_info_received((struct marker *)lacpdu, port); | ||
2205 | break; | ||
2206 | |||
2207 | case AD_MARKER_RESPONSE_SUBTYPE: | ||
2208 | dprintk("Received Marker Response on port %d\n", port->actor_port_number); | ||
2209 | ad_marker_response_received((struct marker *)lacpdu, port); | ||
2210 | break; | ||
2211 | |||
2212 | default: | ||
2213 | dprintk("Received an unknown Marker subtype on slot %d\n", port->actor_port_number); | ||
2214 | } | ||
2215 | } | ||
2216 | } | ||
2217 | } | ||
2218 | |||
2219 | /** | ||
2220 | * bond_3ad_adapter_speed_changed - handle a slave's speed change indication | ||
2221 | * @slave: slave struct to work on | ||
2222 | * | ||
2223 | * Handle reselection of aggregator (if needed) for this port. | ||
2224 | */ | ||
2225 | void bond_3ad_adapter_speed_changed(struct slave *slave) | ||
2226 | { | ||
2227 | struct port *port; | ||
2228 | |||
2229 | port = &(SLAVE_AD_INFO(slave).port); | ||
2230 | |||
2231 | // if slave is null, the whole port is not initialized | ||
2232 | if (!port->slave) { | ||
2233 | printk(KERN_WARNING DRV_NAME ": Warning: speed changed for uninitialized port on %s\n", | ||
2234 | slave->dev->name); | ||
2235 | return; | ||
2236 | } | ||
2237 | |||
2238 | port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; | ||
2239 | port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1); | ||
2240 | dprintk("Port %d changed speed\n", port->actor_port_number); | ||
2241 | // there is no need to reselect a new aggregator, just signal the | ||
2242 | // state machines to reinitialize | ||
2243 | port->sm_vars |= AD_PORT_BEGIN; | ||
2244 | } | ||
2245 | |||
2246 | /** | ||
2247 | * bond_3ad_adapter_duplex_changed - handle a slave's duplex change indication | ||
2248 | * @slave: slave struct to work on | ||
2249 | * | ||
2250 | * Handle reselection of aggregator (if needed) for this port. | ||
2251 | */ | ||
2252 | void bond_3ad_adapter_duplex_changed(struct slave *slave) | ||
2253 | { | ||
2254 | struct port *port; | ||
2255 | |||
2256 | port=&(SLAVE_AD_INFO(slave).port); | ||
2257 | |||
2258 | // if slave is null, the whole port is not initialized | ||
2259 | if (!port->slave) { | ||
2260 | printk(KERN_WARNING DRV_NAME ": Warning: duplex changed for uninitialized port on %s\n", | ||
2261 | slave->dev->name); | ||
2262 | return; | ||
2263 | } | ||
2264 | |||
2265 | port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; | ||
2266 | port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port); | ||
2267 | dprintk("Port %d changed duplex\n", port->actor_port_number); | ||
2268 | // there is no need to reselect a new aggregator, just signal the | ||
2269 | // state machines to reinitialize | ||
2270 | port->sm_vars |= AD_PORT_BEGIN; | ||
2271 | } | ||
2272 | |||
2273 | /** | ||
2274 | * bond_3ad_handle_link_change - handle a slave's link status change indication | ||
2275 | * @slave: slave struct to work on | ||
2276 | * @status: whether the link is now up or down | ||
2277 | * | ||
2278 | * Handle reselection of aggregator (if needed) for this port. | ||
2279 | */ | ||
2280 | void bond_3ad_handle_link_change(struct slave *slave, char link) | ||
2281 | { | ||
2282 | struct port *port; | ||
2283 | |||
2284 | port = &(SLAVE_AD_INFO(slave).port); | ||
2285 | |||
2286 | // if slave is null, the whole port is not initialized | ||
2287 | if (!port->slave) { | ||
2288 | printk(KERN_WARNING DRV_NAME ": Warning: link status changed for uninitialized port on %s\n", | ||
2289 | slave->dev->name); | ||
2290 | return; | ||
2291 | } | ||
2292 | |||
2293 | // on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) | ||
2294 | // on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report | ||
2295 | if (link == BOND_LINK_UP) { | ||
2296 | port->is_enabled = 1; | ||
2297 | port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; | ||
2298 | port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port); | ||
2299 | port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; | ||
2300 | port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1); | ||
2301 | } else { | ||
2302 | /* link has failed */ | ||
2303 | port->is_enabled = 0; | ||
2304 | port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; | ||
2305 | port->actor_oper_port_key= (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS); | ||
2306 | } | ||
2307 | //BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); | ||
2308 | // there is no need to reselect a new aggregator, just signal the | ||
2309 | // state machines to reinitialize | ||
2310 | port->sm_vars |= AD_PORT_BEGIN; | ||
2311 | } | ||
2312 | |||
2313 | /** | ||
2314 | * bond_3ad_get_active_agg_info - get information of the active aggregator | ||
2315 | * @bond: bonding struct to work on | ||
2316 | * @ad_info: ad_info struct to fill with the bond's info | ||
2317 | * | ||
2318 | * Returns: 0 on success | ||
2319 | * < 0 on error | ||
2320 | */ | ||
2321 | int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) | ||
2322 | { | ||
2323 | struct aggregator *aggregator = NULL; | ||
2324 | struct port *port; | ||
2325 | |||
2326 | for (port = __get_first_port(bond); port; port = __get_next_port(port)) { | ||
2327 | if (port->aggregator && port->aggregator->is_active) { | ||
2328 | aggregator = port->aggregator; | ||
2329 | break; | ||
2330 | } | ||
2331 | } | ||
2332 | |||
2333 | if (aggregator) { | ||
2334 | ad_info->aggregator_id = aggregator->aggregator_identifier; | ||
2335 | ad_info->ports = aggregator->num_of_ports; | ||
2336 | ad_info->actor_key = aggregator->actor_oper_aggregator_key; | ||
2337 | ad_info->partner_key = aggregator->partner_oper_aggregator_key; | ||
2338 | memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN); | ||
2339 | return 0; | ||
2340 | } | ||
2341 | |||
2342 | return -1; | ||
2343 | } | ||
2344 | |||
2345 | int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) | ||
2346 | { | ||
2347 | struct slave *slave, *start_at; | ||
2348 | struct bonding *bond = dev->priv; | ||
2349 | struct ethhdr *data = (struct ethhdr *)skb->data; | ||
2350 | int slave_agg_no; | ||
2351 | int slaves_in_agg; | ||
2352 | int agg_id; | ||
2353 | int i; | ||
2354 | struct ad_info ad_info; | ||
2355 | int res = 1; | ||
2356 | |||
2357 | /* make sure that the slaves list will | ||
2358 | * not change during tx | ||
2359 | */ | ||
2360 | read_lock(&bond->lock); | ||
2361 | |||
2362 | if (!BOND_IS_OK(bond)) { | ||
2363 | goto out; | ||
2364 | } | ||
2365 | |||
2366 | if (bond_3ad_get_active_agg_info(bond, &ad_info)) { | ||
2367 | printk(KERN_DEBUG "ERROR: bond_3ad_get_active_agg_info failed\n"); | ||
2368 | goto out; | ||
2369 | } | ||
2370 | |||
2371 | slaves_in_agg = ad_info.ports; | ||
2372 | agg_id = ad_info.aggregator_id; | ||
2373 | |||
2374 | if (slaves_in_agg == 0) { | ||
2375 | /*the aggregator is empty*/ | ||
2376 | printk(KERN_DEBUG "ERROR: active aggregator is empty\n"); | ||
2377 | goto out; | ||
2378 | } | ||
2379 | |||
2380 | slave_agg_no = (data->h_dest[5]^bond->dev->dev_addr[5]) % slaves_in_agg; | ||
2381 | |||
2382 | bond_for_each_slave(bond, slave, i) { | ||
2383 | struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; | ||
2384 | |||
2385 | if (agg && (agg->aggregator_identifier == agg_id)) { | ||
2386 | slave_agg_no--; | ||
2387 | if (slave_agg_no < 0) { | ||
2388 | break; | ||
2389 | } | ||
2390 | } | ||
2391 | } | ||
2392 | |||
2393 | if (slave_agg_no >= 0) { | ||
2394 | printk(KERN_ERR DRV_NAME ": Error: Couldn't find a slave to tx on for aggregator ID %d\n", agg_id); | ||
2395 | goto out; | ||
2396 | } | ||
2397 | |||
2398 | start_at = slave; | ||
2399 | |||
2400 | bond_for_each_slave_from(bond, slave, i, start_at) { | ||
2401 | int slave_agg_id = 0; | ||
2402 | struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; | ||
2403 | |||
2404 | if (agg) { | ||
2405 | slave_agg_id = agg->aggregator_identifier; | ||
2406 | } | ||
2407 | |||
2408 | if (SLAVE_IS_OK(slave) && agg && (slave_agg_id == agg_id)) { | ||
2409 | res = bond_dev_queue_xmit(bond, skb, slave->dev); | ||
2410 | break; | ||
2411 | } | ||
2412 | } | ||
2413 | |||
2414 | out: | ||
2415 | if (res) { | ||
2416 | /* no suitable interface, frame not sent */ | ||
2417 | dev_kfree_skb(skb); | ||
2418 | } | ||
2419 | read_unlock(&bond->lock); | ||
2420 | return 0; | ||
2421 | } | ||
2422 | |||
2423 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) | ||
2424 | { | ||
2425 | struct bonding *bond = dev->priv; | ||
2426 | struct slave *slave = NULL; | ||
2427 | int ret = NET_RX_DROP; | ||
2428 | |||
2429 | if (!(dev->flags & IFF_MASTER)) { | ||
2430 | goto out; | ||
2431 | } | ||
2432 | |||
2433 | read_lock(&bond->lock); | ||
2434 | slave = bond_get_slave_by_dev((struct bonding *)dev->priv, | ||
2435 | skb->real_dev); | ||
2436 | if (slave == NULL) { | ||
2437 | goto out_unlock; | ||
2438 | } | ||
2439 | |||
2440 | bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); | ||
2441 | |||
2442 | ret = NET_RX_SUCCESS; | ||
2443 | |||
2444 | out_unlock: | ||
2445 | read_unlock(&bond->lock); | ||
2446 | out: | ||
2447 | dev_kfree_skb(skb); | ||
2448 | |||
2449 | return ret; | ||
2450 | } | ||
2451 | |||
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h new file mode 100644 index 000000000000..f46823894187 --- /dev/null +++ b/drivers/net/bonding/bond_3ad.h | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called LICENSE. | ||
20 | * | ||
21 | * | ||
22 | * Changes: | ||
23 | * | ||
24 | * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
25 | * Amir Noam <amir.noam at intel dot com> | ||
26 | * - Added support for lacp_rate module param. | ||
27 | * | ||
28 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
29 | * - Renamed bond_3ad_link_status_changed() to | ||
30 | * bond_3ad_handle_link_change() for compatibility with TLB. | ||
31 | * | ||
32 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
33 | * - Code cleanup and style changes | ||
34 | */ | ||
35 | |||
36 | #ifndef __BOND_3AD_H__ | ||
37 | #define __BOND_3AD_H__ | ||
38 | |||
39 | #include <asm/byteorder.h> | ||
40 | #include <linux/skbuff.h> | ||
41 | #include <linux/netdevice.h> | ||
42 | |||
// General definitions

// value used for LACPDU frames: host order, and pre-swapped to network order
#define BOND_ETH_P_LACPDU	0x8809
#define PKT_TYPE_LACPDU		__constant_htons(BOND_ETH_P_LACPDU)

// 802.3ad timer interval, in msec
#define AD_TIMER_INTERVAL	100

// multicast address for LACPDUs: as a bare byte-array initializer, and
// wrapped in one more brace level to initialize a struct holding that array
#define MULTICAST_LACPDU_ADDR		{0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
#define AD_MULTICAST_LACPDU_ADDR	{MULTICAST_LACPDU_ADDR}

// LACP rate selectors
#define AD_LACP_SLOW	0
#define AD_LACP_FAST	1
53 | |||
54 | typedef struct mac_addr { | ||
55 | u8 mac_addr_value[ETH_ALEN]; | ||
56 | } mac_addr_t; | ||
57 | |||
// modes of selecting the active aggregator
typedef enum {
	AD_BANDWIDTH = 0,
	AD_COUNT = 1
} agg_selection_t;
62 | |||
// rx machine states (43.4.11 in the 802.3ad standard);
// AD_RX_DUMMY occupies value 0, the states proper start at 1
typedef enum {
	AD_RX_DUMMY = 0,
	AD_RX_INITIALIZE = 1,
	AD_RX_PORT_DISABLED = 2,
	AD_RX_LACP_DISABLED = 3,
	AD_RX_EXPIRED = 4,
	AD_RX_DEFAULTED = 5,
	AD_RX_CURRENT = 6
} rx_states_t;
73 | |||
// periodic machine states (43.4.12 in the 802.3ad standard);
// AD_PERIODIC_DUMMY occupies value 0, the states proper start at 1
typedef enum {
	AD_PERIODIC_DUMMY = 0,
	AD_NO_PERIODIC = 1,
	AD_FAST_PERIODIC = 2,
	AD_SLOW_PERIODIC = 3,
	AD_PERIODIC_TX = 4
} periodic_states_t;
82 | |||
// mux machine states (43.4.13 in the 802.3ad standard);
// AD_MUX_DUMMY occupies value 0, the states proper start at 1
typedef enum {
	AD_MUX_DUMMY = 0,
	AD_MUX_DETACHED = 1,
	AD_MUX_WAITING = 2,
	AD_MUX_ATTACHED = 3,
	AD_MUX_COLLECTING_DISTRIBUTING = 4
} mux_states_t;
91 | |||
// tx machine states (43.4.15 in the 802.3ad standard);
// AD_TX_DUMMY occupies value 0, AD_TRANSMIT is the only state proper
typedef enum {
	AD_TX_DUMMY = 0,
	AD_TRANSMIT = 1
} tx_states_t;
97 | |||
// rx indication types: the PDU kinds told apart by the subtype byte
typedef enum {
	AD_TYPE_LACPDU = 1,	// LACPDU
	AD_TYPE_MARKER = 2	// marker PDU
} pdu_type_t;
103 | |||
// rx marker indication types: values of a marker PDU's tlv_type
typedef enum {
	AD_MARKER_INFORMATION_SUBTYPE = 1,	// marker information
	AD_MARKER_RESPONSE_SUBTYPE = 2		// marker response
} marker_subtype_t;
109 | |||
// timer types (43.4.9 in the 802.3ad standard)
typedef enum {
	AD_CURRENT_WHILE_TIMER = 0,
	AD_ACTOR_CHURN_TIMER = 1,
	AD_PERIODIC_TIMER = 2,
	AD_PARTNER_CHURN_TIMER = 3,
	AD_WAIT_WHILE_TIMER = 4
} ad_timers_t;
118 | |||
119 | #pragma pack(1) | ||
120 | |||
121 | typedef struct ad_header { | ||
122 | struct mac_addr destination_address; | ||
123 | struct mac_addr source_address; | ||
124 | u16 length_type; | ||
125 | } ad_header_t; | ||
126 | |||
127 | // Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) | ||
128 | typedef struct lacpdu { | ||
129 | u8 subtype; // = LACP(= 0x01) | ||
130 | u8 version_number; | ||
131 | u8 tlv_type_actor_info; // = actor information(type/length/value) | ||
132 | u8 actor_information_length; // = 20 | ||
133 | u16 actor_system_priority; | ||
134 | struct mac_addr actor_system; | ||
135 | u16 actor_key; | ||
136 | u16 actor_port_priority; | ||
137 | u16 actor_port; | ||
138 | u8 actor_state; | ||
139 | u8 reserved_3_1[3]; // = 0 | ||
140 | u8 tlv_type_partner_info; // = partner information | ||
141 | u8 partner_information_length; // = 20 | ||
142 | u16 partner_system_priority; | ||
143 | struct mac_addr partner_system; | ||
144 | u16 partner_key; | ||
145 | u16 partner_port_priority; | ||
146 | u16 partner_port; | ||
147 | u8 partner_state; | ||
148 | u8 reserved_3_2[3]; // = 0 | ||
149 | u8 tlv_type_collector_info; // = collector information | ||
150 | u8 collector_information_length; // = 16 | ||
151 | u16 collector_max_delay; | ||
152 | u8 reserved_12[12]; | ||
153 | u8 tlv_type_terminator; // = terminator | ||
154 | u8 terminator_length; // = 0 | ||
155 | u8 reserved_50[50]; // = 0 | ||
156 | } lacpdu_t; | ||
157 | |||
158 | typedef struct lacpdu_header { | ||
159 | struct ad_header ad_header; | ||
160 | struct lacpdu lacpdu; | ||
161 | } lacpdu_header_t; | ||
162 | |||
163 | // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) | ||
164 | typedef struct marker { | ||
165 | u8 subtype; // = 0x02 (marker PDU) | ||
166 | u8 version_number; // = 0x01 | ||
167 | u8 tlv_type; // = 0x01 (marker information) | ||
168 | // = 0x02 (marker response information) | ||
169 | u8 marker_length; // = 0x16 | ||
170 | u16 requester_port; // The number assigned to the port by the requester | ||
171 | struct mac_addr requester_system; // The requester's system id | ||
172 | u32 requester_transaction_id; // The transaction id allocated by the requester, | ||
173 | u16 pad; // = 0 | ||
174 | u8 tlv_type_terminator; // = 0x00 | ||
175 | u8 terminator_length; // = 0x00 | ||
176 | u8 reserved_90[90]; // = 0 | ||
177 | } marker_t; | ||
178 | |||
179 | typedef struct marker_header { | ||
180 | struct ad_header ad_header; | ||
181 | struct marker marker; | ||
182 | } marker_header_t; | ||
183 | |||
184 | #pragma pack() | ||
185 | |||
// forward declarations of types that are only used through pointers
// in the declarations that follow
struct bonding;
struct slave;
struct port;
struct ad_info;
190 | |||
191 | #ifdef __ia64__ | ||
192 | #pragma pack(8) | ||
193 | #endif | ||
194 | |||
195 | // aggregator structure(43.4.5 in the 802.3ad standard) | ||
196 | typedef struct aggregator { | ||
197 | struct mac_addr aggregator_mac_address; | ||
198 | u16 aggregator_identifier; | ||
199 | u16 is_individual; // BOOLEAN | ||
200 | u16 actor_admin_aggregator_key; | ||
201 | u16 actor_oper_aggregator_key; | ||
202 | struct mac_addr partner_system; | ||
203 | u16 partner_system_priority; | ||
204 | u16 partner_oper_aggregator_key; | ||
205 | u16 receive_state; // BOOLEAN | ||
206 | u16 transmit_state; // BOOLEAN | ||
207 | struct port *lag_ports; | ||
208 | // ****** PRIVATE PARAMETERS ****** | ||
209 | struct slave *slave; // pointer to the bond slave that this aggregator belongs to | ||
210 | u16 is_active; // BOOLEAN. Indicates if this aggregator is active | ||
211 | u16 num_of_ports; | ||
212 | } aggregator_t; | ||
213 | |||
214 | // port structure(43.4.6 in the 802.3ad standard) | ||
215 | typedef struct port { | ||
216 | u16 actor_port_number; | ||
217 | u16 actor_port_priority; | ||
218 | struct mac_addr actor_system; // This parameter is added here although it is not specified in the standard, just for simplification | ||
219 | u16 actor_system_priority; // This parameter is added here although it is not specified in the standard, just for simplification | ||
220 | u16 actor_port_aggregator_identifier; | ||
221 | u16 ntt; // BOOLEAN | ||
222 | u16 actor_admin_port_key; | ||
223 | u16 actor_oper_port_key; | ||
224 | u8 actor_admin_port_state; | ||
225 | u8 actor_oper_port_state; | ||
226 | struct mac_addr partner_admin_system; | ||
227 | struct mac_addr partner_oper_system; | ||
228 | u16 partner_admin_system_priority; | ||
229 | u16 partner_oper_system_priority; | ||
230 | u16 partner_admin_key; | ||
231 | u16 partner_oper_key; | ||
232 | u16 partner_admin_port_number; | ||
233 | u16 partner_oper_port_number; | ||
234 | u16 partner_admin_port_priority; | ||
235 | u16 partner_oper_port_priority; | ||
236 | u8 partner_admin_port_state; | ||
237 | u8 partner_oper_port_state; | ||
238 | u16 is_enabled; // BOOLEAN | ||
239 | // ****** PRIVATE PARAMETERS ****** | ||
240 | u16 sm_vars; // all state machines variables for this port | ||
241 | rx_states_t sm_rx_state; // state machine rx state | ||
242 | u16 sm_rx_timer_counter; // state machine rx timer counter | ||
243 | periodic_states_t sm_periodic_state;// state machine periodic state | ||
244 | u16 sm_periodic_timer_counter; // state machine periodic timer counter | ||
245 | mux_states_t sm_mux_state; // state machine mux state | ||
246 | u16 sm_mux_timer_counter; // state machine mux timer counter | ||
247 | tx_states_t sm_tx_state; // state machine tx state | ||
248 | u16 sm_tx_timer_counter; // state machine tx timer counter(allways on - enter to transmit state 3 time per second) | ||
249 | struct slave *slave; // pointer to the bond slave that this port belongs to | ||
250 | struct aggregator *aggregator; // pointer to an aggregator that this port related to | ||
251 | struct port *next_port_in_aggregator; // Next port on the linked list of the parent aggregator | ||
252 | u32 transaction_id; // continuous number for identification of Marker PDU's; | ||
253 | struct lacpdu lacpdu; // the lacpdu that will be sent for this port | ||
254 | } port_t; | ||
255 | |||
256 | // system structure | ||
257 | typedef struct ad_system { | ||
258 | u16 sys_priority; | ||
259 | struct mac_addr sys_mac_addr; | ||
260 | } ad_system_t; | ||
261 | |||
262 | #ifdef __ia64__ | ||
263 | #pragma pack() | ||
264 | #endif | ||
265 | |||
// ================= AD Exported structures to the main bonding code ==================

// accessors for the ad_info member of a bond / slave, given a pointer to it
#define BOND_AD_INFO(bond)	((bond)->ad_info)
#define SLAVE_AD_INFO(slave)	((slave)->ad_info)
269 | |||
270 | struct ad_bond_info { | ||
271 | ad_system_t system; // 802.3ad system structure | ||
272 | u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes | ||
273 | u32 agg_select_mode; // Mode of selection of active aggregator(bandwidth/count) | ||
274 | int lacp_fast; /* whether fast periodic tx should be | ||
275 | * requested | ||
276 | */ | ||
277 | struct timer_list ad_timer; | ||
278 | struct packet_type ad_pkt_type; | ||
279 | }; | ||
280 | |||
281 | struct ad_slave_info { | ||
282 | struct aggregator aggregator; // 802.3ad aggregator structure | ||
283 | struct port port; // 802.3ad port structure | ||
284 | spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt | ||
285 | u16 id; | ||
286 | }; | ||
287 | |||
288 | // ================= AD Exported functions to the main bonding code ================== | ||
289 | void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast); | ||
290 | int bond_3ad_bind_slave(struct slave *slave); | ||
291 | void bond_3ad_unbind_slave(struct slave *slave); | ||
292 | void bond_3ad_state_machine_handler(struct bonding *bond); | ||
293 | void bond_3ad_adapter_speed_changed(struct slave *slave); | ||
294 | void bond_3ad_adapter_duplex_changed(struct slave *slave); | ||
295 | void bond_3ad_handle_link_change(struct slave *slave, char link); | ||
296 | int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); | ||
297 | int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); | ||
298 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); | ||
299 | #endif //__BOND_3AD_H__ | ||
300 | |||
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c new file mode 100644 index 000000000000..5ce606d9dc03 --- /dev/null +++ b/drivers/net/bonding/bond_alb.c | |||
@@ -0,0 +1,1696 @@ | |||
1 | /* | ||
2 | * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the | ||
6 | * Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | ||
11 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
12 | * for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along | ||
15 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called LICENSE. | ||
20 | * | ||
21 | * | ||
22 | * Changes: | ||
23 | * | ||
24 | * 2003/06/25 - Shmulik Hen <shmulik.hen at intel dot com> | ||
25 | * - Fixed signed/unsigned calculation errors that caused load sharing | ||
26 | * to collapse to one slave under very heavy UDP Tx stress. | ||
27 | * | ||
28 | * 2003/08/06 - Amir Noam <amir.noam at intel dot com> | ||
29 | * - Add support for setting bond's MAC address with special | ||
30 | * handling required for ALB/TLB. | ||
31 | * | ||
32 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
33 | * - Code cleanup and style changes | ||
34 | * | ||
35 | * 2003/12/30 - Amir Noam <amir.noam at intel dot com> | ||
36 | * - Fixed: Cannot remove and re-enslave the original active slave. | ||
37 | * | ||
38 | * 2004/01/14 - Shmulik Hen <shmulik.hen at intel dot com> | ||
39 | * - Add capability to tag self generated packets in ALB/TLB modes. | ||
40 | */ | ||
41 | |||
42 | //#define BONDING_DEBUG 1 | ||
43 | |||
44 | #include <linux/skbuff.h> | ||
45 | #include <linux/netdevice.h> | ||
46 | #include <linux/etherdevice.h> | ||
47 | #include <linux/pkt_sched.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | #include <linux/slab.h> | ||
50 | #include <linux/timer.h> | ||
51 | #include <linux/ip.h> | ||
52 | #include <linux/ipv6.h> | ||
53 | #include <linux/if_arp.h> | ||
54 | #include <linux/if_ether.h> | ||
55 | #include <linux/if_bonding.h> | ||
56 | #include <linux/if_vlan.h> | ||
57 | #include <linux/in.h> | ||
58 | #include <net/ipx.h> | ||
59 | #include <net/arp.h> | ||
60 | #include <asm/byteorder.h> | ||
61 | #include "bonding.h" | ||
62 | #include "bond_alb.h" | ||
63 | |||
64 | |||
/*
 * ALB/TLB/RLB tunables.
 *
 * Every macro whose expansion is an expression is fully parenthesised so
 * that it can be used safely inside a larger expression (for example as a
 * divisor or as the operand of '%').
 */
#define ALB_TIMER_TICKS_PER_SEC	    10	/* should be a divisor of HZ */
#define BOND_TLB_REBALANCE_INTERVAL 10	/* In seconds, periodic re-balancing.
					 * Used for division - never set
					 * to zero !!!
					 */
#define BOND_ALB_LP_INTERVAL	    1	/* In seconds, periodic send of
					 * learning packets to the switch
					 */

#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \
				  * ALB_TIMER_TICKS_PER_SEC)

#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \
			   * ALB_TIMER_TICKS_PER_SEC)

#define TLB_HASH_TABLE_SIZE 256	/* The size of the clients hash table.
				 * Note that this value MUST NOT be smaller
				 * because the key hash table is BYTE wide !
				 */


#define TLB_NULL_INDEX		0xffffffff
#define MAX_LP_BURST		3

/* rlb defs */
#define RLB_HASH_TABLE_SIZE	256
#define RLB_NULL_INDEX		0xffffffff
#define RLB_UPDATE_DELAY	(2 * ALB_TIMER_TICKS_PER_SEC) /* 2 seconds */
#define RLB_ARP_BURST_SIZE	2
#define RLB_UPDATE_RETRY	3	/* 3-ticks - must be smaller than the rlb
					 * rebalance interval (5 min).
					 */
/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
 * promiscuous after failover
 */
#define RLB_PROMISC_TIMEOUT	(10 * ALB_TIMER_TICKS_PER_SEC)
101 | |||
102 | static const u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; | ||
103 | static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; | ||
104 | |||
105 | #pragma pack(1) | ||
106 | struct learning_pkt { | ||
107 | u8 mac_dst[ETH_ALEN]; | ||
108 | u8 mac_src[ETH_ALEN]; | ||
109 | u16 type; | ||
110 | u8 padding[ETH_ZLEN - ETH_HLEN]; | ||
111 | }; | ||
112 | |||
113 | struct arp_pkt { | ||
114 | u16 hw_addr_space; | ||
115 | u16 prot_addr_space; | ||
116 | u8 hw_addr_len; | ||
117 | u8 prot_addr_len; | ||
118 | u16 op_code; | ||
119 | u8 mac_src[ETH_ALEN]; /* sender hardware address */ | ||
120 | u32 ip_src; /* sender IP address */ | ||
121 | u8 mac_dst[ETH_ALEN]; /* target hardware address */ | ||
122 | u32 ip_dst; /* target IP address */ | ||
123 | }; | ||
124 | #pragma pack() | ||
125 | |||
126 | /* Forward declaration */ | ||
127 | static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); | ||
128 | |||
129 | static inline u8 _simple_hash(u8 *hash_start, int hash_size) | ||
130 | { | ||
131 | int i; | ||
132 | u8 hash = 0; | ||
133 | |||
134 | for (i = 0; i < hash_size; i++) { | ||
135 | hash ^= hash_start[i]; | ||
136 | } | ||
137 | |||
138 | return hash; | ||
139 | } | ||
140 | |||
141 | /*********************** tlb specific functions ***************************/ | ||
142 | |||
143 | static inline void _lock_tx_hashtbl(struct bonding *bond) | ||
144 | { | ||
145 | spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); | ||
146 | } | ||
147 | |||
148 | static inline void _unlock_tx_hashtbl(struct bonding *bond) | ||
149 | { | ||
150 | spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); | ||
151 | } | ||
152 | |||
153 | /* Caller must hold tx_hashtbl lock */ | ||
154 | static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) | ||
155 | { | ||
156 | if (save_load) { | ||
157 | entry->load_history = 1 + entry->tx_bytes / | ||
158 | BOND_TLB_REBALANCE_INTERVAL; | ||
159 | entry->tx_bytes = 0; | ||
160 | } | ||
161 | |||
162 | entry->tx_slave = NULL; | ||
163 | entry->next = TLB_NULL_INDEX; | ||
164 | entry->prev = TLB_NULL_INDEX; | ||
165 | } | ||
166 | |||
167 | static inline void tlb_init_slave(struct slave *slave) | ||
168 | { | ||
169 | SLAVE_TLB_INFO(slave).load = 0; | ||
170 | SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; | ||
171 | } | ||
172 | |||
173 | /* Caller must hold bond lock for read */ | ||
174 | static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) | ||
175 | { | ||
176 | struct tlb_client_info *tx_hash_table; | ||
177 | u32 index; | ||
178 | |||
179 | _lock_tx_hashtbl(bond); | ||
180 | |||
181 | /* clear slave from tx_hashtbl */ | ||
182 | tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; | ||
183 | |||
184 | index = SLAVE_TLB_INFO(slave).head; | ||
185 | while (index != TLB_NULL_INDEX) { | ||
186 | u32 next_index = tx_hash_table[index].next; | ||
187 | tlb_init_table_entry(&tx_hash_table[index], save_load); | ||
188 | index = next_index; | ||
189 | } | ||
190 | |||
191 | _unlock_tx_hashtbl(bond); | ||
192 | |||
193 | tlb_init_slave(slave); | ||
194 | } | ||
195 | |||
196 | /* Must be called before starting the monitor timer */ | ||
197 | static int tlb_initialize(struct bonding *bond) | ||
198 | { | ||
199 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
200 | int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); | ||
201 | int i; | ||
202 | |||
203 | spin_lock_init(&(bond_info->tx_hashtbl_lock)); | ||
204 | |||
205 | _lock_tx_hashtbl(bond); | ||
206 | |||
207 | bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL); | ||
208 | if (!bond_info->tx_hashtbl) { | ||
209 | printk(KERN_ERR DRV_NAME | ||
210 | ": Error: %s: Failed to allocate TLB hash table\n", | ||
211 | bond->dev->name); | ||
212 | _unlock_tx_hashtbl(bond); | ||
213 | return -1; | ||
214 | } | ||
215 | |||
216 | memset(bond_info->tx_hashtbl, 0, size); | ||
217 | |||
218 | for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { | ||
219 | tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1); | ||
220 | } | ||
221 | |||
222 | _unlock_tx_hashtbl(bond); | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | /* Must be called only after all slaves have been released */ | ||
228 | static void tlb_deinitialize(struct bonding *bond) | ||
229 | { | ||
230 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
231 | |||
232 | _lock_tx_hashtbl(bond); | ||
233 | |||
234 | kfree(bond_info->tx_hashtbl); | ||
235 | bond_info->tx_hashtbl = NULL; | ||
236 | |||
237 | _unlock_tx_hashtbl(bond); | ||
238 | } | ||
239 | |||
240 | /* Caller must hold bond lock for read */ | ||
241 | static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) | ||
242 | { | ||
243 | struct slave *slave, *least_loaded; | ||
244 | s64 max_gap; | ||
245 | int i, found = 0; | ||
246 | |||
247 | /* Find the first enabled slave */ | ||
248 | bond_for_each_slave(bond, slave, i) { | ||
249 | if (SLAVE_IS_OK(slave)) { | ||
250 | found = 1; | ||
251 | break; | ||
252 | } | ||
253 | } | ||
254 | |||
255 | if (!found) { | ||
256 | return NULL; | ||
257 | } | ||
258 | |||
259 | least_loaded = slave; | ||
260 | max_gap = (s64)(slave->speed << 20) - /* Convert to Megabit per sec */ | ||
261 | (s64)(SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ | ||
262 | |||
263 | /* Find the slave with the largest gap */ | ||
264 | bond_for_each_slave_from(bond, slave, i, least_loaded) { | ||
265 | if (SLAVE_IS_OK(slave)) { | ||
266 | s64 gap = (s64)(slave->speed << 20) - | ||
267 | (s64)(SLAVE_TLB_INFO(slave).load << 3); | ||
268 | if (max_gap < gap) { | ||
269 | least_loaded = slave; | ||
270 | max_gap = gap; | ||
271 | } | ||
272 | } | ||
273 | } | ||
274 | |||
275 | return least_loaded; | ||
276 | } | ||
277 | |||
278 | /* Caller must hold bond lock for read */ | ||
279 | static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) | ||
280 | { | ||
281 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
282 | struct tlb_client_info *hash_table; | ||
283 | struct slave *assigned_slave; | ||
284 | |||
285 | _lock_tx_hashtbl(bond); | ||
286 | |||
287 | hash_table = bond_info->tx_hashtbl; | ||
288 | assigned_slave = hash_table[hash_index].tx_slave; | ||
289 | if (!assigned_slave) { | ||
290 | assigned_slave = tlb_get_least_loaded_slave(bond); | ||
291 | |||
292 | if (assigned_slave) { | ||
293 | struct tlb_slave_info *slave_info = | ||
294 | &(SLAVE_TLB_INFO(assigned_slave)); | ||
295 | u32 next_index = slave_info->head; | ||
296 | |||
297 | hash_table[hash_index].tx_slave = assigned_slave; | ||
298 | hash_table[hash_index].next = next_index; | ||
299 | hash_table[hash_index].prev = TLB_NULL_INDEX; | ||
300 | |||
301 | if (next_index != TLB_NULL_INDEX) { | ||
302 | hash_table[next_index].prev = hash_index; | ||
303 | } | ||
304 | |||
305 | slave_info->head = hash_index; | ||
306 | slave_info->load += | ||
307 | hash_table[hash_index].load_history; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | if (assigned_slave) { | ||
312 | hash_table[hash_index].tx_bytes += skb_len; | ||
313 | } | ||
314 | |||
315 | _unlock_tx_hashtbl(bond); | ||
316 | |||
317 | return assigned_slave; | ||
318 | } | ||
319 | |||
320 | /*********************** rlb specific functions ***************************/ | ||
321 | static inline void _lock_rx_hashtbl(struct bonding *bond) | ||
322 | { | ||
323 | spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); | ||
324 | } | ||
325 | |||
326 | static inline void _unlock_rx_hashtbl(struct bonding *bond) | ||
327 | { | ||
328 | spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); | ||
329 | } | ||
330 | |||
331 | /* when an ARP REPLY is received from a client update its info | ||
332 | * in the rx_hashtbl | ||
333 | */ | ||
334 | static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) | ||
335 | { | ||
336 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
337 | struct rlb_client_info *client_info; | ||
338 | u32 hash_index; | ||
339 | |||
340 | _lock_rx_hashtbl(bond); | ||
341 | |||
342 | hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); | ||
343 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
344 | |||
345 | if ((client_info->assigned) && | ||
346 | (client_info->ip_src == arp->ip_dst) && | ||
347 | (client_info->ip_dst == arp->ip_src)) { | ||
348 | /* update the clients MAC address */ | ||
349 | memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); | ||
350 | client_info->ntt = 1; | ||
351 | bond_info->rx_ntt = 1; | ||
352 | } | ||
353 | |||
354 | _unlock_rx_hashtbl(bond); | ||
355 | } | ||
356 | |||
357 | static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) | ||
358 | { | ||
359 | struct bonding *bond = bond_dev->priv; | ||
360 | struct arp_pkt *arp = (struct arp_pkt *)skb->data; | ||
361 | int res = NET_RX_DROP; | ||
362 | |||
363 | if (!(bond_dev->flags & IFF_MASTER)) { | ||
364 | goto out; | ||
365 | } | ||
366 | |||
367 | if (!arp) { | ||
368 | dprintk("Packet has no ARP data\n"); | ||
369 | goto out; | ||
370 | } | ||
371 | |||
372 | if (skb->len < sizeof(struct arp_pkt)) { | ||
373 | dprintk("Packet is too small to be an ARP\n"); | ||
374 | goto out; | ||
375 | } | ||
376 | |||
377 | if (arp->op_code == htons(ARPOP_REPLY)) { | ||
378 | /* update rx hash table for this ARP */ | ||
379 | rlb_update_entry_from_arp(bond, arp); | ||
380 | dprintk("Server received an ARP Reply from client\n"); | ||
381 | } | ||
382 | |||
383 | res = NET_RX_SUCCESS; | ||
384 | |||
385 | out: | ||
386 | dev_kfree_skb(skb); | ||
387 | |||
388 | return res; | ||
389 | } | ||
390 | |||
391 | /* Caller must hold bond lock for read */ | ||
392 | static struct slave *rlb_next_rx_slave(struct bonding *bond) | ||
393 | { | ||
394 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
395 | struct slave *rx_slave, *slave, *start_at; | ||
396 | int i = 0; | ||
397 | |||
398 | if (bond_info->next_rx_slave) { | ||
399 | start_at = bond_info->next_rx_slave; | ||
400 | } else { | ||
401 | start_at = bond->first_slave; | ||
402 | } | ||
403 | |||
404 | rx_slave = NULL; | ||
405 | |||
406 | bond_for_each_slave_from(bond, slave, i, start_at) { | ||
407 | if (SLAVE_IS_OK(slave)) { | ||
408 | if (!rx_slave) { | ||
409 | rx_slave = slave; | ||
410 | } else if (slave->speed > rx_slave->speed) { | ||
411 | rx_slave = slave; | ||
412 | } | ||
413 | } | ||
414 | } | ||
415 | |||
416 | if (rx_slave) { | ||
417 | bond_info->next_rx_slave = rx_slave->next; | ||
418 | } | ||
419 | |||
420 | return rx_slave; | ||
421 | } | ||
422 | |||
423 | /* teach the switch the mac of a disabled slave | ||
424 | * on the primary for fault tolerance | ||
425 | * | ||
426 | * Caller must hold bond->curr_slave_lock for write or bond lock for write | ||
427 | */ | ||
428 | static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) | ||
429 | { | ||
430 | if (!bond->curr_active_slave) { | ||
431 | return; | ||
432 | } | ||
433 | |||
434 | if (!bond->alb_info.primary_is_promisc) { | ||
435 | bond->alb_info.primary_is_promisc = 1; | ||
436 | dev_set_promiscuity(bond->curr_active_slave->dev, 1); | ||
437 | } | ||
438 | |||
439 | bond->alb_info.rlb_promisc_timeout_counter = 0; | ||
440 | |||
441 | alb_send_learning_packets(bond->curr_active_slave, addr); | ||
442 | } | ||
443 | |||
444 | /* slave being removed should not be active at this point | ||
445 | * | ||
446 | * Caller must hold bond lock for read | ||
447 | */ | ||
448 | static void rlb_clear_slave(struct bonding *bond, struct slave *slave) | ||
449 | { | ||
450 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
451 | struct rlb_client_info *rx_hash_table; | ||
452 | u32 index, next_index; | ||
453 | |||
454 | /* clear slave from rx_hashtbl */ | ||
455 | _lock_rx_hashtbl(bond); | ||
456 | |||
457 | rx_hash_table = bond_info->rx_hashtbl; | ||
458 | index = bond_info->rx_hashtbl_head; | ||
459 | for (; index != RLB_NULL_INDEX; index = next_index) { | ||
460 | next_index = rx_hash_table[index].next; | ||
461 | if (rx_hash_table[index].slave == slave) { | ||
462 | struct slave *assigned_slave = rlb_next_rx_slave(bond); | ||
463 | |||
464 | if (assigned_slave) { | ||
465 | rx_hash_table[index].slave = assigned_slave; | ||
466 | if (memcmp(rx_hash_table[index].mac_dst, | ||
467 | mac_bcast, ETH_ALEN)) { | ||
468 | bond_info->rx_hashtbl[index].ntt = 1; | ||
469 | bond_info->rx_ntt = 1; | ||
470 | /* A slave has been removed from the | ||
471 | * table because it is either disabled | ||
472 | * or being released. We must retry the | ||
473 | * update to avoid clients from not | ||
474 | * being updated & disconnecting when | ||
475 | * there is stress | ||
476 | */ | ||
477 | bond_info->rlb_update_retry_counter = | ||
478 | RLB_UPDATE_RETRY; | ||
479 | } | ||
480 | } else { /* there is no active slave */ | ||
481 | rx_hash_table[index].slave = NULL; | ||
482 | } | ||
483 | } | ||
484 | } | ||
485 | |||
486 | _unlock_rx_hashtbl(bond); | ||
487 | |||
488 | write_lock(&bond->curr_slave_lock); | ||
489 | |||
490 | if (slave != bond->curr_active_slave) { | ||
491 | rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); | ||
492 | } | ||
493 | |||
494 | write_unlock(&bond->curr_slave_lock); | ||
495 | } | ||
496 | |||
497 | static void rlb_update_client(struct rlb_client_info *client_info) | ||
498 | { | ||
499 | int i; | ||
500 | |||
501 | if (!client_info->slave) { | ||
502 | return; | ||
503 | } | ||
504 | |||
505 | for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { | ||
506 | struct sk_buff *skb; | ||
507 | |||
508 | skb = arp_create(ARPOP_REPLY, ETH_P_ARP, | ||
509 | client_info->ip_dst, | ||
510 | client_info->slave->dev, | ||
511 | client_info->ip_src, | ||
512 | client_info->mac_dst, | ||
513 | client_info->slave->dev->dev_addr, | ||
514 | client_info->mac_dst); | ||
515 | if (!skb) { | ||
516 | printk(KERN_ERR DRV_NAME | ||
517 | ": Error: failed to create an ARP packet\n"); | ||
518 | continue; | ||
519 | } | ||
520 | |||
521 | skb->dev = client_info->slave->dev; | ||
522 | |||
523 | if (client_info->tag) { | ||
524 | skb = vlan_put_tag(skb, client_info->vlan_id); | ||
525 | if (!skb) { | ||
526 | printk(KERN_ERR DRV_NAME | ||
527 | ": Error: failed to insert VLAN tag\n"); | ||
528 | continue; | ||
529 | } | ||
530 | } | ||
531 | |||
532 | arp_xmit(skb); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | /* sends ARP REPLIES that update the clients that need updating */ | ||
537 | static void rlb_update_rx_clients(struct bonding *bond) | ||
538 | { | ||
539 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
540 | struct rlb_client_info *client_info; | ||
541 | u32 hash_index; | ||
542 | |||
543 | _lock_rx_hashtbl(bond); | ||
544 | |||
545 | hash_index = bond_info->rx_hashtbl_head; | ||
546 | for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { | ||
547 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
548 | if (client_info->ntt) { | ||
549 | rlb_update_client(client_info); | ||
550 | if (bond_info->rlb_update_retry_counter == 0) { | ||
551 | client_info->ntt = 0; | ||
552 | } | ||
553 | } | ||
554 | } | ||
555 | |||
556 | /* do not update the entries again untill this counter is zero so that | ||
557 | * not to confuse the clients. | ||
558 | */ | ||
559 | bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; | ||
560 | |||
561 | _unlock_rx_hashtbl(bond); | ||
562 | } | ||
563 | |||
564 | /* The slave was assigned a new mac address - update the clients */ | ||
565 | static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) | ||
566 | { | ||
567 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
568 | struct rlb_client_info *client_info; | ||
569 | int ntt = 0; | ||
570 | u32 hash_index; | ||
571 | |||
572 | _lock_rx_hashtbl(bond); | ||
573 | |||
574 | hash_index = bond_info->rx_hashtbl_head; | ||
575 | for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { | ||
576 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
577 | |||
578 | if ((client_info->slave == slave) && | ||
579 | memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { | ||
580 | client_info->ntt = 1; | ||
581 | ntt = 1; | ||
582 | } | ||
583 | } | ||
584 | |||
585 | // update the team's flag only after the whole iteration | ||
586 | if (ntt) { | ||
587 | bond_info->rx_ntt = 1; | ||
588 | //fasten the change | ||
589 | bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; | ||
590 | } | ||
591 | |||
592 | _unlock_rx_hashtbl(bond); | ||
593 | } | ||
594 | |||
595 | /* mark all clients using src_ip to be updated */ | ||
596 | static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip) | ||
597 | { | ||
598 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
599 | struct rlb_client_info *client_info; | ||
600 | u32 hash_index; | ||
601 | |||
602 | _lock_rx_hashtbl(bond); | ||
603 | |||
604 | hash_index = bond_info->rx_hashtbl_head; | ||
605 | for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { | ||
606 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
607 | |||
608 | if (!client_info->slave) { | ||
609 | printk(KERN_ERR DRV_NAME | ||
610 | ": Error: found a client with no channel in " | ||
611 | "the client's hash table\n"); | ||
612 | continue; | ||
613 | } | ||
614 | /*update all clients using this src_ip, that are not assigned | ||
615 | * to the team's address (curr_active_slave) and have a known | ||
616 | * unicast mac address. | ||
617 | */ | ||
618 | if ((client_info->ip_src == src_ip) && | ||
619 | memcmp(client_info->slave->dev->dev_addr, | ||
620 | bond->dev->dev_addr, ETH_ALEN) && | ||
621 | memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { | ||
622 | client_info->ntt = 1; | ||
623 | bond_info->rx_ntt = 1; | ||
624 | } | ||
625 | } | ||
626 | |||
627 | _unlock_rx_hashtbl(bond); | ||
628 | } | ||
629 | |||
630 | /* Caller must hold both bond and ptr locks for read */ | ||
631 | static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) | ||
632 | { | ||
633 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
634 | struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; | ||
635 | struct slave *assigned_slave; | ||
636 | struct rlb_client_info *client_info; | ||
637 | u32 hash_index = 0; | ||
638 | |||
639 | _lock_rx_hashtbl(bond); | ||
640 | |||
641 | hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src)); | ||
642 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
643 | |||
644 | if (client_info->assigned) { | ||
645 | if ((client_info->ip_src == arp->ip_src) && | ||
646 | (client_info->ip_dst == arp->ip_dst)) { | ||
647 | /* the entry is already assigned to this client */ | ||
648 | if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) { | ||
649 | /* update mac address from arp */ | ||
650 | memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); | ||
651 | } | ||
652 | |||
653 | assigned_slave = client_info->slave; | ||
654 | if (assigned_slave) { | ||
655 | _unlock_rx_hashtbl(bond); | ||
656 | return assigned_slave; | ||
657 | } | ||
658 | } else { | ||
659 | /* the entry is already assigned to some other client, | ||
660 | * move the old client to primary (curr_active_slave) so | ||
661 | * that the new client can be assigned to this entry. | ||
662 | */ | ||
663 | if (bond->curr_active_slave && | ||
664 | client_info->slave != bond->curr_active_slave) { | ||
665 | client_info->slave = bond->curr_active_slave; | ||
666 | rlb_update_client(client_info); | ||
667 | } | ||
668 | } | ||
669 | } | ||
670 | /* assign a new slave */ | ||
671 | assigned_slave = rlb_next_rx_slave(bond); | ||
672 | |||
673 | if (assigned_slave) { | ||
674 | client_info->ip_src = arp->ip_src; | ||
675 | client_info->ip_dst = arp->ip_dst; | ||
676 | /* arp->mac_dst is broadcast for arp reqeusts. | ||
677 | * will be updated with clients actual unicast mac address | ||
678 | * upon receiving an arp reply. | ||
679 | */ | ||
680 | memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); | ||
681 | client_info->slave = assigned_slave; | ||
682 | |||
683 | if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { | ||
684 | client_info->ntt = 1; | ||
685 | bond->alb_info.rx_ntt = 1; | ||
686 | } else { | ||
687 | client_info->ntt = 0; | ||
688 | } | ||
689 | |||
690 | if (!list_empty(&bond->vlan_list)) { | ||
691 | unsigned short vlan_id; | ||
692 | int res = vlan_get_tag(skb, &vlan_id); | ||
693 | if (!res) { | ||
694 | client_info->tag = 1; | ||
695 | client_info->vlan_id = vlan_id; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | if (!client_info->assigned) { | ||
700 | u32 prev_tbl_head = bond_info->rx_hashtbl_head; | ||
701 | bond_info->rx_hashtbl_head = hash_index; | ||
702 | client_info->next = prev_tbl_head; | ||
703 | if (prev_tbl_head != RLB_NULL_INDEX) { | ||
704 | bond_info->rx_hashtbl[prev_tbl_head].prev = | ||
705 | hash_index; | ||
706 | } | ||
707 | client_info->assigned = 1; | ||
708 | } | ||
709 | } | ||
710 | |||
711 | _unlock_rx_hashtbl(bond); | ||
712 | |||
713 | return assigned_slave; | ||
714 | } | ||
715 | |||
716 | /* chooses (and returns) transmit channel for arp reply | ||
717 | * does not choose channel for other arp types since they are | ||
718 | * sent on the curr_active_slave | ||
719 | */ | ||
720 | static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) | ||
721 | { | ||
722 | struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; | ||
723 | struct slave *tx_slave = NULL; | ||
724 | |||
725 | if (arp->op_code == __constant_htons(ARPOP_REPLY)) { | ||
726 | /* the arp must be sent on the selected | ||
727 | * rx channel | ||
728 | */ | ||
729 | tx_slave = rlb_choose_channel(skb, bond); | ||
730 | if (tx_slave) { | ||
731 | memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); | ||
732 | } | ||
733 | dprintk("Server sent ARP Reply packet\n"); | ||
734 | } else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) { | ||
735 | /* Create an entry in the rx_hashtbl for this client as a | ||
736 | * place holder. | ||
737 | * When the arp reply is received the entry will be updated | ||
738 | * with the correct unicast address of the client. | ||
739 | */ | ||
740 | rlb_choose_channel(skb, bond); | ||
741 | |||
742 | /* The ARP relpy packets must be delayed so that | ||
743 | * they can cancel out the influence of the ARP request. | ||
744 | */ | ||
745 | bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; | ||
746 | |||
747 | /* arp requests are broadcast and are sent on the primary | ||
748 | * the arp request will collapse all clients on the subnet to | ||
749 | * the primary slave. We must register these clients to be | ||
750 | * updated with their assigned mac. | ||
751 | */ | ||
752 | rlb_req_update_subnet_clients(bond, arp->ip_src); | ||
753 | dprintk("Server sent ARP Request packet\n"); | ||
754 | } | ||
755 | |||
756 | return tx_slave; | ||
757 | } | ||
758 | |||
759 | /* Caller must hold bond lock for read */ | ||
760 | static void rlb_rebalance(struct bonding *bond) | ||
761 | { | ||
762 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
763 | struct slave *assigned_slave; | ||
764 | struct rlb_client_info *client_info; | ||
765 | int ntt; | ||
766 | u32 hash_index; | ||
767 | |||
768 | _lock_rx_hashtbl(bond); | ||
769 | |||
770 | ntt = 0; | ||
771 | hash_index = bond_info->rx_hashtbl_head; | ||
772 | for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { | ||
773 | client_info = &(bond_info->rx_hashtbl[hash_index]); | ||
774 | assigned_slave = rlb_next_rx_slave(bond); | ||
775 | if (assigned_slave && (client_info->slave != assigned_slave)) { | ||
776 | client_info->slave = assigned_slave; | ||
777 | client_info->ntt = 1; | ||
778 | ntt = 1; | ||
779 | } | ||
780 | } | ||
781 | |||
782 | /* update the team's flag only after the whole iteration */ | ||
783 | if (ntt) { | ||
784 | bond_info->rx_ntt = 1; | ||
785 | } | ||
786 | _unlock_rx_hashtbl(bond); | ||
787 | } | ||
788 | |||
789 | /* Caller must hold rx_hashtbl lock */ | ||
790 | static void rlb_init_table_entry(struct rlb_client_info *entry) | ||
791 | { | ||
792 | memset(entry, 0, sizeof(struct rlb_client_info)); | ||
793 | entry->next = RLB_NULL_INDEX; | ||
794 | entry->prev = RLB_NULL_INDEX; | ||
795 | } | ||
796 | |||
797 | static int rlb_initialize(struct bonding *bond) | ||
798 | { | ||
799 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
800 | struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type); | ||
801 | int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); | ||
802 | int i; | ||
803 | |||
804 | spin_lock_init(&(bond_info->rx_hashtbl_lock)); | ||
805 | |||
806 | _lock_rx_hashtbl(bond); | ||
807 | |||
808 | bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL); | ||
809 | if (!bond_info->rx_hashtbl) { | ||
810 | printk(KERN_ERR DRV_NAME | ||
811 | ": Error: %s: Failed to allocate RLB hash table\n", | ||
812 | bond->dev->name); | ||
813 | _unlock_rx_hashtbl(bond); | ||
814 | return -1; | ||
815 | } | ||
816 | |||
817 | bond_info->rx_hashtbl_head = RLB_NULL_INDEX; | ||
818 | |||
819 | for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { | ||
820 | rlb_init_table_entry(bond_info->rx_hashtbl + i); | ||
821 | } | ||
822 | |||
823 | _unlock_rx_hashtbl(bond); | ||
824 | |||
825 | /*initialize packet type*/ | ||
826 | pk_type->type = __constant_htons(ETH_P_ARP); | ||
827 | pk_type->dev = bond->dev; | ||
828 | pk_type->func = rlb_arp_recv; | ||
829 | |||
830 | /* register to receive ARPs */ | ||
831 | dev_add_pack(pk_type); | ||
832 | |||
833 | return 0; | ||
834 | } | ||
835 | |||
836 | static void rlb_deinitialize(struct bonding *bond) | ||
837 | { | ||
838 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
839 | |||
840 | dev_remove_pack(&(bond_info->rlb_pkt_type)); | ||
841 | |||
842 | _lock_rx_hashtbl(bond); | ||
843 | |||
844 | kfree(bond_info->rx_hashtbl); | ||
845 | bond_info->rx_hashtbl = NULL; | ||
846 | bond_info->rx_hashtbl_head = RLB_NULL_INDEX; | ||
847 | |||
848 | _unlock_rx_hashtbl(bond); | ||
849 | } | ||
850 | |||
851 | static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) | ||
852 | { | ||
853 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
854 | u32 curr_index; | ||
855 | |||
856 | _lock_rx_hashtbl(bond); | ||
857 | |||
858 | curr_index = bond_info->rx_hashtbl_head; | ||
859 | while (curr_index != RLB_NULL_INDEX) { | ||
860 | struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); | ||
861 | u32 next_index = bond_info->rx_hashtbl[curr_index].next; | ||
862 | u32 prev_index = bond_info->rx_hashtbl[curr_index].prev; | ||
863 | |||
864 | if (curr->tag && (curr->vlan_id == vlan_id)) { | ||
865 | if (curr_index == bond_info->rx_hashtbl_head) { | ||
866 | bond_info->rx_hashtbl_head = next_index; | ||
867 | } | ||
868 | if (prev_index != RLB_NULL_INDEX) { | ||
869 | bond_info->rx_hashtbl[prev_index].next = next_index; | ||
870 | } | ||
871 | if (next_index != RLB_NULL_INDEX) { | ||
872 | bond_info->rx_hashtbl[next_index].prev = prev_index; | ||
873 | } | ||
874 | |||
875 | rlb_init_table_entry(curr); | ||
876 | } | ||
877 | |||
878 | curr_index = next_index; | ||
879 | } | ||
880 | |||
881 | _unlock_rx_hashtbl(bond); | ||
882 | } | ||
883 | |||
884 | /*********************** tlb/rlb shared functions *********************/ | ||
885 | |||
886 | static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) | ||
887 | { | ||
888 | struct bonding *bond = bond_get_bond_by_slave(slave); | ||
889 | struct learning_pkt pkt; | ||
890 | int size = sizeof(struct learning_pkt); | ||
891 | int i; | ||
892 | |||
893 | memset(&pkt, 0, size); | ||
894 | memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); | ||
895 | memcpy(pkt.mac_src, mac_addr, ETH_ALEN); | ||
896 | pkt.type = __constant_htons(ETH_P_LOOP); | ||
897 | |||
898 | for (i = 0; i < MAX_LP_BURST; i++) { | ||
899 | struct sk_buff *skb; | ||
900 | char *data; | ||
901 | |||
902 | skb = dev_alloc_skb(size); | ||
903 | if (!skb) { | ||
904 | return; | ||
905 | } | ||
906 | |||
907 | data = skb_put(skb, size); | ||
908 | memcpy(data, &pkt, size); | ||
909 | |||
910 | skb->mac.raw = data; | ||
911 | skb->nh.raw = data + ETH_HLEN; | ||
912 | skb->protocol = pkt.type; | ||
913 | skb->priority = TC_PRIO_CONTROL; | ||
914 | skb->dev = slave->dev; | ||
915 | |||
916 | if (!list_empty(&bond->vlan_list)) { | ||
917 | struct vlan_entry *vlan; | ||
918 | |||
919 | vlan = bond_next_vlan(bond, | ||
920 | bond->alb_info.current_alb_vlan); | ||
921 | |||
922 | bond->alb_info.current_alb_vlan = vlan; | ||
923 | if (!vlan) { | ||
924 | kfree_skb(skb); | ||
925 | continue; | ||
926 | } | ||
927 | |||
928 | skb = vlan_put_tag(skb, vlan->vlan_id); | ||
929 | if (!skb) { | ||
930 | printk(KERN_ERR DRV_NAME | ||
931 | ": Error: failed to insert VLAN tag\n"); | ||
932 | continue; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | dev_queue_xmit(skb); | ||
937 | } | ||
938 | } | ||
939 | |||
940 | /* hw is a boolean parameter that determines whether we should try and | ||
941 | * set the hw address of the device as well as the hw address of the | ||
942 | * net_device | ||
943 | */ | ||
944 | static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw) | ||
945 | { | ||
946 | struct net_device *dev = slave->dev; | ||
947 | struct sockaddr s_addr; | ||
948 | |||
949 | if (!hw) { | ||
950 | memcpy(dev->dev_addr, addr, dev->addr_len); | ||
951 | return 0; | ||
952 | } | ||
953 | |||
954 | /* for rlb each slave must have a unique hw mac addresses so that */ | ||
955 | /* each slave will receive packets destined to a different mac */ | ||
956 | memcpy(s_addr.sa_data, addr, dev->addr_len); | ||
957 | s_addr.sa_family = dev->type; | ||
958 | if (dev_set_mac_address(dev, &s_addr)) { | ||
959 | printk(KERN_ERR DRV_NAME | ||
960 | ": Error: dev_set_mac_address of dev %s failed! ALB " | ||
961 | "mode requires that the base driver support setting " | ||
962 | "the hw address also when the network device's " | ||
963 | "interface is open\n", | ||
964 | dev->name); | ||
965 | return -EOPNOTSUPP; | ||
966 | } | ||
967 | return 0; | ||
968 | } | ||
969 | |||
970 | /* Caller must hold bond lock for write or curr_slave_lock for write*/ | ||
971 | static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2) | ||
972 | { | ||
973 | struct slave *disabled_slave = NULL; | ||
974 | u8 tmp_mac_addr[ETH_ALEN]; | ||
975 | int slaves_state_differ; | ||
976 | |||
977 | slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); | ||
978 | |||
979 | memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); | ||
980 | alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); | ||
981 | alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); | ||
982 | |||
983 | /* fasten the change in the switch */ | ||
984 | if (SLAVE_IS_OK(slave1)) { | ||
985 | alb_send_learning_packets(slave1, slave1->dev->dev_addr); | ||
986 | if (bond->alb_info.rlb_enabled) { | ||
987 | /* inform the clients that the mac address | ||
988 | * has changed | ||
989 | */ | ||
990 | rlb_req_update_slave_clients(bond, slave1); | ||
991 | } | ||
992 | } else { | ||
993 | disabled_slave = slave1; | ||
994 | } | ||
995 | |||
996 | if (SLAVE_IS_OK(slave2)) { | ||
997 | alb_send_learning_packets(slave2, slave2->dev->dev_addr); | ||
998 | if (bond->alb_info.rlb_enabled) { | ||
999 | /* inform the clients that the mac address | ||
1000 | * has changed | ||
1001 | */ | ||
1002 | rlb_req_update_slave_clients(bond, slave2); | ||
1003 | } | ||
1004 | } else { | ||
1005 | disabled_slave = slave2; | ||
1006 | } | ||
1007 | |||
1008 | if (bond->alb_info.rlb_enabled && slaves_state_differ) { | ||
1009 | /* A disabled slave was assigned an active mac addr */ | ||
1010 | rlb_teach_disabled_mac_on_primary(bond, | ||
1011 | disabled_slave->dev->dev_addr); | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | /** | ||
1016 | * alb_change_hw_addr_on_detach | ||
1017 | * @bond: bonding we're working on | ||
1018 | * @slave: the slave that was just detached | ||
1019 | * | ||
1020 | * We assume that @slave was already detached from the slave list. | ||
1021 | * | ||
1022 | * If @slave's permanent hw address is different both from its current | ||
1023 | * address and from @bond's address, then somewhere in the bond there's | ||
1024 | * a slave that has @slave's permanet address as its current address. | ||
1025 | * We'll make sure that that slave no longer uses @slave's permanent address. | ||
1026 | * | ||
1027 | * Caller must hold bond lock | ||
1028 | */ | ||
1029 | static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) | ||
1030 | { | ||
1031 | int perm_curr_diff; | ||
1032 | int perm_bond_diff; | ||
1033 | |||
1034 | perm_curr_diff = memcmp(slave->perm_hwaddr, | ||
1035 | slave->dev->dev_addr, | ||
1036 | ETH_ALEN); | ||
1037 | perm_bond_diff = memcmp(slave->perm_hwaddr, | ||
1038 | bond->dev->dev_addr, | ||
1039 | ETH_ALEN); | ||
1040 | |||
1041 | if (perm_curr_diff && perm_bond_diff) { | ||
1042 | struct slave *tmp_slave; | ||
1043 | int i, found = 0; | ||
1044 | |||
1045 | bond_for_each_slave(bond, tmp_slave, i) { | ||
1046 | if (!memcmp(slave->perm_hwaddr, | ||
1047 | tmp_slave->dev->dev_addr, | ||
1048 | ETH_ALEN)) { | ||
1049 | found = 1; | ||
1050 | break; | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | if (found) { | ||
1055 | alb_swap_mac_addr(bond, slave, tmp_slave); | ||
1056 | } | ||
1057 | } | ||
1058 | } | ||
1059 | |||
1060 | /** | ||
1061 | * alb_handle_addr_collision_on_attach | ||
1062 | * @bond: bonding we're working on | ||
1063 | * @slave: the slave that was just attached | ||
1064 | * | ||
1065 | * checks uniqueness of slave's mac address and handles the case the | ||
1066 | * new slave uses the bonds mac address. | ||
1067 | * | ||
1068 | * If the permanent hw address of @slave is @bond's hw address, we need to | ||
1069 | * find a different hw address to give @slave, that isn't in use by any other | ||
1070 | * slave in the bond. This address must be, of course, one of the premanent | ||
1071 | * addresses of the other slaves. | ||
1072 | * | ||
1073 | * We go over the slave list, and for each slave there we compare its | ||
1074 | * permanent hw address with the current address of all the other slaves. | ||
1075 | * If no match was found, then we've found a slave with a permanent address | ||
1076 | * that isn't used by any other slave in the bond, so we can assign it to | ||
1077 | * @slave. | ||
1078 | * | ||
1079 | * assumption: this function is called before @slave is attached to the | ||
1080 | * bond slave list. | ||
1081 | * | ||
1082 | * caller must hold the bond lock for write since the mac addresses are compared | ||
1083 | * and may be swapped. | ||
1084 | */ | ||
1085 | static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) | ||
1086 | { | ||
1087 | struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave; | ||
1088 | struct slave *has_bond_addr = bond->curr_active_slave; | ||
1089 | int i, j, found = 0; | ||
1090 | |||
1091 | if (bond->slave_cnt == 0) { | ||
1092 | /* this is the first slave */ | ||
1093 | return 0; | ||
1094 | } | ||
1095 | |||
1096 | /* if slave's mac address differs from bond's mac address | ||
1097 | * check uniqueness of slave's mac address against the other | ||
1098 | * slaves in the bond. | ||
1099 | */ | ||
1100 | if (memcmp(slave->perm_hwaddr, bond->dev->dev_addr, ETH_ALEN)) { | ||
1101 | bond_for_each_slave(bond, tmp_slave1, i) { | ||
1102 | if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr, | ||
1103 | ETH_ALEN)) { | ||
1104 | found = 1; | ||
1105 | break; | ||
1106 | } | ||
1107 | } | ||
1108 | |||
1109 | if (found) { | ||
1110 | /* a slave was found that is using the mac address | ||
1111 | * of the new slave | ||
1112 | */ | ||
1113 | printk(KERN_ERR DRV_NAME | ||
1114 | ": Error: the hw address of slave %s is not " | ||
1115 | "unique - cannot enslave it!", | ||
1116 | slave->dev->name); | ||
1117 | return -EINVAL; | ||
1118 | } | ||
1119 | |||
1120 | return 0; | ||
1121 | } | ||
1122 | |||
1123 | /* The slave's address is equal to the address of the bond. | ||
1124 | * Search for a spare address in the bond for this slave. | ||
1125 | */ | ||
1126 | free_mac_slave = NULL; | ||
1127 | |||
1128 | bond_for_each_slave(bond, tmp_slave1, i) { | ||
1129 | found = 0; | ||
1130 | bond_for_each_slave(bond, tmp_slave2, j) { | ||
1131 | if (!memcmp(tmp_slave1->perm_hwaddr, | ||
1132 | tmp_slave2->dev->dev_addr, | ||
1133 | ETH_ALEN)) { | ||
1134 | found = 1; | ||
1135 | break; | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | if (!found) { | ||
1140 | /* no slave has tmp_slave1's perm addr | ||
1141 | * as its curr addr | ||
1142 | */ | ||
1143 | free_mac_slave = tmp_slave1; | ||
1144 | break; | ||
1145 | } | ||
1146 | |||
1147 | if (!has_bond_addr) { | ||
1148 | if (!memcmp(tmp_slave1->dev->dev_addr, | ||
1149 | bond->dev->dev_addr, | ||
1150 | ETH_ALEN)) { | ||
1151 | |||
1152 | has_bond_addr = tmp_slave1; | ||
1153 | } | ||
1154 | } | ||
1155 | } | ||
1156 | |||
1157 | if (free_mac_slave) { | ||
1158 | alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, | ||
1159 | bond->alb_info.rlb_enabled); | ||
1160 | |||
1161 | printk(KERN_WARNING DRV_NAME | ||
1162 | ": Warning: the hw address of slave %s is in use by " | ||
1163 | "the bond; giving it the hw address of %s\n", | ||
1164 | slave->dev->name, free_mac_slave->dev->name); | ||
1165 | |||
1166 | } else if (has_bond_addr) { | ||
1167 | printk(KERN_ERR DRV_NAME | ||
1168 | ": Error: the hw address of slave %s is in use by the " | ||
1169 | "bond; couldn't find a slave with a free hw address to " | ||
1170 | "give it (this should not have happened)\n", | ||
1171 | slave->dev->name); | ||
1172 | return -EFAULT; | ||
1173 | } | ||
1174 | |||
1175 | return 0; | ||
1176 | } | ||
1177 | |||
1178 | /** | ||
1179 | * alb_set_mac_address | ||
1180 | * @bond: | ||
1181 | * @addr: | ||
1182 | * | ||
1183 | * In TLB mode all slaves are configured to the bond's hw address, but set | ||
1184 | * their dev_addr field to different addresses (based on their permanent hw | ||
1185 | * addresses). | ||
1186 | * | ||
1187 | * For each slave, this function sets the interface to the new address and then | ||
1188 | * changes its dev_addr field to its previous value. | ||
1189 | * | ||
1190 | * Unwinding assumes bond's mac address has not yet changed. | ||
1191 | */ | ||
1192 | static int alb_set_mac_address(struct bonding *bond, void *addr) | ||
1193 | { | ||
1194 | struct sockaddr sa; | ||
1195 | struct slave *slave, *stop_at; | ||
1196 | char tmp_addr[ETH_ALEN]; | ||
1197 | int res; | ||
1198 | int i; | ||
1199 | |||
1200 | if (bond->alb_info.rlb_enabled) { | ||
1201 | return 0; | ||
1202 | } | ||
1203 | |||
1204 | bond_for_each_slave(bond, slave, i) { | ||
1205 | if (slave->dev->set_mac_address == NULL) { | ||
1206 | res = -EOPNOTSUPP; | ||
1207 | goto unwind; | ||
1208 | } | ||
1209 | |||
1210 | /* save net_device's current hw address */ | ||
1211 | memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); | ||
1212 | |||
1213 | res = dev_set_mac_address(slave->dev, addr); | ||
1214 | |||
1215 | /* restore net_device's hw address */ | ||
1216 | memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); | ||
1217 | |||
1218 | if (res) { | ||
1219 | goto unwind; | ||
1220 | } | ||
1221 | } | ||
1222 | |||
1223 | return 0; | ||
1224 | |||
1225 | unwind: | ||
1226 | memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); | ||
1227 | sa.sa_family = bond->dev->type; | ||
1228 | |||
1229 | /* unwind from head to the slave that failed */ | ||
1230 | stop_at = slave; | ||
1231 | bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { | ||
1232 | memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); | ||
1233 | dev_set_mac_address(slave->dev, &sa); | ||
1234 | memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); | ||
1235 | } | ||
1236 | |||
1237 | return res; | ||
1238 | } | ||
1239 | |||
1240 | /************************ exported alb functions ************************/ | ||
1241 | |||
1242 | int bond_alb_initialize(struct bonding *bond, int rlb_enabled) | ||
1243 | { | ||
1244 | int res; | ||
1245 | |||
1246 | res = tlb_initialize(bond); | ||
1247 | if (res) { | ||
1248 | return res; | ||
1249 | } | ||
1250 | |||
1251 | if (rlb_enabled) { | ||
1252 | bond->alb_info.rlb_enabled = 1; | ||
1253 | /* initialize rlb */ | ||
1254 | res = rlb_initialize(bond); | ||
1255 | if (res) { | ||
1256 | tlb_deinitialize(bond); | ||
1257 | return res; | ||
1258 | } | ||
1259 | } | ||
1260 | |||
1261 | return 0; | ||
1262 | } | ||
1263 | |||
1264 | void bond_alb_deinitialize(struct bonding *bond) | ||
1265 | { | ||
1266 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
1267 | |||
1268 | tlb_deinitialize(bond); | ||
1269 | |||
1270 | if (bond_info->rlb_enabled) { | ||
1271 | rlb_deinitialize(bond); | ||
1272 | } | ||
1273 | } | ||
1274 | |||
1275 | int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) | ||
1276 | { | ||
1277 | struct bonding *bond = bond_dev->priv; | ||
1278 | struct ethhdr *eth_data; | ||
1279 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
1280 | struct slave *tx_slave = NULL; | ||
1281 | static u32 ip_bcast = 0xffffffff; | ||
1282 | int hash_size = 0; | ||
1283 | int do_tx_balance = 1; | ||
1284 | u32 hash_index = 0; | ||
1285 | u8 *hash_start = NULL; | ||
1286 | int res = 1; | ||
1287 | |||
1288 | skb->mac.raw = (unsigned char *)skb->data; | ||
1289 | eth_data = eth_hdr(skb); | ||
1290 | |||
1291 | /* make sure that the curr_active_slave and the slaves list do | ||
1292 | * not change during tx | ||
1293 | */ | ||
1294 | read_lock(&bond->lock); | ||
1295 | read_lock(&bond->curr_slave_lock); | ||
1296 | |||
1297 | if (!BOND_IS_OK(bond)) { | ||
1298 | goto out; | ||
1299 | } | ||
1300 | |||
1301 | switch (ntohs(skb->protocol)) { | ||
1302 | case ETH_P_IP: | ||
1303 | if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) || | ||
1304 | (skb->nh.iph->daddr == ip_bcast) || | ||
1305 | (skb->nh.iph->protocol == IPPROTO_IGMP)) { | ||
1306 | do_tx_balance = 0; | ||
1307 | break; | ||
1308 | } | ||
1309 | hash_start = (char*)&(skb->nh.iph->daddr); | ||
1310 | hash_size = sizeof(skb->nh.iph->daddr); | ||
1311 | break; | ||
1312 | case ETH_P_IPV6: | ||
1313 | if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) { | ||
1314 | do_tx_balance = 0; | ||
1315 | break; | ||
1316 | } | ||
1317 | |||
1318 | hash_start = (char*)&(skb->nh.ipv6h->daddr); | ||
1319 | hash_size = sizeof(skb->nh.ipv6h->daddr); | ||
1320 | break; | ||
1321 | case ETH_P_IPX: | ||
1322 | if (ipx_hdr(skb)->ipx_checksum != | ||
1323 | __constant_htons(IPX_NO_CHECKSUM)) { | ||
1324 | /* something is wrong with this packet */ | ||
1325 | do_tx_balance = 0; | ||
1326 | break; | ||
1327 | } | ||
1328 | |||
1329 | if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { | ||
1330 | /* The only protocol worth balancing in | ||
1331 | * this family since it has an "ARP" like | ||
1332 | * mechanism | ||
1333 | */ | ||
1334 | do_tx_balance = 0; | ||
1335 | break; | ||
1336 | } | ||
1337 | |||
1338 | hash_start = (char*)eth_data->h_dest; | ||
1339 | hash_size = ETH_ALEN; | ||
1340 | break; | ||
1341 | case ETH_P_ARP: | ||
1342 | do_tx_balance = 0; | ||
1343 | if (bond_info->rlb_enabled) { | ||
1344 | tx_slave = rlb_arp_xmit(skb, bond); | ||
1345 | } | ||
1346 | break; | ||
1347 | default: | ||
1348 | do_tx_balance = 0; | ||
1349 | break; | ||
1350 | } | ||
1351 | |||
1352 | if (do_tx_balance) { | ||
1353 | hash_index = _simple_hash(hash_start, hash_size); | ||
1354 | tx_slave = tlb_choose_channel(bond, hash_index, skb->len); | ||
1355 | } | ||
1356 | |||
1357 | if (!tx_slave) { | ||
1358 | /* unbalanced or unassigned, send through primary */ | ||
1359 | tx_slave = bond->curr_active_slave; | ||
1360 | bond_info->unbalanced_load += skb->len; | ||
1361 | } | ||
1362 | |||
1363 | if (tx_slave && SLAVE_IS_OK(tx_slave)) { | ||
1364 | if (tx_slave != bond->curr_active_slave) { | ||
1365 | memcpy(eth_data->h_source, | ||
1366 | tx_slave->dev->dev_addr, | ||
1367 | ETH_ALEN); | ||
1368 | } | ||
1369 | |||
1370 | res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); | ||
1371 | } else { | ||
1372 | if (tx_slave) { | ||
1373 | tlb_clear_slave(bond, tx_slave, 0); | ||
1374 | } | ||
1375 | } | ||
1376 | |||
1377 | out: | ||
1378 | if (res) { | ||
1379 | /* no suitable interface, frame not sent */ | ||
1380 | dev_kfree_skb(skb); | ||
1381 | } | ||
1382 | read_unlock(&bond->curr_slave_lock); | ||
1383 | read_unlock(&bond->lock); | ||
1384 | return 0; | ||
1385 | } | ||
1386 | |||
1387 | void bond_alb_monitor(struct bonding *bond) | ||
1388 | { | ||
1389 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
1390 | struct slave *slave; | ||
1391 | int i; | ||
1392 | |||
1393 | read_lock(&bond->lock); | ||
1394 | |||
1395 | if (bond->kill_timers) { | ||
1396 | goto out; | ||
1397 | } | ||
1398 | |||
1399 | if (bond->slave_cnt == 0) { | ||
1400 | bond_info->tx_rebalance_counter = 0; | ||
1401 | bond_info->lp_counter = 0; | ||
1402 | goto re_arm; | ||
1403 | } | ||
1404 | |||
1405 | bond_info->tx_rebalance_counter++; | ||
1406 | bond_info->lp_counter++; | ||
1407 | |||
1408 | /* send learning packets */ | ||
1409 | if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { | ||
1410 | /* change of curr_active_slave involves swapping of mac addresses. | ||
1411 | * in order to avoid this swapping from happening while | ||
1412 | * sending the learning packets, the curr_slave_lock must be held for | ||
1413 | * read. | ||
1414 | */ | ||
1415 | read_lock(&bond->curr_slave_lock); | ||
1416 | |||
1417 | bond_for_each_slave(bond, slave, i) { | ||
1418 | alb_send_learning_packets(slave,slave->dev->dev_addr); | ||
1419 | } | ||
1420 | |||
1421 | read_unlock(&bond->curr_slave_lock); | ||
1422 | |||
1423 | bond_info->lp_counter = 0; | ||
1424 | } | ||
1425 | |||
1426 | /* rebalance tx traffic */ | ||
1427 | if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { | ||
1428 | |||
1429 | read_lock(&bond->curr_slave_lock); | ||
1430 | |||
1431 | bond_for_each_slave(bond, slave, i) { | ||
1432 | tlb_clear_slave(bond, slave, 1); | ||
1433 | if (slave == bond->curr_active_slave) { | ||
1434 | SLAVE_TLB_INFO(slave).load = | ||
1435 | bond_info->unbalanced_load / | ||
1436 | BOND_TLB_REBALANCE_INTERVAL; | ||
1437 | bond_info->unbalanced_load = 0; | ||
1438 | } | ||
1439 | } | ||
1440 | |||
1441 | read_unlock(&bond->curr_slave_lock); | ||
1442 | |||
1443 | bond_info->tx_rebalance_counter = 0; | ||
1444 | } | ||
1445 | |||
1446 | /* handle rlb stuff */ | ||
1447 | if (bond_info->rlb_enabled) { | ||
1448 | /* the following code changes the promiscuity of the | ||
1449 | * the curr_active_slave. It needs to be locked with a | ||
1450 | * write lock to protect from other code that also | ||
1451 | * sets the promiscuity. | ||
1452 | */ | ||
1453 | write_lock(&bond->curr_slave_lock); | ||
1454 | |||
1455 | if (bond_info->primary_is_promisc && | ||
1456 | (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { | ||
1457 | |||
1458 | bond_info->rlb_promisc_timeout_counter = 0; | ||
1459 | |||
1460 | /* If the primary was set to promiscuous mode | ||
1461 | * because a slave was disabled then | ||
1462 | * it can now leave promiscuous mode. | ||
1463 | */ | ||
1464 | dev_set_promiscuity(bond->curr_active_slave->dev, -1); | ||
1465 | bond_info->primary_is_promisc = 0; | ||
1466 | } | ||
1467 | |||
1468 | write_unlock(&bond->curr_slave_lock); | ||
1469 | |||
1470 | if (bond_info->rlb_rebalance) { | ||
1471 | bond_info->rlb_rebalance = 0; | ||
1472 | rlb_rebalance(bond); | ||
1473 | } | ||
1474 | |||
1475 | /* check if clients need updating */ | ||
1476 | if (bond_info->rx_ntt) { | ||
1477 | if (bond_info->rlb_update_delay_counter) { | ||
1478 | --bond_info->rlb_update_delay_counter; | ||
1479 | } else { | ||
1480 | rlb_update_rx_clients(bond); | ||
1481 | if (bond_info->rlb_update_retry_counter) { | ||
1482 | --bond_info->rlb_update_retry_counter; | ||
1483 | } else { | ||
1484 | bond_info->rx_ntt = 0; | ||
1485 | } | ||
1486 | } | ||
1487 | } | ||
1488 | } | ||
1489 | |||
1490 | re_arm: | ||
1491 | mod_timer(&(bond_info->alb_timer), jiffies + alb_delta_in_ticks); | ||
1492 | out: | ||
1493 | read_unlock(&bond->lock); | ||
1494 | } | ||
1495 | |||
1496 | /* assumption: called before the slave is attached to the bond | ||
1497 | * and not locked by the bond lock | ||
1498 | */ | ||
1499 | int bond_alb_init_slave(struct bonding *bond, struct slave *slave) | ||
1500 | { | ||
1501 | int res; | ||
1502 | |||
1503 | res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, | ||
1504 | bond->alb_info.rlb_enabled); | ||
1505 | if (res) { | ||
1506 | return res; | ||
1507 | } | ||
1508 | |||
1509 | /* caller must hold the bond lock for write since the mac addresses | ||
1510 | * are compared and may be swapped. | ||
1511 | */ | ||
1512 | write_lock_bh(&bond->lock); | ||
1513 | |||
1514 | res = alb_handle_addr_collision_on_attach(bond, slave); | ||
1515 | |||
1516 | write_unlock_bh(&bond->lock); | ||
1517 | |||
1518 | if (res) { | ||
1519 | return res; | ||
1520 | } | ||
1521 | |||
1522 | tlb_init_slave(slave); | ||
1523 | |||
1524 | /* order a rebalance ASAP */ | ||
1525 | bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; | ||
1526 | |||
1527 | if (bond->alb_info.rlb_enabled) { | ||
1528 | bond->alb_info.rlb_rebalance = 1; | ||
1529 | } | ||
1530 | |||
1531 | return 0; | ||
1532 | } | ||
1533 | |||
1534 | /* Caller must hold bond lock for write */ | ||
1535 | void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) | ||
1536 | { | ||
1537 | if (bond->slave_cnt > 1) { | ||
1538 | alb_change_hw_addr_on_detach(bond, slave); | ||
1539 | } | ||
1540 | |||
1541 | tlb_clear_slave(bond, slave, 0); | ||
1542 | |||
1543 | if (bond->alb_info.rlb_enabled) { | ||
1544 | bond->alb_info.next_rx_slave = NULL; | ||
1545 | rlb_clear_slave(bond, slave); | ||
1546 | } | ||
1547 | } | ||
1548 | |||
1549 | /* Caller must hold bond lock for read */ | ||
1550 | void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) | ||
1551 | { | ||
1552 | struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); | ||
1553 | |||
1554 | if (link == BOND_LINK_DOWN) { | ||
1555 | tlb_clear_slave(bond, slave, 0); | ||
1556 | if (bond->alb_info.rlb_enabled) { | ||
1557 | rlb_clear_slave(bond, slave); | ||
1558 | } | ||
1559 | } else if (link == BOND_LINK_UP) { | ||
1560 | /* order a rebalance ASAP */ | ||
1561 | bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; | ||
1562 | if (bond->alb_info.rlb_enabled) { | ||
1563 | bond->alb_info.rlb_rebalance = 1; | ||
1564 | /* If the updelay module parameter is smaller than the | ||
1565 | * forwarding delay of the switch the rebalance will | ||
1566 | * not work because the rebalance arp replies will | ||
1567 | * not be forwarded to the clients.. | ||
1568 | */ | ||
1569 | } | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | /** | ||
1574 | * bond_alb_handle_active_change - assign new curr_active_slave | ||
1575 | * @bond: our bonding struct | ||
1576 | * @new_slave: new slave to assign | ||
1577 | * | ||
1578 | * Set the bond->curr_active_slave to @new_slave and handle | ||
1579 | * mac address swapping and promiscuity changes as needed. | ||
1580 | * | ||
1581 | * Caller must hold bond curr_slave_lock for write (or bond lock for write) | ||
1582 | */ | ||
1583 | void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) | ||
1584 | { | ||
1585 | struct slave *swap_slave; | ||
1586 | int i; | ||
1587 | |||
1588 | if (bond->curr_active_slave == new_slave) { | ||
1589 | return; | ||
1590 | } | ||
1591 | |||
1592 | if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) { | ||
1593 | dev_set_promiscuity(bond->curr_active_slave->dev, -1); | ||
1594 | bond->alb_info.primary_is_promisc = 0; | ||
1595 | bond->alb_info.rlb_promisc_timeout_counter = 0; | ||
1596 | } | ||
1597 | |||
1598 | swap_slave = bond->curr_active_slave; | ||
1599 | bond->curr_active_slave = new_slave; | ||
1600 | |||
1601 | if (!new_slave || (bond->slave_cnt == 0)) { | ||
1602 | return; | ||
1603 | } | ||
1604 | |||
1605 | /* set the new curr_active_slave to the bonds mac address | ||
1606 | * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave | ||
1607 | */ | ||
1608 | if (!swap_slave) { | ||
1609 | struct slave *tmp_slave; | ||
1610 | /* find slave that is holding the bond's mac address */ | ||
1611 | bond_for_each_slave(bond, tmp_slave, i) { | ||
1612 | if (!memcmp(tmp_slave->dev->dev_addr, | ||
1613 | bond->dev->dev_addr, ETH_ALEN)) { | ||
1614 | swap_slave = tmp_slave; | ||
1615 | break; | ||
1616 | } | ||
1617 | } | ||
1618 | } | ||
1619 | |||
1620 | /* curr_active_slave must be set before calling alb_swap_mac_addr */ | ||
1621 | if (swap_slave) { | ||
1622 | /* swap mac address */ | ||
1623 | alb_swap_mac_addr(bond, swap_slave, new_slave); | ||
1624 | } else { | ||
1625 | /* set the new_slave to the bond mac address */ | ||
1626 | alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, | ||
1627 | bond->alb_info.rlb_enabled); | ||
1628 | /* fasten bond mac on new current slave */ | ||
1629 | alb_send_learning_packets(new_slave, bond->dev->dev_addr); | ||
1630 | } | ||
1631 | } | ||
1632 | |||
1633 | int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) | ||
1634 | { | ||
1635 | struct bonding *bond = bond_dev->priv; | ||
1636 | struct sockaddr *sa = addr; | ||
1637 | struct slave *slave, *swap_slave; | ||
1638 | int res; | ||
1639 | int i; | ||
1640 | |||
1641 | if (!is_valid_ether_addr(sa->sa_data)) { | ||
1642 | return -EADDRNOTAVAIL; | ||
1643 | } | ||
1644 | |||
1645 | res = alb_set_mac_address(bond, addr); | ||
1646 | if (res) { | ||
1647 | return res; | ||
1648 | } | ||
1649 | |||
1650 | memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); | ||
1651 | |||
1652 | /* If there is no curr_active_slave there is nothing else to do. | ||
1653 | * Otherwise we'll need to pass the new address to it and handle | ||
1654 | * duplications. | ||
1655 | */ | ||
1656 | if (!bond->curr_active_slave) { | ||
1657 | return 0; | ||
1658 | } | ||
1659 | |||
1660 | swap_slave = NULL; | ||
1661 | |||
1662 | bond_for_each_slave(bond, slave, i) { | ||
1663 | if (!memcmp(slave->dev->dev_addr, bond_dev->dev_addr, ETH_ALEN)) { | ||
1664 | swap_slave = slave; | ||
1665 | break; | ||
1666 | } | ||
1667 | } | ||
1668 | |||
1669 | if (swap_slave) { | ||
1670 | alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave); | ||
1671 | } else { | ||
1672 | alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr, | ||
1673 | bond->alb_info.rlb_enabled); | ||
1674 | |||
1675 | alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); | ||
1676 | if (bond->alb_info.rlb_enabled) { | ||
1677 | /* inform clients mac address has changed */ | ||
1678 | rlb_req_update_slave_clients(bond, bond->curr_active_slave); | ||
1679 | } | ||
1680 | } | ||
1681 | |||
1682 | return 0; | ||
1683 | } | ||
1684 | |||
1685 | void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) | ||
1686 | { | ||
1687 | if (bond->alb_info.current_alb_vlan && | ||
1688 | (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) { | ||
1689 | bond->alb_info.current_alb_vlan = NULL; | ||
1690 | } | ||
1691 | |||
1692 | if (bond->alb_info.rlb_enabled) { | ||
1693 | rlb_clear_vlan(bond, vlan_id); | ||
1694 | } | ||
1695 | } | ||
1696 | |||
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h new file mode 100644 index 000000000000..e4091cd8d654 --- /dev/null +++ b/drivers/net/bonding/bond_alb.h | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the | ||
6 | * Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | ||
11 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
12 | * for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along | ||
15 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called LICENSE. | ||
20 | * | ||
21 | * | ||
22 | * Changes: | ||
23 | * | ||
24 | * 2003/08/06 - Amir Noam <amir.noam at intel dot com> | ||
25 | * - Add support for setting bond's MAC address with special | ||
26 | * handling required for ALB/TLB. | ||
27 | * | ||
28 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
29 | * - Code cleanup and style changes | ||
30 | */ | ||
31 | |||
32 | #ifndef __BOND_ALB_H__ | ||
33 | #define __BOND_ALB_H__ | ||
34 | |||
35 | #include <linux/if_ether.h> | ||
36 | |||
37 | struct bonding; | ||
38 | struct slave; | ||
39 | |||
/* Accessors for the alb state embedded in a bonding and the tlb state
 * embedded in a slave.
 */
#define BOND_ALB_INFO(bond)   ((bond)->alb_info)
#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info)
42 | |||
43 | struct tlb_client_info { | ||
44 | struct slave *tx_slave; /* A pointer to slave used for transmiting | ||
45 | * packets to a Client that the Hash function | ||
46 | * gave this entry index. | ||
47 | */ | ||
48 | u32 tx_bytes; /* Each Client acumulates the BytesTx that | ||
49 | * were tranmitted to it, and after each | ||
50 | * CallBack the LoadHistory is devided | ||
51 | * by the balance interval | ||
52 | */ | ||
53 | u32 load_history; /* This field contains the amount of Bytes | ||
54 | * that were transmitted to this client by | ||
55 | * the server on the previous balance | ||
56 | * interval in Bps. | ||
57 | */ | ||
58 | u32 next; /* The next Hash table entry index, assigned | ||
59 | * to use the same adapter for transmit. | ||
60 | */ | ||
61 | u32 prev; /* The previous Hash table entry index, | ||
62 | * assigned to use the same | ||
63 | */ | ||
64 | }; | ||
65 | |||
66 | /* ------------------------------------------------------------------------- | ||
67 | * struct rlb_client_info contains all info related to a specific rx client | ||
68 | * connection. This is the Clients Hash Table entry struct | ||
69 | * ------------------------------------------------------------------------- | ||
70 | */ | ||
71 | struct rlb_client_info { | ||
72 | u32 ip_src; /* the server IP address */ | ||
73 | u32 ip_dst; /* the client IP address */ | ||
74 | u8 mac_dst[ETH_ALEN]; /* the client MAC address */ | ||
75 | u32 next; /* The next Hash table entry index */ | ||
76 | u32 prev; /* The previous Hash table entry index */ | ||
77 | u8 assigned; /* checking whether this entry is assigned */ | ||
78 | u8 ntt; /* flag - need to transmit client info */ | ||
79 | struct slave *slave; /* the slave assigned to this client */ | ||
80 | u8 tag; /* flag - need to tag skb */ | ||
81 | unsigned short vlan_id; /* VLAN tag associated with IP address */ | ||
82 | }; | ||
83 | |||
84 | struct tlb_slave_info { | ||
85 | u32 head; /* Index to the head of the bi-directional clients | ||
86 | * hash table entries list. The entries in the list | ||
87 | * are the entries that were assigned to use this | ||
88 | * slave for transmit. | ||
89 | */ | ||
90 | u32 load; /* Each slave sums the loadHistory of all clients | ||
91 | * assigned to it | ||
92 | */ | ||
93 | }; | ||
94 | |||
95 | struct alb_bond_info { | ||
96 | struct timer_list alb_timer; | ||
97 | struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ | ||
98 | spinlock_t tx_hashtbl_lock; | ||
99 | u32 unbalanced_load; | ||
100 | int tx_rebalance_counter; | ||
101 | int lp_counter; | ||
102 | /* -------- rlb parameters -------- */ | ||
103 | int rlb_enabled; | ||
104 | struct packet_type rlb_pkt_type; | ||
105 | struct rlb_client_info *rx_hashtbl; /* Receive hash table */ | ||
106 | spinlock_t rx_hashtbl_lock; | ||
107 | u32 rx_hashtbl_head; | ||
108 | u8 rx_ntt; /* flag - need to transmit | ||
109 | * to all rx clients | ||
110 | */ | ||
111 | struct slave *next_rx_slave;/* next slave to be assigned | ||
112 | * to a new rx client for | ||
113 | */ | ||
114 | u32 rlb_interval_counter; | ||
115 | u8 primary_is_promisc; /* boolean */ | ||
116 | u32 rlb_promisc_timeout_counter;/* counts primary | ||
117 | * promiscuity time | ||
118 | */ | ||
119 | u32 rlb_update_delay_counter; | ||
120 | u32 rlb_update_retry_counter;/* counter of retries | ||
121 | * of client update | ||
122 | */ | ||
123 | u8 rlb_rebalance; /* flag - indicates that the | ||
124 | * rx traffic should be | ||
125 | * rebalanced | ||
126 | */ | ||
127 | struct vlan_entry *current_alb_vlan; | ||
128 | }; | ||
129 | |||
/* Bond-wide setup and teardown */
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
void bond_alb_deinitialize(struct bonding *bond);
/* Per-slave setup and teardown */
int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
/* Event handlers */
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
/* Transmit path and periodic monitor */
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
void bond_alb_monitor(struct bonding *bond);
/* Configuration changes */
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
140 | #endif /* __BOND_ALB_H__ */ | ||
141 | |||
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c new file mode 100644 index 000000000000..770e28f98fd5 --- /dev/null +++ b/drivers/net/bonding/bond_main.c | |||
@@ -0,0 +1,4708 @@ | |||
1 | /* | ||
2 | * originally based on the dummy device. | ||
3 | * | ||
4 | * Copyright 1999, Thomas Davis, tadavis@lbl.gov. | ||
5 | * Licensed under the GPL. Based on dummy.c, and eql.c devices. | ||
6 | * | ||
7 | * bonding.c: an Ethernet Bonding driver | ||
8 | * | ||
9 | * This is useful to talk to a Cisco EtherChannel compatible equipment: | ||
10 | * Cisco 5500 | ||
11 | * Sun Trunking (Solaris) | ||
12 | * Alteon AceDirector Trunks | ||
13 | * Linux Bonding | ||
14 | * and probably many L2 switches ... | ||
15 | * | ||
16 | * How it works: | ||
17 | * ifconfig bond0 ipaddress netmask up | ||
18 | * will setup a network device, with an ip address. No mac address | ||
19 | * will be assigned at this time. The hw mac address will come from | ||
20 | * the first slave bonded to the channel. All slaves will then use | ||
21 | * this hw mac address. | ||
22 | * | ||
23 | * ifconfig bond0 down | ||
24 | * will release all slaves, marking them as down. | ||
25 | * | ||
26 | * ifenslave bond0 eth0 | ||
27 | * will attach eth0 to bond0 as a slave. eth0 hw mac address will either | ||
28 | * a: be used as initial mac address | ||
29 | * b: if a hw mac address already is there, eth0's hw mac address | ||
30 | * will then be set from bond0. | ||
31 | * | ||
32 | * v0.1 - first working version. | ||
33 | * v0.2 - changed stats to be calculated by summing slaves stats. | ||
34 | * | ||
35 | * Changes: | ||
36 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
37 | * - fix leaks on failure at bond_init | ||
38 | * | ||
39 | * 2000/09/30 - Willy Tarreau <willy at meta-x.org> | ||
40 | * - added trivial code to release a slave device. | ||
41 | * - fixed security bug (CAP_NET_ADMIN not checked) | ||
42 | * - implemented MII link monitoring to disable dead links : | ||
43 | * All MII capable slaves are checked every <miimon> milliseconds | ||
44 | * (100 ms seems good). This value can be changed by passing it to | ||
45 | * insmod. A value of zero disables the monitoring (default). | ||
46 | * - fixed an infinite loop in bond_xmit_roundrobin() when there's no | ||
47 | * good slave. | ||
48 | * - made the code hopefully SMP safe | ||
49 | * | ||
50 | * 2000/10/03 - Willy Tarreau <willy at meta-x.org> | ||
51 | * - optimized slave lists based on relevant suggestions from Thomas Davis | ||
52 | * - implemented active-backup method to obtain HA with two switches: | ||
53 | * stay as long as possible on the same active interface, while we | ||
54 | * also monitor the backup one (MII link status) because we want to know | ||
55 | * if we are able to switch at any time. ( pass "mode=1" to insmod ) | ||
56 | * - lots of stress testings because we need it to be more robust than the | ||
57 | * wires ! :-> | ||
58 | * | ||
59 | * 2000/10/09 - Willy Tarreau <willy at meta-x.org> | ||
60 | * - added up and down delays after link state change. | ||
61 | * - optimized the slaves chaining so that when we run forward, we never | ||
62 | * repass through the bond itself, but we can find it by searching | ||
63 | * backwards. Renders the deletion more difficult, but accelerates the | ||
64 | * scan. | ||
65 | * - smarter enslaving and releasing. | ||
66 | * - finer and more robust SMP locking | ||
67 | * | ||
68 | * 2000/10/17 - Willy Tarreau <willy at meta-x.org> | ||
69 | * - fixed two potential SMP race conditions | ||
70 | * | ||
71 | * 2000/10/18 - Willy Tarreau <willy at meta-x.org> | ||
72 | * - small fixes to the monitoring FSM in case of zero delays | ||
73 | * 2000/11/01 - Willy Tarreau <willy at meta-x.org> | ||
74 | * - fixed first slave not automatically used in trunk mode. | ||
75 | * 2000/11/10 : spelling of "EtherChannel" corrected. | ||
76 | * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). | ||
77 | * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). | ||
78 | * | ||
79 | * 2001/1/3 - Chad N. Tindel <ctindel at ieee dot org> | ||
80 | * - The bonding driver now simulates MII status monitoring, just like | ||
81 | * a normal network device. It will show that the link is down iff | ||
82 | * every slave in the bond shows that their links are down. If at least | ||
83 | * one slave is up, the bond's MII status will appear as up. | ||
84 | * | ||
85 | * 2001/2/7 - Chad N. Tindel <ctindel at ieee dot org> | ||
86 | * - Applications can now query the bond from user space to get | ||
87 | * information which may be useful. They do this by calling | ||
88 | * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves | ||
89 | * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to | ||
90 | * get slave specific information (# link failures, etc). See | ||
91 | * <linux/if_bonding.h> for more details. The structs of interest | ||
92 | * are ifbond and ifslave. | ||
93 | * | ||
94 | * 2001/4/5 - Chad N. Tindel <ctindel at ieee dot org> | ||
95 | * - Ported to 2.4 Kernel | ||
96 | * | ||
97 | * 2001/5/2 - Jeffrey E. Mast <jeff at mastfamily dot com> | ||
98 | * - When a device is detached from a bond, the slave device is no longer | ||
99 | * left thinking that it has a master. | ||
100 | * | ||
101 | * 2001/5/16 - Jeffrey E. Mast <jeff at mastfamily dot com> | ||
102 | * - memset did not appropriately initialize the bond rw_locks. Used | ||
103 | * rwlock_init to initialize to unlocked state to prevent deadlock when | ||
104 | * first attempting a lock | ||
105 | * - Called SET_MODULE_OWNER for bond device | ||
106 | * | ||
107 | * 2001/5/17 - Tim Anderson <tsa at mvista.com> | ||
108 | * - 2 paths for releasing for slave release; 1 through ioctl | ||
109 | * and 2) through close. Both paths need to release the same way. | ||
110 | * - the free slave in bond release is changing slave status before | ||
111 | * the free. The netdev_set_master() is intended to change slave state | ||
112 | * so it should not be done as part of the release process. | ||
113 | * - Simple rule for slave state at release: only the active in A/B and | ||
114 | * only one in the trunked case. | ||
115 | * | ||
116 | * 2001/6/01 - Tim Anderson <tsa at mvista.com> | ||
117 | * - Now call dev_close when releasing a slave so it doesn't screw up | ||
118 | * our routing table. | ||
119 | * | ||
120 | * 2001/6/01 - Chad N. Tindel <ctindel at ieee dot org> | ||
121 | * - Added /proc support for getting bond and slave information. | ||
122 | * Information is in /proc/net/<bond device>/info. | ||
123 | * - Changed the locking when calling bond_close to prevent deadlock. | ||
124 | * | ||
125 | * 2001/8/05 - Janice Girouard <girouard at us.ibm.com> | ||
126 | * - correct problem where refcnt of slave is not incremented in bond_ioctl | ||
127 | * so the system hangs when halting. | ||
128 | * - correct locking problem when unable to malloc in bond_enslave. | ||
129 | * - adding bond_xmit_xor logic. | ||
130 | * - adding multiple bond device support. | ||
131 | * | ||
132 | * 2001/8/13 - Erik Habbinga <erik_habbinga at hp dot com> | ||
133 | * - correct locking problem with rtnl_exlock_nowait | ||
134 | * | ||
135 | * 2001/8/23 - Janice Girouard <girouard at us.ibm.com> | ||
136 | * - bzero initial dev_bonds, to correct oops | ||
137 | * - convert SIOCDEVPRIVATE to new MII ioctl calls | ||
138 | * | ||
139 | * 2001/9/13 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> | ||
140 | * - Add the BOND_CHANGE_ACTIVE ioctl implementation | ||
141 | * | ||
142 | * 2001/9/14 - Mark Huth <mhuth at mvista dot com> | ||
143 | * - Change MII_LINK_READY to not check for end of auto-negotiation, | ||
144 | * but only for an up link. | ||
145 | * | ||
146 | * 2001/9/20 - Chad N. Tindel <ctindel at ieee dot org> | ||
147 | * - Add the device field to bonding_t. Previously the net_device | ||
148 | * corresponding to a bond wasn't available from the bonding_t | ||
149 | * structure. | ||
150 | * | ||
151 | * 2001/9/25 - Janice Girouard <girouard at us.ibm.com> | ||
152 | * - add arp_monitor for active backup mode | ||
153 | * | ||
154 | * 2001/10/23 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> | ||
155 | * - Various memory leak fixes | ||
156 | * | ||
157 | * 2001/11/5 - Mark Huth <mark dot huth at mvista dot com> | ||
158 | * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under | ||
159 | * certain hotswap conditions. | ||
160 | * Note: this same change may be required in bond_arp_monitor ??? | ||
161 | * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr | ||
162 | * - Handle hot swap ethernet interface deregistration events to remove | ||
163 | * kernel oops following hot swap of enslaved interface | ||
164 | * | ||
165 | * 2002/1/2 - Chad N. Tindel <ctindel at ieee dot org> | ||
166 | * - Restore original slave flags at release time. | ||
167 | * | ||
168 | * 2002/02/18 - Erik Habbinga <erik_habbinga at hp dot com> | ||
169 | * - bond_release(): calling kfree on our_slave after call to | ||
170 | * bond_restore_slave_flags, not before | ||
171 | * - bond_enslave(): saving slave flags into original_flags before | ||
172 | * call to netdev_set_master, so the IFF_SLAVE flag doesn't end | ||
173 | * up in original_flags | ||
174 | * | ||
175 | * 2002/04/05 - Mark Smith <mark.smith at comdev dot cc> and | ||
176 | * Steve Mead <steve.mead at comdev dot cc> | ||
177 | * - Port Gleb Natapov's multicast support patches from 2.4.12 | ||
178 | * to 2.4.18 adding support for multicast. | ||
179 | * | ||
180 | * 2002/06/10 - Tony Cureington <tony.cureington * hp_com> | ||
181 | * - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link; | ||
182 | * actually changed function to use MIIPHY, then MIIREG, and finally | ||
183 | * ETHTOOL to determine the link status | ||
184 | * - fixed bad ifr_data pointer assignments in bond_ioctl | ||
185 | * - corrected mode 1 being reported as active-backup in bond_get_info; | ||
186 | * also added text to distinguish type of load balancing (rr or xor) | ||
187 | * - change arp_ip_target module param from "1-12s" (array of 12 ptrs) | ||
188 | * to "s" (a single ptr) | ||
189 | * | ||
190 | * 2002/08/30 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
191 | * - Removed acquisition of xmit_lock in set_multicast_list; caused | ||
192 | * deadlock on SMP (lock is held by caller). | ||
193 | * - Revamped SIOCGMIIPHY, SIOCGMIIREG portion of bond_check_dev_link(). | ||
194 | * | ||
195 | * 2002/09/18 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
196 | * - Fixed up bond_check_dev_link() (and callers): removed some magic | ||
197 | * numbers, banished local MII_ defines, wrapped ioctl calls to | ||
198 | * prevent EFAULT errors | ||
199 | * | ||
200 | * 2002/9/30 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
201 | * - make sure the ip target matches the arp_target before saving the | ||
202 | * hw address. | ||
203 | * | ||
204 | * 2002/9/30 - Dan Eisner <eisner at 2robots dot com> | ||
205 | * - make sure my_ip is set before taking down the link, since | ||
206 | * not all switches respond if the source ip is not set. | ||
207 | * | ||
208 | * 2002/10/8 - Janice Girouard <girouard at us dot ibm dot com> | ||
209 | * - read in the local ip address when enslaving a device | ||
210 | * - add primary support | ||
211 | * - make sure 2*arp_interval has passed when a new device | ||
212 | * is brought on-line before taking it down. | ||
213 | * | ||
214 | * 2002/09/11 - Philippe De Muyter <phdm at macqel dot be> | ||
215 | * - Added bond_xmit_broadcast logic. | ||
216 | * - Added bond_mode() support function. | ||
217 | * | ||
218 | * 2002/10/26 - Laurent Deniel <laurent.deniel at free.fr> | ||
219 | * - allow to register multicast addresses only on active slave | ||
220 | * (useful in active-backup mode) | ||
221 | * - add multicast module parameter | ||
222 | * - fix deletion of multicast groups after unloading module | ||
223 | * | ||
224 | * 2002/11/06 - Kameshwara Rayaprolu <kameshwara.rao * wipro_com> | ||
225 | * - Changes to prevent panic from closing the device twice; if we close | ||
226 | * the device in bond_release, we must set the original_flags to down | ||
227 | * so it won't be closed again by the network layer. | ||
228 | * | ||
229 | * 2002/11/07 - Tony Cureington <tony.cureington * hp_com> | ||
230 | * - Fix arp_target_hw_addr memory leak | ||
231 | * - Created activebackup_arp_monitor function to handle arp monitoring | ||
232 | * in active backup mode - the bond_arp_monitor had several problems... | ||
233 | * such as allowing slaves to tx arps sequentially without any delay | ||
234 | * for a response | ||
235 | * - Renamed bond_arp_monitor to loadbalance_arp_monitor and re-wrote | ||
236 | * this function to just handle arp monitoring in load-balancing mode; | ||
237 | * it is a lot more compact now | ||
238 | * - Changes to ensure one and only one slave transmits in active-backup | ||
239 | * mode | ||
240 | * - Robustesize parameters; warn users about bad combinations of | ||
241 | * parameters; also if miimon is specified and a network driver does | ||
242 | * not support MII or ETHTOOL, inform the user of this | ||
243 | * - Changes to support link_failure_count when in arp monitoring mode | ||
244 | * - Fix up/down delay reported in /proc | ||
245 | * - Added version; log version; make version available from "modinfo -d" | ||
246 | * - Fixed problem in bond_check_dev_link - if the first IOCTL (SIOCGMIIPHY) | ||
247 | * failed, the ETHTOOL ioctl never got a chance | ||
248 | * | ||
249 | * 2002/11/16 - Laurent Deniel <laurent.deniel at free.fr> | ||
250 | * - fix multicast handling in activebackup_arp_monitor | ||
251 | * - remove one unnecessary and confusing curr_active_slave == slave test | ||
252 | * in activebackup_arp_monitor | ||
253 | * | ||
254 | * 2002/11/17 - Laurent Deniel <laurent.deniel at free.fr> | ||
255 | * - fix bond_slave_info_query when slave_id = num_slaves | ||
256 | * | ||
257 | * 2002/11/19 - Janice Girouard <girouard at us dot ibm dot com> | ||
258 | * - correct ifr_data reference. Update ifr_data reference | ||
259 | * to mii_ioctl_data struct values to avoid confusion. | ||
260 | * | ||
261 | * 2002/11/22 - Bert Barbe <bert.barbe at oracle dot com> | ||
262 | * - Add support for multiple arp_ip_target | ||
263 | * | ||
264 | * 2002/12/13 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
265 | * - Changed to allow text strings for mode and multicast, e.g., | ||
266 | * insmod bonding mode=active-backup. The numbers still work. | ||
267 | * One change: an invalid choice will cause module load failure, | ||
268 | * rather than the previous behavior of just picking one. | ||
269 | * - Minor cleanups; got rid of dup ctype stuff, atoi function | ||
270 | * | ||
271 | * 2003/02/07 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
272 | * - Added use_carrier module parameter that causes miimon to | ||
273 | * use netif_carrier_ok() test instead of MII/ETHTOOL ioctls. | ||
274 | * - Minor cleanups; consolidated ioctl calls to one function. | ||
275 | * | ||
276 | * 2003/02/07 - Tony Cureington <tony.cureington * hp_com> | ||
277 | * - Fix bond_mii_monitor() logic error that could result in | ||
278 | * bonding round-robin mode ignoring links after failover/recovery | ||
279 | * | ||
280 | * 2003/03/17 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
281 | * - kmalloc fix (GFP_KERNEL to GFP_ATOMIC) reported by | ||
282 | * Shmulik dot Hen at intel.com. | ||
283 | * - Based on discussion on mailing list, changed use of | ||
284 | * update_slave_cnt(), created wrapper functions for adding/removing | ||
285 | * slaves, changed bond_xmit_xor() to check slave_cnt instead of | ||
286 | * checking slave and slave->dev (which only worked by accident). | ||
287 | * - Misc code cleanup: get arp_send() prototype from header file, | ||
288 | * add max_bonds to bonding.txt. | ||
289 | * | ||
290 | * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
291 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
292 | * - Make sure only bond_attach_slave() and bond_detach_slave() can | ||
293 | * manipulate the slave list, including slave_cnt, even when in | ||
294 | * bond_release_all(). | ||
295 | * - Fixed hang in bond_release() with traffic running: | ||
296 | * netdev_set_master() must not be called from within the bond lock. | ||
297 | * | ||
298 | * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
299 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
300 | * - Fixed hang in bond_enslave() with traffic running: | ||
301 | * netdev_set_master() must not be called from within the bond lock. | ||
302 | * | ||
303 | * 2003/03/18 - Amir Noam <amir.noam at intel dot com> | ||
304 | * - Added support for getting slave's speed and duplex via ethtool. | ||
305 | * Needed for 802.3ad and other future modes. | ||
306 | * | ||
307 | * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
308 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
309 | * - Enable support of modes that need to use the unique mac address of | ||
310 | * each slave. | ||
311 | * * bond_enslave(): Moved setting the slave's mac address, and | ||
312 | * opening it, from the application to the driver. This breaks | ||
313 | * backward compatibility with old versions of ifenslave that open | ||
314 | * the slave before enslaving it !!!. | ||
315 | * * bond_release(): The driver also takes care of closing the slave | ||
316 | * and restoring its original mac address. | ||
317 | * - Removed the code that restores all base driver's flags. | ||
318 | * Flags are automatically restored once all undo stages are done | ||
319 | * properly. | ||
320 | * - Block possibility of enslaving before the master is up. This | ||
321 | * prevents putting the system in an unstable state. | ||
322 | * | ||
323 | * 2003/03/18 - Amir Noam <amir.noam at intel dot com>, | ||
324 | * Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
325 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
326 | * - Added support for IEEE 802.3ad Dynamic link aggregation mode. | ||
327 | * | ||
328 | * 2003/05/01 - Amir Noam <amir.noam at intel dot com> | ||
329 | * - Added ABI version control to restore compatibility between | ||
330 | * new/old ifenslave and new/old bonding. | ||
331 | * | ||
332 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
333 | * - Fixed bug in bond_release_all(): save old value of curr_active_slave | ||
334 | * before setting it to NULL. | ||
335 | * - Changed driver versioning scheme to include version number instead | ||
336 | * of release date (that is already in another field). There are 3 | ||
337 | * fields X.Y.Z where: | ||
338 | * X - Major version - big behavior changes | ||
339 | * Y - Minor version - addition of features | ||
340 | * Z - Extra version - minor changes and bug fixes | ||
341 | * The current version is 1.0.0 as a base line. | ||
342 | * | ||
343 | * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
344 | * Amir Noam <amir.noam at intel dot com> | ||
345 | * - Added support for lacp_rate module param. | ||
346 | * - Code beautification and style changes (mainly in comments). | ||
347 | * new version - 1.0.1 | ||
348 | * | ||
349 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
350 | * - Based on discussion on mailing list, changed locking scheme | ||
351 | * to use lock/unlock or lock_bh/unlock_bh appropriately instead | ||
352 | * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing | ||
353 | * hidden bugs and solves system hangs that occurred due to the fact | ||
354 | * that holding lock_irqsave doesn't prevent softirqs from running. | ||
355 | * This also increases total throughput since interrupts are not | ||
356 | * blocked on each transmitted packets or monitor timeout. | ||
357 | * new version - 2.0.0 | ||
358 | * | ||
359 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
360 | * - Added support for Transmit load balancing mode. | ||
361 | * - Concentrate all assignments of curr_active_slave to a single point | ||
362 | * so specific modes can take actions when the primary adapter is | ||
363 | * changed. | ||
364 | * - Take the updelay parameter into consideration during bond_enslave | ||
365 | * since some adapters lose their link during setting the device. | ||
366 | * - Renamed bond_3ad_link_status_changed() to | ||
367 | * bond_3ad_handle_link_change() for compatibility with TLB. | ||
368 | * new version - 2.1.0 | ||
369 | * | ||
370 | * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> | ||
371 | * - Added support for Adaptive load balancing mode which is | ||
372 | * equivalent to Transmit load balancing + Receive load balancing. | ||
373 | * new version - 2.2.0 | ||
374 | * | ||
375 | * 2003/05/15 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
376 | * - Applied fix to activebackup_arp_monitor posted to bonding-devel | ||
377 | * by Tony Cureington <tony.cureington * hp_com>. Fixes ARP | ||
378 | * monitor endless failover bug. Version to 2.2.10 | ||
379 | * | ||
380 | * 2003/05/20 - Amir Noam <amir.noam at intel dot com> | ||
381 | * - Fixed bug in ABI version control - Don't commit to a specific | ||
382 | * ABI version if receiving unsupported ioctl commands. | ||
383 | * | ||
384 | * 2003/05/22 - Jay Vosburgh <fubar at us dot ibm dot com> | ||
385 | * - Fix ifenslave -c causing bond to lose existing routes; | ||
386 | * added bond_set_mac_address() that doesn't require the | ||
387 | * bond to be down. | ||
388 | * - In conjunction with fix for ifenslave -c, in | ||
389 | * bond_change_active(), changing to the already active slave | ||
390 | * is no longer an error (it successfully does nothing). | ||
391 | * | ||
392 | * 2003/06/30 - Amir Noam <amir.noam at intel dot com> | ||
393 | * - Fixed bond_change_active() for ALB/TLB modes. | ||
394 | * Version to 2.2.14. | ||
395 | * | ||
396 | * 2003/07/29 - Amir Noam <amir.noam at intel dot com> | ||
397 | * - Fixed ARP monitoring bug. | ||
398 | * Version to 2.2.15. | ||
399 | * | ||
400 | * 2003/07/31 - Willy Tarreau <willy at ods dot org> | ||
401 | * - Fixed kernel panic when using ARP monitoring without | ||
402 | * setting bond's IP address. | ||
403 | * Version to 2.2.16. | ||
404 | * | ||
405 | * 2003/08/06 - Amir Noam <amir.noam at intel dot com> | ||
406 | * - Back port from 2.6: use alloc_netdev(); fix /proc handling; | ||
407 | * made stats a part of bond struct so no need to allocate | ||
408 | * and free it separately; use standard list operations instead | ||
409 | * of pre-allocated array of bonds. | ||
410 | * Version to 2.3.0. | ||
411 | * | ||
412 | * 2003/08/07 - Jay Vosburgh <fubar at us dot ibm dot com>, | ||
413 | * Amir Noam <amir.noam at intel dot com> and | ||
414 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
415 | * - Propagating master's settings: Distinguish between modes that | ||
416 | * use a primary slave from those that don't, and propagate settings | ||
417 | * accordingly; Consolidate change_active operations and add | ||
418 | * reselect_active and find_best operations; Decouple promiscuous | ||
419 | * handling from the multicast mode setting; Add support for changing | ||
420 | * HW address and MTU with proper unwind; Consolidate procfs code, | ||
421 | * add CHANGENAME handler; Enhance netdev notification handling. | ||
422 | * Version to 2.4.0. | ||
423 | * | ||
424 | * 2003/09/15 - Stephen Hemminger <shemminger at osdl dot org>, | ||
425 | * Amir Noam <amir.noam at intel dot com> | ||
426 | * - Convert /proc to seq_file interface. | ||
427 | * Change /proc/net/bondX/info to /proc/net/bonding/bondX. | ||
428 | * Set version to 2.4.1. | ||
429 | * | ||
430 | * 2003/11/20 - Amir Noam <amir.noam at intel dot com> | ||
431 | * - Fix /proc creation/destruction. | ||
432 | * | ||
433 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
434 | * - Massive cleanup - Set version to 2.5.0 | ||
435 | * Code changes: | ||
436 | * o Consolidate format of prints and debug prints. | ||
437 | * o Remove bonding_t/slave_t typedefs and consolidate all casts. | ||
438 | * o Remove dead code and unnecessary checks. | ||
439 | * o Consolidate starting/stopping timers. | ||
440 | * o Consolidate handling of primary module param throughout the code. | ||
441 | * o Removed multicast module param support - all settings are done | ||
442 | * according to mode. | ||
443 | * o Slave list iteration - bond is no longer part of the list, | ||
444 | * added cyclic list iteration macros. | ||
445 | * o Consolidate error handling in all xmit functions. | ||
446 | * Style changes: | ||
447 | * o Consolidate function naming and declarations. | ||
448 | * o Consolidate function params and local variables names. | ||
449 | * o Consolidate return values. | ||
450 | * o Consolidate curly braces. | ||
451 | * o Consolidate conditionals format. | ||
452 | * o Change struct member names and types. | ||
453 | * o Chomp trailing spaces, remove empty lines, fix indentations. | ||
454 | * o Re-organize code according to context. | ||
455 | * | ||
456 | * 2003/12/30 - Amir Noam <amir.noam at intel dot com> | ||
457 | * - Fixed: Cannot remove and re-enslave the original active slave. | ||
458 | * - Fixed: Releasing the original active slave causes mac address | ||
459 | * duplication. | ||
460 | * - Add support for slaves that use ethtool_ops. | ||
461 | * Set version to 2.5.3. | ||
462 | * | ||
463 | * 2004/01/05 - Amir Noam <amir.noam at intel dot com> | ||
464 | * - Save bonding parameters per bond instead of using the global values. | ||
465 | * Set version to 2.5.4. | ||
466 | * | ||
467 | * 2004/01/14 - Shmulik Hen <shmulik.hen at intel dot com> | ||
468 | * - Enhance VLAN support: | ||
469 | * * Add support for VLAN hardware acceleration capable slaves. | ||
470 | * * Add capability to tag self generated packets in ALB/TLB modes. | ||
471 | * Set version to 2.6.0. | ||
472 | * 2004/10/29 - Mitch Williams <mitch.a.williams at intel dot com> | ||
473 | * - Fixed bug when unloading module while using 802.3ad. If | ||
474 | * spinlock debugging is turned on, this causes a stack dump. | ||
475 | * Solution is to move call to dev_remove_pack outside of the | ||
476 | * spinlock. | ||
477 | * Set version to 2.6.1. | ||
478 | * | ||
479 | */ | ||
480 | |||
481 | //#define BONDING_DEBUG 1 | ||
482 | |||
483 | #include <linux/config.h> | ||
484 | #include <linux/kernel.h> | ||
485 | #include <linux/module.h> | ||
486 | #include <linux/sched.h> | ||
487 | #include <linux/types.h> | ||
488 | #include <linux/fcntl.h> | ||
489 | #include <linux/interrupt.h> | ||
490 | #include <linux/ptrace.h> | ||
491 | #include <linux/ioport.h> | ||
492 | #include <linux/in.h> | ||
493 | #include <linux/ip.h> | ||
494 | #include <linux/slab.h> | ||
495 | #include <linux/string.h> | ||
496 | #include <linux/init.h> | ||
497 | #include <linux/timer.h> | ||
498 | #include <linux/socket.h> | ||
499 | #include <linux/ctype.h> | ||
500 | #include <linux/inet.h> | ||
501 | #include <linux/bitops.h> | ||
502 | #include <asm/system.h> | ||
503 | #include <asm/io.h> | ||
504 | #include <asm/dma.h> | ||
505 | #include <asm/uaccess.h> | ||
506 | #include <linux/errno.h> | ||
507 | #include <linux/netdevice.h> | ||
508 | #include <linux/inetdevice.h> | ||
509 | #include <linux/etherdevice.h> | ||
510 | #include <linux/skbuff.h> | ||
511 | #include <net/sock.h> | ||
512 | #include <linux/rtnetlink.h> | ||
513 | #include <linux/proc_fs.h> | ||
514 | #include <linux/seq_file.h> | ||
515 | #include <linux/smp.h> | ||
516 | #include <linux/if_ether.h> | ||
517 | #include <net/arp.h> | ||
518 | #include <linux/mii.h> | ||
519 | #include <linux/ethtool.h> | ||
520 | #include <linux/if_vlan.h> | ||
521 | #include <linux/if_bonding.h> | ||
522 | #include "bonding.h" | ||
523 | #include "bond_3ad.h" | ||
524 | #include "bond_alb.h" | ||
525 | |||
526 | /*---------------------------- Module parameters ----------------------------*/ | ||
527 | |||
528 | /* monitor all links that often (in milliseconds). <=0 disables monitoring */ | ||
529 | #define BOND_LINK_MON_INTERV 0 | ||
530 | #define BOND_LINK_ARP_INTERV 0 | ||
531 | |||
532 | static int max_bonds = BOND_DEFAULT_MAX_BONDS; | ||
533 | static int miimon = BOND_LINK_MON_INTERV; | ||
534 | static int updelay = 0; | ||
535 | static int downdelay = 0; | ||
536 | static int use_carrier = 1; | ||
537 | static char *mode = NULL; | ||
538 | static char *primary = NULL; | ||
539 | static char *lacp_rate = NULL; | ||
540 | static int arp_interval = BOND_LINK_ARP_INTERV; | ||
541 | static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; | ||
542 | |||
543 | module_param(max_bonds, int, 0); | ||
544 | MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); | ||
545 | module_param(miimon, int, 0); | ||
546 | MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); | ||
547 | module_param(updelay, int, 0); | ||
548 | MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); | ||
549 | module_param(downdelay, int, 0); | ||
550 | MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); | ||
551 | module_param(use_carrier, int, 0); | ||
552 | MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 0 for off, 1 for on (default)"); | ||
553 | module_param(mode, charp, 0); | ||
554 | MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); | ||
555 | module_param(primary, charp, 0); | ||
556 | MODULE_PARM_DESC(primary, "Primary network device to use"); | ||
557 | module_param(lacp_rate, charp, 0); | ||
558 | MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)"); | ||
559 | module_param(arp_interval, int, 0); | ||
560 | MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); | ||
561 | module_param_array(arp_ip_target, charp, NULL, 0); | ||
562 | MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); | ||
563 | |||
564 | /*----------------------------- Global variables ----------------------------*/ | ||
565 | |||
566 | static const char *version = | ||
567 | DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; | ||
568 | |||
569 | static LIST_HEAD(bond_dev_list); | ||
570 | |||
571 | #ifdef CONFIG_PROC_FS | ||
572 | static struct proc_dir_entry *bond_proc_dir = NULL; | ||
573 | #endif | ||
574 | |||
575 | static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; | ||
576 | static int arp_ip_count = 0; | ||
577 | static u32 my_ip = 0; | ||
578 | static int bond_mode = BOND_MODE_ROUNDROBIN; | ||
579 | static int lacp_fast = 0; | ||
580 | static int app_abi_ver = 0; | ||
581 | static int orig_app_abi_ver = -1; /* This is used to save the first ABI version | ||
582 | * we receive from the application. Once set, | ||
583 | * it won't be changed, and the module will | ||
584 | * refuse to enslave/release interfaces if the | ||
585 | * command comes from an application using | ||
586 | * another ABI version. | ||
587 | */ | ||
588 | |||
589 | struct bond_parm_tbl { | ||
590 | char *modename; | ||
591 | int mode; | ||
592 | }; | ||
593 | |||
594 | static struct bond_parm_tbl bond_lacp_tbl[] = { | ||
595 | { "slow", AD_LACP_SLOW}, | ||
596 | { "fast", AD_LACP_FAST}, | ||
597 | { NULL, -1}, | ||
598 | }; | ||
599 | |||
600 | static struct bond_parm_tbl bond_mode_tbl[] = { | ||
601 | { "balance-rr", BOND_MODE_ROUNDROBIN}, | ||
602 | { "active-backup", BOND_MODE_ACTIVEBACKUP}, | ||
603 | { "balance-xor", BOND_MODE_XOR}, | ||
604 | { "broadcast", BOND_MODE_BROADCAST}, | ||
605 | { "802.3ad", BOND_MODE_8023AD}, | ||
606 | { "balance-tlb", BOND_MODE_TLB}, | ||
607 | { "balance-alb", BOND_MODE_ALB}, | ||
608 | { NULL, -1}, | ||
609 | }; | ||
610 | |||
611 | /*-------------------------- Forward declarations ---------------------------*/ | ||
612 | |||
/* Declared ahead of its definition so that earlier code can reference it. */
static inline void bond_set_mode_ops(struct net_device *bond_dev,
				     int mode);
614 | |||
615 | /*---------------------------- General routines -----------------------------*/ | ||
616 | |||
617 | static const char *bond_mode_name(int mode) | ||
618 | { | ||
619 | switch (mode) { | ||
620 | case BOND_MODE_ROUNDROBIN : | ||
621 | return "load balancing (round-robin)"; | ||
622 | case BOND_MODE_ACTIVEBACKUP : | ||
623 | return "fault-tolerance (active-backup)"; | ||
624 | case BOND_MODE_XOR : | ||
625 | return "load balancing (xor)"; | ||
626 | case BOND_MODE_BROADCAST : | ||
627 | return "fault-tolerance (broadcast)"; | ||
628 | case BOND_MODE_8023AD: | ||
629 | return "IEEE 802.3ad Dynamic link aggregation"; | ||
630 | case BOND_MODE_TLB: | ||
631 | return "transmit load balancing"; | ||
632 | case BOND_MODE_ALB: | ||
633 | return "adaptive load balancing"; | ||
634 | default: | ||
635 | return "unknown"; | ||
636 | } | ||
637 | } | ||
638 | |||
639 | /*---------------------------------- VLAN -----------------------------------*/ | ||
640 | |||
641 | /** | ||
642 | * bond_add_vlan - add a new vlan id on bond | ||
643 | * @bond: bond that got the notification | ||
644 | * @vlan_id: the vlan id to add | ||
645 | * | ||
646 | * Returns -ENOMEM if allocation failed. | ||
647 | */ | ||
648 | static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) | ||
649 | { | ||
650 | struct vlan_entry *vlan; | ||
651 | |||
652 | dprintk("bond: %s, vlan id %d\n", | ||
653 | (bond ? bond->dev->name: "None"), vlan_id); | ||
654 | |||
655 | vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); | ||
656 | if (!vlan) { | ||
657 | return -ENOMEM; | ||
658 | } | ||
659 | |||
660 | INIT_LIST_HEAD(&vlan->vlan_list); | ||
661 | vlan->vlan_id = vlan_id; | ||
662 | |||
663 | write_lock_bh(&bond->lock); | ||
664 | |||
665 | list_add_tail(&vlan->vlan_list, &bond->vlan_list); | ||
666 | |||
667 | write_unlock_bh(&bond->lock); | ||
668 | |||
669 | dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); | ||
670 | |||
671 | return 0; | ||
672 | } | ||
673 | |||
674 | /** | ||
675 | * bond_del_vlan - delete a vlan id from bond | ||
676 | * @bond: bond that got the notification | ||
677 | * @vlan_id: the vlan id to delete | ||
678 | * | ||
679 | * returns -ENODEV if @vlan_id was not found in @bond. | ||
680 | */ | ||
681 | static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) | ||
682 | { | ||
683 | struct vlan_entry *vlan, *next; | ||
684 | int res = -ENODEV; | ||
685 | |||
686 | dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); | ||
687 | |||
688 | write_lock_bh(&bond->lock); | ||
689 | |||
690 | list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { | ||
691 | if (vlan->vlan_id == vlan_id) { | ||
692 | list_del(&vlan->vlan_list); | ||
693 | |||
694 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
695 | (bond->params.mode == BOND_MODE_ALB)) { | ||
696 | bond_alb_clear_vlan(bond, vlan_id); | ||
697 | } | ||
698 | |||
699 | dprintk("removed VLAN ID %d from bond %s\n", vlan_id, | ||
700 | bond->dev->name); | ||
701 | |||
702 | kfree(vlan); | ||
703 | |||
704 | if (list_empty(&bond->vlan_list) && | ||
705 | (bond->slave_cnt == 0)) { | ||
706 | /* Last VLAN removed and no slaves, so | ||
707 | * restore block on adding VLANs. This will | ||
708 | * be removed once new slaves that are not | ||
709 | * VLAN challenged will be added. | ||
710 | */ | ||
711 | bond->dev->features |= NETIF_F_VLAN_CHALLENGED; | ||
712 | } | ||
713 | |||
714 | res = 0; | ||
715 | goto out; | ||
716 | } | ||
717 | } | ||
718 | |||
719 | dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, | ||
720 | bond->dev->name); | ||
721 | |||
722 | out: | ||
723 | write_unlock_bh(&bond->lock); | ||
724 | return res; | ||
725 | } | ||
726 | |||
727 | /** | ||
728 | * bond_has_challenged_slaves | ||
729 | * @bond: the bond we're working on | ||
730 | * | ||
731 | * Searches the slave list. Returns 1 if a vlan challenged slave | ||
732 | * was found, 0 otherwise. | ||
733 | * | ||
734 | * Assumes bond->lock is held. | ||
735 | */ | ||
736 | static int bond_has_challenged_slaves(struct bonding *bond) | ||
737 | { | ||
738 | struct slave *slave; | ||
739 | int i; | ||
740 | |||
741 | bond_for_each_slave(bond, slave, i) { | ||
742 | if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { | ||
743 | dprintk("found VLAN challenged slave - %s\n", | ||
744 | slave->dev->name); | ||
745 | return 1; | ||
746 | } | ||
747 | } | ||
748 | |||
749 | dprintk("no VLAN challenged slaves found\n"); | ||
750 | return 0; | ||
751 | } | ||
752 | |||
753 | /** | ||
754 | * bond_next_vlan - safely skip to the next item in the vlans list. | ||
755 | * @bond: the bond we're working on | ||
756 | * @curr: item we're advancing from | ||
757 | * | ||
758 | * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, | ||
759 | * or @curr->next otherwise (even if it is @curr itself again). | ||
760 | * | ||
761 | * Caller must hold bond->lock | ||
762 | */ | ||
763 | struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) | ||
764 | { | ||
765 | struct vlan_entry *next, *last; | ||
766 | |||
767 | if (list_empty(&bond->vlan_list)) { | ||
768 | return NULL; | ||
769 | } | ||
770 | |||
771 | if (!curr) { | ||
772 | next = list_entry(bond->vlan_list.next, | ||
773 | struct vlan_entry, vlan_list); | ||
774 | } else { | ||
775 | last = list_entry(bond->vlan_list.prev, | ||
776 | struct vlan_entry, vlan_list); | ||
777 | if (last == curr) { | ||
778 | next = list_entry(bond->vlan_list.next, | ||
779 | struct vlan_entry, vlan_list); | ||
780 | } else { | ||
781 | next = list_entry(curr->vlan_list.next, | ||
782 | struct vlan_entry, vlan_list); | ||
783 | } | ||
784 | } | ||
785 | |||
786 | return next; | ||
787 | } | ||
788 | |||
789 | /** | ||
790 | * bond_dev_queue_xmit - Prepare skb for xmit. | ||
791 | * | ||
792 | * @bond: bond device that got this skb for tx. | ||
793 | * @skb: hw accel VLAN tagged skb to transmit | ||
794 | * @slave_dev: slave that is supposed to xmit this skbuff | ||
795 | * | ||
796 | * When the bond gets an skb to transmit that is | ||
797 | * already hardware accelerated VLAN tagged, and it | ||
798 | * needs to relay this skb to a slave that is not | ||
799 | * hw accel capable, the skb needs to be "unaccelerated", | ||
800 | * i.e. strip the hwaccel tag and re-insert it as part | ||
801 | * of the payload. | ||
802 | */ | ||
803 | int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) | ||
804 | { | ||
805 | unsigned short vlan_id; | ||
806 | |||
807 | if (!list_empty(&bond->vlan_list) && | ||
808 | !(slave_dev->features & NETIF_F_HW_VLAN_TX) && | ||
809 | vlan_get_tag(skb, &vlan_id) == 0) { | ||
810 | skb->dev = slave_dev; | ||
811 | skb = vlan_put_tag(skb, vlan_id); | ||
812 | if (!skb) { | ||
813 | /* vlan_put_tag() frees the skb in case of error, | ||
814 | * so return success here so the calling functions | ||
815 | * won't attempt to free is again. | ||
816 | */ | ||
817 | return 0; | ||
818 | } | ||
819 | } else { | ||
820 | skb->dev = slave_dev; | ||
821 | } | ||
822 | |||
823 | skb->priority = 1; | ||
824 | dev_queue_xmit(skb); | ||
825 | |||
826 | return 0; | ||
827 | } | ||
828 | |||
829 | /* | ||
830 | * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid | ||
831 | * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a | ||
832 | * lock because: | ||
833 | * a. This operation is performed in IOCTL context, | ||
834 | * b. The operation is protected by the RTNL semaphore in the 8021q code, | ||
835 | * c. Holding a lock with BH disabled while directly calling a base driver | ||
836 | * entry point is generally a BAD idea. | ||
837 | * | ||
838 | * The design of synchronization/protection for this operation in the 8021q | ||
839 | * module is good for one or more VLAN devices over a single physical device | ||
840 | * and cannot be extended for a teaming solution like bonding, so there is a | ||
841 | * potential race condition here where a net device from the vlan group might | ||
842 | * be referenced (either by a base driver or the 8021q code) while it is being | ||
843 | * removed from the system. However, it turns out we're not making matters | ||
844 | * worse, and if it works for regular VLAN usage it will work here too. | ||
845 | */ | ||
846 | |||
847 | /** | ||
848 | * bond_vlan_rx_register - Propagates registration to slaves | ||
849 | * @bond_dev: bonding net device that got called | ||
850 | * @grp: vlan group being registered | ||
851 | */ | ||
852 | static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) | ||
853 | { | ||
854 | struct bonding *bond = bond_dev->priv; | ||
855 | struct slave *slave; | ||
856 | int i; | ||
857 | |||
858 | bond->vlgrp = grp; | ||
859 | |||
860 | bond_for_each_slave(bond, slave, i) { | ||
861 | struct net_device *slave_dev = slave->dev; | ||
862 | |||
863 | if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && | ||
864 | slave_dev->vlan_rx_register) { | ||
865 | slave_dev->vlan_rx_register(slave_dev, grp); | ||
866 | } | ||
867 | } | ||
868 | } | ||
869 | |||
870 | /** | ||
871 | * bond_vlan_rx_add_vid - Propagates adding an id to slaves | ||
872 | * @bond_dev: bonding net device that got called | ||
873 | * @vid: vlan id being added | ||
874 | */ | ||
875 | static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) | ||
876 | { | ||
877 | struct bonding *bond = bond_dev->priv; | ||
878 | struct slave *slave; | ||
879 | int i, res; | ||
880 | |||
881 | bond_for_each_slave(bond, slave, i) { | ||
882 | struct net_device *slave_dev = slave->dev; | ||
883 | |||
884 | if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && | ||
885 | slave_dev->vlan_rx_add_vid) { | ||
886 | slave_dev->vlan_rx_add_vid(slave_dev, vid); | ||
887 | } | ||
888 | } | ||
889 | |||
890 | res = bond_add_vlan(bond, vid); | ||
891 | if (res) { | ||
892 | printk(KERN_ERR DRV_NAME | ||
893 | ": %s: Failed to add vlan id %d\n", | ||
894 | bond_dev->name, vid); | ||
895 | } | ||
896 | } | ||
897 | |||
898 | /** | ||
899 | * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves | ||
900 | * @bond_dev: bonding net device that got called | ||
901 | * @vid: vlan id being removed | ||
902 | */ | ||
903 | static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) | ||
904 | { | ||
905 | struct bonding *bond = bond_dev->priv; | ||
906 | struct slave *slave; | ||
907 | struct net_device *vlan_dev; | ||
908 | int i, res; | ||
909 | |||
910 | bond_for_each_slave(bond, slave, i) { | ||
911 | struct net_device *slave_dev = slave->dev; | ||
912 | |||
913 | if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && | ||
914 | slave_dev->vlan_rx_kill_vid) { | ||
915 | /* Save and then restore vlan_dev in the grp array, | ||
916 | * since the slave's driver might clear it. | ||
917 | */ | ||
918 | vlan_dev = bond->vlgrp->vlan_devices[vid]; | ||
919 | slave_dev->vlan_rx_kill_vid(slave_dev, vid); | ||
920 | bond->vlgrp->vlan_devices[vid] = vlan_dev; | ||
921 | } | ||
922 | } | ||
923 | |||
924 | res = bond_del_vlan(bond, vid); | ||
925 | if (res) { | ||
926 | printk(KERN_ERR DRV_NAME | ||
927 | ": %s: Failed to remove vlan id %d\n", | ||
928 | bond_dev->name, vid); | ||
929 | } | ||
930 | } | ||
931 | |||
932 | static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) | ||
933 | { | ||
934 | struct vlan_entry *vlan; | ||
935 | |||
936 | write_lock_bh(&bond->lock); | ||
937 | |||
938 | if (list_empty(&bond->vlan_list)) { | ||
939 | goto out; | ||
940 | } | ||
941 | |||
942 | if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && | ||
943 | slave_dev->vlan_rx_register) { | ||
944 | slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); | ||
945 | } | ||
946 | |||
947 | if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || | ||
948 | !(slave_dev->vlan_rx_add_vid)) { | ||
949 | goto out; | ||
950 | } | ||
951 | |||
952 | list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { | ||
953 | slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); | ||
954 | } | ||
955 | |||
956 | out: | ||
957 | write_unlock_bh(&bond->lock); | ||
958 | } | ||
959 | |||
960 | static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) | ||
961 | { | ||
962 | struct vlan_entry *vlan; | ||
963 | struct net_device *vlan_dev; | ||
964 | |||
965 | write_lock_bh(&bond->lock); | ||
966 | |||
967 | if (list_empty(&bond->vlan_list)) { | ||
968 | goto out; | ||
969 | } | ||
970 | |||
971 | if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || | ||
972 | !(slave_dev->vlan_rx_kill_vid)) { | ||
973 | goto unreg; | ||
974 | } | ||
975 | |||
976 | list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { | ||
977 | /* Save and then restore vlan_dev in the grp array, | ||
978 | * since the slave's driver might clear it. | ||
979 | */ | ||
980 | vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; | ||
981 | slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); | ||
982 | bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev; | ||
983 | } | ||
984 | |||
985 | unreg: | ||
986 | if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && | ||
987 | slave_dev->vlan_rx_register) { | ||
988 | slave_dev->vlan_rx_register(slave_dev, NULL); | ||
989 | } | ||
990 | |||
991 | out: | ||
992 | write_unlock_bh(&bond->lock); | ||
993 | } | ||
994 | |||
995 | /*------------------------------- Link status -------------------------------*/ | ||
996 | |||
997 | /* | ||
998 | * Get link speed and duplex from the slave's base driver | ||
999 | * using ethtool. If for some reason the call fails or the | ||
1000 | * values are invalid, fake speed and duplex to 100/Full | ||
1001 | * and return error. | ||
1002 | */ | ||
1003 | static int bond_update_speed_duplex(struct slave *slave) | ||
1004 | { | ||
1005 | struct net_device *slave_dev = slave->dev; | ||
1006 | static int (* ioctl)(struct net_device *, struct ifreq *, int); | ||
1007 | struct ifreq ifr; | ||
1008 | struct ethtool_cmd etool; | ||
1009 | |||
1010 | /* Fake speed and duplex */ | ||
1011 | slave->speed = SPEED_100; | ||
1012 | slave->duplex = DUPLEX_FULL; | ||
1013 | |||
1014 | if (slave_dev->ethtool_ops) { | ||
1015 | u32 res; | ||
1016 | |||
1017 | if (!slave_dev->ethtool_ops->get_settings) { | ||
1018 | return -1; | ||
1019 | } | ||
1020 | |||
1021 | res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); | ||
1022 | if (res < 0) { | ||
1023 | return -1; | ||
1024 | } | ||
1025 | |||
1026 | goto verify; | ||
1027 | } | ||
1028 | |||
1029 | ioctl = slave_dev->do_ioctl; | ||
1030 | strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); | ||
1031 | etool.cmd = ETHTOOL_GSET; | ||
1032 | ifr.ifr_data = (char*)&etool; | ||
1033 | if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) { | ||
1034 | return -1; | ||
1035 | } | ||
1036 | |||
1037 | verify: | ||
1038 | switch (etool.speed) { | ||
1039 | case SPEED_10: | ||
1040 | case SPEED_100: | ||
1041 | case SPEED_1000: | ||
1042 | break; | ||
1043 | default: | ||
1044 | return -1; | ||
1045 | } | ||
1046 | |||
1047 | switch (etool.duplex) { | ||
1048 | case DUPLEX_FULL: | ||
1049 | case DUPLEX_HALF: | ||
1050 | break; | ||
1051 | default: | ||
1052 | return -1; | ||
1053 | } | ||
1054 | |||
1055 | slave->speed = etool.speed; | ||
1056 | slave->duplex = etool.duplex; | ||
1057 | |||
1058 | return 0; | ||
1059 | } | ||
1060 | |||
1061 | /* | ||
1062 | * if <dev> supports MII link status reporting, check its link status. | ||
1063 | * | ||
1064 | * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), | ||
1065 | * depening upon the setting of the use_carrier parameter. | ||
1066 | * | ||
1067 | * Return either BMSR_LSTATUS, meaning that the link is up (or we | ||
1068 | * can't tell and just pretend it is), or 0, meaning that the link is | ||
1069 | * down. | ||
1070 | * | ||
1071 | * If reporting is non-zero, instead of faking link up, return -1 if | ||
1072 | * both ETHTOOL and MII ioctls fail (meaning the device does not | ||
1073 | * support them). If use_carrier is set, return whatever it says. | ||
1074 | * It'd be nice if there was a good way to tell if a driver supports | ||
1075 | * netif_carrier, but there really isn't. | ||
1076 | */ | ||
1077 | static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) | ||
1078 | { | ||
1079 | static int (* ioctl)(struct net_device *, struct ifreq *, int); | ||
1080 | struct ifreq ifr; | ||
1081 | struct mii_ioctl_data *mii; | ||
1082 | struct ethtool_value etool; | ||
1083 | |||
1084 | if (bond->params.use_carrier) { | ||
1085 | return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; | ||
1086 | } | ||
1087 | |||
1088 | ioctl = slave_dev->do_ioctl; | ||
1089 | if (ioctl) { | ||
1090 | /* TODO: set pointer to correct ioctl on a per team member */ | ||
1091 | /* bases to make this more efficient. that is, once */ | ||
1092 | /* we determine the correct ioctl, we will always */ | ||
1093 | /* call it and not the others for that team */ | ||
1094 | /* member. */ | ||
1095 | |||
1096 | /* | ||
1097 | * We cannot assume that SIOCGMIIPHY will also read a | ||
1098 | * register; not all network drivers (e.g., e100) | ||
1099 | * support that. | ||
1100 | */ | ||
1101 | |||
1102 | /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ | ||
1103 | strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); | ||
1104 | mii = if_mii(&ifr); | ||
1105 | if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { | ||
1106 | mii->reg_num = MII_BMSR; | ||
1107 | if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { | ||
1108 | return (mii->val_out & BMSR_LSTATUS); | ||
1109 | } | ||
1110 | } | ||
1111 | } | ||
1112 | |||
1113 | /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ | ||
1114 | /* for a period of time so we attempt to get link status */ | ||
1115 | /* from it last if the above MII ioctls fail... */ | ||
1116 | if (slave_dev->ethtool_ops) { | ||
1117 | if (slave_dev->ethtool_ops->get_link) { | ||
1118 | u32 link; | ||
1119 | |||
1120 | link = slave_dev->ethtool_ops->get_link(slave_dev); | ||
1121 | |||
1122 | return link ? BMSR_LSTATUS : 0; | ||
1123 | } | ||
1124 | } | ||
1125 | |||
1126 | if (ioctl) { | ||
1127 | strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); | ||
1128 | etool.cmd = ETHTOOL_GLINK; | ||
1129 | ifr.ifr_data = (char*)&etool; | ||
1130 | if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) { | ||
1131 | if (etool.data == 1) { | ||
1132 | return BMSR_LSTATUS; | ||
1133 | } else { | ||
1134 | dprintk("SIOCETHTOOL shows link down\n"); | ||
1135 | return 0; | ||
1136 | } | ||
1137 | } | ||
1138 | } | ||
1139 | |||
1140 | /* | ||
1141 | * If reporting, report that either there's no dev->do_ioctl, | ||
1142 | * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we | ||
1143 | * cannot report link status). If not reporting, pretend | ||
1144 | * we're ok. | ||
1145 | */ | ||
1146 | return (reporting ? -1 : BMSR_LSTATUS); | ||
1147 | } | ||
1148 | |||
1149 | /*----------------------------- Multicast list ------------------------------*/ | ||
1150 | |||
1151 | /* | ||
1152 | * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise | ||
1153 | */ | ||
1154 | static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) | ||
1155 | { | ||
1156 | return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && | ||
1157 | dmi1->dmi_addrlen == dmi2->dmi_addrlen; | ||
1158 | } | ||
1159 | |||
1160 | /* | ||
1161 | * returns dmi entry if found, NULL otherwise | ||
1162 | */ | ||
1163 | static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) | ||
1164 | { | ||
1165 | struct dev_mc_list *idmi; | ||
1166 | |||
1167 | for (idmi = mc_list; idmi; idmi = idmi->next) { | ||
1168 | if (bond_is_dmi_same(dmi, idmi)) { | ||
1169 | return idmi; | ||
1170 | } | ||
1171 | } | ||
1172 | |||
1173 | return NULL; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Push the promiscuity flag down to appropriate slaves | ||
1178 | */ | ||
1179 | static void bond_set_promiscuity(struct bonding *bond, int inc) | ||
1180 | { | ||
1181 | if (USES_PRIMARY(bond->params.mode)) { | ||
1182 | /* write lock already acquired */ | ||
1183 | if (bond->curr_active_slave) { | ||
1184 | dev_set_promiscuity(bond->curr_active_slave->dev, inc); | ||
1185 | } | ||
1186 | } else { | ||
1187 | struct slave *slave; | ||
1188 | int i; | ||
1189 | bond_for_each_slave(bond, slave, i) { | ||
1190 | dev_set_promiscuity(slave->dev, inc); | ||
1191 | } | ||
1192 | } | ||
1193 | } | ||
1194 | |||
1195 | /* | ||
1196 | * Push the allmulti flag down to all slaves | ||
1197 | */ | ||
1198 | static void bond_set_allmulti(struct bonding *bond, int inc) | ||
1199 | { | ||
1200 | if (USES_PRIMARY(bond->params.mode)) { | ||
1201 | /* write lock already acquired */ | ||
1202 | if (bond->curr_active_slave) { | ||
1203 | dev_set_allmulti(bond->curr_active_slave->dev, inc); | ||
1204 | } | ||
1205 | } else { | ||
1206 | struct slave *slave; | ||
1207 | int i; | ||
1208 | bond_for_each_slave(bond, slave, i) { | ||
1209 | dev_set_allmulti(slave->dev, inc); | ||
1210 | } | ||
1211 | } | ||
1212 | } | ||
1213 | |||
1214 | /* | ||
1215 | * Add a Multicast address to slaves | ||
1216 | * according to mode | ||
1217 | */ | ||
1218 | static void bond_mc_add(struct bonding *bond, void *addr, int alen) | ||
1219 | { | ||
1220 | if (USES_PRIMARY(bond->params.mode)) { | ||
1221 | /* write lock already acquired */ | ||
1222 | if (bond->curr_active_slave) { | ||
1223 | dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); | ||
1224 | } | ||
1225 | } else { | ||
1226 | struct slave *slave; | ||
1227 | int i; | ||
1228 | bond_for_each_slave(bond, slave, i) { | ||
1229 | dev_mc_add(slave->dev, addr, alen, 0); | ||
1230 | } | ||
1231 | } | ||
1232 | } | ||
1233 | |||
1234 | /* | ||
1235 | * Remove a multicast address from slave | ||
1236 | * according to mode | ||
1237 | */ | ||
1238 | static void bond_mc_delete(struct bonding *bond, void *addr, int alen) | ||
1239 | { | ||
1240 | if (USES_PRIMARY(bond->params.mode)) { | ||
1241 | /* write lock already acquired */ | ||
1242 | if (bond->curr_active_slave) { | ||
1243 | dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); | ||
1244 | } | ||
1245 | } else { | ||
1246 | struct slave *slave; | ||
1247 | int i; | ||
1248 | bond_for_each_slave(bond, slave, i) { | ||
1249 | dev_mc_delete(slave->dev, addr, alen, 0); | ||
1250 | } | ||
1251 | } | ||
1252 | } | ||
1253 | |||
1254 | /* | ||
1255 | * Totally destroys the mc_list in bond | ||
1256 | */ | ||
1257 | static void bond_mc_list_destroy(struct bonding *bond) | ||
1258 | { | ||
1259 | struct dev_mc_list *dmi; | ||
1260 | |||
1261 | dmi = bond->mc_list; | ||
1262 | while (dmi) { | ||
1263 | bond->mc_list = dmi->next; | ||
1264 | kfree(dmi); | ||
1265 | dmi = bond->mc_list; | ||
1266 | } | ||
1267 | } | ||
1268 | |||
1269 | /* | ||
1270 | * Copy all the Multicast addresses from src to the bonding device dst | ||
1271 | */ | ||
1272 | static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, int gpf_flag) | ||
1273 | { | ||
1274 | struct dev_mc_list *dmi, *new_dmi; | ||
1275 | |||
1276 | for (dmi = mc_list; dmi; dmi = dmi->next) { | ||
1277 | new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag); | ||
1278 | |||
1279 | if (!new_dmi) { | ||
1280 | /* FIXME: Potential memory leak !!! */ | ||
1281 | return -ENOMEM; | ||
1282 | } | ||
1283 | |||
1284 | new_dmi->next = bond->mc_list; | ||
1285 | bond->mc_list = new_dmi; | ||
1286 | new_dmi->dmi_addrlen = dmi->dmi_addrlen; | ||
1287 | memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); | ||
1288 | new_dmi->dmi_users = dmi->dmi_users; | ||
1289 | new_dmi->dmi_gusers = dmi->dmi_gusers; | ||
1290 | } | ||
1291 | |||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1295 | /* | ||
1296 | * flush all members of flush->mc_list from device dev->mc_list | ||
1297 | */ | ||
1298 | static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) | ||
1299 | { | ||
1300 | struct bonding *bond = bond_dev->priv; | ||
1301 | struct dev_mc_list *dmi; | ||
1302 | |||
1303 | for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { | ||
1304 | dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); | ||
1305 | } | ||
1306 | |||
1307 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
1308 | /* del lacpdu mc addr from mc list */ | ||
1309 | u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; | ||
1310 | |||
1311 | dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); | ||
1312 | } | ||
1313 | } | ||
1314 | |||
1315 | /*--------------------------- Active slave change ---------------------------*/ | ||
1316 | |||
1317 | /* | ||
1318 | * Update the mc list and multicast-related flags for the new and | ||
1319 | * old active slaves (if any) according to the multicast mode, and | ||
1320 | * promiscuous flags unconditionally. | ||
1321 | */ | ||
1322 | static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) | ||
1323 | { | ||
1324 | struct dev_mc_list *dmi; | ||
1325 | |||
1326 | if (!USES_PRIMARY(bond->params.mode)) { | ||
1327 | /* nothing to do - mc list is already up-to-date on | ||
1328 | * all slaves | ||
1329 | */ | ||
1330 | return; | ||
1331 | } | ||
1332 | |||
1333 | if (old_active) { | ||
1334 | if (bond->dev->flags & IFF_PROMISC) { | ||
1335 | dev_set_promiscuity(old_active->dev, -1); | ||
1336 | } | ||
1337 | |||
1338 | if (bond->dev->flags & IFF_ALLMULTI) { | ||
1339 | dev_set_allmulti(old_active->dev, -1); | ||
1340 | } | ||
1341 | |||
1342 | for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { | ||
1343 | dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | if (new_active) { | ||
1348 | if (bond->dev->flags & IFF_PROMISC) { | ||
1349 | dev_set_promiscuity(new_active->dev, 1); | ||
1350 | } | ||
1351 | |||
1352 | if (bond->dev->flags & IFF_ALLMULTI) { | ||
1353 | dev_set_allmulti(new_active->dev, 1); | ||
1354 | } | ||
1355 | |||
1356 | for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { | ||
1357 | dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); | ||
1358 | } | ||
1359 | } | ||
1360 | } | ||
1361 | |||
1362 | /** | ||
1363 | * find_best_interface - select the best available slave to be the active one | ||
1364 | * @bond: our bonding struct | ||
1365 | * | ||
1366 | * Warning: Caller must hold curr_slave_lock for writing. | ||
1367 | */ | ||
1368 | static struct slave *bond_find_best_slave(struct bonding *bond) | ||
1369 | { | ||
1370 | struct slave *new_active, *old_active; | ||
1371 | struct slave *bestslave = NULL; | ||
1372 | int mintime = bond->params.updelay; | ||
1373 | int i; | ||
1374 | |||
1375 | new_active = old_active = bond->curr_active_slave; | ||
1376 | |||
1377 | if (!new_active) { /* there were no active slaves left */ | ||
1378 | if (bond->slave_cnt > 0) { /* found one slave */ | ||
1379 | new_active = bond->first_slave; | ||
1380 | } else { | ||
1381 | return NULL; /* still no slave, return NULL */ | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | /* first try the primary link; if arping, a link must tx/rx traffic | ||
1386 | * before it can be considered the curr_active_slave - also, we would skip | ||
1387 | * slaves between the curr_active_slave and primary_slave that may be up | ||
1388 | * and able to arp | ||
1389 | */ | ||
1390 | if ((bond->primary_slave) && | ||
1391 | (!bond->params.arp_interval) && | ||
1392 | (IS_UP(bond->primary_slave->dev))) { | ||
1393 | new_active = bond->primary_slave; | ||
1394 | } | ||
1395 | |||
1396 | /* remember where to stop iterating over the slaves */ | ||
1397 | old_active = new_active; | ||
1398 | |||
1399 | bond_for_each_slave_from(bond, new_active, i, old_active) { | ||
1400 | if (IS_UP(new_active->dev)) { | ||
1401 | if (new_active->link == BOND_LINK_UP) { | ||
1402 | return new_active; | ||
1403 | } else if (new_active->link == BOND_LINK_BACK) { | ||
1404 | /* link up, but waiting for stabilization */ | ||
1405 | if (new_active->delay < mintime) { | ||
1406 | mintime = new_active->delay; | ||
1407 | bestslave = new_active; | ||
1408 | } | ||
1409 | } | ||
1410 | } | ||
1411 | } | ||
1412 | |||
1413 | return bestslave; | ||
1414 | } | ||
1415 | |||
1416 | /** | ||
1417 | * change_active_interface - change the active slave into the specified one | ||
1418 | * @bond: our bonding struct | ||
1419 | * @new: the new slave to make the active one | ||
1420 | * | ||
1421 | * Set the new slave to the bond's settings and unset them on the old | ||
1422 | * curr_active_slave. | ||
1423 | * Setting include flags, mc-list, promiscuity, allmulti, etc. | ||
1424 | * | ||
1425 | * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, | ||
1426 | * because it is apparently the best available slave we have, even though its | ||
1427 | * updelay hasn't timed out yet. | ||
1428 | * | ||
1429 | * Warning: Caller must hold curr_slave_lock for writing. | ||
1430 | */ | ||
1431 | static void bond_change_active_slave(struct bonding *bond, struct slave *new_active) | ||
1432 | { | ||
1433 | struct slave *old_active = bond->curr_active_slave; | ||
1434 | |||
1435 | if (old_active == new_active) { | ||
1436 | return; | ||
1437 | } | ||
1438 | |||
1439 | if (new_active) { | ||
1440 | if (new_active->link == BOND_LINK_BACK) { | ||
1441 | if (USES_PRIMARY(bond->params.mode)) { | ||
1442 | printk(KERN_INFO DRV_NAME | ||
1443 | ": %s: making interface %s the new " | ||
1444 | "active one %d ms earlier.\n", | ||
1445 | bond->dev->name, new_active->dev->name, | ||
1446 | (bond->params.updelay - new_active->delay) * bond->params.miimon); | ||
1447 | } | ||
1448 | |||
1449 | new_active->delay = 0; | ||
1450 | new_active->link = BOND_LINK_UP; | ||
1451 | new_active->jiffies = jiffies; | ||
1452 | |||
1453 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
1454 | bond_3ad_handle_link_change(new_active, BOND_LINK_UP); | ||
1455 | } | ||
1456 | |||
1457 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
1458 | (bond->params.mode == BOND_MODE_ALB)) { | ||
1459 | bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); | ||
1460 | } | ||
1461 | } else { | ||
1462 | if (USES_PRIMARY(bond->params.mode)) { | ||
1463 | printk(KERN_INFO DRV_NAME | ||
1464 | ": %s: making interface %s the new " | ||
1465 | "active one.\n", | ||
1466 | bond->dev->name, new_active->dev->name); | ||
1467 | } | ||
1468 | } | ||
1469 | } | ||
1470 | |||
1471 | if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { | ||
1472 | if (old_active) { | ||
1473 | bond_set_slave_inactive_flags(old_active); | ||
1474 | } | ||
1475 | |||
1476 | if (new_active) { | ||
1477 | bond_set_slave_active_flags(new_active); | ||
1478 | } | ||
1479 | } | ||
1480 | |||
1481 | if (USES_PRIMARY(bond->params.mode)) { | ||
1482 | bond_mc_swap(bond, new_active, old_active); | ||
1483 | } | ||
1484 | |||
1485 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
1486 | (bond->params.mode == BOND_MODE_ALB)) { | ||
1487 | bond_alb_handle_active_change(bond, new_active); | ||
1488 | } else { | ||
1489 | bond->curr_active_slave = new_active; | ||
1490 | } | ||
1491 | } | ||
1492 | |||
1493 | /** | ||
1494 | * bond_select_active_slave - select a new active slave, if needed | ||
1495 | * @bond: our bonding struct | ||
1496 | * | ||
1497 | * This functions shoud be called when one of the following occurs: | ||
1498 | * - The old curr_active_slave has been released or lost its link. | ||
1499 | * - The primary_slave has got its link back. | ||
1500 | * - A slave has got its link back and there's no old curr_active_slave. | ||
1501 | * | ||
1502 | * Warning: Caller must hold curr_slave_lock for writing. | ||
1503 | */ | ||
1504 | static void bond_select_active_slave(struct bonding *bond) | ||
1505 | { | ||
1506 | struct slave *best_slave; | ||
1507 | |||
1508 | best_slave = bond_find_best_slave(bond); | ||
1509 | if (best_slave != bond->curr_active_slave) { | ||
1510 | bond_change_active_slave(bond, best_slave); | ||
1511 | } | ||
1512 | } | ||
1513 | |||
1514 | /*--------------------------- slave list handling ---------------------------*/ | ||
1515 | |||
1516 | /* | ||
1517 | * This function attaches the slave to the end of list. | ||
1518 | * | ||
1519 | * bond->lock held for writing by caller. | ||
1520 | */ | ||
1521 | static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) | ||
1522 | { | ||
1523 | if (bond->first_slave == NULL) { /* attaching the first slave */ | ||
1524 | new_slave->next = new_slave; | ||
1525 | new_slave->prev = new_slave; | ||
1526 | bond->first_slave = new_slave; | ||
1527 | } else { | ||
1528 | new_slave->next = bond->first_slave; | ||
1529 | new_slave->prev = bond->first_slave->prev; | ||
1530 | new_slave->next->prev = new_slave; | ||
1531 | new_slave->prev->next = new_slave; | ||
1532 | } | ||
1533 | |||
1534 | bond->slave_cnt++; | ||
1535 | } | ||
1536 | |||
1537 | /* | ||
1538 | * This function detaches the slave from the list. | ||
1539 | * WARNING: no check is made to verify if the slave effectively | ||
1540 | * belongs to <bond>. | ||
1541 | * Nothing is freed on return, structures are just unchained. | ||
1542 | * If any slave pointer in bond was pointing to <slave>, | ||
1543 | * it should be changed by the calling function. | ||
1544 | * | ||
1545 | * bond->lock held for writing by caller. | ||
1546 | */ | ||
1547 | static void bond_detach_slave(struct bonding *bond, struct slave *slave) | ||
1548 | { | ||
1549 | if (slave->next) { | ||
1550 | slave->next->prev = slave->prev; | ||
1551 | } | ||
1552 | |||
1553 | if (slave->prev) { | ||
1554 | slave->prev->next = slave->next; | ||
1555 | } | ||
1556 | |||
1557 | if (bond->first_slave == slave) { /* slave is the first slave */ | ||
1558 | if (bond->slave_cnt > 1) { /* there are more slave */ | ||
1559 | bond->first_slave = slave->next; | ||
1560 | } else { | ||
1561 | bond->first_slave = NULL; /* slave was the last one */ | ||
1562 | } | ||
1563 | } | ||
1564 | |||
1565 | slave->next = NULL; | ||
1566 | slave->prev = NULL; | ||
1567 | bond->slave_cnt--; | ||
1568 | } | ||
1569 | |||
1570 | /*---------------------------------- IOCTL ----------------------------------*/ | ||
1571 | |||
1572 | static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) | ||
1573 | { | ||
1574 | dprintk("bond_dev=%p\n", bond_dev); | ||
1575 | dprintk("slave_dev=%p\n", slave_dev); | ||
1576 | dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); | ||
1577 | memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); | ||
1578 | return 0; | ||
1579 | } | ||
1580 | |||
1581 | /* enslave device <slave> to bond device <master> */ | ||
1582 | static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) | ||
1583 | { | ||
1584 | struct bonding *bond = bond_dev->priv; | ||
1585 | struct slave *new_slave = NULL; | ||
1586 | struct dev_mc_list *dmi; | ||
1587 | struct sockaddr addr; | ||
1588 | int link_reporting; | ||
1589 | int old_features = bond_dev->features; | ||
1590 | int res = 0; | ||
1591 | |||
1592 | if (slave_dev->do_ioctl == NULL) { | ||
1593 | printk(KERN_WARNING DRV_NAME | ||
1594 | ": Warning : no link monitoring support for %s\n", | ||
1595 | slave_dev->name); | ||
1596 | } | ||
1597 | |||
1598 | /* bond must be initialized by bond_open() before enslaving */ | ||
1599 | if (!(bond_dev->flags & IFF_UP)) { | ||
1600 | dprintk("Error, master_dev is not up\n"); | ||
1601 | return -EPERM; | ||
1602 | } | ||
1603 | |||
1604 | /* already enslaved */ | ||
1605 | if (slave_dev->flags & IFF_SLAVE) { | ||
1606 | dprintk("Error, Device was already enslaved\n"); | ||
1607 | return -EBUSY; | ||
1608 | } | ||
1609 | |||
1610 | /* vlan challenged mutual exclusion */ | ||
1611 | /* no need to lock since we're protected by rtnl_lock */ | ||
1612 | if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { | ||
1613 | dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); | ||
1614 | if (!list_empty(&bond->vlan_list)) { | ||
1615 | printk(KERN_ERR DRV_NAME | ||
1616 | ": Error: cannot enslave VLAN " | ||
1617 | "challenged slave %s on VLAN enabled " | ||
1618 | "bond %s\n", slave_dev->name, | ||
1619 | bond_dev->name); | ||
1620 | return -EPERM; | ||
1621 | } else { | ||
1622 | printk(KERN_WARNING DRV_NAME | ||
1623 | ": Warning: enslaved VLAN challenged " | ||
1624 | "slave %s. Adding VLANs will be blocked as " | ||
1625 | "long as %s is part of bond %s\n", | ||
1626 | slave_dev->name, slave_dev->name, | ||
1627 | bond_dev->name); | ||
1628 | bond_dev->features |= NETIF_F_VLAN_CHALLENGED; | ||
1629 | } | ||
1630 | } else { | ||
1631 | dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); | ||
1632 | if (bond->slave_cnt == 0) { | ||
1633 | /* First slave, and it is not VLAN challenged, | ||
1634 | * so remove the block of adding VLANs over the bond. | ||
1635 | */ | ||
1636 | bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; | ||
1637 | } | ||
1638 | } | ||
1639 | |||
1640 | if (app_abi_ver >= 1) { | ||
1641 | /* The application is using an ABI, which requires the | ||
1642 | * slave interface to be closed. | ||
1643 | */ | ||
1644 | if ((slave_dev->flags & IFF_UP)) { | ||
1645 | printk(KERN_ERR DRV_NAME | ||
1646 | ": Error: %s is up\n", | ||
1647 | slave_dev->name); | ||
1648 | res = -EPERM; | ||
1649 | goto err_undo_flags; | ||
1650 | } | ||
1651 | |||
1652 | if (slave_dev->set_mac_address == NULL) { | ||
1653 | printk(KERN_ERR DRV_NAME | ||
1654 | ": Error: The slave device you specified does " | ||
1655 | "not support setting the MAC address.\n"); | ||
1656 | printk(KERN_ERR | ||
1657 | "Your kernel likely does not support slave " | ||
1658 | "devices.\n"); | ||
1659 | |||
1660 | res = -EOPNOTSUPP; | ||
1661 | goto err_undo_flags; | ||
1662 | } | ||
1663 | } else { | ||
1664 | /* The application is not using an ABI, which requires the | ||
1665 | * slave interface to be open. | ||
1666 | */ | ||
1667 | if (!(slave_dev->flags & IFF_UP)) { | ||
1668 | printk(KERN_ERR DRV_NAME | ||
1669 | ": Error: %s is not running\n", | ||
1670 | slave_dev->name); | ||
1671 | res = -EINVAL; | ||
1672 | goto err_undo_flags; | ||
1673 | } | ||
1674 | |||
1675 | if ((bond->params.mode == BOND_MODE_8023AD) || | ||
1676 | (bond->params.mode == BOND_MODE_TLB) || | ||
1677 | (bond->params.mode == BOND_MODE_ALB)) { | ||
1678 | printk(KERN_ERR DRV_NAME | ||
1679 | ": Error: to use %s mode, you must upgrade " | ||
1680 | "ifenslave.\n", | ||
1681 | bond_mode_name(bond->params.mode)); | ||
1682 | res = -EOPNOTSUPP; | ||
1683 | goto err_undo_flags; | ||
1684 | } | ||
1685 | } | ||
1686 | |||
1687 | new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL); | ||
1688 | if (!new_slave) { | ||
1689 | res = -ENOMEM; | ||
1690 | goto err_undo_flags; | ||
1691 | } | ||
1692 | |||
1693 | memset(new_slave, 0, sizeof(struct slave)); | ||
1694 | |||
1695 | /* save slave's original flags before calling | ||
1696 | * netdev_set_master and dev_open | ||
1697 | */ | ||
1698 | new_slave->original_flags = slave_dev->flags; | ||
1699 | |||
1700 | if (app_abi_ver >= 1) { | ||
1701 | /* save slave's original ("permanent") mac address for | ||
1702 | * modes that needs it, and for restoring it upon release, | ||
1703 | * and then set it to the master's address | ||
1704 | */ | ||
1705 | memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); | ||
1706 | |||
1707 | /* set slave to master's mac address | ||
1708 | * The application already set the master's | ||
1709 | * mac address to that of the first slave | ||
1710 | */ | ||
1711 | memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); | ||
1712 | addr.sa_family = slave_dev->type; | ||
1713 | res = dev_set_mac_address(slave_dev, &addr); | ||
1714 | if (res) { | ||
1715 | dprintk("Error %d calling set_mac_address\n", res); | ||
1716 | goto err_free; | ||
1717 | } | ||
1718 | |||
1719 | /* open the slave since the application closed it */ | ||
1720 | res = dev_open(slave_dev); | ||
1721 | if (res) { | ||
1722 | dprintk("Openning slave %s failed\n", slave_dev->name); | ||
1723 | goto err_restore_mac; | ||
1724 | } | ||
1725 | } | ||
1726 | |||
1727 | res = netdev_set_master(slave_dev, bond_dev); | ||
1728 | if (res) { | ||
1729 | dprintk("Error %d calling netdev_set_master\n", res); | ||
1730 | if (app_abi_ver < 1) { | ||
1731 | goto err_free; | ||
1732 | } else { | ||
1733 | goto err_close; | ||
1734 | } | ||
1735 | } | ||
1736 | |||
1737 | new_slave->dev = slave_dev; | ||
1738 | |||
1739 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
1740 | (bond->params.mode == BOND_MODE_ALB)) { | ||
1741 | /* bond_alb_init_slave() must be called before all other stages since | ||
1742 | * it might fail and we do not want to have to undo everything | ||
1743 | */ | ||
1744 | res = bond_alb_init_slave(bond, new_slave); | ||
1745 | if (res) { | ||
1746 | goto err_unset_master; | ||
1747 | } | ||
1748 | } | ||
1749 | |||
1750 | /* If the mode USES_PRIMARY, then the new slave gets the | ||
1751 | * master's promisc (and mc) settings only if it becomes the | ||
1752 | * curr_active_slave, and that is taken care of later when calling | ||
1753 | * bond_change_active() | ||
1754 | */ | ||
1755 | if (!USES_PRIMARY(bond->params.mode)) { | ||
1756 | /* set promiscuity level to new slave */ | ||
1757 | if (bond_dev->flags & IFF_PROMISC) { | ||
1758 | dev_set_promiscuity(slave_dev, 1); | ||
1759 | } | ||
1760 | |||
1761 | /* set allmulti level to new slave */ | ||
1762 | if (bond_dev->flags & IFF_ALLMULTI) { | ||
1763 | dev_set_allmulti(slave_dev, 1); | ||
1764 | } | ||
1765 | |||
1766 | /* upload master's mc_list to new slave */ | ||
1767 | for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { | ||
1768 | dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); | ||
1769 | } | ||
1770 | } | ||
1771 | |||
1772 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
1773 | /* add lacpdu mc addr to mc list */ | ||
1774 | u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; | ||
1775 | |||
1776 | dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); | ||
1777 | } | ||
1778 | |||
1779 | bond_add_vlans_on_slave(bond, slave_dev); | ||
1780 | |||
1781 | write_lock_bh(&bond->lock); | ||
1782 | |||
1783 | bond_attach_slave(bond, new_slave); | ||
1784 | |||
1785 | new_slave->delay = 0; | ||
1786 | new_slave->link_failure_count = 0; | ||
1787 | |||
1788 | if (bond->params.miimon && !bond->params.use_carrier) { | ||
1789 | link_reporting = bond_check_dev_link(bond, slave_dev, 1); | ||
1790 | |||
1791 | if ((link_reporting == -1) && !bond->params.arp_interval) { | ||
1792 | /* | ||
1793 | * miimon is set but a bonded network driver | ||
1794 | * does not support ETHTOOL/MII and | ||
1795 | * arp_interval is not set. Note: if | ||
1796 | * use_carrier is enabled, we will never go | ||
1797 | * here (because netif_carrier is always | ||
1798 | * supported); thus, we don't need to change | ||
1799 | * the messages for netif_carrier. | ||
1800 | */ | ||
1801 | printk(KERN_WARNING DRV_NAME | ||
1802 | ": Warning: MII and ETHTOOL support not " | ||
1803 | "available for interface %s, and " | ||
1804 | "arp_interval/arp_ip_target module parameters " | ||
1805 | "not specified, thus bonding will not detect " | ||
1806 | "link failures! see bonding.txt for details.\n", | ||
1807 | slave_dev->name); | ||
1808 | } else if (link_reporting == -1) { | ||
1809 | /* unable get link status using mii/ethtool */ | ||
1810 | printk(KERN_WARNING DRV_NAME | ||
1811 | ": Warning: can't get link status from " | ||
1812 | "interface %s; the network driver associated " | ||
1813 | "with this interface does not support MII or " | ||
1814 | "ETHTOOL link status reporting, thus miimon " | ||
1815 | "has no effect on this interface.\n", | ||
1816 | slave_dev->name); | ||
1817 | } | ||
1818 | } | ||
1819 | |||
1820 | /* check for initial state */ | ||
1821 | if (!bond->params.miimon || | ||
1822 | (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { | ||
1823 | if (bond->params.updelay) { | ||
1824 | dprintk("Initial state of slave_dev is " | ||
1825 | "BOND_LINK_BACK\n"); | ||
1826 | new_slave->link = BOND_LINK_BACK; | ||
1827 | new_slave->delay = bond->params.updelay; | ||
1828 | } else { | ||
1829 | dprintk("Initial state of slave_dev is " | ||
1830 | "BOND_LINK_UP\n"); | ||
1831 | new_slave->link = BOND_LINK_UP; | ||
1832 | } | ||
1833 | new_slave->jiffies = jiffies; | ||
1834 | } else { | ||
1835 | dprintk("Initial state of slave_dev is " | ||
1836 | "BOND_LINK_DOWN\n"); | ||
1837 | new_slave->link = BOND_LINK_DOWN; | ||
1838 | } | ||
1839 | |||
1840 | if (bond_update_speed_duplex(new_slave) && | ||
1841 | (new_slave->link != BOND_LINK_DOWN)) { | ||
1842 | printk(KERN_WARNING DRV_NAME | ||
1843 | ": Warning: failed to get speed and duplex from %s, " | ||
1844 | "assumed to be 100Mb/sec and Full.\n", | ||
1845 | new_slave->dev->name); | ||
1846 | |||
1847 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
1848 | printk(KERN_WARNING | ||
1849 | "Operation of 802.3ad mode requires ETHTOOL " | ||
1850 | "support in base driver for proper aggregator " | ||
1851 | "selection.\n"); | ||
1852 | } | ||
1853 | } | ||
1854 | |||
1855 | if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { | ||
1856 | /* if there is a primary slave, remember it */ | ||
1857 | if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { | ||
1858 | bond->primary_slave = new_slave; | ||
1859 | } | ||
1860 | } | ||
1861 | |||
1862 | switch (bond->params.mode) { | ||
1863 | case BOND_MODE_ACTIVEBACKUP: | ||
1864 | /* if we're in active-backup mode, we need one and only one active | ||
1865 | * interface. The backup interfaces will have their NOARP flag set | ||
1866 | * because we need them to be completely deaf and not to respond to | ||
1867 | * any ARP request on the network to avoid fooling a switch. Thus, | ||
1868 | * since we guarantee that curr_active_slave always point to the last | ||
1869 | * usable interface, we just have to verify this interface's flag. | ||
1870 | */ | ||
1871 | if (((!bond->curr_active_slave) || | ||
1872 | (bond->curr_active_slave->dev->flags & IFF_NOARP)) && | ||
1873 | (new_slave->link != BOND_LINK_DOWN)) { | ||
1874 | dprintk("This is the first active slave\n"); | ||
1875 | /* first slave or no active slave yet, and this link | ||
1876 | is OK, so make this interface the active one */ | ||
1877 | bond_change_active_slave(bond, new_slave); | ||
1878 | } else { | ||
1879 | dprintk("This is just a backup slave\n"); | ||
1880 | bond_set_slave_inactive_flags(new_slave); | ||
1881 | } | ||
1882 | break; | ||
1883 | case BOND_MODE_8023AD: | ||
1884 | /* in 802.3ad mode, the internal mechanism | ||
1885 | * will activate the slaves in the selected | ||
1886 | * aggregator | ||
1887 | */ | ||
1888 | bond_set_slave_inactive_flags(new_slave); | ||
1889 | /* if this is the first slave */ | ||
1890 | if (bond->slave_cnt == 1) { | ||
1891 | SLAVE_AD_INFO(new_slave).id = 1; | ||
1892 | /* Initialize AD with the number of times that the AD timer is called in 1 second | ||
1893 | * can be called only after the mac address of the bond is set | ||
1894 | */ | ||
1895 | bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, | ||
1896 | bond->params.lacp_fast); | ||
1897 | } else { | ||
1898 | SLAVE_AD_INFO(new_slave).id = | ||
1899 | SLAVE_AD_INFO(new_slave->prev).id + 1; | ||
1900 | } | ||
1901 | |||
1902 | bond_3ad_bind_slave(new_slave); | ||
1903 | break; | ||
1904 | case BOND_MODE_TLB: | ||
1905 | case BOND_MODE_ALB: | ||
1906 | new_slave->state = BOND_STATE_ACTIVE; | ||
1907 | if ((!bond->curr_active_slave) && | ||
1908 | (new_slave->link != BOND_LINK_DOWN)) { | ||
1909 | /* first slave or no active slave yet, and this link | ||
1910 | * is OK, so make this interface the active one | ||
1911 | */ | ||
1912 | bond_change_active_slave(bond, new_slave); | ||
1913 | } | ||
1914 | break; | ||
1915 | default: | ||
1916 | dprintk("This slave is always active in trunk mode\n"); | ||
1917 | |||
1918 | /* always active in trunk mode */ | ||
1919 | new_slave->state = BOND_STATE_ACTIVE; | ||
1920 | |||
1921 | /* In trunking mode there is little meaning to curr_active_slave | ||
1922 | * anyway (it holds no special properties of the bond device), | ||
1923 | * so we can change it without calling change_active_interface() | ||
1924 | */ | ||
1925 | if (!bond->curr_active_slave) { | ||
1926 | bond->curr_active_slave = new_slave; | ||
1927 | } | ||
1928 | break; | ||
1929 | } /* switch(bond_mode) */ | ||
1930 | |||
1931 | write_unlock_bh(&bond->lock); | ||
1932 | |||
1933 | if (app_abi_ver < 1) { | ||
1934 | /* | ||
1935 | * !!! This is to support old versions of ifenslave. | ||
1936 | * We can remove this in 2.5 because our ifenslave takes | ||
1937 | * care of this for us. | ||
1938 | * We check to see if the master has a mac address yet. | ||
1939 | * If not, we'll give it the mac address of our slave device. | ||
1940 | */ | ||
1941 | int ndx = 0; | ||
1942 | |||
1943 | for (ndx = 0; ndx < bond_dev->addr_len; ndx++) { | ||
1944 | dprintk("Checking ndx=%d of bond_dev->dev_addr\n", | ||
1945 | ndx); | ||
1946 | if (bond_dev->dev_addr[ndx] != 0) { | ||
1947 | dprintk("Found non-zero byte at ndx=%d\n", | ||
1948 | ndx); | ||
1949 | break; | ||
1950 | } | ||
1951 | } | ||
1952 | |||
1953 | if (ndx == bond_dev->addr_len) { | ||
1954 | /* | ||
1955 | * We got all the way through the address and it was | ||
1956 | * all 0's. | ||
1957 | */ | ||
1958 | dprintk("%s doesn't have a MAC address yet. \n", | ||
1959 | bond_dev->name); | ||
1960 | dprintk("Going to give assign it from %s.\n", | ||
1961 | slave_dev->name); | ||
1962 | bond_sethwaddr(bond_dev, slave_dev); | ||
1963 | } | ||
1964 | } | ||
1965 | |||
1966 | printk(KERN_INFO DRV_NAME | ||
1967 | ": %s: enslaving %s as a%s interface with a%s link.\n", | ||
1968 | bond_dev->name, slave_dev->name, | ||
1969 | new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", | ||
1970 | new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); | ||
1971 | |||
1972 | /* enslave is successful */ | ||
1973 | return 0; | ||
1974 | |||
1975 | /* Undo stages on error */ | ||
1976 | err_unset_master: | ||
1977 | netdev_set_master(slave_dev, NULL); | ||
1978 | |||
1979 | err_close: | ||
1980 | dev_close(slave_dev); | ||
1981 | |||
1982 | err_restore_mac: | ||
1983 | memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); | ||
1984 | addr.sa_family = slave_dev->type; | ||
1985 | dev_set_mac_address(slave_dev, &addr); | ||
1986 | |||
1987 | err_free: | ||
1988 | kfree(new_slave); | ||
1989 | |||
1990 | err_undo_flags: | ||
1991 | bond_dev->features = old_features; | ||
1992 | |||
1993 | return res; | ||
1994 | } | ||
1995 | |||
1996 | /* | ||
1997 | * Try to release the slave device <slave> from the bond device <master> | ||
1998 | * It is legal to access curr_active_slave without a lock because all the function | ||
1999 | * is write-locked. | ||
2000 | * | ||
2001 | * The rules for slave state should be: | ||
2002 | * for Active/Backup: | ||
2003 | * Active stays on all backups go down | ||
2004 | * for Bonded connections: | ||
2005 | * The first up interface should be left on and all others downed. | ||
2006 | */ | ||
2007 | static int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) | ||
2008 | { | ||
2009 | struct bonding *bond = bond_dev->priv; | ||
2010 | struct slave *slave, *oldcurrent; | ||
2011 | struct sockaddr addr; | ||
2012 | int mac_addr_differ; | ||
2013 | |||
2014 | /* slave is not a slave or master is not master of this slave */ | ||
2015 | if (!(slave_dev->flags & IFF_SLAVE) || | ||
2016 | (slave_dev->master != bond_dev)) { | ||
2017 | printk(KERN_ERR DRV_NAME | ||
2018 | ": Error: %s: cannot release %s.\n", | ||
2019 | bond_dev->name, slave_dev->name); | ||
2020 | return -EINVAL; | ||
2021 | } | ||
2022 | |||
2023 | write_lock_bh(&bond->lock); | ||
2024 | |||
2025 | slave = bond_get_slave_by_dev(bond, slave_dev); | ||
2026 | if (!slave) { | ||
2027 | /* not a slave of this bond */ | ||
2028 | printk(KERN_INFO DRV_NAME | ||
2029 | ": %s: %s not enslaved\n", | ||
2030 | bond_dev->name, slave_dev->name); | ||
2031 | return -EINVAL; | ||
2032 | } | ||
2033 | |||
2034 | mac_addr_differ = memcmp(bond_dev->dev_addr, | ||
2035 | slave->perm_hwaddr, | ||
2036 | ETH_ALEN); | ||
2037 | if (!mac_addr_differ && (bond->slave_cnt > 1)) { | ||
2038 | printk(KERN_WARNING DRV_NAME | ||
2039 | ": Warning: the permanent HWaddr of %s " | ||
2040 | "- %02X:%02X:%02X:%02X:%02X:%02X - is " | ||
2041 | "still in use by %s. Set the HWaddr of " | ||
2042 | "%s to a different address to avoid " | ||
2043 | "conflicts.\n", | ||
2044 | slave_dev->name, | ||
2045 | slave->perm_hwaddr[0], | ||
2046 | slave->perm_hwaddr[1], | ||
2047 | slave->perm_hwaddr[2], | ||
2048 | slave->perm_hwaddr[3], | ||
2049 | slave->perm_hwaddr[4], | ||
2050 | slave->perm_hwaddr[5], | ||
2051 | bond_dev->name, | ||
2052 | slave_dev->name); | ||
2053 | } | ||
2054 | |||
2055 | /* Inform AD package of unbinding of slave. */ | ||
2056 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2057 | /* must be called before the slave is | ||
2058 | * detached from the list | ||
2059 | */ | ||
2060 | bond_3ad_unbind_slave(slave); | ||
2061 | } | ||
2062 | |||
2063 | printk(KERN_INFO DRV_NAME | ||
2064 | ": %s: releasing %s interface %s\n", | ||
2065 | bond_dev->name, | ||
2066 | (slave->state == BOND_STATE_ACTIVE) | ||
2067 | ? "active" : "backup", | ||
2068 | slave_dev->name); | ||
2069 | |||
2070 | oldcurrent = bond->curr_active_slave; | ||
2071 | |||
2072 | bond->current_arp_slave = NULL; | ||
2073 | |||
2074 | /* release the slave from its bond */ | ||
2075 | bond_detach_slave(bond, slave); | ||
2076 | |||
2077 | if (bond->primary_slave == slave) { | ||
2078 | bond->primary_slave = NULL; | ||
2079 | } | ||
2080 | |||
2081 | if (oldcurrent == slave) { | ||
2082 | bond_change_active_slave(bond, NULL); | ||
2083 | } | ||
2084 | |||
2085 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
2086 | (bond->params.mode == BOND_MODE_ALB)) { | ||
2087 | /* Must be called only after the slave has been | ||
2088 | * detached from the list and the curr_active_slave | ||
2089 | * has been cleared (if our_slave == old_current), | ||
2090 | * but before a new active slave is selected. | ||
2091 | */ | ||
2092 | bond_alb_deinit_slave(bond, slave); | ||
2093 | } | ||
2094 | |||
2095 | if (oldcurrent == slave) { | ||
2096 | bond_select_active_slave(bond); | ||
2097 | |||
2098 | if (!bond->curr_active_slave) { | ||
2099 | printk(KERN_INFO DRV_NAME | ||
2100 | ": %s: now running without any active " | ||
2101 | "interface !\n", | ||
2102 | bond_dev->name); | ||
2103 | } | ||
2104 | } | ||
2105 | |||
2106 | if (bond->slave_cnt == 0) { | ||
2107 | /* if the last slave was removed, zero the mac address | ||
2108 | * of the master so it will be set by the application | ||
2109 | * to the mac address of the first slave | ||
2110 | */ | ||
2111 | memset(bond_dev->dev_addr, 0, bond_dev->addr_len); | ||
2112 | |||
2113 | if (list_empty(&bond->vlan_list)) { | ||
2114 | bond_dev->features |= NETIF_F_VLAN_CHALLENGED; | ||
2115 | } else { | ||
2116 | printk(KERN_WARNING DRV_NAME | ||
2117 | ": Warning: clearing HW address of %s while it " | ||
2118 | "still has VLANs.\n", | ||
2119 | bond_dev->name); | ||
2120 | printk(KERN_WARNING DRV_NAME | ||
2121 | ": When re-adding slaves, make sure the bond's " | ||
2122 | "HW address matches its VLANs'.\n"); | ||
2123 | } | ||
2124 | } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && | ||
2125 | !bond_has_challenged_slaves(bond)) { | ||
2126 | printk(KERN_INFO DRV_NAME | ||
2127 | ": last VLAN challenged slave %s " | ||
2128 | "left bond %s. VLAN blocking is removed\n", | ||
2129 | slave_dev->name, bond_dev->name); | ||
2130 | bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; | ||
2131 | } | ||
2132 | |||
2133 | write_unlock_bh(&bond->lock); | ||
2134 | |||
2135 | bond_del_vlans_from_slave(bond, slave_dev); | ||
2136 | |||
2137 | /* If the mode USES_PRIMARY, then we should only remove its | ||
2138 | * promisc and mc settings if it was the curr_active_slave, but that was | ||
2139 | * already taken care of above when we detached the slave | ||
2140 | */ | ||
2141 | if (!USES_PRIMARY(bond->params.mode)) { | ||
2142 | /* unset promiscuity level from slave */ | ||
2143 | if (bond_dev->flags & IFF_PROMISC) { | ||
2144 | dev_set_promiscuity(slave_dev, -1); | ||
2145 | } | ||
2146 | |||
2147 | /* unset allmulti level from slave */ | ||
2148 | if (bond_dev->flags & IFF_ALLMULTI) { | ||
2149 | dev_set_allmulti(slave_dev, -1); | ||
2150 | } | ||
2151 | |||
2152 | /* flush master's mc_list from slave */ | ||
2153 | bond_mc_list_flush(bond_dev, slave_dev); | ||
2154 | } | ||
2155 | |||
2156 | netdev_set_master(slave_dev, NULL); | ||
2157 | |||
2158 | /* close slave before restoring its mac address */ | ||
2159 | dev_close(slave_dev); | ||
2160 | |||
2161 | if (app_abi_ver >= 1) { | ||
2162 | /* restore original ("permanent") mac address */ | ||
2163 | memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); | ||
2164 | addr.sa_family = slave_dev->type; | ||
2165 | dev_set_mac_address(slave_dev, &addr); | ||
2166 | } | ||
2167 | |||
2168 | /* restore the original state of the | ||
2169 | * IFF_NOARP flag that might have been | ||
2170 | * set by bond_set_slave_inactive_flags() | ||
2171 | */ | ||
2172 | if ((slave->original_flags & IFF_NOARP) == 0) { | ||
2173 | slave_dev->flags &= ~IFF_NOARP; | ||
2174 | } | ||
2175 | |||
2176 | kfree(slave); | ||
2177 | |||
2178 | return 0; /* deletion OK */ | ||
2179 | } | ||
2180 | |||
2181 | /* | ||
2182 | * This function releases all slaves. | ||
2183 | */ | ||
2184 | static int bond_release_all(struct net_device *bond_dev) | ||
2185 | { | ||
2186 | struct bonding *bond = bond_dev->priv; | ||
2187 | struct slave *slave; | ||
2188 | struct net_device *slave_dev; | ||
2189 | struct sockaddr addr; | ||
2190 | |||
2191 | write_lock_bh(&bond->lock); | ||
2192 | |||
2193 | if (bond->slave_cnt == 0) { | ||
2194 | goto out; | ||
2195 | } | ||
2196 | |||
2197 | bond->current_arp_slave = NULL; | ||
2198 | bond->primary_slave = NULL; | ||
2199 | bond_change_active_slave(bond, NULL); | ||
2200 | |||
2201 | while ((slave = bond->first_slave) != NULL) { | ||
2202 | /* Inform AD package of unbinding of slave | ||
2203 | * before slave is detached from the list. | ||
2204 | */ | ||
2205 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2206 | bond_3ad_unbind_slave(slave); | ||
2207 | } | ||
2208 | |||
2209 | slave_dev = slave->dev; | ||
2210 | bond_detach_slave(bond, slave); | ||
2211 | |||
2212 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
2213 | (bond->params.mode == BOND_MODE_ALB)) { | ||
2214 | /* must be called only after the slave | ||
2215 | * has been detached from the list | ||
2216 | */ | ||
2217 | bond_alb_deinit_slave(bond, slave); | ||
2218 | } | ||
2219 | |||
2220 | /* now that the slave is detached, unlock and perform | ||
2221 | * all the undo steps that should not be called from | ||
2222 | * within a lock. | ||
2223 | */ | ||
2224 | write_unlock_bh(&bond->lock); | ||
2225 | |||
2226 | bond_del_vlans_from_slave(bond, slave_dev); | ||
2227 | |||
2228 | /* If the mode USES_PRIMARY, then we should only remove its | ||
2229 | * promisc and mc settings if it was the curr_active_slave, but that was | ||
2230 | * already taken care of above when we detached the slave | ||
2231 | */ | ||
2232 | if (!USES_PRIMARY(bond->params.mode)) { | ||
2233 | /* unset promiscuity level from slave */ | ||
2234 | if (bond_dev->flags & IFF_PROMISC) { | ||
2235 | dev_set_promiscuity(slave_dev, -1); | ||
2236 | } | ||
2237 | |||
2238 | /* unset allmulti level from slave */ | ||
2239 | if (bond_dev->flags & IFF_ALLMULTI) { | ||
2240 | dev_set_allmulti(slave_dev, -1); | ||
2241 | } | ||
2242 | |||
2243 | /* flush master's mc_list from slave */ | ||
2244 | bond_mc_list_flush(bond_dev, slave_dev); | ||
2245 | } | ||
2246 | |||
2247 | netdev_set_master(slave_dev, NULL); | ||
2248 | |||
2249 | /* close slave before restoring its mac address */ | ||
2250 | dev_close(slave_dev); | ||
2251 | |||
2252 | if (app_abi_ver >= 1) { | ||
2253 | /* restore original ("permanent") mac address*/ | ||
2254 | memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); | ||
2255 | addr.sa_family = slave_dev->type; | ||
2256 | dev_set_mac_address(slave_dev, &addr); | ||
2257 | } | ||
2258 | |||
2259 | /* restore the original state of the IFF_NOARP flag that might have | ||
2260 | * been set by bond_set_slave_inactive_flags() | ||
2261 | */ | ||
2262 | if ((slave->original_flags & IFF_NOARP) == 0) { | ||
2263 | slave_dev->flags &= ~IFF_NOARP; | ||
2264 | } | ||
2265 | |||
2266 | kfree(slave); | ||
2267 | |||
2268 | /* re-acquire the lock before getting the next slave */ | ||
2269 | write_lock_bh(&bond->lock); | ||
2270 | } | ||
2271 | |||
2272 | /* zero the mac address of the master so it will be | ||
2273 | * set by the application to the mac address of the | ||
2274 | * first slave | ||
2275 | */ | ||
2276 | memset(bond_dev->dev_addr, 0, bond_dev->addr_len); | ||
2277 | |||
2278 | if (list_empty(&bond->vlan_list)) { | ||
2279 | bond_dev->features |= NETIF_F_VLAN_CHALLENGED; | ||
2280 | } else { | ||
2281 | printk(KERN_WARNING DRV_NAME | ||
2282 | ": Warning: clearing HW address of %s while it " | ||
2283 | "still has VLANs.\n", | ||
2284 | bond_dev->name); | ||
2285 | printk(KERN_WARNING DRV_NAME | ||
2286 | ": When re-adding slaves, make sure the bond's " | ||
2287 | "HW address matches its VLANs'.\n"); | ||
2288 | } | ||
2289 | |||
2290 | printk(KERN_INFO DRV_NAME | ||
2291 | ": %s: released all slaves\n", | ||
2292 | bond_dev->name); | ||
2293 | |||
2294 | out: | ||
2295 | write_unlock_bh(&bond->lock); | ||
2296 | |||
2297 | return 0; | ||
2298 | } | ||
2299 | |||
2300 | /* | ||
2301 | * This function changes the active slave to slave <slave_dev>. | ||
2302 | * It returns -EINVAL in the following cases. | ||
2303 | * - <slave_dev> is not found in the list. | ||
2304 | * - There is not active slave now. | ||
2305 | * - <slave_dev> is already active. | ||
2306 | * - The link state of <slave_dev> is not BOND_LINK_UP. | ||
2307 | * - <slave_dev> is not running. | ||
2308 | * In these cases, this fuction does nothing. | ||
2309 | * In the other cases, currnt_slave pointer is changed and 0 is returned. | ||
2310 | */ | ||
2311 | static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) | ||
2312 | { | ||
2313 | struct bonding *bond = bond_dev->priv; | ||
2314 | struct slave *old_active = NULL; | ||
2315 | struct slave *new_active = NULL; | ||
2316 | int res = 0; | ||
2317 | |||
2318 | if (!USES_PRIMARY(bond->params.mode)) { | ||
2319 | return -EINVAL; | ||
2320 | } | ||
2321 | |||
2322 | /* Verify that master_dev is indeed the master of slave_dev */ | ||
2323 | if (!(slave_dev->flags & IFF_SLAVE) || | ||
2324 | (slave_dev->master != bond_dev)) { | ||
2325 | return -EINVAL; | ||
2326 | } | ||
2327 | |||
2328 | write_lock_bh(&bond->lock); | ||
2329 | |||
2330 | old_active = bond->curr_active_slave; | ||
2331 | new_active = bond_get_slave_by_dev(bond, slave_dev); | ||
2332 | |||
2333 | /* | ||
2334 | * Changing to the current active: do nothing; return success. | ||
2335 | */ | ||
2336 | if (new_active && (new_active == old_active)) { | ||
2337 | write_unlock_bh(&bond->lock); | ||
2338 | return 0; | ||
2339 | } | ||
2340 | |||
2341 | if ((new_active) && | ||
2342 | (old_active) && | ||
2343 | (new_active->link == BOND_LINK_UP) && | ||
2344 | IS_UP(new_active->dev)) { | ||
2345 | bond_change_active_slave(bond, new_active); | ||
2346 | } else { | ||
2347 | res = -EINVAL; | ||
2348 | } | ||
2349 | |||
2350 | write_unlock_bh(&bond->lock); | ||
2351 | |||
2352 | return res; | ||
2353 | } | ||
2354 | |||
2355 | static int bond_ethtool_ioctl(struct net_device *bond_dev, struct ifreq *ifr) | ||
2356 | { | ||
2357 | struct ethtool_drvinfo info; | ||
2358 | void __user *addr = ifr->ifr_data; | ||
2359 | uint32_t cmd; | ||
2360 | |||
2361 | if (get_user(cmd, (uint32_t __user *)addr)) { | ||
2362 | return -EFAULT; | ||
2363 | } | ||
2364 | |||
2365 | switch (cmd) { | ||
2366 | case ETHTOOL_GDRVINFO: | ||
2367 | if (copy_from_user(&info, addr, sizeof(info))) { | ||
2368 | return -EFAULT; | ||
2369 | } | ||
2370 | |||
2371 | if (strcmp(info.driver, "ifenslave") == 0) { | ||
2372 | int new_abi_ver; | ||
2373 | char *endptr; | ||
2374 | |||
2375 | new_abi_ver = simple_strtoul(info.fw_version, | ||
2376 | &endptr, 0); | ||
2377 | if (*endptr) { | ||
2378 | printk(KERN_ERR DRV_NAME | ||
2379 | ": Error: got invalid ABI " | ||
2380 | "version from application\n"); | ||
2381 | |||
2382 | return -EINVAL; | ||
2383 | } | ||
2384 | |||
2385 | if (orig_app_abi_ver == -1) { | ||
2386 | orig_app_abi_ver = new_abi_ver; | ||
2387 | } | ||
2388 | |||
2389 | app_abi_ver = new_abi_ver; | ||
2390 | } | ||
2391 | |||
2392 | strncpy(info.driver, DRV_NAME, 32); | ||
2393 | strncpy(info.version, DRV_VERSION, 32); | ||
2394 | snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); | ||
2395 | |||
2396 | if (copy_to_user(addr, &info, sizeof(info))) { | ||
2397 | return -EFAULT; | ||
2398 | } | ||
2399 | |||
2400 | return 0; | ||
2401 | default: | ||
2402 | return -EOPNOTSUPP; | ||
2403 | } | ||
2404 | } | ||
2405 | |||
2406 | static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) | ||
2407 | { | ||
2408 | struct bonding *bond = bond_dev->priv; | ||
2409 | |||
2410 | info->bond_mode = bond->params.mode; | ||
2411 | info->miimon = bond->params.miimon; | ||
2412 | |||
2413 | read_lock_bh(&bond->lock); | ||
2414 | info->num_slaves = bond->slave_cnt; | ||
2415 | read_unlock_bh(&bond->lock); | ||
2416 | |||
2417 | return 0; | ||
2418 | } | ||
2419 | |||
2420 | static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) | ||
2421 | { | ||
2422 | struct bonding *bond = bond_dev->priv; | ||
2423 | struct slave *slave; | ||
2424 | int i, found = 0; | ||
2425 | |||
2426 | if (info->slave_id < 0) { | ||
2427 | return -ENODEV; | ||
2428 | } | ||
2429 | |||
2430 | read_lock_bh(&bond->lock); | ||
2431 | |||
2432 | bond_for_each_slave(bond, slave, i) { | ||
2433 | if (i == (int)info->slave_id) { | ||
2434 | found = 1; | ||
2435 | break; | ||
2436 | } | ||
2437 | } | ||
2438 | |||
2439 | read_unlock_bh(&bond->lock); | ||
2440 | |||
2441 | if (found) { | ||
2442 | strcpy(info->slave_name, slave->dev->name); | ||
2443 | info->link = slave->link; | ||
2444 | info->state = slave->state; | ||
2445 | info->link_failure_count = slave->link_failure_count; | ||
2446 | } else { | ||
2447 | return -ENODEV; | ||
2448 | } | ||
2449 | |||
2450 | return 0; | ||
2451 | } | ||
2452 | |||
2453 | /*-------------------------------- Monitoring -------------------------------*/ | ||
2454 | |||
2455 | /* this function is called regularly to monitor each slave's link. */ | ||
2456 | static void bond_mii_monitor(struct net_device *bond_dev) | ||
2457 | { | ||
2458 | struct bonding *bond = bond_dev->priv; | ||
2459 | struct slave *slave, *oldcurrent; | ||
2460 | int do_failover = 0; | ||
2461 | int delta_in_ticks; | ||
2462 | int i; | ||
2463 | |||
2464 | read_lock(&bond->lock); | ||
2465 | |||
2466 | delta_in_ticks = (bond->params.miimon * HZ) / 1000; | ||
2467 | |||
2468 | if (bond->kill_timers) { | ||
2469 | goto out; | ||
2470 | } | ||
2471 | |||
2472 | if (bond->slave_cnt == 0) { | ||
2473 | goto re_arm; | ||
2474 | } | ||
2475 | |||
2476 | /* we will try to read the link status of each of our slaves, and | ||
2477 | * set their IFF_RUNNING flag appropriately. For each slave not | ||
2478 | * supporting MII status, we won't do anything so that a user-space | ||
2479 | * program could monitor the link itself if needed. | ||
2480 | */ | ||
2481 | |||
2482 | read_lock(&bond->curr_slave_lock); | ||
2483 | oldcurrent = bond->curr_active_slave; | ||
2484 | read_unlock(&bond->curr_slave_lock); | ||
2485 | |||
2486 | bond_for_each_slave(bond, slave, i) { | ||
2487 | struct net_device *slave_dev = slave->dev; | ||
2488 | int link_state; | ||
2489 | u16 old_speed = slave->speed; | ||
2490 | u8 old_duplex = slave->duplex; | ||
2491 | |||
2492 | link_state = bond_check_dev_link(bond, slave_dev, 0); | ||
2493 | |||
2494 | switch (slave->link) { | ||
2495 | case BOND_LINK_UP: /* the link was up */ | ||
2496 | if (link_state == BMSR_LSTATUS) { | ||
2497 | /* link stays up, nothing more to do */ | ||
2498 | break; | ||
2499 | } else { /* link going down */ | ||
2500 | slave->link = BOND_LINK_FAIL; | ||
2501 | slave->delay = bond->params.downdelay; | ||
2502 | |||
2503 | if (slave->link_failure_count < UINT_MAX) { | ||
2504 | slave->link_failure_count++; | ||
2505 | } | ||
2506 | |||
2507 | if (bond->params.downdelay) { | ||
2508 | printk(KERN_INFO DRV_NAME | ||
2509 | ": %s: link status down for %s " | ||
2510 | "interface %s, disabling it in " | ||
2511 | "%d ms.\n", | ||
2512 | bond_dev->name, | ||
2513 | IS_UP(slave_dev) | ||
2514 | ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) | ||
2515 | ? ((slave == oldcurrent) | ||
2516 | ? "active " : "backup ") | ||
2517 | : "") | ||
2518 | : "idle ", | ||
2519 | slave_dev->name, | ||
2520 | bond->params.downdelay * bond->params.miimon); | ||
2521 | } | ||
2522 | } | ||
2523 | /* no break ! fall through the BOND_LINK_FAIL test to | ||
2524 | ensure proper action to be taken | ||
2525 | */ | ||
2526 | case BOND_LINK_FAIL: /* the link has just gone down */ | ||
2527 | if (link_state != BMSR_LSTATUS) { | ||
2528 | /* link stays down */ | ||
2529 | if (slave->delay <= 0) { | ||
2530 | /* link down for too long time */ | ||
2531 | slave->link = BOND_LINK_DOWN; | ||
2532 | |||
2533 | /* in active/backup mode, we must | ||
2534 | * completely disable this interface | ||
2535 | */ | ||
2536 | if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || | ||
2537 | (bond->params.mode == BOND_MODE_8023AD)) { | ||
2538 | bond_set_slave_inactive_flags(slave); | ||
2539 | } | ||
2540 | |||
2541 | printk(KERN_INFO DRV_NAME | ||
2542 | ": %s: link status definitely " | ||
2543 | "down for interface %s, " | ||
2544 | "disabling it\n", | ||
2545 | bond_dev->name, | ||
2546 | slave_dev->name); | ||
2547 | |||
2548 | /* notify ad that the link status has changed */ | ||
2549 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2550 | bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); | ||
2551 | } | ||
2552 | |||
2553 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
2554 | (bond->params.mode == BOND_MODE_ALB)) { | ||
2555 | bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); | ||
2556 | } | ||
2557 | |||
2558 | if (slave == oldcurrent) { | ||
2559 | do_failover = 1; | ||
2560 | } | ||
2561 | } else { | ||
2562 | slave->delay--; | ||
2563 | } | ||
2564 | } else { | ||
2565 | /* link up again */ | ||
2566 | slave->link = BOND_LINK_UP; | ||
2567 | slave->jiffies = jiffies; | ||
2568 | printk(KERN_INFO DRV_NAME | ||
2569 | ": %s: link status up again after %d " | ||
2570 | "ms for interface %s.\n", | ||
2571 | bond_dev->name, | ||
2572 | (bond->params.downdelay - slave->delay) * bond->params.miimon, | ||
2573 | slave_dev->name); | ||
2574 | } | ||
2575 | break; | ||
2576 | case BOND_LINK_DOWN: /* the link was down */ | ||
2577 | if (link_state != BMSR_LSTATUS) { | ||
2578 | /* the link stays down, nothing more to do */ | ||
2579 | break; | ||
2580 | } else { /* link going up */ | ||
2581 | slave->link = BOND_LINK_BACK; | ||
2582 | slave->delay = bond->params.updelay; | ||
2583 | |||
2584 | if (bond->params.updelay) { | ||
2585 | /* if updelay == 0, no need to | ||
2586 | advertise about a 0 ms delay */ | ||
2587 | printk(KERN_INFO DRV_NAME | ||
2588 | ": %s: link status up for " | ||
2589 | "interface %s, enabling it " | ||
2590 | "in %d ms.\n", | ||
2591 | bond_dev->name, | ||
2592 | slave_dev->name, | ||
2593 | bond->params.updelay * bond->params.miimon); | ||
2594 | } | ||
2595 | } | ||
2596 | /* no break ! fall through the BOND_LINK_BACK state in | ||
2597 | case there's something to do. | ||
2598 | */ | ||
2599 | case BOND_LINK_BACK: /* the link has just come back */ | ||
2600 | if (link_state != BMSR_LSTATUS) { | ||
2601 | /* link down again */ | ||
2602 | slave->link = BOND_LINK_DOWN; | ||
2603 | |||
2604 | printk(KERN_INFO DRV_NAME | ||
2605 | ": %s: link status down again after %d " | ||
2606 | "ms for interface %s.\n", | ||
2607 | bond_dev->name, | ||
2608 | (bond->params.updelay - slave->delay) * bond->params.miimon, | ||
2609 | slave_dev->name); | ||
2610 | } else { | ||
2611 | /* link stays up */ | ||
2612 | if (slave->delay == 0) { | ||
2613 | /* now the link has been up for long time enough */ | ||
2614 | slave->link = BOND_LINK_UP; | ||
2615 | slave->jiffies = jiffies; | ||
2616 | |||
2617 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2618 | /* prevent it from being the active one */ | ||
2619 | slave->state = BOND_STATE_BACKUP; | ||
2620 | } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { | ||
2621 | /* make it immediately active */ | ||
2622 | slave->state = BOND_STATE_ACTIVE; | ||
2623 | } else if (slave != bond->primary_slave) { | ||
2624 | /* prevent it from being the active one */ | ||
2625 | slave->state = BOND_STATE_BACKUP; | ||
2626 | } | ||
2627 | |||
2628 | printk(KERN_INFO DRV_NAME | ||
2629 | ": %s: link status definitely " | ||
2630 | "up for interface %s.\n", | ||
2631 | bond_dev->name, | ||
2632 | slave_dev->name); | ||
2633 | |||
2634 | /* notify ad that the link status has changed */ | ||
2635 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2636 | bond_3ad_handle_link_change(slave, BOND_LINK_UP); | ||
2637 | } | ||
2638 | |||
2639 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
2640 | (bond->params.mode == BOND_MODE_ALB)) { | ||
2641 | bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); | ||
2642 | } | ||
2643 | |||
2644 | if ((!oldcurrent) || | ||
2645 | (slave == bond->primary_slave)) { | ||
2646 | do_failover = 1; | ||
2647 | } | ||
2648 | } else { | ||
2649 | slave->delay--; | ||
2650 | } | ||
2651 | } | ||
2652 | break; | ||
2653 | default: | ||
2654 | /* Should not happen */ | ||
2655 | printk(KERN_ERR "bonding: Error: %s Illegal value (link=%d)\n", | ||
2656 | slave->dev->name, slave->link); | ||
2657 | goto out; | ||
2658 | } /* end of switch (slave->link) */ | ||
2659 | |||
2660 | bond_update_speed_duplex(slave); | ||
2661 | |||
2662 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
2663 | if (old_speed != slave->speed) { | ||
2664 | bond_3ad_adapter_speed_changed(slave); | ||
2665 | } | ||
2666 | |||
2667 | if (old_duplex != slave->duplex) { | ||
2668 | bond_3ad_adapter_duplex_changed(slave); | ||
2669 | } | ||
2670 | } | ||
2671 | |||
2672 | } /* end of for */ | ||
2673 | |||
2674 | if (do_failover) { | ||
2675 | write_lock(&bond->curr_slave_lock); | ||
2676 | |||
2677 | bond_select_active_slave(bond); | ||
2678 | |||
2679 | if (oldcurrent && !bond->curr_active_slave) { | ||
2680 | printk(KERN_INFO DRV_NAME | ||
2681 | ": %s: now running without any active " | ||
2682 | "interface !\n", | ||
2683 | bond_dev->name); | ||
2684 | } | ||
2685 | |||
2686 | write_unlock(&bond->curr_slave_lock); | ||
2687 | } | ||
2688 | |||
2689 | re_arm: | ||
2690 | if (bond->params.miimon) { | ||
2691 | mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); | ||
2692 | } | ||
2693 | out: | ||
2694 | read_unlock(&bond->lock); | ||
2695 | } | ||
2696 | |||
2697 | static void bond_arp_send_all(struct bonding *bond, struct slave *slave) | ||
2698 | { | ||
2699 | int i; | ||
2700 | u32 *targets = bond->params.arp_targets; | ||
2701 | |||
2702 | for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { | ||
2703 | arp_send(ARPOP_REQUEST, ETH_P_ARP, targets[i], slave->dev, | ||
2704 | my_ip, NULL, slave->dev->dev_addr, | ||
2705 | NULL); | ||
2706 | } | ||
2707 | } | ||
2708 | |||
2709 | /* | ||
2710 | * this function is called regularly to monitor each slave's link | ||
2711 | * ensuring that traffic is being sent and received when arp monitoring | ||
2712 | * is used in load-balancing mode. if the adapter has been dormant, then an | ||
2713 | * arp is transmitted to generate traffic. see activebackup_arp_monitor for | ||
2714 | * arp monitoring in active backup mode. | ||
2715 | */ | ||
2716 | static void bond_loadbalance_arp_mon(struct net_device *bond_dev) | ||
2717 | { | ||
2718 | struct bonding *bond = bond_dev->priv; | ||
2719 | struct slave *slave, *oldcurrent; | ||
2720 | int do_failover = 0; | ||
2721 | int delta_in_ticks; | ||
2722 | int i; | ||
2723 | |||
2724 | read_lock(&bond->lock); | ||
2725 | |||
2726 | delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; | ||
2727 | |||
2728 | if (bond->kill_timers) { | ||
2729 | goto out; | ||
2730 | } | ||
2731 | |||
2732 | if (bond->slave_cnt == 0) { | ||
2733 | goto re_arm; | ||
2734 | } | ||
2735 | |||
2736 | read_lock(&bond->curr_slave_lock); | ||
2737 | oldcurrent = bond->curr_active_slave; | ||
2738 | read_unlock(&bond->curr_slave_lock); | ||
2739 | |||
2740 | /* see if any of the previous devices are up now (i.e. they have | ||
2741 | * xmt and rcv traffic). the curr_active_slave does not come into | ||
2742 | * the picture unless it is null. also, slave->jiffies is not needed | ||
2743 | * here because we send an arp on each slave and give a slave as | ||
2744 | * long as it needs to get the tx/rx within the delta. | ||
2745 | * TODO: what about up/down delay in arp mode? it wasn't here before | ||
2746 | * so it can wait | ||
2747 | */ | ||
2748 | bond_for_each_slave(bond, slave, i) { | ||
2749 | if (slave->link != BOND_LINK_UP) { | ||
2750 | if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) && | ||
2751 | ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) { | ||
2752 | |||
2753 | slave->link = BOND_LINK_UP; | ||
2754 | slave->state = BOND_STATE_ACTIVE; | ||
2755 | |||
2756 | /* primary_slave has no meaning in round-robin | ||
2757 | * mode. the window of a slave being up and | ||
2758 | * curr_active_slave being null after enslaving | ||
2759 | * is closed. | ||
2760 | */ | ||
2761 | if (!oldcurrent) { | ||
2762 | printk(KERN_INFO DRV_NAME | ||
2763 | ": %s: link status definitely " | ||
2764 | "up for interface %s, ", | ||
2765 | bond_dev->name, | ||
2766 | slave->dev->name); | ||
2767 | do_failover = 1; | ||
2768 | } else { | ||
2769 | printk(KERN_INFO DRV_NAME | ||
2770 | ": %s: interface %s is now up\n", | ||
2771 | bond_dev->name, | ||
2772 | slave->dev->name); | ||
2773 | } | ||
2774 | } | ||
2775 | } else { | ||
2776 | /* slave->link == BOND_LINK_UP */ | ||
2777 | |||
2778 | /* not all switches will respond to an arp request | ||
2779 | * when the source ip is 0, so don't take the link down | ||
2780 | * if we don't know our ip yet | ||
2781 | */ | ||
2782 | if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || | ||
2783 | (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && | ||
2784 | my_ip)) { | ||
2785 | |||
2786 | slave->link = BOND_LINK_DOWN; | ||
2787 | slave->state = BOND_STATE_BACKUP; | ||
2788 | |||
2789 | if (slave->link_failure_count < UINT_MAX) { | ||
2790 | slave->link_failure_count++; | ||
2791 | } | ||
2792 | |||
2793 | printk(KERN_INFO DRV_NAME | ||
2794 | ": %s: interface %s is now down.\n", | ||
2795 | bond_dev->name, | ||
2796 | slave->dev->name); | ||
2797 | |||
2798 | if (slave == oldcurrent) { | ||
2799 | do_failover = 1; | ||
2800 | } | ||
2801 | } | ||
2802 | } | ||
2803 | |||
2804 | /* note: if switch is in round-robin mode, all links | ||
2805 | * must tx arp to ensure all links rx an arp - otherwise | ||
2806 | * links may oscillate or not come up at all; if switch is | ||
2807 | * in something like xor mode, there is nothing we can | ||
2808 | * do - all replies will be rx'ed on same link causing slaves | ||
2809 | * to be unstable during low/no traffic periods | ||
2810 | */ | ||
2811 | if (IS_UP(slave->dev)) { | ||
2812 | bond_arp_send_all(bond, slave); | ||
2813 | } | ||
2814 | } | ||
2815 | |||
2816 | if (do_failover) { | ||
2817 | write_lock(&bond->curr_slave_lock); | ||
2818 | |||
2819 | bond_select_active_slave(bond); | ||
2820 | |||
2821 | if (oldcurrent && !bond->curr_active_slave) { | ||
2822 | printk(KERN_INFO DRV_NAME | ||
2823 | ": %s: now running without any active " | ||
2824 | "interface !\n", | ||
2825 | bond_dev->name); | ||
2826 | } | ||
2827 | |||
2828 | write_unlock(&bond->curr_slave_lock); | ||
2829 | } | ||
2830 | |||
2831 | re_arm: | ||
2832 | if (bond->params.arp_interval) { | ||
2833 | mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); | ||
2834 | } | ||
2835 | out: | ||
2836 | read_unlock(&bond->lock); | ||
2837 | } | ||
2838 | |||
2839 | /* | ||
2840 | * When using arp monitoring in active-backup mode, this function is | ||
2841 | * called to determine if any backup slaves have went down or a new | ||
2842 | * current slave needs to be found. | ||
2843 | * The backup slaves never generate traffic, they are considered up by merely | ||
2844 | * receiving traffic. If the current slave goes down, each backup slave will | ||
2845 | * be given the opportunity to tx/rx an arp before being taken down - this | ||
2846 | * prevents all slaves from being taken down due to the current slave not | ||
2847 | * sending any traffic for the backups to receive. The arps are not necessarily | ||
2848 | * necessary, any tx and rx traffic will keep the current slave up. While any | ||
2849 | * rx traffic will keep the backup slaves up, the current slave is responsible | ||
2850 | * for generating traffic to keep them up regardless of any other traffic they | ||
2851 | * may have received. | ||
2852 | * see loadbalance_arp_monitor for arp monitoring in load balancing mode | ||
2853 | */ | ||
2854 | static void bond_activebackup_arp_mon(struct net_device *bond_dev) | ||
2855 | { | ||
2856 | struct bonding *bond = bond_dev->priv; | ||
2857 | struct slave *slave; | ||
2858 | int delta_in_ticks; | ||
2859 | int i; | ||
2860 | |||
2861 | read_lock(&bond->lock); | ||
2862 | |||
2863 | delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; | ||
2864 | |||
2865 | if (bond->kill_timers) { | ||
2866 | goto out; | ||
2867 | } | ||
2868 | |||
2869 | if (bond->slave_cnt == 0) { | ||
2870 | goto re_arm; | ||
2871 | } | ||
2872 | |||
2873 | /* determine if any slave has come up or any backup slave has | ||
2874 | * gone down | ||
2875 | * TODO: what about up/down delay in arp mode? it wasn't here before | ||
2876 | * so it can wait | ||
2877 | */ | ||
2878 | bond_for_each_slave(bond, slave, i) { | ||
2879 | if (slave->link != BOND_LINK_UP) { | ||
2880 | if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) { | ||
2881 | |||
2882 | slave->link = BOND_LINK_UP; | ||
2883 | |||
2884 | write_lock(&bond->curr_slave_lock); | ||
2885 | |||
2886 | if ((!bond->curr_active_slave) && | ||
2887 | ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) { | ||
2888 | bond_change_active_slave(bond, slave); | ||
2889 | bond->current_arp_slave = NULL; | ||
2890 | } else if (bond->curr_active_slave != slave) { | ||
2891 | /* this slave has just come up but we | ||
2892 | * already have a current slave; this | ||
2893 | * can also happen if bond_enslave adds | ||
2894 | * a new slave that is up while we are | ||
2895 | * searching for a new slave | ||
2896 | */ | ||
2897 | bond_set_slave_inactive_flags(slave); | ||
2898 | bond->current_arp_slave = NULL; | ||
2899 | } | ||
2900 | |||
2901 | if (slave == bond->curr_active_slave) { | ||
2902 | printk(KERN_INFO DRV_NAME | ||
2903 | ": %s: %s is up and now the " | ||
2904 | "active interface\n", | ||
2905 | bond_dev->name, | ||
2906 | slave->dev->name); | ||
2907 | } else { | ||
2908 | printk(KERN_INFO DRV_NAME | ||
2909 | ": %s: backup interface %s is " | ||
2910 | "now up\n", | ||
2911 | bond_dev->name, | ||
2912 | slave->dev->name); | ||
2913 | } | ||
2914 | |||
2915 | write_unlock(&bond->curr_slave_lock); | ||
2916 | } | ||
2917 | } else { | ||
2918 | read_lock(&bond->curr_slave_lock); | ||
2919 | |||
2920 | if ((slave != bond->curr_active_slave) && | ||
2921 | (!bond->current_arp_slave) && | ||
2922 | (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && | ||
2923 | my_ip)) { | ||
2924 | /* a backup slave has gone down; three times | ||
2925 | * the delta allows the current slave to be | ||
2926 | * taken out before the backup slave. | ||
2927 | * note: a non-null current_arp_slave indicates | ||
2928 | * the curr_active_slave went down and we are | ||
2929 | * searching for a new one; under this | ||
2930 | * condition we only take the curr_active_slave | ||
2931 | * down - this gives each slave a chance to | ||
2932 | * tx/rx traffic before being taken out | ||
2933 | */ | ||
2934 | |||
2935 | read_unlock(&bond->curr_slave_lock); | ||
2936 | |||
2937 | slave->link = BOND_LINK_DOWN; | ||
2938 | |||
2939 | if (slave->link_failure_count < UINT_MAX) { | ||
2940 | slave->link_failure_count++; | ||
2941 | } | ||
2942 | |||
2943 | bond_set_slave_inactive_flags(slave); | ||
2944 | |||
2945 | printk(KERN_INFO DRV_NAME | ||
2946 | ": %s: backup interface %s is now down\n", | ||
2947 | bond_dev->name, | ||
2948 | slave->dev->name); | ||
2949 | } else { | ||
2950 | read_unlock(&bond->curr_slave_lock); | ||
2951 | } | ||
2952 | } | ||
2953 | } | ||
2954 | |||
2955 | read_lock(&bond->curr_slave_lock); | ||
2956 | slave = bond->curr_active_slave; | ||
2957 | read_unlock(&bond->curr_slave_lock); | ||
2958 | |||
2959 | if (slave) { | ||
2960 | /* if we have sent traffic in the past 2*arp_intervals but | ||
2961 | * haven't xmit and rx traffic in that time interval, select | ||
2962 | * a different slave. slave->jiffies is only updated when | ||
2963 | * a slave first becomes the curr_active_slave - not necessarily | ||
2964 | * after every arp; this ensures the slave has a full 2*delta | ||
2965 | * before being taken out. if a primary is being used, check | ||
2966 | * if it is up and needs to take over as the curr_active_slave | ||
2967 | */ | ||
2968 | if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || | ||
2969 | (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && | ||
2970 | my_ip)) && | ||
2971 | ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { | ||
2972 | |||
2973 | slave->link = BOND_LINK_DOWN; | ||
2974 | |||
2975 | if (slave->link_failure_count < UINT_MAX) { | ||
2976 | slave->link_failure_count++; | ||
2977 | } | ||
2978 | |||
2979 | printk(KERN_INFO DRV_NAME | ||
2980 | ": %s: link status down for active interface " | ||
2981 | "%s, disabling it\n", | ||
2982 | bond_dev->name, | ||
2983 | slave->dev->name); | ||
2984 | |||
2985 | write_lock(&bond->curr_slave_lock); | ||
2986 | |||
2987 | bond_select_active_slave(bond); | ||
2988 | slave = bond->curr_active_slave; | ||
2989 | |||
2990 | write_unlock(&bond->curr_slave_lock); | ||
2991 | |||
2992 | bond->current_arp_slave = slave; | ||
2993 | |||
2994 | if (slave) { | ||
2995 | slave->jiffies = jiffies; | ||
2996 | } | ||
2997 | } else if ((bond->primary_slave) && | ||
2998 | (bond->primary_slave != slave) && | ||
2999 | (bond->primary_slave->link == BOND_LINK_UP)) { | ||
3000 | /* at this point, slave is the curr_active_slave */ | ||
3001 | printk(KERN_INFO DRV_NAME | ||
3002 | ": %s: changing from interface %s to primary " | ||
3003 | "interface %s\n", | ||
3004 | bond_dev->name, | ||
3005 | slave->dev->name, | ||
3006 | bond->primary_slave->dev->name); | ||
3007 | |||
3008 | /* primary is up so switch to it */ | ||
3009 | write_lock(&bond->curr_slave_lock); | ||
3010 | bond_change_active_slave(bond, bond->primary_slave); | ||
3011 | write_unlock(&bond->curr_slave_lock); | ||
3012 | |||
3013 | slave = bond->primary_slave; | ||
3014 | slave->jiffies = jiffies; | ||
3015 | } else { | ||
3016 | bond->current_arp_slave = NULL; | ||
3017 | } | ||
3018 | |||
3019 | /* the current slave must tx an arp to ensure backup slaves | ||
3020 | * rx traffic | ||
3021 | */ | ||
3022 | if (slave && my_ip) { | ||
3023 | bond_arp_send_all(bond, slave); | ||
3024 | } | ||
3025 | } | ||
3026 | |||
3027 | /* if we don't have a curr_active_slave, search for the next available | ||
3028 | * backup slave from the current_arp_slave and make it the candidate | ||
3029 | * for becoming the curr_active_slave | ||
3030 | */ | ||
3031 | if (!slave) { | ||
3032 | if (!bond->current_arp_slave) { | ||
3033 | bond->current_arp_slave = bond->first_slave; | ||
3034 | } | ||
3035 | |||
3036 | if (bond->current_arp_slave) { | ||
3037 | bond_set_slave_inactive_flags(bond->current_arp_slave); | ||
3038 | |||
3039 | /* search for next candidate */ | ||
3040 | bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave) { | ||
3041 | if (IS_UP(slave->dev)) { | ||
3042 | slave->link = BOND_LINK_BACK; | ||
3043 | bond_set_slave_active_flags(slave); | ||
3044 | bond_arp_send_all(bond, slave); | ||
3045 | slave->jiffies = jiffies; | ||
3046 | bond->current_arp_slave = slave; | ||
3047 | break; | ||
3048 | } | ||
3049 | |||
3050 | /* if the link state is up at this point, we | ||
3051 | * mark it down - this can happen if we have | ||
3052 | * simultaneous link failures and | ||
3053 | * reselect_active_interface doesn't make this | ||
3054 | * one the current slave so it is still marked | ||
3055 | * up when it is actually down | ||
3056 | */ | ||
3057 | if (slave->link == BOND_LINK_UP) { | ||
3058 | slave->link = BOND_LINK_DOWN; | ||
3059 | if (slave->link_failure_count < UINT_MAX) { | ||
3060 | slave->link_failure_count++; | ||
3061 | } | ||
3062 | |||
3063 | bond_set_slave_inactive_flags(slave); | ||
3064 | |||
3065 | printk(KERN_INFO DRV_NAME | ||
3066 | ": %s: backup interface %s is " | ||
3067 | "now down.\n", | ||
3068 | bond_dev->name, | ||
3069 | slave->dev->name); | ||
3070 | } | ||
3071 | } | ||
3072 | } | ||
3073 | } | ||
3074 | |||
3075 | re_arm: | ||
3076 | if (bond->params.arp_interval) { | ||
3077 | mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); | ||
3078 | } | ||
3079 | out: | ||
3080 | read_unlock(&bond->lock); | ||
3081 | } | ||
3082 | |||
3083 | /*------------------------------ proc/seq_file-------------------------------*/ | ||
3084 | |||
3085 | #ifdef CONFIG_PROC_FS | ||
3086 | |||
3087 | #define SEQ_START_TOKEN ((void *)1) | ||
3088 | |||
3089 | static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) | ||
3090 | { | ||
3091 | struct bonding *bond = seq->private; | ||
3092 | loff_t off = 0; | ||
3093 | struct slave *slave; | ||
3094 | int i; | ||
3095 | |||
3096 | /* make sure the bond won't be taken away */ | ||
3097 | read_lock(&dev_base_lock); | ||
3098 | read_lock_bh(&bond->lock); | ||
3099 | |||
3100 | if (*pos == 0) { | ||
3101 | return SEQ_START_TOKEN; | ||
3102 | } | ||
3103 | |||
3104 | bond_for_each_slave(bond, slave, i) { | ||
3105 | if (++off == *pos) { | ||
3106 | return slave; | ||
3107 | } | ||
3108 | } | ||
3109 | |||
3110 | return NULL; | ||
3111 | } | ||
3112 | |||
3113 | static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
3114 | { | ||
3115 | struct bonding *bond = seq->private; | ||
3116 | struct slave *slave = v; | ||
3117 | |||
3118 | ++*pos; | ||
3119 | if (v == SEQ_START_TOKEN) { | ||
3120 | return bond->first_slave; | ||
3121 | } | ||
3122 | |||
3123 | slave = slave->next; | ||
3124 | |||
3125 | return (slave == bond->first_slave) ? NULL : slave; | ||
3126 | } | ||
3127 | |||
3128 | static void bond_info_seq_stop(struct seq_file *seq, void *v) | ||
3129 | { | ||
3130 | struct bonding *bond = seq->private; | ||
3131 | |||
3132 | read_unlock_bh(&bond->lock); | ||
3133 | read_unlock(&dev_base_lock); | ||
3134 | } | ||
3135 | |||
3136 | static void bond_info_show_master(struct seq_file *seq) | ||
3137 | { | ||
3138 | struct bonding *bond = seq->private; | ||
3139 | struct slave *curr; | ||
3140 | |||
3141 | read_lock(&bond->curr_slave_lock); | ||
3142 | curr = bond->curr_active_slave; | ||
3143 | read_unlock(&bond->curr_slave_lock); | ||
3144 | |||
3145 | seq_printf(seq, "Bonding Mode: %s\n", | ||
3146 | bond_mode_name(bond->params.mode)); | ||
3147 | |||
3148 | if (USES_PRIMARY(bond->params.mode)) { | ||
3149 | seq_printf(seq, "Primary Slave: %s\n", | ||
3150 | (bond->params.primary[0]) ? | ||
3151 | bond->params.primary : "None"); | ||
3152 | |||
3153 | seq_printf(seq, "Currently Active Slave: %s\n", | ||
3154 | (curr) ? curr->dev->name : "None"); | ||
3155 | } | ||
3156 | |||
3157 | seq_printf(seq, "MII Status: %s\n", (curr) ? "up" : "down"); | ||
3158 | seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); | ||
3159 | seq_printf(seq, "Up Delay (ms): %d\n", | ||
3160 | bond->params.updelay * bond->params.miimon); | ||
3161 | seq_printf(seq, "Down Delay (ms): %d\n", | ||
3162 | bond->params.downdelay * bond->params.miimon); | ||
3163 | |||
3164 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
3165 | struct ad_info ad_info; | ||
3166 | |||
3167 | seq_puts(seq, "\n802.3ad info\n"); | ||
3168 | seq_printf(seq, "LACP rate: %s\n", | ||
3169 | (bond->params.lacp_fast) ? "fast" : "slow"); | ||
3170 | |||
3171 | if (bond_3ad_get_active_agg_info(bond, &ad_info)) { | ||
3172 | seq_printf(seq, "bond %s has no active aggregator\n", | ||
3173 | bond->dev->name); | ||
3174 | } else { | ||
3175 | seq_printf(seq, "Active Aggregator Info:\n"); | ||
3176 | |||
3177 | seq_printf(seq, "\tAggregator ID: %d\n", | ||
3178 | ad_info.aggregator_id); | ||
3179 | seq_printf(seq, "\tNumber of ports: %d\n", | ||
3180 | ad_info.ports); | ||
3181 | seq_printf(seq, "\tActor Key: %d\n", | ||
3182 | ad_info.actor_key); | ||
3183 | seq_printf(seq, "\tPartner Key: %d\n", | ||
3184 | ad_info.partner_key); | ||
3185 | seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", | ||
3186 | ad_info.partner_system[0], | ||
3187 | ad_info.partner_system[1], | ||
3188 | ad_info.partner_system[2], | ||
3189 | ad_info.partner_system[3], | ||
3190 | ad_info.partner_system[4], | ||
3191 | ad_info.partner_system[5]); | ||
3192 | } | ||
3193 | } | ||
3194 | } | ||
3195 | |||
3196 | static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) | ||
3197 | { | ||
3198 | struct bonding *bond = seq->private; | ||
3199 | |||
3200 | seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); | ||
3201 | seq_printf(seq, "MII Status: %s\n", | ||
3202 | (slave->link == BOND_LINK_UP) ? "up" : "down"); | ||
3203 | seq_printf(seq, "Link Failure Count: %d\n", | ||
3204 | slave->link_failure_count); | ||
3205 | |||
3206 | if (app_abi_ver >= 1) { | ||
3207 | seq_printf(seq, | ||
3208 | "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", | ||
3209 | slave->perm_hwaddr[0], | ||
3210 | slave->perm_hwaddr[1], | ||
3211 | slave->perm_hwaddr[2], | ||
3212 | slave->perm_hwaddr[3], | ||
3213 | slave->perm_hwaddr[4], | ||
3214 | slave->perm_hwaddr[5]); | ||
3215 | } | ||
3216 | |||
3217 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
3218 | const struct aggregator *agg | ||
3219 | = SLAVE_AD_INFO(slave).port.aggregator; | ||
3220 | |||
3221 | if (agg) { | ||
3222 | seq_printf(seq, "Aggregator ID: %d\n", | ||
3223 | agg->aggregator_identifier); | ||
3224 | } else { | ||
3225 | seq_puts(seq, "Aggregator ID: N/A\n"); | ||
3226 | } | ||
3227 | } | ||
3228 | } | ||
3229 | |||
3230 | static int bond_info_seq_show(struct seq_file *seq, void *v) | ||
3231 | { | ||
3232 | if (v == SEQ_START_TOKEN) { | ||
3233 | seq_printf(seq, "%s\n", version); | ||
3234 | bond_info_show_master(seq); | ||
3235 | } else { | ||
3236 | bond_info_show_slave(seq, v); | ||
3237 | } | ||
3238 | |||
3239 | return 0; | ||
3240 | } | ||
3241 | |||
3242 | static struct seq_operations bond_info_seq_ops = { | ||
3243 | .start = bond_info_seq_start, | ||
3244 | .next = bond_info_seq_next, | ||
3245 | .stop = bond_info_seq_stop, | ||
3246 | .show = bond_info_seq_show, | ||
3247 | }; | ||
3248 | |||
3249 | static int bond_info_open(struct inode *inode, struct file *file) | ||
3250 | { | ||
3251 | struct seq_file *seq; | ||
3252 | struct proc_dir_entry *proc; | ||
3253 | int res; | ||
3254 | |||
3255 | res = seq_open(file, &bond_info_seq_ops); | ||
3256 | if (!res) { | ||
3257 | /* recover the pointer buried in proc_dir_entry data */ | ||
3258 | seq = file->private_data; | ||
3259 | proc = PDE(inode); | ||
3260 | seq->private = proc->data; | ||
3261 | } | ||
3262 | |||
3263 | return res; | ||
3264 | } | ||
3265 | |||
3266 | static struct file_operations bond_info_fops = { | ||
3267 | .owner = THIS_MODULE, | ||
3268 | .open = bond_info_open, | ||
3269 | .read = seq_read, | ||
3270 | .llseek = seq_lseek, | ||
3271 | .release = seq_release, | ||
3272 | }; | ||
3273 | |||
3274 | static int bond_create_proc_entry(struct bonding *bond) | ||
3275 | { | ||
3276 | struct net_device *bond_dev = bond->dev; | ||
3277 | |||
3278 | if (bond_proc_dir) { | ||
3279 | bond->proc_entry = create_proc_entry(bond_dev->name, | ||
3280 | S_IRUGO, | ||
3281 | bond_proc_dir); | ||
3282 | if (bond->proc_entry == NULL) { | ||
3283 | printk(KERN_WARNING DRV_NAME | ||
3284 | ": Warning: Cannot create /proc/net/%s/%s\n", | ||
3285 | DRV_NAME, bond_dev->name); | ||
3286 | } else { | ||
3287 | bond->proc_entry->data = bond; | ||
3288 | bond->proc_entry->proc_fops = &bond_info_fops; | ||
3289 | bond->proc_entry->owner = THIS_MODULE; | ||
3290 | memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); | ||
3291 | } | ||
3292 | } | ||
3293 | |||
3294 | return 0; | ||
3295 | } | ||
3296 | |||
3297 | static void bond_remove_proc_entry(struct bonding *bond) | ||
3298 | { | ||
3299 | if (bond_proc_dir && bond->proc_entry) { | ||
3300 | remove_proc_entry(bond->proc_file_name, bond_proc_dir); | ||
3301 | memset(bond->proc_file_name, 0, IFNAMSIZ); | ||
3302 | bond->proc_entry = NULL; | ||
3303 | } | ||
3304 | } | ||
3305 | |||
3306 | /* Create the bonding directory under /proc/net, if doesn't exist yet. | ||
3307 | * Caller must hold rtnl_lock. | ||
3308 | */ | ||
3309 | static void bond_create_proc_dir(void) | ||
3310 | { | ||
3311 | int len = strlen(DRV_NAME); | ||
3312 | |||
3313 | for (bond_proc_dir = proc_net->subdir; bond_proc_dir; | ||
3314 | bond_proc_dir = bond_proc_dir->next) { | ||
3315 | if ((bond_proc_dir->namelen == len) && | ||
3316 | !memcmp(bond_proc_dir->name, DRV_NAME, len)) { | ||
3317 | break; | ||
3318 | } | ||
3319 | } | ||
3320 | |||
3321 | if (!bond_proc_dir) { | ||
3322 | bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); | ||
3323 | if (bond_proc_dir) { | ||
3324 | bond_proc_dir->owner = THIS_MODULE; | ||
3325 | } else { | ||
3326 | printk(KERN_WARNING DRV_NAME | ||
3327 | ": Warning: cannot create /proc/net/%s\n", | ||
3328 | DRV_NAME); | ||
3329 | } | ||
3330 | } | ||
3331 | } | ||
3332 | |||
3333 | /* Destroy the bonding directory under /proc/net, if empty. | ||
3334 | * Caller must hold rtnl_lock. | ||
3335 | */ | ||
3336 | static void bond_destroy_proc_dir(void) | ||
3337 | { | ||
3338 | struct proc_dir_entry *de; | ||
3339 | |||
3340 | if (!bond_proc_dir) { | ||
3341 | return; | ||
3342 | } | ||
3343 | |||
3344 | /* verify that the /proc dir is empty */ | ||
3345 | for (de = bond_proc_dir->subdir; de; de = de->next) { | ||
3346 | /* ignore . and .. */ | ||
3347 | if (*(de->name) != '.') { | ||
3348 | break; | ||
3349 | } | ||
3350 | } | ||
3351 | |||
3352 | if (de) { | ||
3353 | if (bond_proc_dir->owner == THIS_MODULE) { | ||
3354 | bond_proc_dir->owner = NULL; | ||
3355 | } | ||
3356 | } else { | ||
3357 | remove_proc_entry(DRV_NAME, proc_net); | ||
3358 | bond_proc_dir = NULL; | ||
3359 | } | ||
3360 | } | ||
3361 | #endif /* CONFIG_PROC_FS */ | ||
3362 | |||
3363 | /*-------------------------- netdev event handling --------------------------*/ | ||
3364 | |||
3365 | /* | ||
3366 | * Change device name | ||
3367 | */ | ||
3368 | static int bond_event_changename(struct bonding *bond) | ||
3369 | { | ||
3370 | #ifdef CONFIG_PROC_FS | ||
3371 | bond_remove_proc_entry(bond); | ||
3372 | bond_create_proc_entry(bond); | ||
3373 | #endif | ||
3374 | |||
3375 | return NOTIFY_DONE; | ||
3376 | } | ||
3377 | |||
3378 | static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) | ||
3379 | { | ||
3380 | struct bonding *event_bond = bond_dev->priv; | ||
3381 | |||
3382 | switch (event) { | ||
3383 | case NETDEV_CHANGENAME: | ||
3384 | return bond_event_changename(event_bond); | ||
3385 | case NETDEV_UNREGISTER: | ||
3386 | /* | ||
3387 | * TODO: remove a bond from the list? | ||
3388 | */ | ||
3389 | break; | ||
3390 | default: | ||
3391 | break; | ||
3392 | } | ||
3393 | |||
3394 | return NOTIFY_DONE; | ||
3395 | } | ||
3396 | |||
3397 | static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) | ||
3398 | { | ||
3399 | struct net_device *bond_dev = slave_dev->master; | ||
3400 | |||
3401 | switch (event) { | ||
3402 | case NETDEV_UNREGISTER: | ||
3403 | if (bond_dev) { | ||
3404 | bond_release(bond_dev, slave_dev); | ||
3405 | } | ||
3406 | break; | ||
3407 | case NETDEV_CHANGE: | ||
3408 | /* | ||
3409 | * TODO: is this what we get if somebody | ||
3410 | * sets up a hierarchical bond, then rmmod's | ||
3411 | * one of the slave bonding devices? | ||
3412 | */ | ||
3413 | break; | ||
3414 | case NETDEV_DOWN: | ||
3415 | /* | ||
3416 | * ... Or is it this? | ||
3417 | */ | ||
3418 | break; | ||
3419 | case NETDEV_CHANGEMTU: | ||
3420 | /* | ||
3421 | * TODO: Should slaves be allowed to | ||
3422 | * independently alter their MTU? For | ||
3423 | * an active-backup bond, slaves need | ||
3424 | * not be the same type of device, so | ||
3425 | * MTUs may vary. For other modes, | ||
3426 | * slaves arguably should have the | ||
3427 | * same MTUs. To do this, we'd need to | ||
3428 | * take over the slave's change_mtu | ||
3429 | * function for the duration of their | ||
3430 | * servitude. | ||
3431 | */ | ||
3432 | break; | ||
3433 | case NETDEV_CHANGENAME: | ||
3434 | /* | ||
3435 | * TODO: handle changing the primary's name | ||
3436 | */ | ||
3437 | break; | ||
3438 | default: | ||
3439 | break; | ||
3440 | } | ||
3441 | |||
3442 | return NOTIFY_DONE; | ||
3443 | } | ||
3444 | |||
3445 | /* | ||
3446 | * bond_netdev_event: handle netdev notifier chain events. | ||
3447 | * | ||
3448 | * This function receives events for the netdev chain. The caller (an | ||
3449 | * ioctl handler calling notifier_call_chain) holds the necessary | ||
3450 | * locks for us to safely manipulate the slave devices (RTNL lock, | ||
3451 | * dev_probe_lock). | ||
3452 | */ | ||
3453 | static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) | ||
3454 | { | ||
3455 | struct net_device *event_dev = (struct net_device *)ptr; | ||
3456 | |||
3457 | dprintk("event_dev: %s, event: %lx\n", | ||
3458 | (event_dev ? event_dev->name : "None"), | ||
3459 | event); | ||
3460 | |||
3461 | if (event_dev->flags & IFF_MASTER) { | ||
3462 | dprintk("IFF_MASTER\n"); | ||
3463 | return bond_master_netdev_event(event, event_dev); | ||
3464 | } | ||
3465 | |||
3466 | if (event_dev->flags & IFF_SLAVE) { | ||
3467 | dprintk("IFF_SLAVE\n"); | ||
3468 | return bond_slave_netdev_event(event, event_dev); | ||
3469 | } | ||
3470 | |||
3471 | return NOTIFY_DONE; | ||
3472 | } | ||
3473 | |||
3474 | static struct notifier_block bond_netdev_notifier = { | ||
3475 | .notifier_call = bond_netdev_event, | ||
3476 | }; | ||
3477 | |||
3478 | /*-------------------------- Packet type handling ---------------------------*/ | ||
3479 | |||
3480 | /* register to receive lacpdus on a bond */ | ||
3481 | static void bond_register_lacpdu(struct bonding *bond) | ||
3482 | { | ||
3483 | struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); | ||
3484 | |||
3485 | /* initialize packet type */ | ||
3486 | pk_type->type = PKT_TYPE_LACPDU; | ||
3487 | pk_type->dev = bond->dev; | ||
3488 | pk_type->func = bond_3ad_lacpdu_recv; | ||
3489 | |||
3490 | dev_add_pack(pk_type); | ||
3491 | } | ||
3492 | |||
3493 | /* unregister to receive lacpdus on a bond */ | ||
3494 | static void bond_unregister_lacpdu(struct bonding *bond) | ||
3495 | { | ||
3496 | dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); | ||
3497 | } | ||
3498 | |||
3499 | /*-------------------------- Device entry points ----------------------------*/ | ||
3500 | |||
3501 | static int bond_open(struct net_device *bond_dev) | ||
3502 | { | ||
3503 | struct bonding *bond = bond_dev->priv; | ||
3504 | struct timer_list *mii_timer = &bond->mii_timer; | ||
3505 | struct timer_list *arp_timer = &bond->arp_timer; | ||
3506 | |||
3507 | bond->kill_timers = 0; | ||
3508 | |||
3509 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
3510 | (bond->params.mode == BOND_MODE_ALB)) { | ||
3511 | struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); | ||
3512 | |||
3513 | /* bond_alb_initialize must be called before the timer | ||
3514 | * is started. | ||
3515 | */ | ||
3516 | if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { | ||
3517 | /* something went wrong - fail the open operation */ | ||
3518 | return -1; | ||
3519 | } | ||
3520 | |||
3521 | init_timer(alb_timer); | ||
3522 | alb_timer->expires = jiffies + 1; | ||
3523 | alb_timer->data = (unsigned long)bond; | ||
3524 | alb_timer->function = (void *)&bond_alb_monitor; | ||
3525 | add_timer(alb_timer); | ||
3526 | } | ||
3527 | |||
3528 | if (bond->params.miimon) { /* link check interval, in milliseconds. */ | ||
3529 | init_timer(mii_timer); | ||
3530 | mii_timer->expires = jiffies + 1; | ||
3531 | mii_timer->data = (unsigned long)bond_dev; | ||
3532 | mii_timer->function = (void *)&bond_mii_monitor; | ||
3533 | add_timer(mii_timer); | ||
3534 | } | ||
3535 | |||
3536 | if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ | ||
3537 | init_timer(arp_timer); | ||
3538 | arp_timer->expires = jiffies + 1; | ||
3539 | arp_timer->data = (unsigned long)bond_dev; | ||
3540 | if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { | ||
3541 | arp_timer->function = (void *)&bond_activebackup_arp_mon; | ||
3542 | } else { | ||
3543 | arp_timer->function = (void *)&bond_loadbalance_arp_mon; | ||
3544 | } | ||
3545 | add_timer(arp_timer); | ||
3546 | } | ||
3547 | |||
3548 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
3549 | struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); | ||
3550 | init_timer(ad_timer); | ||
3551 | ad_timer->expires = jiffies + 1; | ||
3552 | ad_timer->data = (unsigned long)bond; | ||
3553 | ad_timer->function = (void *)&bond_3ad_state_machine_handler; | ||
3554 | add_timer(ad_timer); | ||
3555 | |||
3556 | /* register to receive LACPDUs */ | ||
3557 | bond_register_lacpdu(bond); | ||
3558 | } | ||
3559 | |||
3560 | return 0; | ||
3561 | } | ||
3562 | |||
3563 | static int bond_close(struct net_device *bond_dev) | ||
3564 | { | ||
3565 | struct bonding *bond = bond_dev->priv; | ||
3566 | |||
3567 | if (bond->params.mode == BOND_MODE_8023AD) { | ||
3568 | /* Unregister the receive of LACPDUs */ | ||
3569 | bond_unregister_lacpdu(bond); | ||
3570 | } | ||
3571 | |||
3572 | write_lock_bh(&bond->lock); | ||
3573 | |||
3574 | bond_mc_list_destroy(bond); | ||
3575 | |||
3576 | /* signal timers not to re-arm */ | ||
3577 | bond->kill_timers = 1; | ||
3578 | |||
3579 | write_unlock_bh(&bond->lock); | ||
3580 | |||
3581 | /* del_timer_sync must run without holding the bond->lock | ||
3582 | * because a running timer might be trying to hold it too | ||
3583 | */ | ||
3584 | |||
3585 | if (bond->params.miimon) { /* link check interval, in milliseconds. */ | ||
3586 | del_timer_sync(&bond->mii_timer); | ||
3587 | } | ||
3588 | |||
3589 | if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ | ||
3590 | del_timer_sync(&bond->arp_timer); | ||
3591 | } | ||
3592 | |||
3593 | switch (bond->params.mode) { | ||
3594 | case BOND_MODE_8023AD: | ||
3595 | del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); | ||
3596 | break; | ||
3597 | case BOND_MODE_TLB: | ||
3598 | case BOND_MODE_ALB: | ||
3599 | del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); | ||
3600 | break; | ||
3601 | default: | ||
3602 | break; | ||
3603 | } | ||
3604 | |||
3605 | /* Release the bonded slaves */ | ||
3606 | bond_release_all(bond_dev); | ||
3607 | |||
3608 | if ((bond->params.mode == BOND_MODE_TLB) || | ||
3609 | (bond->params.mode == BOND_MODE_ALB)) { | ||
3610 | /* Must be called only after all | ||
3611 | * slaves have been released | ||
3612 | */ | ||
3613 | bond_alb_deinitialize(bond); | ||
3614 | } | ||
3615 | |||
3616 | return 0; | ||
3617 | } | ||
3618 | |||
3619 | static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) | ||
3620 | { | ||
3621 | struct bonding *bond = bond_dev->priv; | ||
3622 | struct net_device_stats *stats = &(bond->stats), *sstats; | ||
3623 | struct slave *slave; | ||
3624 | int i; | ||
3625 | |||
3626 | memset(stats, 0, sizeof(struct net_device_stats)); | ||
3627 | |||
3628 | read_lock_bh(&bond->lock); | ||
3629 | |||
3630 | bond_for_each_slave(bond, slave, i) { | ||
3631 | sstats = slave->dev->get_stats(slave->dev); | ||
3632 | |||
3633 | stats->rx_packets += sstats->rx_packets; | ||
3634 | stats->rx_bytes += sstats->rx_bytes; | ||
3635 | stats->rx_errors += sstats->rx_errors; | ||
3636 | stats->rx_dropped += sstats->rx_dropped; | ||
3637 | |||
3638 | stats->tx_packets += sstats->tx_packets; | ||
3639 | stats->tx_bytes += sstats->tx_bytes; | ||
3640 | stats->tx_errors += sstats->tx_errors; | ||
3641 | stats->tx_dropped += sstats->tx_dropped; | ||
3642 | |||
3643 | stats->multicast += sstats->multicast; | ||
3644 | stats->collisions += sstats->collisions; | ||
3645 | |||
3646 | stats->rx_length_errors += sstats->rx_length_errors; | ||
3647 | stats->rx_over_errors += sstats->rx_over_errors; | ||
3648 | stats->rx_crc_errors += sstats->rx_crc_errors; | ||
3649 | stats->rx_frame_errors += sstats->rx_frame_errors; | ||
3650 | stats->rx_fifo_errors += sstats->rx_fifo_errors; | ||
3651 | stats->rx_missed_errors += sstats->rx_missed_errors; | ||
3652 | |||
3653 | stats->tx_aborted_errors += sstats->tx_aborted_errors; | ||
3654 | stats->tx_carrier_errors += sstats->tx_carrier_errors; | ||
3655 | stats->tx_fifo_errors += sstats->tx_fifo_errors; | ||
3656 | stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; | ||
3657 | stats->tx_window_errors += sstats->tx_window_errors; | ||
3658 | } | ||
3659 | |||
3660 | read_unlock_bh(&bond->lock); | ||
3661 | |||
3662 | return stats; | ||
3663 | } | ||
3664 | |||
3665 | static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) | ||
3666 | { | ||
3667 | struct net_device *slave_dev = NULL; | ||
3668 | struct ifbond k_binfo; | ||
3669 | struct ifbond __user *u_binfo = NULL; | ||
3670 | struct ifslave k_sinfo; | ||
3671 | struct ifslave __user *u_sinfo = NULL; | ||
3672 | struct mii_ioctl_data *mii = NULL; | ||
3673 | int prev_abi_ver = orig_app_abi_ver; | ||
3674 | int res = 0; | ||
3675 | |||
3676 | dprintk("bond_ioctl: master=%s, cmd=%d\n", | ||
3677 | bond_dev->name, cmd); | ||
3678 | |||
3679 | switch (cmd) { | ||
3680 | case SIOCETHTOOL: | ||
3681 | return bond_ethtool_ioctl(bond_dev, ifr); | ||
3682 | case SIOCGMIIPHY: | ||
3683 | mii = if_mii(ifr); | ||
3684 | if (!mii) { | ||
3685 | return -EINVAL; | ||
3686 | } | ||
3687 | mii->phy_id = 0; | ||
3688 | /* Fall Through */ | ||
3689 | case SIOCGMIIREG: | ||
3690 | /* | ||
3691 | * We do this again just in case we were called by SIOCGMIIREG | ||
3692 | * instead of SIOCGMIIPHY. | ||
3693 | */ | ||
3694 | mii = if_mii(ifr); | ||
3695 | if (!mii) { | ||
3696 | return -EINVAL; | ||
3697 | } | ||
3698 | |||
3699 | if (mii->reg_num == 1) { | ||
3700 | struct bonding *bond = bond_dev->priv; | ||
3701 | mii->val_out = 0; | ||
3702 | read_lock_bh(&bond->lock); | ||
3703 | read_lock(&bond->curr_slave_lock); | ||
3704 | if (bond->curr_active_slave) { | ||
3705 | mii->val_out = BMSR_LSTATUS; | ||
3706 | } | ||
3707 | read_unlock(&bond->curr_slave_lock); | ||
3708 | read_unlock_bh(&bond->lock); | ||
3709 | } | ||
3710 | |||
3711 | return 0; | ||
3712 | case BOND_INFO_QUERY_OLD: | ||
3713 | case SIOCBONDINFOQUERY: | ||
3714 | u_binfo = (struct ifbond __user *)ifr->ifr_data; | ||
3715 | |||
3716 | if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { | ||
3717 | return -EFAULT; | ||
3718 | } | ||
3719 | |||
3720 | res = bond_info_query(bond_dev, &k_binfo); | ||
3721 | if (res == 0) { | ||
3722 | if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { | ||
3723 | return -EFAULT; | ||
3724 | } | ||
3725 | } | ||
3726 | |||
3727 | return res; | ||
3728 | case BOND_SLAVE_INFO_QUERY_OLD: | ||
3729 | case SIOCBONDSLAVEINFOQUERY: | ||
3730 | u_sinfo = (struct ifslave __user *)ifr->ifr_data; | ||
3731 | |||
3732 | if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { | ||
3733 | return -EFAULT; | ||
3734 | } | ||
3735 | |||
3736 | res = bond_slave_info_query(bond_dev, &k_sinfo); | ||
3737 | if (res == 0) { | ||
3738 | if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { | ||
3739 | return -EFAULT; | ||
3740 | } | ||
3741 | } | ||
3742 | |||
3743 | return res; | ||
3744 | default: | ||
3745 | /* Go on */ | ||
3746 | break; | ||
3747 | } | ||
3748 | |||
3749 | if (!capable(CAP_NET_ADMIN)) { | ||
3750 | return -EPERM; | ||
3751 | } | ||
3752 | |||
3753 | if (orig_app_abi_ver == -1) { | ||
3754 | /* no orig_app_abi_ver was provided yet, so we'll use the | ||
3755 | * current one from now on, even if it's 0 | ||
3756 | */ | ||
3757 | orig_app_abi_ver = app_abi_ver; | ||
3758 | |||
3759 | } else if (orig_app_abi_ver != app_abi_ver) { | ||
3760 | printk(KERN_ERR DRV_NAME | ||
3761 | ": Error: already using ifenslave ABI version %d; to " | ||
3762 | "upgrade ifenslave to version %d, you must first " | ||
3763 | "reload bonding.\n", | ||
3764 | orig_app_abi_ver, app_abi_ver); | ||
3765 | return -EINVAL; | ||
3766 | } | ||
3767 | |||
3768 | slave_dev = dev_get_by_name(ifr->ifr_slave); | ||
3769 | |||
3770 | dprintk("slave_dev=%p: \n", slave_dev); | ||
3771 | |||
3772 | if (!slave_dev) { | ||
3773 | res = -ENODEV; | ||
3774 | } else { | ||
3775 | dprintk("slave_dev->name=%s: \n", slave_dev->name); | ||
3776 | switch (cmd) { | ||
3777 | case BOND_ENSLAVE_OLD: | ||
3778 | case SIOCBONDENSLAVE: | ||
3779 | res = bond_enslave(bond_dev, slave_dev); | ||
3780 | break; | ||
3781 | case BOND_RELEASE_OLD: | ||
3782 | case SIOCBONDRELEASE: | ||
3783 | res = bond_release(bond_dev, slave_dev); | ||
3784 | break; | ||
3785 | case BOND_SETHWADDR_OLD: | ||
3786 | case SIOCBONDSETHWADDR: | ||
3787 | res = bond_sethwaddr(bond_dev, slave_dev); | ||
3788 | break; | ||
3789 | case BOND_CHANGE_ACTIVE_OLD: | ||
3790 | case SIOCBONDCHANGEACTIVE: | ||
3791 | res = bond_ioctl_change_active(bond_dev, slave_dev); | ||
3792 | break; | ||
3793 | default: | ||
3794 | res = -EOPNOTSUPP; | ||
3795 | } | ||
3796 | |||
3797 | dev_put(slave_dev); | ||
3798 | } | ||
3799 | |||
3800 | if (res < 0) { | ||
3801 | /* The ioctl failed, so there's no point in changing the | ||
3802 | * orig_app_abi_ver. We'll restore it's value just in case | ||
3803 | * we've changed it earlier in this function. | ||
3804 | */ | ||
3805 | orig_app_abi_ver = prev_abi_ver; | ||
3806 | } | ||
3807 | |||
3808 | return res; | ||
3809 | } | ||
3810 | |||
3811 | static void bond_set_multicast_list(struct net_device *bond_dev) | ||
3812 | { | ||
3813 | struct bonding *bond = bond_dev->priv; | ||
3814 | struct dev_mc_list *dmi; | ||
3815 | |||
3816 | write_lock_bh(&bond->lock); | ||
3817 | |||
3818 | /* | ||
3819 | * Do promisc before checking multicast_mode | ||
3820 | */ | ||
3821 | if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { | ||
3822 | bond_set_promiscuity(bond, 1); | ||
3823 | } | ||
3824 | |||
3825 | if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { | ||
3826 | bond_set_promiscuity(bond, -1); | ||
3827 | } | ||
3828 | |||
3829 | /* set allmulti flag to slaves */ | ||
3830 | if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { | ||
3831 | bond_set_allmulti(bond, 1); | ||
3832 | } | ||
3833 | |||
3834 | if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { | ||
3835 | bond_set_allmulti(bond, -1); | ||
3836 | } | ||
3837 | |||
3838 | bond->flags = bond_dev->flags; | ||
3839 | |||
3840 | /* looking for addresses to add to slaves' mc list */ | ||
3841 | for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { | ||
3842 | if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { | ||
3843 | bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); | ||
3844 | } | ||
3845 | } | ||
3846 | |||
3847 | /* looking for addresses to delete from slaves' list */ | ||
3848 | for (dmi = bond->mc_list; dmi; dmi = dmi->next) { | ||
3849 | if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { | ||
3850 | bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); | ||
3851 | } | ||
3852 | } | ||
3853 | |||
3854 | /* save master's multicast list */ | ||
3855 | bond_mc_list_destroy(bond); | ||
3856 | bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); | ||
3857 | |||
3858 | write_unlock_bh(&bond->lock); | ||
3859 | } | ||
3860 | |||
3861 | /* | ||
3862 | * Change the MTU of all of a master's slaves to match the master | ||
3863 | */ | ||
3864 | static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) | ||
3865 | { | ||
3866 | struct bonding *bond = bond_dev->priv; | ||
3867 | struct slave *slave, *stop_at; | ||
3868 | int res = 0; | ||
3869 | int i; | ||
3870 | |||
3871 | dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, | ||
3872 | (bond_dev ? bond_dev->name : "None"), new_mtu); | ||
3873 | |||
3874 | /* Can't hold bond->lock with bh disabled here since | ||
3875 | * some base drivers panic. On the other hand we can't | ||
3876 | * hold bond->lock without bh disabled because we'll | ||
3877 | * deadlock. The only solution is to rely on the fact | ||
3878 | * that we're under rtnl_lock here, and the slaves | ||
3879 | * list won't change. This doesn't solve the problem | ||
3880 | * of setting the slave's MTU while it is | ||
3881 | * transmitting, but the assumption is that the base | ||
3882 | * driver can handle that. | ||
3883 | * | ||
3884 | * TODO: figure out a way to safely iterate the slaves | ||
3885 | * list, but without holding a lock around the actual | ||
3886 | * call to the base driver. | ||
3887 | */ | ||
3888 | |||
3889 | bond_for_each_slave(bond, slave, i) { | ||
3890 | dprintk("s %p s->p %p c_m %p\n", slave, | ||
3891 | slave->prev, slave->dev->change_mtu); | ||
3892 | res = dev_set_mtu(slave->dev, new_mtu); | ||
3893 | |||
3894 | if (res) { | ||
3895 | /* If we failed to set the slave's mtu to the new value | ||
3896 | * we must abort the operation even in ACTIVE_BACKUP | ||
3897 | * mode, because if we allow the backup slaves to have | ||
3898 | * different mtu values than the active slave we'll | ||
3899 | * need to change their mtu when doing a failover. That | ||
3900 | * means changing their mtu from timer context, which | ||
3901 | * is probably not a good idea. | ||
3902 | */ | ||
3903 | dprintk("err %d %s\n", res, slave->dev->name); | ||
3904 | goto unwind; | ||
3905 | } | ||
3906 | } | ||
3907 | |||
3908 | bond_dev->mtu = new_mtu; | ||
3909 | |||
3910 | return 0; | ||
3911 | |||
3912 | unwind: | ||
3913 | /* unwind from head to the slave that failed */ | ||
3914 | stop_at = slave; | ||
3915 | bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { | ||
3916 | int tmp_res; | ||
3917 | |||
3918 | tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); | ||
3919 | if (tmp_res) { | ||
3920 | dprintk("unwind err %d dev %s\n", tmp_res, | ||
3921 | slave->dev->name); | ||
3922 | } | ||
3923 | } | ||
3924 | |||
3925 | return res; | ||
3926 | } | ||
3927 | |||
3928 | /* | ||
3929 | * Change HW address | ||
3930 | * | ||
3931 | * Note that many devices must be down to change the HW address, and | ||
3932 | * downing the master releases all slaves. We can make bonds full of | ||
3933 | * bonding devices to test this, however. | ||
3934 | */ | ||
3935 | static int bond_set_mac_address(struct net_device *bond_dev, void *addr) | ||
3936 | { | ||
3937 | struct bonding *bond = bond_dev->priv; | ||
3938 | struct sockaddr *sa = addr, tmp_sa; | ||
3939 | struct slave *slave, *stop_at; | ||
3940 | int res = 0; | ||
3941 | int i; | ||
3942 | |||
3943 | dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); | ||
3944 | |||
3945 | if (!is_valid_ether_addr(sa->sa_data)) { | ||
3946 | return -EADDRNOTAVAIL; | ||
3947 | } | ||
3948 | |||
3949 | /* Can't hold bond->lock with bh disabled here since | ||
3950 | * some base drivers panic. On the other hand we can't | ||
3951 | * hold bond->lock without bh disabled because we'll | ||
3952 | * deadlock. The only solution is to rely on the fact | ||
3953 | * that we're under rtnl_lock here, and the slaves | ||
3954 | * list won't change. This doesn't solve the problem | ||
3955 | * of setting the slave's hw address while it is | ||
3956 | * transmitting, but the assumption is that the base | ||
3957 | * driver can handle that. | ||
3958 | * | ||
3959 | * TODO: figure out a way to safely iterate the slaves | ||
3960 | * list, but without holding a lock around the actual | ||
3961 | * call to the base driver. | ||
3962 | */ | ||
3963 | |||
3964 | bond_for_each_slave(bond, slave, i) { | ||
3965 | dprintk("slave %p %s\n", slave, slave->dev->name); | ||
3966 | |||
3967 | if (slave->dev->set_mac_address == NULL) { | ||
3968 | res = -EOPNOTSUPP; | ||
3969 | dprintk("EOPNOTSUPP %s\n", slave->dev->name); | ||
3970 | goto unwind; | ||
3971 | } | ||
3972 | |||
3973 | res = dev_set_mac_address(slave->dev, addr); | ||
3974 | if (res) { | ||
3975 | /* TODO: consider downing the slave | ||
3976 | * and retry ? | ||
3977 | * User should expect communications | ||
3978 | * breakage anyway until ARP finish | ||
3979 | * updating, so... | ||
3980 | */ | ||
3981 | dprintk("err %d %s\n", res, slave->dev->name); | ||
3982 | goto unwind; | ||
3983 | } | ||
3984 | } | ||
3985 | |||
3986 | /* success */ | ||
3987 | memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); | ||
3988 | return 0; | ||
3989 | |||
3990 | unwind: | ||
3991 | memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); | ||
3992 | tmp_sa.sa_family = bond_dev->type; | ||
3993 | |||
3994 | /* unwind from head to the slave that failed */ | ||
3995 | stop_at = slave; | ||
3996 | bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { | ||
3997 | int tmp_res; | ||
3998 | |||
3999 | tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); | ||
4000 | if (tmp_res) { | ||
4001 | dprintk("unwind err %d dev %s\n", tmp_res, | ||
4002 | slave->dev->name); | ||
4003 | } | ||
4004 | } | ||
4005 | |||
4006 | return res; | ||
4007 | } | ||
4008 | |||
4009 | static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) | ||
4010 | { | ||
4011 | struct bonding *bond = bond_dev->priv; | ||
4012 | struct slave *slave, *start_at; | ||
4013 | int i; | ||
4014 | int res = 1; | ||
4015 | |||
4016 | read_lock(&bond->lock); | ||
4017 | |||
4018 | if (!BOND_IS_OK(bond)) { | ||
4019 | goto out; | ||
4020 | } | ||
4021 | |||
4022 | read_lock(&bond->curr_slave_lock); | ||
4023 | slave = start_at = bond->curr_active_slave; | ||
4024 | read_unlock(&bond->curr_slave_lock); | ||
4025 | |||
4026 | if (!slave) { | ||
4027 | goto out; | ||
4028 | } | ||
4029 | |||
4030 | bond_for_each_slave_from(bond, slave, i, start_at) { | ||
4031 | if (IS_UP(slave->dev) && | ||
4032 | (slave->link == BOND_LINK_UP) && | ||
4033 | (slave->state == BOND_STATE_ACTIVE)) { | ||
4034 | res = bond_dev_queue_xmit(bond, skb, slave->dev); | ||
4035 | |||
4036 | write_lock(&bond->curr_slave_lock); | ||
4037 | bond->curr_active_slave = slave->next; | ||
4038 | write_unlock(&bond->curr_slave_lock); | ||
4039 | |||
4040 | break; | ||
4041 | } | ||
4042 | } | ||
4043 | |||
4044 | |||
4045 | out: | ||
4046 | if (res) { | ||
4047 | /* no suitable interface, frame not sent */ | ||
4048 | dev_kfree_skb(skb); | ||
4049 | } | ||
4050 | read_unlock(&bond->lock); | ||
4051 | return 0; | ||
4052 | } | ||
4053 | |||
4054 | /* | ||
4055 | * in active-backup mode, we know that bond->curr_active_slave is always valid if | ||
4056 | * the bond has a usable interface. | ||
4057 | */ | ||
4058 | static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) | ||
4059 | { | ||
4060 | struct bonding *bond = bond_dev->priv; | ||
4061 | int res = 1; | ||
4062 | |||
4063 | /* if we are sending arp packets, try to at least | ||
4064 | identify our own ip address */ | ||
4065 | if (bond->params.arp_interval && !my_ip && | ||
4066 | (skb->protocol == __constant_htons(ETH_P_ARP))) { | ||
4067 | char *the_ip = (char *)skb->data + | ||
4068 | sizeof(struct ethhdr) + | ||
4069 | sizeof(struct arphdr) + | ||
4070 | ETH_ALEN; | ||
4071 | memcpy(&my_ip, the_ip, 4); | ||
4072 | } | ||
4073 | |||
4074 | read_lock(&bond->lock); | ||
4075 | read_lock(&bond->curr_slave_lock); | ||
4076 | |||
4077 | if (!BOND_IS_OK(bond)) { | ||
4078 | goto out; | ||
4079 | } | ||
4080 | |||
4081 | if (bond->curr_active_slave) { /* one usable interface */ | ||
4082 | res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); | ||
4083 | } | ||
4084 | |||
4085 | out: | ||
4086 | if (res) { | ||
4087 | /* no suitable interface, frame not sent */ | ||
4088 | dev_kfree_skb(skb); | ||
4089 | } | ||
4090 | read_unlock(&bond->curr_slave_lock); | ||
4091 | read_unlock(&bond->lock); | ||
4092 | return 0; | ||
4093 | } | ||
4094 | |||
4095 | /* | ||
4096 | * in XOR mode, we determine the output device by performing xor on | ||
4097 | * the source and destination hw adresses. If this device is not | ||
4098 | * enabled, find the next slave following this xor slave. | ||
4099 | */ | ||
4100 | static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) | ||
4101 | { | ||
4102 | struct bonding *bond = bond_dev->priv; | ||
4103 | struct ethhdr *data = (struct ethhdr *)skb->data; | ||
4104 | struct slave *slave, *start_at; | ||
4105 | int slave_no; | ||
4106 | int i; | ||
4107 | int res = 1; | ||
4108 | |||
4109 | read_lock(&bond->lock); | ||
4110 | |||
4111 | if (!BOND_IS_OK(bond)) { | ||
4112 | goto out; | ||
4113 | } | ||
4114 | |||
4115 | slave_no = (data->h_dest[5]^bond_dev->dev_addr[5]) % bond->slave_cnt; | ||
4116 | |||
4117 | bond_for_each_slave(bond, slave, i) { | ||
4118 | slave_no--; | ||
4119 | if (slave_no < 0) { | ||
4120 | break; | ||
4121 | } | ||
4122 | } | ||
4123 | |||
4124 | start_at = slave; | ||
4125 | |||
4126 | bond_for_each_slave_from(bond, slave, i, start_at) { | ||
4127 | if (IS_UP(slave->dev) && | ||
4128 | (slave->link == BOND_LINK_UP) && | ||
4129 | (slave->state == BOND_STATE_ACTIVE)) { | ||
4130 | res = bond_dev_queue_xmit(bond, skb, slave->dev); | ||
4131 | break; | ||
4132 | } | ||
4133 | } | ||
4134 | |||
4135 | out: | ||
4136 | if (res) { | ||
4137 | /* no suitable interface, frame not sent */ | ||
4138 | dev_kfree_skb(skb); | ||
4139 | } | ||
4140 | read_unlock(&bond->lock); | ||
4141 | return 0; | ||
4142 | } | ||
4143 | |||
4144 | /* | ||
4145 | * in broadcast mode, we send everything to all usable interfaces. | ||
4146 | */ | ||
4147 | static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) | ||
4148 | { | ||
4149 | struct bonding *bond = bond_dev->priv; | ||
4150 | struct slave *slave, *start_at; | ||
4151 | struct net_device *tx_dev = NULL; | ||
4152 | int i; | ||
4153 | int res = 1; | ||
4154 | |||
4155 | read_lock(&bond->lock); | ||
4156 | |||
4157 | if (!BOND_IS_OK(bond)) { | ||
4158 | goto out; | ||
4159 | } | ||
4160 | |||
4161 | read_lock(&bond->curr_slave_lock); | ||
4162 | start_at = bond->curr_active_slave; | ||
4163 | read_unlock(&bond->curr_slave_lock); | ||
4164 | |||
4165 | if (!start_at) { | ||
4166 | goto out; | ||
4167 | } | ||
4168 | |||
4169 | bond_for_each_slave_from(bond, slave, i, start_at) { | ||
4170 | if (IS_UP(slave->dev) && | ||
4171 | (slave->link == BOND_LINK_UP) && | ||
4172 | (slave->state == BOND_STATE_ACTIVE)) { | ||
4173 | if (tx_dev) { | ||
4174 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | ||
4175 | if (!skb2) { | ||
4176 | printk(KERN_ERR DRV_NAME | ||
4177 | ": Error: bond_xmit_broadcast(): " | ||
4178 | "skb_clone() failed\n"); | ||
4179 | continue; | ||
4180 | } | ||
4181 | |||
4182 | res = bond_dev_queue_xmit(bond, skb2, tx_dev); | ||
4183 | if (res) { | ||
4184 | dev_kfree_skb(skb2); | ||
4185 | continue; | ||
4186 | } | ||
4187 | } | ||
4188 | tx_dev = slave->dev; | ||
4189 | } | ||
4190 | } | ||
4191 | |||
4192 | if (tx_dev) { | ||
4193 | res = bond_dev_queue_xmit(bond, skb, tx_dev); | ||
4194 | } | ||
4195 | |||
4196 | out: | ||
4197 | if (res) { | ||
4198 | /* no suitable interface, frame not sent */ | ||
4199 | dev_kfree_skb(skb); | ||
4200 | } | ||
4201 | /* frame sent to all suitable interfaces */ | ||
4202 | read_unlock(&bond->lock); | ||
4203 | return 0; | ||
4204 | } | ||
4205 | |||
4206 | /*------------------------- Device initialization ---------------------------*/ | ||
4207 | |||
4208 | /* | ||
4209 | * set bond mode specific net device operations | ||
4210 | */ | ||
4211 | static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode) | ||
4212 | { | ||
4213 | switch (mode) { | ||
4214 | case BOND_MODE_ROUNDROBIN: | ||
4215 | bond_dev->hard_start_xmit = bond_xmit_roundrobin; | ||
4216 | break; | ||
4217 | case BOND_MODE_ACTIVEBACKUP: | ||
4218 | bond_dev->hard_start_xmit = bond_xmit_activebackup; | ||
4219 | break; | ||
4220 | case BOND_MODE_XOR: | ||
4221 | bond_dev->hard_start_xmit = bond_xmit_xor; | ||
4222 | break; | ||
4223 | case BOND_MODE_BROADCAST: | ||
4224 | bond_dev->hard_start_xmit = bond_xmit_broadcast; | ||
4225 | break; | ||
4226 | case BOND_MODE_8023AD: | ||
4227 | bond_dev->hard_start_xmit = bond_3ad_xmit_xor; | ||
4228 | break; | ||
4229 | case BOND_MODE_TLB: | ||
4230 | case BOND_MODE_ALB: | ||
4231 | bond_dev->hard_start_xmit = bond_alb_xmit; | ||
4232 | bond_dev->set_mac_address = bond_alb_set_mac_address; | ||
4233 | break; | ||
4234 | default: | ||
4235 | /* Should never happen, mode already checked */ | ||
4236 | printk(KERN_ERR DRV_NAME | ||
4237 | ": Error: Unknown bonding mode %d\n", | ||
4238 | mode); | ||
4239 | break; | ||
4240 | } | ||
4241 | } | ||
4242 | |||
4243 | /* | ||
4244 | * Does not allocate but creates a /proc entry. | ||
4245 | * Allowed to fail. | ||
4246 | */ | ||
4247 | static int __init bond_init(struct net_device *bond_dev, struct bond_params *params) | ||
4248 | { | ||
4249 | struct bonding *bond = bond_dev->priv; | ||
4250 | |||
4251 | dprintk("Begin bond_init for %s\n", bond_dev->name); | ||
4252 | |||
4253 | /* initialize rwlocks */ | ||
4254 | rwlock_init(&bond->lock); | ||
4255 | rwlock_init(&bond->curr_slave_lock); | ||
4256 | |||
4257 | bond->params = *params; /* copy params struct */ | ||
4258 | |||
4259 | /* Initialize pointers */ | ||
4260 | bond->first_slave = NULL; | ||
4261 | bond->curr_active_slave = NULL; | ||
4262 | bond->current_arp_slave = NULL; | ||
4263 | bond->primary_slave = NULL; | ||
4264 | bond->dev = bond_dev; | ||
4265 | INIT_LIST_HEAD(&bond->vlan_list); | ||
4266 | |||
4267 | /* Initialize the device entry points */ | ||
4268 | bond_dev->open = bond_open; | ||
4269 | bond_dev->stop = bond_close; | ||
4270 | bond_dev->get_stats = bond_get_stats; | ||
4271 | bond_dev->do_ioctl = bond_do_ioctl; | ||
4272 | bond_dev->set_multicast_list = bond_set_multicast_list; | ||
4273 | bond_dev->change_mtu = bond_change_mtu; | ||
4274 | bond_dev->set_mac_address = bond_set_mac_address; | ||
4275 | |||
4276 | bond_set_mode_ops(bond_dev, bond->params.mode); | ||
4277 | |||
4278 | bond_dev->destructor = free_netdev; | ||
4279 | |||
4280 | /* Initialize the device options */ | ||
4281 | bond_dev->tx_queue_len = 0; | ||
4282 | bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; | ||
4283 | |||
4284 | /* At first, we block adding VLANs. That's the only way to | ||
4285 | * prevent problems that occur when adding VLANs over an | ||
4286 | * empty bond. The block will be removed once non-challenged | ||
4287 | * slaves are enslaved. | ||
4288 | */ | ||
4289 | bond_dev->features |= NETIF_F_VLAN_CHALLENGED; | ||
4290 | |||
4291 | /* don't acquire bond device's xmit_lock when | ||
4292 | * transmitting */ | ||
4293 | bond_dev->features |= NETIF_F_LLTX; | ||
4294 | |||
4295 | /* By default, we declare the bond to be fully | ||
4296 | * VLAN hardware accelerated capable. Special | ||
4297 | * care is taken in the various xmit functions | ||
4298 | * when there are slaves that are not hw accel | ||
4299 | * capable | ||
4300 | */ | ||
4301 | bond_dev->vlan_rx_register = bond_vlan_rx_register; | ||
4302 | bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid; | ||
4303 | bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid; | ||
4304 | bond_dev->features |= (NETIF_F_HW_VLAN_TX | | ||
4305 | NETIF_F_HW_VLAN_RX | | ||
4306 | NETIF_F_HW_VLAN_FILTER); | ||
4307 | |||
4308 | #ifdef CONFIG_PROC_FS | ||
4309 | bond_create_proc_entry(bond); | ||
4310 | #endif | ||
4311 | |||
4312 | list_add_tail(&bond->bond_list, &bond_dev_list); | ||
4313 | |||
4314 | return 0; | ||
4315 | } | ||
4316 | |||
4317 | /* De-initialize device specific data. | ||
4318 | * Caller must hold rtnl_lock. | ||
4319 | */ | ||
4320 | static inline void bond_deinit(struct net_device *bond_dev) | ||
4321 | { | ||
4322 | struct bonding *bond = bond_dev->priv; | ||
4323 | |||
4324 | list_del(&bond->bond_list); | ||
4325 | |||
4326 | #ifdef CONFIG_PROC_FS | ||
4327 | bond_remove_proc_entry(bond); | ||
4328 | #endif | ||
4329 | } | ||
4330 | |||
4331 | /* Unregister and free all bond devices. | ||
4332 | * Caller must hold rtnl_lock. | ||
4333 | */ | ||
4334 | static void bond_free_all(void) | ||
4335 | { | ||
4336 | struct bonding *bond, *nxt; | ||
4337 | |||
4338 | list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { | ||
4339 | struct net_device *bond_dev = bond->dev; | ||
4340 | |||
4341 | unregister_netdevice(bond_dev); | ||
4342 | bond_deinit(bond_dev); | ||
4343 | } | ||
4344 | |||
4345 | #ifdef CONFIG_PROC_FS | ||
4346 | bond_destroy_proc_dir(); | ||
4347 | #endif | ||
4348 | } | ||
4349 | |||
4350 | /*------------------------- Module initialization ---------------------------*/ | ||
4351 | |||
4352 | /* | ||
4353 | * Convert string input module parms. Accept either the | ||
4354 | * number of the mode or its string name. | ||
4355 | */ | ||
4356 | static inline int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) | ||
4357 | { | ||
4358 | int i; | ||
4359 | |||
4360 | for (i = 0; tbl[i].modename; i++) { | ||
4361 | if ((isdigit(*mode_arg) && | ||
4362 | tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) || | ||
4363 | (strncmp(mode_arg, tbl[i].modename, | ||
4364 | strlen(tbl[i].modename)) == 0)) { | ||
4365 | return tbl[i].mode; | ||
4366 | } | ||
4367 | } | ||
4368 | |||
4369 | return -1; | ||
4370 | } | ||
4371 | |||
4372 | static int bond_check_params(struct bond_params *params) | ||
4373 | { | ||
4374 | /* | ||
4375 | * Convert string parameters. | ||
4376 | */ | ||
4377 | if (mode) { | ||
4378 | bond_mode = bond_parse_parm(mode, bond_mode_tbl); | ||
4379 | if (bond_mode == -1) { | ||
4380 | printk(KERN_ERR DRV_NAME | ||
4381 | ": Error: Invalid bonding mode \"%s\"\n", | ||
4382 | mode == NULL ? "NULL" : mode); | ||
4383 | return -EINVAL; | ||
4384 | } | ||
4385 | } | ||
4386 | |||
4387 | if (lacp_rate) { | ||
4388 | if (bond_mode != BOND_MODE_8023AD) { | ||
4389 | printk(KERN_INFO DRV_NAME | ||
4390 | ": lacp_rate param is irrelevant in mode %s\n", | ||
4391 | bond_mode_name(bond_mode)); | ||
4392 | } else { | ||
4393 | lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); | ||
4394 | if (lacp_fast == -1) { | ||
4395 | printk(KERN_ERR DRV_NAME | ||
4396 | ": Error: Invalid lacp rate \"%s\"\n", | ||
4397 | lacp_rate == NULL ? "NULL" : lacp_rate); | ||
4398 | return -EINVAL; | ||
4399 | } | ||
4400 | } | ||
4401 | } | ||
4402 | |||
4403 | if (max_bonds < 1 || max_bonds > INT_MAX) { | ||
4404 | printk(KERN_WARNING DRV_NAME | ||
4405 | ": Warning: max_bonds (%d) not in range %d-%d, so it " | ||
4406 | "was reset to BOND_DEFAULT_MAX_BONDS (%d)", | ||
4407 | max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); | ||
4408 | max_bonds = BOND_DEFAULT_MAX_BONDS; | ||
4409 | } | ||
4410 | |||
4411 | if (miimon < 0) { | ||
4412 | printk(KERN_WARNING DRV_NAME | ||
4413 | ": Warning: miimon module parameter (%d), " | ||
4414 | "not in range 0-%d, so it was reset to %d\n", | ||
4415 | miimon, INT_MAX, BOND_LINK_MON_INTERV); | ||
4416 | miimon = BOND_LINK_MON_INTERV; | ||
4417 | } | ||
4418 | |||
4419 | if (updelay < 0) { | ||
4420 | printk(KERN_WARNING DRV_NAME | ||
4421 | ": Warning: updelay module parameter (%d), " | ||
4422 | "not in range 0-%d, so it was reset to 0\n", | ||
4423 | updelay, INT_MAX); | ||
4424 | updelay = 0; | ||
4425 | } | ||
4426 | |||
4427 | if (downdelay < 0) { | ||
4428 | printk(KERN_WARNING DRV_NAME | ||
4429 | ": Warning: downdelay module parameter (%d), " | ||
4430 | "not in range 0-%d, so it was reset to 0\n", | ||
4431 | downdelay, INT_MAX); | ||
4432 | downdelay = 0; | ||
4433 | } | ||
4434 | |||
4435 | if ((use_carrier != 0) && (use_carrier != 1)) { | ||
4436 | printk(KERN_WARNING DRV_NAME | ||
4437 | ": Warning: use_carrier module parameter (%d), " | ||
4438 | "not of valid value (0/1), so it was set to 1\n", | ||
4439 | use_carrier); | ||
4440 | use_carrier = 1; | ||
4441 | } | ||
4442 | |||
4443 | /* reset values for 802.3ad */ | ||
4444 | if (bond_mode == BOND_MODE_8023AD) { | ||
4445 | if (!miimon) { | ||
4446 | printk(KERN_WARNING DRV_NAME | ||
4447 | ": Warning: miimon must be specified, " | ||
4448 | "otherwise bonding will not detect link " | ||
4449 | "failure, speed and duplex which are " | ||
4450 | "essential for 802.3ad operation\n"); | ||
4451 | printk(KERN_WARNING "Forcing miimon to 100msec\n"); | ||
4452 | miimon = 100; | ||
4453 | } | ||
4454 | } | ||
4455 | |||
4456 | /* reset values for TLB/ALB */ | ||
4457 | if ((bond_mode == BOND_MODE_TLB) || | ||
4458 | (bond_mode == BOND_MODE_ALB)) { | ||
4459 | if (!miimon) { | ||
4460 | printk(KERN_WARNING DRV_NAME | ||
4461 | ": Warning: miimon must be specified, " | ||
4462 | "otherwise bonding will not detect link " | ||
4463 | "failure and link speed which are essential " | ||
4464 | "for TLB/ALB load balancing\n"); | ||
4465 | printk(KERN_WARNING "Forcing miimon to 100msec\n"); | ||
4466 | miimon = 100; | ||
4467 | } | ||
4468 | } | ||
4469 | |||
4470 | if (bond_mode == BOND_MODE_ALB) { | ||
4471 | printk(KERN_NOTICE DRV_NAME | ||
4472 | ": In ALB mode you might experience client " | ||
4473 | "disconnections upon reconnection of a link if the " | ||
4474 | "bonding module updelay parameter (%d msec) is " | ||
4475 | "incompatible with the forwarding delay time of the " | ||
4476 | "switch\n", | ||
4477 | updelay); | ||
4478 | } | ||
4479 | |||
4480 | if (!miimon) { | ||
4481 | if (updelay || downdelay) { | ||
4482 | /* just warn the user the up/down delay will have | ||
4483 | * no effect since miimon is zero... | ||
4484 | */ | ||
4485 | printk(KERN_WARNING DRV_NAME | ||
4486 | ": Warning: miimon module parameter not set " | ||
4487 | "and updelay (%d) or downdelay (%d) module " | ||
4488 | "parameter is set; updelay and downdelay have " | ||
4489 | "no effect unless miimon is set\n", | ||
4490 | updelay, downdelay); | ||
4491 | } | ||
4492 | } else { | ||
4493 | /* don't allow arp monitoring */ | ||
4494 | if (arp_interval) { | ||
4495 | printk(KERN_WARNING DRV_NAME | ||
4496 | ": Warning: miimon (%d) and arp_interval (%d) " | ||
4497 | "can't be used simultaneously, disabling ARP " | ||
4498 | "monitoring\n", | ||
4499 | miimon, arp_interval); | ||
4500 | arp_interval = 0; | ||
4501 | } | ||
4502 | |||
4503 | if ((updelay % miimon) != 0) { | ||
4504 | printk(KERN_WARNING DRV_NAME | ||
4505 | ": Warning: updelay (%d) is not a multiple " | ||
4506 | "of miimon (%d), updelay rounded to %d ms\n", | ||
4507 | updelay, miimon, (updelay / miimon) * miimon); | ||
4508 | } | ||
4509 | |||
4510 | updelay /= miimon; | ||
4511 | |||
4512 | if ((downdelay % miimon) != 0) { | ||
4513 | printk(KERN_WARNING DRV_NAME | ||
4514 | ": Warning: downdelay (%d) is not a multiple " | ||
4515 | "of miimon (%d), downdelay rounded to %d ms\n", | ||
4516 | downdelay, miimon, | ||
4517 | (downdelay / miimon) * miimon); | ||
4518 | } | ||
4519 | |||
4520 | downdelay /= miimon; | ||
4521 | } | ||
4522 | |||
4523 | if (arp_interval < 0) { | ||
4524 | printk(KERN_WARNING DRV_NAME | ||
4525 | ": Warning: arp_interval module parameter (%d) " | ||
4526 | ", not in range 0-%d, so it was reset to %d\n", | ||
4527 | arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); | ||
4528 | arp_interval = BOND_LINK_ARP_INTERV; | ||
4529 | } | ||
4530 | |||
4531 | for (arp_ip_count = 0; | ||
4532 | (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; | ||
4533 | arp_ip_count++) { | ||
4534 | /* not complete check, but should be good enough to | ||
4535 | catch mistakes */ | ||
4536 | if (!isdigit(arp_ip_target[arp_ip_count][0])) { | ||
4537 | printk(KERN_WARNING DRV_NAME | ||
4538 | ": Warning: bad arp_ip_target module parameter " | ||
4539 | "(%s), ARP monitoring will not be performed\n", | ||
4540 | arp_ip_target[arp_ip_count]); | ||
4541 | arp_interval = 0; | ||
4542 | } else { | ||
4543 | u32 ip = in_aton(arp_ip_target[arp_ip_count]); | ||
4544 | arp_target[arp_ip_count] = ip; | ||
4545 | } | ||
4546 | } | ||
4547 | |||
4548 | if (arp_interval && !arp_ip_count) { | ||
4549 | /* don't allow arping if no arp_ip_target given... */ | ||
4550 | printk(KERN_WARNING DRV_NAME | ||
4551 | ": Warning: arp_interval module parameter (%d) " | ||
4552 | "specified without providing an arp_ip_target " | ||
4553 | "parameter, arp_interval was reset to 0\n", | ||
4554 | arp_interval); | ||
4555 | arp_interval = 0; | ||
4556 | } | ||
4557 | |||
4558 | if (miimon) { | ||
4559 | printk(KERN_INFO DRV_NAME | ||
4560 | ": MII link monitoring set to %d ms\n", | ||
4561 | miimon); | ||
4562 | } else if (arp_interval) { | ||
4563 | int i; | ||
4564 | |||
4565 | printk(KERN_INFO DRV_NAME | ||
4566 | ": ARP monitoring set to %d ms with %d target(s):", | ||
4567 | arp_interval, arp_ip_count); | ||
4568 | |||
4569 | for (i = 0; i < arp_ip_count; i++) | ||
4570 | printk (" %s", arp_ip_target[i]); | ||
4571 | |||
4572 | printk("\n"); | ||
4573 | |||
4574 | } else { | ||
4575 | /* miimon and arp_interval not set, we need one so things | ||
4576 | * work as expected, see bonding.txt for details | ||
4577 | */ | ||
4578 | printk(KERN_WARNING DRV_NAME | ||
4579 | ": Warning: either miimon or arp_interval and " | ||
4580 | "arp_ip_target module parameters must be specified, " | ||
4581 | "otherwise bonding will not detect link failures! see " | ||
4582 | "bonding.txt for details.\n"); | ||
4583 | } | ||
4584 | |||
4585 | if (primary && !USES_PRIMARY(bond_mode)) { | ||
4586 | /* currently, using a primary only makes sense | ||
4587 | * in active backup, TLB or ALB modes | ||
4588 | */ | ||
4589 | printk(KERN_WARNING DRV_NAME | ||
4590 | ": Warning: %s primary device specified but has no " | ||
4591 | "effect in %s mode\n", | ||
4592 | primary, bond_mode_name(bond_mode)); | ||
4593 | primary = NULL; | ||
4594 | } | ||
4595 | |||
4596 | /* fill params struct with the proper values */ | ||
4597 | params->mode = bond_mode; | ||
4598 | params->miimon = miimon; | ||
4599 | params->arp_interval = arp_interval; | ||
4600 | params->updelay = updelay; | ||
4601 | params->downdelay = downdelay; | ||
4602 | params->use_carrier = use_carrier; | ||
4603 | params->lacp_fast = lacp_fast; | ||
4604 | params->primary[0] = 0; | ||
4605 | |||
4606 | if (primary) { | ||
4607 | strncpy(params->primary, primary, IFNAMSIZ); | ||
4608 | params->primary[IFNAMSIZ - 1] = 0; | ||
4609 | } | ||
4610 | |||
4611 | memcpy(params->arp_targets, arp_target, sizeof(arp_target)); | ||
4612 | |||
4613 | return 0; | ||
4614 | } | ||
4615 | |||
4616 | static int __init bonding_init(void) | ||
4617 | { | ||
4618 | struct bond_params params; | ||
4619 | int i; | ||
4620 | int res; | ||
4621 | |||
4622 | printk(KERN_INFO "%s", version); | ||
4623 | |||
4624 | res = bond_check_params(¶ms); | ||
4625 | if (res) { | ||
4626 | return res; | ||
4627 | } | ||
4628 | |||
4629 | rtnl_lock(); | ||
4630 | |||
4631 | #ifdef CONFIG_PROC_FS | ||
4632 | bond_create_proc_dir(); | ||
4633 | #endif | ||
4634 | |||
4635 | for (i = 0; i < max_bonds; i++) { | ||
4636 | struct net_device *bond_dev; | ||
4637 | |||
4638 | bond_dev = alloc_netdev(sizeof(struct bonding), "", ether_setup); | ||
4639 | if (!bond_dev) { | ||
4640 | res = -ENOMEM; | ||
4641 | goto out_err; | ||
4642 | } | ||
4643 | |||
4644 | res = dev_alloc_name(bond_dev, "bond%d"); | ||
4645 | if (res < 0) { | ||
4646 | free_netdev(bond_dev); | ||
4647 | goto out_err; | ||
4648 | } | ||
4649 | |||
4650 | /* bond_init() must be called after dev_alloc_name() (for the | ||
4651 | * /proc files), but before register_netdevice(), because we | ||
4652 | * need to set function pointers. | ||
4653 | */ | ||
4654 | res = bond_init(bond_dev, ¶ms); | ||
4655 | if (res < 0) { | ||
4656 | free_netdev(bond_dev); | ||
4657 | goto out_err; | ||
4658 | } | ||
4659 | |||
4660 | SET_MODULE_OWNER(bond_dev); | ||
4661 | |||
4662 | res = register_netdevice(bond_dev); | ||
4663 | if (res < 0) { | ||
4664 | bond_deinit(bond_dev); | ||
4665 | free_netdev(bond_dev); | ||
4666 | goto out_err; | ||
4667 | } | ||
4668 | } | ||
4669 | |||
4670 | rtnl_unlock(); | ||
4671 | register_netdevice_notifier(&bond_netdev_notifier); | ||
4672 | |||
4673 | return 0; | ||
4674 | |||
4675 | out_err: | ||
4676 | /* free and unregister all bonds that were successfully added */ | ||
4677 | bond_free_all(); | ||
4678 | |||
4679 | rtnl_unlock(); | ||
4680 | |||
4681 | return res; | ||
4682 | } | ||
4683 | |||
4684 | static void __exit bonding_exit(void) | ||
4685 | { | ||
4686 | unregister_netdevice_notifier(&bond_netdev_notifier); | ||
4687 | |||
4688 | rtnl_lock(); | ||
4689 | bond_free_all(); | ||
4690 | rtnl_unlock(); | ||
4691 | } | ||
4692 | |||
/* Module entry/exit hooks and module metadata. */
module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
MODULE_SUPPORTED_DEVICE("most ethernet devices");
4700 | |||
4701 | /* | ||
4702 | * Local variables: | ||
4703 | * c-indent-level: 8 | ||
4704 | * c-basic-offset: 8 | ||
4705 | * tab-width: 8 | ||
4706 | * End: | ||
4707 | */ | ||
4708 | |||
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h new file mode 100644 index 000000000000..8c325308489d --- /dev/null +++ b/drivers/net/bonding/bonding.h | |||
@@ -0,0 +1,252 @@ | |||
1 | /* | ||
2 | * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. | ||
3 | * | ||
4 | * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes | ||
5 | * NCM: Network and Communications Management, Inc. | ||
6 | * | ||
7 | * BUT, I'm the one who modified it for ethernet, so: | ||
8 | * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov | ||
9 | * | ||
10 | * This software may be used and distributed according to the terms | ||
11 | * of the GNU Public License, incorporated herein by reference. | ||
12 | * | ||
13 | * | ||
14 | * 2003/03/18 - Amir Noam <amir.noam at intel dot com>, | ||
15 | * Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
16 | * Shmulik Hen <shmulik.hen at intel dot com> | ||
17 | * - Added support for IEEE 802.3ad Dynamic link aggregation mode. | ||
18 | * | ||
19 | * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and | ||
20 | * Amir Noam <amir.noam at intel dot com> | ||
21 | * - Code beautification and style changes (mainly in comments). | ||
22 | * | ||
23 | * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
24 | * - Added support for Transmit load balancing mode. | ||
25 | * | ||
26 | * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> | ||
27 | * - Code cleanup and style changes | ||
28 | */ | ||
29 | |||
30 | #ifndef _LINUX_BONDING_H | ||
31 | #define _LINUX_BONDING_H | ||
32 | |||
33 | #include <linux/timer.h> | ||
34 | #include <linux/proc_fs.h> | ||
35 | #include <linux/if_bonding.h> | ||
36 | #include "bond_3ad.h" | ||
37 | #include "bond_alb.h" | ||
38 | |||
/* Driver identification strings; DRV_NAME prefixes the driver's log messages. */
#define DRV_VERSION "2.6.1"
#define DRV_RELDATE "October 29, 2004"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"

/* Capacity of the arp_targets array in struct bond_params. */
#define BOND_MAX_ARP_TARGETS 16
45 | |||
/*
 * Debug logging helper. With BONDING_DEBUG defined it prints at
 * KERN_DEBUG level, prefixed with the driver name, the calling function
 * and the line number; otherwise it expands to nothing.
 */
#ifdef BONDING_DEBUG
#define dprintk(fmt, args...) \
	printk(KERN_DEBUG \
	       DRV_NAME ": %s() %d: " fmt, __FUNCTION__, __LINE__ , ## args )
#else
#define dprintk(fmt, args...)
#endif /* BONDING_DEBUG */
53 | |||
/*
 * True when the device has IFF_UP set, is running and reports carrier.
 * Note that @dev is evaluated more than once.
 */
#define IS_UP(dev) \
	((((dev)->flags & IFF_UP) == IFF_UP) && \
	 netif_running(dev) && \
	 netif_carrier_ok(dev))
58 | |||
/*
 * Checks whether bond is ready for transmit: the bond device has IFF_UP
 * set, is running, and the bond has at least one slave.
 * @bond is evaluated more than once.
 *
 * Caller must hold bond->lock
 */
#define BOND_IS_OK(bond) \
	(((bond)->dev->flags & IFF_UP) && \
	 netif_running((bond)->dev) && \
	 ((bond)->slave_cnt > 0))
68 | |||
/*
 * Checks whether slave is ready for transmit: its device has IFF_UP set
 * and is running, its link is BOND_LINK_UP and its state is
 * BOND_STATE_ACTIVE. @slave is evaluated more than once.
 */
#define SLAVE_IS_OK(slave) \
	(((slave)->dev->flags & IFF_UP) && \
	 netif_running((slave)->dev) && \
	 ((slave)->link == BOND_LINK_UP) && \
	 ((slave)->state == BOND_STATE_ACTIVE))
77 | |||
78 | |||
/*
 * True for the modes in which a primary slave is meaningful:
 * active-backup, TLB and ALB. @mode is evaluated more than once.
 */
#define USES_PRIMARY(mode) \
	(((mode) == BOND_MODE_ACTIVEBACKUP) || \
	 ((mode) == BOND_MODE_TLB) || \
	 ((mode) == BOND_MODE_ALB))
83 | |||
/*
 * Less bad way to call ioctl from within the kernel; this needs to be
 * done some other way to get the call out of interrupt context.
 * Needs "ioctl" variable to be supplied by calling context.
 *
 * The address limit is switched to get_ds() around the call and restored
 * afterwards; the macro evaluates to the value returned by ioctl().
 * The statement expression declares locals named "res" and "fs", so the
 * arguments must not refer to caller variables with those names.
 */
#define IOCTL(dev, arg, cmd) ({ \
	int res = 0; \
	mm_segment_t fs = get_fs(); \
	set_fs(get_ds()); \
	res = ioctl(dev, arg, cmd); \
	set_fs(fs); \
	res; })
96 | |||
/**
 * bond_for_each_slave_from - iterate the slaves list from a starting point
 * @bond: the bond holding this list.
 * @pos: current slave.
 * @cnt: counter for max number of moves
 * @start: starting point.
 *
 * Performs exactly (bond)->slave_cnt steps, following ->next from @start.
 * All arguments are parenthesized in the expansion so that expressions
 * such as "*counter_ptr" can be passed for @cnt; @cnt and @pos are still
 * evaluated several times and must be side-effect free lvalues.
 *
 * Caller must hold bond->lock
 */
#define bond_for_each_slave_from(bond, pos, cnt, start)	\
	for ((cnt) = 0, (pos) = (start);			\
	     (cnt) < (bond)->slave_cnt;				\
	     (cnt)++, (pos) = (pos)->next)
110 | |||
/**
 * bond_for_each_slave_from_to - iterate the slaves list from start point to stop point
 * @bond: the bond holding this list.
 * @pos: current slave.
 * @cnt: counter for number max of moves
 * @start: start point.
 * @stop: stop point.
 *
 * Visits @start, then follows ->next until the slave after @stop is
 * reached or (bond)->slave_cnt steps were made, whichever comes first.
 * All arguments are parenthesized in the expansion so that expressions
 * such as "*counter_ptr" can be passed for @cnt; @cnt and @pos are still
 * evaluated several times and must be side-effect free lvalues.
 *
 * Caller must hold bond->lock
 */
#define bond_for_each_slave_from_to(bond, pos, cnt, start, stop)	\
	for ((cnt) = 0, (pos) = (start);				\
	     (((cnt) < (bond)->slave_cnt) && ((pos) != (stop)->next));	\
	     (cnt)++, (pos) = (pos)->next)
125 | |||
/**
 * bond_for_each_slave - iterate the slaves list from head
 * @bond: the bond holding this list.
 * @pos: current slave.
 * @cnt: counter for max number of moves
 *
 * Shorthand for bond_for_each_slave_from() starting at
 * (bond)->first_slave.
 *
 * Caller must hold bond->lock
 */
#define bond_for_each_slave(bond, pos, cnt)	\
		bond_for_each_slave_from(bond, pos, cnt, (bond)->first_slave)
136 | |||
137 | |||
/*
 * Per-bond configuration, filled in from the module parameters by
 * bond_check_params() and copied into each bond by bond_init().
 */
struct bond_params {
	int mode;		/* bonding mode, one of BOND_MODE_XXXX */
	int miimon;		/* MII link monitoring interval in ms, 0 = off */
	int arp_interval;	/* ARP monitoring interval in ms, 0 = off */
	int use_carrier;	/* 0 or 1 */
	int updelay;		/* in multiples of miimon when miimon is set */
	int downdelay;		/* in multiples of miimon when miimon is set */
	int lacp_fast;		/* parsed value of the lacp_rate parameter */
	char primary[IFNAMSIZ];	/* primary device name, "" if none */
	u32 arp_targets[BOND_MAX_ARP_TARGETS]; /* ARP targets as returned by in_aton() */
};
149 | |||
/* One VLAN id known to a bond; linked through vlan_list. */
struct vlan_entry {
	struct list_head vlan_list;
	unsigned short vlan_id;
};
154 | |||
/*
 * State kept by a bond for each of its slave devices. Slaves are chained
 * through next/prev; the bond_for_each_slave*() iterators follow ->next.
 */
struct slave {
	struct net_device *dev; /* first - useful for panic debug */
	struct slave *next;
	struct slave *prev;
	s16 delay;
	u32 jiffies;
	s8 link; /* one of BOND_LINK_XXXX */
	s8 state; /* one of BOND_STATE_XXXX */
	u32 original_flags;
	u32 link_failure_count;
	u16 speed;
	u8 duplex;
	u8 perm_hwaddr[ETH_ALEN];
	struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
	struct tlb_slave_info tlb_info;
};
171 | |||
/*
 * Private data of a bond device (stored in net_device->priv).
 *
 * Here are the locking policies for the two bonding locks:
 *
 * 1) Get bond->lock when reading/writing slave list.
 * 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave.
 *    (It is unnecessary when the write-lock is put with bond->lock.)
 * 3) When we lock with bond->curr_slave_lock, we must lock with bond->lock
 *    beforehand.
 */
struct bonding {
	struct net_device *dev; /* first - useful for panic debug */
	struct slave *first_slave; /* head used by bond_for_each_slave() */
	struct slave *curr_active_slave; /* guarded by curr_slave_lock */
	struct slave *current_arp_slave;
	struct slave *primary_slave;
	s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
	rwlock_t lock;
	rwlock_t curr_slave_lock;
	struct timer_list mii_timer;
	struct timer_list arp_timer;
	s8 kill_timers;
	struct net_device_stats stats;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc_entry;
	char proc_file_name[IFNAMSIZ];
#endif /* CONFIG_PROC_FS */
	struct list_head bond_list; /* link in the driver's list of bonds */
	struct dev_mc_list *mc_list;
	u16 flags;
	struct ad_bond_info ad_info;
	struct alb_bond_info alb_info;
	struct bond_params params; /* copy of the parameters passed to bond_init() */
	struct list_head vlan_list;
	struct vlan_group *vlgrp;
};
207 | |||
208 | /** | ||
209 | * Returns NULL if the net_device does not belong to any of the bond's slaves | ||
210 | * | ||
211 | * Caller must hold bond lock for read | ||
212 | */ | ||
213 | extern inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) | ||
214 | { | ||
215 | struct slave *slave = NULL; | ||
216 | int i; | ||
217 | |||
218 | bond_for_each_slave(bond, slave, i) { | ||
219 | if (slave->dev == slave_dev) { | ||
220 | break; | ||
221 | } | ||
222 | } | ||
223 | |||
224 | return slave; | ||
225 | } | ||
226 | |||
227 | extern inline struct bonding *bond_get_bond_by_slave(struct slave *slave) | ||
228 | { | ||
229 | if (!slave || !slave->dev->master) { | ||
230 | return NULL; | ||
231 | } | ||
232 | |||
233 | return (struct bonding *)slave->dev->master->priv; | ||
234 | } | ||
235 | |||
236 | extern inline void bond_set_slave_inactive_flags(struct slave *slave) | ||
237 | { | ||
238 | slave->state = BOND_STATE_BACKUP; | ||
239 | slave->dev->flags |= IFF_NOARP; | ||
240 | } | ||
241 | |||
242 | extern inline void bond_set_slave_active_flags(struct slave *slave) | ||
243 | { | ||
244 | slave->state = BOND_STATE_ACTIVE; | ||
245 | slave->dev->flags &= ~IFF_NOARP; | ||
246 | } | ||
247 | |||
248 | struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); | ||
249 | int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); | ||
250 | |||
251 | #endif /* _LINUX_BONDING_H */ | ||
252 | |||