]> bbs.cooldavid.org Git - net-next-2.6.git/blame - drivers/net/bonding/bond_main.c
bond: Simplify bond device destruction
[net-next-2.6.git] / drivers / net / bonding / bond_main.c
CommitLineData
1da177e4
LT
1/*
2 * originally based on the dummy device.
3 *
4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
5 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
6 *
7 * bonding.c: an Ethernet Bonding driver
8 *
9 * This is useful to talk to a Cisco EtherChannel compatible equipment:
10 * Cisco 5500
11 * Sun Trunking (Solaris)
12 * Alteon AceDirector Trunks
13 * Linux Bonding
14 * and probably many L2 switches ...
15 *
16 * How it works:
17 * ifconfig bond0 ipaddress netmask up
18 * will setup a network device, with an ip address. No mac address
19 * will be assigned at this time. The hw mac address will come from
20 * the first slave bonded to the channel. All slaves will then use
21 * this hw mac address.
22 *
23 * ifconfig bond0 down
24 * will release all slaves, marking them as down.
25 *
26 * ifenslave bond0 eth0
27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either
28 * a: be used as initial mac address
29 * b: if a hw mac address already is there, eth0's hw mac address
30 * will then be set from bond0.
31 *
1da177e4
LT
32 */
33
1da177e4
LT
34#include <linux/kernel.h>
35#include <linux/module.h>
1da177e4
LT
36#include <linux/types.h>
37#include <linux/fcntl.h>
38#include <linux/interrupt.h>
39#include <linux/ptrace.h>
40#include <linux/ioport.h>
41#include <linux/in.h>
169a3e66 42#include <net/ip.h>
1da177e4 43#include <linux/ip.h>
169a3e66
JV
44#include <linux/tcp.h>
45#include <linux/udp.h>
1da177e4
LT
46#include <linux/slab.h>
47#include <linux/string.h>
48#include <linux/init.h>
49#include <linux/timer.h>
50#include <linux/socket.h>
51#include <linux/ctype.h>
52#include <linux/inet.h>
53#include <linux/bitops.h>
3d632c3f 54#include <linux/io.h>
1da177e4 55#include <asm/system.h>
1da177e4 56#include <asm/dma.h>
3d632c3f 57#include <linux/uaccess.h>
1da177e4
LT
58#include <linux/errno.h>
59#include <linux/netdevice.h>
60#include <linux/inetdevice.h>
a816c7c7 61#include <linux/igmp.h>
1da177e4
LT
62#include <linux/etherdevice.h>
63#include <linux/skbuff.h>
64#include <net/sock.h>
65#include <linux/rtnetlink.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
68#include <linux/smp.h>
69#include <linux/if_ether.h>
70#include <net/arp.h>
71#include <linux/mii.h>
72#include <linux/ethtool.h>
73#include <linux/if_vlan.h>
74#include <linux/if_bonding.h>
b63bb739 75#include <linux/jiffies.h>
c3ade5ca 76#include <net/route.h>
457c4cbc 77#include <net/net_namespace.h>
1da177e4
LT
78#include "bonding.h"
79#include "bond_3ad.h"
80#include "bond_alb.h"
81
82/*---------------------------- Module parameters ----------------------------*/
83
84/* monitor all links that often (in milliseconds). <=0 disables monitoring */
85#define BOND_LINK_MON_INTERV 0
86#define BOND_LINK_ARP_INTERV 0
87
88static int max_bonds = BOND_DEFAULT_MAX_BONDS;
7893b249 89static int num_grat_arp = 1;
305d552a 90static int num_unsol_na = 1;
1da177e4 91static int miimon = BOND_LINK_MON_INTERV;
3d632c3f
SH
92static int updelay;
93static int downdelay;
1da177e4 94static int use_carrier = 1;
3d632c3f
SH
95static char *mode;
96static char *primary;
a549952a 97static char *primary_reselect;
3d632c3f
SH
98static char *lacp_rate;
99static char *ad_select;
100static char *xmit_hash_policy;
1da177e4 101static int arp_interval = BOND_LINK_ARP_INTERV;
3d632c3f
SH
102static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
103static char *arp_validate;
104static char *fail_over_mac;
d2991f75 105static struct bond_params bonding_defaults;
1da177e4
LT
106
107module_param(max_bonds, int, 0);
108MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
7893b249
MS
109module_param(num_grat_arp, int, 0644);
110MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
305d552a
BH
111module_param(num_unsol_na, int, 0644);
112MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
1da177e4
LT
113module_param(miimon, int, 0);
114MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
115module_param(updelay, int, 0);
116MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
117module_param(downdelay, int, 0);
2ac47660
MW
118MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
119 "in milliseconds");
1da177e4 120module_param(use_carrier, int, 0);
2ac47660
MW
121MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
122 "0 for off, 1 for on (default)");
1da177e4 123module_param(mode, charp, 0);
2ac47660
MW
124MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
125 "1 for active-backup, 2 for balance-xor, "
126 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
127 "6 for balance-alb");
1da177e4
LT
128module_param(primary, charp, 0);
129MODULE_PARM_DESC(primary, "Primary network device to use");
a549952a
JP
130module_param(primary_reselect, charp, 0);
131MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
132 "once it comes up; "
133 "0 for always (default), "
134 "1 for only if speed of primary is "
135 "better, "
136 "2 for only on active slave "
137 "failure");
1da177e4 138module_param(lacp_rate, charp, 0);
2ac47660
MW
139MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
140 "(slow/fast)");
fd989c83
JV
141module_param(ad_select, charp, 0);
142MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)");
169a3e66 143module_param(xmit_hash_policy, charp, 0);
2ac47660
MW
144MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
145 ", 1 for layer 3+4");
1da177e4
LT
146module_param(arp_interval, int, 0);
147MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
148module_param_array(arp_ip_target, charp, NULL, 0);
149MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
f5b2b966
JV
150module_param(arp_validate, charp, 0);
151MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
3915c1e8
JV
152module_param(fail_over_mac, charp, 0);
153MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. none (default), active or follow");
1da177e4
LT
154
155/*----------------------------- Global variables ----------------------------*/
156
f71e1309 157static const char * const version =
1da177e4
LT
158 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
159
12479f9a 160LIST_HEAD(bond_dev_list);
1da177e4
LT
161
162#ifdef CONFIG_PROC_FS
3d632c3f 163static struct proc_dir_entry *bond_proc_dir;
1da177e4
LT
164#endif
165
3d632c3f
SH
166static __be32 arp_target[BOND_MAX_ARP_TARGETS];
167static int arp_ip_count;
1da177e4 168static int bond_mode = BOND_MODE_ROUNDROBIN;
3d632c3f
SH
169static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
170static int lacp_fast;
217df670 171
1da177e4 172
e97fd7c6 173const struct bond_parm_tbl bond_lacp_tbl[] = {
1da177e4
LT
174{ "slow", AD_LACP_SLOW},
175{ "fast", AD_LACP_FAST},
176{ NULL, -1},
177};
178
e97fd7c6 179const struct bond_parm_tbl bond_mode_tbl[] = {
1da177e4
LT
180{ "balance-rr", BOND_MODE_ROUNDROBIN},
181{ "active-backup", BOND_MODE_ACTIVEBACKUP},
182{ "balance-xor", BOND_MODE_XOR},
183{ "broadcast", BOND_MODE_BROADCAST},
184{ "802.3ad", BOND_MODE_8023AD},
185{ "balance-tlb", BOND_MODE_TLB},
186{ "balance-alb", BOND_MODE_ALB},
187{ NULL, -1},
188};
189
e97fd7c6 190const struct bond_parm_tbl xmit_hashtype_tbl[] = {
169a3e66
JV
191{ "layer2", BOND_XMIT_POLICY_LAYER2},
192{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
6f6652be 193{ "layer2+3", BOND_XMIT_POLICY_LAYER23},
169a3e66
JV
194{ NULL, -1},
195};
196
e97fd7c6 197const struct bond_parm_tbl arp_validate_tbl[] = {
f5b2b966
JV
198{ "none", BOND_ARP_VALIDATE_NONE},
199{ "active", BOND_ARP_VALIDATE_ACTIVE},
200{ "backup", BOND_ARP_VALIDATE_BACKUP},
201{ "all", BOND_ARP_VALIDATE_ALL},
202{ NULL, -1},
203};
204
e97fd7c6 205const struct bond_parm_tbl fail_over_mac_tbl[] = {
3915c1e8
JV
206{ "none", BOND_FOM_NONE},
207{ "active", BOND_FOM_ACTIVE},
208{ "follow", BOND_FOM_FOLLOW},
209{ NULL, -1},
210};
211
a549952a
JP
212const struct bond_parm_tbl pri_reselect_tbl[] = {
213{ "always", BOND_PRI_RESELECT_ALWAYS},
214{ "better", BOND_PRI_RESELECT_BETTER},
215{ "failure", BOND_PRI_RESELECT_FAILURE},
216{ NULL, -1},
217};
218
fd989c83
JV
219struct bond_parm_tbl ad_select_tbl[] = {
220{ "stable", BOND_AD_STABLE},
221{ "bandwidth", BOND_AD_BANDWIDTH},
222{ "count", BOND_AD_COUNT},
223{ NULL, -1},
224};
225
1da177e4
LT
226/*-------------------------- Forward declarations ---------------------------*/
227
c3ade5ca 228static void bond_send_gratuitous_arp(struct bonding *bond);
181470fc 229static int bond_init(struct net_device *bond_dev);
c67dfb29 230static void bond_uninit(struct net_device *bond_dev);
1da177e4
LT
231
232/*---------------------------- General routines -----------------------------*/
233
4ad072c9 234static const char *bond_mode_name(int mode)
1da177e4 235{
77afc92b
HE
236 static const char *names[] = {
237 [BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
238 [BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
239 [BOND_MODE_XOR] = "load balancing (xor)",
240 [BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
3d632c3f 241 [BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation",
77afc92b
HE
242 [BOND_MODE_TLB] = "transmit load balancing",
243 [BOND_MODE_ALB] = "adaptive load balancing",
244 };
245
246 if (mode < 0 || mode > BOND_MODE_ALB)
1da177e4 247 return "unknown";
77afc92b
HE
248
249 return names[mode];
1da177e4
LT
250}
251
252/*---------------------------------- VLAN -----------------------------------*/
253
254/**
255 * bond_add_vlan - add a new vlan id on bond
256 * @bond: bond that got the notification
257 * @vlan_id: the vlan id to add
258 *
259 * Returns -ENOMEM if allocation failed.
260 */
261static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
262{
263 struct vlan_entry *vlan;
264
5a03cdb7 265 pr_debug("bond: %s, vlan id %d\n",
3d632c3f 266 (bond ? bond->dev->name : "None"), vlan_id);
1da177e4 267
305d552a 268 vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
3d632c3f 269 if (!vlan)
1da177e4 270 return -ENOMEM;
1da177e4
LT
271
272 INIT_LIST_HEAD(&vlan->vlan_list);
273 vlan->vlan_id = vlan_id;
274
275 write_lock_bh(&bond->lock);
276
277 list_add_tail(&vlan->vlan_list, &bond->vlan_list);
278
279 write_unlock_bh(&bond->lock);
280
5a03cdb7 281 pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
1da177e4
LT
282
283 return 0;
284}
285
286/**
287 * bond_del_vlan - delete a vlan id from bond
288 * @bond: bond that got the notification
289 * @vlan_id: the vlan id to delete
290 *
291 * returns -ENODEV if @vlan_id was not found in @bond.
292 */
293static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
294{
0883beca 295 struct vlan_entry *vlan;
1da177e4
LT
296 int res = -ENODEV;
297
5a03cdb7 298 pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
1da177e4
LT
299
300 write_lock_bh(&bond->lock);
301
0883beca 302 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
1da177e4
LT
303 if (vlan->vlan_id == vlan_id) {
304 list_del(&vlan->vlan_list);
305
58402054 306 if (bond_is_lb(bond))
1da177e4 307 bond_alb_clear_vlan(bond, vlan_id);
1da177e4 308
5a03cdb7 309 pr_debug("removed VLAN ID %d from bond %s\n", vlan_id,
1da177e4
LT
310 bond->dev->name);
311
312 kfree(vlan);
313
314 if (list_empty(&bond->vlan_list) &&
315 (bond->slave_cnt == 0)) {
316 /* Last VLAN removed and no slaves, so
317 * restore block on adding VLANs. This will
318 * be removed once new slaves that are not
319 * VLAN challenged will be added.
320 */
321 bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
322 }
323
324 res = 0;
325 goto out;
326 }
327 }
328
5a03cdb7 329 pr_debug("couldn't find VLAN ID %d in bond %s\n", vlan_id,
1da177e4
LT
330 bond->dev->name);
331
332out:
333 write_unlock_bh(&bond->lock);
334 return res;
335}
336
337/**
338 * bond_has_challenged_slaves
339 * @bond: the bond we're working on
340 *
341 * Searches the slave list. Returns 1 if a vlan challenged slave
342 * was found, 0 otherwise.
343 *
344 * Assumes bond->lock is held.
345 */
346static int bond_has_challenged_slaves(struct bonding *bond)
347{
348 struct slave *slave;
349 int i;
350
351 bond_for_each_slave(bond, slave, i) {
352 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
5a03cdb7 353 pr_debug("found VLAN challenged slave - %s\n",
1da177e4
LT
354 slave->dev->name);
355 return 1;
356 }
357 }
358
5a03cdb7 359 pr_debug("no VLAN challenged slaves found\n");
1da177e4
LT
360 return 0;
361}
362
363/**
364 * bond_next_vlan - safely skip to the next item in the vlans list.
365 * @bond: the bond we're working on
366 * @curr: item we're advancing from
367 *
368 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
369 * or @curr->next otherwise (even if it is @curr itself again).
3d632c3f 370 *
1da177e4
LT
371 * Caller must hold bond->lock
372 */
373struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
374{
375 struct vlan_entry *next, *last;
376
3d632c3f 377 if (list_empty(&bond->vlan_list))
1da177e4 378 return NULL;
1da177e4
LT
379
380 if (!curr) {
381 next = list_entry(bond->vlan_list.next,
382 struct vlan_entry, vlan_list);
383 } else {
384 last = list_entry(bond->vlan_list.prev,
385 struct vlan_entry, vlan_list);
386 if (last == curr) {
387 next = list_entry(bond->vlan_list.next,
388 struct vlan_entry, vlan_list);
389 } else {
390 next = list_entry(curr->vlan_list.next,
391 struct vlan_entry, vlan_list);
392 }
393 }
394
395 return next;
396}
397
398/**
399 * bond_dev_queue_xmit - Prepare skb for xmit.
3d632c3f 400 *
1da177e4
LT
401 * @bond: bond device that got this skb for tx.
402 * @skb: hw accel VLAN tagged skb to transmit
403 * @slave_dev: slave that is supposed to xmit this skbuff
3d632c3f 404 *
1da177e4
LT
405 * When the bond gets an skb to transmit that is
406 * already hardware accelerated VLAN tagged, and it
407 * needs to relay this skb to a slave that is not
408 * hw accel capable, the skb needs to be "unaccelerated",
409 * i.e. strip the hwaccel tag and re-insert it as part
410 * of the payload.
411 */
3d632c3f
SH
412int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
413 struct net_device *slave_dev)
1da177e4 414{
966bc6f4 415 unsigned short uninitialized_var(vlan_id);
1da177e4
LT
416
417 if (!list_empty(&bond->vlan_list) &&
418 !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
419 vlan_get_tag(skb, &vlan_id) == 0) {
420 skb->dev = slave_dev;
421 skb = vlan_put_tag(skb, vlan_id);
422 if (!skb) {
423 /* vlan_put_tag() frees the skb in case of error,
424 * so return success here so the calling functions
425 * won't attempt to free is again.
426 */
427 return 0;
428 }
429 } else {
430 skb->dev = slave_dev;
431 }
432
433 skb->priority = 1;
434 dev_queue_xmit(skb);
435
436 return 0;
437}
438
439/*
440 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
441 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
442 * lock because:
443 * a. This operation is performed in IOCTL context,
444 * b. The operation is protected by the RTNL semaphore in the 8021q code,
445 * c. Holding a lock with BH disabled while directly calling a base driver
446 * entry point is generally a BAD idea.
3d632c3f 447 *
1da177e4
LT
448 * The design of synchronization/protection for this operation in the 8021q
449 * module is good for one or more VLAN devices over a single physical device
450 * and cannot be extended for a teaming solution like bonding, so there is a
451 * potential race condition here where a net device from the vlan group might
452 * be referenced (either by a base driver or the 8021q code) while it is being
453 * removed from the system. However, it turns out we're not making matters
454 * worse, and if it works for regular VLAN usage it will work here too.
455*/
456
457/**
458 * bond_vlan_rx_register - Propagates registration to slaves
459 * @bond_dev: bonding net device that got called
460 * @grp: vlan group being registered
461 */
3d632c3f
SH
462static void bond_vlan_rx_register(struct net_device *bond_dev,
463 struct vlan_group *grp)
1da177e4 464{
454d7c9b 465 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
466 struct slave *slave;
467 int i;
468
469 bond->vlgrp = grp;
470
471 bond_for_each_slave(bond, slave, i) {
472 struct net_device *slave_dev = slave->dev;
eb7cc59a 473 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
474
475 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
eb7cc59a
SH
476 slave_ops->ndo_vlan_rx_register) {
477 slave_ops->ndo_vlan_rx_register(slave_dev, grp);
1da177e4
LT
478 }
479 }
480}
481
482/**
483 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
484 * @bond_dev: bonding net device that got called
485 * @vid: vlan id being added
486 */
487static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
488{
454d7c9b 489 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
490 struct slave *slave;
491 int i, res;
492
493 bond_for_each_slave(bond, slave, i) {
494 struct net_device *slave_dev = slave->dev;
eb7cc59a 495 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
496
497 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
eb7cc59a
SH
498 slave_ops->ndo_vlan_rx_add_vid) {
499 slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid);
1da177e4
LT
500 }
501 }
502
503 res = bond_add_vlan(bond, vid);
504 if (res) {
3d632c3f 505 pr_err(DRV_NAME
4e0952c7 506 ": %s: Error: Failed to add vlan id %d\n",
1da177e4
LT
507 bond_dev->name, vid);
508 }
509}
510
511/**
512 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
513 * @bond_dev: bonding net device that got called
514 * @vid: vlan id being removed
515 */
516static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
517{
454d7c9b 518 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
519 struct slave *slave;
520 struct net_device *vlan_dev;
521 int i, res;
522
523 bond_for_each_slave(bond, slave, i) {
524 struct net_device *slave_dev = slave->dev;
eb7cc59a 525 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
526
527 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
eb7cc59a 528 slave_ops->ndo_vlan_rx_kill_vid) {
1da177e4
LT
529 /* Save and then restore vlan_dev in the grp array,
530 * since the slave's driver might clear it.
531 */
5c15bdec 532 vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
eb7cc59a 533 slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
5c15bdec 534 vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
1da177e4
LT
535 }
536 }
537
538 res = bond_del_vlan(bond, vid);
539 if (res) {
3d632c3f 540 pr_err(DRV_NAME
4e0952c7 541 ": %s: Error: Failed to remove vlan id %d\n",
1da177e4
LT
542 bond_dev->name, vid);
543 }
544}
545
546static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
547{
548 struct vlan_entry *vlan;
eb7cc59a 549 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
550
551 write_lock_bh(&bond->lock);
552
eb7cc59a 553 if (list_empty(&bond->vlan_list))
1da177e4 554 goto out;
1da177e4
LT
555
556 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
eb7cc59a
SH
557 slave_ops->ndo_vlan_rx_register)
558 slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp);
1da177e4
LT
559
560 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
eb7cc59a 561 !(slave_ops->ndo_vlan_rx_add_vid))
1da177e4 562 goto out;
1da177e4 563
eb7cc59a
SH
564 list_for_each_entry(vlan, &bond->vlan_list, vlan_list)
565 slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id);
1da177e4
LT
566
567out:
568 write_unlock_bh(&bond->lock);
569}
570
3d632c3f
SH
571static void bond_del_vlans_from_slave(struct bonding *bond,
572 struct net_device *slave_dev)
1da177e4 573{
eb7cc59a 574 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
575 struct vlan_entry *vlan;
576 struct net_device *vlan_dev;
577
578 write_lock_bh(&bond->lock);
579
eb7cc59a 580 if (list_empty(&bond->vlan_list))
1da177e4 581 goto out;
1da177e4
LT
582
583 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
eb7cc59a 584 !(slave_ops->ndo_vlan_rx_kill_vid))
1da177e4 585 goto unreg;
1da177e4
LT
586
587 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
588 /* Save and then restore vlan_dev in the grp array,
589 * since the slave's driver might clear it.
590 */
5c15bdec 591 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
eb7cc59a 592 slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
5c15bdec 593 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
1da177e4
LT
594 }
595
596unreg:
597 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
eb7cc59a
SH
598 slave_ops->ndo_vlan_rx_register)
599 slave_ops->ndo_vlan_rx_register(slave_dev, NULL);
1da177e4
LT
600
601out:
602 write_unlock_bh(&bond->lock);
603}
604
605/*------------------------------- Link status -------------------------------*/
606
ff59c456
JV
607/*
608 * Set the carrier state for the master according to the state of its
609 * slaves. If any slaves are up, the master is up. In 802.3ad mode,
610 * do special 802.3ad magic.
611 *
612 * Returns zero if carrier state does not change, nonzero if it does.
613 */
614static int bond_set_carrier(struct bonding *bond)
615{
616 struct slave *slave;
617 int i;
618
619 if (bond->slave_cnt == 0)
620 goto down;
621
622 if (bond->params.mode == BOND_MODE_8023AD)
623 return bond_3ad_set_carrier(bond);
624
625 bond_for_each_slave(bond, slave, i) {
626 if (slave->link == BOND_LINK_UP) {
627 if (!netif_carrier_ok(bond->dev)) {
628 netif_carrier_on(bond->dev);
629 return 1;
630 }
631 return 0;
632 }
633 }
634
635down:
636 if (netif_carrier_ok(bond->dev)) {
637 netif_carrier_off(bond->dev);
638 return 1;
639 }
640 return 0;
641}
642
1da177e4
LT
643/*
644 * Get link speed and duplex from the slave's base driver
645 * using ethtool. If for some reason the call fails or the
646 * values are invalid, fake speed and duplex to 100/Full
647 * and return error.
648 */
649static int bond_update_speed_duplex(struct slave *slave)
650{
651 struct net_device *slave_dev = slave->dev;
1da177e4 652 struct ethtool_cmd etool;
61a44b9c 653 int res;
1da177e4
LT
654
655 /* Fake speed and duplex */
656 slave->speed = SPEED_100;
657 slave->duplex = DUPLEX_FULL;
658
61a44b9c
MW
659 if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
660 return -1;
1da177e4 661
61a44b9c
MW
662 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
663 if (res < 0)
1da177e4 664 return -1;
1da177e4 665
1da177e4
LT
666 switch (etool.speed) {
667 case SPEED_10:
668 case SPEED_100:
669 case SPEED_1000:
94dbffd5 670 case SPEED_10000:
1da177e4
LT
671 break;
672 default:
673 return -1;
674 }
675
676 switch (etool.duplex) {
677 case DUPLEX_FULL:
678 case DUPLEX_HALF:
679 break;
680 default:
681 return -1;
682 }
683
684 slave->speed = etool.speed;
685 slave->duplex = etool.duplex;
686
687 return 0;
688}
689
690/*
691 * if <dev> supports MII link status reporting, check its link status.
692 *
693 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
3d632c3f 694 * depending upon the setting of the use_carrier parameter.
1da177e4
LT
695 *
696 * Return either BMSR_LSTATUS, meaning that the link is up (or we
697 * can't tell and just pretend it is), or 0, meaning that the link is
698 * down.
699 *
700 * If reporting is non-zero, instead of faking link up, return -1 if
701 * both ETHTOOL and MII ioctls fail (meaning the device does not
702 * support them). If use_carrier is set, return whatever it says.
703 * It'd be nice if there was a good way to tell if a driver supports
704 * netif_carrier, but there really isn't.
705 */
3d632c3f
SH
706static int bond_check_dev_link(struct bonding *bond,
707 struct net_device *slave_dev, int reporting)
1da177e4 708{
eb7cc59a 709 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
d9d52832 710 int (*ioctl)(struct net_device *, struct ifreq *, int);
1da177e4
LT
711 struct ifreq ifr;
712 struct mii_ioctl_data *mii;
1da177e4 713
6c988853
PG
714 if (!reporting && !netif_running(slave_dev))
715 return 0;
716
eb7cc59a 717 if (bond->params.use_carrier)
1da177e4 718 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
1da177e4 719
29112f4e
JP
720 /* Try to get link status using Ethtool first. */
721 if (slave_dev->ethtool_ops) {
722 if (slave_dev->ethtool_ops->get_link) {
723 u32 link;
724
725 link = slave_dev->ethtool_ops->get_link(slave_dev);
726
727 return link ? BMSR_LSTATUS : 0;
728 }
729 }
730
3d632c3f 731 /* Ethtool can't be used, fallback to MII ioctls. */
eb7cc59a 732 ioctl = slave_ops->ndo_do_ioctl;
1da177e4
LT
733 if (ioctl) {
734 /* TODO: set pointer to correct ioctl on a per team member */
735 /* bases to make this more efficient. that is, once */
736 /* we determine the correct ioctl, we will always */
737 /* call it and not the others for that team */
738 /* member. */
739
740 /*
741 * We cannot assume that SIOCGMIIPHY will also read a
742 * register; not all network drivers (e.g., e100)
743 * support that.
744 */
745
746 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
747 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
748 mii = if_mii(&ifr);
749 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
750 mii->reg_num = MII_BMSR;
3d632c3f
SH
751 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0)
752 return mii->val_out & BMSR_LSTATUS;
1da177e4
LT
753 }
754 }
755
1da177e4
LT
756 /*
757 * If reporting, report that either there's no dev->do_ioctl,
61a44b9c 758 * or both SIOCGMIIREG and get_link failed (meaning that we
1da177e4
LT
759 * cannot report link status). If not reporting, pretend
760 * we're ok.
761 */
3d632c3f 762 return reporting ? -1 : BMSR_LSTATUS;
1da177e4
LT
763}
764
765/*----------------------------- Multicast list ------------------------------*/
766
767/*
768 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
769 */
3d632c3f
SH
770static inline int bond_is_dmi_same(const struct dev_mc_list *dmi1,
771 const struct dev_mc_list *dmi2)
1da177e4
LT
772{
773 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
774 dmi1->dmi_addrlen == dmi2->dmi_addrlen;
775}
776
777/*
778 * returns dmi entry if found, NULL otherwise
779 */
3d632c3f
SH
780static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi,
781 struct dev_mc_list *mc_list)
1da177e4
LT
782{
783 struct dev_mc_list *idmi;
784
785 for (idmi = mc_list; idmi; idmi = idmi->next) {
3d632c3f 786 if (bond_is_dmi_same(dmi, idmi))
1da177e4 787 return idmi;
1da177e4
LT
788 }
789
790 return NULL;
791}
792
793/*
794 * Push the promiscuity flag down to appropriate slaves
795 */
7e1a1ac1 796static int bond_set_promiscuity(struct bonding *bond, int inc)
1da177e4 797{
7e1a1ac1 798 int err = 0;
1da177e4
LT
799 if (USES_PRIMARY(bond->params.mode)) {
800 /* write lock already acquired */
801 if (bond->curr_active_slave) {
7e1a1ac1
WC
802 err = dev_set_promiscuity(bond->curr_active_slave->dev,
803 inc);
1da177e4
LT
804 }
805 } else {
806 struct slave *slave;
807 int i;
808 bond_for_each_slave(bond, slave, i) {
7e1a1ac1
WC
809 err = dev_set_promiscuity(slave->dev, inc);
810 if (err)
811 return err;
1da177e4
LT
812 }
813 }
7e1a1ac1 814 return err;
1da177e4
LT
815}
816
817/*
818 * Push the allmulti flag down to all slaves
819 */
7e1a1ac1 820static int bond_set_allmulti(struct bonding *bond, int inc)
1da177e4 821{
7e1a1ac1 822 int err = 0;
1da177e4
LT
823 if (USES_PRIMARY(bond->params.mode)) {
824 /* write lock already acquired */
825 if (bond->curr_active_slave) {
7e1a1ac1
WC
826 err = dev_set_allmulti(bond->curr_active_slave->dev,
827 inc);
1da177e4
LT
828 }
829 } else {
830 struct slave *slave;
831 int i;
832 bond_for_each_slave(bond, slave, i) {
7e1a1ac1
WC
833 err = dev_set_allmulti(slave->dev, inc);
834 if (err)
835 return err;
1da177e4
LT
836 }
837 }
7e1a1ac1 838 return err;
1da177e4
LT
839}
840
841/*
842 * Add a Multicast address to slaves
843 * according to mode
844 */
845static void bond_mc_add(struct bonding *bond, void *addr, int alen)
846{
847 if (USES_PRIMARY(bond->params.mode)) {
848 /* write lock already acquired */
3d632c3f 849 if (bond->curr_active_slave)
1da177e4 850 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
1da177e4
LT
851 } else {
852 struct slave *slave;
853 int i;
3d632c3f
SH
854
855 bond_for_each_slave(bond, slave, i)
1da177e4 856 dev_mc_add(slave->dev, addr, alen, 0);
1da177e4
LT
857 }
858}
859
860/*
861 * Remove a multicast address from slave
862 * according to mode
863 */
864static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
865{
866 if (USES_PRIMARY(bond->params.mode)) {
867 /* write lock already acquired */
3d632c3f
SH
868 if (bond->curr_active_slave)
869 dev_mc_delete(bond->curr_active_slave->dev, addr,
870 alen, 0);
1da177e4
LT
871 } else {
872 struct slave *slave;
873 int i;
874 bond_for_each_slave(bond, slave, i) {
875 dev_mc_delete(slave->dev, addr, alen, 0);
876 }
877 }
878}
879
a816c7c7
JV
880
881/*
882 * Retrieve the list of registered multicast addresses for the bonding
883 * device and retransmit an IGMP JOIN request to the current active
884 * slave.
885 */
886static void bond_resend_igmp_join_requests(struct bonding *bond)
887{
888 struct in_device *in_dev;
889 struct ip_mc_list *im;
890
891 rcu_read_lock();
892 in_dev = __in_dev_get_rcu(bond->dev);
893 if (in_dev) {
3d632c3f 894 for (im = in_dev->mc_list; im; im = im->next)
a816c7c7 895 ip_mc_rejoin_group(im);
a816c7c7
JV
896 }
897
898 rcu_read_unlock();
899}
900
1da177e4
LT
901/*
902 * Totally destroys the mc_list in bond
903 */
904static void bond_mc_list_destroy(struct bonding *bond)
905{
906 struct dev_mc_list *dmi;
907
908 dmi = bond->mc_list;
909 while (dmi) {
910 bond->mc_list = dmi->next;
911 kfree(dmi);
912 dmi = bond->mc_list;
913 }
3d632c3f
SH
914
915 bond->mc_list = NULL;
1da177e4
LT
916}
917
918/*
919 * Copy all the Multicast addresses from src to the bonding device dst
920 */
de54f390 921static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
dd0fc66f 922 gfp_t gfp_flag)
1da177e4
LT
923{
924 struct dev_mc_list *dmi, *new_dmi;
925
926 for (dmi = mc_list; dmi; dmi = dmi->next) {
de54f390 927 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
1da177e4
LT
928
929 if (!new_dmi) {
930 /* FIXME: Potential memory leak !!! */
931 return -ENOMEM;
932 }
933
934 new_dmi->next = bond->mc_list;
935 bond->mc_list = new_dmi;
936 new_dmi->dmi_addrlen = dmi->dmi_addrlen;
937 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
938 new_dmi->dmi_users = dmi->dmi_users;
939 new_dmi->dmi_gusers = dmi->dmi_gusers;
940 }
941
942 return 0;
943}
944
945/*
946 * flush all members of flush->mc_list from device dev->mc_list
947 */
3d632c3f
SH
948static void bond_mc_list_flush(struct net_device *bond_dev,
949 struct net_device *slave_dev)
1da177e4 950{
454d7c9b 951 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
952 struct dev_mc_list *dmi;
953
3d632c3f 954 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
1da177e4 955 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
1da177e4
LT
956
957 if (bond->params.mode == BOND_MODE_8023AD) {
958 /* del lacpdu mc addr from mc list */
959 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
960
961 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
962 }
963}
964
965/*--------------------------- Active slave change ---------------------------*/
966
967/*
968 * Update the mc list and multicast-related flags for the new and
969 * old active slaves (if any) according to the multicast mode, and
970 * promiscuous flags unconditionally.
971 */
3d632c3f
SH
972static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
973 struct slave *old_active)
1da177e4
LT
974{
975 struct dev_mc_list *dmi;
976
3d632c3f 977 if (!USES_PRIMARY(bond->params.mode))
1da177e4
LT
978 /* nothing to do - mc list is already up-to-date on
979 * all slaves
980 */
981 return;
1da177e4
LT
982
983 if (old_active) {
3d632c3f 984 if (bond->dev->flags & IFF_PROMISC)
1da177e4 985 dev_set_promiscuity(old_active->dev, -1);
1da177e4 986
3d632c3f 987 if (bond->dev->flags & IFF_ALLMULTI)
1da177e4 988 dev_set_allmulti(old_active->dev, -1);
1da177e4 989
3d632c3f
SH
990 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
991 dev_mc_delete(old_active->dev, dmi->dmi_addr,
992 dmi->dmi_addrlen, 0);
1da177e4
LT
993 }
994
995 if (new_active) {
7e1a1ac1 996 /* FIXME: Signal errors upstream. */
3d632c3f 997 if (bond->dev->flags & IFF_PROMISC)
1da177e4 998 dev_set_promiscuity(new_active->dev, 1);
1da177e4 999
3d632c3f 1000 if (bond->dev->flags & IFF_ALLMULTI)
1da177e4 1001 dev_set_allmulti(new_active->dev, 1);
1da177e4 1002
3d632c3f
SH
1003 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
1004 dev_mc_add(new_active->dev, dmi->dmi_addr,
1005 dmi->dmi_addrlen, 0);
a816c7c7 1006 bond_resend_igmp_join_requests(bond);
1da177e4
LT
1007 }
1008}
1009
3915c1e8
JV
1010/*
1011 * bond_do_fail_over_mac
1012 *
1013 * Perform special MAC address swapping for fail_over_mac settings
1014 *
1015 * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
1016 */
1017static void bond_do_fail_over_mac(struct bonding *bond,
1018 struct slave *new_active,
1019 struct slave *old_active)
1f78d9f9
HE
1020 __releases(&bond->curr_slave_lock)
1021 __releases(&bond->lock)
1022 __acquires(&bond->lock)
1023 __acquires(&bond->curr_slave_lock)
3915c1e8
JV
1024{
1025 u8 tmp_mac[ETH_ALEN];
1026 struct sockaddr saddr;
1027 int rv;
1028
1029 switch (bond->params.fail_over_mac) {
1030 case BOND_FOM_ACTIVE:
1031 if (new_active)
1032 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr,
1033 new_active->dev->addr_len);
1034 break;
1035 case BOND_FOM_FOLLOW:
1036 /*
1037 * if new_active && old_active, swap them
1038 * if just old_active, do nothing (going to no active slave)
1039 * if just new_active, set new_active to bond's MAC
1040 */
1041 if (!new_active)
1042 return;
1043
1044 write_unlock_bh(&bond->curr_slave_lock);
1045 read_unlock(&bond->lock);
1046
1047 if (old_active) {
1048 memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
1049 memcpy(saddr.sa_data, old_active->dev->dev_addr,
1050 ETH_ALEN);
1051 saddr.sa_family = new_active->dev->type;
1052 } else {
1053 memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN);
1054 saddr.sa_family = bond->dev->type;
1055 }
1056
1057 rv = dev_set_mac_address(new_active->dev, &saddr);
1058 if (rv) {
3d632c3f 1059 pr_err(DRV_NAME
3915c1e8
JV
1060 ": %s: Error %d setting MAC of slave %s\n",
1061 bond->dev->name, -rv, new_active->dev->name);
1062 goto out;
1063 }
1064
1065 if (!old_active)
1066 goto out;
1067
1068 memcpy(saddr.sa_data, tmp_mac, ETH_ALEN);
1069 saddr.sa_family = old_active->dev->type;
1070
1071 rv = dev_set_mac_address(old_active->dev, &saddr);
1072 if (rv)
3d632c3f 1073 pr_err(DRV_NAME
3915c1e8
JV
1074 ": %s: Error %d setting MAC of slave %s\n",
1075 bond->dev->name, -rv, new_active->dev->name);
1076out:
1077 read_lock(&bond->lock);
1078 write_lock_bh(&bond->curr_slave_lock);
1079 break;
1080 default:
3d632c3f 1081 pr_err(DRV_NAME
3915c1e8
JV
1082 ": %s: bond_do_fail_over_mac impossible: bad policy %d\n",
1083 bond->dev->name, bond->params.fail_over_mac);
1084 break;
1085 }
1086
1087}
1088
a549952a
JP
1089static bool bond_should_change_active(struct bonding *bond)
1090{
1091 struct slave *prim = bond->primary_slave;
1092 struct slave *curr = bond->curr_active_slave;
1093
1094 if (!prim || !curr || curr->link != BOND_LINK_UP)
1095 return true;
1096 if (bond->force_primary) {
1097 bond->force_primary = false;
1098 return true;
1099 }
1100 if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
1101 (prim->speed < curr->speed ||
1102 (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
1103 return false;
1104 if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
1105 return false;
1106 return true;
1107}
3915c1e8 1108
1da177e4
LT
1109/**
1110 * find_best_interface - select the best available slave to be the active one
1111 * @bond: our bonding struct
1112 *
1113 * Warning: Caller must hold curr_slave_lock for writing.
1114 */
1115static struct slave *bond_find_best_slave(struct bonding *bond)
1116{
1117 struct slave *new_active, *old_active;
1118 struct slave *bestslave = NULL;
1119 int mintime = bond->params.updelay;
1120 int i;
1121
49b4ad92 1122 new_active = bond->curr_active_slave;
1da177e4
LT
1123
1124 if (!new_active) { /* there were no active slaves left */
3d632c3f 1125 if (bond->slave_cnt > 0) /* found one slave */
1da177e4 1126 new_active = bond->first_slave;
3d632c3f 1127 else
1da177e4 1128 return NULL; /* still no slave, return NULL */
1da177e4
LT
1129 }
1130
1da177e4 1131 if ((bond->primary_slave) &&
a549952a
JP
1132 bond->primary_slave->link == BOND_LINK_UP &&
1133 bond_should_change_active(bond)) {
1da177e4
LT
1134 new_active = bond->primary_slave;
1135 }
1136
1137 /* remember where to stop iterating over the slaves */
1138 old_active = new_active;
1139
1140 bond_for_each_slave_from(bond, new_active, i, old_active) {
b9f60253
JP
1141 if (new_active->link == BOND_LINK_UP) {
1142 return new_active;
1143 } else if (new_active->link == BOND_LINK_BACK &&
1144 IS_UP(new_active->dev)) {
1145 /* link up, but waiting for stabilization */
1146 if (new_active->delay < mintime) {
1147 mintime = new_active->delay;
1148 bestslave = new_active;
1da177e4
LT
1149 }
1150 }
1151 }
1152
1153 return bestslave;
1154}
1155
1156/**
1157 * change_active_interface - change the active slave into the specified one
1158 * @bond: our bonding struct
1159 * @new: the new slave to make the active one
1160 *
1161 * Set the new slave to the bond's settings and unset them on the old
1162 * curr_active_slave.
1163 * Setting include flags, mc-list, promiscuity, allmulti, etc.
1164 *
1165 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP,
1166 * because it is apparently the best available slave we have, even though its
1167 * updelay hasn't timed out yet.
1168 *
3915c1e8
JV
1169 * If new_active is not NULL, caller must hold bond->lock for read and
1170 * curr_slave_lock for write_bh.
1da177e4 1171 */
a77b5325 1172void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1da177e4
LT
1173{
1174 struct slave *old_active = bond->curr_active_slave;
1175
3d632c3f 1176 if (old_active == new_active)
1da177e4 1177 return;
1da177e4
LT
1178
1179 if (new_active) {
b2220cad
JV
1180 new_active->jiffies = jiffies;
1181
1da177e4
LT
1182 if (new_active->link == BOND_LINK_BACK) {
1183 if (USES_PRIMARY(bond->params.mode)) {
3d632c3f 1184 pr_info(DRV_NAME
1da177e4
LT
1185 ": %s: making interface %s the new "
1186 "active one %d ms earlier.\n",
1187 bond->dev->name, new_active->dev->name,
1188 (bond->params.updelay - new_active->delay) * bond->params.miimon);
1189 }
1190
1191 new_active->delay = 0;
1192 new_active->link = BOND_LINK_UP;
1da177e4 1193
3d632c3f 1194 if (bond->params.mode == BOND_MODE_8023AD)
1da177e4 1195 bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
1da177e4 1196
58402054 1197 if (bond_is_lb(bond))
1da177e4 1198 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
1da177e4
LT
1199 } else {
1200 if (USES_PRIMARY(bond->params.mode)) {
3d632c3f 1201 pr_info(DRV_NAME
1da177e4
LT
1202 ": %s: making interface %s the new "
1203 "active one.\n",
1204 bond->dev->name, new_active->dev->name);
1205 }
1206 }
1207 }
1208
3d632c3f 1209 if (USES_PRIMARY(bond->params.mode))
1da177e4 1210 bond_mc_swap(bond, new_active, old_active);
1da177e4 1211
58402054 1212 if (bond_is_lb(bond)) {
1da177e4 1213 bond_alb_handle_active_change(bond, new_active);
8f903c70
JV
1214 if (old_active)
1215 bond_set_slave_inactive_flags(old_active);
1216 if (new_active)
1217 bond_set_slave_active_flags(new_active);
1da177e4
LT
1218 } else {
1219 bond->curr_active_slave = new_active;
1220 }
c3ade5ca
JV
1221
1222 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
3d632c3f 1223 if (old_active)
c3ade5ca 1224 bond_set_slave_inactive_flags(old_active);
c3ade5ca
JV
1225
1226 if (new_active) {
1227 bond_set_slave_active_flags(new_active);
2ab82852 1228
709f8a45
OG
1229 if (bond->params.fail_over_mac)
1230 bond_do_fail_over_mac(bond, new_active,
1231 old_active);
3915c1e8 1232
709f8a45 1233 bond->send_grat_arp = bond->params.num_grat_arp;
b59f9f74 1234 bond_send_gratuitous_arp(bond);
01f3109d 1235
305d552a
BH
1236 bond->send_unsol_na = bond->params.num_unsol_na;
1237 bond_send_unsolicited_na(bond);
1238
01f3109d
OG
1239 write_unlock_bh(&bond->curr_slave_lock);
1240 read_unlock(&bond->lock);
1241
75c78500 1242 netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
01f3109d
OG
1243
1244 read_lock(&bond->lock);
1245 write_lock_bh(&bond->curr_slave_lock);
7893b249 1246 }
c3ade5ca 1247 }
1da177e4
LT
1248}
1249
1250/**
1251 * bond_select_active_slave - select a new active slave, if needed
1252 * @bond: our bonding struct
1253 *
3d632c3f 1254 * This functions should be called when one of the following occurs:
1da177e4
LT
1255 * - The old curr_active_slave has been released or lost its link.
1256 * - The primary_slave has got its link back.
1257 * - A slave has got its link back and there's no old curr_active_slave.
1258 *
3915c1e8 1259 * Caller must hold bond->lock for read and curr_slave_lock for write_bh.
1da177e4 1260 */
a77b5325 1261void bond_select_active_slave(struct bonding *bond)
1da177e4
LT
1262{
1263 struct slave *best_slave;
ff59c456 1264 int rv;
1da177e4
LT
1265
1266 best_slave = bond_find_best_slave(bond);
1267 if (best_slave != bond->curr_active_slave) {
1268 bond_change_active_slave(bond, best_slave);
ff59c456
JV
1269 rv = bond_set_carrier(bond);
1270 if (!rv)
1271 return;
1272
1273 if (netif_carrier_ok(bond->dev)) {
3d632c3f 1274 pr_info(DRV_NAME
ff59c456
JV
1275 ": %s: first active interface up!\n",
1276 bond->dev->name);
1277 } else {
3d632c3f 1278 pr_info(DRV_NAME ": %s: "
ff59c456
JV
1279 "now running without any active interface !\n",
1280 bond->dev->name);
1281 }
1da177e4
LT
1282 }
1283}
1284
1285/*--------------------------- slave list handling ---------------------------*/
1286
1287/*
1288 * This function attaches the slave to the end of list.
1289 *
1290 * bond->lock held for writing by caller.
1291 */
1292static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
1293{
1294 if (bond->first_slave == NULL) { /* attaching the first slave */
1295 new_slave->next = new_slave;
1296 new_slave->prev = new_slave;
1297 bond->first_slave = new_slave;
1298 } else {
1299 new_slave->next = bond->first_slave;
1300 new_slave->prev = bond->first_slave->prev;
1301 new_slave->next->prev = new_slave;
1302 new_slave->prev->next = new_slave;
1303 }
1304
1305 bond->slave_cnt++;
1306}
1307
1308/*
1309 * This function detaches the slave from the list.
1310 * WARNING: no check is made to verify if the slave effectively
1311 * belongs to <bond>.
1312 * Nothing is freed on return, structures are just unchained.
1313 * If any slave pointer in bond was pointing to <slave>,
1314 * it should be changed by the calling function.
1315 *
1316 * bond->lock held for writing by caller.
1317 */
1318static void bond_detach_slave(struct bonding *bond, struct slave *slave)
1319{
3d632c3f 1320 if (slave->next)
1da177e4 1321 slave->next->prev = slave->prev;
1da177e4 1322
3d632c3f 1323 if (slave->prev)
1da177e4 1324 slave->prev->next = slave->next;
1da177e4
LT
1325
1326 if (bond->first_slave == slave) { /* slave is the first slave */
1327 if (bond->slave_cnt > 1) { /* there are more slave */
1328 bond->first_slave = slave->next;
1329 } else {
1330 bond->first_slave = NULL; /* slave was the last one */
1331 }
1332 }
1333
1334 slave->next = NULL;
1335 slave->prev = NULL;
1336 bond->slave_cnt--;
1337}
1338
1339/*---------------------------------- IOCTL ----------------------------------*/
1340
4ad072c9
AB
1341static int bond_sethwaddr(struct net_device *bond_dev,
1342 struct net_device *slave_dev)
1da177e4 1343{
5a03cdb7
HE
1344 pr_debug("bond_dev=%p\n", bond_dev);
1345 pr_debug("slave_dev=%p\n", slave_dev);
1346 pr_debug("slave_dev->addr_len=%d\n", slave_dev->addr_len);
1da177e4
LT
1347 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
1348 return 0;
1349}
1350
7f353bf2
HX
1351#define BOND_VLAN_FEATURES \
1352 (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \
1353 NETIF_F_HW_VLAN_FILTER)
8531c5ff 1354
3d632c3f 1355/*
8e3babcd 1356 * Compute the common dev->feature set available to all slaves. Some
7f353bf2
HX
1357 * feature bits are managed elsewhere, so preserve those feature bits
1358 * on the master device.
8531c5ff
AK
1359 */
1360static int bond_compute_features(struct bonding *bond)
1361{
8531c5ff
AK
1362 struct slave *slave;
1363 struct net_device *bond_dev = bond->dev;
7f353bf2 1364 unsigned long features = bond_dev->features;
278339a4 1365 unsigned long vlan_features = 0;
3158bf7d
MS
1366 unsigned short max_hard_header_len = max((u16)ETH_HLEN,
1367 bond_dev->hard_header_len);
8e3babcd 1368 int i;
8531c5ff 1369
7f353bf2 1370 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
b63365a2
HX
1371 features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM;
1372
1373 if (!bond->first_slave)
1374 goto done;
1375
1376 features &= ~NETIF_F_ONE_FOR_ALL;
7f353bf2 1377
278339a4 1378 vlan_features = bond->first_slave->dev->vlan_features;
54ef3137 1379 bond_for_each_slave(bond, slave, i) {
b63365a2
HX
1380 features = netdev_increment_features(features,
1381 slave->dev->features,
1382 NETIF_F_ONE_FOR_ALL);
278339a4
JV
1383 vlan_features = netdev_increment_features(vlan_features,
1384 slave->dev->vlan_features,
1385 NETIF_F_ONE_FOR_ALL);
54ef3137
JV
1386 if (slave->dev->hard_header_len > max_hard_header_len)
1387 max_hard_header_len = slave->dev->hard_header_len;
1388 }
8531c5ff 1389
b63365a2 1390done:
7f353bf2 1391 features |= (bond_dev->features & BOND_VLAN_FEATURES);
b63365a2 1392 bond_dev->features = netdev_fix_features(features, NULL);
278339a4 1393 bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL);
54ef3137 1394 bond_dev->hard_header_len = max_hard_header_len;
8531c5ff
AK
1395
1396 return 0;
1397}
1398
872254dd
MS
1399static void bond_setup_by_slave(struct net_device *bond_dev,
1400 struct net_device *slave_dev)
1401{
454d7c9b 1402 struct bonding *bond = netdev_priv(bond_dev);
d90a162a 1403
00829823 1404 bond_dev->header_ops = slave_dev->header_ops;
872254dd
MS
1405
1406 bond_dev->type = slave_dev->type;
1407 bond_dev->hard_header_len = slave_dev->hard_header_len;
1408 bond_dev->addr_len = slave_dev->addr_len;
1409
1410 memcpy(bond_dev->broadcast, slave_dev->broadcast,
1411 slave_dev->addr_len);
d90a162a 1412 bond->setup_by_slave = 1;
872254dd
MS
1413}
1414
1da177e4 1415/* enslave device <slave> to bond device <master> */
a77b5325 1416int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1da177e4 1417{
454d7c9b 1418 struct bonding *bond = netdev_priv(bond_dev);
eb7cc59a 1419 const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1da177e4
LT
1420 struct slave *new_slave = NULL;
1421 struct dev_mc_list *dmi;
1422 struct sockaddr addr;
1423 int link_reporting;
1424 int old_features = bond_dev->features;
1425 int res = 0;
1426
552709d5 1427 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
eb7cc59a 1428 slave_ops->ndo_do_ioctl == NULL) {
3d632c3f 1429 pr_warning(DRV_NAME
4e0952c7
MW
1430 ": %s: Warning: no link monitoring support for %s\n",
1431 bond_dev->name, slave_dev->name);
1da177e4
LT
1432 }
1433
1434 /* bond must be initialized by bond_open() before enslaving */
1435 if (!(bond_dev->flags & IFF_UP)) {
3d632c3f 1436 pr_warning(DRV_NAME
6b1bf096
MS
1437 " %s: master_dev is not up in bond_enslave\n",
1438 bond_dev->name);
1da177e4
LT
1439 }
1440
1441 /* already enslaved */
1442 if (slave_dev->flags & IFF_SLAVE) {
5a03cdb7 1443 pr_debug("Error, Device was already enslaved\n");
1da177e4
LT
1444 return -EBUSY;
1445 }
1446
1447 /* vlan challenged mutual exclusion */
1448 /* no need to lock since we're protected by rtnl_lock */
1449 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
5a03cdb7 1450 pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1da177e4 1451 if (!list_empty(&bond->vlan_list)) {
3d632c3f 1452 pr_err(DRV_NAME
4e0952c7 1453 ": %s: Error: cannot enslave VLAN "
1da177e4 1454 "challenged slave %s on VLAN enabled "
4e0952c7 1455 "bond %s\n", bond_dev->name, slave_dev->name,
1da177e4
LT
1456 bond_dev->name);
1457 return -EPERM;
1458 } else {
3d632c3f 1459 pr_warning(DRV_NAME
4e0952c7 1460 ": %s: Warning: enslaved VLAN challenged "
1da177e4
LT
1461 "slave %s. Adding VLANs will be blocked as "
1462 "long as %s is part of bond %s\n",
4e0952c7 1463 bond_dev->name, slave_dev->name, slave_dev->name,
1da177e4
LT
1464 bond_dev->name);
1465 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1466 }
1467 } else {
5a03cdb7 1468 pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1da177e4
LT
1469 if (bond->slave_cnt == 0) {
1470 /* First slave, and it is not VLAN challenged,
1471 * so remove the block of adding VLANs over the bond.
1472 */
1473 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1474 }
1475 }
1476
217df670
JV
1477 /*
1478 * Old ifenslave binaries are no longer supported. These can
3d632c3f 1479 * be identified with moderate accuracy by the state of the slave:
217df670
JV
1480 * the current ifenslave will set the interface down prior to
1481 * enslaving it; the old ifenslave will not.
1482 */
1483 if ((slave_dev->flags & IFF_UP)) {
3d632c3f 1484 pr_err(DRV_NAME ": %s is up. "
217df670
JV
1485 "This may be due to an out of date ifenslave.\n",
1486 slave_dev->name);
1487 res = -EPERM;
1488 goto err_undo_flags;
1489 }
1da177e4 1490
872254dd
MS
1491 /* set bonding device ether type by slave - bonding netdevices are
1492 * created with ether_setup, so when the slave type is not ARPHRD_ETHER
1493 * there is a need to override some of the type dependent attribs/funcs.
1494 *
1495 * bond ether type mutual exclusion - don't allow slaves of dissimilar
1496 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
1497 */
1498 if (bond->slave_cnt == 0) {
e36b9d16 1499 if (bond_dev->type != slave_dev->type) {
e36b9d16
MS
1500 pr_debug("%s: change device type from %d to %d\n",
1501 bond_dev->name, bond_dev->type, slave_dev->type);
75c78500
MS
1502
1503 netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
1504
e36b9d16
MS
1505 if (slave_dev->type != ARPHRD_ETHER)
1506 bond_setup_by_slave(bond_dev, slave_dev);
1507 else
1508 ether_setup(bond_dev);
75c78500
MS
1509
1510 netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
e36b9d16 1511 }
872254dd 1512 } else if (bond_dev->type != slave_dev->type) {
3d632c3f 1513 pr_err(DRV_NAME ": %s ether type (%d) is different "
872254dd
MS
1514 "from other slaves (%d), can not enslave it.\n",
1515 slave_dev->name,
1516 slave_dev->type, bond_dev->type);
1517 res = -EINVAL;
1518 goto err_undo_flags;
1519 }
1520
eb7cc59a 1521 if (slave_ops->ndo_set_mac_address == NULL) {
2ab82852 1522 if (bond->slave_cnt == 0) {
3d632c3f 1523 pr_warning(DRV_NAME
dd957c57
JV
1524 ": %s: Warning: The first slave device "
1525 "specified does not support setting the MAC "
3915c1e8 1526 "address. Setting fail_over_mac to active.",
dd957c57 1527 bond_dev->name);
3915c1e8
JV
1528 bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1529 } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
3d632c3f 1530 pr_err(DRV_NAME
dd957c57
JV
1531 ": %s: Error: The slave device specified "
1532 "does not support setting the MAC address, "
3915c1e8 1533 "but fail_over_mac is not set to active.\n"
2ab82852
MS
1534 , bond_dev->name);
1535 res = -EOPNOTSUPP;
1536 goto err_undo_flags;
1537 }
1da177e4
LT
1538 }
1539
243cb4e5 1540 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
1da177e4
LT
1541 if (!new_slave) {
1542 res = -ENOMEM;
1543 goto err_undo_flags;
1544 }
1545
1da177e4
LT
1546 /* save slave's original flags before calling
1547 * netdev_set_master and dev_open
1548 */
1549 new_slave->original_flags = slave_dev->flags;
1550
217df670
JV
1551 /*
1552 * Save slave's original ("permanent") mac address for modes
1553 * that need it, and for restoring it upon release, and then
1554 * set it to the master's address
1555 */
1556 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
1da177e4 1557
dd957c57 1558 if (!bond->params.fail_over_mac) {
2ab82852
MS
1559 /*
1560 * Set slave to master's mac address. The application already
1561 * set the master's mac address to that of the first slave
1562 */
1563 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
1564 addr.sa_family = slave_dev->type;
1565 res = dev_set_mac_address(slave_dev, &addr);
1566 if (res) {
5a03cdb7 1567 pr_debug("Error %d calling set_mac_address\n", res);
2ab82852
MS
1568 goto err_free;
1569 }
217df670 1570 }
1da177e4 1571
c2edacf8
JV
1572 res = netdev_set_master(slave_dev, bond_dev);
1573 if (res) {
5a03cdb7 1574 pr_debug("Error %d calling netdev_set_master\n", res);
569f0c4d 1575 goto err_restore_mac;
c2edacf8 1576 }
217df670
JV
1577 /* open the slave since the application closed it */
1578 res = dev_open(slave_dev);
1579 if (res) {
3d632c3f 1580 pr_debug("Opening slave %s failed\n", slave_dev->name);
569f0c4d 1581 goto err_unset_master;
1da177e4
LT
1582 }
1583
1da177e4 1584 new_slave->dev = slave_dev;
0b680e75 1585 slave_dev->priv_flags |= IFF_BONDING;
1da177e4 1586
58402054 1587 if (bond_is_lb(bond)) {
1da177e4
LT
1588 /* bond_alb_init_slave() must be called before all other stages since
1589 * it might fail and we do not want to have to undo everything
1590 */
1591 res = bond_alb_init_slave(bond, new_slave);
3d632c3f 1592 if (res)
569f0c4d 1593 goto err_close;
1da177e4
LT
1594 }
1595
1596 /* If the mode USES_PRIMARY, then the new slave gets the
1597 * master's promisc (and mc) settings only if it becomes the
1598 * curr_active_slave, and that is taken care of later when calling
1599 * bond_change_active()
1600 */
1601 if (!USES_PRIMARY(bond->params.mode)) {
1602 /* set promiscuity level to new slave */
1603 if (bond_dev->flags & IFF_PROMISC) {
7e1a1ac1
WC
1604 res = dev_set_promiscuity(slave_dev, 1);
1605 if (res)
1606 goto err_close;
1da177e4
LT
1607 }
1608
1609 /* set allmulti level to new slave */
1610 if (bond_dev->flags & IFF_ALLMULTI) {
7e1a1ac1
WC
1611 res = dev_set_allmulti(slave_dev, 1);
1612 if (res)
1613 goto err_close;
1da177e4
LT
1614 }
1615
b9e40857 1616 netif_addr_lock_bh(bond_dev);
1da177e4 1617 /* upload master's mc_list to new slave */
3d632c3f
SH
1618 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
1619 dev_mc_add(slave_dev, dmi->dmi_addr,
1620 dmi->dmi_addrlen, 0);
b9e40857 1621 netif_addr_unlock_bh(bond_dev);
1da177e4
LT
1622 }
1623
1624 if (bond->params.mode == BOND_MODE_8023AD) {
1625 /* add lacpdu mc addr to mc list */
1626 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
1627
1628 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
1629 }
1630
1631 bond_add_vlans_on_slave(bond, slave_dev);
1632
1633 write_lock_bh(&bond->lock);
1634
1635 bond_attach_slave(bond, new_slave);
1636
1637 new_slave->delay = 0;
1638 new_slave->link_failure_count = 0;
1639
8531c5ff
AK
1640 bond_compute_features(bond);
1641
3915c1e8
JV
1642 write_unlock_bh(&bond->lock);
1643
1644 read_lock(&bond->lock);
1645
f5b2b966
JV
1646 new_slave->last_arp_rx = jiffies;
1647
1da177e4
LT
1648 if (bond->params.miimon && !bond->params.use_carrier) {
1649 link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1650
1651 if ((link_reporting == -1) && !bond->params.arp_interval) {
1652 /*
1653 * miimon is set but a bonded network driver
1654 * does not support ETHTOOL/MII and
1655 * arp_interval is not set. Note: if
1656 * use_carrier is enabled, we will never go
1657 * here (because netif_carrier is always
1658 * supported); thus, we don't need to change
1659 * the messages for netif_carrier.
1660 */
3d632c3f 1661 pr_warning(DRV_NAME
4e0952c7 1662 ": %s: Warning: MII and ETHTOOL support not "
1da177e4
LT
1663 "available for interface %s, and "
1664 "arp_interval/arp_ip_target module parameters "
1665 "not specified, thus bonding will not detect "
1666 "link failures! see bonding.txt for details.\n",
4e0952c7 1667 bond_dev->name, slave_dev->name);
1da177e4
LT
1668 } else if (link_reporting == -1) {
1669 /* unable get link status using mii/ethtool */
3d632c3f 1670 pr_warning(DRV_NAME
4e0952c7 1671 ": %s: Warning: can't get link status from "
1da177e4
LT
1672 "interface %s; the network driver associated "
1673 "with this interface does not support MII or "
1674 "ETHTOOL link status reporting, thus miimon "
1675 "has no effect on this interface.\n",
4e0952c7 1676 bond_dev->name, slave_dev->name);
1da177e4
LT
1677 }
1678 }
1679
1680 /* check for initial state */
1681 if (!bond->params.miimon ||
1682 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) {
1683 if (bond->params.updelay) {
5a03cdb7 1684 pr_debug("Initial state of slave_dev is "
1da177e4
LT
1685 "BOND_LINK_BACK\n");
1686 new_slave->link = BOND_LINK_BACK;
1687 new_slave->delay = bond->params.updelay;
1688 } else {
5a03cdb7 1689 pr_debug("Initial state of slave_dev is "
1da177e4
LT
1690 "BOND_LINK_UP\n");
1691 new_slave->link = BOND_LINK_UP;
1692 }
1693 new_slave->jiffies = jiffies;
1694 } else {
5a03cdb7 1695 pr_debug("Initial state of slave_dev is "
1da177e4
LT
1696 "BOND_LINK_DOWN\n");
1697 new_slave->link = BOND_LINK_DOWN;
1698 }
1699
1700 if (bond_update_speed_duplex(new_slave) &&
1701 (new_slave->link != BOND_LINK_DOWN)) {
3d632c3f 1702 pr_warning(DRV_NAME
4e0952c7 1703 ": %s: Warning: failed to get speed and duplex from %s, "
1da177e4 1704 "assumed to be 100Mb/sec and Full.\n",
4e0952c7 1705 bond_dev->name, new_slave->dev->name);
1da177e4
LT
1706
1707 if (bond->params.mode == BOND_MODE_8023AD) {
3d632c3f 1708 pr_warning(DRV_NAME
4e0952c7 1709 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL "
1da177e4 1710 "support in base driver for proper aggregator "
4e0952c7 1711 "selection.\n", bond_dev->name);
1da177e4
LT
1712 }
1713 }
1714
1715 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
1716 /* if there is a primary slave, remember it */
a549952a 1717 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1da177e4 1718 bond->primary_slave = new_slave;
a549952a
JP
1719 bond->force_primary = true;
1720 }
1da177e4
LT
1721 }
1722
3915c1e8
JV
1723 write_lock_bh(&bond->curr_slave_lock);
1724
1da177e4
LT
1725 switch (bond->params.mode) {
1726 case BOND_MODE_ACTIVEBACKUP:
8a8e447b
JV
1727 bond_set_slave_inactive_flags(new_slave);
1728 bond_select_active_slave(bond);
1da177e4
LT
1729 break;
1730 case BOND_MODE_8023AD:
1731 /* in 802.3ad mode, the internal mechanism
1732 * will activate the slaves in the selected
1733 * aggregator
1734 */
1735 bond_set_slave_inactive_flags(new_slave);
1736 /* if this is the first slave */
1737 if (bond->slave_cnt == 1) {
1738 SLAVE_AD_INFO(new_slave).id = 1;
1739 /* Initialize AD with the number of times that the AD timer is called in 1 second
1740 * can be called only after the mac address of the bond is set
1741 */
1742 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL,
1743 bond->params.lacp_fast);
1744 } else {
1745 SLAVE_AD_INFO(new_slave).id =
1746 SLAVE_AD_INFO(new_slave->prev).id + 1;
1747 }
1748
1749 bond_3ad_bind_slave(new_slave);
1750 break;
1751 case BOND_MODE_TLB:
1752 case BOND_MODE_ALB:
1753 new_slave->state = BOND_STATE_ACTIVE;
059fe7a5 1754 bond_set_slave_inactive_flags(new_slave);
5a29f789 1755 bond_select_active_slave(bond);
1da177e4
LT
1756 break;
1757 default:
5a03cdb7 1758 pr_debug("This slave is always active in trunk mode\n");
1da177e4
LT
1759
1760 /* always active in trunk mode */
1761 new_slave->state = BOND_STATE_ACTIVE;
1762
1763 /* In trunking mode there is little meaning to curr_active_slave
1764 * anyway (it holds no special properties of the bond device),
1765 * so we can change it without calling change_active_interface()
1766 */
3d632c3f 1767 if (!bond->curr_active_slave)
1da177e4 1768 bond->curr_active_slave = new_slave;
3d632c3f 1769
1da177e4
LT
1770 break;
1771 } /* switch(bond_mode) */
1772
3915c1e8
JV
1773 write_unlock_bh(&bond->curr_slave_lock);
1774
ff59c456
JV
1775 bond_set_carrier(bond);
1776
3915c1e8 1777 read_unlock(&bond->lock);
1da177e4 1778
b76cdba9
MW
1779 res = bond_create_slave_symlinks(bond_dev, slave_dev);
1780 if (res)
569f0c4d 1781 goto err_close;
b76cdba9 1782
3d632c3f 1783 pr_info(DRV_NAME
1da177e4
LT
1784 ": %s: enslaving %s as a%s interface with a%s link.\n",
1785 bond_dev->name, slave_dev->name,
1786 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
1787 new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
1788
1789 /* enslave is successful */
1790 return 0;
1791
1792/* Undo stages on error */
1da177e4
LT
1793err_close:
1794 dev_close(slave_dev);
1795
569f0c4d
JV
1796err_unset_master:
1797 netdev_set_master(slave_dev, NULL);
1798
1da177e4 1799err_restore_mac:
dd957c57 1800 if (!bond->params.fail_over_mac) {
3915c1e8
JV
1801 /* XXX TODO - fom follow mode needs to change master's
1802 * MAC if this slave's MAC is in use by the bond, or at
1803 * least print a warning.
1804 */
2ab82852
MS
1805 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
1806 addr.sa_family = slave_dev->type;
1807 dev_set_mac_address(slave_dev, &addr);
1808 }
1da177e4
LT
1809
1810err_free:
1811 kfree(new_slave);
1812
1813err_undo_flags:
1814 bond_dev->features = old_features;
3d632c3f 1815
1da177e4
LT
1816 return res;
1817}
1818
1819/*
1820 * Try to release the slave device <slave> from the bond device <master>
1821 * It is legal to access curr_active_slave without a lock because all the function
1822 * is write-locked.
1823 *
1824 * The rules for slave state should be:
1825 * for Active/Backup:
1826 * Active stays on all backups go down
1827 * for Bonded connections:
1828 * The first up interface should be left on and all others downed.
1829 */
a77b5325 1830int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1da177e4 1831{
454d7c9b 1832 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
1833 struct slave *slave, *oldcurrent;
1834 struct sockaddr addr;
1da177e4
LT
1835
1836 /* slave is not a slave or master is not master of this slave */
1837 if (!(slave_dev->flags & IFF_SLAVE) ||
1838 (slave_dev->master != bond_dev)) {
3d632c3f 1839 pr_err(DRV_NAME
4e0952c7 1840 ": %s: Error: cannot release %s.\n",
1da177e4
LT
1841 bond_dev->name, slave_dev->name);
1842 return -EINVAL;
1843 }
1844
1845 write_lock_bh(&bond->lock);
1846
1847 slave = bond_get_slave_by_dev(bond, slave_dev);
1848 if (!slave) {
1849 /* not a slave of this bond */
3d632c3f 1850 pr_info(DRV_NAME
1da177e4
LT
1851 ": %s: %s not enslaved\n",
1852 bond_dev->name, slave_dev->name);
f5e2a7b2 1853 write_unlock_bh(&bond->lock);
1da177e4
LT
1854 return -EINVAL;
1855 }
1856
3915c1e8 1857 if (!bond->params.fail_over_mac) {
89c76c62
SH
1858 if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr)
1859 && bond->slave_cnt > 1)
3d632c3f 1860 pr_warning(DRV_NAME
3915c1e8 1861 ": %s: Warning: the permanent HWaddr of %s - "
e174961c 1862 "%pM - is still in use by %s. "
3915c1e8
JV
1863 "Set the HWaddr of %s to a different address "
1864 "to avoid conflicts.\n",
1865 bond_dev->name, slave_dev->name,
e174961c 1866 slave->perm_hwaddr,
3915c1e8 1867 bond_dev->name, slave_dev->name);
1da177e4
LT
1868 }
1869
1870 /* Inform AD package of unbinding of slave. */
1871 if (bond->params.mode == BOND_MODE_8023AD) {
1872 /* must be called before the slave is
1873 * detached from the list
1874 */
1875 bond_3ad_unbind_slave(slave);
1876 }
1877
3d632c3f 1878 pr_info(DRV_NAME
1da177e4
LT
1879 ": %s: releasing %s interface %s\n",
1880 bond_dev->name,
1881 (slave->state == BOND_STATE_ACTIVE)
1882 ? "active" : "backup",
1883 slave_dev->name);
1884
1885 oldcurrent = bond->curr_active_slave;
1886
1887 bond->current_arp_slave = NULL;
1888
1889 /* release the slave from its bond */
1890 bond_detach_slave(bond, slave);
1891
8531c5ff
AK
1892 bond_compute_features(bond);
1893
3d632c3f 1894 if (bond->primary_slave == slave)
1da177e4 1895 bond->primary_slave = NULL;
1da177e4 1896
3d632c3f 1897 if (oldcurrent == slave)
1da177e4 1898 bond_change_active_slave(bond, NULL);
1da177e4 1899
58402054 1900 if (bond_is_lb(bond)) {
1da177e4
LT
1901 /* Must be called only after the slave has been
1902 * detached from the list and the curr_active_slave
1903 * has been cleared (if our_slave == old_current),
1904 * but before a new active slave is selected.
1905 */
2543331d 1906 write_unlock_bh(&bond->lock);
1da177e4 1907 bond_alb_deinit_slave(bond, slave);
2543331d 1908 write_lock_bh(&bond->lock);
1da177e4
LT
1909 }
1910
059fe7a5
JV
1911 if (oldcurrent == slave) {
1912 /*
1913 * Note that we hold RTNL over this sequence, so there
1914 * is no concern that another slave add/remove event
1915 * will interfere.
1916 */
1917 write_unlock_bh(&bond->lock);
1918 read_lock(&bond->lock);
1919 write_lock_bh(&bond->curr_slave_lock);
1920
1da177e4
LT
1921 bond_select_active_slave(bond);
1922
059fe7a5
JV
1923 write_unlock_bh(&bond->curr_slave_lock);
1924 read_unlock(&bond->lock);
1925 write_lock_bh(&bond->lock);
1926 }
1927
1da177e4 1928 if (bond->slave_cnt == 0) {
ff59c456
JV
1929 bond_set_carrier(bond);
1930
1da177e4
LT
1931 /* if the last slave was removed, zero the mac address
1932 * of the master so it will be set by the application
1933 * to the mac address of the first slave
1934 */
1935 memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
1936
1937 if (list_empty(&bond->vlan_list)) {
1938 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1939 } else {
3d632c3f 1940 pr_warning(DRV_NAME
4e0952c7 1941 ": %s: Warning: clearing HW address of %s while it "
1da177e4 1942 "still has VLANs.\n",
4e0952c7 1943 bond_dev->name, bond_dev->name);
3d632c3f 1944 pr_warning(DRV_NAME
4e0952c7
MW
1945 ": %s: When re-adding slaves, make sure the bond's "
1946 "HW address matches its VLANs'.\n",
1947 bond_dev->name);
1da177e4
LT
1948 }
1949 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
1950 !bond_has_challenged_slaves(bond)) {
3d632c3f 1951 pr_info(DRV_NAME
4e0952c7 1952 ": %s: last VLAN challenged slave %s "
1da177e4 1953 "left bond %s. VLAN blocking is removed\n",
4e0952c7 1954 bond_dev->name, slave_dev->name, bond_dev->name);
1da177e4
LT
1955 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1956 }
1957
1958 write_unlock_bh(&bond->lock);
1959
b76cdba9
MW
1960 /* must do this from outside any spinlocks */
1961 bond_destroy_slave_symlinks(bond_dev, slave_dev);
1962
1da177e4
LT
1963 bond_del_vlans_from_slave(bond, slave_dev);
1964
1965 /* If the mode USES_PRIMARY, then we should only remove its
1966 * promisc and mc settings if it was the curr_active_slave, but that was
1967 * already taken care of above when we detached the slave
1968 */
1969 if (!USES_PRIMARY(bond->params.mode)) {
1970 /* unset promiscuity level from slave */
3d632c3f 1971 if (bond_dev->flags & IFF_PROMISC)
1da177e4 1972 dev_set_promiscuity(slave_dev, -1);
1da177e4
LT
1973
1974 /* unset allmulti level from slave */
3d632c3f 1975 if (bond_dev->flags & IFF_ALLMULTI)
1da177e4 1976 dev_set_allmulti(slave_dev, -1);
1da177e4
LT
1977
1978 /* flush master's mc_list from slave */
b9e40857 1979 netif_addr_lock_bh(bond_dev);
1da177e4 1980 bond_mc_list_flush(bond_dev, slave_dev);
b9e40857 1981 netif_addr_unlock_bh(bond_dev);
1da177e4
LT
1982 }
1983
1984 netdev_set_master(slave_dev, NULL);
1985
1986 /* close slave before restoring its mac address */
1987 dev_close(slave_dev);
1988
3915c1e8 1989 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
2ab82852
MS
1990 /* restore original ("permanent") mac address */
1991 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1992 addr.sa_family = slave_dev->type;
1993 dev_set_mac_address(slave_dev, &addr);
1994 }
1da177e4 1995
8f903c70 1996 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
f5b2b966
JV
1997 IFF_SLAVE_INACTIVE | IFF_BONDING |
1998 IFF_SLAVE_NEEDARP);
1da177e4
LT
1999
2000 kfree(slave);
2001
2002 return 0; /* deletion OK */
2003}
2004
d90a162a 2005/*
3d632c3f 2006* First release a slave and than destroy the bond if no more slaves are left.
d90a162a
MS
2007* Must be under rtnl_lock when this function is called.
2008*/
3d632c3f
SH
2009int bond_release_and_destroy(struct net_device *bond_dev,
2010 struct net_device *slave_dev)
d90a162a 2011{
454d7c9b 2012 struct bonding *bond = netdev_priv(bond_dev);
d90a162a
MS
2013 int ret;
2014
2015 ret = bond_release(bond_dev, slave_dev);
2016 if ((ret == 0) && (bond->slave_cnt == 0)) {
3d632c3f 2017 pr_info(DRV_NAME ": %s: destroying bond %s.\n",
d90a162a 2018 bond_dev->name, bond_dev->name);
9e71626c 2019 unregister_netdevice(bond_dev);
d90a162a
MS
2020 }
2021 return ret;
2022}
2023
1da177e4
LT
2024/*
2025 * This function releases all slaves.
2026 */
2027static int bond_release_all(struct net_device *bond_dev)
2028{
454d7c9b 2029 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
2030 struct slave *slave;
2031 struct net_device *slave_dev;
2032 struct sockaddr addr;
2033
2034 write_lock_bh(&bond->lock);
2035
ff59c456
JV
2036 netif_carrier_off(bond_dev);
2037
3d632c3f 2038 if (bond->slave_cnt == 0)
1da177e4 2039 goto out;
1da177e4
LT
2040
2041 bond->current_arp_slave = NULL;
2042 bond->primary_slave = NULL;
2043 bond_change_active_slave(bond, NULL);
2044
2045 while ((slave = bond->first_slave) != NULL) {
2046 /* Inform AD package of unbinding of slave
2047 * before slave is detached from the list.
2048 */
3d632c3f 2049 if (bond->params.mode == BOND_MODE_8023AD)
1da177e4 2050 bond_3ad_unbind_slave(slave);
1da177e4
LT
2051
2052 slave_dev = slave->dev;
2053 bond_detach_slave(bond, slave);
2054
2543331d
JV
2055 /* now that the slave is detached, unlock and perform
2056 * all the undo steps that should not be called from
2057 * within a lock.
2058 */
2059 write_unlock_bh(&bond->lock);
2060
58402054 2061 if (bond_is_lb(bond)) {
1da177e4
LT
2062 /* must be called only after the slave
2063 * has been detached from the list
2064 */
2065 bond_alb_deinit_slave(bond, slave);
2066 }
2067
8531c5ff
AK
2068 bond_compute_features(bond);
2069
b76cdba9 2070 bond_destroy_slave_symlinks(bond_dev, slave_dev);
1da177e4
LT
2071 bond_del_vlans_from_slave(bond, slave_dev);
2072
2073 /* If the mode USES_PRIMARY, then we should only remove its
2074 * promisc and mc settings if it was the curr_active_slave, but that was
2075 * already taken care of above when we detached the slave
2076 */
2077 if (!USES_PRIMARY(bond->params.mode)) {
2078 /* unset promiscuity level from slave */
3d632c3f 2079 if (bond_dev->flags & IFF_PROMISC)
1da177e4 2080 dev_set_promiscuity(slave_dev, -1);
1da177e4
LT
2081
2082 /* unset allmulti level from slave */
3d632c3f 2083 if (bond_dev->flags & IFF_ALLMULTI)
1da177e4 2084 dev_set_allmulti(slave_dev, -1);
1da177e4
LT
2085
2086 /* flush master's mc_list from slave */
b9e40857 2087 netif_addr_lock_bh(bond_dev);
1da177e4 2088 bond_mc_list_flush(bond_dev, slave_dev);
b9e40857 2089 netif_addr_unlock_bh(bond_dev);
1da177e4
LT
2090 }
2091
2092 netdev_set_master(slave_dev, NULL);
2093
2094 /* close slave before restoring its mac address */
2095 dev_close(slave_dev);
2096
dd957c57 2097 if (!bond->params.fail_over_mac) {
2ab82852
MS
2098 /* restore original ("permanent") mac address*/
2099 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
2100 addr.sa_family = slave_dev->type;
2101 dev_set_mac_address(slave_dev, &addr);
2102 }
1da177e4 2103
8f903c70
JV
2104 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
2105 IFF_SLAVE_INACTIVE);
1da177e4
LT
2106
2107 kfree(slave);
2108
2109 /* re-acquire the lock before getting the next slave */
2110 write_lock_bh(&bond->lock);
2111 }
2112
2113 /* zero the mac address of the master so it will be
2114 * set by the application to the mac address of the
2115 * first slave
2116 */
2117 memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
2118
3d632c3f 2119 if (list_empty(&bond->vlan_list))
1da177e4 2120 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
3d632c3f
SH
2121 else {
2122 pr_warning(DRV_NAME
4e0952c7 2123 ": %s: Warning: clearing HW address of %s while it "
1da177e4 2124 "still has VLANs.\n",
4e0952c7 2125 bond_dev->name, bond_dev->name);
3d632c3f 2126 pr_warning(DRV_NAME
4e0952c7
MW
2127 ": %s: When re-adding slaves, make sure the bond's "
2128 "HW address matches its VLANs'.\n",
2129 bond_dev->name);
1da177e4
LT
2130 }
2131
3d632c3f 2132 pr_info(DRV_NAME
1da177e4
LT
2133 ": %s: released all slaves\n",
2134 bond_dev->name);
2135
2136out:
2137 write_unlock_bh(&bond->lock);
2138
2139 return 0;
2140}
2141
2142/*
2143 * This function changes the active slave to slave <slave_dev>.
2144 * It returns -EINVAL in the following cases.
2145 * - <slave_dev> is not found in the list.
2146 * - There is not active slave now.
2147 * - <slave_dev> is already active.
2148 * - The link state of <slave_dev> is not BOND_LINK_UP.
2149 * - <slave_dev> is not running.
3d632c3f
SH
2150 * In these cases, this function does nothing.
2151 * In the other cases, current_slave pointer is changed and 0 is returned.
1da177e4
LT
2152 */
2153static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev)
2154{
454d7c9b 2155 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
2156 struct slave *old_active = NULL;
2157 struct slave *new_active = NULL;
2158 int res = 0;
2159
3d632c3f 2160 if (!USES_PRIMARY(bond->params.mode))
1da177e4 2161 return -EINVAL;
1da177e4
LT
2162
2163 /* Verify that master_dev is indeed the master of slave_dev */
3d632c3f 2164 if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev))
1da177e4 2165 return -EINVAL;
1da177e4 2166
059fe7a5 2167 read_lock(&bond->lock);
1da177e4 2168
059fe7a5 2169 read_lock(&bond->curr_slave_lock);
1da177e4 2170 old_active = bond->curr_active_slave;
059fe7a5
JV
2171 read_unlock(&bond->curr_slave_lock);
2172
1da177e4
LT
2173 new_active = bond_get_slave_by_dev(bond, slave_dev);
2174
2175 /*
2176 * Changing to the current active: do nothing; return success.
2177 */
2178 if (new_active && (new_active == old_active)) {
059fe7a5 2179 read_unlock(&bond->lock);
1da177e4
LT
2180 return 0;
2181 }
2182
2183 if ((new_active) &&
2184 (old_active) &&
2185 (new_active->link == BOND_LINK_UP) &&
2186 IS_UP(new_active->dev)) {
059fe7a5 2187 write_lock_bh(&bond->curr_slave_lock);
1da177e4 2188 bond_change_active_slave(bond, new_active);
059fe7a5 2189 write_unlock_bh(&bond->curr_slave_lock);
3d632c3f 2190 } else
1da177e4 2191 res = -EINVAL;
1da177e4 2192
059fe7a5 2193 read_unlock(&bond->lock);
1da177e4
LT
2194
2195 return res;
2196}
2197
1da177e4
LT
2198static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
2199{
454d7c9b 2200 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
2201
2202 info->bond_mode = bond->params.mode;
2203 info->miimon = bond->params.miimon;
2204
6603a6f2 2205 read_lock(&bond->lock);
1da177e4 2206 info->num_slaves = bond->slave_cnt;
6603a6f2 2207 read_unlock(&bond->lock);
1da177e4
LT
2208
2209 return 0;
2210}
2211
2212static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
2213{
454d7c9b 2214 struct bonding *bond = netdev_priv(bond_dev);
1da177e4 2215 struct slave *slave;
689c96cc 2216 int i, res = -ENODEV;
1da177e4 2217
6603a6f2 2218 read_lock(&bond->lock);
1da177e4
LT
2219
2220 bond_for_each_slave(bond, slave, i) {
2221 if (i == (int)info->slave_id) {
689c96cc
ED
2222 res = 0;
2223 strcpy(info->slave_name, slave->dev->name);
2224 info->link = slave->link;
2225 info->state = slave->state;
2226 info->link_failure_count = slave->link_failure_count;
1da177e4
LT
2227 break;
2228 }
2229 }
2230
6603a6f2 2231 read_unlock(&bond->lock);
1da177e4 2232
689c96cc 2233 return res;
1da177e4
LT
2234}
2235
2236/*-------------------------------- Monitoring -------------------------------*/
2237
1da177e4 2238
f0c76d61
JV
2239static int bond_miimon_inspect(struct bonding *bond)
2240{
2241 struct slave *slave;
2242 int i, link_state, commit = 0;
41f89100
JP
2243 bool ignore_updelay;
2244
2245 ignore_updelay = !bond->curr_active_slave ? true : false;
1da177e4
LT
2246
2247 bond_for_each_slave(bond, slave, i) {
f0c76d61 2248 slave->new_link = BOND_LINK_NOCHANGE;
1da177e4 2249
f0c76d61 2250 link_state = bond_check_dev_link(bond, slave->dev, 0);
1da177e4
LT
2251
2252 switch (slave->link) {
f0c76d61
JV
2253 case BOND_LINK_UP:
2254 if (link_state)
2255 continue;
1da177e4 2256
f0c76d61
JV
2257 slave->link = BOND_LINK_FAIL;
2258 slave->delay = bond->params.downdelay;
2259 if (slave->delay) {
3d632c3f 2260 pr_info(DRV_NAME
f0c76d61
JV
2261 ": %s: link status down for %s"
2262 "interface %s, disabling it in %d ms.\n",
2263 bond->dev->name,
2264 (bond->params.mode ==
2265 BOND_MODE_ACTIVEBACKUP) ?
2266 ((slave->state == BOND_STATE_ACTIVE) ?
2267 "active " : "backup ") : "",
2268 slave->dev->name,
2269 bond->params.downdelay * bond->params.miimon);
1da177e4 2270 }
f0c76d61
JV
2271 /*FALLTHRU*/
2272 case BOND_LINK_FAIL:
2273 if (link_state) {
2274 /*
2275 * recovered before downdelay expired
2276 */
2277 slave->link = BOND_LINK_UP;
1da177e4 2278 slave->jiffies = jiffies;
3d632c3f 2279 pr_info(DRV_NAME
1da177e4
LT
2280 ": %s: link status up again after %d "
2281 "ms for interface %s.\n",
1b76b316 2282 bond->dev->name,
f0c76d61
JV
2283 (bond->params.downdelay - slave->delay) *
2284 bond->params.miimon,
2285 slave->dev->name);
2286 continue;
1da177e4 2287 }
f0c76d61
JV
2288
2289 if (slave->delay <= 0) {
2290 slave->new_link = BOND_LINK_DOWN;
2291 commit++;
2292 continue;
1da177e4 2293 }
1da177e4 2294
f0c76d61
JV
2295 slave->delay--;
2296 break;
2297
2298 case BOND_LINK_DOWN:
2299 if (!link_state)
2300 continue;
2301
2302 slave->link = BOND_LINK_BACK;
2303 slave->delay = bond->params.updelay;
2304
2305 if (slave->delay) {
3d632c3f 2306 pr_info(DRV_NAME
f0c76d61
JV
2307 ": %s: link status up for "
2308 "interface %s, enabling it in %d ms.\n",
2309 bond->dev->name, slave->dev->name,
41f89100 2310 ignore_updelay ? 0 :
f0c76d61
JV
2311 bond->params.updelay *
2312 bond->params.miimon);
2313 }
2314 /*FALLTHRU*/
2315 case BOND_LINK_BACK:
2316 if (!link_state) {
2317 slave->link = BOND_LINK_DOWN;
3d632c3f 2318 pr_info(DRV_NAME
1da177e4
LT
2319 ": %s: link status down again after %d "
2320 "ms for interface %s.\n",
1b76b316 2321 bond->dev->name,
f0c76d61
JV
2322 (bond->params.updelay - slave->delay) *
2323 bond->params.miimon,
2324 slave->dev->name);
2325
2326 continue;
2327 }
2328
41f89100
JP
2329 if (ignore_updelay)
2330 slave->delay = 0;
2331
f0c76d61
JV
2332 if (slave->delay <= 0) {
2333 slave->new_link = BOND_LINK_UP;
2334 commit++;
41f89100 2335 ignore_updelay = false;
f0c76d61 2336 continue;
1da177e4 2337 }
f0c76d61
JV
2338
2339 slave->delay--;
1da177e4 2340 break;
f0c76d61
JV
2341 }
2342 }
1da177e4 2343
f0c76d61
JV
2344 return commit;
2345}
1da177e4 2346
f0c76d61
JV
2347static void bond_miimon_commit(struct bonding *bond)
2348{
2349 struct slave *slave;
2350 int i;
2351
2352 bond_for_each_slave(bond, slave, i) {
2353 switch (slave->new_link) {
2354 case BOND_LINK_NOCHANGE:
2355 continue;
1da177e4 2356
f0c76d61
JV
2357 case BOND_LINK_UP:
2358 slave->link = BOND_LINK_UP;
2359 slave->jiffies = jiffies;
2360
2361 if (bond->params.mode == BOND_MODE_8023AD) {
2362 /* prevent it from being the active one */
2363 slave->state = BOND_STATE_BACKUP;
2364 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
2365 /* make it immediately active */
2366 slave->state = BOND_STATE_ACTIVE;
2367 } else if (slave != bond->primary_slave) {
2368 /* prevent it from being the active one */
2369 slave->state = BOND_STATE_BACKUP;
1da177e4 2370 }
1da177e4 2371
3d632c3f 2372 pr_info(DRV_NAME
f0c76d61
JV
2373 ": %s: link status definitely "
2374 "up for interface %s.\n",
2375 bond->dev->name, slave->dev->name);
1da177e4 2376
f0c76d61
JV
2377 /* notify ad that the link status has changed */
2378 if (bond->params.mode == BOND_MODE_8023AD)
2379 bond_3ad_handle_link_change(slave, BOND_LINK_UP);
059fe7a5 2380
58402054 2381 if (bond_is_lb(bond))
f0c76d61
JV
2382 bond_alb_handle_link_change(bond, slave,
2383 BOND_LINK_UP);
1da177e4 2384
f0c76d61
JV
2385 if (!bond->curr_active_slave ||
2386 (slave == bond->primary_slave))
2387 goto do_failover;
1da177e4 2388
f0c76d61 2389 continue;
059fe7a5 2390
f0c76d61 2391 case BOND_LINK_DOWN:
fba4acda
JV
2392 if (slave->link_failure_count < UINT_MAX)
2393 slave->link_failure_count++;
2394
f0c76d61 2395 slave->link = BOND_LINK_DOWN;
1da177e4 2396
f0c76d61
JV
2397 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
2398 bond->params.mode == BOND_MODE_8023AD)
2399 bond_set_slave_inactive_flags(slave);
2400
3d632c3f 2401 pr_info(DRV_NAME
f0c76d61
JV
2402 ": %s: link status definitely down for "
2403 "interface %s, disabling it\n",
2404 bond->dev->name, slave->dev->name);
2405
2406 if (bond->params.mode == BOND_MODE_8023AD)
2407 bond_3ad_handle_link_change(slave,
2408 BOND_LINK_DOWN);
2409
ae63e808 2410 if (bond_is_lb(bond))
f0c76d61
JV
2411 bond_alb_handle_link_change(bond, slave,
2412 BOND_LINK_DOWN);
2413
2414 if (slave == bond->curr_active_slave)
2415 goto do_failover;
2416
2417 continue;
2418
2419 default:
3d632c3f 2420 pr_err(DRV_NAME
f0c76d61
JV
2421 ": %s: invalid new link %d on slave %s\n",
2422 bond->dev->name, slave->new_link,
2423 slave->dev->name);
2424 slave->new_link = BOND_LINK_NOCHANGE;
2425
2426 continue;
2427 }
2428
2429do_failover:
2430 ASSERT_RTNL();
2431 write_lock_bh(&bond->curr_slave_lock);
2432 bond_select_active_slave(bond);
2433 write_unlock_bh(&bond->curr_slave_lock);
2434 }
2435
2436 bond_set_carrier(bond);
1da177e4
LT
2437}
2438
0b0eef66
JV
2439/*
2440 * bond_mii_monitor
2441 *
2442 * Really a wrapper that splits the mii monitor into two phases: an
f0c76d61
JV
2443 * inspection, then (if inspection indicates something needs to be done)
2444 * an acquisition of appropriate locks followed by a commit phase to
2445 * implement whatever link state changes are indicated.
0b0eef66
JV
2446 */
2447void bond_mii_monitor(struct work_struct *work)
2448{
2449 struct bonding *bond = container_of(work, struct bonding,
2450 mii_work.work);
0b0eef66
JV
2451
2452 read_lock(&bond->lock);
f0c76d61
JV
2453 if (bond->kill_timers)
2454 goto out;
2455
2456 if (bond->slave_cnt == 0)
2457 goto re_arm;
b59f9f74
JV
2458
2459 if (bond->send_grat_arp) {
2460 read_lock(&bond->curr_slave_lock);
2461 bond_send_gratuitous_arp(bond);
2462 read_unlock(&bond->curr_slave_lock);
2463 }
2464
305d552a
BH
2465 if (bond->send_unsol_na) {
2466 read_lock(&bond->curr_slave_lock);
2467 bond_send_unsolicited_na(bond);
2468 read_unlock(&bond->curr_slave_lock);
2469 }
2470
f0c76d61 2471 if (bond_miimon_inspect(bond)) {
0b0eef66
JV
2472 read_unlock(&bond->lock);
2473 rtnl_lock();
2474 read_lock(&bond->lock);
f0c76d61
JV
2475
2476 bond_miimon_commit(bond);
2477
5655662d
JV
2478 read_unlock(&bond->lock);
2479 rtnl_unlock(); /* might sleep, hold no other locks */
2480 read_lock(&bond->lock);
0b0eef66
JV
2481 }
2482
f0c76d61
JV
2483re_arm:
2484 if (bond->params.miimon)
2485 queue_delayed_work(bond->wq, &bond->mii_work,
2486 msecs_to_jiffies(bond->params.miimon));
2487out:
0b0eef66 2488 read_unlock(&bond->lock);
0b0eef66 2489}
c3ade5ca 2490
d3bb52b0 2491static __be32 bond_glean_dev_ip(struct net_device *dev)
c3ade5ca
JV
2492{
2493 struct in_device *idev;
2494 struct in_ifaddr *ifa;
a144ea4b 2495 __be32 addr = 0;
c3ade5ca
JV
2496
2497 if (!dev)
2498 return 0;
2499
2500 rcu_read_lock();
e5ed6399 2501 idev = __in_dev_get_rcu(dev);
c3ade5ca
JV
2502 if (!idev)
2503 goto out;
2504
2505 ifa = idev->ifa_list;
2506 if (!ifa)
2507 goto out;
2508
2509 addr = ifa->ifa_local;
2510out:
2511 rcu_read_unlock();
2512 return addr;
2513}
2514
d3bb52b0 2515static int bond_has_this_ip(struct bonding *bond, __be32 ip)
f5b2b966 2516{
0883beca 2517 struct vlan_entry *vlan;
f5b2b966
JV
2518
2519 if (ip == bond->master_ip)
2520 return 1;
2521
0883beca 2522 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
f5b2b966
JV
2523 if (ip == vlan->vlan_ip)
2524 return 1;
2525 }
2526
2527 return 0;
2528}
2529
c3ade5ca
JV
2530/*
2531 * We go to the (large) trouble of VLAN tagging ARP frames because
2532 * switches in VLAN mode (especially if ports are configured as
2533 * "native" to a VLAN) might not pass non-tagged frames.
2534 */
d3bb52b0 2535static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id)
c3ade5ca
JV
2536{
2537 struct sk_buff *skb;
2538
5a03cdb7 2539 pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op,
c3ade5ca 2540 slave_dev->name, dest_ip, src_ip, vlan_id);
3d632c3f 2541
c3ade5ca
JV
2542 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
2543 NULL, slave_dev->dev_addr, NULL);
2544
2545 if (!skb) {
3d632c3f 2546 pr_err(DRV_NAME ": ARP packet allocation failed\n");
c3ade5ca
JV
2547 return;
2548 }
2549 if (vlan_id) {
2550 skb = vlan_put_tag(skb, vlan_id);
2551 if (!skb) {
3d632c3f 2552 pr_err(DRV_NAME ": failed to insert VLAN tag\n");
c3ade5ca
JV
2553 return;
2554 }
2555 }
2556 arp_xmit(skb);
2557}
2558
2559
1da177e4
LT
2560static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2561{
c3ade5ca 2562 int i, vlan_id, rv;
d3bb52b0 2563 __be32 *targets = bond->params.arp_targets;
0883beca 2564 struct vlan_entry *vlan;
c3ade5ca
JV
2565 struct net_device *vlan_dev;
2566 struct flowi fl;
2567 struct rtable *rt;
1da177e4 2568
6b780567
MW
2569 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
2570 if (!targets[i])
5a31bec0 2571 break;
5a03cdb7 2572 pr_debug("basa: target %x\n", targets[i]);
c3ade5ca 2573 if (list_empty(&bond->vlan_list)) {
5a03cdb7 2574 pr_debug("basa: empty vlan: arp_send\n");
c3ade5ca
JV
2575 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2576 bond->master_ip, 0);
2577 continue;
2578 }
2579
2580 /*
2581 * If VLANs are configured, we do a route lookup to
2582 * determine which VLAN interface would be used, so we
2583 * can tag the ARP with the proper VLAN tag.
2584 */
2585 memset(&fl, 0, sizeof(fl));
2586 fl.fl4_dst = targets[i];
2587 fl.fl4_tos = RTO_ONLINK;
2588
f206351a 2589 rv = ip_route_output_key(&init_net, &rt, &fl);
c3ade5ca
JV
2590 if (rv) {
2591 if (net_ratelimit()) {
3d632c3f 2592 pr_warning(DRV_NAME
63779436
HH
2593 ": %s: no route to arp_ip_target %pI4\n",
2594 bond->dev->name, &fl.fl4_dst);
c3ade5ca
JV
2595 }
2596 continue;
2597 }
2598
2599 /*
2600 * This target is not on a VLAN
2601 */
2602 if (rt->u.dst.dev == bond->dev) {
ed4b9f80 2603 ip_rt_put(rt);
5a03cdb7 2604 pr_debug("basa: rtdev == bond->dev: arp_send\n");
c3ade5ca
JV
2605 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2606 bond->master_ip, 0);
2607 continue;
2608 }
2609
2610 vlan_id = 0;
0883beca 2611 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
5c15bdec 2612 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
c3ade5ca
JV
2613 if (vlan_dev == rt->u.dst.dev) {
2614 vlan_id = vlan->vlan_id;
5a03cdb7 2615 pr_debug("basa: vlan match on %s %d\n",
c3ade5ca
JV
2616 vlan_dev->name, vlan_id);
2617 break;
2618 }
2619 }
2620
2621 if (vlan_id) {
ed4b9f80 2622 ip_rt_put(rt);
c3ade5ca
JV
2623 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2624 vlan->vlan_ip, vlan_id);
2625 continue;
2626 }
2627
2628 if (net_ratelimit()) {
3d632c3f 2629 pr_warning(DRV_NAME
63779436
HH
2630 ": %s: no path to arp_ip_target %pI4 via rt.dev %s\n",
2631 bond->dev->name, &fl.fl4_dst,
c3ade5ca
JV
2632 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
2633 }
ed4b9f80 2634 ip_rt_put(rt);
c3ade5ca
JV
2635 }
2636}
2637
2638/*
2639 * Kick out a gratuitous ARP for an IP on the bonding master plus one
2640 * for each VLAN above us.
b59f9f74
JV
2641 *
2642 * Caller must hold curr_slave_lock for read or better
c3ade5ca
JV
2643 */
2644static void bond_send_gratuitous_arp(struct bonding *bond)
2645{
2646 struct slave *slave = bond->curr_active_slave;
2647 struct vlan_entry *vlan;
2648 struct net_device *vlan_dev;
2649
5a03cdb7 2650 pr_debug("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name,
c3ade5ca 2651 slave ? slave->dev->name : "NULL");
b59f9f74
JV
2652
2653 if (!slave || !bond->send_grat_arp ||
2654 test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
c3ade5ca
JV
2655 return;
2656
b59f9f74
JV
2657 bond->send_grat_arp--;
2658
c3ade5ca
JV
2659 if (bond->master_ip) {
2660 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
1053f62c 2661 bond->master_ip, 0);
c3ade5ca
JV
2662 }
2663
2664 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
5c15bdec 2665 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
c3ade5ca
JV
2666 if (vlan->vlan_ip) {
2667 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
2668 vlan->vlan_ip, vlan->vlan_id);
2669 }
1da177e4
LT
2670 }
2671}
2672
d3bb52b0 2673static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
f5b2b966
JV
2674{
2675 int i;
d3bb52b0 2676 __be32 *targets = bond->params.arp_targets;
f5b2b966 2677
f5b2b966 2678 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
5a03cdb7 2679 pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n",
63779436 2680 &sip, &tip, i, &targets[i], bond_has_this_ip(bond, tip));
f5b2b966
JV
2681 if (sip == targets[i]) {
2682 if (bond_has_this_ip(bond, tip))
2683 slave->last_arp_rx = jiffies;
2684 return;
2685 }
2686 }
2687}
2688
2689static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
2690{
2691 struct arphdr *arp;
2692 struct slave *slave;
2693 struct bonding *bond;
2694 unsigned char *arp_ptr;
d3bb52b0 2695 __be32 sip, tip;
f5b2b966 2696
c346dca1 2697 if (dev_net(dev) != &init_net)
e730c155
EB
2698 goto out;
2699
f5b2b966
JV
2700 if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
2701 goto out;
2702
454d7c9b 2703 bond = netdev_priv(dev);
f5b2b966
JV
2704 read_lock(&bond->lock);
2705
5a03cdb7 2706 pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
f5b2b966
JV
2707 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
2708 orig_dev ? orig_dev->name : "NULL");
2709
2710 slave = bond_get_slave_by_dev(bond, orig_dev);
2711 if (!slave || !slave_do_arp_validate(bond, slave))
2712 goto out_unlock;
2713
988b7050 2714 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
f5b2b966
JV
2715 goto out_unlock;
2716
d0a92be0 2717 arp = arp_hdr(skb);
f5b2b966
JV
2718 if (arp->ar_hln != dev->addr_len ||
2719 skb->pkt_type == PACKET_OTHERHOST ||
2720 skb->pkt_type == PACKET_LOOPBACK ||
2721 arp->ar_hrd != htons(ARPHRD_ETHER) ||
2722 arp->ar_pro != htons(ETH_P_IP) ||
2723 arp->ar_pln != 4)
2724 goto out_unlock;
2725
2726 arp_ptr = (unsigned char *)(arp + 1);
2727 arp_ptr += dev->addr_len;
2728 memcpy(&sip, arp_ptr, 4);
2729 arp_ptr += 4 + dev->addr_len;
2730 memcpy(&tip, arp_ptr, 4);
2731
5a03cdb7 2732 pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n",
63779436
HH
2733 bond->dev->name, slave->dev->name, slave->state,
2734 bond->params.arp_validate, slave_do_arp_validate(bond, slave),
2735 &sip, &tip);
f5b2b966
JV
2736
2737 /*
2738 * Backup slaves won't see the ARP reply, but do come through
2739 * here for each ARP probe (so we swap the sip/tip to validate
2740 * the probe). In a "redundant switch, common router" type of
2741 * configuration, the ARP probe will (hopefully) travel from
2742 * the active, through one switch, the router, then the other
2743 * switch before reaching the backup.
2744 */
2745 if (slave->state == BOND_STATE_ACTIVE)
2746 bond_validate_arp(bond, slave, sip, tip);
2747 else
2748 bond_validate_arp(bond, slave, tip, sip);
2749
2750out_unlock:
2751 read_unlock(&bond->lock);
2752out:
2753 dev_kfree_skb(skb);
2754 return NET_RX_SUCCESS;
2755}
2756
1da177e4
LT
2757/*
2758 * this function is called regularly to monitor each slave's link
2759 * ensuring that traffic is being sent and received when arp monitoring
2760 * is used in load-balancing mode. if the adapter has been dormant, then an
2761 * arp is transmitted to generate traffic. see activebackup_arp_monitor for
2762 * arp monitoring in active backup mode.
2763 */
1b76b316 2764void bond_loadbalance_arp_mon(struct work_struct *work)
1da177e4 2765{
1b76b316
JV
2766 struct bonding *bond = container_of(work, struct bonding,
2767 arp_work.work);
1da177e4
LT
2768 struct slave *slave, *oldcurrent;
2769 int do_failover = 0;
2770 int delta_in_ticks;
2771 int i;
2772
2773 read_lock(&bond->lock);
2774
5ce0da8f 2775 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
1da177e4 2776
3d632c3f 2777 if (bond->kill_timers)
1da177e4 2778 goto out;
1da177e4 2779
3d632c3f 2780 if (bond->slave_cnt == 0)
1da177e4 2781 goto re_arm;
1da177e4
LT
2782
2783 read_lock(&bond->curr_slave_lock);
2784 oldcurrent = bond->curr_active_slave;
2785 read_unlock(&bond->curr_slave_lock);
2786
2787 /* see if any of the previous devices are up now (i.e. they have
2788 * xmt and rcv traffic). the curr_active_slave does not come into
2789 * the picture unless it is null. also, slave->jiffies is not needed
2790 * here because we send an arp on each slave and give a slave as
2791 * long as it needs to get the tx/rx within the delta.
2792 * TODO: what about up/down delay in arp mode? it wasn't here before
2793 * so it can wait
2794 */
2795 bond_for_each_slave(bond, slave, i) {
2796 if (slave->link != BOND_LINK_UP) {
9d21493b 2797 if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) &&
b63bb739 2798 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
1da177e4
LT
2799
2800 slave->link = BOND_LINK_UP;
2801 slave->state = BOND_STATE_ACTIVE;
2802
2803 /* primary_slave has no meaning in round-robin
2804 * mode. the window of a slave being up and
2805 * curr_active_slave being null after enslaving
2806 * is closed.
2807 */
2808 if (!oldcurrent) {
3d632c3f 2809 pr_info(DRV_NAME
1da177e4
LT
2810 ": %s: link status definitely "
2811 "up for interface %s, ",
1b76b316 2812 bond->dev->name,
1da177e4
LT
2813 slave->dev->name);
2814 do_failover = 1;
2815 } else {
3d632c3f 2816 pr_info(DRV_NAME
1da177e4 2817 ": %s: interface %s is now up\n",
1b76b316 2818 bond->dev->name,
1da177e4
LT
2819 slave->dev->name);
2820 }
2821 }
2822 } else {
2823 /* slave->link == BOND_LINK_UP */
2824
2825 /* not all switches will respond to an arp request
2826 * when the source ip is 0, so don't take the link down
2827 * if we don't know our ip yet
2828 */
9d21493b 2829 if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) ||
4b8a9239 2830 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
1da177e4
LT
2831
2832 slave->link = BOND_LINK_DOWN;
2833 slave->state = BOND_STATE_BACKUP;
2834
3d632c3f 2835 if (slave->link_failure_count < UINT_MAX)
1da177e4 2836 slave->link_failure_count++;
1da177e4 2837
3d632c3f 2838 pr_info(DRV_NAME
1da177e4 2839 ": %s: interface %s is now down.\n",
1b76b316 2840 bond->dev->name,
1da177e4
LT
2841 slave->dev->name);
2842
3d632c3f 2843 if (slave == oldcurrent)
1da177e4 2844 do_failover = 1;
1da177e4
LT
2845 }
2846 }
2847
2848 /* note: if switch is in round-robin mode, all links
2849 * must tx arp to ensure all links rx an arp - otherwise
2850 * links may oscillate or not come up at all; if switch is
2851 * in something like xor mode, there is nothing we can
2852 * do - all replies will be rx'ed on same link causing slaves
2853 * to be unstable during low/no traffic periods
2854 */
3d632c3f 2855 if (IS_UP(slave->dev))
1da177e4 2856 bond_arp_send_all(bond, slave);
1da177e4
LT
2857 }
2858
2859 if (do_failover) {
059fe7a5 2860 write_lock_bh(&bond->curr_slave_lock);
1da177e4
LT
2861
2862 bond_select_active_slave(bond);
2863
059fe7a5 2864 write_unlock_bh(&bond->curr_slave_lock);
1da177e4
LT
2865 }
2866
2867re_arm:
1b76b316
JV
2868 if (bond->params.arp_interval)
2869 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
1da177e4
LT
2870out:
2871 read_unlock(&bond->lock);
2872}
2873
2874/*
b2220cad
JV
2875 * Called to inspect slaves for active-backup mode ARP monitor link state
2876 * changes. Sets new_link in slaves to specify what action should take
2877 * place for the slave. Returns 0 if no changes are found, >0 if changes
2878 * to link states must be committed.
2879 *
2880 * Called with bond->lock held for read.
1da177e4 2881 */
b2220cad 2882static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
1da177e4 2883{
1da177e4 2884 struct slave *slave;
b2220cad 2885 int i, commit = 0;
1da177e4 2886
b2220cad
JV
2887 bond_for_each_slave(bond, slave, i) {
2888 slave->new_link = BOND_LINK_NOCHANGE;
1da177e4 2889
b2220cad
JV
2890 if (slave->link != BOND_LINK_UP) {
2891 if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
2892 delta_in_ticks)) {
2893 slave->new_link = BOND_LINK_UP;
2894 commit++;
2895 }
1da177e4 2896
b2220cad
JV
2897 continue;
2898 }
1da177e4 2899
b2220cad
JV
2900 /*
2901 * Give slaves 2*delta after being enslaved or made
2902 * active. This avoids bouncing, as the last receive
2903 * times need a full ARP monitor cycle to be updated.
2904 */
2905 if (!time_after_eq(jiffies, slave->jiffies +
2906 2 * delta_in_ticks))
2907 continue;
2908
2909 /*
2910 * Backup slave is down if:
2911 * - No current_arp_slave AND
2912 * - more than 3*delta since last receive AND
2913 * - the bond has an IP address
2914 *
2915 * Note: a non-null current_arp_slave indicates
2916 * the curr_active_slave went down and we are
2917 * searching for a new one; under this condition
2918 * we only take the curr_active_slave down - this
2919 * gives each slave a chance to tx/rx traffic
2920 * before being taken out
2921 */
2922 if (slave->state == BOND_STATE_BACKUP &&
2923 !bond->current_arp_slave &&
2924 time_after(jiffies, slave_last_rx(bond, slave) +
2925 3 * delta_in_ticks)) {
2926 slave->new_link = BOND_LINK_DOWN;
2927 commit++;
2928 }
2929
2930 /*
2931 * Active slave is down if:
2932 * - more than 2*delta since transmitting OR
2933 * - (more than 2*delta since receive AND
2934 * the bond has an IP address)
2935 */
2936 if ((slave->state == BOND_STATE_ACTIVE) &&
9d21493b 2937 (time_after_eq(jiffies, dev_trans_start(slave->dev) +
b2220cad
JV
2938 2 * delta_in_ticks) ||
2939 (time_after_eq(jiffies, slave_last_rx(bond, slave)
2940 + 2 * delta_in_ticks)))) {
2941 slave->new_link = BOND_LINK_DOWN;
2942 commit++;
2943 }
1da177e4
LT
2944 }
2945
b2220cad
JV
2946 return commit;
2947}
1da177e4 2948
b2220cad
JV
2949/*
2950 * Called to commit link state changes noted by inspection step of
2951 * active-backup mode ARP monitor.
2952 *
2953 * Called with RTNL and bond->lock for read.
2954 */
2955static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2956{
2957 struct slave *slave;
2958 int i;
1da177e4 2959
b2220cad
JV
2960 bond_for_each_slave(bond, slave, i) {
2961 switch (slave->new_link) {
2962 case BOND_LINK_NOCHANGE:
2963 continue;
ff59c456 2964
b2220cad 2965 case BOND_LINK_UP:
b9f60253
JP
2966 if ((!bond->curr_active_slave &&
2967 time_before_eq(jiffies,
2968 dev_trans_start(slave->dev) +
2969 delta_in_ticks)) ||
2970 bond->curr_active_slave != slave) {
b2220cad 2971 slave->link = BOND_LINK_UP;
b2220cad
JV
2972 bond->current_arp_slave = NULL;
2973
3d632c3f 2974 pr_info(DRV_NAME
b9f60253
JP
2975 ": %s: link status definitely "
2976 "up for interface %s.\n",
2977 bond->dev->name, slave->dev->name);
b2220cad 2978
b9f60253
JP
2979 if (!bond->curr_active_slave ||
2980 (slave == bond->primary_slave))
2981 goto do_failover;
1da177e4 2982
b9f60253 2983 }
1da177e4 2984
b9f60253 2985 continue;
1da177e4 2986
b2220cad
JV
2987 case BOND_LINK_DOWN:
2988 if (slave->link_failure_count < UINT_MAX)
2989 slave->link_failure_count++;
2990
2991 slave->link = BOND_LINK_DOWN;
b9f60253 2992 bond_set_slave_inactive_flags(slave);
b2220cad 2993
b9f60253
JP
2994 pr_info(DRV_NAME
2995 ": %s: link status definitely down for "
2996 "interface %s, disabling it\n",
2997 bond->dev->name, slave->dev->name);
b2220cad 2998
b9f60253 2999 if (slave == bond->curr_active_slave) {
b2220cad 3000 bond->current_arp_slave = NULL;
b9f60253 3001 goto do_failover;
1da177e4 3002 }
b9f60253
JP
3003
3004 continue;
b2220cad
JV
3005
3006 default:
3d632c3f 3007 pr_err(DRV_NAME
b2220cad
JV
3008 ": %s: impossible: new_link %d on slave %s\n",
3009 bond->dev->name, slave->new_link,
3010 slave->dev->name);
b9f60253 3011 continue;
1da177e4 3012 }
1da177e4 3013
b9f60253
JP
3014do_failover:
3015 ASSERT_RTNL();
b2220cad 3016 write_lock_bh(&bond->curr_slave_lock);
b9f60253 3017 bond_select_active_slave(bond);
b2220cad
JV
3018 write_unlock_bh(&bond->curr_slave_lock);
3019 }
1da177e4 3020
b2220cad
JV
3021 bond_set_carrier(bond);
3022}
1da177e4 3023
b2220cad
JV
3024/*
3025 * Send ARP probes for active-backup mode ARP monitor.
3026 *
3027 * Called with bond->lock held for read.
3028 */
3029static void bond_ab_arp_probe(struct bonding *bond)
3030{
3031 struct slave *slave;
3032 int i;
1da177e4 3033
b2220cad 3034 read_lock(&bond->curr_slave_lock);
1da177e4 3035
b2220cad 3036 if (bond->current_arp_slave && bond->curr_active_slave)
3d632c3f 3037 pr_info(DRV_NAME "PROBE: c_arp %s && cas %s BAD\n",
b2220cad
JV
3038 bond->current_arp_slave->dev->name,
3039 bond->curr_active_slave->dev->name);
1da177e4 3040
b2220cad
JV
3041 if (bond->curr_active_slave) {
3042 bond_arp_send_all(bond, bond->curr_active_slave);
3043 read_unlock(&bond->curr_slave_lock);
3044 return;
3045 }
1da177e4 3046
b2220cad 3047 read_unlock(&bond->curr_slave_lock);
059fe7a5 3048
b2220cad
JV
3049 /* if we don't have a curr_active_slave, search for the next available
3050 * backup slave from the current_arp_slave and make it the candidate
3051 * for becoming the curr_active_slave
3052 */
1da177e4 3053
b2220cad
JV
3054 if (!bond->current_arp_slave) {
3055 bond->current_arp_slave = bond->first_slave;
3056 if (!bond->current_arp_slave)
3057 return;
3058 }
1da177e4 3059
b2220cad 3060 bond_set_slave_inactive_flags(bond->current_arp_slave);
059fe7a5 3061
b2220cad
JV
3062 /* search for next candidate */
3063 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3064 if (IS_UP(slave->dev)) {
3065 slave->link = BOND_LINK_BACK;
3066 bond_set_slave_active_flags(slave);
3067 bond_arp_send_all(bond, slave);
1da177e4 3068 slave->jiffies = jiffies;
b2220cad
JV
3069 bond->current_arp_slave = slave;
3070 break;
1da177e4
LT
3071 }
3072
b2220cad
JV
3073 /* if the link state is up at this point, we
3074 * mark it down - this can happen if we have
3075 * simultaneous link failures and
3076 * reselect_active_interface doesn't make this
3077 * one the current slave so it is still marked
3078 * up when it is actually down
1da177e4 3079 */
b2220cad
JV
3080 if (slave->link == BOND_LINK_UP) {
3081 slave->link = BOND_LINK_DOWN;
3082 if (slave->link_failure_count < UINT_MAX)
3083 slave->link_failure_count++;
1da177e4 3084
b2220cad
JV
3085 bond_set_slave_inactive_flags(slave);
3086
3d632c3f 3087 pr_info(DRV_NAME
b2220cad
JV
3088 ": %s: backup interface %s is now down.\n",
3089 bond->dev->name, slave->dev->name);
1da177e4 3090 }
b2220cad
JV
3091 }
3092}
1da177e4 3093
b2220cad
JV
3094void bond_activebackup_arp_mon(struct work_struct *work)
3095{
3096 struct bonding *bond = container_of(work, struct bonding,
3097 arp_work.work);
3098 int delta_in_ticks;
1da177e4 3099
b2220cad 3100 read_lock(&bond->lock);
1da177e4 3101
b2220cad
JV
3102 if (bond->kill_timers)
3103 goto out;
1da177e4 3104
b2220cad 3105 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
1da177e4 3106
b2220cad
JV
3107 if (bond->slave_cnt == 0)
3108 goto re_arm;
3109
b59f9f74
JV
3110 if (bond->send_grat_arp) {
3111 read_lock(&bond->curr_slave_lock);
3112 bond_send_gratuitous_arp(bond);
3113 read_unlock(&bond->curr_slave_lock);
3114 }
3115
305d552a
BH
3116 if (bond->send_unsol_na) {
3117 read_lock(&bond->curr_slave_lock);
3118 bond_send_unsolicited_na(bond);
3119 read_unlock(&bond->curr_slave_lock);
3120 }
3121
b2220cad
JV
3122 if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3123 read_unlock(&bond->lock);
3124 rtnl_lock();
3125 read_lock(&bond->lock);
3126
3127 bond_ab_arp_commit(bond, delta_in_ticks);
3128
3129 read_unlock(&bond->lock);
3130 rtnl_unlock();
3131 read_lock(&bond->lock);
1da177e4
LT
3132 }
3133
b2220cad
JV
3134 bond_ab_arp_probe(bond);
3135
1da177e4 3136re_arm:
3d632c3f 3137 if (bond->params.arp_interval)
1b76b316 3138 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
1da177e4
LT
3139out:
3140 read_unlock(&bond->lock);
3141}
3142
3143/*------------------------------ proc/seq_file-------------------------------*/
3144
3145#ifdef CONFIG_PROC_FS
3146
1da177e4 3147static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
1f78d9f9
HE
3148 __acquires(&dev_base_lock)
3149 __acquires(&bond->lock)
1da177e4
LT
3150{
3151 struct bonding *bond = seq->private;
3152 loff_t off = 0;
3153 struct slave *slave;
3154 int i;
3155
3156 /* make sure the bond won't be taken away */
3157 read_lock(&dev_base_lock);
6603a6f2 3158 read_lock(&bond->lock);
1da177e4 3159
3d632c3f 3160 if (*pos == 0)
1da177e4 3161 return SEQ_START_TOKEN;
1da177e4
LT
3162
3163 bond_for_each_slave(bond, slave, i) {
3d632c3f 3164 if (++off == *pos)
1da177e4 3165 return slave;
1da177e4
LT
3166 }
3167
3168 return NULL;
3169}
3170
3171static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3172{
3173 struct bonding *bond = seq->private;
3174 struct slave *slave = v;
3175
3176 ++*pos;
3d632c3f 3177 if (v == SEQ_START_TOKEN)
1da177e4 3178 return bond->first_slave;
1da177e4
LT
3179
3180 slave = slave->next;
3181
3182 return (slave == bond->first_slave) ? NULL : slave;
3183}
3184
3185static void bond_info_seq_stop(struct seq_file *seq, void *v)
1f78d9f9
HE
3186 __releases(&bond->lock)
3187 __releases(&dev_base_lock)
1da177e4
LT
3188{
3189 struct bonding *bond = seq->private;
3190
6603a6f2 3191 read_unlock(&bond->lock);
1da177e4
LT
3192 read_unlock(&dev_base_lock);
3193}
3194
3195static void bond_info_show_master(struct seq_file *seq)
3196{
3197 struct bonding *bond = seq->private;
3198 struct slave *curr;
4756b02f 3199 int i;
1da177e4
LT
3200
3201 read_lock(&bond->curr_slave_lock);
3202 curr = bond->curr_active_slave;
3203 read_unlock(&bond->curr_slave_lock);
3204
dd957c57 3205 seq_printf(seq, "Bonding Mode: %s",
1da177e4
LT
3206 bond_mode_name(bond->params.mode));
3207
dd957c57
JV
3208 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
3209 bond->params.fail_over_mac)
3915c1e8
JV
3210 seq_printf(seq, " (fail_over_mac %s)",
3211 fail_over_mac_tbl[bond->params.fail_over_mac].modename);
dd957c57
JV
3212
3213 seq_printf(seq, "\n");
3214
c61b75ad
MW
3215 if (bond->params.mode == BOND_MODE_XOR ||
3216 bond->params.mode == BOND_MODE_8023AD) {
3217 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
3218 xmit_hashtype_tbl[bond->params.xmit_policy].modename,
3219 bond->params.xmit_policy);
3220 }
3221
1da177e4 3222 if (USES_PRIMARY(bond->params.mode)) {
a549952a 3223 seq_printf(seq, "Primary Slave: %s",
0f418b2a
MW
3224 (bond->primary_slave) ?
3225 bond->primary_slave->dev->name : "None");
a549952a
JP
3226 if (bond->primary_slave)
3227 seq_printf(seq, " (primary_reselect %s)",
3228 pri_reselect_tbl[bond->params.primary_reselect].modename);
1da177e4 3229
a549952a 3230 seq_printf(seq, "\nCurrently Active Slave: %s\n",
1da177e4
LT
3231 (curr) ? curr->dev->name : "None");
3232 }
3233
ff59c456
JV
3234 seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
3235 "up" : "down");
1da177e4
LT
3236 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon);
3237 seq_printf(seq, "Up Delay (ms): %d\n",
3238 bond->params.updelay * bond->params.miimon);
3239 seq_printf(seq, "Down Delay (ms): %d\n",
3240 bond->params.downdelay * bond->params.miimon);
3241
4756b02f
MW
3242
3243 /* ARP information */
3d632c3f
SH
3244 if (bond->params.arp_interval > 0) {
3245 int printed = 0;
4756b02f
MW
3246 seq_printf(seq, "ARP Polling Interval (ms): %d\n",
3247 bond->params.arp_interval);
3248
3249 seq_printf(seq, "ARP IP target/s (n.n.n.n form):");
3250
3d632c3f 3251 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
4756b02f 3252 if (!bond->params.arp_targets[i])
5a31bec0 3253 break;
4756b02f
MW
3254 if (printed)
3255 seq_printf(seq, ",");
8cf14e38 3256 seq_printf(seq, " %pI4", &bond->params.arp_targets[i]);
4756b02f
MW
3257 printed = 1;
3258 }
3259 seq_printf(seq, "\n");
3260 }
3261
1da177e4
LT
3262 if (bond->params.mode == BOND_MODE_8023AD) {
3263 struct ad_info ad_info;
3264
3265 seq_puts(seq, "\n802.3ad info\n");
3266 seq_printf(seq, "LACP rate: %s\n",
3267 (bond->params.lacp_fast) ? "fast" : "slow");
fd989c83
JV
3268 seq_printf(seq, "Aggregator selection policy (ad_select): %s\n",
3269 ad_select_tbl[bond->params.ad_select].modename);
1da177e4
LT
3270
3271 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3272 seq_printf(seq, "bond %s has no active aggregator\n",
3273 bond->dev->name);
3274 } else {
3275 seq_printf(seq, "Active Aggregator Info:\n");
3276
3277 seq_printf(seq, "\tAggregator ID: %d\n",
3278 ad_info.aggregator_id);
3279 seq_printf(seq, "\tNumber of ports: %d\n",
3280 ad_info.ports);
3281 seq_printf(seq, "\tActor Key: %d\n",
3282 ad_info.actor_key);
3283 seq_printf(seq, "\tPartner Key: %d\n",
3284 ad_info.partner_key);
e174961c
JB
3285 seq_printf(seq, "\tPartner Mac Address: %pM\n",
3286 ad_info.partner_system);
1da177e4
LT
3287 }
3288 }
3289}
3290
3d632c3f
SH
3291static void bond_info_show_slave(struct seq_file *seq,
3292 const struct slave *slave)
1da177e4
LT
3293{
3294 struct bonding *bond = seq->private;
3295
3296 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
3297 seq_printf(seq, "MII Status: %s\n",
3298 (slave->link == BOND_LINK_UP) ? "up" : "down");
65509645 3299 seq_printf(seq, "Link Failure Count: %u\n",
1da177e4
LT
3300 slave->link_failure_count);
3301
e174961c 3302 seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);
1da177e4
LT
3303
3304 if (bond->params.mode == BOND_MODE_8023AD) {
3305 const struct aggregator *agg
3306 = SLAVE_AD_INFO(slave).port.aggregator;
3307
3d632c3f 3308 if (agg)
1da177e4
LT
3309 seq_printf(seq, "Aggregator ID: %d\n",
3310 agg->aggregator_identifier);
3d632c3f 3311 else
1da177e4 3312 seq_puts(seq, "Aggregator ID: N/A\n");
1da177e4
LT
3313 }
3314}
3315
3316static int bond_info_seq_show(struct seq_file *seq, void *v)
3317{
3318 if (v == SEQ_START_TOKEN) {
3319 seq_printf(seq, "%s\n", version);
3320 bond_info_show_master(seq);
3d632c3f 3321 } else
1da177e4 3322 bond_info_show_slave(seq, v);
1da177e4
LT
3323
3324 return 0;
3325}
3326
4101dec9 3327static const struct seq_operations bond_info_seq_ops = {
1da177e4
LT
3328 .start = bond_info_seq_start,
3329 .next = bond_info_seq_next,
3330 .stop = bond_info_seq_stop,
3331 .show = bond_info_seq_show,
3332};
3333
3334static int bond_info_open(struct inode *inode, struct file *file)
3335{
3336 struct seq_file *seq;
3337 struct proc_dir_entry *proc;
3338 int res;
3339
3340 res = seq_open(file, &bond_info_seq_ops);
3341 if (!res) {
3342 /* recover the pointer buried in proc_dir_entry data */
3343 seq = file->private_data;
3344 proc = PDE(inode);
3345 seq->private = proc->data;
3346 }
3347
3348 return res;
3349}
3350
d54b1fdb 3351static const struct file_operations bond_info_fops = {
1da177e4
LT
3352 .owner = THIS_MODULE,
3353 .open = bond_info_open,
3354 .read = seq_read,
3355 .llseek = seq_lseek,
3356 .release = seq_release,
3357};
3358
38fc0026 3359static void bond_create_proc_entry(struct bonding *bond)
1da177e4
LT
3360{
3361 struct net_device *bond_dev = bond->dev;
3362
3363 if (bond_proc_dir) {
a95609cb
DL
3364 bond->proc_entry = proc_create_data(bond_dev->name,
3365 S_IRUGO, bond_proc_dir,
3366 &bond_info_fops, bond);
3d632c3f
SH
3367 if (bond->proc_entry == NULL)
3368 pr_warning(DRV_NAME
1da177e4
LT
3369 ": Warning: Cannot create /proc/net/%s/%s\n",
3370 DRV_NAME, bond_dev->name);
3d632c3f 3371 else
1da177e4 3372 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
1da177e4 3373 }
1da177e4
LT
3374}
3375
3376static void bond_remove_proc_entry(struct bonding *bond)
3377{
3378 if (bond_proc_dir && bond->proc_entry) {
3379 remove_proc_entry(bond->proc_file_name, bond_proc_dir);
3380 memset(bond->proc_file_name, 0, IFNAMSIZ);
3381 bond->proc_entry = NULL;
3382 }
3383}
3384
3385/* Create the bonding directory under /proc/net, if doesn't exist yet.
3386 * Caller must hold rtnl_lock.
3387 */
3388static void bond_create_proc_dir(void)
3389{
1da177e4 3390 if (!bond_proc_dir) {
457c4cbc 3391 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
99b76233 3392 if (!bond_proc_dir)
3d632c3f 3393 pr_warning(DRV_NAME
1da177e4
LT
3394 ": Warning: cannot create /proc/net/%s\n",
3395 DRV_NAME);
1da177e4
LT
3396 }
3397}
3398
3399/* Destroy the bonding directory under /proc/net, if empty.
3400 * Caller must hold rtnl_lock.
3401 */
3402static void bond_destroy_proc_dir(void)
3403{
99b76233 3404 if (bond_proc_dir) {
457c4cbc 3405 remove_proc_entry(DRV_NAME, init_net.proc_net);
1da177e4
LT
3406 bond_proc_dir = NULL;
3407 }
3408}
aee64faf
JP
3409
3410#else /* !CONFIG_PROC_FS */
3411
/*
 * Without CONFIG_PROC_FS the proc helpers are empty stubs, so callers
 * need no #ifdefs of their own.
 */
static void bond_create_proc_entry(struct bonding *bond)
{
	/* no procfs: nothing to create */
}

static void bond_remove_proc_entry(struct bonding *bond)
{
	/* no procfs: nothing to remove */
}

static void bond_create_proc_dir(void)
{
	/* no procfs: nothing to create */
}

static void bond_destroy_proc_dir(void)
{
	/* no procfs: nothing to remove */
}
3427
1da177e4
LT
3428#endif /* CONFIG_PROC_FS */
3429
aee64faf 3430
1da177e4
LT
3431/*-------------------------- netdev event handling --------------------------*/
3432
3433/*
3434 * Change device name
3435 */
3436static int bond_event_changename(struct bonding *bond)
3437{
1da177e4
LT
3438 bond_remove_proc_entry(bond);
3439 bond_create_proc_entry(bond);
7e083840 3440
1da177e4
LT
3441 return NOTIFY_DONE;
3442}
3443
3d632c3f
SH
3444static int bond_master_netdev_event(unsigned long event,
3445 struct net_device *bond_dev)
1da177e4 3446{
454d7c9b 3447 struct bonding *event_bond = netdev_priv(bond_dev);
1da177e4
LT
3448
3449 switch (event) {
3450 case NETDEV_CHANGENAME:
3451 return bond_event_changename(event_bond);
1da177e4
LT
3452 default:
3453 break;
3454 }
3455
3456 return NOTIFY_DONE;
3457}
3458
3d632c3f
SH
3459static int bond_slave_netdev_event(unsigned long event,
3460 struct net_device *slave_dev)
1da177e4
LT
3461{
3462 struct net_device *bond_dev = slave_dev->master;
454d7c9b 3463 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
3464
3465 switch (event) {
3466 case NETDEV_UNREGISTER:
3467 if (bond_dev) {
1284cd3a
JV
3468 if (bond->setup_by_slave)
3469 bond_release_and_destroy(bond_dev, slave_dev);
3470 else
3471 bond_release(bond_dev, slave_dev);
1da177e4
LT
3472 }
3473 break;
3474 case NETDEV_CHANGE:
17d04500
JV
3475 if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) {
3476 struct slave *slave;
3477
3478 slave = bond_get_slave_by_dev(bond, slave_dev);
3479 if (slave) {
3480 u16 old_speed = slave->speed;
3481 u16 old_duplex = slave->duplex;
3482
3483 bond_update_speed_duplex(slave);
3484
3485 if (bond_is_lb(bond))
3486 break;
3487
3488 if (old_speed != slave->speed)
3489 bond_3ad_adapter_speed_changed(slave);
3490 if (old_duplex != slave->duplex)
3491 bond_3ad_adapter_duplex_changed(slave);
3492 }
3493 }
3494
1da177e4
LT
3495 break;
3496 case NETDEV_DOWN:
3497 /*
3498 * ... Or is it this?
3499 */
3500 break;
3501 case NETDEV_CHANGEMTU:
3502 /*
3503 * TODO: Should slaves be allowed to
3504 * independently alter their MTU? For
3505 * an active-backup bond, slaves need
3506 * not be the same type of device, so
3507 * MTUs may vary. For other modes,
3508 * slaves arguably should have the
3509 * same MTUs. To do this, we'd need to
3510 * take over the slave's change_mtu
3511 * function for the duration of their
3512 * servitude.
3513 */
3514 break;
3515 case NETDEV_CHANGENAME:
3516 /*
3517 * TODO: handle changing the primary's name
3518 */
3519 break;
8531c5ff
AK
3520 case NETDEV_FEAT_CHANGE:
3521 bond_compute_features(bond);
3522 break;
1da177e4
LT
3523 default:
3524 break;
3525 }
3526
3527 return NOTIFY_DONE;
3528}
3529
3530/*
3531 * bond_netdev_event: handle netdev notifier chain events.
3532 *
3533 * This function receives events for the netdev chain. The caller (an
e041c683 3534 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
1da177e4
LT
3535 * locks for us to safely manipulate the slave devices (RTNL lock,
3536 * dev_probe_lock).
3537 */
3d632c3f
SH
3538static int bond_netdev_event(struct notifier_block *this,
3539 unsigned long event, void *ptr)
1da177e4
LT
3540{
3541 struct net_device *event_dev = (struct net_device *)ptr;
3542
c346dca1 3543 if (dev_net(event_dev) != &init_net)
e9dc8653
EB
3544 return NOTIFY_DONE;
3545
5a03cdb7 3546 pr_debug("event_dev: %s, event: %lx\n",
1da177e4
LT
3547 (event_dev ? event_dev->name : "None"),
3548 event);
3549
0b680e75
JV
3550 if (!(event_dev->priv_flags & IFF_BONDING))
3551 return NOTIFY_DONE;
3552
1da177e4 3553 if (event_dev->flags & IFF_MASTER) {
5a03cdb7 3554 pr_debug("IFF_MASTER\n");
1da177e4
LT
3555 return bond_master_netdev_event(event, event_dev);
3556 }
3557
3558 if (event_dev->flags & IFF_SLAVE) {
5a03cdb7 3559 pr_debug("IFF_SLAVE\n");
1da177e4
LT
3560 return bond_slave_netdev_event(event, event_dev);
3561 }
3562
3563 return NOTIFY_DONE;
3564}
3565
c3ade5ca
JV
3566/*
3567 * bond_inetaddr_event: handle inetaddr notifier chain events.
3568 *
3569 * We keep track of device IPs primarily to use as source addresses in
3570 * ARP monitor probes (rather than spewing out broadcasts all the time).
3571 *
3572 * We track one IP for the main device (if it has one), plus one per VLAN.
3573 */
3574static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
3575{
3576 struct in_ifaddr *ifa = ptr;
3577 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
0883beca
PE
3578 struct bonding *bond;
3579 struct vlan_entry *vlan;
c3ade5ca 3580
c346dca1 3581 if (dev_net(ifa->ifa_dev->dev) != &init_net)
6133fb1a
DL
3582 return NOTIFY_DONE;
3583
0883beca 3584 list_for_each_entry(bond, &bond_dev_list, bond_list) {
c3ade5ca
JV
3585 if (bond->dev == event_dev) {
3586 switch (event) {
3587 case NETDEV_UP:
3588 bond->master_ip = ifa->ifa_local;
3589 return NOTIFY_OK;
3590 case NETDEV_DOWN:
3591 bond->master_ip = bond_glean_dev_ip(bond->dev);
3592 return NOTIFY_OK;
3593 default:
3594 return NOTIFY_DONE;
3595 }
3596 }
3597
0883beca 3598 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
5c15bdec 3599 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
c3ade5ca
JV
3600 if (vlan_dev == event_dev) {
3601 switch (event) {
3602 case NETDEV_UP:
3603 vlan->vlan_ip = ifa->ifa_local;
3604 return NOTIFY_OK;
3605 case NETDEV_DOWN:
3606 vlan->vlan_ip =
3607 bond_glean_dev_ip(vlan_dev);
3608 return NOTIFY_OK;
3609 default:
3610 return NOTIFY_DONE;
3611 }
3612 }
3613 }
3614 }
3615 return NOTIFY_DONE;
3616}
3617
1da177e4
LT
3618static struct notifier_block bond_netdev_notifier = {
3619 .notifier_call = bond_netdev_event,
3620};
3621
c3ade5ca
JV
3622static struct notifier_block bond_inetaddr_notifier = {
3623 .notifier_call = bond_inetaddr_event,
3624};
3625
1da177e4
LT
3626/*-------------------------- Packet type handling ---------------------------*/
3627
3628/* register to receive lacpdus on a bond */
3629static void bond_register_lacpdu(struct bonding *bond)
3630{
3631 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type);
3632
3633 /* initialize packet type */
3634 pk_type->type = PKT_TYPE_LACPDU;
3635 pk_type->dev = bond->dev;
3636 pk_type->func = bond_3ad_lacpdu_recv;
3637
3638 dev_add_pack(pk_type);
3639}
3640
3641/* unregister to receive lacpdus on a bond */
3642static void bond_unregister_lacpdu(struct bonding *bond)
3643{
3644 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
3645}
3646
f5b2b966
JV
3647void bond_register_arp(struct bonding *bond)
3648{
3649 struct packet_type *pt = &bond->arp_mon_pt;
3650
c4f283b1
JV
3651 if (pt->type)
3652 return;
3653
f5b2b966 3654 pt->type = htons(ETH_P_ARP);
e245cb71 3655 pt->dev = bond->dev;
f5b2b966
JV
3656 pt->func = bond_arp_rcv;
3657 dev_add_pack(pt);
3658}
3659
3660void bond_unregister_arp(struct bonding *bond)
3661{
c4f283b1
JV
3662 struct packet_type *pt = &bond->arp_mon_pt;
3663
3664 dev_remove_pack(pt);
3665 pt->type = 0;
f5b2b966
JV
3666}
3667
169a3e66
JV
3668/*---------------------------- Hashing Policies -----------------------------*/
3669
6f6652be
JV
3670/*
3671 * Hash for the output device based upon layer 2 and layer 3 data. If
3672 * the packet is not IP mimic bond_xmit_hash_policy_l2()
3673 */
a361c83c 3674static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
6f6652be
JV
3675{
3676 struct ethhdr *data = (struct ethhdr *)skb->data;
3677 struct iphdr *iph = ip_hdr(skb);
3678
f14c4e4e 3679 if (skb->protocol == htons(ETH_P_IP)) {
6f6652be 3680 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
d3da6831 3681 (data->h_dest[5] ^ data->h_source[5])) % count;
6f6652be
JV
3682 }
3683
d3da6831 3684 return (data->h_dest[5] ^ data->h_source[5]) % count;
6f6652be
JV
3685}
3686
169a3e66 3687/*
59c51591 3688 * Hash for the output device based upon layer 3 and layer 4 data. If
169a3e66
JV
3689 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
3690 * altogether not IP, mimic bond_xmit_hash_policy_l2()
3691 */
a361c83c 3692static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
169a3e66
JV
3693{
3694 struct ethhdr *data = (struct ethhdr *)skb->data;
eddc9ec5 3695 struct iphdr *iph = ip_hdr(skb);
d3bb52b0 3696 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
169a3e66
JV
3697 int layer4_xor = 0;
3698
f14c4e4e
BH
3699 if (skb->protocol == htons(ETH_P_IP)) {
3700 if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) &&
169a3e66
JV
3701 (iph->protocol == IPPROTO_TCP ||
3702 iph->protocol == IPPROTO_UDP)) {
d3bb52b0 3703 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
169a3e66
JV
3704 }
3705 return (layer4_xor ^
3706 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
3707
3708 }
3709
d3da6831 3710 return (data->h_dest[5] ^ data->h_source[5]) % count;
169a3e66
JV
3711}
3712
3713/*
3714 * Hash for the output device based upon layer 2 data
3715 */
a361c83c 3716static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
169a3e66
JV
3717{
3718 struct ethhdr *data = (struct ethhdr *)skb->data;
3719
d3da6831 3720 return (data->h_dest[5] ^ data->h_source[5]) % count;
169a3e66
JV
3721}
3722
1da177e4
LT
3723/*-------------------------- Device entry points ----------------------------*/
3724
3725static int bond_open(struct net_device *bond_dev)
3726{
454d7c9b 3727 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
3728
3729 bond->kill_timers = 0;
3730
58402054 3731 if (bond_is_lb(bond)) {
1da177e4
LT
3732 /* bond_alb_initialize must be called before the timer
3733 * is started.
3734 */
3735 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) {
3736 /* something went wrong - fail the open operation */
3737 return -1;
3738 }
3739
1b76b316
JV
3740 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3741 queue_delayed_work(bond->wq, &bond->alb_work, 0);
1da177e4
LT
3742 }
3743
3744 if (bond->params.miimon) { /* link check interval, in milliseconds. */
1b76b316
JV
3745 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3746 queue_delayed_work(bond->wq, &bond->mii_work, 0);
1da177e4
LT
3747 }
3748
3749 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
1b76b316
JV
3750 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3751 INIT_DELAYED_WORK(&bond->arp_work,
3752 bond_activebackup_arp_mon);
3753 else
3754 INIT_DELAYED_WORK(&bond->arp_work,
3755 bond_loadbalance_arp_mon);
3756
3757 queue_delayed_work(bond->wq, &bond->arp_work, 0);
f5b2b966
JV
3758 if (bond->params.arp_validate)
3759 bond_register_arp(bond);
1da177e4
LT
3760 }
3761
3762 if (bond->params.mode == BOND_MODE_8023AD) {
a40745f5 3763 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
1b76b316 3764 queue_delayed_work(bond->wq, &bond->ad_work, 0);
1da177e4
LT
3765 /* register to receive LACPDUs */
3766 bond_register_lacpdu(bond);
fd989c83 3767 bond_3ad_initiate_agg_selection(bond, 1);
1da177e4
LT
3768 }
3769
3770 return 0;
3771}
3772
3773static int bond_close(struct net_device *bond_dev)
3774{
454d7c9b 3775 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
3776
3777 if (bond->params.mode == BOND_MODE_8023AD) {
3778 /* Unregister the receive of LACPDUs */
3779 bond_unregister_lacpdu(bond);
3780 }
3781
f5b2b966
JV
3782 if (bond->params.arp_validate)
3783 bond_unregister_arp(bond);
3784
1da177e4
LT
3785 write_lock_bh(&bond->lock);
3786
b59f9f74 3787 bond->send_grat_arp = 0;
305d552a 3788 bond->send_unsol_na = 0;
1da177e4
LT
3789
3790 /* signal timers not to re-arm */
3791 bond->kill_timers = 1;
3792
3793 write_unlock_bh(&bond->lock);
3794
1da177e4 3795 if (bond->params.miimon) { /* link check interval, in milliseconds. */
1b76b316 3796 cancel_delayed_work(&bond->mii_work);
1da177e4
LT
3797 }
3798
3799 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
1b76b316 3800 cancel_delayed_work(&bond->arp_work);
1da177e4
LT
3801 }
3802
3803 switch (bond->params.mode) {
3804 case BOND_MODE_8023AD:
1b76b316 3805 cancel_delayed_work(&bond->ad_work);
1da177e4
LT
3806 break;
3807 case BOND_MODE_TLB:
3808 case BOND_MODE_ALB:
1b76b316 3809 cancel_delayed_work(&bond->alb_work);
1da177e4
LT
3810 break;
3811 default:
3812 break;
3813 }
3814
1da177e4 3815
58402054 3816 if (bond_is_lb(bond)) {
1da177e4
LT
3817 /* Must be called only after all
3818 * slaves have been released
3819 */
3820 bond_alb_deinitialize(bond);
3821 }
3822
3823 return 0;
3824}
3825
3826static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
3827{
454d7c9b 3828 struct bonding *bond = netdev_priv(bond_dev);
eeda3fd6 3829 struct net_device_stats *stats = &bond->stats;
2439f9eb 3830 struct net_device_stats local_stats;
1da177e4
LT
3831 struct slave *slave;
3832 int i;
3833
2439f9eb 3834 memset(&local_stats, 0, sizeof(struct net_device_stats));
1da177e4
LT
3835
3836 read_lock_bh(&bond->lock);
3837
3838 bond_for_each_slave(bond, slave, i) {
eeda3fd6
SH
3839 const struct net_device_stats *sstats = dev_get_stats(slave->dev);
3840
2439f9eb
AG
3841 local_stats.rx_packets += sstats->rx_packets;
3842 local_stats.rx_bytes += sstats->rx_bytes;
3843 local_stats.rx_errors += sstats->rx_errors;
3844 local_stats.rx_dropped += sstats->rx_dropped;
3845
3846 local_stats.tx_packets += sstats->tx_packets;
3847 local_stats.tx_bytes += sstats->tx_bytes;
3848 local_stats.tx_errors += sstats->tx_errors;
3849 local_stats.tx_dropped += sstats->tx_dropped;
3850
3851 local_stats.multicast += sstats->multicast;
3852 local_stats.collisions += sstats->collisions;
3853
3854 local_stats.rx_length_errors += sstats->rx_length_errors;
3855 local_stats.rx_over_errors += sstats->rx_over_errors;
3856 local_stats.rx_crc_errors += sstats->rx_crc_errors;
3857 local_stats.rx_frame_errors += sstats->rx_frame_errors;
3858 local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
3859 local_stats.rx_missed_errors += sstats->rx_missed_errors;
3860
3861 local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
3862 local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
3863 local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
3864 local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
3865 local_stats.tx_window_errors += sstats->tx_window_errors;
3866 }
3867
3868 memcpy(stats, &local_stats, sizeof(struct net_device_stats));
1da177e4
LT
3869
3870 read_unlock_bh(&bond->lock);
3871
3872 return stats;
3873}
3874
3875static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
3876{
3877 struct net_device *slave_dev = NULL;
3878 struct ifbond k_binfo;
3879 struct ifbond __user *u_binfo = NULL;
3880 struct ifslave k_sinfo;
3881 struct ifslave __user *u_sinfo = NULL;
3882 struct mii_ioctl_data *mii = NULL;
1da177e4
LT
3883 int res = 0;
3884
5a03cdb7 3885 pr_debug("bond_ioctl: master=%s, cmd=%d\n",
1da177e4
LT
3886 bond_dev->name, cmd);
3887
3888 switch (cmd) {
1da177e4
LT
3889 case SIOCGMIIPHY:
3890 mii = if_mii(ifr);
3d632c3f 3891 if (!mii)
1da177e4 3892 return -EINVAL;
3d632c3f 3893
1da177e4
LT
3894 mii->phy_id = 0;
3895 /* Fall Through */
3896 case SIOCGMIIREG:
3897 /*
3898 * We do this again just in case we were called by SIOCGMIIREG
3899 * instead of SIOCGMIIPHY.
3900 */
3901 mii = if_mii(ifr);
3d632c3f 3902 if (!mii)
1da177e4 3903 return -EINVAL;
3d632c3f 3904
1da177e4
LT
3905
3906 if (mii->reg_num == 1) {
454d7c9b 3907 struct bonding *bond = netdev_priv(bond_dev);
1da177e4 3908 mii->val_out = 0;
6603a6f2 3909 read_lock(&bond->lock);
1da177e4 3910 read_lock(&bond->curr_slave_lock);
3d632c3f 3911 if (netif_carrier_ok(bond->dev))
1da177e4 3912 mii->val_out = BMSR_LSTATUS;
3d632c3f 3913
1da177e4 3914 read_unlock(&bond->curr_slave_lock);
6603a6f2 3915 read_unlock(&bond->lock);
1da177e4
LT
3916 }
3917
3918 return 0;
3919 case BOND_INFO_QUERY_OLD:
3920 case SIOCBONDINFOQUERY:
3921 u_binfo = (struct ifbond __user *)ifr->ifr_data;
3922
3d632c3f 3923 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond)))
1da177e4 3924 return -EFAULT;
1da177e4
LT
3925
3926 res = bond_info_query(bond_dev, &k_binfo);
3d632c3f
SH
3927 if (res == 0 &&
3928 copy_to_user(u_binfo, &k_binfo, sizeof(ifbond)))
3929 return -EFAULT;
1da177e4
LT
3930
3931 return res;
3932 case BOND_SLAVE_INFO_QUERY_OLD:
3933 case SIOCBONDSLAVEINFOQUERY:
3934 u_sinfo = (struct ifslave __user *)ifr->ifr_data;
3935
3d632c3f 3936 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave)))
1da177e4 3937 return -EFAULT;
1da177e4
LT
3938
3939 res = bond_slave_info_query(bond_dev, &k_sinfo);
3d632c3f
SH
3940 if (res == 0 &&
3941 copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave)))
3942 return -EFAULT;
1da177e4
LT
3943
3944 return res;
3945 default:
3946 /* Go on */
3947 break;
3948 }
3949
3d632c3f 3950 if (!capable(CAP_NET_ADMIN))
1da177e4 3951 return -EPERM;
1da177e4 3952
881d966b 3953 slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave);
1da177e4 3954
5a03cdb7 3955 pr_debug("slave_dev=%p: \n", slave_dev);
1da177e4 3956
3d632c3f 3957 if (!slave_dev)
1da177e4 3958 res = -ENODEV;
3d632c3f 3959 else {
5a03cdb7 3960 pr_debug("slave_dev->name=%s: \n", slave_dev->name);
1da177e4
LT
3961 switch (cmd) {
3962 case BOND_ENSLAVE_OLD:
3963 case SIOCBONDENSLAVE:
3964 res = bond_enslave(bond_dev, slave_dev);
3965 break;
3966 case BOND_RELEASE_OLD:
3967 case SIOCBONDRELEASE:
3968 res = bond_release(bond_dev, slave_dev);
3969 break;
3970 case BOND_SETHWADDR_OLD:
3971 case SIOCBONDSETHWADDR:
3972 res = bond_sethwaddr(bond_dev, slave_dev);
3973 break;
3974 case BOND_CHANGE_ACTIVE_OLD:
3975 case SIOCBONDCHANGEACTIVE:
3976 res = bond_ioctl_change_active(bond_dev, slave_dev);
3977 break;
3978 default:
3979 res = -EOPNOTSUPP;
3980 }
3981
3982 dev_put(slave_dev);
3983 }
3984
1da177e4
LT
3985 return res;
3986}
3987
3988static void bond_set_multicast_list(struct net_device *bond_dev)
3989{
454d7c9b 3990 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
3991 struct dev_mc_list *dmi;
3992
1da177e4
LT
3993 /*
3994 * Do promisc before checking multicast_mode
3995 */
3d632c3f 3996 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC))
7e1a1ac1
WC
3997 /*
3998 * FIXME: Need to handle the error when one of the multi-slaves
3999 * encounters error.
4000 */
1da177e4 4001 bond_set_promiscuity(bond, 1);
1da177e4 4002
3d632c3f
SH
4003
4004 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC))
1da177e4 4005 bond_set_promiscuity(bond, -1);
3d632c3f 4006
1da177e4
LT
4007
4008 /* set allmulti flag to slaves */
3d632c3f 4009 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI))
7e1a1ac1
WC
4010 /*
4011 * FIXME: Need to handle the error when one of the multi-slaves
4012 * encounters error.
4013 */
1da177e4 4014 bond_set_allmulti(bond, 1);
1da177e4 4015
3d632c3f
SH
4016
4017 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI))
1da177e4 4018 bond_set_allmulti(bond, -1);
3d632c3f 4019
1da177e4 4020
80ee5ad2
JV
4021 read_lock(&bond->lock);
4022
1da177e4
LT
4023 bond->flags = bond_dev->flags;
4024
4025 /* looking for addresses to add to slaves' mc list */
4026 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
3d632c3f 4027 if (!bond_mc_list_find_dmi(dmi, bond->mc_list))
1da177e4 4028 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
1da177e4
LT
4029 }
4030
4031 /* looking for addresses to delete from slaves' list */
4032 for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
3d632c3f 4033 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list))
1da177e4 4034 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
1da177e4
LT
4035 }
4036
4037 /* save master's multicast list */
4038 bond_mc_list_destroy(bond);
4039 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
4040
80ee5ad2 4041 read_unlock(&bond->lock);
1da177e4
LT
4042}
4043
00829823
SH
4044static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms)
4045{
4046 struct bonding *bond = netdev_priv(dev);
4047 struct slave *slave = bond->first_slave;
4048
4049 if (slave) {
4050 const struct net_device_ops *slave_ops
4051 = slave->dev->netdev_ops;
4052 if (slave_ops->ndo_neigh_setup)
72e2240f 4053 return slave_ops->ndo_neigh_setup(slave->dev, parms);
00829823
SH
4054 }
4055 return 0;
4056}
4057
1da177e4
LT
4058/*
4059 * Change the MTU of all of a master's slaves to match the master
4060 */
4061static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
4062{
454d7c9b 4063 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
4064 struct slave *slave, *stop_at;
4065 int res = 0;
4066 int i;
4067
5a03cdb7 4068 pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond,
1da177e4
LT
4069 (bond_dev ? bond_dev->name : "None"), new_mtu);
4070
4071 /* Can't hold bond->lock with bh disabled here since
4072 * some base drivers panic. On the other hand we can't
4073 * hold bond->lock without bh disabled because we'll
4074 * deadlock. The only solution is to rely on the fact
4075 * that we're under rtnl_lock here, and the slaves
4076 * list won't change. This doesn't solve the problem
4077 * of setting the slave's MTU while it is
4078 * transmitting, but the assumption is that the base
4079 * driver can handle that.
4080 *
4081 * TODO: figure out a way to safely iterate the slaves
4082 * list, but without holding a lock around the actual
4083 * call to the base driver.
4084 */
4085
4086 bond_for_each_slave(bond, slave, i) {
5a03cdb7 4087 pr_debug("s %p s->p %p c_m %p\n", slave,
53a3294e 4088 slave->prev, slave->dev->netdev_ops->ndo_change_mtu);
e944ef79 4089
1da177e4
LT
4090 res = dev_set_mtu(slave->dev, new_mtu);
4091
4092 if (res) {
4093 /* If we failed to set the slave's mtu to the new value
4094 * we must abort the operation even in ACTIVE_BACKUP
4095 * mode, because if we allow the backup slaves to have
4096 * different mtu values than the active slave we'll
4097 * need to change their mtu when doing a failover. That
4098 * means changing their mtu from timer context, which
4099 * is probably not a good idea.
4100 */
5a03cdb7 4101 pr_debug("err %d %s\n", res, slave->dev->name);
1da177e4
LT
4102 goto unwind;
4103 }
4104 }
4105
4106 bond_dev->mtu = new_mtu;
4107
4108 return 0;
4109
4110unwind:
4111 /* unwind from head to the slave that failed */
4112 stop_at = slave;
4113 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4114 int tmp_res;
4115
4116 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu);
4117 if (tmp_res) {
5a03cdb7 4118 pr_debug("unwind err %d dev %s\n", tmp_res,
1da177e4
LT
4119 slave->dev->name);
4120 }
4121 }
4122
4123 return res;
4124}
4125
4126/*
4127 * Change HW address
4128 *
4129 * Note that many devices must be down to change the HW address, and
4130 * downing the master releases all slaves. We can make bonds full of
4131 * bonding devices to test this, however.
4132 */
4133static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
4134{
454d7c9b 4135 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
4136 struct sockaddr *sa = addr, tmp_sa;
4137 struct slave *slave, *stop_at;
4138 int res = 0;
4139 int i;
4140
eb7cc59a
SH
4141 if (bond->params.mode == BOND_MODE_ALB)
4142 return bond_alb_set_mac_address(bond_dev, addr);
4143
4144
5a03cdb7 4145 pr_debug("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
1da177e4 4146
dd957c57 4147 /*
3915c1e8
JV
4148 * If fail_over_mac is set to active, do nothing and return
4149 * success. Returning an error causes ifenslave to fail.
dd957c57 4150 */
3915c1e8 4151 if (bond->params.fail_over_mac == BOND_FOM_ACTIVE)
dd957c57 4152 return 0;
2ab82852 4153
3d632c3f 4154 if (!is_valid_ether_addr(sa->sa_data))
1da177e4 4155 return -EADDRNOTAVAIL;
1da177e4
LT
4156
4157 /* Can't hold bond->lock with bh disabled here since
4158 * some base drivers panic. On the other hand we can't
4159 * hold bond->lock without bh disabled because we'll
4160 * deadlock. The only solution is to rely on the fact
4161 * that we're under rtnl_lock here, and the slaves
4162 * list won't change. This doesn't solve the problem
4163 * of setting the slave's hw address while it is
4164 * transmitting, but the assumption is that the base
4165 * driver can handle that.
4166 *
4167 * TODO: figure out a way to safely iterate the slaves
4168 * list, but without holding a lock around the actual
4169 * call to the base driver.
4170 */
4171
4172 bond_for_each_slave(bond, slave, i) {
eb7cc59a 4173 const struct net_device_ops *slave_ops = slave->dev->netdev_ops;
5a03cdb7 4174 pr_debug("slave %p %s\n", slave, slave->dev->name);
1da177e4 4175
eb7cc59a 4176 if (slave_ops->ndo_set_mac_address == NULL) {
1da177e4 4177 res = -EOPNOTSUPP;
5a03cdb7 4178 pr_debug("EOPNOTSUPP %s\n", slave->dev->name);
1da177e4
LT
4179 goto unwind;
4180 }
4181
4182 res = dev_set_mac_address(slave->dev, addr);
4183 if (res) {
4184 /* TODO: consider downing the slave
4185 * and retry ?
4186 * User should expect communications
4187 * breakage anyway until ARP finish
4188 * updating, so...
4189 */
5a03cdb7 4190 pr_debug("err %d %s\n", res, slave->dev->name);
1da177e4
LT
4191 goto unwind;
4192 }
4193 }
4194
4195 /* success */
4196 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
4197 return 0;
4198
4199unwind:
4200 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
4201 tmp_sa.sa_family = bond_dev->type;
4202
4203 /* unwind from head to the slave that failed */
4204 stop_at = slave;
4205 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4206 int tmp_res;
4207
4208 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa);
4209 if (tmp_res) {
5a03cdb7 4210 pr_debug("unwind err %d dev %s\n", tmp_res,
1da177e4
LT
4211 slave->dev->name);
4212 }
4213 }
4214
4215 return res;
4216}
4217
4218static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
4219{
454d7c9b 4220 struct bonding *bond = netdev_priv(bond_dev);
1da177e4 4221 struct slave *slave, *start_at;
cf5f9044 4222 int i, slave_no, res = 1;
1da177e4
LT
4223
4224 read_lock(&bond->lock);
4225
3d632c3f 4226 if (!BOND_IS_OK(bond))
1da177e4 4227 goto out;
1da177e4 4228
cf5f9044
JV
4229 /*
4230 * Concurrent TX may collide on rr_tx_counter; we accept that
4231 * as being rare enough not to justify using an atomic op here
4232 */
4233 slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
1da177e4 4234
cf5f9044
JV
4235 bond_for_each_slave(bond, slave, i) {
4236 slave_no--;
3d632c3f 4237 if (slave_no < 0)
cf5f9044 4238 break;
1da177e4
LT
4239 }
4240
cf5f9044 4241 start_at = slave;
1da177e4
LT
4242 bond_for_each_slave_from(bond, slave, i, start_at) {
4243 if (IS_UP(slave->dev) &&
4244 (slave->link == BOND_LINK_UP) &&
4245 (slave->state == BOND_STATE_ACTIVE)) {
4246 res = bond_dev_queue_xmit(bond, skb, slave->dev);
1da177e4
LT
4247 break;
4248 }
4249 }
4250
1da177e4
LT
4251out:
4252 if (res) {
4253 /* no suitable interface, frame not sent */
4254 dev_kfree_skb(skb);
4255 }
4256 read_unlock(&bond->lock);
ec634fe3 4257 return NETDEV_TX_OK;
1da177e4
LT
4258}
4259
075897ce 4260
1da177e4
LT
4261/*
4262 * in active-backup mode, we know that bond->curr_active_slave is always valid if
4263 * the bond has a usable interface.
4264 */
4265static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
4266{
454d7c9b 4267 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
4268 int res = 1;
4269
1da177e4
LT
4270 read_lock(&bond->lock);
4271 read_lock(&bond->curr_slave_lock);
4272
3d632c3f 4273 if (!BOND_IS_OK(bond))
1da177e4 4274 goto out;
1da177e4 4275
075897ce
JL
4276 if (!bond->curr_active_slave)
4277 goto out;
4278
075897ce
JL
4279 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
4280
1da177e4 4281out:
3d632c3f 4282 if (res)
1da177e4
LT
4283 /* no suitable interface, frame not sent */
4284 dev_kfree_skb(skb);
3d632c3f 4285
1da177e4
LT
4286 read_unlock(&bond->curr_slave_lock);
4287 read_unlock(&bond->lock);
ec634fe3 4288 return NETDEV_TX_OK;
1da177e4
LT
4289}
4290
4291/*
169a3e66
JV
4292 * In bond_xmit_xor() , we determine the output device by using a pre-
4293 * determined xmit_hash_policy(), If the selected device is not enabled,
4294 * find the next active slave.
1da177e4
LT
4295 */
4296static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
4297{
454d7c9b 4298 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
4299 struct slave *slave, *start_at;
4300 int slave_no;
4301 int i;
4302 int res = 1;
4303
4304 read_lock(&bond->lock);
4305
3d632c3f 4306 if (!BOND_IS_OK(bond))
1da177e4 4307 goto out;
1da177e4 4308
a361c83c 4309 slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt);
1da177e4
LT
4310
4311 bond_for_each_slave(bond, slave, i) {
4312 slave_no--;
3d632c3f 4313 if (slave_no < 0)
1da177e4 4314 break;
1da177e4
LT
4315 }
4316
4317 start_at = slave;
4318
4319 bond_for_each_slave_from(bond, slave, i, start_at) {
4320 if (IS_UP(slave->dev) &&
4321 (slave->link == BOND_LINK_UP) &&
4322 (slave->state == BOND_STATE_ACTIVE)) {
4323 res = bond_dev_queue_xmit(bond, skb, slave->dev);
4324 break;
4325 }
4326 }
4327
4328out:
4329 if (res) {
4330 /* no suitable interface, frame not sent */
4331 dev_kfree_skb(skb);
4332 }
4333 read_unlock(&bond->lock);
ec634fe3 4334 return NETDEV_TX_OK;
1da177e4
LT
4335}
4336
4337/*
4338 * in broadcast mode, we send everything to all usable interfaces.
4339 */
4340static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
4341{
454d7c9b 4342 struct bonding *bond = netdev_priv(bond_dev);
1da177e4
LT
4343 struct slave *slave, *start_at;
4344 struct net_device *tx_dev = NULL;
4345 int i;
4346 int res = 1;
4347
4348 read_lock(&bond->lock);
4349
3d632c3f 4350 if (!BOND_IS_OK(bond))
1da177e4 4351 goto out;
1da177e4
LT
4352
4353 read_lock(&bond->curr_slave_lock);
4354 start_at = bond->curr_active_slave;
4355 read_unlock(&bond->curr_slave_lock);
4356
3d632c3f 4357 if (!start_at)
1da177e4 4358 goto out;
1da177e4
LT
4359
4360 bond_for_each_slave_from(bond, slave, i, start_at) {
4361 if (IS_UP(slave->dev) &&
4362 (slave->link == BOND_LINK_UP) &&
4363 (slave->state == BOND_STATE_ACTIVE)) {
4364 if (tx_dev) {
4365 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
4366 if (!skb2) {
3d632c3f 4367 pr_err(DRV_NAME
4e0952c7
MW
4368 ": %s: Error: bond_xmit_broadcast(): "
4369 "skb_clone() failed\n",
4370 bond_dev->name);
1da177e4
LT
4371 continue;
4372 }
4373
4374 res = bond_dev_queue_xmit(bond, skb2, tx_dev);
4375 if (res) {
4376 dev_kfree_skb(skb2);
4377 continue;
4378 }
4379 }
4380 tx_dev = slave->dev;
4381 }
4382 }
4383
3d632c3f 4384 if (tx_dev)
1da177e4 4385 res = bond_dev_queue_xmit(bond, skb, tx_dev);
1da177e4
LT
4386
4387out:
3d632c3f 4388 if (res)
1da177e4
LT
4389 /* no suitable interface, frame not sent */
4390 dev_kfree_skb(skb);
3d632c3f 4391
1da177e4
LT
4392 /* frame sent to all suitable interfaces */
4393 read_unlock(&bond->lock);
ec634fe3 4394 return NETDEV_TX_OK;
1da177e4
LT
4395}
4396
4397/*------------------------- Device initialization ---------------------------*/
4398
6f6652be
JV
4399static void bond_set_xmit_hash_policy(struct bonding *bond)
4400{
4401 switch (bond->params.xmit_policy) {
4402 case BOND_XMIT_POLICY_LAYER23:
4403 bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
4404 break;
4405 case BOND_XMIT_POLICY_LAYER34:
4406 bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
4407 break;
4408 case BOND_XMIT_POLICY_LAYER2:
4409 default:
4410 bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
4411 break;
4412 }
4413}
4414
424efe9c 4415static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
00829823
SH
4416{
4417 const struct bonding *bond = netdev_priv(dev);
4418
4419 switch (bond->params.mode) {
4420 case BOND_MODE_ROUNDROBIN:
4421 return bond_xmit_roundrobin(skb, dev);
4422 case BOND_MODE_ACTIVEBACKUP:
4423 return bond_xmit_activebackup(skb, dev);
4424 case BOND_MODE_XOR:
4425 return bond_xmit_xor(skb, dev);
4426 case BOND_MODE_BROADCAST:
4427 return bond_xmit_broadcast(skb, dev);
4428 case BOND_MODE_8023AD:
4429 return bond_3ad_xmit_xor(skb, dev);
4430 case BOND_MODE_ALB:
4431 case BOND_MODE_TLB:
4432 return bond_alb_xmit(skb, dev);
4433 default:
4434 /* Should never happen, mode already checked */
3d632c3f 4435 pr_err(DRV_NAME ": %s: Error: Unknown bonding mode %d\n",
00829823
SH
4436 dev->name, bond->params.mode);
4437 WARN_ON_ONCE(1);
4438 dev_kfree_skb(skb);
4439 return NETDEV_TX_OK;
4440 }
4441}
4442
4443
1da177e4
LT
4444/*
4445 * set bond mode specific net device operations
4446 */
a77b5325 4447void bond_set_mode_ops(struct bonding *bond, int mode)
1da177e4 4448{
169a3e66
JV
4449 struct net_device *bond_dev = bond->dev;
4450
1da177e4
LT
4451 switch (mode) {
4452 case BOND_MODE_ROUNDROBIN:
1da177e4
LT
4453 break;
4454 case BOND_MODE_ACTIVEBACKUP:
1da177e4
LT
4455 break;
4456 case BOND_MODE_XOR:
6f6652be 4457 bond_set_xmit_hash_policy(bond);
1da177e4
LT
4458 break;
4459 case BOND_MODE_BROADCAST:
1da177e4
LT
4460 break;
4461 case BOND_MODE_8023AD:
8f903c70 4462 bond_set_master_3ad_flags(bond);
6f6652be 4463 bond_set_xmit_hash_policy(bond);
1da177e4 4464 break;
1da177e4 4465 case BOND_MODE_ALB:
8f903c70
JV
4466 bond_set_master_alb_flags(bond);
4467 /* FALLTHRU */
4468 case BOND_MODE_TLB:
1da177e4
LT
4469 break;
4470 default:
4471 /* Should never happen, mode already checked */
3d632c3f 4472 pr_err(DRV_NAME
4e0952c7
MW
4473 ": %s: Error: Unknown bonding mode %d\n",
4474 bond_dev->name,
1da177e4
LT
4475 mode);
4476 break;
4477 }
4478}
4479
217df670
JV
4480static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
4481 struct ethtool_drvinfo *drvinfo)
4482{
4483 strncpy(drvinfo->driver, DRV_NAME, 32);
4484 strncpy(drvinfo->version, DRV_VERSION, 32);
4485 snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
4486}
4487
7282d491 4488static const struct ethtool_ops bond_ethtool_ops = {
217df670 4489 .get_drvinfo = bond_ethtool_get_drvinfo,
fa53ebac
SH
4490 .get_link = ethtool_op_get_link,
4491 .get_tx_csum = ethtool_op_get_tx_csum,
4492 .get_sg = ethtool_op_get_sg,
4493 .get_tso = ethtool_op_get_tso,
4494 .get_ufo = ethtool_op_get_ufo,
4495 .get_flags = ethtool_op_get_flags,
8531c5ff
AK
4496};
4497
eb7cc59a 4498static const struct net_device_ops bond_netdev_ops = {
181470fc 4499 .ndo_init = bond_init,
9e71626c 4500 .ndo_uninit = bond_uninit,
eb7cc59a
SH
4501 .ndo_open = bond_open,
4502 .ndo_stop = bond_close,
00829823 4503 .ndo_start_xmit = bond_start_xmit,
eb7cc59a
SH
4504 .ndo_get_stats = bond_get_stats,
4505 .ndo_do_ioctl = bond_do_ioctl,
4506 .ndo_set_multicast_list = bond_set_multicast_list,
4507 .ndo_change_mtu = bond_change_mtu,
eb7cc59a 4508 .ndo_set_mac_address = bond_set_mac_address,
00829823 4509 .ndo_neigh_setup = bond_neigh_setup,
eb7cc59a
SH
4510 .ndo_vlan_rx_register = bond_vlan_rx_register,
4511 .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
4512 .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
4513};
4514
181470fc 4515static void bond_setup(struct net_device *bond_dev)
1da177e4 4516{
454d7c9b 4517 struct bonding *bond = netdev_priv(bond_dev);
1da177e4 4518
1da177e4
LT
4519 /* initialize rwlocks */
4520 rwlock_init(&bond->lock);
4521 rwlock_init(&bond->curr_slave_lock);
4522
d2991f75 4523 bond->params = bonding_defaults;
1da177e4
LT
4524
4525 /* Initialize pointers */
1da177e4
LT
4526 bond->dev = bond_dev;
4527 INIT_LIST_HEAD(&bond->vlan_list);
4528
4529 /* Initialize the device entry points */
181470fc 4530 ether_setup(bond_dev);
eb7cc59a 4531 bond_dev->netdev_ops = &bond_netdev_ops;
8531c5ff 4532 bond_dev->ethtool_ops = &bond_ethtool_ops;
169a3e66 4533 bond_set_mode_ops(bond, bond->params.mode);
1da177e4 4534
9e71626c 4535 bond_dev->destructor = free_netdev;
1da177e4
LT
4536
4537 /* Initialize the device options */
4538 bond_dev->tx_queue_len = 0;
4539 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
0b680e75 4540 bond_dev->priv_flags |= IFF_BONDING;
181470fc
SH
4541 bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
4542
6cf3f41e
JV
4543 if (bond->params.arp_interval)
4544 bond_dev->priv_flags |= IFF_MASTER_ARPMON;
1da177e4
LT
4545
4546 /* At first, we block adding VLANs. That's the only way to
4547 * prevent problems that occur when adding VLANs over an
4548 * empty bond. The block will be removed once non-challenged
4549 * slaves are enslaved.
4550 */
4551 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
4552
932ff279 4553 /* don't acquire bond device's netif_tx_lock when
1da177e4
LT
4554 * transmitting */
4555 bond_dev->features |= NETIF_F_LLTX;
4556
4557 /* By default, we declare the bond to be fully
4558 * VLAN hardware accelerated capable. Special
4559 * care is taken in the various xmit functions
4560 * when there are slaves that are not hw accel
4561 * capable
4562 */
1da177e4
LT
4563 bond_dev->features |= (NETIF_F_HW_VLAN_TX |
4564 NETIF_F_HW_VLAN_RX |
4565 NETIF_F_HW_VLAN_FILTER);
4566
1da177e4
LT
4567}
4568
fdaea7a9
JV
4569static void bond_work_cancel_all(struct bonding *bond)
4570{
4571 write_lock_bh(&bond->lock);
4572 bond->kill_timers = 1;
4573 write_unlock_bh(&bond->lock);
4574
4575 if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
4576 cancel_delayed_work(&bond->mii_work);
4577
4578 if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
4579 cancel_delayed_work(&bond->arp_work);
4580
4581 if (bond->params.mode == BOND_MODE_ALB &&
4582 delayed_work_pending(&bond->alb_work))
4583 cancel_delayed_work(&bond->alb_work);
4584
4585 if (bond->params.mode == BOND_MODE_8023AD &&
4586 delayed_work_pending(&bond->ad_work))
4587 cancel_delayed_work(&bond->ad_work);
4588}
4589
c67dfb29
EB
4590/*
4591* Destroy a bonding device.
4592* Must be under rtnl_lock when this function is called.
4593*/
4594static void bond_uninit(struct net_device *bond_dev)
a434e43f 4595{
454d7c9b 4596 struct bonding *bond = netdev_priv(bond_dev);
a434e43f 4597
c67dfb29
EB
4598 /* Release the bonded slaves */
4599 bond_release_all(bond_dev);
4600
a434e43f
JV
4601 list_del(&bond->bond_list);
4602
4603 bond_work_cancel_all(bond);
4604
a434e43f 4605 bond_remove_proc_entry(bond);
c67dfb29
EB
4606
4607 if (bond->wq)
4608 destroy_workqueue(bond->wq);
4609
4610 netif_addr_lock_bh(bond_dev);
4611 bond_mc_list_destroy(bond);
4612 netif_addr_unlock_bh(bond_dev);
a434e43f
JV
4613}
4614
1da177e4
LT
4615/* Unregister and free all bond devices.
4616 * Caller must hold rtnl_lock.
4617 */
4618static void bond_free_all(void)
4619{
4620 struct bonding *bond, *nxt;
4621
4622 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
4623 struct net_device *bond_dev = bond->dev;
4624
9e71626c 4625 unregister_netdevice(bond_dev);
1da177e4
LT
4626 }
4627
1da177e4 4628 bond_destroy_proc_dir();
1da177e4
LT
4629}
4630
4631/*------------------------- Module initialization ---------------------------*/
4632
4633/*
4634 * Convert string input module parms. Accept either the
ece95f7f
JV
4635 * number of the mode or its string name. A bit complicated because
4636 * some mode names are substrings of other names, and calls from sysfs
4637 * may have whitespace in the name (trailing newlines, for example).
1da177e4 4638 */
325dcf7a 4639int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
1da177e4 4640{
54b87323 4641 int modeint = -1, i, rv;
a42e534f 4642 char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
ece95f7f 4643
a42e534f
JV
4644 for (p = (char *)buf; *p; p++)
4645 if (!(isdigit(*p) || isspace(*p)))
4646 break;
4647
4648 if (*p)
ece95f7f 4649 rv = sscanf(buf, "%20s", modestr);
a42e534f 4650 else
54b87323 4651 rv = sscanf(buf, "%d", &modeint);
a42e534f
JV
4652
4653 if (!rv)
4654 return -1;
1da177e4
LT
4655
4656 for (i = 0; tbl[i].modename; i++) {
54b87323 4657 if (modeint == tbl[i].mode)
ece95f7f
JV
4658 return tbl[i].mode;
4659 if (strcmp(modestr, tbl[i].modename) == 0)
1da177e4 4660 return tbl[i].mode;
1da177e4
LT
4661 }
4662
4663 return -1;
4664}
4665
4666static int bond_check_params(struct bond_params *params)
4667{
a549952a 4668 int arp_validate_value, fail_over_mac_value, primary_reselect_value;
f5b2b966 4669
1da177e4
LT
4670 /*
4671 * Convert string parameters.
4672 */
4673 if (mode) {
4674 bond_mode = bond_parse_parm(mode, bond_mode_tbl);
4675 if (bond_mode == -1) {
3d632c3f 4676 pr_err(DRV_NAME
1da177e4
LT
4677 ": Error: Invalid bonding mode \"%s\"\n",
4678 mode == NULL ? "NULL" : mode);
4679 return -EINVAL;
4680 }
4681 }
4682
169a3e66
JV
4683 if (xmit_hash_policy) {
4684 if ((bond_mode != BOND_MODE_XOR) &&
4685 (bond_mode != BOND_MODE_8023AD)) {
3d632c3f 4686 pr_info(DRV_NAME
3c6aaa24
NP
4687 ": xmit_hash_policy param is irrelevant in"
4688 " mode %s\n",
169a3e66
JV
4689 bond_mode_name(bond_mode));
4690 } else {
4691 xmit_hashtype = bond_parse_parm(xmit_hash_policy,
4692 xmit_hashtype_tbl);
4693 if (xmit_hashtype == -1) {
3d632c3f
SH
4694 pr_err(DRV_NAME
4695 ": Error: Invalid xmit_hash_policy \"%s\"\n",
4696 xmit_hash_policy == NULL ? "NULL" :
169a3e66
JV
4697 xmit_hash_policy);
4698 return -EINVAL;
4699 }
4700 }
4701 }
4702
1da177e4
LT
4703 if (lacp_rate) {
4704 if (bond_mode != BOND_MODE_8023AD) {
3d632c3f 4705 pr_info(DRV_NAME
1da177e4
LT
4706 ": lacp_rate param is irrelevant in mode %s\n",
4707 bond_mode_name(bond_mode));
4708 } else {
4709 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
4710 if (lacp_fast == -1) {
3d632c3f 4711 pr_err(DRV_NAME
1da177e4
LT
4712 ": Error: Invalid lacp rate \"%s\"\n",
4713 lacp_rate == NULL ? "NULL" : lacp_rate);
4714 return -EINVAL;
4715 }
4716 }
4717 }
4718
fd989c83
JV
4719 if (ad_select) {
4720 params->ad_select = bond_parse_parm(ad_select, ad_select_tbl);
4721 if (params->ad_select == -1) {
3d632c3f 4722 pr_err(DRV_NAME
fd989c83
JV
4723 ": Error: Invalid ad_select \"%s\"\n",
4724 ad_select == NULL ? "NULL" : ad_select);
4725 return -EINVAL;
4726 }
4727
4728 if (bond_mode != BOND_MODE_8023AD) {
3d632c3f 4729 pr_warning(DRV_NAME
fd989c83
JV
4730 ": ad_select param only affects 802.3ad mode\n");
4731 }
4732 } else {
4733 params->ad_select = BOND_AD_STABLE;
4734 }
4735
f5841306 4736 if (max_bonds < 0) {
3d632c3f 4737 pr_warning(DRV_NAME
1da177e4 4738 ": Warning: max_bonds (%d) not in range %d-%d, so it "
4e0952c7 4739 "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
b8a9787e 4740 max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
1da177e4
LT
4741 max_bonds = BOND_DEFAULT_MAX_BONDS;
4742 }
4743
4744 if (miimon < 0) {
3d632c3f 4745 pr_warning(DRV_NAME
1da177e4
LT
4746 ": Warning: miimon module parameter (%d), "
4747 "not in range 0-%d, so it was reset to %d\n",
4748 miimon, INT_MAX, BOND_LINK_MON_INTERV);
4749 miimon = BOND_LINK_MON_INTERV;
4750 }
4751
4752 if (updelay < 0) {
3d632c3f 4753 pr_warning(DRV_NAME
1da177e4
LT
4754 ": Warning: updelay module parameter (%d), "
4755 "not in range 0-%d, so it was reset to 0\n",
4756 updelay, INT_MAX);
4757 updelay = 0;
4758 }
4759
4760 if (downdelay < 0) {
3d632c3f 4761 pr_warning(DRV_NAME
1da177e4
LT
4762 ": Warning: downdelay module parameter (%d), "
4763 "not in range 0-%d, so it was reset to 0\n",
4764 downdelay, INT_MAX);
4765 downdelay = 0;
4766 }
4767
4768 if ((use_carrier != 0) && (use_carrier != 1)) {
3d632c3f 4769 pr_warning(DRV_NAME
1da177e4
LT
4770 ": Warning: use_carrier module parameter (%d), "
4771 "not of valid value (0/1), so it was set to 1\n",
4772 use_carrier);
4773 use_carrier = 1;
4774 }
4775
7893b249 4776 if (num_grat_arp < 0 || num_grat_arp > 255) {
3d632c3f 4777 pr_warning(DRV_NAME
7893b249
MS
4778 ": Warning: num_grat_arp (%d) not in range 0-255 so it "
4779 "was reset to 1 \n", num_grat_arp);
4780 num_grat_arp = 1;
4781 }
4782
305d552a 4783 if (num_unsol_na < 0 || num_unsol_na > 255) {
3d632c3f 4784 pr_warning(DRV_NAME
305d552a
BH
4785 ": Warning: num_unsol_na (%d) not in range 0-255 so it "
4786 "was reset to 1 \n", num_unsol_na);
4787 num_unsol_na = 1;
4788 }
4789
1da177e4
LT
4790 /* reset values for 802.3ad */
4791 if (bond_mode == BOND_MODE_8023AD) {
4792 if (!miimon) {
3d632c3f 4793 pr_warning(DRV_NAME
1da177e4
LT
4794 ": Warning: miimon must be specified, "
4795 "otherwise bonding will not detect link "
4796 "failure, speed and duplex which are "
4797 "essential for 802.3ad operation\n");
3d632c3f 4798 pr_warning("Forcing miimon to 100msec\n");
1da177e4
LT
4799 miimon = 100;
4800 }
4801 }
4802
4803 /* reset values for TLB/ALB */
4804 if ((bond_mode == BOND_MODE_TLB) ||
4805 (bond_mode == BOND_MODE_ALB)) {
4806 if (!miimon) {
3d632c3f 4807 pr_warning(DRV_NAME
1da177e4
LT
4808 ": Warning: miimon must be specified, "
4809 "otherwise bonding will not detect link "
4810 "failure and link speed which are essential "
4811 "for TLB/ALB load balancing\n");
3d632c3f 4812 pr_warning("Forcing miimon to 100msec\n");
1da177e4
LT
4813 miimon = 100;
4814 }
4815 }
4816
4817 if (bond_mode == BOND_MODE_ALB) {
e5e2a8fd 4818 pr_notice(DRV_NAME
1da177e4
LT
4819 ": In ALB mode you might experience client "
4820 "disconnections upon reconnection of a link if the "
4821 "bonding module updelay parameter (%d msec) is "
4822 "incompatible with the forwarding delay time of the "
4823 "switch\n",
4824 updelay);
4825 }
4826
4827 if (!miimon) {
4828 if (updelay || downdelay) {
4829 /* just warn the user the up/down delay will have
4830 * no effect since miimon is zero...
4831 */
3d632c3f 4832 pr_warning(DRV_NAME
1da177e4
LT
4833 ": Warning: miimon module parameter not set "
4834 "and updelay (%d) or downdelay (%d) module "
4835 "parameter is set; updelay and downdelay have "
4836 "no effect unless miimon is set\n",
4837 updelay, downdelay);
4838 }
4839 } else {
4840 /* don't allow arp monitoring */
4841 if (arp_interval) {
3d632c3f 4842 pr_warning(DRV_NAME
1da177e4
LT
4843 ": Warning: miimon (%d) and arp_interval (%d) "
4844 "can't be used simultaneously, disabling ARP "
4845 "monitoring\n",
4846 miimon, arp_interval);
4847 arp_interval = 0;
4848 }
4849
4850 if ((updelay % miimon) != 0) {
3d632c3f 4851 pr_warning(DRV_NAME
1da177e4
LT
4852 ": Warning: updelay (%d) is not a multiple "
4853 "of miimon (%d), updelay rounded to %d ms\n",
4854 updelay, miimon, (updelay / miimon) * miimon);
4855 }
4856
4857 updelay /= miimon;
4858
4859 if ((downdelay % miimon) != 0) {
3d632c3f 4860 pr_warning(DRV_NAME
1da177e4
LT
4861 ": Warning: downdelay (%d) is not a multiple "
4862 "of miimon (%d), downdelay rounded to %d ms\n",
4863 downdelay, miimon,
4864 (downdelay / miimon) * miimon);
4865 }
4866
4867 downdelay /= miimon;
4868 }
4869
4870 if (arp_interval < 0) {
3d632c3f 4871 pr_warning(DRV_NAME
1da177e4
LT
4872 ": Warning: arp_interval module parameter (%d) "
4873 ", not in range 0-%d, so it was reset to %d\n",
4874 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
4875 arp_interval = BOND_LINK_ARP_INTERV;
4876 }
4877
4878 for (arp_ip_count = 0;
4879 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count];
4880 arp_ip_count++) {
4881 /* not complete check, but should be good enough to
4882 catch mistakes */
4883 if (!isdigit(arp_ip_target[arp_ip_count][0])) {
3d632c3f 4884 pr_warning(DRV_NAME
1da177e4
LT
4885 ": Warning: bad arp_ip_target module parameter "
4886 "(%s), ARP monitoring will not be performed\n",
4887 arp_ip_target[arp_ip_count]);
4888 arp_interval = 0;
4889 } else {
d3bb52b0 4890 __be32 ip = in_aton(arp_ip_target[arp_ip_count]);
1da177e4
LT
4891 arp_target[arp_ip_count] = ip;
4892 }
4893 }
4894
4895 if (arp_interval && !arp_ip_count) {
4896 /* don't allow arping if no arp_ip_target given... */
3d632c3f 4897 pr_warning(DRV_NAME
1da177e4
LT
4898 ": Warning: arp_interval module parameter (%d) "
4899 "specified without providing an arp_ip_target "
4900 "parameter, arp_interval was reset to 0\n",
4901 arp_interval);
4902 arp_interval = 0;
4903 }
4904
f5b2b966
JV
4905 if (arp_validate) {
4906 if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
3d632c3f
SH
4907 pr_err(DRV_NAME
4908 ": arp_validate only supported in active-backup mode\n");
f5b2b966
JV
4909 return -EINVAL;
4910 }
4911 if (!arp_interval) {
3d632c3f 4912 pr_err(DRV_NAME
f5b2b966
JV
4913 ": arp_validate requires arp_interval\n");
4914 return -EINVAL;
4915 }
4916
4917 arp_validate_value = bond_parse_parm(arp_validate,
4918 arp_validate_tbl);
4919 if (arp_validate_value == -1) {
3d632c3f 4920 pr_err(DRV_NAME
f5b2b966
JV
4921 ": Error: invalid arp_validate \"%s\"\n",
4922 arp_validate == NULL ? "NULL" : arp_validate);
4923 return -EINVAL;
4924 }
4925 } else
4926 arp_validate_value = 0;
4927
1da177e4 4928 if (miimon) {
3d632c3f 4929 pr_info(DRV_NAME
1da177e4
LT
4930 ": MII link monitoring set to %d ms\n",
4931 miimon);
4932 } else if (arp_interval) {
4933 int i;
4934
3d632c3f
SH
4935 pr_info(DRV_NAME ": ARP monitoring set to %d ms,"
4936 " validate %s, with %d target(s):",
f5b2b966
JV
4937 arp_interval,
4938 arp_validate_tbl[arp_validate_value].modename,
4939 arp_ip_count);
1da177e4
LT
4940
4941 for (i = 0; i < arp_ip_count; i++)
e5e2a8fd 4942 pr_info(" %s", arp_ip_target[i]);
1da177e4 4943
e5e2a8fd 4944 pr_info("\n");
1da177e4 4945
b8a9787e 4946 } else if (max_bonds) {
1da177e4
LT
4947 /* miimon and arp_interval not set, we need one so things
4948 * work as expected, see bonding.txt for details
4949 */
3d632c3f 4950 pr_warning(DRV_NAME
1da177e4
LT
4951 ": Warning: either miimon or arp_interval and "
4952 "arp_ip_target module parameters must be specified, "
4953 "otherwise bonding will not detect link failures! see "
4954 "bonding.txt for details.\n");
4955 }
4956
4957 if (primary && !USES_PRIMARY(bond_mode)) {
4958 /* currently, using a primary only makes sense
4959 * in active backup, TLB or ALB modes
4960 */
3d632c3f 4961 pr_warning(DRV_NAME
1da177e4
LT
4962 ": Warning: %s primary device specified but has no "
4963 "effect in %s mode\n",
4964 primary, bond_mode_name(bond_mode));
4965 primary = NULL;
4966 }
4967
a549952a
JP
4968 if (primary && primary_reselect) {
4969 primary_reselect_value = bond_parse_parm(primary_reselect,
4970 pri_reselect_tbl);
4971 if (primary_reselect_value == -1) {
4972 pr_err(DRV_NAME
4973 ": Error: Invalid primary_reselect \"%s\"\n",
4974 primary_reselect ==
4975 NULL ? "NULL" : primary_reselect);
4976 return -EINVAL;
4977 }
4978 } else {
4979 primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
4980 }
4981
3915c1e8
JV
4982 if (fail_over_mac) {
4983 fail_over_mac_value = bond_parse_parm(fail_over_mac,
4984 fail_over_mac_tbl);
4985 if (fail_over_mac_value == -1) {
3d632c3f 4986 pr_err(DRV_NAME
3915c1e8
JV
4987 ": Error: invalid fail_over_mac \"%s\"\n",
4988 arp_validate == NULL ? "NULL" : arp_validate);
4989 return -EINVAL;
4990 }
4991
4992 if (bond_mode != BOND_MODE_ACTIVEBACKUP)
3d632c3f 4993 pr_warning(DRV_NAME
3915c1e8
JV
4994 ": Warning: fail_over_mac only affects "
4995 "active-backup mode.\n");
4996 } else {
4997 fail_over_mac_value = BOND_FOM_NONE;
4998 }
dd957c57 4999
1da177e4
LT
5000 /* fill params struct with the proper values */
5001 params->mode = bond_mode;
169a3e66 5002 params->xmit_policy = xmit_hashtype;
1da177e4 5003 params->miimon = miimon;
7893b249 5004 params->num_grat_arp = num_grat_arp;
305d552a 5005 params->num_unsol_na = num_unsol_na;
1da177e4 5006 params->arp_interval = arp_interval;
f5b2b966 5007 params->arp_validate = arp_validate_value;
1da177e4
LT
5008 params->updelay = updelay;
5009 params->downdelay = downdelay;
5010 params->use_carrier = use_carrier;
5011 params->lacp_fast = lacp_fast;
5012 params->primary[0] = 0;
a549952a 5013 params->primary_reselect = primary_reselect_value;
3915c1e8 5014 params->fail_over_mac = fail_over_mac_value;
1da177e4
LT
5015
5016 if (primary) {
5017 strncpy(params->primary, primary, IFNAMSIZ);
5018 params->primary[IFNAMSIZ - 1] = 0;
5019 }
5020
5021 memcpy(params->arp_targets, arp_target, sizeof(arp_target));
5022
5023 return 0;
5024}
5025
0daa2303 5026static struct lock_class_key bonding_netdev_xmit_lock_key;
cf508b12 5027static struct lock_class_key bonding_netdev_addr_lock_key;
0daa2303 5028
e8a0464c
DM
5029static void bond_set_lockdep_class_one(struct net_device *dev,
5030 struct netdev_queue *txq,
5031 void *_unused)
c773e847
DM
5032{
5033 lockdep_set_class(&txq->_xmit_lock,
5034 &bonding_netdev_xmit_lock_key);
5035}
5036
5037static void bond_set_lockdep_class(struct net_device *dev)
5038{
cf508b12
DM
5039 lockdep_set_class(&dev->addr_list_lock,
5040 &bonding_netdev_addr_lock_key);
e8a0464c 5041 netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
c773e847
DM
5042}
5043
181470fc
SH
5044/*
5045 * Called from registration process
5046 */
5047static int bond_init(struct net_device *bond_dev)
5048{
5049 struct bonding *bond = netdev_priv(bond_dev);
5050
5051 pr_debug("Begin bond_init for %s\n", bond_dev->name);
5052
5053 bond->wq = create_singlethread_workqueue(bond_dev->name);
5054 if (!bond->wq)
5055 return -ENOMEM;
5056
5057 bond_set_lockdep_class(bond_dev);
5058
5059 netif_carrier_off(bond_dev);
5060
5061 bond_create_proc_entry(bond);
5062 list_add_tail(&bond->bond_list, &bond_dev_list);
5063
6151b3d4 5064 bond_prepare_sysfs_group(bond);
181470fc
SH
5065 return 0;
5066}
5067
dfe60397 5068/* Create a new bond based on the specified name and bonding parameters.
e4b91c48 5069 * If name is NULL, obtain a suitable "bond%d" name for us.
dfe60397
MW
5070 * Caller must NOT hold rtnl_lock; we need to release it here before we
5071 * set up our sysfs entries.
5072 */
d2991f75 5073int bond_create(const char *name)
dfe60397
MW
5074{
5075 struct net_device *bond_dev;
5076 int res;
5077
5078 rtnl_lock();
027ea041 5079
e4b91c48 5080 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
181470fc 5081 bond_setup);
dfe60397 5082 if (!bond_dev) {
3d632c3f 5083 pr_err(DRV_NAME ": %s: eek! can't alloc netdev!\n",
dfe60397
MW
5084 name);
5085 res = -ENOMEM;
30c15ba9 5086 goto out;
dfe60397
MW
5087 }
5088
e4b91c48
JV
5089 if (!name) {
5090 res = dev_alloc_name(bond_dev, "bond%d");
5091 if (res < 0)
5092 goto out_netdev;
5093 }
5094
dfe60397 5095 res = register_netdevice(bond_dev);
0daa2303 5096
30c15ba9 5097out:
7e083840 5098 rtnl_unlock();
30c15ba9 5099 return res;
dfe60397
MW
5100out_netdev:
5101 free_netdev(bond_dev);
30c15ba9 5102 goto out;
dfe60397
MW
5103}
5104
1da177e4
LT
5105static int __init bonding_init(void)
5106{
1da177e4
LT
5107 int i;
5108 int res;
5109
3d632c3f 5110 pr_info("%s", version);
1da177e4 5111
dfe60397 5112 res = bond_check_params(&bonding_defaults);
3d632c3f 5113 if (res)
dfe60397 5114 goto out;
1da177e4 5115
1da177e4 5116 bond_create_proc_dir();
027ea041 5117
1da177e4 5118 for (i = 0; i < max_bonds; i++) {
d2991f75 5119 res = bond_create(NULL);
dfe60397
MW
5120 if (res)
5121 goto err;
1da177e4
LT
5122 }
5123
b76cdba9
MW
5124 res = bond_create_sysfs();
5125 if (res)
5126 goto err;
5127
1da177e4 5128 register_netdevice_notifier(&bond_netdev_notifier);
c3ade5ca 5129 register_inetaddr_notifier(&bond_inetaddr_notifier);
305d552a 5130 bond_register_ipv6_notifier();
1da177e4 5131
dfe60397
MW
5132 goto out;
5133err:
40abc270 5134 rtnl_lock();
1da177e4 5135 bond_free_all();
1da177e4 5136 rtnl_unlock();
dfe60397 5137out:
1da177e4 5138 return res;
dfe60397 5139
1da177e4
LT
5140}
5141
5142static void __exit bonding_exit(void)
5143{
5144 unregister_netdevice_notifier(&bond_netdev_notifier);
c3ade5ca 5145 unregister_inetaddr_notifier(&bond_inetaddr_notifier);
305d552a 5146 bond_unregister_ipv6_notifier();
1da177e4 5147
ae68c398
PE
5148 bond_destroy_sysfs();
5149
1da177e4
LT
5150 rtnl_lock();
5151 bond_free_all();
5152 rtnl_unlock();
5153}
5154
5155module_init(bonding_init);
5156module_exit(bonding_exit);
5157MODULE_LICENSE("GPL");
5158MODULE_VERSION(DRV_VERSION);
5159MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
5160MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");