#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
+#include <linux/jiffies.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include "bonding.h"
struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ "layer2", BOND_XMIT_POLICY_LAYER2},
{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
+{ "layer2+3", BOND_XMIT_POLICY_LAYER23}, /* see bond_xmit_hash_policy_l23() */
{ NULL, -1},
};
dev_set_allmulti(slave_dev, 1);
}
+ netif_tx_lock_bh(bond_dev);
/* upload master's mc_list to new slave */
for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
+ netif_tx_unlock_bh(bond_dev);
}
if (bond->params.mode == BOND_MODE_8023AD) {
* has been cleared (if our_slave == old_current),
* but before a new active slave is selected.
*/
+ write_unlock_bh(&bond->lock);
bond_alb_deinit_slave(bond, slave);
+ write_lock_bh(&bond->lock);
}
if (oldcurrent == slave) {
}
/* flush master's mc_list from slave */
+ netif_tx_lock_bh(bond_dev);
bond_mc_list_flush(bond_dev, slave_dev);
+ netif_tx_unlock_bh(bond_dev);
}
netdev_set_master(slave_dev, NULL);
slave_dev = slave->dev;
bond_detach_slave(bond, slave);
+ /* now that the slave is detached, unlock and perform
+ * all the undo steps that should not be called from
+ * within a lock.
+ */
+ write_unlock_bh(&bond->lock);
+
if ((bond->params.mode == BOND_MODE_TLB) ||
(bond->params.mode == BOND_MODE_ALB)) {
/* must be called only after the slave
bond_compute_features(bond);
- /* now that the slave is detached, unlock and perform
- * all the undo steps that should not be called from
- * within a lock.
- */
- write_unlock_bh(&bond->lock);
-
bond_destroy_slave_symlinks(bond_dev, slave_dev);
bond_del_vlans_from_slave(bond, slave_dev);
}
/* flush master's mc_list from slave */
+ netif_tx_lock_bh(bond_dev);
bond_mc_list_flush(bond_dev, slave_dev);
+ netif_tx_unlock_bh(bond_dev);
}
netdev_set_master(slave_dev, NULL);
rtnl_lock();
read_lock(&bond->lock);
__bond_mii_monitor(bond, 1);
- rtnl_unlock();
+ read_unlock(&bond->lock);
+ rtnl_unlock(); /* might sleep, hold no other locks */
+ read_lock(&bond->lock);
}
delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
fl.fl4_dst = targets[i];
fl.fl4_tos = RTO_ONLINK;
- rv = ip_route_output_key(&rt, &fl);
+ rv = ip_route_output_key(&init_net, &rt, &fl);
if (rv) {
if (net_ratelimit()) {
printk(KERN_WARNING DRV_NAME
if (!slave || !slave_do_arp_validate(bond, slave))
goto out_unlock;
- /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
- (2 * dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, arp_hdr_len(dev)))
goto out_unlock;
arp = arp_hdr(skb);
*/
bond_for_each_slave(bond, slave, i) {
if (slave->link != BOND_LINK_UP) {
- if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) &&
- ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) {
+ if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) &&
+ time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
slave->link = BOND_LINK_UP;
slave->state = BOND_STATE_ACTIVE;
* when the source ip is 0, so don't take the link down
* if we don't know our ip yet
*/
- if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
- (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
+ if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
+ (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) &&
bond_has_ip(bond))) {
slave->link = BOND_LINK_DOWN;
}
if (do_failover) {
- rtnl_lock();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
- rtnl_unlock();
-
}
re_arm:
*/
bond_for_each_slave(bond, slave, i) {
if (slave->link != BOND_LINK_UP) {
- if ((jiffies - slave_last_rx(bond, slave)) <=
- delta_in_ticks) {
+ if (time_before_eq(jiffies,
+ slave_last_rx(bond, slave) + delta_in_ticks)) {
slave->link = BOND_LINK_UP;
- rtnl_lock();
-
write_lock_bh(&bond->curr_slave_lock);
if ((!bond->curr_active_slave) &&
- ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) {
+ time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) {
bond_change_active_slave(bond, slave);
bond->current_arp_slave = NULL;
} else if (bond->curr_active_slave != slave) {
}
write_unlock_bh(&bond->curr_slave_lock);
- rtnl_unlock();
}
} else {
read_lock(&bond->curr_slave_lock);
if ((slave != bond->curr_active_slave) &&
(!bond->current_arp_slave) &&
- (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
+ (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) &&
bond_has_ip(bond))) {
/* a backup slave has gone down; three times
* the delta allows the current slave to be
* before being taken out. if a primary is being used, check
* if it is up and needs to take over as the curr_active_slave
*/
- if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
- (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
- bond_has_ip(bond))) &&
- ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
+ if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
+ (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) &&
+ bond_has_ip(bond))) &&
+ time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) {
slave->link = BOND_LINK_DOWN;
bond->dev->name,
slave->dev->name);
- rtnl_lock();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
- rtnl_unlock();
-
bond->current_arp_slave = slave;
if (slave) {
bond->primary_slave->dev->name);
/* primary is up so switch to it */
- rtnl_lock();
write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, bond->primary_slave);
write_unlock_bh(&bond->curr_slave_lock);
- rtnl_unlock();
-
slave = bond->primary_slave;
slave->jiffies = jiffies;
} else {
case NETDEV_CHANGENAME:
return bond_event_changename(event_bond);
case NETDEV_UNREGISTER:
- /*
- * TODO: remove a bond from the list?
- */
+ bond_release_all(event_bond->dev);
break;
default:
break;
struct bonding *bond, *bond_next;
struct vlan_entry *vlan, *vlan_next;
+ if (ifa->ifa_dev->dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) {
if (bond->dev == event_dev) {
switch (event) {
/*---------------------------- Hashing Policies -----------------------------*/
+/*
+ * Transmit hash that mixes layer 2 and layer 3 information.
+ *
+ * The layer 2 part is the last byte of the destination MAC XORed with the
+ * last byte of the bond device's own MAC.  For IPv4 frames that value is
+ * additionally XORed with the low 16 bits of (saddr XOR daddr), taken in
+ * host byte order.  Frames that are not IPv4 are hashed on the layer 2
+ * part alone, mimicking bond_xmit_hash_policy_l2().
+ *
+ * Returns the hash reduced modulo @count.
+ */
+static int bond_xmit_hash_policy_l23(struct sk_buff *skb,
+				     struct net_device *bond_dev, int count)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+	struct iphdr *iph = ip_hdr(skb);
+	int mac_hash = eth->h_dest[5] ^ bond_dev->dev_addr[5];
+
+	/* Anything that is not IPv4 is spread on the MAC bytes alone. */
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		return mac_hash % count;
+
+	return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ mac_hash) % count;
+}
+
/*
* Hash for the output device based upon layer 3 and layer 4 data. If
* the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
{
struct bonding *bond = bond_dev->priv;
struct net_device_stats *stats = &(bond->stats), *sstats;
+ struct net_device_stats local_stats;
struct slave *slave;
int i;
- memset(stats, 0, sizeof(struct net_device_stats));
+ memset(&local_stats, 0, sizeof(struct net_device_stats));
read_lock_bh(&bond->lock);
bond_for_each_slave(bond, slave, i) {
sstats = slave->dev->get_stats(slave->dev);
- stats->rx_packets += sstats->rx_packets;
- stats->rx_bytes += sstats->rx_bytes;
- stats->rx_errors += sstats->rx_errors;
- stats->rx_dropped += sstats->rx_dropped;
+ local_stats.rx_packets += sstats->rx_packets;
+ local_stats.rx_bytes += sstats->rx_bytes;
+ local_stats.rx_errors += sstats->rx_errors;
+ local_stats.rx_dropped += sstats->rx_dropped;
- stats->tx_packets += sstats->tx_packets;
- stats->tx_bytes += sstats->tx_bytes;
- stats->tx_errors += sstats->tx_errors;
- stats->tx_dropped += sstats->tx_dropped;
+ local_stats.tx_packets += sstats->tx_packets;
+ local_stats.tx_bytes += sstats->tx_bytes;
+ local_stats.tx_errors += sstats->tx_errors;
+ local_stats.tx_dropped += sstats->tx_dropped;
- stats->multicast += sstats->multicast;
- stats->collisions += sstats->collisions;
+ local_stats.multicast += sstats->multicast;
+ local_stats.collisions += sstats->collisions;
- stats->rx_length_errors += sstats->rx_length_errors;
- stats->rx_over_errors += sstats->rx_over_errors;
- stats->rx_crc_errors += sstats->rx_crc_errors;
- stats->rx_frame_errors += sstats->rx_frame_errors;
- stats->rx_fifo_errors += sstats->rx_fifo_errors;
- stats->rx_missed_errors += sstats->rx_missed_errors;
+ local_stats.rx_length_errors += sstats->rx_length_errors;
+ local_stats.rx_over_errors += sstats->rx_over_errors;
+ local_stats.rx_crc_errors += sstats->rx_crc_errors;
+ local_stats.rx_frame_errors += sstats->rx_frame_errors;
+ local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
+ local_stats.rx_missed_errors += sstats->rx_missed_errors;
- stats->tx_aborted_errors += sstats->tx_aborted_errors;
- stats->tx_carrier_errors += sstats->tx_carrier_errors;
- stats->tx_fifo_errors += sstats->tx_fifo_errors;
- stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
- stats->tx_window_errors += sstats->tx_window_errors;
+ local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
+ local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
+ local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
+ local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
+ local_stats.tx_window_errors += sstats->tx_window_errors;
}
+ memcpy(stats, &local_stats, sizeof(struct net_device_stats));
+
read_unlock_bh(&bond->lock);
return stats;
struct bonding *bond = bond_dev->priv;
struct dev_mc_list *dmi;
- write_lock_bh(&bond->lock);
-
/*
* Do promisc before checking multicast_mode
*/
bond_set_allmulti(bond, -1);
}
+ read_lock(&bond->lock);
+
bond->flags = bond_dev->flags;
/* looking for addresses to add to slaves' mc list */
bond_mc_list_destroy(bond);
bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
- write_unlock_bh(&bond->lock);
+ read_unlock(&bond->lock);
}
/*
/*------------------------- Device initialization ---------------------------*/
+/*
+ * Point bond->xmit_hash_policy at the hash routine that matches
+ * bond->params.xmit_policy.  BOND_XMIT_POLICY_LAYER2, and any value that
+ * is not one of the policies tested below, selects the layer 2 hash.
+ */
+static void bond_set_xmit_hash_policy(struct bonding *bond)
+{
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23) {
+		bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
+		return;
+	}
+
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
+		bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
+		return;
+	}
+
+	/* layer 2 is both the explicit choice and the fallback */
+	bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
+}
+
/*
* set bond mode specific net device operations
*/
break;
case BOND_MODE_XOR:
bond_dev->hard_start_xmit = bond_xmit_xor;
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
- bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
- else
- bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
+ bond_set_xmit_hash_policy(bond);
break;
case BOND_MODE_BROADCAST:
bond_dev->hard_start_xmit = bond_xmit_broadcast;
case BOND_MODE_8023AD:
bond_set_master_3ad_flags(bond);
bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
- bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
- else
- bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
+ bond_set_xmit_hash_policy(bond);
break;
case BOND_MODE_ALB:
bond_set_master_alb_flags(bond);
#endif
}
+/*
+ * Set bond->kill_timers (under bond->lock) and then cancel the delayed
+ * work items of this bond that are still pending.
+ *
+ * Only the work items that belong to the current configuration are
+ * examined: mii_work when miimon is non-zero, arp_work when arp_interval
+ * is non-zero, alb_work in ALB mode and ad_work in 802.3ad mode.
+ */
+static void bond_work_cancel_all(struct bonding *bond)
+{
+	write_lock_bh(&bond->lock);
+	bond->kill_timers = 1;
+	write_unlock_bh(&bond->lock);
+
+	if (bond->params.miimon) {
+		if (delayed_work_pending(&bond->mii_work))
+			cancel_delayed_work(&bond->mii_work);
+	}
+
+	if (bond->params.arp_interval) {
+		if (delayed_work_pending(&bond->arp_work))
+			cancel_delayed_work(&bond->arp_work);
+	}
+
+	/* at most one of the mode specific work items can be in use */
+	switch (bond->params.mode) {
+	case BOND_MODE_ALB:
+		if (delayed_work_pending(&bond->alb_work))
+			cancel_delayed_work(&bond->alb_work);
+		break;
+	case BOND_MODE_8023AD:
+		if (delayed_work_pending(&bond->ad_work))
+			cancel_delayed_work(&bond->ad_work);
+		break;
+	default:
+		break;
+	}
+}
+
/* Unregister and free all bond devices.
* Caller must hold rtnl_lock.
*/
list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
struct net_device *bond_dev = bond->dev;
+ bond_work_cancel_all(bond);
+ netif_tx_lock_bh(bond_dev);
bond_mc_list_destroy(bond);
+ netif_tx_unlock_bh(bond_dev);
/* Release the bonded slaves */
bond_release_all(bond_dev);
bond_deinit(bond_dev);
/*
* Convert string input module parms. Accept either the
- * number of the mode or its string name.
+ * number of the mode or its string name. A bit complicated because
+ * some mode names are substrings of other names, and calls from sysfs
+ * may have whitespace in the name (trailing newlines, for example).
*/
-int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
+int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl)
{
- int i;
+ int mode = -1, i, rv;
+ char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
+
+ for (p = (char *)buf; *p; p++)
+ if (!(isdigit(*p) || isspace(*p)))
+ break;
+
+ if (*p)
+ rv = sscanf(buf, "%20s", modestr);
+ else
+ rv = sscanf(buf, "%d", &mode);
+
+ if (!rv)
+ return -1;
for (i = 0; tbl[i].modename; i++) {
- if ((isdigit(*mode_arg) &&
- tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) ||
- (strncmp(mode_arg, tbl[i].modename,
- strlen(tbl[i].modename)) == 0)) {
+ if (mode == tbl[i].mode)
+ return tbl[i].mode;
+ if (strcmp(modestr, tbl[i].modename) == 0)
return tbl[i].mode;
- }
}
return -1;
int bond_create(char *name, struct bond_params *params, struct bonding **newbond)
{
struct net_device *bond_dev;
+ struct bonding *bond, *nxt;
int res;
rtnl_lock();
+ down_write(&bonding_rwsem);
+
+ /* Check to see if the bond already exists. */
+ if (name) {
+ list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list)
+ if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) {
+ printk(KERN_ERR DRV_NAME
+ ": cannot add bond %s; it already exists\n",
+ name);
+ res = -EPERM;
+ goto out_rtnl;
+ }
+ }
+
bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
ether_setup);
if (!bond_dev) {
netif_carrier_off(bond_dev);
+ up_write(&bonding_rwsem);
rtnl_unlock(); /* allows sysfs registration of net device */
res = bond_create_sysfs_entry(bond_dev->priv);
if (res < 0) {
rtnl_lock();
+ down_write(&bonding_rwsem);
goto out_bond;
}
out_netdev:
free_netdev(bond_dev);
out_rtnl:
+ up_write(&bonding_rwsem);
rtnl_unlock();
return res;
}
-static void bond_work_cancel_all(struct bonding *bond)
-{
- write_lock_bh(&bond->lock);
- bond->kill_timers = 1;
- write_unlock_bh(&bond->lock);
-
- if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
- cancel_delayed_work(&bond->mii_work);
-
- if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
- cancel_delayed_work(&bond->arp_work);
-
- if (bond->params.mode == BOND_MODE_ALB &&
- delayed_work_pending(&bond->alb_work))
- cancel_delayed_work(&bond->alb_work);
-
- if (bond->params.mode == BOND_MODE_8023AD &&
- delayed_work_pending(&bond->ad_work))
- cancel_delayed_work(&bond->ad_work);
-}
-
static int __init bonding_init(void)
{
int i;
#ifdef CONFIG_PROC_FS
bond_create_proc_dir();
#endif
+
+ init_rwsem(&bonding_rwsem);
+
for (i = 0; i < max_bonds; i++) {
res = bond_create(NULL, &bonding_defaults, NULL);
if (res)