diff options
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 500 |
1 files changed, 228 insertions, 272 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e883bfe2e727..dfb4f6dd5de0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -78,6 +78,7 @@ #include <net/netns/generic.h> #include <net/pkt_sched.h> #include <linux/rculist.h> +#include <net/flow_keys.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on module_param(xmit_hash_policy, charp, 0); MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " "0 for layer 2 (default), 1 for layer 3+4, " - "2 for layer 2+3"); + "2 for layer 2+3, 3 for encap layer 2+3, " + "4 for encap layer 3+4"); module_param(arp_interval, int, 0); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); module_param_array(arp_ip_target, charp, NULL, 0); @@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = { { "layer2", BOND_XMIT_POLICY_LAYER2}, { "layer3+4", BOND_XMIT_POLICY_LAYER34}, { "layer2+3", BOND_XMIT_POLICY_LAYER23}, +{ "encap2+3", BOND_XMIT_POLICY_ENCAP23}, +{ "encap3+4", BOND_XMIT_POLICY_ENCAP34}, { NULL, -1}, }; @@ -332,10 +336,11 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave; + struct slave *slave, *rollback_slave; + struct list_head *iter; int res; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { res = vlan_vid_add(slave->dev, proto, vid); if (res) goto unwind; @@ -344,9 +349,13 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, return 0; unwind: - /* unwind from the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) - vlan_vid_del(slave->dev, proto, vid); + /* unwind to the slave that failed */ + bond_for_each_slave(bond, rollback_slave, iter) { + if (rollback_slave == slave) + break; + + vlan_vid_del(rollback_slave->dev, proto, vid); + } return res; } @@ -360,9 +369,10 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) vlan_vid_del(slave->dev, proto, vid); if (bond_is_lb(bond)) @@ -382,15 +392,16 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, */ static int bond_set_carrier(struct bonding *bond) { + struct list_head *iter; struct slave *slave; - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto down; if (bond->params.mode == BOND_MODE_8023AD) return bond_3ad_set_carrier(bond); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) { if (!netif_carrier_ok(bond->dev)) { netif_carrier_on(bond->dev); @@ -522,7 +533,9 @@ static int bond_check_dev_link(struct bonding *bond, */ static int bond_set_promiscuity(struct bonding *bond, int inc) { + struct list_head *iter; int err = 0; + if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { @@ -532,7 +545,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc) } else { struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = dev_set_promiscuity(slave->dev, inc); if (err) return err; @@ -546,7 +559,9 @@ static int bond_set_promiscuity(struct bonding *bond, int inc) */ static int bond_set_allmulti(struct bonding *bond, int inc) { + struct list_head *iter; int err = 0; + if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { @@ -556,7 +571,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc) } else { struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = dev_set_allmulti(slave->dev, inc); if (err) return err; @@ -774,43 +789,24 @@ static bool bond_should_change_active(struct bonding *bond) /** * find_best_interface - select the best available slave to be the active one * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing. */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *new_active, *old_active; - struct slave *bestslave = NULL; + struct slave *slave, *bestslave = NULL; + struct list_head *iter; int mintime = bond->params.updelay; - int i; - - new_active = bond->curr_active_slave; - - if (!new_active) { /* there were no active slaves left */ - new_active = bond_first_slave(bond); - if (!new_active) - return NULL; /* still no slave, return NULL */ - } - if ((bond->primary_slave) && - bond->primary_slave->link == BOND_LINK_UP && - bond_should_change_active(bond)) { - new_active = bond->primary_slave; - } - - /* remember where to stop iterating over the slaves */ - old_active = new_active; - - bond_for_each_slave_from(bond, new_active, i, old_active) { - if (new_active->link == BOND_LINK_UP) { - return new_active; - } else if (new_active->link == BOND_LINK_BACK && - IS_UP(new_active->dev)) { - /* link up, but waiting for stabilization */ - if (new_active->delay < mintime) { - mintime = new_active->delay; - bestslave = new_active; - } + if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && + bond_should_change_active(bond)) + return bond->primary_slave; + + bond_for_each_slave(bond, slave, iter) { + if (slave->link == BOND_LINK_UP) + return slave; + if (slave->link == BOND_LINK_BACK && IS_UP(slave->dev) && + slave->delay < mintime) { + mintime = slave->delay; + bestslave = slave; } } @@ -980,7 +976,6 @@ void bond_select_active_slave(struct bonding *bond) */ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) { - list_add_tail_rcu(&new_slave->list, &bond->slave_list); bond->slave_cnt++; } @@ -996,7 +991,6 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) */ static void bond_detach_slave(struct bonding *bond, struct slave *slave) { - list_del_rcu(&slave->list); bond->slave_cnt--; } @@ -1046,9 +1040,10 @@ static void bond_poll_controller(struct net_device *bond_dev) static void bond_netpoll_cleanup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) if (IS_UP(slave->dev)) slave_disable_netpoll(slave); } @@ -1056,10 +1051,11 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev) static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) { struct bonding *bond = netdev_priv(dev); + struct list_head *iter; struct slave *slave; int err = 0; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = slave_enable_netpoll(slave); if (err) { bond_netpoll_cleanup(dev); @@ -1087,10 +1083,11 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t features) { struct bonding *bond = netdev_priv(dev); + struct list_head *iter; netdev_features_t mask; struct slave *slave; - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { /* Disable adding VLANs to empty bond. But why? --mq */ features |= NETIF_F_VLAN_CHALLENGED; return features; @@ -1100,7 +1097,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { features = netdev_increment_features(features, slave->dev->features, mask); @@ -1118,16 +1115,17 @@ static void bond_compute_features(struct bonding *bond) { unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; netdev_features_t vlan_features = BOND_VLAN_FEATURES; + struct net_device *bond_dev = bond->dev; + struct list_head *iter; + struct slave *slave; unsigned short max_hard_header_len = ETH_HLEN; unsigned int gso_max_size = GSO_MAX_SIZE; - struct net_device *bond_dev = bond->dev; u16 gso_max_segs = GSO_MAX_SEGS; - struct slave *slave; - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto done; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { vlan_features = netdev_increment_features(vlan_features, slave->dev->vlan_features, BOND_VLAN_FEATURES); @@ -1233,11 +1231,12 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } static int bond_master_upper_dev_link(struct net_device *bond_dev, - struct net_device *slave_dev) + struct net_device *slave_dev, + struct slave *slave) { int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev); + err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); if (err) return err; slave_dev->flags |= IFF_SLAVE; @@ -1258,7 +1257,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct slave *new_slave = NULL; + struct slave *new_slave = NULL, *prev_slave; struct sockaddr addr; int link_reporting; int res = 0, i; @@ -1313,7 +1312,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) * bond ether type mutual exclusion - don't allow slaves of dissimilar * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond */ - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { if (bond_dev->type != slave_dev->type) { pr_debug("%s: change device type from %d to %d\n", bond_dev->name, @@ -1352,7 +1351,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } if (slave_ops->ndo_set_mac_address == NULL) { - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", bond_dev->name); bond->params.fail_over_mac = BOND_FOM_ACTIVE; @@ -1368,7 +1367,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's. */ - if (list_empty(&bond->slave_list) && + if (!bond_has_slaves(bond) && bond->dev->addr_assign_type == NET_ADDR_RANDOM) bond_set_dev_addr(bond->dev, slave_dev); @@ -1377,7 +1376,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) res = -ENOMEM; goto err_undo_flags; } - INIT_LIST_HEAD(&new_slave->list); /* * Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired. @@ -1413,17 +1411,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } - res = bond_master_upper_dev_link(bond_dev, slave_dev); - if (res) { - pr_debug("Error %d calling bond_master_upper_dev_link\n", res); - goto err_restore_mac; - } - /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { pr_debug("Opening slave %s failed\n", slave_dev->name); - goto err_unset_master; + goto err_restore_mac; } new_slave->bond = bond; @@ -1481,6 +1473,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) write_lock_bh(&bond->lock); + prev_slave = bond_last_slave(bond); bond_attach_slave(bond, new_slave); new_slave->delay = 0; @@ -1568,16 +1561,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) */ bond_set_slave_inactive_flags(new_slave); /* if this is the first slave */ - if (bond_first_slave(bond) == new_slave) { + if (!prev_slave) { SLAVE_AD_INFO(new_slave).id = 1; /* Initialize AD with the number of times that the AD timer is called in 1 second * can be called only after the mac address of the bond is set */ bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL); } else { - struct slave *prev_slave; - - prev_slave = bond_prev_slave(bond, new_slave); SLAVE_AD_INFO(new_slave).id = SLAVE_AD_INFO(prev_slave).id + 1; } @@ -1626,17 +1616,20 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) read_unlock(&bond->lock); - res = bond_create_slave_symlinks(bond_dev, slave_dev); - if (res) - goto err_detach; - res = netdev_rx_handler_register(slave_dev, bond_handle_frame, new_slave); if (res) { pr_debug("Error %d calling netdev_rx_handler_register\n", res); - goto err_dest_symlinks; + goto err_detach; + } + + res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); + if (res) { + pr_debug("Error %d calling bond_master_upper_dev_link\n", res); + goto err_unregister; } + pr_info("%s: enslaving %s as a%s interface with a%s link.\n", bond_dev->name, slave_dev->name, bond_is_active_slave(new_slave) ? "n active" : " backup", @@ -1646,8 +1639,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ -err_dest_symlinks: - bond_destroy_slave_symlinks(bond_dev, slave_dev); +err_unregister: + netdev_rx_handler_unregister(slave_dev); err_detach: if (!USES_PRIMARY(bond->params.mode)) @@ -1675,9 +1668,6 @@ err_close: slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); -err_unset_master: - bond_upper_dev_unlink(bond_dev, slave_dev); - err_restore_mac: if (!bond->params.fail_over_mac) { /* XXX TODO - fom follow mode needs to change master's @@ -1698,7 +1688,7 @@ err_free: err_undo_flags: bond_compute_features(bond); /* Enslave of first slave has failed and we need to fix master's mac */ - if (list_empty(&bond->slave_list) && + if (!bond_has_slaves(bond) && ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) eth_hw_addr_random(bond_dev); @@ -1749,6 +1739,8 @@ static int __bond_release_one(struct net_device *bond_dev, } write_unlock_bh(&bond->lock); + + bond_upper_dev_unlink(bond_dev, slave_dev); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -1777,7 +1769,7 @@ static int __bond_release_one(struct net_device *bond_dev, if (!all && !bond->params.fail_over_mac) { if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) && - !list_empty(&bond->slave_list)) + bond_has_slaves(bond)) pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", bond_dev->name, slave_dev->name, slave->perm_hwaddr, @@ -1820,7 +1812,7 @@ static int __bond_release_one(struct net_device *bond_dev, write_lock_bh(&bond->lock); } - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); @@ -1836,7 +1828,7 @@ static int __bond_release_one(struct net_device *bond_dev, unblock_netpoll_tx(); synchronize_rcu(); - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); } @@ -1848,8 +1840,6 @@ static int __bond_release_one(struct net_device *bond_dev, bond_dev->name, slave_dev->name, bond_dev->name); /* must do this from outside any spinlocks */ - bond_destroy_slave_symlinks(bond_dev, slave_dev); - vlan_vids_del_by_dev(slave_dev, bond_dev); /* If the mode USES_PRIMARY, then this cases was handled above by @@ -1873,8 +1863,6 @@ static int __bond_release_one(struct net_device *bond_dev, bond_hw_addr_flush(bond_dev, slave_dev); } - bond_upper_dev_unlink(bond_dev, slave_dev); - slave_disable_netpoll(slave); /* close slave before restoring its mac address */ @@ -1913,7 +1901,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, int ret; ret = bond_release(bond_dev, slave_dev); - if (ret == 0 && list_empty(&bond->slave_list)) { + if (ret == 0 && !bond_has_slaves(bond)) { bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; pr_info("%s: destroying bond %s.\n", bond_dev->name, bond_dev->name); @@ -1994,11 +1982,12 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; int i = 0, res = -ENODEV; struct slave *slave; read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (i++ == (int)info->slave_id) { res = 0; strcpy(info->slave_name, slave->dev->name); @@ -2019,12 +2008,13 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in static int bond_miimon_inspect(struct bonding *bond) { int link_state, commit = 0; + struct list_head *iter; struct slave *slave; bool ignore_updelay; ignore_updelay = !bond->curr_active_slave ? true : false; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2118,9 +2108,10 @@ static int bond_miimon_inspect(struct bonding *bond) static void bond_miimon_commit(struct bonding *bond) { + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { case BOND_LINK_NOCHANGE: continue; @@ -2225,7 +2216,7 @@ void bond_mii_monitor(struct work_struct *work) delay = msecs_to_jiffies(bond->params.miimon); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; should_notify_peers = bond_should_notify_peers(bond); @@ -2274,7 +2265,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) return true; rcu_read_lock(); - netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (ip == bond_confirm_addr(upper, 0, ip)) { ret = true; break; @@ -2349,10 +2340,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) * * TODO: QinQ? */ - netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, + vlan_iter) { if (!is_vlan_dev(vlan_upper)) continue; - netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) { + netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, + iter) { if (upper == rt->dst.dev) { vlan_id = vlan_dev_vlan_id(vlan_upper); rcu_read_unlock(); @@ -2365,7 +2358,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) * our upper vlans, then just search for any dev that * matches, and in case it's a vlan - save the id */ - netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (upper == rt->dst.dev) { /* if it's a vlan - get its VID */ if (is_vlan_dev(upper)) @@ -2512,11 +2505,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work) struct bonding *bond = container_of(work, struct bonding, arp_work.work); struct slave *slave, *oldcurrent; + struct list_head *iter; int do_failover = 0; read_lock(&bond->lock); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; oldcurrent = bond->curr_active_slave; @@ -2528,7 +2522,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * TODO: what about up/down delay in arp mode? it wasn't here before * so it can wait */ - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); if (slave->link != BOND_LINK_UP) { @@ -2619,10 +2613,11 @@ re_arm: static int bond_ab_arp_inspect(struct bonding *bond) { unsigned long trans_start, last_rx; + struct list_head *iter; struct slave *slave; int commit = 0; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; last_rx = slave_last_rx(bond, slave); @@ -2689,9 +2684,10 @@ static int bond_ab_arp_inspect(struct bonding *bond) static void bond_ab_arp_commit(struct bonding *bond) { unsigned long trans_start; + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { case BOND_LINK_NOCHANGE: continue; @@ -2762,8 +2758,9 @@ do_failover: */ static void bond_ab_arp_probe(struct bonding *bond) { - struct slave *slave, *next_slave; - int i; + struct slave *slave, *before = NULL, *new_slave = NULL; + struct list_head *iter; + bool found = false; read_lock(&bond->curr_slave_lock); @@ -2793,18 +2790,12 @@ static void bond_ab_arp_probe(struct bonding *bond) bond_set_slave_inactive_flags(bond->current_arp_slave); - /* search for next candidate */ - next_slave = bond_next_slave(bond, bond->current_arp_slave); - bond_for_each_slave_from(bond, slave, i, next_slave) { - if (IS_UP(slave->dev)) { - slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(slave); - bond_arp_send_all(bond, slave); - slave->jiffies = jiffies; - bond->current_arp_slave = slave; - break; - } + bond_for_each_slave(bond, slave, iter) { + if (!found && !before && IS_UP(slave->dev)) + before = slave; + if (found && !new_slave && IS_UP(slave->dev)) + new_slave = slave; /* if the link state is up at this point, we * mark it down - this can happen if we have * simultaneous link failures and @@ -2812,7 +2803,7 @@ static void bond_ab_arp_probe(struct bonding *bond) * one the current slave so it is still marked * up when it is actually down */ - if (slave->link == BOND_LINK_UP) { + if (!IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { slave->link = BOND_LINK_DOWN; if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2822,7 +2813,22 @@ static void bond_ab_arp_probe(struct bonding *bond) pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); } + if (slave == bond->current_arp_slave) + found = true; } + + if (!new_slave && before) + new_slave = before; + + if (!new_slave) + return; + + new_slave->link = BOND_LINK_BACK; + bond_set_slave_active_flags(new_slave); + bond_arp_send_all(bond, new_slave); + new_slave->jiffies = jiffies; + bond->current_arp_slave = new_slave; + } void bond_activebackup_arp_mon(struct work_struct *work) @@ -2836,7 +2842,7 @@ void bond_activebackup_arp_mon(struct work_struct *work) delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; should_notify_peers = bond_should_notify_peers(bond); @@ -3033,99 +3039,85 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) +/* L2 hash helper */ +static inline u32 bond_eth_hash(struct sk_buff *skb) { struct ethhdr *data = (struct ethhdr *)skb->data; if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) - return (data->h_dest[5] ^ data->h_source[5]) % count; + return data->h_dest[5] ^ data->h_source[5]; return 0; } -/* - * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP, fall back on bond_xmit_hash_policy_l2() - */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, + struct flow_keys *fk) { - const struct ethhdr *data; + const struct ipv6hdr *iph6; const struct iphdr *iph; - const struct ipv6hdr *ipv6h; - u32 v6hash; - const __be32 *s, *d; + int noff, proto = -1; + + if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) + return skb_flow_dissect(skb, fk); - if (skb->protocol == htons(ETH_P_IP) && - pskb_network_may_pull(skb, sizeof(*iph))) { + fk->ports = 0; + noff = skb_network_offset(skb); + if (skb->protocol == htons(ETH_P_IP)) { + if (!pskb_may_pull(skb, noff + sizeof(*iph))) + return false; iph = ip_hdr(skb); - data = (struct ethhdr *)skb->data; - return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ - (data->h_dest[5] ^ data->h_source[5])) % count; - } else if (skb->protocol == htons(ETH_P_IPV6) && - pskb_network_may_pull(skb, sizeof(*ipv6h))) { - ipv6h = ipv6_hdr(skb); - data = (struct ethhdr *)skb->data; - s = &ipv6h->saddr.s6_addr32[0]; - d = &ipv6h->daddr.s6_addr32[0]; - v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); - v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); - return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; - } - - return bond_xmit_hash_policy_l2(skb, count); + fk->src = iph->saddr; + fk->dst = iph->daddr; + noff += iph->ihl << 2; + if (!ip_is_fragment(iph)) + proto = iph->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (!pskb_may_pull(skb, noff + sizeof(*iph6))) + return false; + iph6 = ipv6_hdr(skb); + fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + noff += sizeof(*iph6); + proto = iph6->nexthdr; + } else { + return false; + } + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) + fk->ports = skb_flow_get_ports(skb, noff, proto); + + return true; } -/* - * Hash for the output device based upon layer 3 and layer 4 data. If - * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is - * altogether not IP, fall back on bond_xmit_hash_policy_l2() +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * @count: modulo value + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + * which will be reduced modulo count before returning. */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count) { - u32 layer4_xor = 0; - const struct iphdr *iph; - const struct ipv6hdr *ipv6h; - const __be32 *s, *d; - const __be16 *l4 = NULL; - __be16 _l4[2]; - int noff = skb_network_offset(skb); - int poff; - - if (skb->protocol == htons(ETH_P_IP) && - pskb_may_pull(skb, noff + sizeof(*iph))) { - iph = ip_hdr(skb); - poff = proto_ports_offset(iph->protocol); + struct flow_keys flow; + u32 hash; - if (!ip_is_fragment(iph) && poff >= 0) { - l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, - sizeof(_l4), &_l4); - if (l4) - layer4_xor = ntohs(l4[0] ^ l4[1]); - } - return (layer4_xor ^ - ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; - } else if (skb->protocol == htons(ETH_P_IPV6) && - pskb_may_pull(skb, noff + sizeof(*ipv6h))) { - ipv6h = ipv6_hdr(skb); - poff = proto_ports_offset(ipv6h->nexthdr); - if (poff >= 0) { - l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, - sizeof(_l4), &_l4); - if (l4) - layer4_xor = ntohs(l4[0] ^ l4[1]); - } - s = &ipv6h->saddr.s6_addr32[0]; - d = &ipv6h->daddr.s6_addr32[0]; - layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); - layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^ - (layer4_xor >> 8); - return layer4_xor % count; - } + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || + !bond_flow_dissect(bond, skb, &flow)) + return bond_eth_hash(skb) % count; + + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || + bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) + hash = bond_eth_hash(skb); + else + hash = (__force u32)flow.ports; + hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash ^= (hash >> 16); + hash ^= (hash >> 8); - return bond_xmit_hash_policy_l2(skb, count); + return hash % count; } /*-------------------------- Device entry points ----------------------------*/ @@ -3155,13 +3147,14 @@ static void bond_work_cancel_all(struct bonding *bond) static int bond_open(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; /* reset slave->backup and slave->inactive */ read_lock(&bond->lock); - if (!list_empty(&bond->slave_list)) { + if (bond_has_slaves(bond)) { read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) && (slave != bond->curr_active_slave)) { bond_set_slave_inactive_flags(slave); @@ -3221,12 +3214,13 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, { struct bonding *bond = netdev_priv(bond_dev); struct rtnl_link_stats64 temp; + struct list_head *iter; struct slave *slave; memset(stats, 0, sizeof(*stats)); read_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { const struct rtnl_link_stats64 *sstats = dev_get_stats(slave->dev, &temp); @@ -3393,22 +3387,24 @@ static void bond_change_rx_flags(struct net_device *bond_dev, int change) static void bond_set_rx_mode(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - ASSERT_RTNL(); + rcu_read_lock(); if (USES_PRIMARY(bond->params.mode)) { - slave = rtnl_dereference(bond->curr_active_slave); + slave = rcu_dereference(bond->curr_active_slave); if (slave) { dev_uc_sync(slave->dev, bond_dev); dev_mc_sync(slave->dev, bond_dev); } } else { - bond_for_each_slave(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { dev_uc_sync_multiple(slave->dev, bond_dev); dev_mc_sync_multiple(slave->dev, bond_dev); } } + rcu_read_unlock(); } static int bond_neigh_init(struct neighbour *n) @@ -3471,7 +3467,8 @@ static int bond_neigh_setup(struct net_device *dev, static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave; + struct slave *slave, *rollback_slave; + struct list_head *iter; int res = 0; pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, @@ -3492,10 +3489,9 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) * call to the base driver. */ - bond_for_each_slave(bond, slave) { - pr_debug("s %p s->p %p c_m %p\n", + bond_for_each_slave(bond, slave, iter) { + pr_debug("s %p c_m %p\n", slave, - bond_prev_slave(bond, slave), slave->dev->netdev_ops->ndo_change_mtu); res = dev_set_mtu(slave->dev, new_mtu); @@ -3520,13 +3516,16 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) unwind: /* unwind from head to the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) { + bond_for_each_slave(bond, rollback_slave, iter) { int tmp_res; - tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); + if (rollback_slave == slave) + break; + + tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); if (tmp_res) { pr_debug("unwind err %d dev %s\n", - tmp_res, slave->dev->name); + tmp_res, rollback_slave->dev->name); } } @@ -3543,8 +3542,9 @@ unwind: static int bond_set_mac_address(struct net_device *bond_dev, void *addr) { struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave, *rollback_slave; struct sockaddr *sa = addr, tmp_sa; - struct slave *slave; + struct list_head *iter; int res = 0; if (bond->params.mode == BOND_MODE_ALB) @@ -3578,7 +3578,7 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) * call to the base driver. */ - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { const struct net_device_ops *slave_ops = slave->dev->netdev_ops; pr_debug("slave %p %s\n", slave, slave->dev->name); @@ -3610,13 +3610,16 @@ unwind: tmp_sa.sa_family = bond_dev->type; /* unwind from head to the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) { + bond_for_each_slave(bond, rollback_slave, iter) { int tmp_res; - tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); + if (rollback_slave == slave) + break; + + tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa); if (tmp_res) { pr_debug("unwind err %d dev %s\n", - tmp_res, slave->dev->name); + tmp_res, rollback_slave->dev->name); } } @@ -3635,11 +3638,12 @@ unwind: */ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) { + struct list_head *iter; struct slave *slave; int i = slave_id; /* Here we start from the slave with slave_id */ - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { if (slave_can_tx(slave)) { bond_dev_queue_xmit(bond, skb, slave->dev); @@ -3650,7 +3654,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) /* Here we start from the first slave up to slave_id */ i = slave_id; - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; if (slave_can_tx(slave)) { @@ -3707,8 +3711,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d return NETDEV_TX_OK; } -/* - * In bond_xmit_xor() , we determine the output device by using a pre- +/* In bond_xmit_xor() , we determine the output device by using a pre- * determined xmit_hash_policy(), If the selected device is not enabled, * find the next active slave. */ @@ -3716,8 +3719,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_xmit_slave_id(bond, skb, - bond->xmit_hash_policy(skb, bond->slave_cnt)); + bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt)); return NETDEV_TX_OK; } @@ -3727,8 +3729,9 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; + struct list_head *iter; - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (bond_is_last_slave(bond, slave)) break; if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { @@ -3753,22 +3756,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) /*------------------------- Device initialization ---------------------------*/ -static void bond_set_xmit_hash_policy(struct bonding *bond) -{ - switch (bond->params.xmit_policy) { - case BOND_XMIT_POLICY_LAYER23: - bond->xmit_hash_policy = bond_xmit_hash_policy_l23; - break; - case BOND_XMIT_POLICY_LAYER34: - bond->xmit_hash_policy = bond_xmit_hash_policy_l34; - break; - case BOND_XMIT_POLICY_LAYER2: - default: - bond->xmit_hash_policy = bond_xmit_hash_policy_l2; - break; - } -} - /* * Lookup the slave that corresponds to a qid */ @@ -3777,13 +3764,14 @@ static inline int bond_slave_override(struct bonding *bond, { struct slave *slave = NULL; struct slave *check_slave; + struct list_head *iter; int res = 1; if (!skb->queue_mapping) return 1; /* Find out if any slaves have the same mapping as this skb. */ - bond_for_each_slave_rcu(bond, check_slave) { + bond_for_each_slave_rcu(bond, check_slave, iter) { if (check_slave->queue_id == skb->queue_mapping) { slave = check_slave; break; @@ -3869,7 +3857,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; rcu_read_lock(); - if (!list_empty(&bond->slave_list)) + if (bond_has_slaves(bond)) ret = __bond_start_xmit(skb, dev); else kfree_skb(skb); @@ -3878,43 +3866,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -/* - * set bond mode specific net device operations - */ -void bond_set_mode_ops(struct bonding *bond, int mode) -{ - struct net_device *bond_dev = bond->dev; - - switch (mode) { - case BOND_MODE_ROUNDROBIN: - break; - case BOND_MODE_ACTIVEBACKUP: - break; - case BOND_MODE_XOR: - bond_set_xmit_hash_policy(bond); - break; - case BOND_MODE_BROADCAST: - break; - case BOND_MODE_8023AD: - bond_set_xmit_hash_policy(bond); - break; - case BOND_MODE_ALB: - /* FALLTHRU */ - case BOND_MODE_TLB: - break; - default: - /* Should never happen, mode already checked */ - pr_err("%s: Error: Unknown bonding mode %d\n", - bond_dev->name, mode); - break; - } -} - static int bond_ethtool_get_settings(struct net_device *bond_dev, struct ethtool_cmd *ecmd) { struct bonding *bond = netdev_priv(bond_dev); unsigned long speed = 0; + struct list_head *iter; struct slave *slave; ecmd->duplex = DUPLEX_UNKNOWN; @@ -3926,7 +3883,7 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev, * this is an accurate maximum. */ read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (SLAVE_IS_OK(slave)) { if (slave->speed != SPEED_UNKNOWN) speed += slave->speed; @@ -4001,7 +3958,6 @@ static void bond_setup(struct net_device *bond_dev) /* initialize rwlocks */ rwlock_init(&bond->lock); rwlock_init(&bond->curr_slave_lock); - INIT_LIST_HEAD(&bond->slave_list); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4011,7 +3967,6 @@ static void bond_setup(struct net_device *bond_dev) ether_setup(bond_dev); bond_dev->netdev_ops = &bond_netdev_ops; bond_dev->ethtool_ops = &bond_ethtool_ops; - bond_set_mode_ops(bond, bond->params.mode); bond_dev->destructor = bond_destructor; @@ -4057,12 +4012,13 @@ static void bond_setup(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave, *tmp_slave; + struct list_head *iter; + struct slave *slave; bond_netpoll_cleanup(bond_dev); /* Release the bonded slaves */ - list_for_each_entry_safe(slave, tmp_slave, &bond->slave_list, list) + bond_for_each_slave(bond, slave, iter) __bond_release_one(bond_dev, slave->dev, true); pr_info("%s: released all slaves\n", bond_dev->name); |