summaryrefslogtreecommitdiff
path: root/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r--drivers/net/bonding/bond_main.c1042
1 files changed, 535 insertions, 507 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 484c9e3e5e82..3d56339a8a10 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -90,6 +90,8 @@
#include <net/tls.h>
#endif
#include <net/ip6_route.h>
+#include <net/netdev_lock.h>
+#include <net/xdp.h>
#include "bonding_priv.h"
@@ -140,8 +142,7 @@ module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
"in milliseconds");
module_param(use_carrier, int, 0);
-MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
- "0 for off, 1 for on (default)");
+MODULE_PARM_DESC(use_carrier, "option obsolete, use_carrier cannot be disabled");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "
"1 for active-backup, 2 for balance-xor, "
@@ -210,6 +211,8 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
unsigned int bond_net_id __read_mostly;
+DEFINE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled);
+
static const struct flow_dissector_key flow_keys_bonding_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -321,9 +324,9 @@ static bool bond_sk_check(struct bonding *bond)
}
}
-static bool bond_xdp_check(struct bonding *bond)
+bool bond_xdp_check(struct bonding *bond, int mode)
{
- switch (BOND_MODE(bond)) {
+ switch (mode) {
case BOND_MODE_ROUNDROBIN:
case BOND_MODE_ACTIVEBACKUP:
return true;
@@ -418,14 +421,49 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
#ifdef CONFIG_XFRM_OFFLOAD
/**
+ * bond_ipsec_dev - Get active device for IPsec offload
+ * @xs: pointer to transformer state struct
+ *
+ * Context: caller must hold rcu_read_lock.
+ *
+ * Return: the device for ipsec offload, or NULL if not exist.
+ **/
+static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
+{
+ struct net_device *bond_dev = xs->xso.dev;
+ struct bonding *bond;
+ struct slave *slave;
+
+ bond = netdev_priv(bond_dev);
+ if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
+ return NULL;
+
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (!slave)
+ return NULL;
+
+ if (!xs->xso.real_dev)
+ return NULL;
+
+ if (xs->xso.real_dev != slave->dev)
+ pr_warn_ratelimited("%s: (slave %s): not same with IPsec offload real dev %s\n",
+ bond_dev->name, slave->dev->name, xs->xso.real_dev->name);
+
+ return slave->dev;
+}
+
+/**
* bond_ipsec_add_sa - program device with a security association
+ * @bond_dev: pointer to the bond net device
* @xs: pointer to transformer state struct
* @extack: extack point to fill failure reason
**/
-static int bond_ipsec_add_sa(struct xfrm_state *xs,
+static int bond_ipsec_add_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs,
struct netlink_ext_ack *extack)
{
- struct net_device *bond_dev = xs->xso.dev;
+ struct net_device *real_dev;
+ netdevice_tracker tracker;
struct bond_ipsec *ipsec;
struct bonding *bond;
struct slave *slave;
@@ -437,112 +475,185 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs,
rcu_read_lock();
bond = netdev_priv(bond_dev);
slave = rcu_dereference(bond->curr_active_slave);
- if (!slave) {
- rcu_read_unlock();
- return -ENODEV;
+ real_dev = slave ? slave->dev : NULL;
+ netdev_hold(real_dev, &tracker, GFP_ATOMIC);
+ rcu_read_unlock();
+ if (!real_dev) {
+ err = -ENODEV;
+ goto out;
}
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
- netif_is_bond_master(slave->dev)) {
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_add ||
+ netif_is_bond_master(real_dev)) {
NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
- rcu_read_unlock();
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
- ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC);
+ ipsec = kmalloc(sizeof(*ipsec), GFP_KERNEL);
if (!ipsec) {
- rcu_read_unlock();
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
- xs->xso.real_dev = slave->dev;
- err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
+ err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack);
if (!err) {
+ xs->xso.real_dev = real_dev;
ipsec->xs = xs;
INIT_LIST_HEAD(&ipsec->list);
- spin_lock_bh(&bond->ipsec_lock);
+ mutex_lock(&bond->ipsec_lock);
list_add(&ipsec->list, &bond->ipsec_list);
- spin_unlock_bh(&bond->ipsec_lock);
+ mutex_unlock(&bond->ipsec_lock);
} else {
kfree(ipsec);
}
- rcu_read_unlock();
+out:
+ netdev_put(real_dev, &tracker);
return err;
}
static void bond_ipsec_add_sa_all(struct bonding *bond)
{
struct net_device *bond_dev = bond->dev;
+ struct net_device *real_dev;
struct bond_ipsec *ipsec;
struct slave *slave;
- rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- if (!slave)
- goto out;
+ slave = rtnl_dereference(bond->curr_active_slave);
+ real_dev = slave ? slave->dev : NULL;
+ if (!real_dev)
+ return;
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
- netif_is_bond_master(slave->dev)) {
- spin_lock_bh(&bond->ipsec_lock);
+ mutex_lock(&bond->ipsec_lock);
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_add ||
+ netif_is_bond_master(real_dev)) {
if (!list_empty(&bond->ipsec_list))
- slave_warn(bond_dev, slave->dev,
+ slave_warn(bond_dev, real_dev,
"%s: no slave xdo_dev_state_add\n",
__func__);
- spin_unlock_bh(&bond->ipsec_lock);
goto out;
}
- spin_lock_bh(&bond->ipsec_lock);
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- ipsec->xs->xso.real_dev = slave->dev;
- if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
- slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__);
- ipsec->xs->xso.real_dev = NULL;
+ /* If new state is added before ipsec_lock acquired */
+ if (ipsec->xs->xso.real_dev == real_dev)
+ continue;
+
+ if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev,
+ ipsec->xs, NULL)) {
+ slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
+ continue;
}
+
+ spin_lock_bh(&ipsec->xs->lock);
+ /* xs might have been killed by the user during the migration
+ * to the new dev, but bond_ipsec_del_sa() should have done
+ * nothing, as xso.real_dev is NULL.
+ * Delete it from the device we just added it to. The pending
+ * bond_ipsec_free_sa() call will do the rest of the cleanup.
+ */
+ if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
+ real_dev->xfrmdev_ops->xdo_dev_state_delete)
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
+ ipsec->xs);
+ ipsec->xs->xso.real_dev = real_dev;
+ spin_unlock_bh(&ipsec->xs->lock);
}
- spin_unlock_bh(&bond->ipsec_lock);
out:
- rcu_read_unlock();
+ mutex_unlock(&bond->ipsec_lock);
}
/**
* bond_ipsec_del_sa - clear out this specific SA
+ * @bond_dev: pointer to the bond net device
* @xs: pointer to transformer state struct
**/
-static void bond_ipsec_del_sa(struct xfrm_state *xs)
+static void bond_ipsec_del_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs)
{
- struct net_device *bond_dev = xs->xso.dev;
+ struct net_device *real_dev;
+
+ if (!bond_dev || !xs->xso.real_dev)
+ return;
+
+ real_dev = xs->xso.real_dev;
+
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
+ netif_is_bond_master(real_dev)) {
+ slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
+ return;
+ }
+
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs);
+}
+
+static void bond_ipsec_del_sa_all(struct bonding *bond)
+{
+ struct net_device *bond_dev = bond->dev;
+ struct net_device *real_dev;
struct bond_ipsec *ipsec;
- struct bonding *bond;
struct slave *slave;
+ slave = rtnl_dereference(bond->curr_active_slave);
+ real_dev = slave ? slave->dev : NULL;
+ if (!real_dev)
+ return;
+
+ mutex_lock(&bond->ipsec_lock);
+ list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+ if (!ipsec->xs->xso.real_dev)
+ continue;
+
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
+ netif_is_bond_master(real_dev)) {
+ slave_warn(bond_dev, real_dev,
+ "%s: no slave xdo_dev_state_delete\n",
+ __func__);
+ continue;
+ }
+
+ spin_lock_bh(&ipsec->xs->lock);
+ ipsec->xs->xso.real_dev = NULL;
+ /* Don't double delete states killed by the user. */
+ if (ipsec->xs->km.state != XFRM_STATE_DEAD)
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
+ ipsec->xs);
+ spin_unlock_bh(&ipsec->xs->lock);
+
+ if (real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev,
+ ipsec->xs);
+ }
+ mutex_unlock(&bond->ipsec_lock);
+}
+
+static void bond_ipsec_free_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs)
+{
+ struct net_device *real_dev;
+ struct bond_ipsec *ipsec;
+ struct bonding *bond;
+
if (!bond_dev)
return;
- rcu_read_lock();
bond = netdev_priv(bond_dev);
- slave = rcu_dereference(bond->curr_active_slave);
-
- if (!slave)
- goto out;
+ mutex_lock(&bond->ipsec_lock);
if (!xs->xso.real_dev)
goto out;
- WARN_ON(xs->xso.real_dev != slave->dev);
+ real_dev = xs->xso.real_dev;
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
- netif_is_bond_master(slave->dev)) {
- slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__);
- goto out;
- }
-
- slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs);
+ xs->xso.real_dev = NULL;
+ if (real_dev->xfrmdev_ops &&
+ real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs);
out:
- spin_lock_bh(&bond->ipsec_lock);
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
if (ipsec->xs == xs) {
list_del(&ipsec->list);
@@ -550,88 +661,84 @@ out:
break;
}
}
- spin_unlock_bh(&bond->ipsec_lock);
- rcu_read_unlock();
+ mutex_unlock(&bond->ipsec_lock);
}
-static void bond_ipsec_del_sa_all(struct bonding *bond)
+/**
+ * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
{
- struct net_device *bond_dev = bond->dev;
- struct bond_ipsec *ipsec;
- struct slave *slave;
+ struct net_device *real_dev;
rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- if (!slave) {
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev || netif_is_bond_master(real_dev)) {
rcu_read_unlock();
- return;
+ return false;
}
- spin_lock_bh(&bond->ipsec_lock);
- list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- if (!ipsec->xs->xso.real_dev)
- continue;
-
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
- netif_is_bond_master(slave->dev)) {
- slave_warn(bond_dev, slave->dev,
- "%s: no slave xdo_dev_state_delete\n",
- __func__);
- } else {
- slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs);
- }
- ipsec->xs->xso.real_dev = NULL;
- }
- spin_unlock_bh(&bond->ipsec_lock);
rcu_read_unlock();
+ return true;
}
/**
- * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
- * @skb: current data packet
+ * bond_advance_esn_state - ESN support for IPSec HW offload
* @xs: pointer to transformer state struct
**/
-static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+static void bond_advance_esn_state(struct xfrm_state *xs)
{
- struct net_device *bond_dev = xs->xso.dev;
struct net_device *real_dev;
- struct slave *curr_active;
- struct bonding *bond;
- int err;
- bond = netdev_priv(bond_dev);
rcu_read_lock();
- curr_active = rcu_dereference(bond->curr_active_slave);
- real_dev = curr_active->dev;
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev)
+ goto out;
- if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
- err = false;
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_advance_esn\n", __func__, real_dev->name);
goto out;
}
- if (!xs->xso.real_dev) {
- err = false;
+ real_dev->xfrmdev_ops->xdo_dev_state_advance_esn(xs);
+out:
+ rcu_read_unlock();
+}
+
+/**
+ * bond_xfrm_update_stats - Update xfrm state
+ * @xs: pointer to transformer state struct
+ **/
+static void bond_xfrm_update_stats(struct xfrm_state *xs)
+{
+ struct net_device *real_dev;
+
+ rcu_read_lock();
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev)
goto out;
- }
if (!real_dev->xfrmdev_ops ||
- !real_dev->xfrmdev_ops->xdo_dev_offload_ok ||
- netif_is_bond_master(real_dev)) {
- err = false;
+ !real_dev->xfrmdev_ops->xdo_dev_state_update_stats) {
+ pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_update_stats\n", __func__, real_dev->name);
goto out;
}
- err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs);
+ real_dev->xfrmdev_ops->xdo_dev_state_update_stats(xs);
out:
rcu_read_unlock();
- return err;
}
static const struct xfrmdev_ops bond_xfrmdev_ops = {
.xdo_dev_state_add = bond_ipsec_add_sa,
.xdo_dev_state_delete = bond_ipsec_del_sa,
+ .xdo_dev_state_free = bond_ipsec_free_sa,
.xdo_dev_offload_ok = bond_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = bond_advance_esn_state,
+ .xdo_dev_state_update_stats = bond_xfrm_update_stats,
};
#endif /* CONFIG_XFRM_OFFLOAD */
@@ -722,73 +829,6 @@ const char *bond_slave_link_status(s8 link)
}
}
-/* if <dev> supports MII link status reporting, check its link status.
- *
- * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
- * depending upon the setting of the use_carrier parameter.
- *
- * Return either BMSR_LSTATUS, meaning that the link is up (or we
- * can't tell and just pretend it is), or 0, meaning that the link is
- * down.
- *
- * If reporting is non-zero, instead of faking link up, return -1 if
- * both ETHTOOL and MII ioctls fail (meaning the device does not
- * support them). If use_carrier is set, return whatever it says.
- * It'd be nice if there was a good way to tell if a driver supports
- * netif_carrier, but there really isn't.
- */
-static int bond_check_dev_link(struct bonding *bond,
- struct net_device *slave_dev, int reporting)
-{
- const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
- int (*ioctl)(struct net_device *, struct ifreq *, int);
- struct ifreq ifr;
- struct mii_ioctl_data *mii;
-
- if (!reporting && !netif_running(slave_dev))
- return 0;
-
- if (bond->params.use_carrier)
- return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
-
- /* Try to get link status using Ethtool first. */
- if (slave_dev->ethtool_ops->get_link)
- return slave_dev->ethtool_ops->get_link(slave_dev) ?
- BMSR_LSTATUS : 0;
-
- /* Ethtool can't be used, fallback to MII ioctls. */
- ioctl = slave_ops->ndo_eth_ioctl;
- if (ioctl) {
- /* TODO: set pointer to correct ioctl on a per team member
- * bases to make this more efficient. that is, once
- * we determine the correct ioctl, we will always
- * call it and not the others for that team
- * member.
- */
-
- /* We cannot assume that SIOCGMIIPHY will also read a
- * register; not all network drivers (e.g., e100)
- * support that.
- */
-
- /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
- strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
- mii = if_mii(&ifr);
- if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
- mii->reg_num = MII_BMSR;
- if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
- return mii->val_out & BMSR_LSTATUS;
- }
- }
-
- /* If reporting, report that either there's no ndo_eth_ioctl,
- * or both SIOCGMIIREG and get_link failed (meaning that we
- * cannot report link status). If not reporting, pretend
- * we're ok.
- */
- return reporting ? -1 : BMSR_LSTATUS;
-}
-
/*----------------------------- Multicast list ------------------------------*/
/* Push the promiscuity flag down to appropriate slaves */
@@ -891,6 +931,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
if (bond->dev->flags & IFF_UP)
bond_hw_addr_flush(bond->dev, old_active->dev);
+
+ bond_slave_ns_maddrs_add(bond, old_active);
}
if (new_active) {
@@ -907,6 +949,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
dev_mc_sync(new_active->dev, bond->dev);
netif_addr_unlock_bh(bond->dev);
}
+
+ bond_slave_ns_maddrs_del(bond, new_active);
}
}
@@ -924,7 +968,7 @@ static int bond_set_dev_addr(struct net_device *bond_dev,
slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
bond_dev, slave_dev, slave_dev->addr_len);
- err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL);
+ err = netif_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL);
if (err)
return err;
@@ -998,8 +1042,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
ss.ss_family = bond->dev->type;
}
- rv = dev_set_mac_address(new_active->dev,
- (struct sockaddr *)&ss, NULL);
+ rv = dev_set_mac_address(new_active->dev, &ss, NULL);
if (rv) {
slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
-rv);
@@ -1013,8 +1056,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
new_active->dev->addr_len);
ss.ss_family = old_active->dev->type;
- rv = dev_set_mac_address(old_active->dev,
- (struct sockaddr *)&ss, NULL);
+ rv = dev_set_mac_address(old_active->dev, &ss, NULL);
if (rv)
slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
-rv);
@@ -1120,23 +1162,35 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
return bestslave;
}
+/* must be called in RCU critical section or with RTNL held */
static bool bond_should_notify_peers(struct bonding *bond)
{
- struct slave *slave;
-
- rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- rcu_read_unlock();
+ struct bond_up_slave *usable;
+ struct slave *slave = NULL;
- if (!slave || !bond->send_peer_notif ||
+ if (!bond->send_peer_notif ||
bond->send_peer_notif %
max(1, bond->params.peer_notif_delay) != 0 ||
- !netif_carrier_ok(bond->dev) ||
- test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
+ !netif_carrier_ok(bond->dev))
return false;
+ /* The send_peer_notif is set by active-backup or 8023ad
+ * mode, and cleared in bond_close() when changing mode.
+ * It is safe to only check bond mode here.
+ */
+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+ usable = rcu_dereference_rtnl(bond->usable_slaves);
+ if (!usable || !READ_ONCE(usable->count))
+ return false;
+ } else {
+ slave = rcu_dereference_rtnl(bond->curr_active_slave);
+ if (!slave || test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &slave->dev->state))
+ return false;
+ }
+
netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
- slave ? slave->dev->name : "NULL");
+ slave ? slave->dev->name : "all");
return true;
}
@@ -1362,7 +1416,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev)
slave_disable_netpoll(slave);
}
-static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int bond_netpoll_setup(struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
struct list_head *iter;
@@ -1402,9 +1456,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
struct slave *slave;
mask = features;
-
- features &= ~NETIF_F_ONE_FOR_ALL;
- features |= NETIF_F_ALL_FOR_ALL;
+ features = netdev_base_features(features);
bond_for_each_slave(bond, slave, iter) {
features = netdev_increment_features(features,
@@ -1416,89 +1468,13 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
return features;
}
-#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
- NETIF_F_HIGHDMA | NETIF_F_LRO)
-
-#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE)
-
-#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_GSO_SOFTWARE)
-
-
-static void bond_compute_features(struct bonding *bond)
-{
- unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
- IFF_XMIT_DST_RELEASE_PERM;
- netdev_features_t vlan_features = BOND_VLAN_FEATURES;
- netdev_features_t enc_features = BOND_ENC_FEATURES;
-#ifdef CONFIG_XFRM_OFFLOAD
- netdev_features_t xfrm_features = BOND_XFRM_FEATURES;
-#endif /* CONFIG_XFRM_OFFLOAD */
- netdev_features_t mpls_features = BOND_MPLS_FEATURES;
- struct net_device *bond_dev = bond->dev;
- struct list_head *iter;
- struct slave *slave;
- unsigned short max_hard_header_len = ETH_HLEN;
- unsigned int tso_max_size = TSO_MAX_SIZE;
- u16 tso_max_segs = TSO_MAX_SEGS;
-
- if (!bond_has_slaves(bond))
- goto done;
- vlan_features &= NETIF_F_ALL_FOR_ALL;
- mpls_features &= NETIF_F_ALL_FOR_ALL;
-
- bond_for_each_slave(bond, slave, iter) {
- vlan_features = netdev_increment_features(vlan_features,
- slave->dev->vlan_features, BOND_VLAN_FEATURES);
-
- enc_features = netdev_increment_features(enc_features,
- slave->dev->hw_enc_features,
- BOND_ENC_FEATURES);
-
-#ifdef CONFIG_XFRM_OFFLOAD
- xfrm_features = netdev_increment_features(xfrm_features,
- slave->dev->hw_enc_features,
- BOND_XFRM_FEATURES);
-#endif /* CONFIG_XFRM_OFFLOAD */
-
- mpls_features = netdev_increment_features(mpls_features,
- slave->dev->mpls_features,
- BOND_MPLS_FEATURES);
-
- dst_release_flag &= slave->dev->priv_flags;
- if (slave->dev->hard_header_len > max_hard_header_len)
- max_hard_header_len = slave->dev->hard_header_len;
-
- tso_max_size = min(tso_max_size, slave->dev->tso_max_size);
- tso_max_segs = min(tso_max_segs, slave->dev->tso_max_segs);
- }
- bond_dev->hard_header_len = max_hard_header_len;
-
-done:
- bond_dev->vlan_features = vlan_features;
- bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
- NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX;
-#ifdef CONFIG_XFRM_OFFLOAD
- bond_dev->hw_enc_features |= xfrm_features;
-#endif /* CONFIG_XFRM_OFFLOAD */
- bond_dev->mpls_features = mpls_features;
- netif_set_tso_max_segs(bond_dev, tso_max_segs);
- netif_set_tso_max_size(bond_dev, tso_max_size);
-
- bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
- if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
- dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
- bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
-
- netdev_change_features(bond_dev);
-}
-
static void bond_setup_by_slave(struct net_device *bond_dev,
struct net_device *slave_dev)
{
+ bool was_up = !!(bond_dev->flags & IFF_UP);
+
+ dev_close(bond_dev);
+
bond_dev->header_ops = slave_dev->header_ops;
bond_dev->type = slave_dev->type;
@@ -1513,6 +1489,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
}
+ if (was_up)
+ dev_open(bond_dev, NULL);
}
/* On bonding slaves other than the currently active slave, suppress
@@ -1804,7 +1782,7 @@ void bond_xdp_set_features(struct net_device *bond_dev)
ASSERT_RTNL();
- if (!bond_xdp_check(bond)) {
+ if (!bond_xdp_check(bond, BOND_MODE(bond)) || !bond_has_slaves(bond)) {
xdp_clear_features_flag(bond_dev);
return;
}
@@ -1812,6 +1790,8 @@ void bond_xdp_set_features(struct net_device *bond_dev)
bond_for_each_slave(bond, slave, iter)
val &= slave->dev->xdp_features;
+ val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
xdp_set_features_flag(bond_dev, val);
}
@@ -1823,7 +1803,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
struct slave *new_slave = NULL, *prev_slave;
struct sockaddr_storage ss;
- int link_reporting;
int res = 0, i;
if (slave_dev->flags & IFF_MASTER &&
@@ -1833,12 +1812,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
return -EPERM;
}
- if (!bond->params.use_carrier &&
- slave_dev->ethtool_ops->get_link == NULL &&
- slave_ops->ndo_eth_ioctl == NULL) {
- slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
- }
-
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack,
@@ -1987,15 +1960,27 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
* set the master's mac address to that of the first slave
*/
memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
- ss.ss_family = slave_dev->type;
- res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss,
- extack);
- if (res) {
- slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res);
- goto err_restore_mtu;
- }
+ } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
+ BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+ bond_has_slaves(bond) &&
+ memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
+ /* Set slave to random address to avoid duplicate mac
+ * address in later fail over.
+ */
+ eth_random_addr(ss.__data);
+ } else {
+ goto skip_mac_set;
}
+ ss.ss_family = slave_dev->type;
+ res = dev_set_mac_address(slave_dev, &ss, extack);
+ if (res) {
+ slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res);
+ goto err_restore_mtu;
+ }
+
+skip_mac_set:
+
/* set no_addrconf flag before open to prevent IPv6 addrconf */
slave_dev->priv_flags |= IFF_NO_ADDRCONF;
@@ -2041,29 +2026,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
new_slave->last_tx = new_slave->last_rx;
- if (bond->params.miimon && !bond->params.use_carrier) {
- link_reporting = bond_check_dev_link(bond, slave_dev, 1);
-
- if ((link_reporting == -1) && !bond->params.arp_interval) {
- /* miimon is set but a bonded network driver
- * does not support ETHTOOL/MII and
- * arp_interval is not set. Note: if
- * use_carrier is enabled, we will never go
- * here (because netif_carrier is always
- * supported); thus, we don't need to change
- * the messages for netif_carrier.
- */
- slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n");
- } else if (link_reporting == -1) {
- /* unable get link status using mii/ethtool */
- slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
- }
- }
-
/* check for initial state */
new_slave->link = BOND_LINK_NOCHANGE;
if (bond->params.miimon) {
- if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
+ if (netif_running(slave_dev) && netif_carrier_ok(slave_dev)) {
if (bond->params.updelay) {
bond_set_slave_link_state(new_slave,
BOND_LINK_BACK,
@@ -2216,19 +2182,25 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
}
bond->slave_cnt++;
- bond_compute_features(bond);
+ netdev_compute_master_upper_features(bond->dev, true);
bond_set_carrier(bond);
+ /* Needs to be called before bond_select_active_slave(), which will
+ * remove the maddrs if the slave is selected as active slave.
+ */
+ bond_slave_ns_maddrs_add(bond, new_slave);
+
if (bond_uses_primary(bond)) {
block_netpoll_tx();
bond_select_active_slave(bond);
unblock_netpoll_tx();
}
- if (bond_mode_can_use_xmit_hash(bond))
+ /* broadcast mode uses the all_slaves to loop through slaves. */
+ if (bond_mode_can_use_xmit_hash(bond) ||
+ BOND_MODE(bond) == BOND_MODE_BROADCAST)
bond_update_slave_arr(bond, NULL);
-
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
if (bond->xdp_prog) {
@@ -2252,7 +2224,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
goto err_sysfs_del;
}
- res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ res = dev_xdp_propagate(slave_dev, &xdp);
if (res < 0) {
/* ndo_bpf() sets extack error message */
slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
@@ -2312,7 +2284,7 @@ err_restore_mac:
bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr,
new_slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
+ dev_set_mac_address(slave_dev, &ss, NULL);
}
err_restore_mtu:
@@ -2388,7 +2360,7 @@ static int __bond_release_one(struct net_device *bond_dev,
.prog = NULL,
.extack = NULL,
};
- if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+ if (dev_xdp_propagate(slave_dev, &xdp))
slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
}
@@ -2402,7 +2374,8 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_upper_dev_unlink(bond, slave);
- if (bond_mode_can_use_xmit_hash(bond))
+ if (bond_mode_can_use_xmit_hash(bond) ||
+ BOND_MODE(bond) == BOND_MODE_BROADCAST)
bond_update_slave_arr(bond, slave);
slave_info(bond_dev, slave_dev, "Releasing %s interface\n",
@@ -2412,7 +2385,7 @@ static int __bond_release_one(struct net_device *bond_dev,
RCU_INIT_POINTER(bond->current_arp_slave, NULL);
- if (!all && (!bond->params.fail_over_mac ||
+ if (!all && (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) {
if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
bond_has_slaves(bond))
@@ -2426,6 +2399,12 @@ static int __bond_release_one(struct net_device *bond_dev,
if (oldcurrent == slave)
bond_change_active_slave(bond, NULL);
+ /* Must be called after bond_change_active_slave () as the slave
+ * might change from an active slave to a backup slave. Then it is
+ * necessary to clear the maddrs on the backup slave.
+ */
+ bond_slave_ns_maddrs_del(bond, slave);
+
if (bond_is_lb(bond)) {
/* Must be called only after the slave has been
* detached from the list and the curr_active_slave
@@ -2458,7 +2437,7 @@ static int __bond_release_one(struct net_device *bond_dev,
call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);
}
- bond_compute_features(bond);
+ netdev_compute_master_upper_features(bond->dev, true);
if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
(old_features & NETIF_F_VLAN_CHALLENGED))
slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n");
@@ -2500,13 +2479,16 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_hw_addr_copy(ss.__data, slave->perm_hwaddr,
slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
+ dev_set_mac_address(slave_dev, &ss, NULL);
}
- if (unregister)
- __dev_set_mtu(slave_dev, slave->original_mtu);
- else
+ if (unregister) {
+ netdev_lock_ops(slave_dev);
+ __netif_set_mtu(slave_dev, slave->original_mtu);
+ netdev_unlock_ops(slave_dev);
+ } else {
dev_set_mtu(slave_dev, slave->original_mtu);
+ }
if (!netif_is_bond_master(slave_dev))
slave_dev->priv_flags &= ~IFF_BONDING;
@@ -2592,7 +2574,8 @@ static int bond_miimon_inspect(struct bonding *bond)
bond_for_each_slave_rcu(bond, slave, iter) {
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
- link_state = bond_check_dev_link(bond, slave->dev, 0);
+ link_state = netif_running(slave->dev) &&
+ netif_carrier_ok(slave->dev);
switch (slave->link) {
case BOND_LINK_UP:
@@ -2602,7 +2585,7 @@ static int bond_miimon_inspect(struct bonding *bond)
bond_propose_link_state(slave, BOND_LINK_FAIL);
commit++;
slave->delay = bond->params.downdelay;
- if (slave->delay) {
+ if (slave->delay && net_ratelimit()) {
slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
(BOND_MODE(bond) ==
BOND_MODE_ACTIVEBACKUP) ?
@@ -2616,9 +2599,10 @@ static int bond_miimon_inspect(struct bonding *bond)
/* recovered before downdelay expired */
bond_propose_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
- slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
- (bond->params.downdelay - slave->delay) *
- bond->params.miimon);
+ if (net_ratelimit())
+ slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
+ (bond->params.downdelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2640,7 +2624,7 @@ static int bond_miimon_inspect(struct bonding *bond)
commit++;
slave->delay = bond->params.updelay;
- if (slave->delay) {
+ if (slave->delay && net_ratelimit()) {
slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
ignore_updelay ? 0 :
bond->params.updelay *
@@ -2650,9 +2634,10 @@ static int bond_miimon_inspect(struct bonding *bond)
case BOND_LINK_BACK:
if (!link_state) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
- slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
- (bond->params.updelay - slave->delay) *
- bond->params.miimon);
+ if (net_ratelimit())
+ slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
+ (bond->params.updelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2799,7 +2784,7 @@ static void bond_mii_monitor(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
mii_work.work);
- bool should_notify_peers = false;
+ bool should_notify_peers;
bool commit;
unsigned long delay;
struct slave *slave;
@@ -2811,30 +2796,33 @@ static void bond_mii_monitor(struct work_struct *work)
goto re_arm;
rcu_read_lock();
+
should_notify_peers = bond_should_notify_peers(bond);
commit = !!bond_miimon_inspect(bond);
- if (bond->send_peer_notif) {
- rcu_read_unlock();
- if (rtnl_trylock()) {
- bond->send_peer_notif--;
- rtnl_unlock();
- }
- } else {
- rcu_read_unlock();
- }
- if (commit) {
+ rcu_read_unlock();
+
+ if (commit || bond->send_peer_notif) {
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
delay = 1;
- should_notify_peers = false;
goto re_arm;
}
- bond_for_each_slave(bond, slave, iter) {
- bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
+ if (commit) {
+ bond_for_each_slave(bond, slave, iter) {
+ bond_commit_link_state(slave,
+ BOND_SLAVE_NOTIFY_LATER);
+ }
+ bond_miimon_commit(bond);
+ }
+
+ if (bond->send_peer_notif) {
+ bond->send_peer_notif--;
+ if (should_notify_peers)
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+ bond->dev);
}
- bond_miimon_commit(bond);
rtnl_unlock(); /* might sleep, hold no other locks */
}
@@ -2842,13 +2830,6 @@ static void bond_mii_monitor(struct work_struct *work)
re_arm:
if (bond->params.miimon)
queue_delayed_work(bond->wq, &bond->mii_work, delay);
-
- if (should_notify_peers) {
- if (!rtnl_trylock())
- return;
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
- rtnl_unlock();
- }
}
static int bond_upper_dev_walk(struct net_device *upper,
@@ -3003,8 +2984,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
tags = NULL;
/* Find out through which dev should the packet go */
- rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
- RTO_ONLINK, 0);
+ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt)) {
/* there's no route to target - try to send arp
* probe to generate any traffic (arp_validate=0)
@@ -3186,7 +3167,6 @@ static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
/* Find out through which dev should the packet go */
memset(&fl6, 0, sizeof(struct flowi6));
fl6.daddr = targets[i];
- fl6.flowi6_oif = bond->dev->ifindex;
dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6);
if (dst->error) {
@@ -3957,7 +3937,7 @@ static int bond_slave_netdev_event(unsigned long event,
case NETDEV_FEAT_CHANGE:
if (!bond->notifier_ctx) {
bond->notifier_ctx = true;
- bond_compute_features(bond);
+ netdev_compute_master_upper_features(bond->dev, true);
bond->notifier_ctx = false;
}
break;
@@ -4022,7 +4002,7 @@ static inline const void *bond_pull_data(struct sk_buff *skb,
if (likely(n <= hlen))
return data;
else if (skb && likely(pskb_may_pull(skb, n)))
- return skb->head;
+ return skb->data;
return NULL;
}
@@ -4070,7 +4050,7 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *
}
if (l34 && *ip_proto >= 0)
- fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
+ fk->ports.ports = skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
@@ -4242,7 +4222,7 @@ void bond_work_init_all(struct bonding *bond)
INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
}
-static void bond_work_cancel_all(struct bonding *bond)
+void bond_work_cancel_all(struct bonding *bond)
{
cancel_delayed_work_sync(&bond->mii_work);
cancel_delayed_work_sync(&bond->arp_work);
@@ -4304,6 +4284,9 @@ static int bond_open(struct net_device *bond_dev)
bond_for_each_slave(bond, slave, iter)
dev_mc_add(slave->dev, lacpdu_mcast_addr);
+
+ if (bond->params.broadcast_neighbor)
+ static_branch_inc(&bond_bcast_neigh_enabled);
}
if (bond_mode_can_use_xmit_hash(bond))
@@ -4323,6 +4306,10 @@ static int bond_close(struct net_device *bond_dev)
bond_alb_deinitialize(bond);
bond->recv_probe = NULL;
+ if (BOND_MODE(bond) == BOND_MODE_8023AD &&
+ bond->params.broadcast_neighbor)
+ static_branch_dec(&bond_bcast_neigh_enabled);
+
if (bond_uses_primary(bond)) {
rcu_read_lock();
slave = rcu_dereference(bond->curr_active_slave);
@@ -4446,11 +4433,6 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm
{
struct bonding *bond = netdev_priv(bond_dev);
struct mii_ioctl_data *mii = NULL;
- const struct net_device_ops *ops;
- struct net_device *real_dev;
- struct hwtstamp_config cfg;
- struct ifreq ifrr;
- int res = 0;
netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd);
@@ -4477,44 +4459,11 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm
}
break;
- case SIOCSHWTSTAMP:
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- if (!(cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX))
- return -EOPNOTSUPP;
-
- fallthrough;
- case SIOCGHWTSTAMP:
- real_dev = bond_option_active_slave_get_rcu(bond);
- if (!real_dev)
- return -EOPNOTSUPP;
-
- strscpy_pad(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
- ifrr.ifr_ifru = ifr->ifr_ifru;
-
- ops = real_dev->netdev_ops;
- if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) {
- res = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
- if (res)
- return res;
-
- ifr->ifr_ifru = ifrr.ifr_ifru;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- /* Set the BOND_PHC_INDEX flag to notify user space */
- cfg.flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;
-
- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ?
- -EFAULT : 0;
- }
- fallthrough;
default:
- res = -EOPNOTSUPP;
+ return -EOPNOTSUPP;
}
- return res;
+ return 0;
}
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
@@ -4737,7 +4686,7 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
}
}
- bond_dev->mtu = new_mtu;
+ WRITE_ONCE(bond_dev->mtu, new_mtu);
return 0;
@@ -4820,8 +4769,7 @@ unwind:
if (rollback_slave == slave)
break;
- tmp_res = dev_set_mac_address(rollback_slave->dev,
- (struct sockaddr *)&tmp_ss, NULL);
+ tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_ss, NULL);
if (tmp_res) {
slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n",
__func__, tmp_res);
@@ -5083,19 +5031,7 @@ static void bond_set_slave_arr(struct bonding *bond,
static void bond_reset_slave_arr(struct bonding *bond)
{
- struct bond_up_slave *usable, *all;
-
- usable = rtnl_dereference(bond->usable_slaves);
- if (usable) {
- RCU_INIT_POINTER(bond->usable_slaves, NULL);
- kfree_rcu(usable, rcu);
- }
-
- all = rtnl_dereference(bond->all_slaves);
- if (all) {
- RCU_INIT_POINTER(bond->all_slaves, NULL);
- kfree_rcu(all, rcu);
- }
+ bond_set_slave_arr(bond, NULL, NULL);
}
/* Build the usable slaves array in control path for modes that use xmit-hash
@@ -5209,6 +5145,37 @@ static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
return slaves->arr[hash % count];
}
+static bool bond_should_broadcast_neighbor(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct {
+ struct ipv6hdr ip6;
+ struct icmp6hdr icmp6;
+ } *combined, _combined;
+
+ if (!static_branch_unlikely(&bond_bcast_neigh_enabled))
+ return false;
+
+ if (!bond->params.broadcast_neighbor)
+ return false;
+
+ if (skb->protocol == htons(ETH_P_ARP))
+ return true;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ combined = skb_header_pointer(skb, skb_mac_header_len(skb),
+ sizeof(_combined),
+ &_combined);
+ if (combined && combined->ip6.nexthdr == NEXTHDR_ICMP &&
+ (combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return true;
+ }
+
+ return false;
+}
+
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
@@ -5228,17 +5195,27 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
return bond_tx_drop(dev, skb);
}
-/* in broadcast mode, we send everything to all usable interfaces. */
+/* in broadcast mode, we send everything to all or usable slave interfaces.
+ * under rcu_read_lock when this function is called.
+ */
static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
- struct net_device *bond_dev)
+ struct net_device *bond_dev,
+ bool all_slaves)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct slave *slave = NULL;
- struct list_head *iter;
+ struct bond_up_slave *slaves;
bool xmit_suc = false;
bool skb_used = false;
+ int slaves_count, i;
- bond_for_each_slave_rcu(bond, slave, iter) {
+ if (all_slaves)
+ slaves = rcu_dereference(bond->all_slaves);
+ else
+ slaves = rcu_dereference(bond->usable_slaves);
+
+ slaves_count = slaves ? READ_ONCE(slaves->count) : 0;
+ for (i = 0; i < slaves_count; i++) {
+ struct slave *slave = slaves->arr[i];
struct sk_buff *skb2;
if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
@@ -5284,7 +5261,7 @@ static inline int bond_slave_override(struct bonding *bond,
/* Find out if any slaves have the same mapping as this skb. */
bond_for_each_slave_rcu(bond, slave, iter) {
- if (slave->queue_id == skb_get_queue_mapping(skb)) {
+ if (READ_ONCE(slave->queue_id) == skb_get_queue_mapping(skb)) {
if (bond_slave_is_up(slave) &&
slave->link == BOND_LINK_UP) {
bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -5476,10 +5453,13 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
case BOND_MODE_ACTIVEBACKUP:
return bond_xmit_activebackup(skb, dev);
case BOND_MODE_8023AD:
+ if (bond_should_broadcast_neighbor(skb, dev))
+ return bond_xmit_broadcast(skb, dev, false);
+ fallthrough;
case BOND_MODE_XOR:
return bond_3ad_xor_xmit(skb, dev);
case BOND_MODE_BROADCAST:
- return bond_xmit_broadcast(skb, dev);
+ return bond_xmit_broadcast(skb, dev, true);
case BOND_MODE_ALB:
return bond_alb_xmit(skb, dev);
case BOND_MODE_TLB:
@@ -5536,9 +5516,9 @@ bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
break;
default:
- /* Should never happen. Mode guarded by bond_xdp_check() */
- netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
- WARN_ON_ONCE(1);
+ if (net_ratelimit())
+ netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n",
+ BOND_MODE(bond));
return NULL;
}
@@ -5602,8 +5582,11 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
ASSERT_RTNL();
- if (!bond_xdp_check(bond))
+ if (!bond_xdp_check(bond, BOND_MODE(bond))) {
+ BOND_NL_ERR(dev, extack,
+ "No native XDP support for the current bonding mode");
return -EOPNOTSUPP;
+ }
old_prog = bond->xdp_prog;
bond->xdp_prog = prog;
@@ -5626,7 +5609,7 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
goto err;
}
- err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ err = dev_xdp_propagate(slave_dev, &xdp);
if (err < 0) {
/* ndo_bpf() sets extack error message */
slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
@@ -5658,7 +5641,7 @@ err:
if (slave == rollback_slave)
break;
- err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ err_unwind = dev_xdp_propagate(slave_dev, &xdp);
if (err_unwind < 0)
slave_err(dev, slave_dev,
"Error %d when unwinding XDP program change\n", err_unwind);
@@ -5688,6 +5671,67 @@ static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
return speed;
}
+/* Set the BOND_PHC_INDEX flag to notify user space */
+static int bond_set_phc_index_flag(struct kernel_hwtstamp_config *kernel_cfg)
+{
+ struct ifreq *ifr = kernel_cfg->ifr;
+ struct hwtstamp_config cfg;
+
+ if (kernel_cfg->copied_to_user) {
+ /* Lower device has a legacy implementation */
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ cfg.flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ } else {
+ kernel_cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;
+ }
+
+ return 0;
+}
+
+static int bond_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct net_device *real_dev;
+ int err;
+
+ real_dev = bond_option_active_slave_get_rcu(bond);
+ if (!real_dev)
+ return -EOPNOTSUPP;
+
+ err = generic_hwtstamp_get_lower(real_dev, cfg);
+ if (err)
+ return err;
+
+ return bond_set_phc_index_flag(cfg);
+}
+
+static int bond_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct net_device *real_dev;
+ int err;
+
+ if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX))
+ return -EOPNOTSUPP;
+
+ real_dev = bond_option_active_slave_get_rcu(bond);
+ if (!real_dev)
+ return -EOPNOTSUPP;
+
+ err = generic_hwtstamp_set_lower(real_dev, cfg, extack);
+ if (err)
+ return err;
+
+ return bond_set_phc_index_flag(cfg);
+}
+
static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev,
struct ethtool_link_ksettings *cmd)
{
@@ -5706,6 +5750,7 @@ static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev,
*/
bond_for_each_slave(bond, slave, iter) {
if (bond_slave_can_tx(slave)) {
+ bond_update_speed_duplex(slave);
if (slave->speed != SPEED_UNKNOWN) {
if (BOND_MODE(bond) == BOND_MODE_BROADCAST)
speed = bond_mode_bcast_speed(slave,
@@ -5732,14 +5777,12 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
}
static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
- struct ethtool_ts_info *info)
+ struct kernel_ethtool_ts_info *info)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct ethtool_ts_info ts_info;
- const struct ethtool_ops *ops;
+ struct kernel_ethtool_ts_info ts_info;
struct net_device *real_dev;
bool sw_tx_support = false;
- struct phy_device *phydev;
struct list_head *iter;
struct slave *slave;
int ret = 0;
@@ -5750,29 +5793,12 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
rcu_read_unlock();
if (real_dev) {
- ops = real_dev->ethtool_ops;
- phydev = real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- ret = phy_ts_info(phydev, info);
- goto out;
- } else if (ops->get_ts_info) {
- ret = ops->get_ts_info(real_dev, info);
- goto out;
- }
+ ret = ethtool_get_ts_info_by_layer(real_dev, info);
} else {
/* Check if all slaves support software tx timestamping */
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter) {
- ret = -1;
- ops = slave->dev->ethtool_ops;
- phydev = slave->dev->phydev;
-
- if (phy_has_tsinfo(phydev))
- ret = phy_ts_info(phydev, &ts_info);
- else if (ops->get_ts_info)
- ret = ops->get_ts_info(slave->dev, &ts_info);
-
+ ret = ethtool_get_ts_info_by_layer(slave->dev, &ts_info);
if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) {
sw_tx_support = true;
continue;
@@ -5784,15 +5810,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
rcu_read_unlock();
}
- ret = 0;
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
if (sw_tx_support)
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE;
- info->phc_index = -1;
-
-out:
dev_put(real_dev);
return ret;
}
@@ -5836,6 +5856,8 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_bpf = bond_xdp,
.ndo_xdp_xmit = bond_xdp_xmit,
.ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
+ .ndo_hwtstamp_get = bond_hwtstamp_get,
+ .ndo_hwtstamp_set = bond_hwtstamp_set,
};
static const struct device_type bond_type = {
@@ -5849,8 +5871,7 @@ static void bond_destructor(struct net_device *bond_dev)
if (bond->wq)
destroy_workqueue(bond->wq);
- if (bond->rr_tx_counter)
- free_percpu(bond->rr_tx_counter);
+ free_percpu(bond->rr_tx_counter);
}
void bond_setup(struct net_device *bond_dev)
@@ -5883,11 +5904,14 @@ void bond_setup(struct net_device *bond_dev)
/* set up xfrm device ops (only supported in active-backup right now) */
bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
INIT_LIST_HEAD(&bond->ipsec_list);
- spin_lock_init(&bond->ipsec_lock);
+ mutex_init(&bond->ipsec_lock);
#endif /* CONFIG_XFRM_OFFLOAD */
/* don't acquire bond device's netif_tx_lock when transmitting */
- bond_dev->features |= NETIF_F_LLTX;
+ bond_dev->lltx = true;
+
+ /* Don't allow bond devices to change network namespaces. */
+ bond_dev->netns_immutable = true;
/* By default, we declare the bond to be fully
* VLAN hardware accelerated capable. Special
@@ -5896,25 +5920,22 @@ void bond_setup(struct net_device *bond_dev)
* capable
*/
- /* Don't allow bond devices to change network namespaces. */
- bond_dev->features |= NETIF_F_NETNS_LOCAL;
-
- bond_dev->hw_features = BOND_VLAN_FEATURES |
+ bond_dev->hw_features = MASTER_UPPER_DEV_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ bond_dev->features |= NETIF_F_GSO_PARTIAL;
#ifdef CONFIG_XFRM_OFFLOAD
bond_dev->hw_features |= BOND_XFRM_FEATURES;
/* Only enable XFRM features if this is an active-backup config */
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
-
- if (bond_xdp_check(bond))
- bond_dev->xdp_features = NETDEV_XDP_ACT_MASK;
}
/* Destroy a bonding device.
@@ -5923,7 +5944,6 @@ void bond_setup(struct net_device *bond_dev)
static void bond_uninit(struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct bond_up_slave *usable, *all;
struct list_head *iter;
struct slave *slave;
@@ -5934,26 +5954,20 @@ static void bond_uninit(struct net_device *bond_dev)
__bond_release_one(bond_dev, slave->dev, true, true);
netdev_info(bond_dev, "Released all slaves\n");
- usable = rtnl_dereference(bond->usable_slaves);
- if (usable) {
- RCU_INIT_POINTER(bond->usable_slaves, NULL);
- kfree_rcu(usable, rcu);
- }
+#ifdef CONFIG_XFRM_OFFLOAD
+ mutex_destroy(&bond->ipsec_lock);
+#endif /* CONFIG_XFRM_OFFLOAD */
- all = rtnl_dereference(bond->all_slaves);
- if (all) {
- RCU_INIT_POINTER(bond->all_slaves, NULL);
- kfree_rcu(all, rcu);
- }
+ bond_set_slave_arr(bond, NULL, NULL);
- list_del(&bond->bond_list);
+ list_del_rcu(&bond->bond_list);
bond_debug_unregister(bond);
}
/*------------------------- Module initialization ---------------------------*/
-static int bond_check_params(struct bond_params *params)
+static int __init bond_check_params(struct bond_params *params)
{
int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
struct bond_opt_value newval;
@@ -6054,10 +6068,10 @@ static int bond_check_params(struct bond_params *params)
downdelay = 0;
}
- if ((use_carrier != 0) && (use_carrier != 1)) {
- pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
- use_carrier);
- use_carrier = 1;
+ if (use_carrier != 1) {
+ pr_err("Error: invalid use_carrier parameter (%d)\n",
+ use_carrier);
+ return -EINVAL;
}
if (num_peer_notif < 0 || num_peer_notif > 255) {
@@ -6304,7 +6318,6 @@ static int bond_check_params(struct bond_params *params)
params->updelay = updelay;
params->downdelay = downdelay;
params->peer_notif_delay = 0;
- params->use_carrier = use_carrier;
params->lacp_active = 1;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
@@ -6320,6 +6333,8 @@ static int bond_check_params(struct bond_params *params)
params->ad_actor_sys_prio = ad_actor_sys_prio;
eth_zero_addr(params->ad_actor_system);
params->ad_user_port_key = ad_user_port_key;
+ params->coupled_control = 1;
+ params->broadcast_neighbor = 0;
if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave);
@@ -6350,7 +6365,8 @@ static int bond_init(struct net_device *bond_dev)
netdev_dbg(bond_dev, "Begin bond_init\n");
- bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
+ bond->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
+ bond_dev->name);
if (!bond->wq)
return -ENOMEM;
@@ -6359,7 +6375,7 @@ static int bond_init(struct net_device *bond_dev)
spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);
- list_add_tail(&bond->bond_list, &bn->dev_list);
+ list_add_tail_rcu(&bond->bond_list, &bn->dev_list);
bond_prepare_sysfs_group(bond);
@@ -6429,28 +6445,36 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}
-static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
+ * race condition in bond unloading") we need to remove sysfs files
+ * before we remove our devices (done later in bond_net_exit_rtnl())
+ */
+static void __net_exit bond_net_pre_exit(struct net *net)
{
- struct bond_net *bn;
- struct net *net;
- LIST_HEAD(list);
+ struct bond_net *bn = net_generic(net, bond_net_id);
- list_for_each_entry(net, net_list, exit_list) {
- bn = net_generic(net, bond_net_id);
- bond_destroy_sysfs(bn);
- }
+ bond_destroy_sysfs(bn);
+}
+
+static void __net_exit bond_net_exit_rtnl(struct net *net,
+ struct list_head *dev_kill_list)
+{
+ struct bond_net *bn = net_generic(net, bond_net_id);
+ struct bonding *bond, *tmp_bond;
/* Kill off any bonds created after unregistering bond rtnl ops */
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- struct bonding *bond, *tmp_bond;
+ list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
+}
- bn = net_generic(net, bond_net_id);
- list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, &list);
- }
- unregister_netdevice_many(&list);
- rtnl_unlock();
+/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
+ * only after all bonds are gone") bond_destroy_proc_dir() is called
+ * after bond_net_exit_rtnl() has completed.
+ */
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+{
+ struct bond_net *bn;
+ struct net *net;
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
@@ -6460,6 +6484,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
+ .pre_exit = bond_net_pre_exit,
+ .exit_rtnl = bond_net_exit_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
@@ -6474,16 +6500,16 @@ static int __init bonding_init(void)
if (res)
goto out;
+ bond_create_debugfs();
+
res = register_pernet_subsys(&bond_net_ops);
if (res)
- goto out;
+ goto err_net_ops;
res = bond_netlink_init();
if (res)
goto err_link;
- bond_create_debugfs();
-
for (i = 0; i < max_bonds; i++) {
res = bond_create(&init_net, NULL);
if (res)
@@ -6498,10 +6524,11 @@ static int __init bonding_init(void)
out:
return res;
err:
- bond_destroy_debugfs();
bond_netlink_fini();
err_link:
unregister_pernet_subsys(&bond_net_ops);
+err_net_ops:
+ bond_destroy_debugfs();
goto out;
}
@@ -6510,11 +6537,11 @@ static void __exit bonding_exit(void)
{
unregister_netdevice_notifier(&bond_netdev_notifier);
- bond_destroy_debugfs();
-
bond_netlink_fini();
unregister_pernet_subsys(&bond_net_ops);
+ bond_destroy_debugfs();
+
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Make sure we don't have an imbalance on our netpoll blocking */
WARN_ON(atomic_read(&netpoll_block_tx));
@@ -6526,3 +6553,4 @@ module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
+MODULE_IMPORT_NS("NETDEV_INTERNAL");