summaryrefslogtreecommitdiff
path: root/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r--drivers/net/bonding/bond_main.c515
1 files changed, 330 insertions, 185 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2c5ed0a7cb18..c4d53e8e7c15 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -90,6 +90,7 @@
#include <net/tls.h>
#endif
#include <net/ip6_route.h>
+#include <net/netdev_lock.h>
#include <net/xdp.h>
#include "bonding_priv.h"
@@ -322,9 +323,9 @@ static bool bond_sk_check(struct bonding *bond)
}
}
-static bool bond_xdp_check(struct bonding *bond)
+bool bond_xdp_check(struct bonding *bond, int mode)
{
- switch (BOND_MODE(bond)) {
+ switch (mode) {
case BOND_MODE_ROUNDROBIN:
case BOND_MODE_ACTIVEBACKUP:
return true;
@@ -419,14 +420,49 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
#ifdef CONFIG_XFRM_OFFLOAD
/**
+ * bond_ipsec_dev - Get active device for IPsec offload
+ * @xs: pointer to transformer state struct
+ *
+ * Context: caller must hold rcu_read_lock.
+ *
+ * Return: the device for ipsec offload, or NULL if not exist.
+ **/
+static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
+{
+ struct net_device *bond_dev = xs->xso.dev;
+ struct bonding *bond;
+ struct slave *slave;
+
+ bond = netdev_priv(bond_dev);
+ if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
+ return NULL;
+
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (!slave)
+ return NULL;
+
+ if (!xs->xso.real_dev)
+ return NULL;
+
+ if (xs->xso.real_dev != slave->dev)
+ pr_warn_ratelimited("%s: (slave %s): not same with IPsec offload real dev %s\n",
+ bond_dev->name, slave->dev->name, xs->xso.real_dev->name);
+
+ return slave->dev;
+}
+
+/**
* bond_ipsec_add_sa - program device with a security association
+ * @bond_dev: pointer to the bond net device
* @xs: pointer to transformer state struct
* @extack: extack point to fill failure reason
**/
-static int bond_ipsec_add_sa(struct xfrm_state *xs,
+static int bond_ipsec_add_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs,
struct netlink_ext_ack *extack)
{
- struct net_device *bond_dev = xs->xso.dev;
+ struct net_device *real_dev;
+ netdevice_tracker tracker;
struct bond_ipsec *ipsec;
struct bonding *bond;
struct slave *slave;
@@ -438,112 +474,185 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs,
rcu_read_lock();
bond = netdev_priv(bond_dev);
slave = rcu_dereference(bond->curr_active_slave);
- if (!slave) {
- rcu_read_unlock();
- return -ENODEV;
+ real_dev = slave ? slave->dev : NULL;
+ netdev_hold(real_dev, &tracker, GFP_ATOMIC);
+ rcu_read_unlock();
+ if (!real_dev) {
+ err = -ENODEV;
+ goto out;
}
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
- netif_is_bond_master(slave->dev)) {
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_add ||
+ netif_is_bond_master(real_dev)) {
NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
- rcu_read_unlock();
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
- ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC);
+ ipsec = kmalloc(sizeof(*ipsec), GFP_KERNEL);
if (!ipsec) {
- rcu_read_unlock();
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
- xs->xso.real_dev = slave->dev;
- err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
+ err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack);
if (!err) {
+ xs->xso.real_dev = real_dev;
ipsec->xs = xs;
INIT_LIST_HEAD(&ipsec->list);
- spin_lock_bh(&bond->ipsec_lock);
+ mutex_lock(&bond->ipsec_lock);
list_add(&ipsec->list, &bond->ipsec_list);
- spin_unlock_bh(&bond->ipsec_lock);
+ mutex_unlock(&bond->ipsec_lock);
} else {
kfree(ipsec);
}
- rcu_read_unlock();
+out:
+ netdev_put(real_dev, &tracker);
return err;
}
static void bond_ipsec_add_sa_all(struct bonding *bond)
{
struct net_device *bond_dev = bond->dev;
+ struct net_device *real_dev;
struct bond_ipsec *ipsec;
struct slave *slave;
- rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- if (!slave)
- goto out;
+ slave = rtnl_dereference(bond->curr_active_slave);
+ real_dev = slave ? slave->dev : NULL;
+ if (!real_dev)
+ return;
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
- netif_is_bond_master(slave->dev)) {
- spin_lock_bh(&bond->ipsec_lock);
+ mutex_lock(&bond->ipsec_lock);
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_add ||
+ netif_is_bond_master(real_dev)) {
if (!list_empty(&bond->ipsec_list))
- slave_warn(bond_dev, slave->dev,
+ slave_warn(bond_dev, real_dev,
"%s: no slave xdo_dev_state_add\n",
__func__);
- spin_unlock_bh(&bond->ipsec_lock);
goto out;
}
- spin_lock_bh(&bond->ipsec_lock);
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- ipsec->xs->xso.real_dev = slave->dev;
- if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
- slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__);
- ipsec->xs->xso.real_dev = NULL;
+ /* If new state is added before ipsec_lock acquired */
+ if (ipsec->xs->xso.real_dev == real_dev)
+ continue;
+
+ if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev,
+ ipsec->xs, NULL)) {
+ slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
+ continue;
}
+
+ spin_lock_bh(&ipsec->xs->lock);
+ /* xs might have been killed by the user during the migration
+ * to the new dev, but bond_ipsec_del_sa() should have done
+ * nothing, as xso.real_dev is NULL.
+ * Delete it from the device we just added it to. The pending
+ * bond_ipsec_free_sa() call will do the rest of the cleanup.
+ */
+ if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
+ real_dev->xfrmdev_ops->xdo_dev_state_delete)
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
+ ipsec->xs);
+ ipsec->xs->xso.real_dev = real_dev;
+ spin_unlock_bh(&ipsec->xs->lock);
}
- spin_unlock_bh(&bond->ipsec_lock);
out:
- rcu_read_unlock();
+ mutex_unlock(&bond->ipsec_lock);
}
/**
* bond_ipsec_del_sa - clear out this specific SA
+ * @bond_dev: pointer to the bond net device
* @xs: pointer to transformer state struct
**/
-static void bond_ipsec_del_sa(struct xfrm_state *xs)
+static void bond_ipsec_del_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs)
{
- struct net_device *bond_dev = xs->xso.dev;
+ struct net_device *real_dev;
+
+ if (!bond_dev || !xs->xso.real_dev)
+ return;
+
+ real_dev = xs->xso.real_dev;
+
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
+ netif_is_bond_master(real_dev)) {
+ slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
+ return;
+ }
+
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs);
+}
+
+static void bond_ipsec_del_sa_all(struct bonding *bond)
+{
+ struct net_device *bond_dev = bond->dev;
+ struct net_device *real_dev;
struct bond_ipsec *ipsec;
- struct bonding *bond;
struct slave *slave;
+ slave = rtnl_dereference(bond->curr_active_slave);
+ real_dev = slave ? slave->dev : NULL;
+ if (!real_dev)
+ return;
+
+ mutex_lock(&bond->ipsec_lock);
+ list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+ if (!ipsec->xs->xso.real_dev)
+ continue;
+
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
+ netif_is_bond_master(real_dev)) {
+ slave_warn(bond_dev, real_dev,
+ "%s: no slave xdo_dev_state_delete\n",
+ __func__);
+ continue;
+ }
+
+ spin_lock_bh(&ipsec->xs->lock);
+ ipsec->xs->xso.real_dev = NULL;
+ /* Don't double delete states killed by the user. */
+ if (ipsec->xs->km.state != XFRM_STATE_DEAD)
+ real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
+ ipsec->xs);
+ spin_unlock_bh(&ipsec->xs->lock);
+
+ if (real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev,
+ ipsec->xs);
+ }
+ mutex_unlock(&bond->ipsec_lock);
+}
+
+static void bond_ipsec_free_sa(struct net_device *bond_dev,
+ struct xfrm_state *xs)
+{
+ struct net_device *real_dev;
+ struct bond_ipsec *ipsec;
+ struct bonding *bond;
+
if (!bond_dev)
return;
- rcu_read_lock();
bond = netdev_priv(bond_dev);
- slave = rcu_dereference(bond->curr_active_slave);
-
- if (!slave)
- goto out;
+ mutex_lock(&bond->ipsec_lock);
if (!xs->xso.real_dev)
goto out;
- WARN_ON(xs->xso.real_dev != slave->dev);
+ real_dev = xs->xso.real_dev;
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
- netif_is_bond_master(slave->dev)) {
- slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__);
- goto out;
- }
-
- slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs);
+ xs->xso.real_dev = NULL;
+ if (real_dev->xfrmdev_ops &&
+ real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs);
out:
- spin_lock_bh(&bond->ipsec_lock);
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
if (ipsec->xs == xs) {
list_del(&ipsec->list);
@@ -551,88 +660,84 @@ out:
break;
}
}
- spin_unlock_bh(&bond->ipsec_lock);
- rcu_read_unlock();
+ mutex_unlock(&bond->ipsec_lock);
}
-static void bond_ipsec_del_sa_all(struct bonding *bond)
+/**
+ * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
{
- struct net_device *bond_dev = bond->dev;
- struct bond_ipsec *ipsec;
- struct slave *slave;
+ struct net_device *real_dev;
rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- if (!slave) {
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev || netif_is_bond_master(real_dev)) {
rcu_read_unlock();
- return;
+ return false;
}
- spin_lock_bh(&bond->ipsec_lock);
- list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- if (!ipsec->xs->xso.real_dev)
- continue;
-
- if (!slave->dev->xfrmdev_ops ||
- !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
- netif_is_bond_master(slave->dev)) {
- slave_warn(bond_dev, slave->dev,
- "%s: no slave xdo_dev_state_delete\n",
- __func__);
- } else {
- slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs);
- }
- ipsec->xs->xso.real_dev = NULL;
- }
- spin_unlock_bh(&bond->ipsec_lock);
rcu_read_unlock();
+ return true;
}
/**
- * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
- * @skb: current data packet
+ * bond_advance_esn_state - ESN support for IPSec HW offload
* @xs: pointer to transformer state struct
**/
-static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+static void bond_advance_esn_state(struct xfrm_state *xs)
{
- struct net_device *bond_dev = xs->xso.dev;
struct net_device *real_dev;
- struct slave *curr_active;
- struct bonding *bond;
- int err;
- bond = netdev_priv(bond_dev);
rcu_read_lock();
- curr_active = rcu_dereference(bond->curr_active_slave);
- real_dev = curr_active->dev;
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev)
+ goto out;
- if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
- err = false;
+ if (!real_dev->xfrmdev_ops ||
+ !real_dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_advance_esn\n", __func__, real_dev->name);
goto out;
}
- if (!xs->xso.real_dev) {
- err = false;
+ real_dev->xfrmdev_ops->xdo_dev_state_advance_esn(xs);
+out:
+ rcu_read_unlock();
+}
+
+/**
+ * bond_xfrm_update_stats - Update xfrm state
+ * @xs: pointer to transformer state struct
+ **/
+static void bond_xfrm_update_stats(struct xfrm_state *xs)
+{
+ struct net_device *real_dev;
+
+ rcu_read_lock();
+ real_dev = bond_ipsec_dev(xs);
+ if (!real_dev)
goto out;
- }
if (!real_dev->xfrmdev_ops ||
- !real_dev->xfrmdev_ops->xdo_dev_offload_ok ||
- netif_is_bond_master(real_dev)) {
- err = false;
+ !real_dev->xfrmdev_ops->xdo_dev_state_update_stats) {
+ pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_update_stats\n", __func__, real_dev->name);
goto out;
}
- err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs);
+ real_dev->xfrmdev_ops->xdo_dev_state_update_stats(xs);
out:
rcu_read_unlock();
- return err;
}
static const struct xfrmdev_ops bond_xfrmdev_ops = {
.xdo_dev_state_add = bond_ipsec_add_sa,
.xdo_dev_state_delete = bond_ipsec_del_sa,
+ .xdo_dev_state_free = bond_ipsec_free_sa,
.xdo_dev_offload_ok = bond_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = bond_advance_esn_state,
+ .xdo_dev_state_update_stats = bond_xfrm_update_stats,
};
#endif /* CONFIG_XFRM_OFFLOAD */
@@ -742,9 +847,9 @@ static int bond_check_dev_link(struct bonding *bond,
struct net_device *slave_dev, int reporting)
{
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
- int (*ioctl)(struct net_device *, struct ifreq *, int);
- struct ifreq ifr;
struct mii_ioctl_data *mii;
+ struct ifreq ifr;
+ int ret;
if (!reporting && !netif_running(slave_dev))
return 0;
@@ -753,13 +858,16 @@ static int bond_check_dev_link(struct bonding *bond,
return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
/* Try to get link status using Ethtool first. */
- if (slave_dev->ethtool_ops->get_link)
- return slave_dev->ethtool_ops->get_link(slave_dev) ?
- BMSR_LSTATUS : 0;
+ if (slave_dev->ethtool_ops->get_link) {
+ netdev_lock_ops(slave_dev);
+ ret = slave_dev->ethtool_ops->get_link(slave_dev);
+ netdev_unlock_ops(slave_dev);
+
+ return ret ? BMSR_LSTATUS : 0;
+ }
/* Ethtool can't be used, fallback to MII ioctls. */
- ioctl = slave_ops->ndo_eth_ioctl;
- if (ioctl) {
+ if (slave_ops->ndo_eth_ioctl) {
/* TODO: set pointer to correct ioctl on a per team member
* bases to make this more efficient. that is, once
* we determine the correct ioctl, we will always
@@ -775,9 +883,10 @@ static int bond_check_dev_link(struct bonding *bond,
/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
mii = if_mii(&ifr);
- if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
+
+ if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
mii->reg_num = MII_BMSR;
- if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
+ if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
return mii->val_out & BMSR_LSTATUS;
}
}
@@ -892,6 +1001,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
if (bond->dev->flags & IFF_UP)
bond_hw_addr_flush(bond->dev, old_active->dev);
+
+ bond_slave_ns_maddrs_add(bond, old_active);
}
if (new_active) {
@@ -908,6 +1019,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
dev_mc_sync(new_active->dev, bond->dev);
netif_addr_unlock_bh(bond->dev);
}
+
+ bond_slave_ns_maddrs_del(bond, new_active);
}
}
@@ -999,8 +1112,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
ss.ss_family = bond->dev->type;
}
- rv = dev_set_mac_address(new_active->dev,
- (struct sockaddr *)&ss, NULL);
+ rv = dev_set_mac_address(new_active->dev, &ss, NULL);
if (rv) {
slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
-rv);
@@ -1014,8 +1126,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
new_active->dev->addr_len);
ss.ss_family = old_active->dev->type;
- rv = dev_set_mac_address(old_active->dev,
- (struct sockaddr *)&ss, NULL);
+ rv = dev_set_mac_address(old_active->dev, &ss, NULL);
if (rv)
slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
-rv);
@@ -1121,13 +1232,10 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
return bestslave;
}
+/* must be called in RCU critical section or with RTNL held */
static bool bond_should_notify_peers(struct bonding *bond)
{
- struct slave *slave;
-
- rcu_read_lock();
- slave = rcu_dereference(bond->curr_active_slave);
- rcu_read_unlock();
+ struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);
if (!slave || !bond->send_peer_notif ||
bond->send_peer_notif %
@@ -1363,7 +1471,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev)
slave_disable_netpoll(slave);
}
-static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int bond_netpoll_setup(struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
struct list_head *iter;
@@ -1403,9 +1511,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
struct slave *slave;
mask = features;
-
- features &= ~NETIF_F_ONE_FOR_ALL;
- features |= NETIF_F_ALL_FOR_ALL;
+ features = netdev_base_features(features);
bond_for_each_slave(bond, slave, iter) {
features = netdev_increment_features(features,
@@ -1419,17 +1525,22 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_GSO_ENCAP_ALL | \
NETIF_F_HIGHDMA | NETIF_F_LRO)
#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE)
+ NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_GSO_PARTIAL)
#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
NETIF_F_GSO_SOFTWARE)
+#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP)
+
static void bond_compute_features(struct bonding *bond)
{
+ netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES;
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
IFF_XMIT_DST_RELEASE_PERM;
netdev_features_t vlan_features = BOND_VLAN_FEATURES;
@@ -1447,8 +1558,9 @@ static void bond_compute_features(struct bonding *bond)
if (!bond_has_slaves(bond))
goto done;
- vlan_features &= NETIF_F_ALL_FOR_ALL;
- mpls_features &= NETIF_F_ALL_FOR_ALL;
+
+ vlan_features = netdev_base_features(vlan_features);
+ mpls_features = netdev_base_features(mpls_features);
bond_for_each_slave(bond, slave, iter) {
vlan_features = netdev_increment_features(vlan_features,
@@ -1464,6 +1576,10 @@ static void bond_compute_features(struct bonding *bond)
BOND_XFRM_FEATURES);
#endif /* CONFIG_XFRM_OFFLOAD */
+ gso_partial_features = netdev_increment_features(gso_partial_features,
+ slave->dev->gso_partial_features,
+ BOND_GSO_PARTIAL_FEATURES);
+
mpls_features = netdev_increment_features(mpls_features,
slave->dev->mpls_features,
BOND_MPLS_FEATURES);
@@ -1478,6 +1594,7 @@ static void bond_compute_features(struct bonding *bond)
bond_dev->hard_header_len = max_hard_header_len;
done:
+ bond_dev->gso_partial_features = gso_partial_features;
bond_dev->vlan_features = vlan_features;
bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HW_VLAN_CTAG_TX |
@@ -1811,7 +1928,7 @@ void bond_xdp_set_features(struct net_device *bond_dev)
ASSERT_RTNL();
- if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) {
+ if (!bond_xdp_check(bond, BOND_MODE(bond)) || !bond_has_slaves(bond)) {
xdp_clear_features_flag(bond_dev);
return;
}
@@ -1996,15 +2113,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
* set the master's mac address to that of the first slave
*/
memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
- ss.ss_family = slave_dev->type;
- res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss,
- extack);
- if (res) {
- slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res);
- goto err_restore_mtu;
- }
+ } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
+ BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+ memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
+ /* Set slave to random address to avoid duplicate mac
+ * address in later fail over.
+ */
+ eth_random_addr(ss.__data);
+ } else {
+ goto skip_mac_set;
+ }
+
+ ss.ss_family = slave_dev->type;
+ res = dev_set_mac_address(slave_dev, &ss, extack);
+ if (res) {
+ slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res);
+ goto err_restore_mtu;
}
+skip_mac_set:
+
/* set no_addrconf flag before open to prevent IPv6 addrconf */
slave_dev->priv_flags |= IFF_NO_ADDRCONF;
@@ -2228,6 +2356,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_compute_features(bond);
bond_set_carrier(bond);
+ /* Needs to be called before bond_select_active_slave(), which will
+ * remove the maddrs if the slave is selected as active slave.
+ */
+ bond_slave_ns_maddrs_add(bond, new_slave);
+
if (bond_uses_primary(bond)) {
block_netpoll_tx();
bond_select_active_slave(bond);
@@ -2237,7 +2370,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
-
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
if (bond->xdp_prog) {
@@ -2261,7 +2393,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
goto err_sysfs_del;
}
- res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ res = dev_xdp_propagate(slave_dev, &xdp);
if (res < 0) {
/* ndo_bpf() sets extack error message */
slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
@@ -2321,7 +2453,7 @@ err_restore_mac:
bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr,
new_slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
+ dev_set_mac_address(slave_dev, &ss, NULL);
}
err_restore_mtu:
@@ -2397,7 +2529,7 @@ static int __bond_release_one(struct net_device *bond_dev,
.prog = NULL,
.extack = NULL,
};
- if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+ if (dev_xdp_propagate(slave_dev, &xdp))
slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
}
@@ -2421,7 +2553,7 @@ static int __bond_release_one(struct net_device *bond_dev,
RCU_INIT_POINTER(bond->current_arp_slave, NULL);
- if (!all && (!bond->params.fail_over_mac ||
+ if (!all && (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) {
if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
bond_has_slaves(bond))
@@ -2435,6 +2567,12 @@ static int __bond_release_one(struct net_device *bond_dev,
if (oldcurrent == slave)
bond_change_active_slave(bond, NULL);
+ /* Must be called after bond_change_active_slave () as the slave
+ * might change from an active slave to a backup slave. Then it is
+ * necessary to clear the maddrs on the backup slave.
+ */
+ bond_slave_ns_maddrs_del(bond, slave);
+
if (bond_is_lb(bond)) {
/* Must be called only after the slave has been
* detached from the list and the curr_active_slave
@@ -2509,13 +2647,16 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_hw_addr_copy(ss.__data, slave->perm_hwaddr,
slave->dev->addr_len);
ss.ss_family = slave_dev->type;
- dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL);
+ dev_set_mac_address(slave_dev, &ss, NULL);
}
- if (unregister)
+ if (unregister) {
+ netdev_lock_ops(slave_dev);
__dev_set_mtu(slave_dev, slave->original_mtu);
- else
+ netdev_unlock_ops(slave_dev);
+ } else {
dev_set_mtu(slave_dev, slave->original_mtu);
+ }
if (!netif_is_bond_master(slave_dev))
slave_dev->priv_flags &= ~IFF_BONDING;
@@ -3014,8 +3155,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
tags = NULL;
/* Find out through which dev should the packet go */
- rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
- RTO_ONLINK, 0);
+ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt)) {
/* there's no route to target - try to send arp
* probe to generate any traffic (arp_validate=0)
@@ -4081,7 +4222,7 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *
}
if (l34 && *ip_proto >= 0)
- fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
+ fk->ports.ports = skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
@@ -4710,7 +4851,7 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
}
}
- bond_dev->mtu = new_mtu;
+ WRITE_ONCE(bond_dev->mtu, new_mtu);
return 0;
@@ -4793,8 +4934,7 @@ unwind:
if (rollback_slave == slave)
break;
- tmp_res = dev_set_mac_address(rollback_slave->dev,
- (struct sockaddr *)&tmp_ss, NULL);
+ tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_ss, NULL);
if (tmp_res) {
slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n",
__func__, tmp_res);
@@ -5245,7 +5385,7 @@ static inline int bond_slave_override(struct bonding *bond,
/* Find out if any slaves have the same mapping as this skb. */
bond_for_each_slave_rcu(bond, slave, iter) {
- if (slave->queue_id == skb_get_queue_mapping(skb)) {
+ if (READ_ONCE(slave->queue_id) == skb_get_queue_mapping(skb)) {
if (bond_slave_is_up(slave) &&
slave->link == BOND_LINK_UP) {
bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -5497,9 +5637,9 @@ bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
break;
default:
- /* Should never happen. Mode guarded by bond_xdp_check() */
- netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
- WARN_ON_ONCE(1);
+ if (net_ratelimit())
+ netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n",
+ BOND_MODE(bond));
return NULL;
}
@@ -5563,8 +5703,11 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
ASSERT_RTNL();
- if (!bond_xdp_check(bond))
+ if (!bond_xdp_check(bond, BOND_MODE(bond))) {
+ BOND_NL_ERR(dev, extack,
+ "No native XDP support for the current bonding mode");
return -EOPNOTSUPP;
+ }
old_prog = bond->xdp_prog;
bond->xdp_prog = prog;
@@ -5587,7 +5730,7 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
goto err;
}
- err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ err = dev_xdp_propagate(slave_dev, &xdp);
if (err < 0) {
/* ndo_bpf() sets extack error message */
slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
@@ -5619,7 +5762,7 @@ err:
if (slave == rollback_slave)
break;
- err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ err_unwind = dev_xdp_propagate(slave_dev, &xdp);
if (err_unwind < 0)
slave_err(dev, slave_dev,
"Error %d when unwinding XDP program change\n", err_unwind);
@@ -5755,10 +5898,10 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
}
static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
- struct ethtool_ts_info *info)
+ struct kernel_ethtool_ts_info *info)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct ethtool_ts_info ts_info;
+ struct kernel_ethtool_ts_info ts_info;
struct net_device *real_dev;
bool sw_tx_support = false;
struct list_head *iter;
@@ -5882,11 +6025,14 @@ void bond_setup(struct net_device *bond_dev)
/* set up xfrm device ops (only supported in active-backup right now) */
bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
INIT_LIST_HEAD(&bond->ipsec_list);
- spin_lock_init(&bond->ipsec_lock);
+ mutex_init(&bond->ipsec_lock);
#endif /* CONFIG_XFRM_OFFLOAD */
/* don't acquire bond device's netif_tx_lock when transmitting */
- bond_dev->features |= NETIF_F_LLTX;
+ bond_dev->lltx = true;
+
+ /* Don't allow bond devices to change network namespaces. */
+ bond_dev->netns_immutable = true;
/* By default, we declare the bond to be fully
* VLAN hardware accelerated capable. Special
@@ -5895,9 +6041,6 @@ void bond_setup(struct net_device *bond_dev)
* capable
*/
- /* Don't allow bond devices to change network namespaces. */
- bond_dev->features |= NETIF_F_NETNS_LOCAL;
-
bond_dev->hw_features = BOND_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -5907,6 +6050,7 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ bond_dev->features |= NETIF_F_GSO_PARTIAL;
#ifdef CONFIG_XFRM_OFFLOAD
bond_dev->hw_features |= BOND_XFRM_FEATURES;
/* Only enable XFRM features if this is an active-backup config */
@@ -5931,9 +6075,13 @@ static void bond_uninit(struct net_device *bond_dev)
__bond_release_one(bond_dev, slave->dev, true, true);
netdev_info(bond_dev, "Released all slaves\n");
+#ifdef CONFIG_XFRM_OFFLOAD
+ mutex_destroy(&bond->ipsec_lock);
+#endif /* CONFIG_XFRM_OFFLOAD */
+
bond_set_slave_arr(bond, NULL, NULL);
- list_del(&bond->bond_list);
+ list_del_rcu(&bond->bond_list);
bond_debug_unregister(bond);
}
@@ -6338,7 +6486,8 @@ static int bond_init(struct net_device *bond_dev)
netdev_dbg(bond_dev, "Begin bond_init\n");
- bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
+ bond->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
+ bond_dev->name);
if (!bond->wq)
return -ENOMEM;
@@ -6347,7 +6496,7 @@ static int bond_init(struct net_device *bond_dev)
spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);
- list_add_tail(&bond->bond_list, &bn->dev_list);
+ list_add_tail_rcu(&bond->bond_list, &bn->dev_list);
bond_prepare_sysfs_group(bond);
@@ -6419,7 +6568,7 @@ static int __net_init bond_net_init(struct net *net)
/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
* race condition in bond unloading") we need to remove sysfs files
- * before we remove our devices (done later in bond_net_exit_batch_rtnl())
+ * before we remove our devices (done later in bond_net_exit_rtnl())
*/
static void __net_exit bond_net_pre_exit(struct net *net)
{
@@ -6428,25 +6577,20 @@ static void __net_exit bond_net_pre_exit(struct net *net)
bond_destroy_sysfs(bn);
}
-static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_kill_list)
+static void __net_exit bond_net_exit_rtnl(struct net *net,
+ struct list_head *dev_kill_list)
{
- struct bond_net *bn;
- struct net *net;
+ struct bond_net *bn = net_generic(net, bond_net_id);
+ struct bonding *bond, *tmp_bond;
/* Kill off any bonds created after unregistering bond rtnl ops */
- list_for_each_entry(net, net_list, exit_list) {
- struct bonding *bond, *tmp_bond;
-
- bn = net_generic(net, bond_net_id);
- list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, dev_kill_list);
- }
+ list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
}
/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
* only after all bonds are gone") bond_destroy_proc_dir() is called
- * after bond_net_exit_batch_rtnl() has completed.
+ * after bond_net_exit_rtnl() has completed.
*/
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
@@ -6462,7 +6606,7 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
.pre_exit = bond_net_pre_exit,
- .exit_batch_rtnl = bond_net_exit_batch_rtnl,
+ .exit_rtnl = bond_net_exit_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
@@ -6477,16 +6621,16 @@ static int __init bonding_init(void)
if (res)
goto out;
+ bond_create_debugfs();
+
res = register_pernet_subsys(&bond_net_ops);
if (res)
- goto out;
+ goto err_net_ops;
res = bond_netlink_init();
if (res)
goto err_link;
- bond_create_debugfs();
-
for (i = 0; i < max_bonds; i++) {
res = bond_create(&init_net, NULL);
if (res)
@@ -6501,10 +6645,11 @@ static int __init bonding_init(void)
out:
return res;
err:
- bond_destroy_debugfs();
bond_netlink_fini();
err_link:
unregister_pernet_subsys(&bond_net_ops);
+err_net_ops:
+ bond_destroy_debugfs();
goto out;
}
@@ -6513,11 +6658,11 @@ static void __exit bonding_exit(void)
{
unregister_netdevice_notifier(&bond_netdev_notifier);
- bond_destroy_debugfs();
-
bond_netlink_fini();
unregister_pernet_subsys(&bond_net_ops);
+ bond_destroy_debugfs();
+
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Make sure we don't have an imbalance on our netpoll blocking */
WARN_ON(atomic_read(&netpoll_block_tx));