summaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp/ipoib/ipoib_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c494
1 files changed, 330 insertions, 164 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d932f99201d1..300afc27c561 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -49,13 +49,11 @@
#include <linux/jhash.h>
#include <net/arp.h>
#include <net/addrconf.h>
+#include <net/netdev_lock.h>
+#include <net/pkt_sched.h>
#include <linux/inetdevice.h>
#include <rdma/ib_cache.h>
-#define DRV_VERSION "1.0.0"
-
-const char ipoib_driver_version[] = DRV_VERSION;
-
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
@@ -90,11 +88,11 @@ struct workqueue_struct *ipoib_workqueue;
struct ib_sa_client ipoib_sa_client;
-static void ipoib_add_one(struct ib_device *device);
+static int ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static void ipoib_neigh_reclaim(struct rcu_head *rp);
static struct net_device *ipoib_get_net_dev_by_params(
- struct ib_device *dev, u8 port, u16 pkey,
+ struct ib_device *dev, u32 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static int ipoib_set_mac(struct net_device *dev, void *addr);
@@ -135,6 +133,52 @@ static int ipoib_netdev_event(struct notifier_block *this,
}
#endif
+struct ipoib_ifupdown_work {
+ struct work_struct work;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
+ bool up;
+};
+
+static void ipoib_ifupdown_task(struct work_struct *work)
+{
+ struct ipoib_ifupdown_work *pwork =
+ container_of(work, struct ipoib_ifupdown_work, work);
+ struct net_device *dev = pwork->dev;
+ unsigned int flags;
+
+ rtnl_lock();
+ flags = dev->flags;
+ if (pwork->up)
+ flags |= IFF_UP;
+ else
+ flags &= ~IFF_UP;
+
+ if (dev->flags != flags)
+ dev_change_flags(dev, flags, NULL);
+ rtnl_unlock();
+ netdev_put(dev, &pwork->dev_tracker);
+ kfree(pwork);
+}
+
+static void ipoib_schedule_ifupdown_task(struct net_device *dev, bool up)
+{
+ struct ipoib_ifupdown_work *work;
+
+ if ((up && (dev->flags & IFF_UP)) ||
+ (!up && !(dev->flags & IFF_UP)))
+ return;
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return;
+ work->dev = dev;
+ netdev_hold(dev, &work->dev_tracker, GFP_KERNEL);
+ work->up = up;
+ INIT_WORK(&work->work, ipoib_ifupdown_task);
+ queue_work(ipoib_workqueue, &work->work);
+}
+
int ipoib_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -145,8 +189,6 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- priv->sm_fullmember_sendonly_support = false;
-
if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
@@ -159,19 +201,17 @@ int ipoib_open(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring up any child interfaces too */
- down_read(&priv->vlan_rwsem);
- list_for_each_entry(cpriv, &priv->child_intfs, list) {
- int flags;
-
- flags = cpriv->dev->flags;
- if (flags & IFF_UP)
- continue;
+ netdev_lock_ops_to_full(dev);
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ ipoib_schedule_ifupdown_task(cpriv->dev, true);
+ netdev_unlock_full_to_ops(dev);
+ } else if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
- dev_change_flags(cpriv->dev, flags | IFF_UP, NULL);
- }
- up_read(&priv->vlan_rwsem);
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &ppriv->flags))
+ ipoib_dbg(priv, "parent device %s is not up, so child device may be not functioning.\n",
+ ppriv->dev->name);
}
-
netif_start_queue(dev);
return 0;
@@ -199,17 +239,10 @@ static int ipoib_stop(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring down any child interfaces too */
- down_read(&priv->vlan_rwsem);
- list_for_each_entry(cpriv, &priv->child_intfs, list) {
- int flags;
-
- flags = cpriv->dev->flags;
- if (!(flags & IFF_UP))
- continue;
-
- dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL);
- }
- up_read(&priv->vlan_rwsem);
+ netdev_lock_ops_to_full(dev);
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ ipoib_schedule_ifupdown_task(cpriv->dev, false);
+ netdev_unlock_full_to_ops(dev);
}
return 0;
@@ -239,7 +272,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
priv->mcast_mtu);
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -266,7 +299,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
if (carrier_status)
netif_carrier_on(dev);
} else {
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
}
return ret;
@@ -317,28 +350,28 @@ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
return false;
}
-/**
- * Find the master net_device on top of the given net_device.
+/*
+ * Find the L2 master net_device on top of the given net_device.
* @dev: base IPoIB net_device
*
- * Returns the master net_device with a reference held, or the same net_device
- * if no master exists.
+ * Returns the L2 master net_device with reference held if the L2 master
+ * exists (such as bond netdevice), or returns same netdev with reference
+ * held when master does not exist or when L3 master (such as VRF netdev).
*/
static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
{
struct net_device *master;
rcu_read_lock();
+
master = netdev_master_upper_dev_get_rcu(dev);
- if (master)
- dev_hold(master);
- rcu_read_unlock();
+ if (!master || netif_is_l3_master(master))
+ master = dev;
- if (master)
- return master;
+ dev_hold(master);
+ rcu_read_unlock();
- dev_hold(dev);
- return dev;
+ return master;
}
struct ipoib_walk_data {
@@ -346,9 +379,10 @@ struct ipoib_walk_data {
struct net_device *result;
};
-static int ipoib_upper_walk(struct net_device *upper, void *_data)
+static int ipoib_upper_walk(struct net_device *upper,
+ struct netdev_nested_priv *priv)
{
- struct ipoib_walk_data *data = _data;
+ struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data;
int ret = 0;
if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
@@ -361,8 +395,9 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data)
}
/**
- * Find a net_device matching the given address, which is an upper device of
- * the given net_device.
+ * ipoib_get_net_dev_match_addr - Find a net_device matching
+ * the given address, which is an upper device of the given net_device.
+ *
* @addr: IP address to look for.
* @dev: base IPoIB net_device
*
@@ -372,10 +407,12 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data)
static struct net_device *ipoib_get_net_dev_match_addr(
const struct sockaddr *addr, struct net_device *dev)
{
+ struct netdev_nested_priv priv;
struct ipoib_walk_data data = {
.addr = addr,
};
+ priv.data = (void *)&data;
rcu_read_lock();
if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
dev_hold(dev);
@@ -383,7 +420,7 @@ static struct net_device *ipoib_get_net_dev_match_addr(
goto out;
}
- netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data);
+ netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv);
out:
rcu_read_unlock();
return data.result;
@@ -423,17 +460,20 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
}
}
+ if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+ return matches;
+
/* Check child interfaces */
- down_read_nested(&priv->vlan_rwsem, nesting);
+ netdev_lock(priv->dev);
list_for_each_entry(child_priv, &priv->child_intfs, list) {
matches += ipoib_match_gid_pkey_addr(child_priv, gid,
- pkey_index, addr,
- nesting + 1,
- found_net_dev);
+ pkey_index, addr,
+ nesting + 1,
+ found_net_dev);
if (matches > 1)
break;
}
- up_read(&priv->vlan_rwsem);
+ netdev_unlock(priv->dev);
return matches;
}
@@ -441,7 +481,7 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
/* Returns the number of matching net_devs found (between 0 and 2). Also
* return the matching net_device in the @net_dev parameter, holding a
* reference to the net_device, if the number of matches >= 1 */
-static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
+static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u32 port,
u16 pkey_index,
const union ib_gid *gid,
const struct sockaddr *addr,
@@ -466,7 +506,7 @@ static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
}
static struct net_device *ipoib_get_net_dev_by_params(
- struct ib_device *dev, u8 port, u16 pkey,
+ struct ib_device *dev, u32 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data)
{
@@ -483,10 +523,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
if (ret)
return NULL;
- if (!dev_list)
- return NULL;
-
- /* See if we can find a unique device matching the L2 parameters */
+ /* See if we can find a unique device matching the pkey and GID */
matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
gid, NULL, &net_dev);
@@ -499,7 +536,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
dev_put(net_dev);
- /* Couldn't find a unique device with L2 parameters only. Use L3
+ /* Couldn't find a unique device with pkey and GID only. Use L3
* address to uniquely match the net device */
matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
gid, addr, &net_dev);
@@ -509,7 +546,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
default:
dev_warn_ratelimited(&dev->dev,
"duplicate IP address detected\n");
- /* Fall through */
+ fallthrough;
case 1:
return net_dev;
}
@@ -531,8 +568,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
ipoib_warn(priv, "enabling connected mode "
"will cause multicast packet drops\n");
+ netdev_lock_ops(dev);
netdev_update_features(dev);
- dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
+ netif_set_mtu(dev, ipoib_cm_max_mtu(dev));
+ netif_set_real_num_tx_queues(dev, 1);
+ netdev_unlock_ops(dev);
rtnl_unlock();
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
@@ -542,8 +582,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
if (!strcmp(buf, "datagram\n")) {
clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+ netdev_lock_ops(dev);
netdev_update_features(dev);
- dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+ netif_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+ netif_set_real_num_tx_queues(dev, dev->num_tx_queues);
+ netdev_unlock_ops(dev);
rtnl_unlock();
ipoib_flush_paths(dev);
return (!rtnl_trylock()) ? -EBUSY : 0;
@@ -613,7 +656,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(ipoib_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "%s\n", __func__);
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -740,7 +783,7 @@ void ipoib_flush_paths(struct net_device *dev)
static void path_rec_completion(int status,
struct sa_path_rec *pathrec,
- void *path_ptr)
+ unsigned int num_prs, void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
@@ -1182,16 +1225,52 @@ unref:
return NETDEV_TX_OK;
}
-static void ipoib_timeout(struct net_device *dev)
+static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
+ if (rn->tx_timeout) {
+ rn->tx_timeout(dev, txqueue);
+ return;
+ }
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
- ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
- netif_queue_stopped(dev),
- priv->tx_head, priv->tx_tail);
- /* XXX reset QP, etc. */
+ ipoib_warn(priv,
+ "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
+ netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
+ priv->global_tx_head, priv->global_tx_tail);
+
+
+ schedule_work(&priv->tx_timeout_work);
+}
+
+void ipoib_ib_tx_timeout_work(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work,
+ struct ipoib_dev_priv,
+ tx_timeout_work);
+ int err;
+
+ rtnl_lock();
+ netdev_lock_ops(priv->dev);
+
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ goto unlock;
+
+ ipoib_stop(priv->dev);
+ err = ipoib_open(priv->dev);
+ if (err) {
+ ipoib_warn(priv, "ipoib_open failed recovering from a tx_timeout, err(%d).\n",
+ err);
+ goto unlock;
+ }
+
+ netif_tx_wake_all_queues(priv->dev);
+unlock:
+ netdev_unlock_ops(priv->dev);
+ rtnl_unlock();
+
}
static int ipoib_hard_header(struct sk_buff *skb,
@@ -1236,10 +1315,10 @@ static int ipoib_get_iflink(const struct net_device *dev)
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
/* child/vlan interface */
- return priv->parent->ifindex;
+ return READ_ONCE(priv->parent->ifindex);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -1279,7 +1358,7 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
neigh = rcu_dereference_bh(neigh->hnext)) {
if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
/* found, take one ref on behalf of the caller */
- if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ if (!refcount_inc_not_zero(&neigh->refcnt)) {
/* deleted */
neigh = NULL;
goto out_unlock;
@@ -1374,7 +1453,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
INIT_LIST_HEAD(&neigh->list);
ipoib_cm_set(neigh, NULL);
/* one ref on behalf of the caller */
- atomic_set(&neigh->refcnt, 1);
+ refcount_set(&neigh->refcnt, 1);
return neigh;
}
@@ -1406,7 +1485,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
lockdep_is_held(&priv->lock))) {
if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
/* found, take one ref on behalf of the caller */
- if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ if (!refcount_inc_not_zero(&neigh->refcnt)) {
/* deleted */
neigh = NULL;
break;
@@ -1421,7 +1500,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
goto out_unlock;
/* one ref on behalf of the hash table */
- atomic_inc(&neigh->refcnt);
+ refcount_inc(&neigh->refcnt);
neigh->alive = jiffies;
/* put in hash */
rcu_assign_pointer(neigh->hnext,
@@ -1641,7 +1720,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
+ ipoib_dbg(priv, "%s\n", __func__);
init_completion(&priv->ntbl.deleted);
cancel_delayed_work_sync(&priv->neigh_reap_task);
@@ -1655,8 +1734,10 @@ static void ipoib_napi_add(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
- netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
+ netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll,
+ IPOIB_NUM_WC);
+ netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll,
+ MAX_SEND_CQE);
}
static void ipoib_napi_del(struct net_device *dev)
@@ -1687,6 +1768,7 @@ static void ipoib_dev_uninit_default(struct net_device *dev)
static int ipoib_dev_init_default(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ u8 addr_mod[3];
ipoib_napi_add(dev);
@@ -1705,7 +1787,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
goto out_rx_ring_cleanup;
}
- /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
+ /* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */
if (ipoib_transport_dev_init(dev, priv->ca)) {
pr_warn("%s: ipoib_transport_dev_init failed\n",
@@ -1714,9 +1796,10 @@ static int ipoib_dev_init_default(struct net_device *dev)
}
/* after qp created set dev address */
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+ addr_mod[0] = (priv->qp->qp_num >> 16) & 0xff;
+ addr_mod[1] = (priv->qp->qp_num >> 8) & 0xff;
+ addr_mod[2] = (priv->qp->qp_num) & 0xff;
+ dev_addr_mod(priv->dev, 1, addr_mod, sizeof(addr_mod));
return 0;
@@ -1736,10 +1819,35 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- if (!priv->rn_ops->ndo_do_ioctl)
+ if (!priv->rn_ops->ndo_eth_ioctl)
return -EOPNOTSUPP;
- return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
+ return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd);
+}
+
+static int ipoib_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *config)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (!priv->rn_ops->ndo_hwtstamp_get)
+ /* legacy */
+ return dev_eth_ioctl(dev, config->ifr, SIOCGHWTSTAMP);
+
+ return priv->rn_ops->ndo_hwtstamp_get(dev, config);
+}
+
+static int ipoib_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (!priv->rn_ops->ndo_hwtstamp_set)
+ /* legacy */
+ return dev_eth_ioctl(dev, config->ifr, SIOCSHWTSTAMP);
+
+ return priv->rn_ops->ndo_hwtstamp_set(dev, config, extack);
}
static int ipoib_dev_init(struct net_device *dev)
@@ -1839,11 +1947,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev)
static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
{
priv->hca_caps = priv->ca->attrs.device_cap_flags;
+ priv->kernel_caps = priv->ca->attrs.kernel_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
- if (priv->hca_caps & IB_DEVICE_UD_TSO)
+ if (priv->kernel_caps & IBK_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
@@ -1862,7 +1971,7 @@ static int ipoib_parent_init(struct net_device *ndev)
priv->port);
return result;
}
- priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+ priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);
result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
if (result) {
@@ -1877,8 +1986,7 @@ static int ipoib_parent_init(struct net_device *ndev)
priv->ca->name, priv->port, result);
return result;
}
- memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
- sizeof(union ib_gid));
+ dev_addr_mod(priv->dev, 4, priv->local_gid.raw, sizeof(union ib_gid));
SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
priv->dev->dev_port = priv->port - 1;
@@ -1893,22 +2001,24 @@ static void ipoib_child_init(struct net_device *ndev)
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
- dev_hold(priv->parent);
-
- down_write(&ppriv->vlan_rwsem);
- list_add_tail(&priv->list, &ppriv->child_intfs);
- up_write(&ppriv->vlan_rwsem);
-
priv->max_ib_mtu = ppriv->max_ib_mtu;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
- memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
- memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+ if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN))
+ memcpy(&priv->local_gid, priv->dev->dev_addr + 4,
+ sizeof(priv->local_gid));
+ else {
+ __dev_addr_set(priv->dev, ppriv->dev->dev_addr,
+ INFINIBAND_ALEN);
+ memcpy(&priv->local_gid, &ppriv->local_gid,
+ sizeof(priv->local_gid));
+ }
}
static int ipoib_ndo_init(struct net_device *ndev)
{
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
int rc;
+ struct rdma_netdev *rn = netdev_priv(ndev);
if (priv->parent) {
ipoib_child_init(ndev);
@@ -1921,6 +2031,7 @@ static int ipoib_ndo_init(struct net_device *ndev)
/* MTU will be reset when mcast join happens */
ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
+ rn->mtu = priv->mcast_mtu;
ndev->max_mtu = IPOIB_CM_MTU;
ndev->neigh_priv_len = sizeof(struct ipoib_neigh);
@@ -1941,6 +2052,17 @@ static int ipoib_ndo_init(struct net_device *ndev)
if (rc) {
pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
priv->ca->name, priv->dev->name, priv->port, rc);
+ return rc;
+ }
+
+ if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+ dev_hold(priv->parent);
+
+ netdev_lock(priv->parent);
+ list_add_tail(&priv->list, &ppriv->child_intfs);
+ netdev_unlock(priv->parent);
}
return 0;
@@ -1950,34 +2072,33 @@ static void ipoib_ndo_uninit(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- ASSERT_RTNL();
-
/*
* ipoib_remove_one guarantees the children are removed before the
* parent, and that is the only place where a parent can be removed.
*/
WARN_ON(!list_empty(&priv->child_intfs));
+ if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+ netdev_lock(ppriv->dev);
+ list_del(&priv->list);
+ netdev_unlock(ppriv->dev);
+ }
+
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
/* no more works over the priv->wq */
if (priv->wq) {
- flush_workqueue(priv->wq);
+ /* See ipoib_mcast_carrier_on_task() */
+ WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags));
destroy_workqueue(priv->wq);
priv->wq = NULL;
}
- if (priv->parent) {
- struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
-
- down_write(&ppriv->vlan_rwsem);
- list_del(&priv->list);
- up_write(&ppriv->vlan_rwsem);
-
- dev_put(priv->parent);
- }
+ dev_put(priv->parent);
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
@@ -1998,6 +2119,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
return err;
ivf->vf = vf;
+ memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
return 0;
}
@@ -2012,6 +2134,15 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
}
+static int ipoib_get_vf_guid(struct net_device *dev, int vf,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid);
+}
+
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
@@ -2038,10 +2169,13 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_set_vf_link_state = ipoib_set_vf_link_state,
.ndo_get_vf_config = ipoib_get_vf_config,
.ndo_get_vf_stats = ipoib_get_vf_stats,
+ .ndo_get_vf_guid = ipoib_get_vf_guid,
.ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
.ndo_get_stats64 = ipoib_get_stats,
- .ndo_do_ioctl = ipoib_ioctl,
+ .ndo_eth_ioctl = ipoib_ioctl,
+ .ndo_hwtstamp_get = ipoib_hwtstamp_get,
+ .ndo_hwtstamp_set = ipoib_hwtstamp_set,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -2056,23 +2190,33 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_set_rx_mode = ipoib_set_mcast_list,
.ndo_get_iflink = ipoib_get_iflink,
.ndo_get_stats64 = ipoib_get_stats,
- .ndo_do_ioctl = ipoib_ioctl,
+ .ndo_eth_ioctl = ipoib_ioctl,
+ .ndo_hwtstamp_get = ipoib_hwtstamp_get,
+ .ndo_hwtstamp_set = ipoib_hwtstamp_set,
+};
+
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
};
void ipoib_setup_common(struct net_device *dev)
{
dev->header_ops = &ipoib_header_ops;
+ dev->netdev_ops = &ipoib_netdev_default_pf;
ipoib_set_ethtool_ops(dev);
- dev->watchdog_timeo = HZ;
+ dev->watchdog_timeo = 10 * HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
dev->hard_header_len = IPOIB_HARD_LEN;
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
- dev->tx_queue_len = ipoib_sendq_size * 2;
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
dev->features = (NETIF_F_VLAN_CHALLENGED |
NETIF_F_HIGHDMA);
netif_keep_dst(dev);
@@ -2093,7 +2237,6 @@ static void ipoib_build_priv(struct net_device *dev)
priv->dev = dev;
spin_lock_init(&priv->lock);
- init_rwsem(&priv->vlan_rwsem);
mutex_init(&priv->mcast_mutex);
INIT_LIST_HEAD(&priv->path_list);
@@ -2103,22 +2246,17 @@ static void ipoib_build_priv(struct net_device *dev)
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
+ INIT_WORK(&priv->reschedule_napi_work, ipoib_napi_schedule_work);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
+ INIT_WORK(&priv->tx_timeout_work, ipoib_ib_tx_timeout_work);
INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-static const struct net_device_ops ipoib_netdev_default_pf = {
- .ndo_init = ipoib_dev_init_default,
- .ndo_uninit = ipoib_dev_uninit_default,
- .ndo_open = ipoib_ib_dev_open_default,
- .ndo_stop = ipoib_ib_dev_stop_default,
-};
-
-static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
+static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u32 port,
const char *name)
{
struct net_device *dev;
@@ -2135,7 +2273,7 @@ static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
return dev;
}
-int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
+int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
struct net_device *dev)
{
struct rdma_netdev *rn = netdev_priv(dev);
@@ -2155,16 +2293,23 @@ int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
if (rc != -EOPNOTSUPP)
goto out;
- dev->netdev_ops = &ipoib_netdev_default_pf;
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
+
+ rc = netif_set_real_num_tx_queues(dev, 1);
+ if (rc)
+ goto out;
+
+ rc = netif_set_real_num_rx_queues(dev, 1);
+ if (rc)
+ goto out;
}
priv->rn_ops = dev->netdev_ops;
- if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
+ if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;
@@ -2187,7 +2332,7 @@ out:
return rc;
}
-struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+struct net_device *ipoib_intf_alloc(struct ib_device *hca, u32 port,
const char *name)
{
struct net_device *dev;
@@ -2232,23 +2377,24 @@ void ipoib_intf_free(struct net_device *dev)
kfree(priv);
}
-static ssize_t show_pkey(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t pkey_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct net_device *ndev = to_net_dev(dev);
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
- return sprintf(buf, "0x%04x\n", priv->pkey);
+ return sysfs_emit(buf, "0x%04x\n", priv->pkey);
}
-static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
+static DEVICE_ATTR_RO(pkey);
-static ssize_t show_umcast(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t umcast_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct net_device *ndev = to_net_dev(dev);
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
- return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
+ return sysfs_emit(buf, "%d\n",
+ test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
@@ -2263,9 +2409,8 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
}
-static ssize_t set_umcast(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t umcast_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
@@ -2273,7 +2418,7 @@ static ssize_t set_umcast(struct device *dev,
return count;
}
-static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
+static DEVICE_ATTR_RW(umcast);
int ipoib_add_umcast_attr(struct net_device *dev)
{
@@ -2290,16 +2435,16 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
memcpy(&priv->local_gid.global.interface_id,
&gid->global.interface_id,
sizeof(gid->global.interface_id));
- memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
+ dev_addr_mod(netdev, 4, (u8 *)&priv->local_gid, sizeof(priv->local_gid));
clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
netif_addr_unlock_bh(netdev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
- down_read(&priv->vlan_rwsem);
+ netdev_lock_ops_to_full(priv->dev);
list_for_each_entry(child_priv, &priv->child_intfs, list)
set_base_guid(child_priv, gid);
- up_read(&priv->vlan_rwsem);
+ netdev_unlock_full_to_ops(priv->dev);
}
}
@@ -2339,14 +2484,22 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+ struct ipoib_dev_priv *cpriv;
+
+ netdev_lock_ops_to_full(dev);
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ queue_work(ipoib_workqueue, &cpriv->flush_light);
+ netdev_unlock_full_to_ops(dev);
+ }
queue_work(ipoib_workqueue, &priv->flush_light);
return 0;
}
-static ssize_t create_child(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t create_child_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
int pkey;
int ret;
@@ -2361,11 +2514,11 @@ static ssize_t create_child(struct device *dev,
return ret ? ret : count;
}
-static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child);
+static DEVICE_ATTR_WO(create_child);
-static ssize_t delete_child(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t delete_child_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
int pkey;
int ret;
@@ -2381,7 +2534,7 @@ static ssize_t delete_child(struct device *dev,
return ret ? ret : count;
}
-static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child);
+static DEVICE_ATTR_WO(delete_child);
int ipoib_add_pkey_attr(struct net_device *dev)
{
@@ -2402,23 +2555,34 @@ static ssize_t dev_id_show(struct device *dev,
{
struct net_device *ndev = to_net_dev(dev);
- if (ndev->dev_id == ndev->dev_port)
+ /*
+ * ndev->dev_port will be equal to 0 in old kernel prior to commit
+ * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface
+ * port numbers") Zero was chosen as special case for user space
+ * applications to fallback and query dev_id to check if it has
+ * different value or not.
+ *
+ * Don't print warning in such scenario.
+ *
+ * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358
+ */
+ if (ndev->dev_port && ndev->dev_id == ndev->dev_port)
netdev_info_once(ndev,
"\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
current->comm);
- return sprintf(buf, "%#x\n", ndev->dev_id);
+ return sysfs_emit(buf, "%#x\n", ndev->dev_id);
}
static DEVICE_ATTR_RO(dev_id);
-int ipoib_intercept_dev_id_attr(struct net_device *dev)
+static int ipoib_intercept_dev_id_attr(struct net_device *dev)
{
device_remove_file(&dev->dev, &dev_attr_dev_id);
return device_create_file(&dev->dev, &dev_attr_dev_id);
}
static struct net_device *ipoib_add_port(const char *format,
- struct ib_device *hca, u8 port)
+ struct ib_device *hca, u32 port)
{
struct rtnl_link_ops *ops = ipoib_get_link_ops();
struct rdma_netdev_alloc_params params;
@@ -2439,7 +2603,11 @@ static struct net_device *ipoib_add_port(const char *format,
ib_register_event_handler(&priv->event_handler);
/* call event handler to ensure pkey in sync */
- queue_work(ipoib_workqueue, &priv->flush_heavy);
+ ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY);
+
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
+
+ dev_net_set(ndev, rdma_dev_net(hca));
result = register_netdev(ndev);
if (result) {
@@ -2490,21 +2658,21 @@ sysfs_failed:
return ERR_PTR(-ENOMEM);
}
-static void ipoib_add_one(struct ib_device *device)
+static int ipoib_add_one(struct ib_device *device)
{
struct list_head *dev_list;
struct net_device *dev;
struct ipoib_dev_priv *priv;
- int p;
+ unsigned int p;
int count = 0;
dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
if (!dev_list)
- return;
+ return -ENOMEM;
INIT_LIST_HEAD(dev_list);
- for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+ rdma_for_each_port (device, p) {
if (!rdma_protocol_ib(device, p))
continue;
dev = ipoib_add_port("ib%d", device, p);
@@ -2517,10 +2685,11 @@ static void ipoib_add_one(struct ib_device *device)
if (!count) {
kfree(dev_list);
- return;
+ return -EOPNOTSUPP;
}
ib_set_client_data(device, &ipoib_client, dev_list);
+ return 0;
}
static void ipoib_remove_one(struct ib_device *device, void *client_data)
@@ -2528,18 +2697,17 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
struct list_head *dev_list = client_data;
- if (!dev_list)
- return;
-
list_for_each_entry_safe(priv, tmp, dev_list, list) {
LIST_HEAD(head);
ipoib_parent_unregister_pre(priv->dev);
rtnl_lock();
+ netdev_lock(priv->dev);
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs,
list)
unregister_netdevice_queue(cpriv->dev, &head);
+ netdev_unlock(priv->dev);
unregister_netdevice_queue(priv->dev, &head);
unregister_netdevice_many(&head);
@@ -2577,9 +2745,7 @@ static int __init ipoib_init_module(void)
*/
BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
- ret = ipoib_register_debugfs();
- if (ret)
- return ret;
+ ipoib_register_debugfs();
/*
* We create a global workqueue here that is used for all flush