diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c')
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 494 |
1 files changed, 330 insertions, 164 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d932f99201d1..300afc27c561 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,13 +49,11 @@ #include <linux/jhash.h> #include <net/arp.h> #include <net/addrconf.h> +#include <net/netdev_lock.h> +#include <net/pkt_sched.h> #include <linux/inetdevice.h> #include <rdma/ib_cache.h> -#define DRV_VERSION "1.0.0" - -const char ipoib_driver_version[] = DRV_VERSION; - MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -90,11 +88,11 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; -static void ipoib_add_one(struct ib_device *device); +static int ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct net_device *ipoib_get_net_dev_by_params( - struct ib_device *dev, u8 port, u16 pkey, + struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); static int ipoib_set_mac(struct net_device *dev, void *addr); @@ -135,6 +133,52 @@ static int ipoib_netdev_event(struct notifier_block *this, } #endif +struct ipoib_ifupdown_work { + struct work_struct work; + struct net_device *dev; + netdevice_tracker dev_tracker; + bool up; +}; + +static void ipoib_ifupdown_task(struct work_struct *work) +{ + struct ipoib_ifupdown_work *pwork = + container_of(work, struct ipoib_ifupdown_work, work); + struct net_device *dev = pwork->dev; + unsigned int flags; + + rtnl_lock(); + flags = dev->flags; + if (pwork->up) + flags |= IFF_UP; + else + flags &= ~IFF_UP; + + if (dev->flags != flags) + dev_change_flags(dev, flags, NULL); + rtnl_unlock(); + netdev_put(dev, &pwork->dev_tracker); + kfree(pwork); +} + +static void ipoib_schedule_ifupdown_task(struct net_device *dev, bool up) +{ + struct ipoib_ifupdown_work *work; + + if ((up && (dev->flags & IFF_UP)) || + (!up && !(dev->flags & IFF_UP))) + return; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return; + work->dev = dev; + netdev_hold(dev, &work->dev_tracker, GFP_KERNEL); + work->up = up; + INIT_WORK(&work->work, ipoib_ifupdown_task); + queue_work(ipoib_workqueue, &work->work); +} + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -145,8 +189,6 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - priv->sm_fullmember_sendonly_support = false; - if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; @@ -159,19 +201,17 @@ int ipoib_open(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (flags & IFF_UP) - continue; + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, true); + netdev_unlock_full_to_ops(dev); + } else if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - dev_change_flags(cpriv->dev, flags | IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + if (!test_bit(IPOIB_FLAG_ADMIN_UP, &ppriv->flags)) + ipoib_dbg(priv, "parent device %s is not up, so child device may be not functioning.\n", + ppriv->dev->name); } - netif_start_queue(dev); return 0; @@ -199,17 +239,10 @@ static int ipoib_stop(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (!(flags & IFF_UP)) - continue; - - dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, false); + netdev_unlock_full_to_ops(dev); } return 0; @@ -239,7 +272,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -266,7 +299,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) if (carrier_status) netif_carrier_on(dev); } else { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } return ret; @@ -317,28 +350,28 @@ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, return false; } -/** - * Find the master net_device on top of the given net_device. +/* + * Find the L2 master net_device on top of the given net_device. * @dev: base IPoIB net_device * - * Returns the master net_device with a reference held, or the same net_device - * if no master exists. + * Returns the L2 master net_device with reference held if the L2 master + * exists (such as bond netdevice), or returns same netdev with reference + * held when master does not exist or when L3 master (such as VRF netdev). */ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) { struct net_device *master; rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); - if (master) - dev_hold(master); - rcu_read_unlock(); + if (!master || netif_is_l3_master(master)) + master = dev; - if (master) - return master; + dev_hold(master); + rcu_read_unlock(); - dev_hold(dev); - return dev; + return master; } struct ipoib_walk_data { @@ -346,9 +379,10 @@ struct ipoib_walk_data { struct net_device *result; }; -static int ipoib_upper_walk(struct net_device *upper, void *_data) +static int ipoib_upper_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - struct ipoib_walk_data *data = _data; + struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data; int ret = 0; if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) { @@ -361,8 +395,9 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data) } /** - * Find a net_device matching the given address, which is an upper device of - * the given net_device. + * ipoib_get_net_dev_match_addr - Find a net_device matching + * the given address, which is an upper device of the given net_device. + * * @addr: IP address to look for. * @dev: base IPoIB net_device * @@ -372,10 +407,12 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data) static struct net_device *ipoib_get_net_dev_match_addr( const struct sockaddr *addr, struct net_device *dev) { + struct netdev_nested_priv priv; struct ipoib_walk_data data = { .addr = addr, }; + priv.data = (void *)&data; rcu_read_lock(); if (ipoib_is_dev_match_addr_rcu(addr, dev)) { dev_hold(dev); @@ -383,7 +420,7 @@ static struct net_device *ipoib_get_net_dev_match_addr( goto out; } - netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data); + netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv); out: rcu_read_unlock(); return data.result; @@ -423,17 +460,20 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, } } + if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + return matches; + /* Check child interfaces */ - down_read_nested(&priv->vlan_rwsem, nesting); + netdev_lock(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) { matches += ipoib_match_gid_pkey_addr(child_priv, gid, - pkey_index, addr, - nesting + 1, - found_net_dev); + pkey_index, addr, + nesting + 1, + found_net_dev); if (matches > 1) break; } - up_read(&priv->vlan_rwsem); + netdev_unlock(priv->dev); return matches; } @@ -441,7 +481,7 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, /* Returns the number of matching net_devs found (between 0 and 2). Also * return the matching net_device in the @net_dev parameter, holding a * reference to the net_device, if the number of matches >= 1 */ -static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, +static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u32 port, u16 pkey_index, const union ib_gid *gid, const struct sockaddr *addr, @@ -466,7 +506,7 @@ static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, } static struct net_device *ipoib_get_net_dev_by_params( - struct ib_device *dev, u8 port, u16 pkey, + struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data) { @@ -483,10 +523,7 @@ static struct net_device *ipoib_get_net_dev_by_params( if (ret) return NULL; - if (!dev_list) - return NULL; - - /* See if we can find a unique device matching the L2 parameters */ + /* See if we can find a unique device matching the pkey and GID */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, NULL, &net_dev); @@ -499,7 +536,7 @@ static struct net_device *ipoib_get_net_dev_by_params( dev_put(net_dev); - /* Couldn't find a unique device with L2 parameters only. Use L3 + /* Couldn't find a unique device with pkey and GID only. Use L3 * address to uniquely match the net device */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, addr, &net_dev); @@ -509,7 +546,7 @@ static struct net_device *ipoib_get_net_dev_by_params( default: dev_warn_ratelimited(&dev->dev, "duplicate IP address detected\n"); - /* Fall through */ + fallthrough; case 1: return net_dev; } @@ -531,8 +568,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); + netif_set_mtu(dev, ipoib_cm_max_mtu(dev)); + netif_set_real_num_tx_queues(dev, 1); + netdev_unlock_ops(dev); rtnl_unlock(); priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; @@ -542,8 +582,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + netif_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + netif_set_real_num_tx_queues(dev, dev->num_tx_queues); + netdev_unlock_ops(dev); rtnl_unlock(); ipoib_flush_paths(dev); return (!rtnl_trylock()) ? -EBUSY : 0; @@ -613,7 +656,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); - ipoib_dbg(ipoib_priv(dev), "path_free\n"); + ipoib_dbg(ipoib_priv(dev), "%s\n", __func__); /* remove all neigh connected to this path */ ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); @@ -740,7 +783,7 @@ void ipoib_flush_paths(struct net_device *dev) static void path_rec_completion(int status, struct sa_path_rec *pathrec, - void *path_ptr) + unsigned int num_prs, void *path_ptr) { struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; @@ -1182,16 +1225,52 @@ unref: return NETDEV_TX_OK; } -static void ipoib_timeout(struct net_device *dev) +static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + if (rn->tx_timeout) { + rn->tx_timeout(dev, txqueue); + return; + } ipoib_warn(priv, "transmit timeout: latency %d msecs\n", jiffies_to_msecs(jiffies - dev_trans_start(dev))); - ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", - netif_queue_stopped(dev), - priv->tx_head, priv->tx_tail); - /* XXX reset QP, etc. */ + ipoib_warn(priv, + "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n", + netif_queue_stopped(dev), priv->tx_head, priv->tx_tail, + priv->global_tx_head, priv->global_tx_tail); + + + schedule_work(&priv->tx_timeout_work); +} + +void ipoib_ib_tx_timeout_work(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, + struct ipoib_dev_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + netdev_lock_ops(priv->dev); + + if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + goto unlock; + + ipoib_stop(priv->dev); + err = ipoib_open(priv->dev); + if (err) { + ipoib_warn(priv, "ipoib_open failed recovering from a tx_timeout, err(%d).\n", + err); + goto unlock; + } + + netif_tx_wake_all_queues(priv->dev); +unlock: + netdev_unlock_ops(priv->dev); + rtnl_unlock(); + } static int ipoib_hard_header(struct sk_buff *skb, @@ -1236,10 +1315,10 @@ static int ipoib_get_iflink(const struct net_device *dev) /* parent interface */ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - return dev->ifindex; + return READ_ONCE(dev->ifindex); /* child/vlan interface */ - return priv->parent->ifindex; + return READ_ONCE(priv->parent->ifindex); } static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) @@ -1279,7 +1358,7 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = rcu_dereference_bh(neigh->hnext)) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ - if (!atomic_inc_not_zero(&neigh->refcnt)) { + if (!refcount_inc_not_zero(&neigh->refcnt)) { /* deleted */ neigh = NULL; goto out_unlock; @@ -1374,7 +1453,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, INIT_LIST_HEAD(&neigh->list); ipoib_cm_set(neigh, NULL); /* one ref on behalf of the caller */ - atomic_set(&neigh->refcnt, 1); + refcount_set(&neigh->refcnt, 1); return neigh; } @@ -1406,7 +1485,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, lockdep_is_held(&priv->lock))) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ - if (!atomic_inc_not_zero(&neigh->refcnt)) { + if (!refcount_inc_not_zero(&neigh->refcnt)) { /* deleted */ neigh = NULL; break; @@ -1421,7 +1500,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, goto out_unlock; /* one ref on behalf of the hash table */ - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); neigh->alive = jiffies; /* put in hash */ rcu_assign_pointer(neigh->hnext, @@ -1641,7 +1720,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); + ipoib_dbg(priv, "%s\n", __func__); init_completion(&priv->ntbl.deleted); cancel_delayed_work_sync(&priv->neigh_reap_task); @@ -1655,8 +1734,10 @@ static void ipoib_napi_add(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC); - netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE); + netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll, + IPOIB_NUM_WC); + netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll, + MAX_SEND_CQE); } static void ipoib_napi_del(struct net_device *dev) @@ -1687,6 +1768,7 @@ static void ipoib_dev_uninit_default(struct net_device *dev) static int ipoib_dev_init_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + u8 addr_mod[3]; ipoib_napi_add(dev); @@ -1705,7 +1787,7 @@ static int ipoib_dev_init_default(struct net_device *dev) goto out_rx_ring_cleanup; } - /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ + /* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */ if (ipoib_transport_dev_init(dev, priv->ca)) { pr_warn("%s: ipoib_transport_dev_init failed\n", @@ -1714,9 +1796,10 @@ static int ipoib_dev_init_default(struct net_device *dev) } /* after qp created set dev address */ - priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; - priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; - priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; + addr_mod[0] = (priv->qp->qp_num >> 16) & 0xff; + addr_mod[1] = (priv->qp->qp_num >> 8) & 0xff; + addr_mod[2] = (priv->qp->qp_num) & 0xff; + dev_addr_mod(priv->dev, 1, addr_mod, sizeof(addr_mod)); return 0; @@ -1736,10 +1819,35 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, { struct ipoib_dev_priv *priv = ipoib_priv(dev); - if (!priv->rn_ops->ndo_do_ioctl) + if (!priv->rn_ops->ndo_eth_ioctl) return -EOPNOTSUPP; - return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); + return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd); +} + +static int ipoib_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->rn_ops->ndo_hwtstamp_get) + /* legacy */ + return dev_eth_ioctl(dev, config->ifr, SIOCGHWTSTAMP); + + return priv->rn_ops->ndo_hwtstamp_get(dev, config); +} + +static int ipoib_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->rn_ops->ndo_hwtstamp_set) + /* legacy */ + return dev_eth_ioctl(dev, config->ifr, SIOCSHWTSTAMP); + + return priv->rn_ops->ndo_hwtstamp_set(dev, config, extack); } static int ipoib_dev_init(struct net_device *dev) @@ -1839,11 +1947,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) { priv->hca_caps = priv->ca->attrs.device_cap_flags; + priv->kernel_caps = priv->ca->attrs.kernel_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - if (priv->hca_caps & IB_DEVICE_UD_TSO) + if (priv->kernel_caps & IBK_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; priv->dev->features |= priv->dev->hw_features; @@ -1862,7 +1971,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->port); return result; } - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr); result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); if (result) { @@ -1877,8 +1986,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->ca->name, priv->port, result); return result; } - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); + dev_addr_mod(priv->dev, 4, priv->local_gid.raw, sizeof(union ib_gid)); SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); priv->dev->dev_port = priv->port - 1; @@ -1893,22 +2001,24 @@ static void ipoib_child_init(struct net_device *ndev) struct ipoib_dev_priv *priv = ipoib_priv(ndev); struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - dev_hold(priv->parent); - - down_write(&ppriv->vlan_rwsem); - list_add_tail(&priv->list, &ppriv->child_intfs); - up_write(&ppriv->vlan_rwsem); - priv->max_ib_mtu = ppriv->max_ib_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); + if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN)) + memcpy(&priv->local_gid, priv->dev->dev_addr + 4, + sizeof(priv->local_gid)); + else { + __dev_addr_set(priv->dev, ppriv->dev->dev_addr, + INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, + sizeof(priv->local_gid)); + } } static int ipoib_ndo_init(struct net_device *ndev) { struct ipoib_dev_priv *priv = ipoib_priv(ndev); int rc; + struct rdma_netdev *rn = netdev_priv(ndev); if (priv->parent) { ipoib_child_init(ndev); @@ -1921,6 +2031,7 @@ static int ipoib_ndo_init(struct net_device *ndev) /* MTU will be reset when mcast join happens */ ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = ndev->mtu; + rn->mtu = priv->mcast_mtu; ndev->max_mtu = IPOIB_CM_MTU; ndev->neigh_priv_len = sizeof(struct ipoib_neigh); @@ -1941,6 +2052,17 @@ static int ipoib_ndo_init(struct net_device *ndev) if (rc) { pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n", priv->ca->name, priv->dev->name, priv->port, rc); + return rc; + } + + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + dev_hold(priv->parent); + + netdev_lock(priv->parent); + list_add_tail(&priv->list, &ppriv->child_intfs); + netdev_unlock(priv->parent); } return 0; @@ -1950,34 +2072,33 @@ static void ipoib_ndo_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - ASSERT_RTNL(); - /* * ipoib_remove_one guarantees the children are removed before the * parent, and that is the only place where a parent can be removed. */ WARN_ON(!list_empty(&priv->child_intfs)); + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + netdev_lock(ppriv->dev); + list_del(&priv->list); + netdev_unlock(ppriv->dev); + } + ipoib_neigh_hash_uninit(dev); ipoib_ib_dev_cleanup(dev); /* no more works over the priv->wq */ if (priv->wq) { - flush_workqueue(priv->wq); + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); destroy_workqueue(priv->wq); priv->wq = NULL; } - if (priv->parent) { - struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - - down_write(&ppriv->vlan_rwsem); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); - - dev_put(priv->parent); - } + dev_put(priv->parent); } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) @@ -1998,6 +2119,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf, return err; ivf->vf = vf; + memcpy(ivf->mac, dev->dev_addr, dev->addr_len); return 0; } @@ -2012,6 +2134,15 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type); } +static int ipoib_get_vf_guid(struct net_device *dev, int vf, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid); +} + static int ipoib_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats) { @@ -2038,10 +2169,13 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_link_state = ipoib_set_vf_link_state, .ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_stats = ipoib_get_vf_stats, + .ndo_get_vf_guid = ipoib_get_vf_guid, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, + .ndo_hwtstamp_get = ipoib_hwtstamp_get, + .ndo_hwtstamp_set = ipoib_hwtstamp_set, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -2056,23 +2190,33 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, + .ndo_hwtstamp_get = ipoib_hwtstamp_get, + .ndo_hwtstamp_set = ipoib_hwtstamp_set, +}; + +static const struct net_device_ops ipoib_netdev_default_pf = { + .ndo_init = ipoib_dev_init_default, + .ndo_uninit = ipoib_dev_uninit_default, + .ndo_open = ipoib_ib_dev_open_default, + .ndo_stop = ipoib_ib_dev_stop_default, }; void ipoib_setup_common(struct net_device *dev) { dev->header_ops = &ipoib_header_ops; + dev->netdev_ops = &ipoib_netdev_default_pf; ipoib_set_ethtool_ops(dev); - dev->watchdog_timeo = HZ; + dev->watchdog_timeo = 10 * HZ; dev->flags |= IFF_BROADCAST | IFF_MULTICAST; dev->hard_header_len = IPOIB_HARD_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; - dev->tx_queue_len = ipoib_sendq_size * 2; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->features = (NETIF_F_VLAN_CHALLENGED | NETIF_F_HIGHDMA); netif_keep_dst(dev); @@ -2093,7 +2237,6 @@ static void ipoib_build_priv(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); - init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); INIT_LIST_HEAD(&priv->path_list); @@ -2103,22 +2246,17 @@ static void ipoib_build_priv(struct net_device *dev) INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); + INIT_WORK(&priv->reschedule_napi_work, ipoib_napi_schedule_work); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); + INIT_WORK(&priv->tx_timeout_work, ipoib_ib_tx_timeout_work); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } -static const struct net_device_ops ipoib_netdev_default_pf = { - .ndo_init = ipoib_dev_init_default, - .ndo_uninit = ipoib_dev_uninit_default, - .ndo_open = ipoib_ib_dev_open_default, - .ndo_stop = ipoib_ib_dev_stop_default, -}; - -static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port, +static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u32 port, const char *name) { struct net_device *dev; @@ -2135,7 +2273,7 @@ static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port, return dev; } -int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name, +int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, struct net_device *dev) { struct rdma_netdev *rn = netdev_priv(dev); @@ -2155,16 +2293,23 @@ int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name, if (rc != -EOPNOTSUPP) goto out; - dev->netdev_ops = &ipoib_netdev_default_pf; rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; rn->hca = hca; + + rc = netif_set_real_num_tx_queues(dev, 1); + if (rc) + goto out; + + rc = netif_set_real_num_rx_queues(dev, 1); + if (rc) + goto out; } priv->rn_ops = dev->netdev_ops; - if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION) + if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION) dev->netdev_ops = &ipoib_netdev_ops_vf; else dev->netdev_ops = &ipoib_netdev_ops_pf; @@ -2187,7 +2332,7 @@ out: return rc; } -struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port, +struct net_device *ipoib_intf_alloc(struct ib_device *hca, u32 port, const char *name) { struct net_device *dev; @@ -2232,23 +2377,24 @@ void ipoib_intf_free(struct net_device *dev) kfree(priv); } -static ssize_t show_pkey(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t pkey_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "0x%04x\n", priv->pkey); + return sysfs_emit(buf, "0x%04x\n", priv->pkey); } -static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static DEVICE_ATTR_RO(pkey); -static ssize_t show_umcast(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t umcast_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); + return sysfs_emit(buf, "%d\n", + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) @@ -2263,9 +2409,8 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val) clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); } -static ssize_t set_umcast(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t umcast_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { unsigned long umcast_val = simple_strtoul(buf, NULL, 0); @@ -2273,7 +2418,7 @@ static ssize_t set_umcast(struct device *dev, return count; } -static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast); +static DEVICE_ATTR_RW(umcast); int ipoib_add_umcast_attr(struct net_device *dev) { @@ -2290,16 +2435,16 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, sizeof(gid->global.interface_id)); - memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); + dev_addr_mod(netdev, 4, (u8 *)&priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - down_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) set_base_guid(child_priv, gid); - up_read(&priv->vlan_rwsem); + netdev_unlock_full_to_ops(priv->dev); } } @@ -2339,14 +2484,22 @@ static int ipoib_set_mac(struct net_device *dev, void *addr) set_base_guid(priv, (union ib_gid *)(ss->__data + 4)); + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + queue_work(ipoib_workqueue, &cpriv->flush_light); + netdev_unlock_full_to_ops(dev); + } queue_work(ipoib_workqueue, &priv->flush_light); return 0; } -static ssize_t create_child(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t create_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2361,11 +2514,11 @@ static ssize_t create_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child); +static DEVICE_ATTR_WO(create_child); -static ssize_t delete_child(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t delete_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2381,7 +2534,7 @@ static ssize_t delete_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); +static DEVICE_ATTR_WO(delete_child); int ipoib_add_pkey_attr(struct net_device *dev) { @@ -2402,23 +2555,34 @@ static ssize_t dev_id_show(struct device *dev, { struct net_device *ndev = to_net_dev(dev); - if (ndev->dev_id == ndev->dev_port) + /* + * ndev->dev_port will be equal to 0 in old kernel prior to commit + * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface + * port numbers") Zero was chosen as special case for user space + * applications to fallback and query dev_id to check if it has + * different value or not. + * + * Don't print warning in such scenario. + * + * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358 + */ + if (ndev->dev_port && ndev->dev_id == ndev->dev_port) netdev_info_once(ndev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); - return sprintf(buf, "%#x\n", ndev->dev_id); + return sysfs_emit(buf, "%#x\n", ndev->dev_id); } static DEVICE_ATTR_RO(dev_id); -int ipoib_intercept_dev_id_attr(struct net_device *dev) +static int ipoib_intercept_dev_id_attr(struct net_device *dev) { device_remove_file(&dev->dev, &dev_attr_dev_id); return device_create_file(&dev->dev, &dev_attr_dev_id); } static struct net_device *ipoib_add_port(const char *format, - struct ib_device *hca, u8 port) + struct ib_device *hca, u32 port) { struct rtnl_link_ops *ops = ipoib_get_link_ops(); struct rdma_netdev_alloc_params params; @@ -2439,7 +2603,11 @@ static struct net_device *ipoib_add_port(const char *format, ib_register_event_handler(&priv->event_handler); /* call event handler to ensure pkey in sync */ - queue_work(ipoib_workqueue, &priv->flush_heavy); + ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY); + + ndev->rtnl_link_ops = ipoib_get_link_ops(); + + dev_net_set(ndev, rdma_dev_net(hca)); result = register_netdev(ndev); if (result) { @@ -2490,21 +2658,21 @@ sysfs_failed: return ERR_PTR(-ENOMEM); } -static void ipoib_add_one(struct ib_device *device) +static int ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int p; + unsigned int p; int count = 0; dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) - return; + return -ENOMEM; INIT_LIST_HEAD(dev_list); - for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + rdma_for_each_port (device, p) { if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); @@ -2517,10 +2685,11 @@ static void ipoib_add_one(struct ib_device *device) if (!count) { kfree(dev_list); - return; + return -EOPNOTSUPP; } ib_set_client_data(device, &ipoib_client, dev_list); + return 0; } static void ipoib_remove_one(struct ib_device *device, void *client_data) @@ -2528,18 +2697,17 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; struct list_head *dev_list = client_data; - if (!dev_list) - return; - list_for_each_entry_safe(priv, tmp, dev_list, list) { LIST_HEAD(head); ipoib_parent_unregister_pre(priv->dev); rtnl_lock(); + netdev_lock(priv->dev); list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) unregister_netdevice_queue(cpriv->dev, &head); + netdev_unlock(priv->dev); unregister_netdevice_queue(priv->dev, &head); unregister_netdevice_many(&head); @@ -2577,9 +2745,7 @@ static int __init ipoib_init_module(void) */ BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); - ret = ipoib_register_debugfs(); - if (ret) - return ret; + ipoib_register_debugfs(); /* * We create a global workqueue here that is used for all flush |
