diff options
Diffstat (limited to 'drivers/infiniband/ulp')
21 files changed, 377 insertions, 184 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 963e936da5e3..91f866e3fb8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -329,14 +329,6 @@ struct ipoib_dev_priv { unsigned long flags; - /* - * This protects access to the child_intfs list. - * To READ from child_intfs the RTNL or vlan_rwsem read side must be - * held. To WRITE RTNL and the vlan_rwsem write side must be held (in - * that order) This lock exists because we have a few contexts where - * we need the child_intfs, but do not want to grab the RTNL. - */ - struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; struct rb_root path_tree; @@ -399,6 +391,9 @@ struct ipoib_dev_priv { struct ib_event_handler event_handler; struct net_device *parent; + /* 'child_intfs' and 'list' membership of all child devices are + * protected by the netdev instance lock of 'dev'. + */ struct list_head child_intfs; struct list_head list; int child_type; @@ -509,12 +504,12 @@ struct net_device *ipoib_intf_alloc(struct ib_device *hca, u32 port, const char *format); int ipoib_intf_init(struct ib_device *hca, u32 port, const char *format, struct net_device *dev); -void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); +void ipoib_queue_work(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level); void ipoib_ib_tx_timeout_work(struct work_struct *work); -void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); @@ -533,7 +528,6 @@ void ipoib_mcast_restart_task(struct work_struct *work); void ipoib_mcast_start_thread(struct net_device *dev); void ipoib_mcast_stop_thread(struct net_device *dev); -void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); @@ -610,7 +604,6 @@ int ipoib_set_mode(struct net_device *dev, const char *buf); void ipoib_setup_common(struct net_device *dev); -void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 7da94fb8d7fa..4feb7170535c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -128,16 +128,13 @@ static void ipoib_get_ethtool_stats(struct net_device *dev, static void ipoib_get_strings(struct net_device __always_unused *dev, u32 stringset, u8 *data) { - u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) { - memcpy(p, ipoib_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) + ethtool_puts(&data, + ipoib_gstrings_stats[i].stat_string); break; default: break; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5cde275daa94..10b0dbda6cd5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include <linux/ip.h> #include <linux/tcp.h> +#include <net/netdev_lock.h> #include <rdma/ib_cache.h> #include "ipoib.h" @@ -781,16 +782,20 @@ static void ipoib_napi_enable(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - napi_enable(&priv->recv_napi); - napi_enable(&priv->send_napi); + netdev_lock_ops_to_full(dev); + napi_enable_locked(&priv->recv_napi); + napi_enable_locked(&priv->send_napi); + netdev_unlock_full_to_ops(dev); } static void ipoib_napi_disable(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - napi_disable(&priv->recv_napi); - napi_disable(&priv->send_napi); + netdev_lock_ops_to_full(dev); + napi_disable_locked(&priv->recv_napi); + napi_disable_locked(&priv->send_napi); + netdev_unlock_full_to_ops(dev); } int ipoib_ib_dev_stop_default(struct net_device *dev) @@ -1172,24 +1177,11 @@ out: } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level, - int nesting) + enum ipoib_flush_level level) { - struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read_nested(&priv->vlan_rwsem, nesting); - - /* - * Flush any child interfaces too -- they might be up even if - * the parent is down. - */ - list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level, nesting + 1); - - up_read(&priv->vlan_rwsem); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && level != IPOIB_FLUSH_HEAVY) { /* Make sure the dev_addr is set even if not flushing */ @@ -1253,10 +1245,14 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { + netdev_lock_ops(dev); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - if (ipoib_ib_dev_open(dev)) + result = ipoib_ib_dev_open(dev); + netdev_unlock_ops(dev); + + if (result) return; if (netif_queue_stopped(dev)) @@ -1280,7 +1276,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1288,7 +1284,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1297,10 +1293,35 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) container_of(work, struct ipoib_dev_priv, flush_heavy); rtnl_lock(); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); rtnl_unlock(); } +void ipoib_queue_work(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) +{ + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + netdev_lock(priv->dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_queue_work(cpriv, level); + netdev_unlock(priv->dev); + } + + switch (level) { + case IPOIB_FLUSH_LIGHT: + queue_work(ipoib_workqueue, &priv->flush_light); + break; + case IPOIB_FLUSH_NORMAL: + queue_work(ipoib_workqueue, &priv->flush_normal); + break; + case IPOIB_FLUSH_HEAVY: + queue_work(ipoib_workqueue, &priv->flush_heavy); + break; + } +} + void ipoib_ib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7a5be705d718..f2f5465f2a90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,6 +49,8 @@ #include <linux/jhash.h> #include <net/arp.h> #include <net/addrconf.h> +#include <net/netdev_lock.h> +#include <net/pkt_sched.h> #include <linux/inetdevice.h> #include <rdma/ib_cache.h> @@ -131,6 +133,52 @@ static int ipoib_netdev_event(struct notifier_block *this, } #endif +struct ipoib_ifupdown_work { + struct work_struct work; + struct net_device *dev; + netdevice_tracker dev_tracker; + bool up; +}; + +static void ipoib_ifupdown_task(struct work_struct *work) +{ + struct ipoib_ifupdown_work *pwork = + container_of(work, struct ipoib_ifupdown_work, work); + struct net_device *dev = pwork->dev; + unsigned int flags; + + rtnl_lock(); + flags = dev->flags; + if (pwork->up) + flags |= IFF_UP; + else + flags &= ~IFF_UP; + + if (dev->flags != flags) + dev_change_flags(dev, flags, NULL); + rtnl_unlock(); + netdev_put(dev, &pwork->dev_tracker); + kfree(pwork); +} + +static void ipoib_schedule_ifupdown_task(struct net_device *dev, bool up) +{ + struct ipoib_ifupdown_work *work; + + if ((up && (dev->flags & IFF_UP)) || + (!up && !(dev->flags & IFF_UP))) + return; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return; + work->dev = dev; + netdev_hold(dev, &work->dev_tracker, GFP_KERNEL); + work->up = up; + INIT_WORK(&work->work, ipoib_ifupdown_task); + queue_work(ipoib_workqueue, &work->work); +} + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -153,17 +201,10 @@ int ipoib_open(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (flags & IFF_UP) - continue; - - dev_change_flags(cpriv->dev, flags | IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, true); + netdev_unlock_full_to_ops(dev); } else if (priv->parent) { struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); @@ -198,17 +239,10 @@ static int ipoib_stop(struct net_device *dev) struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ - down_read(&priv->vlan_rwsem); - list_for_each_entry(cpriv, &priv->child_intfs, list) { - int flags; - - flags = cpriv->dev->flags; - if (!(flags & IFF_UP)) - continue; - - dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL); - } - up_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + ipoib_schedule_ifupdown_task(cpriv->dev, false); + netdev_unlock_full_to_ops(dev); } return 0; @@ -238,7 +272,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -265,7 +299,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) if (carrier_status) netif_carrier_on(dev); } else { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } return ret; @@ -329,8 +363,7 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); - if (master) - dev_hold(master); + dev_hold(master); rcu_read_unlock(); if (master) @@ -426,17 +459,20 @@ static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, } } + if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + return matches; + /* Check child interfaces */ - down_read_nested(&priv->vlan_rwsem, nesting); + netdev_lock(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) { matches += ipoib_match_gid_pkey_addr(child_priv, gid, - pkey_index, addr, - nesting + 1, - found_net_dev); + pkey_index, addr, + nesting + 1, + found_net_dev); if (matches > 1) break; } - up_read(&priv->vlan_rwsem); + netdev_unlock(priv->dev); return matches; } @@ -531,9 +567,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); + netif_set_mtu(dev, ipoib_cm_max_mtu(dev)); netif_set_real_num_tx_queues(dev, 1); + netdev_unlock_ops(dev); rtnl_unlock(); priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; @@ -543,9 +581,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + netdev_lock_ops(dev); netdev_update_features(dev); - dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + netif_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); netif_set_real_num_tx_queues(dev, dev->num_tx_queues); + netdev_unlock_ops(dev); rtnl_unlock(); ipoib_flush_paths(dev); return (!rtnl_trylock()) ? -EBUSY : 0; @@ -1212,6 +1252,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work) int err; rtnl_lock(); + netdev_lock_ops(priv->dev); if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) goto unlock; @@ -1226,6 +1267,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work) netif_tx_wake_all_queues(priv->dev); unlock: + netdev_unlock_ops(priv->dev); rtnl_unlock(); } @@ -1272,10 +1314,10 @@ static int ipoib_get_iflink(const struct net_device *dev) /* parent interface */ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - return dev->ifindex; + return READ_ONCE(dev->ifindex); /* child/vlan interface */ - return priv->parent->ifindex; + return READ_ONCE(priv->parent->ifindex); } static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) @@ -1992,9 +2034,9 @@ static int ipoib_ndo_init(struct net_device *ndev) dev_hold(priv->parent); - down_write(&ppriv->vlan_rwsem); + netdev_lock(priv->parent); list_add_tail(&priv->list, &ppriv->child_intfs); - up_write(&ppriv->vlan_rwsem); + netdev_unlock(priv->parent); } return 0; @@ -2004,8 +2046,6 @@ static void ipoib_ndo_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - ASSERT_RTNL(); - /* * ipoib_remove_one guarantees the children are removed before the * parent, and that is the only place where a parent can be removed. @@ -2015,9 +2055,9 @@ static void ipoib_ndo_uninit(struct net_device *dev) if (priv->parent) { struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - down_write(&ppriv->vlan_rwsem); + netdev_lock(ppriv->dev); list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); + netdev_unlock(ppriv->dev); } ipoib_neigh_hash_uninit(dev); @@ -2146,7 +2186,7 @@ void ipoib_setup_common(struct net_device *dev) dev->hard_header_len = IPOIB_HARD_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; - dev->tx_queue_len = ipoib_sendq_size * 2; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->features = (NETIF_F_VLAN_CHALLENGED | NETIF_F_HIGHDMA); netif_keep_dst(dev); @@ -2167,7 +2207,6 @@ static void ipoib_build_priv(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); - init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); INIT_LIST_HEAD(&priv->path_list); @@ -2372,10 +2411,10 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - down_read(&priv->vlan_rwsem); + netdev_lock_ops_to_full(priv->dev); list_for_each_entry(child_priv, &priv->child_intfs, list) set_base_guid(child_priv, gid); - up_read(&priv->vlan_rwsem); + netdev_unlock_full_to_ops(priv->dev); } } @@ -2415,6 +2454,14 @@ static int ipoib_set_mac(struct net_device *dev, void *addr) set_base_guid(priv, (union ib_gid *)(ss->__data + 4)); + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + netdev_lock_ops_to_full(dev); + list_for_each_entry(cpriv, &priv->child_intfs, list) + queue_work(ipoib_workqueue, &cpriv->flush_light); + netdev_unlock_full_to_ops(dev); + } queue_work(ipoib_workqueue, &priv->flush_light); return 0; @@ -2526,7 +2573,7 @@ static struct net_device *ipoib_add_port(const char *format, ib_register_event_handler(&priv->event_handler); /* call event handler to ensure pkey in sync */ - queue_work(ipoib_workqueue, &priv->flush_heavy); + ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY); ndev->rtnl_link_ops = ipoib_get_link_ops(); @@ -2624,9 +2671,11 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) rtnl_lock(); + netdev_lock(priv->dev); list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) unregister_netdevice_queue(cpriv->dev, &head); + netdev_unlock(priv->dev); unregister_netdevice_queue(priv->dev, &head); unregister_netdevice_many(&head); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 319d4288eddd..8a4ab9ff0a68 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -287,8 +287,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, ah = ipoib_create_ah(dev, priv->pd, &av); if (IS_ERR(ah)) { - ipoib_warn(priv, "ib_address_create failed %ld\n", - -PTR_ERR(ah)); + ipoib_warn(priv, "ib_address_create failed %pe\n", ah); /* use original error */ return PTR_ERR(ah); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 9ad8d9856275..53db7c8191e3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -97,10 +97,13 @@ out_err: return ret; } -static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ipoib_new_child_link(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct net *link_net = rtnl_newlink_link_net(params); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct net_device *pdev; struct ipoib_dev_priv *ppriv; u16 child_pkey; @@ -109,7 +112,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + pdev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!pdev || pdev->type != ARPHRD_INFINIBAND) return -ENODEV; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 368e5d77416d..86983080d28b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -280,15 +280,15 @@ void ipoib_event(struct ib_event_handler *handler, dev_name(&record->device->dev), record->element.port_num); if (record->event == IB_EVENT_CLIENT_REREGISTER) { - queue_work(ipoib_workqueue, &priv->flush_light); + ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT); } else if (record->event == IB_EVENT_PORT_ERR || record->event == IB_EVENT_PORT_ACTIVE || record->event == IB_EVENT_LID_CHANGE) { - queue_work(ipoib_workqueue, &priv->flush_normal); + ipoib_queue_work(priv, IPOIB_FLUSH_NORMAL); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - queue_work(ipoib_workqueue, &priv->flush_heavy); + ipoib_queue_work(priv, IPOIB_FLUSH_HEAVY); } else if (record->event == IB_EVENT_GID_CHANGE && !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { - queue_work(ipoib_workqueue, &priv->flush_light); + ipoib_queue_work(priv, IPOIB_FLUSH_LIGHT); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4bd161e86f8d..243e8f555eca 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -53,8 +53,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *tpriv; - - ASSERT_RTNL(); + bool result = true; /* * Since the legacy sysfs interface uses pkey for deletion it cannot @@ -73,13 +72,17 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv, if (ppriv->pkey == priv->pkey) return false; + netdev_lock(ppriv->dev); list_for_each_entry(tpriv, &ppriv->child_intfs, list) { if (tpriv->pkey == priv->pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) - return false; + tpriv->child_type == IPOIB_LEGACY_CHILD) { + result = false; + break; + } } + netdev_unlock(ppriv->dev); - return true; + return result; } /* @@ -98,8 +101,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, int result; struct rdma_netdev *rn = netdev_priv(ndev); - ASSERT_RTNL(); - /* * We do not need to touch priv if register_netdevice fails, so just * always use this flow. @@ -184,8 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); - snprintf(intf_name, sizeof(intf_name), "%s.%04x", - ppriv->dev->name, pkey); + /* If you increase IFNAMSIZ, update snprintf below + * to allow longer names. + */ + BUILD_BUG_ON(IFNAMSIZ != 16); + snprintf(intf_name, sizeof(intf_name), "%.10s.%04x", ppriv->dev->name, + pkey); ndev = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (IS_ERR(ndev)) { @@ -263,6 +268,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); rc = -ENODEV; + netdev_lock(ppriv->dev); list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { @@ -274,9 +280,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) goto out; } - down_write(&ppriv->vlan_rwsem); list_del_init(&priv->list); - up_write(&ppriv->vlan_rwsem); work->dev = priv->dev; INIT_WORK(&work->work, ipoib_vlan_delete_task); queue_work(ipoib_workqueue, &work->work); @@ -287,6 +291,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) } out: + netdev_unlock(ppriv->dev); rtnl_unlock(); return rc; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index bb9aaff92ca3..a5be6f1ba12b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -393,10 +393,10 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) * @task: iscsi task * @sector: error sector if exsists (output) * - * Return: zero if no data-integrity errors have occured - * 0x1: data-integrity error occured in the guard-block - * 0x2: data-integrity error occured in the reference tag - * 0x3: data-integrity error occured in the application tag + * Return: zero if no data-integrity errors have occurred + * 0x1: data-integrity error occurred in the guard-block + * 0x2: data-integrity error occurred in the reference tag + * 0x3: data-integrity error occurred in the application tag * * In addition the error sector is marked. */ diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 68429a5f796d..1d7ac24c4c00 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -507,10 +507,6 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_free_rx_descriptors(struct iser_conn *iser_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - enum iser_data_dir cmd_dir); - int iser_reg_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir dir, bool all_imm); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 00a7303c8cc6..42977a5326ee 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -91,9 +91,6 @@ isert_qp_event_callback(struct ib_event *e, void *context) case IB_EVENT_COMM_EST: rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); break; - case IB_EVENT_QP_LAST_WQE_REACHED: - isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n"); - break; default: break; } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c index 29b3d8fce3f5..316959940d2f 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -164,9 +164,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data) return; for (i = 0; i < VNIC_STATS_LEN; i++) - memcpy(data + i * ETH_GSTRING_LEN, - vnic_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); + ethtool_puts(&data, vnic_gstrings_stats[i].stat_string); } /* ethtool ops */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index d3c436ead694..4aa80c9388f0 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -133,7 +133,7 @@ static ssize_t mpath_policy_store(struct device *dev, /* distinguish "mi" and "min-latency" with length */ len = strnlen(buf, NAME_MAX); - if (buf[len - 1] == '\n') + if (len && buf[len - 1] == '\n') len--; if (!strncasecmp(buf, "round-robin", 11) || diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 88106cf5ce55..71387811b281 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -331,7 +331,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.path, "Failed IB_WR_REG_MR: %s\n", + rtrs_err_rl(con->c.path, "Failed IB_WR_REG_MR: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -351,11 +351,11 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n", + rtrs_err_rl(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } - req->need_inv = false; + req->mr->need_inval = false; if (req->need_inv_comp) complete(&req->inv_comp); else @@ -391,12 +391,13 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, clt_path = to_clt_path(con->c.path); if (req->sg_cnt) { - if (req->dir == DMA_FROM_DEVICE && req->need_inv) { + if (req->mr->need_inval) { /* - * We are here to invalidate read requests + * We are here to invalidate read/write requests * ourselves. In normal scenario server should - * send INV for all read requests, but - * we are here, thus two things could happen: + * send INV for all read requests, we do local + * invalidate for write requests ourselves, but + * we are here, thus three things could happen: * * 1. this is failover, when errno != 0 * and can_wait == 1, @@ -404,6 +405,9 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, * 2. something totally bad happened and * server forgot to send INV, so we * should do that ourselves. + * + * 3. write request finishes, we need to do local + * invalidate */ if (can_wait) { @@ -418,18 +422,10 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, refcount_inc(&req->ref); err = rtrs_inv_rkey(req); if (err) { - rtrs_err(con->c.path, "Send INV WR key=%#x: %d\n", + rtrs_err_rl(con->c.path, "Send INV WR key=%#x: %d\n", req->mr->rkey, err); } else if (can_wait) { wait_for_completion(&req->inv_comp); - } else { - /* - * Something went wrong, so request will be - * completed from INV callback. - */ - WARN_ON_ONCE(1); - - return; } if (!refcount_dec_and_test(&req->ref)) return; @@ -446,8 +442,10 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, req->con = NULL; if (errno) { - rtrs_err_rl(con->c.path, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", - errno, kobject_name(&clt_path->kobj), clt_path->hca_name, + rtrs_err_rl(con->c.path, + "IO %s request failed: error=%d path=%s [%s:%u] notify=%d\n", + req->dir == DMA_TO_DEVICE ? "write" : "read", errno, + kobject_name(&clt_path->kobj), clt_path->hca_name, clt_path->hca_port, notify); } @@ -501,7 +499,7 @@ static void process_io_rsp(struct rtrs_clt_path *clt_path, u32 msg_id, req = &clt_path->reqs[msg_id]; /* Drop need_inv if server responded with send with invalidation */ - req->need_inv &= !w_inval; + req->mr->need_inval &= !w_inval; complete_rdma_req(req, errno, true, false); } @@ -626,6 +624,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done)) return; + clt_path->s.hb_missed_cnt = 0; rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); if (imm_type == RTRS_IO_RSP_IMM || @@ -643,7 +642,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - clt_path->s.hb_missed_cnt = 0; clt_path->s.hb_cur_latency = ktime_sub(ktime_get(), clt_path->s.hb_last_sent); if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) @@ -670,6 +668,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) /* * Key invalidations from server side */ + clt_path->s.hb_missed_cnt = 0; WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); @@ -967,7 +966,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, req->dir = dir; req->con = rtrs_permit_to_clt_con(clt_path, permit); req->conf = conf; - req->need_inv = false; + req->mr->need_inval = false; req->need_inv_comp = false; req->inv_errno = 0; refcount_set(&req->ref, 1); @@ -1089,7 +1088,6 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) int ret, count = 0; u32 imm, buf_id; struct ib_reg_wr rwr; - struct ib_send_wr inv_wr; struct ib_send_wr *wr = NULL; bool fr_en = false; @@ -1130,13 +1128,6 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) req->sg_cnt, req->dir); return ret; } - inv_wr = (struct ib_send_wr) { - .opcode = IB_WR_LOCAL_INV, - .wr_cqe = &req->inv_cqe, - .send_flags = IB_SEND_SIGNALED, - .ex.invalidate_rkey = req->mr->rkey, - }; - req->inv_cqe.done = rtrs_clt_inv_rkey_done; rwr = (struct ib_reg_wr) { .wr.opcode = IB_WR_REG_MR, .wr.wr_cqe = &fast_reg_cqe, @@ -1146,7 +1137,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) }; wr = &rwr.wr; fr_en = true; - refcount_inc(&req->ref); + req->mr->need_inval = true; } /* * Update stats now, after request is successfully sent it is not @@ -1156,7 +1147,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count, req->usr_len + sizeof(*msg), - imm, wr, &inv_wr); + imm, wr, NULL); if (ret) { rtrs_err_rl(s, "Write request failed: error=%d path=%s [%s:%u]\n", @@ -1164,6 +1155,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) atomic_dec(&clt_path->stats->inflight); + if (req->mr->need_inval) { + req->mr->need_inval = false; + refcount_dec(&req->ref); + } if (req->sg_cnt) ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); @@ -1213,7 +1208,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) ret = rtrs_map_sg_fr(req, count); if (ret < 0) { rtrs_err_rl(s, - "Read request failed, failed to map fast reg. data, err: %d\n", + "Read request failed, failed to map fast reg. data, err: %d\n", ret); ib_dma_unmap_sg(dev->ib_dev, req->sglist, req->sg_cnt, req->dir); @@ -1237,7 +1232,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) msg->desc[0].len = cpu_to_le32(req->mr->length); /* Further invalidation is required */ - req->need_inv = !!RTRS_MSG_NEED_INVAL_F; + req->mr->need_inval = !!RTRS_MSG_NEED_INVAL_F; } else { msg->sg_cnt = 0; @@ -1270,7 +1265,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) atomic_dec(&clt_path->stats->inflight); - req->need_inv = false; + req->mr->need_inval = false; if (req->sg_cnt) ib_dma_unmap_sg(dev->ib_dev, req->sglist, req->sg_cnt, req->dir); @@ -1494,7 +1489,9 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_path *clt_path, static void rtrs_clt_hb_err_handler(struct rtrs_con *c) { struct rtrs_clt_con *con = container_of(c, typeof(*con), c); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + rtrs_err(con->c.path, "HB err handler for path=%s\n", kobject_name(&clt_path->kobj)); rtrs_rdma_error_recovery(con); } @@ -2346,6 +2343,12 @@ static int init_conns(struct rtrs_clt_path *clt_path) if (err) goto destroy; } + + /* + * Set the cid to con_num - 1, since if we fail later, we want to stay in bounds. + */ + cid = clt_path->s.con_num - 1; + err = alloc_path_reqs(clt_path); if (err) goto destroy; @@ -3140,8 +3143,20 @@ close_path: return err; } +void rtrs_clt_ib_event_handler(struct ib_event_handler *handler, + struct ib_event *ibevent) +{ + pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent->event), + ibevent->event); +} + + static int rtrs_clt_ib_dev_init(struct rtrs_ib_dev *dev) { + INIT_IB_EVENT_HANDLER(&dev->event_handler, dev->ib_dev, + rtrs_clt_ib_event_handler); + ib_register_event_handler(&dev->event_handler); + if (!(dev->ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { pr_err("Memory registrations not supported.\n"); @@ -3151,8 +3166,15 @@ static int rtrs_clt_ib_dev_init(struct rtrs_ib_dev *dev) return 0; } +static void rtrs_clt_ib_dev_deinit(struct rtrs_ib_dev *dev) +{ + ib_unregister_event_handler(&dev->event_handler); +} + + static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { - .init = rtrs_clt_ib_dev_init + .init = rtrs_clt_ib_dev_init, + .deinit = rtrs_clt_ib_dev_deinit }; static int __init rtrs_client_init(void) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index f848c0392d98..0f57759b3080 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -115,7 +115,6 @@ struct rtrs_clt_io_req { struct completion inv_comp; int inv_errno; bool need_inv_comp; - bool need_inv; refcount_t ref; }; @@ -213,6 +212,8 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *path, void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value); int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess *clt); void free_path(struct rtrs_clt_path *clt_path); +void rtrs_clt_ib_event_handler(struct ib_event_handler *handler, + struct ib_event *ibevent); /* rtrs-clt-stats.c */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index ab25619261d2..ef29bd483b5a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -69,6 +69,7 @@ struct rtrs_ib_dev; struct rtrs_rdma_dev_pd_ops { int (*init)(struct rtrs_ib_dev *dev); + void (*deinit)(struct rtrs_ib_dev *dev); }; struct rtrs_rdma_dev_pd { @@ -84,6 +85,7 @@ struct rtrs_ib_dev { struct kref ref; struct list_head entry; struct rtrs_rdma_dev_pd *pool; + struct ib_event_handler event_handler; }; struct rtrs_con { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 1d33efb8fb03..ef4abdea3c2d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -26,7 +26,10 @@ MODULE_LICENSE("GPL"); #define DEFAULT_SESS_QUEUE_DEPTH 512 #define MAX_HDR_SIZE PAGE_SIZE -static struct rtrs_rdma_dev_pd dev_pd; +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops; +static struct rtrs_rdma_dev_pd dev_pd = { + .ops = &dev_pd_ops +}; const struct class rtrs_dev_class = { .name = "rtrs-server", }; @@ -346,6 +349,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, struct rtrs_srv_mr *srv_mr; bool need_inval = false; enum ib_send_flags flags; + struct ib_sge list; u32 imm; int err; @@ -398,7 +402,6 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); imm_wr.wr.next = NULL; if (always_invalidate) { - struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &srv_path->mrs[id->msg_id]; @@ -672,6 +675,10 @@ err: static void rtrs_srv_hb_err_handler(struct rtrs_con *c) { + struct rtrs_srv_con *con = container_of(c, typeof(*con), c); + struct rtrs_srv_path *srv_path = to_srv_path(con->c.path); + + rtrs_err(con->c.path, "HB err handler for path=%s\n", kobject_name(&srv_path->kobj)); close_path(to_srv_path(c->path)); } @@ -931,12 +938,11 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) if (err) goto close; -out: rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); return; close: + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); close_path(srv_path); - goto out; } static int post_recv_info_req(struct rtrs_srv_con *con) @@ -987,6 +993,16 @@ static int post_recv_path(struct rtrs_srv_path *srv_path) q_size = SERVICE_CON_QUEUE_DEPTH; else q_size = srv->queue_depth; + if (srv_path->state != RTRS_SRV_CONNECTING) { + rtrs_err(s, "Path state invalid. state %s\n", + rtrs_srv_state_str(srv_path->state)); + return -EIO; + } + + if (!srv_path->s.con[cid]) { + rtrs_err(s, "Conn not set for %d\n", cid); + return -EIO; + } err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size); if (err) { @@ -1229,6 +1245,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON(wc->wr_cqe != &io_comp_cqe)) return; + srv_path->s.hb_missed_cnt = 0; err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); @@ -2255,6 +2272,34 @@ static int check_module_params(void) return 0; } +void rtrs_srv_ib_event_handler(struct ib_event_handler *handler, + struct ib_event *ibevent) +{ + pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent->event), + ibevent->event); +} + +static int rtrs_srv_ib_dev_init(struct rtrs_ib_dev *dev) +{ + INIT_IB_EVENT_HANDLER(&dev->event_handler, dev->ib_dev, + rtrs_srv_ib_event_handler); + ib_register_event_handler(&dev->event_handler); + + return 0; +} + +static void rtrs_srv_ib_dev_deinit(struct rtrs_ib_dev *dev) +{ + ib_unregister_event_handler(&dev->event_handler); +} + + +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { + .init = rtrs_srv_ib_dev_init, + .deinit = rtrs_srv_ib_dev_deinit +}; + + static int __init rtrs_server_init(void) { int err; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 5e325b82ff33..014f85681f37 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -132,6 +132,8 @@ struct rtrs_srv_ib_ctx { extern const struct class rtrs_dev_class; void close_path(struct rtrs_srv_path *srv_path); +void rtrs_srv_ib_event_handler(struct ib_event_handler *handler, + struct ib_event *ibevent); static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s, size_t size, int d) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 4e17d546d4cc..bf38ac6f87c4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -584,6 +584,9 @@ static void dev_free(struct kref *ref) list_del(&dev->entry); mutex_unlock(&pool->mutex); + if (pool->ops && pool->ops->deinit) + pool->ops->deinit(dev); + ib_dealloc_pd(dev->ib_pd); kfree(dev); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 2916e77f589b..1378651735f6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2844,7 +2844,8 @@ static int srp_target_alloc(struct scsi_target *starget) return 0; } -static int srp_slave_configure(struct scsi_device *sdev) +static int srp_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct Scsi_Host *shost = sdev->host; struct srp_target_port *target = host_to_target(shost); @@ -3067,7 +3068,7 @@ static const struct scsi_host_template srp_template = { .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, .target_alloc = srp_target_alloc, - .slave_configure = srp_slave_configure, + .sdev_configure = srp_sdev_configure, .info = srp_target_info, .init_cmd_priv = srp_init_cmd_priv, .exit_cmd_priv = srp_exit_cmd_priv, @@ -3978,7 +3979,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u32 port) return host; put_host: - device_del(&host->dev); put_device(&host->dev); return NULL; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 58f70cfec45a..5dfb4644446b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -68,6 +68,8 @@ MODULE_LICENSE("Dual BSD/GPL"); static u64 srpt_service_guid; static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */ +static DEFINE_MUTEX(srpt_mc_mutex); /* Protects srpt_memory_caches. */ +static DEFINE_XARRAY(srpt_memory_caches); /* See also srpt_memory_cache_entry */ static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; module_param(srp_max_req_size, int, 0444); @@ -79,12 +81,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); @@ -101,6 +107,63 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_process_wait_list(struct srpt_rdma_ch *ch); +/* Type of the entries in srpt_memory_caches. */ +struct srpt_memory_cache_entry { + refcount_t ref; + struct kmem_cache *c; +}; + +static struct kmem_cache *srpt_cache_get(unsigned int object_size) +{ + struct srpt_memory_cache_entry *e; + char name[32]; + void *res; + + guard(mutex)(&srpt_mc_mutex); + e = xa_load(&srpt_memory_caches, object_size); + if (e) { + refcount_inc(&e->ref); + return e->c; + } + snprintf(name, sizeof(name), "srpt-%u", object_size); + e = kmalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return NULL; + refcount_set(&e->ref, 1); + e->c = kmem_cache_create(name, object_size, /*align=*/512, 0, NULL); + if (!e->c) + goto free_entry; + res = xa_store(&srpt_memory_caches, object_size, e, GFP_KERNEL); + if (xa_is_err(res)) + goto destroy_cache; + return e->c; + +destroy_cache: + kmem_cache_destroy(e->c); + +free_entry: + kfree(e); + return NULL; +} + +static void srpt_cache_put(struct kmem_cache *c) +{ + struct srpt_memory_cache_entry *e = NULL; + unsigned long object_size; + + guard(mutex)(&srpt_mc_mutex); + xa_for_each(&srpt_memory_caches, object_size, e) + if (e->c == c) + break; + if (WARN_ON_ONCE(!e)) + return; + if (!refcount_dec_and_test(&e->ref)) + return; + WARN_ON_ONCE(xa_erase(&srpt_memory_caches, object_size) != e); + kmem_cache_destroy(e->c); + kfree(e); +} + /* * The only allowed channel state changes are those that change the channel * state into a state with a higher numerical value. Hence the new > prev test. @@ -210,10 +273,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1807,8 +1872,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; @@ -2114,13 +2178,13 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_buf_cache, DMA_TO_DEVICE); - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, sdev, ch->rq_size, ch->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); kref_put(&ch->kref, srpt_free_ch); } @@ -2240,8 +2304,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, INIT_LIST_HEAD(&ch->cmd_wait_list); ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; - ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, - 512, 0, NULL); + ch->rsp_buf_cache = srpt_cache_get(ch->max_rsp_size); if (!ch->rsp_buf_cache) goto free_ch; @@ -2275,8 +2338,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, alignment_offset = round_up(imm_data_offset, 512) - imm_data_offset; req_sz = alignment_offset + imm_data_offset + srp_max_req_size; - ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, - 512, 0, NULL); + ch->req_buf_cache = srpt_cache_get(req_sz); if (!ch->req_buf_cache) goto free_rsp_ring; @@ -2473,7 +2535,7 @@ free_recv_ring: ch->req_buf_cache, DMA_FROM_DEVICE); free_recv_cache: - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); free_rsp_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2481,7 +2543,7 @@ free_rsp_ring: ch->rsp_buf_cache, DMA_TO_DEVICE); free_rsp_cache: - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); free_ch: if (rdma_cm_id) @@ -3050,7 +3112,7 @@ static void srpt_free_srq(struct srpt_device *sdev) srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, sdev->srq_size, sdev->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); sdev->srq = NULL; } @@ -3077,8 +3139,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); - sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", - srp_max_req_size, 0, 0, NULL); + sdev->req_buf_cache = srpt_cache_get(srp_max_req_size); if (!sdev->req_buf_cache) goto free_srq; @@ -3100,7 +3161,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) return 0; free_cache: - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); free_srq: ib_destroy_srq(srq); @@ -3204,7 +3265,6 @@ static int srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - ib_register_event_handler(&sdev->event_handler); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; @@ -3227,6 +3287,7 @@ static int srpt_add_one(struct ib_device *device) } } + ib_register_event_handler(&sdev->event_handler); spin_lock(&srpt_dev_lock); list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); @@ -3237,7 +3298,6 @@ static int srpt_add_one(struct ib_device *device) err_port: srpt_unregister_mad_agent(sdev, i); - ib_unregister_event_handler(&sdev->event_handler); err_cm: if (sdev->cm_id) ib_destroy_cm_id(sdev->cm_id); |