summary | refs | log | tree | commit | diff
path: root/drivers/infiniband/core/device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/device.c')
-rw-r--r-- drivers/infiniband/core/device.c 298
1 files changed, 257 insertions, 41 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 07cb6c5ffda0..d4263385850a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -209,23 +209,6 @@ static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
printk("%s(NULL ib_device): %pV", level, vaf);
}
-void ibdev_printk(const char *level, const struct ib_device *ibdev,
- const char *format, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, format);
-
- vaf.fmt = format;
- vaf.va = &args;
-
- __ibdev_printk(level, ibdev, &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ibdev_printk);
-
#define define_ibdev_printk_level(func, level) \
void func(const struct ib_device *ibdev, const char *fmt, ...) \
{ \
@@ -437,6 +420,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
client->rename(ibdev, client_data);
}
up_read(&ibdev->client_data_rwsem);
+ rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT);
up_read(&devices_rwsem);
return 0;
}
@@ -503,6 +487,7 @@ static void ib_device_release(struct device *device)
rcu_head);
}
+ mutex_destroy(&dev->subdev_lock);
mutex_destroy(&dev->unregistration_lock);
mutex_destroy(&dev->compat_devs_mutex);
@@ -543,6 +528,8 @@ static struct class ib_class = {
static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *dev, struct net *net)
{
+ bool is_full_dev = &dev->coredev == coredev;
+
/* This BUILD_BUG_ON is intended to catch layout change
* of union of ib_core_device and device.
* dev must be the first element as ib_core and providers
@@ -554,6 +541,13 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
coredev->dev.class = &ib_class;
coredev->dev.groups = dev->groups;
+
+ /*
+ * Don't expose hw counters outside of the init namespace.
+ */
+ if (!is_full_dev && dev->hw_stats_attr_index)
+ coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
+
device_initialize(&coredev->dev);
coredev->owner = dev;
INIT_LIST_HEAD(&coredev->port_list);
@@ -641,6 +635,11 @@ struct ib_device *_ib_alloc_device(size_t size)
BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
+
+ mutex_init(&device->subdev_lock);
+ INIT_LIST_HEAD(&device->subdev_list_head);
+ INIT_LIST_HEAD(&device->subdev_list);
+
return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
@@ -1345,6 +1344,37 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
{
}
+/*
+ * Announce a registered device to userspace. With devices_rwsem held for
+ * read, emit the KOBJ_ADD uevent, then an RDMA_REGISTER_EVENT netlink
+ * notification, then one RDMA_NETDEV_ATTACH_EVENT for every port that
+ * currently has a netdev. The walk stops at the first notification that
+ * reports an error; the error itself is not propagated.
+ */
+static void ib_device_notify_register(struct ib_device *device)
+{
+	struct net_device *ndev;
+	u32 port_num;
+	int err;
+
+	down_read(&devices_rwsem);
+
+	/* Mark for userspace that device is ready */
+	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+	err = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
+	if (err)
+		goto unlock;
+
+	rdma_for_each_port(device, port_num) {
+		ndev = ib_device_get_netdev(device, port_num);
+		if (ndev) {
+			err = rdma_nl_notify_event(device, port_num,
+						   RDMA_NETDEV_ATTACH_EVENT);
+			/* Drop the reference taken by ib_device_get_netdev() */
+			dev_put(ndev);
+			if (err)
+				goto unlock;
+		}
+	}
+
+unlock:
+	up_read(&devices_rwsem);
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device: Device to register
@@ -1441,8 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name,
return ret;
}
dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+ ib_device_notify_register(device);
+
ib_device_put(device);
return 0;
@@ -1461,6 +1492,18 @@ EXPORT_SYMBOL(ib_register_device);
/* Callers must hold a get on the device. */
static void __ib_unregister_device(struct ib_device *ib_dev)
{
+ struct ib_device *sub, *tmp;
+
+ mutex_lock(&ib_dev->subdev_lock);
+ list_for_each_entry_safe_reverse(sub, tmp,
+ &ib_dev->subdev_list_head,
+ subdev_list) {
+ list_del(&sub->subdev_list);
+ ib_dev->ops.del_sub_dev(sub);
+ ib_device_put(ib_dev);
+ }
+ mutex_unlock(&ib_dev->subdev_lock);
+
/*
* We have a registration lock so that all the calls to unregister are
* fully fenced, once any unregister returns the device is truely
@@ -1473,6 +1516,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
goto out;
disable_device(ib_dev);
+ rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
@@ -2141,11 +2185,15 @@ static void add_ndev_hash(struct ib_port_data *pdata)
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
u32 port)
{
+ enum rdma_nl_notify_event_type etype;
struct net_device *old_ndev;
struct ib_port_data *pdata;
unsigned long flags;
int ret;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return -EINVAL;
+
/*
* Drivers wish to call this before ib_register_driver, so we have to
* setup the port data early.
@@ -2154,9 +2202,6 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
if (ret)
return ret;
- if (!rdma_is_port_valid(ib_dev, port))
- return -EINVAL;
-
pdata = &ib_dev->port_data[port];
spin_lock_irqsave(&pdata->netdev_lock, flags);
old_ndev = rcu_dereference_protected(
@@ -2166,16 +2211,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
return 0;
}
- if (old_ndev)
- netdev_tracker_free(ndev, &pdata->netdev_tracker);
- if (ndev)
- netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
rcu_assign_pointer(pdata->netdev, ndev);
+ netdev_put(old_ndev, &pdata->netdev_tracker);
+ netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
add_ndev_hash(pdata);
- if (old_ndev)
- __dev_put(old_ndev);
+
+ /* Make sure that the device is registered before we send events */
+ if (xa_load(&devices, ib_dev->index) != ib_dev)
+ return 0;
+
+ etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
+ rdma_nl_notify_event(ib_dev, port, etype);
return 0;
}
@@ -2223,6 +2271,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
if (!rdma_is_port_valid(ib_dev, port))
return NULL;
+ if (!ib_dev->port_data)
+ return NULL;
+
pdata = &ib_dev->port_data[port];
/*
@@ -2235,22 +2286,40 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
spin_lock(&pdata->netdev_lock);
res = rcu_dereference_protected(
pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
- if (res)
- dev_hold(res);
+ dev_hold(res);
spin_unlock(&pdata->netdev_lock);
}
- /*
- * If we are starting to unregister expedite things by preventing
- * propagation of an unregistering netdev.
- */
- if (res && res->reg_state != NETREG_REGISTERED) {
- dev_put(res);
- return NULL;
+ return res;
+}
+EXPORT_SYMBOL(ib_device_get_netdev);
+
+/**
+ * ib_query_netdev_port - Query the port number of a net_device
+ * associated with an ibdev
+ * @ibdev: IB device
+ * @ndev: Network device
+ * @port: IB port the net_device is connected to (output, written only
+ * when a matching port is found)
+ *
+ * Walks every port of @ibdev and compares the netdev bound to that port
+ * with @ndev. The reference obtained for each port's netdev is dropped
+ * again before moving on or returning.
+ *
+ * Return: 0 when a port whose netdev equals @ndev was found, -ENOENT
+ * otherwise.
+ */
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+ u32 *port)
+{
+ struct net_device *ib_ndev;
+ u32 port_num;
+
+ rdma_for_each_port(ibdev, port_num) {
+ ib_ndev = ib_device_get_netdev(ibdev, port_num);
+ if (ndev == ib_ndev) {
+ *port = port_num;
+ dev_put(ib_ndev);
+ return 0;
+ }
+ dev_put(ib_ndev);
}
- return res;
+ return -ENOENT;
}
+EXPORT_SYMBOL(ib_query_netdev_port);
/**
* ib_device_get_by_netdev - Find an IB device associated with a netdev
@@ -2311,9 +2380,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
if (filter(ib_dev, port, idev, filter_cookie))
cb(ib_dev, port, idev, cookie);
-
- if (idev)
- dev_put(idev);
+ dev_put(idev);
}
}
@@ -2601,6 +2668,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
ops->uverbs_no_driver_id_binding;
SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, add_sub_dev);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
@@ -2617,6 +2685,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, counter_alloc_stats);
SET_DEVICE_OP(dev_ops, counter_bind_qp);
SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_init);
SET_DEVICE_OP(dev_ops, counter_unbind_qp);
SET_DEVICE_OP(dev_ops, counter_update_stats);
SET_DEVICE_OP(dev_ops, create_ah);
@@ -2635,6 +2704,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, dealloc_ucontext);
SET_DEVICE_OP(dev_ops, dealloc_xrcd);
SET_DEVICE_OP(dev_ops, del_gid);
+ SET_DEVICE_OP(dev_ops, del_sub_dev);
SET_DEVICE_OP(dev_ops, dereg_mr);
SET_DEVICE_OP(dev_ops, destroy_ah);
SET_DEVICE_OP(dev_ops, destroy_counters);
@@ -2717,6 +2787,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
+ SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
+ SET_DEVICE_OP(dev_ops, report_port_event);
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
@@ -2728,9 +2800,59 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
SET_OBJ_SIZE(dev_ops, ib_xrcd);
+ SET_OBJ_SIZE(dev_ops, rdma_counter);
}
EXPORT_SYMBOL(ib_set_device_ops);
+/**
+ * ib_add_sub_device - Create a sub device underneath a parent IB device
+ * @parent: Device that will own the new sub device
+ * @type: Device type recorded on the new sub device
+ * @name: Name handed to the driver's add_sub_dev callback
+ *
+ * Takes a reference on @parent, asks the driver to create the sub device
+ * and links it onto @parent's sub-device list. The parent reference is
+ * dropped again if the driver callback fails.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when the driver does not provide both
+ * add_sub_dev and del_sub_dev, -EINVAL when no reference on @parent could
+ * be obtained, or the error encoded in the pointer returned by the
+ * add_sub_dev callback.
+ */
+int ib_add_sub_device(struct ib_device *parent,
+		      enum rdma_nl_dev_type type,
+		      const char *name)
+{
+	struct ib_device *sub;
+
+	if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
+		return -EOPNOTSUPP;
+
+	if (!ib_device_try_get(parent))
+		return -EINVAL;
+
+	sub = parent->ops.add_sub_dev(parent, type, name);
+	if (IS_ERR(sub)) {
+		ib_device_put(parent);
+		return PTR_ERR(sub);
+	}
+
+	sub->type = type;
+	sub->parent = parent;
+
+	mutex_lock(&parent->subdev_lock);
+	/*
+	 * list_add_tail(new, head): the sub device's node is the new entry
+	 * and the parent's subdev_list_head is the list being appended to.
+	 * Passing them the other way round re-links the head into the new
+	 * node and drops every previously added sub device from the list.
+	 */
+	list_add_tail(&sub->subdev_list, &parent->subdev_list_head);
+	mutex_unlock(&parent->subdev_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_add_sub_device);
+
+/*
+ * Remove @sub from its parent's sub-device list, drop one reference on
+ * @sub, let the parent driver delete it through del_sub_dev, and finally
+ * drop one reference on the parent.
+ *
+ * Returns 0, or -EOPNOTSUPP when @sub has no parent.
+ */
+int ib_del_sub_device_and_put(struct ib_device *sub)
+{
+	struct ib_device *owner;
+
+	owner = sub->parent;
+	if (owner == NULL)
+		return -EOPNOTSUPP;
+
+	/* Unlink while holding the parent's sub-device lock */
+	mutex_lock(&owner->subdev_lock);
+	list_del(&sub->subdev_list);
+	mutex_unlock(&owner->subdev_lock);
+
+	/* The put on @sub comes before the driver callback, as before */
+	ib_device_put(sub);
+	owner->ops.del_sub_dev(sub);
+	ib_device_put(owner);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_del_sub_device_and_put);
+
#ifdef CONFIG_INFINIBAND_VIRT_DMA
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
{
@@ -2761,6 +2883,97 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
},
};
+/*
+ * Look up the port of @ibdev that @ndev is bound to and compare the
+ * port's current state with the state cached in the device. When they
+ * differ, store the new state and dispatch IB_EVENT_PORT_ERR (new state
+ * is IB_PORT_DOWN) or IB_EVENT_PORT_ACTIVE (any other state). Nothing is
+ * done when @ndev is not bound to a port of @ibdev or the state did not
+ * change.
+ */
+void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev)
+{
+	struct ib_event ibevent = {};
+	enum ib_port_state new_state;
+	struct ib_port_data *pdata;
+	bool changed;
+	u32 port;
+
+	if (ib_query_netdev_port(ibdev, ndev, &port))
+		return;
+
+	new_state = ib_get_curr_port_state(ndev);
+
+	/* Compare and update the cached state in one cache_lock section */
+	write_lock_irq(&ibdev->cache_lock);
+	pdata = &ibdev->port_data[port];
+	changed = pdata->cache.last_port_state != new_state;
+	if (changed)
+		pdata->cache.last_port_state = new_state;
+	write_unlock_irq(&ibdev->cache_lock);
+
+	if (!changed)
+		return;
+
+	if (new_state == IB_PORT_DOWN)
+		ibevent.event = IB_EVENT_PORT_ERR;
+	else
+		ibevent.event = IB_EVENT_PORT_ACTIVE;
+	ibevent.device = ibdev;
+	ibevent.element.port_num = port;
+	ib_dispatch_event(&ibevent);
+}
+EXPORT_SYMBOL(ib_dispatch_port_state_event);
+
+/*
+ * Forward a netdev link event to the IB device bound to @ndev.
+ *
+ * If the driver provides report_port_event it is called with the raw
+ * netdev event; otherwise the core helper
+ * ib_dispatch_port_state_event() is used. Nothing happens when @ndev is
+ * a LAG master/port or when no IB device is associated with it.
+ */
+static void handle_port_event(struct net_device *ndev, unsigned long event)
+{
+	struct ib_device *ibdev;
+
+	/* Currently, link events in bonding scenarios are still
+	 * reported by drivers that support bonding.
+	 */
+	if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev))
+		return;
+
+	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+	if (!ibdev)
+		return;
+
+	if (ibdev->ops.report_port_event)
+		ibdev->ops.report_port_event(ibdev, ndev, event);
+	else
+		ib_dispatch_port_state_event(ibdev, ndev);
+
+	/* Balance the reference taken by ib_device_get_by_netdev() */
+	ib_device_put(ibdev);
+}
+
+/*
+ * Netdevice notifier callback.
+ *
+ * NETDEV_UP, NETDEV_CHANGE and NETDEV_DOWN are handed to
+ * handle_port_event(). NETDEV_CHANGENAME sends an
+ * RDMA_NETDEV_RENAME_EVENT notification for the IB port the netdev is
+ * bound to, if there is one. All other events are ignored. Always
+ * returns NOTIFY_DONE.
+ */
+static int ib_netdevice_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct ib_device *ibdev;
+	u32 port;
+
+	switch (event) {
+	case NETDEV_UP:
+	case NETDEV_CHANGE:
+	case NETDEV_DOWN:
+		handle_port_event(ndev, event);
+		break;
+
+	case NETDEV_CHANGENAME:
+		ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+		if (!ibdev)
+			break;
+
+		/* Only notify when the netdev maps to a port of this device */
+		if (!ib_query_netdev_port(ibdev, ndev, &port))
+			rdma_nl_notify_event(ibdev, port,
+					     RDMA_NETDEV_RENAME_EVENT);
+
+		ib_device_put(ibdev);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to ib_netdevice_event() */
+static struct notifier_block nb_netdevice = {
+	.notifier_call = ib_netdevice_event,
+};
+
static int __init ib_core_init(void)
{
int ret = -ENOMEM;
@@ -2832,6 +3045,8 @@ static int __init ib_core_init(void)
goto err_parent;
}
+ register_netdevice_notifier(&nb_netdevice);
+
return 0;
err_parent:
@@ -2861,6 +3076,7 @@ err:
static void __exit ib_core_cleanup(void)
{
+ unregister_netdevice_notifier(&nb_netdevice);
roce_gid_mgmt_cleanup();
rdma_nl_unregister(RDMA_NL_LS);
nldev_exit();