Diffstat (limited to 'drivers/infiniband/core/device.c')
 drivers/infiniband/core/device.c | 716 ++++++++++++++++++++++++++++---------
 1 file changed, 525 insertions(+), 191 deletions(-)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 905a2beaf885..13e8a1714bbd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
 
 /*
  * Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -144,6 +145,33 @@ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
 }
 EXPORT_SYMBOL(rdma_dev_access_netns);
 
+/**
+ * rdma_dev_has_raw_cap() - Returns whether a specified rdma device has
+ * CAP_NET_RAW capability or not.
+ *
+ * @dev: Pointer to the rdma device whose capability is to be checked
+ *
+ * Returns true if the rdma device's owning user namespace has CAP_NET_RAW
+ * capability, otherwise false. When the rdma subsystem is in legacy shared
+ * network namespace mode, the default net namespace is considered.
+ */
+bool rdma_dev_has_raw_cap(const struct ib_device *dev)
+{
+	const struct net *net;
+
+	/* Network namespace is the resource whose user namespace
+	 * is to be considered. When in shared mode, there is no reliable
+	 * network namespace resource, so consider the default net namespace.
+	 */
+	if (ib_devices_shared_netns)
+		net = &init_net;
+	else
+		net = read_pnet(&dev->coredev.rdma_net);
+
+	return ns_capable(net->user_ns, CAP_NET_RAW);
+}
+EXPORT_SYMBOL(rdma_dev_has_raw_cap);
+
 /*
  * xarray has this behavior where it won't iterate over NULL values stored in
  * allocated arrays. So we need our own iterator to see all values stored in
@@ -208,23 +236,6 @@ static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
 		printk("%s(NULL ib_device): %pV", level, vaf);
 }
 
-void ibdev_printk(const char *level, const struct ib_device *ibdev,
-		  const char *format, ...)
-{
-	struct va_format vaf;
-	va_list args;
-
-	va_start(args, format);
-
-	vaf.fmt = format;
-	vaf.va = &args;
-
-	__ibdev_printk(level, ibdev, &vaf);
-
-	va_end(args);
-}
-EXPORT_SYMBOL(ibdev_printk);
-
 #define define_ibdev_printk_level(func, level)			\
 void func(const struct ib_device *ibdev, const char *fmt, ...)	\
 {								\
@@ -272,7 +283,6 @@ static void ib_device_check_mandatory(struct ib_device *device)
 	} mandatory_table[] = {
 		IB_MANDATORY_FUNC(query_device),
 		IB_MANDATORY_FUNC(query_port),
-		IB_MANDATORY_FUNC(query_pkey),
 		IB_MANDATORY_FUNC(alloc_pd),
 		IB_MANDATORY_FUNC(dealloc_pd),
 		IB_MANDATORY_FUNC(create_qp),
@@ -285,6 +295,7 @@ static void ib_device_check_mandatory(struct ib_device *device)
 		IB_MANDATORY_FUNC(poll_cq),
 		IB_MANDATORY_FUNC(req_notify_cq),
 		IB_MANDATORY_FUNC(get_dma_mr),
+		IB_MANDATORY_FUNC(reg_user_mr),
 		IB_MANDATORY_FUNC(dereg_mr),
 		IB_MANDATORY_FUNC(get_port_immutable)
 	};
@@ -421,7 +432,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 		return ret;
 	}
 
-	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+	strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
 	ret = rename_compat_devs(ibdev);
 
 	downgrade_write(&devices_rwsem);
@@ -436,6 +447,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 		client->rename(ibdev, client_data);
 	}
 	up_read(&ibdev->client_data_rwsem);
+	rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT);
 	up_read(&devices_rwsem);
 	return 0;
 }
@@ -491,6 +503,8 @@ static void ib_device_release(struct device *device)
 
 	free_netdevs(dev);
 	WARN_ON(refcount_read(&dev->refcount));
+	if (dev->hw_stats_data)
+		ib_device_release_hw_stats(dev->hw_stats_data);
 	if (dev->port_data) {
 		ib_cache_release_one(dev);
 		ib_security_release_port_pkey_list(dev);
@@ -500,6 +514,7 @@ static void ib_device_release(struct device *device)
 			  rcu_head);
 	}
 
+	mutex_destroy(&dev->subdev_lock);
 	mutex_destroy(&dev->unregistration_lock);
 	mutex_destroy(&dev->compat_devs_mutex);
 
@@ -508,7 +523,7 @@ static void ib_device_release(struct device *device)
 	kfree_rcu(dev, rcu_head);
 }
 
-static int ib_device_uevent(struct device *device,
+static int ib_device_uevent(const struct device *device,
 			    struct kobj_uevent_env *env)
 {
 	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
@@ -521,9 +536,9 @@ static int ib_device_uevent(struct device *device,
 	return 0;
 }
 
-static const void *net_namespace(struct device *d)
+static const void *net_namespace(const struct device *d)
 {
-	struct ib_core_device *coredev =
+	const struct ib_core_device *coredev =
 		container_of(d, struct ib_core_device, dev);
 
 	return read_pnet(&coredev->rdma_net);
@@ -540,6 +555,8 @@ static struct class ib_class = {
 static void rdma_init_coredev(struct ib_core_device *coredev,
 			      struct ib_device *dev, struct net *net)
 {
+	bool is_full_dev = &dev->coredev == coredev;
+
 	/* This BUILD_BUG_ON is intended to catch layout change
 	 * of union of ib_core_device and device.
 	 * dev must be the first element as ib_core and providers
@@ -551,6 +568,13 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
 
 	coredev->dev.class = &ib_class;
 	coredev->dev.groups = dev->groups;
+
+	/*
+	 * Don't expose hw counters outside of the init namespace.
+	 */
+	if (!is_full_dev && dev->hw_stats_attr_index)
+		coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
+
 	device_initialize(&coredev->dev);
 	coredev->owner = dev;
 	INIT_LIST_HEAD(&coredev->port_list);
@@ -560,6 +584,8 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
 /**
  * _ib_alloc_device - allocate an IB device struct
  * @size: size of structure to allocate
+ * @net: network namespace device should be located in, namespace
+ *       must stay valid until ib_register_device() is completed.
  *
  * Low-level drivers should use ib_alloc_device() to allocate &struct
  * ib_device.
  * @size is the size of the structure to be allocated,
@@ -567,9 +593,10 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
  * ib_dealloc_device() must be used to free structures allocated with
  * ib_alloc_device().
  */
-struct ib_device *_ib_alloc_device(size_t size)
+struct ib_device *_ib_alloc_device(size_t size, struct net *net)
 {
 	struct ib_device *device;
+	unsigned int i;
 
 	if (WARN_ON(size < sizeof(struct ib_device)))
 		return NULL;
@@ -583,8 +610,15 @@ struct ib_device *_ib_alloc_device(size_t size)
 		return NULL;
 	}
 
-	device->groups[0] = &ib_dev_attr_group;
-	rdma_init_coredev(&device->coredev, device, &init_net);
+	/* ib_devices_shared_netns can't change while we have active namespaces
+	 * in the system, which means either init_net is passed or the user has
+	 * no idea what they are doing.
+	 *
+	 * To avoid breaking backward compatibility, when in shared mode,
+	 * force to init the device in the init_net.
+	 */
+	net = ib_devices_shared_netns ? &init_net : net;
+	rdma_init_coredev(&device->coredev, device, net);
 
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->qp_open_list_lock);
@@ -601,6 +635,48 @@ struct ib_device *_ib_alloc_device(size_t size)
 	init_completion(&device->unreg_completion);
 	INIT_WORK(&device->unregistration_work, ib_unregister_work);
 
+	spin_lock_init(&device->cq_pools_lock);
+	for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+		INIT_LIST_HEAD(&device->cq_pools[i]);
+
+	rwlock_init(&device->cache_lock);
+
+	device->uverbs_cmd_mask =
+		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+		BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+		BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+		BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+		BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+		BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+		BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+		BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+		BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
+
+	mutex_init(&device->subdev_lock);
+	INIT_LIST_HEAD(&device->subdev_list_head);
+	INIT_LIST_HEAD(&device->subdev_list);
+
 	return device;
 }
 EXPORT_SYMBOL(_ib_alloc_device);
@@ -743,7 +819,7 @@ static void remove_client_context(struct ib_device *device,
 static int alloc_port_data(struct ib_device *device)
 {
 	struct ib_port_data_rcu *pdata_rcu;
-	unsigned int port;
+	u32 port;
 
 	if (device->port_data)
 		return 0;
@@ -752,6 +828,10 @@ static int alloc_port_data(struct ib_device *device)
 	if (WARN_ON(!device->phys_port_cnt))
 		return -EINVAL;
 
+	/* Reserve U32_MAX so the logic to go over all the ports is sane */
+	if (WARN_ON(device->phys_port_cnt == U32_MAX))
+		return -EINVAL;
+
 	/*
 	 * device->port_data is indexed directly by the port number to make
 	 * access to this data as efficient as possible.
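In practice a provider reaches _ib_alloc_device() through the ib_alloc_device() wrapper macro rather than calling it directly. The sketch below shows the allocation pattern implied by the new signature; the driver structure my_dev and the helper my_dev_alloc() are hypothetical, and the exact wrapper macro that forwards a namespace argument may differ between trees:

struct my_dev {				/* hypothetical provider struct */
	struct ib_device ibdev;		/* must be the first member */
	int private_state;
};

/* Allocate a device bound to @net; while the subsystem runs in shared
 * netns mode, _ib_alloc_device() silently substitutes &init_net. */
static struct my_dev *my_dev_alloc(struct net *net)
{
	struct ib_device *ibdev;

	ibdev = _ib_alloc_device(sizeof(struct my_dev), net);
	if (!ibdev)
		return NULL;
	return container_of(ibdev, struct my_dev, ibdev);
}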
@@ -760,7 +840,7 @@ static int alloc_port_data(struct ib_device *device)
 	 * empty slots at the beginning.
 	 */
 	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
-					rdma_end_port(device) + 1),
+					size_add(rdma_end_port(device), 1)),
 			    GFP_KERNEL);
 	if (!pdata_rcu)
 		return -ENOMEM;
@@ -783,7 +863,7 @@ static int alloc_port_data(struct ib_device *device)
 	return 0;
 }
 
-static int verify_immutable(const struct ib_device *dev, u8 port)
+static int verify_immutable(const struct ib_device *dev, u32 port)
 {
 	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
 		       rdma_max_mad_size(dev, port) != 0);
@@ -791,7 +871,7 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
 
 static int setup_port_data(struct ib_device *device)
 {
-	unsigned int port;
+	u32 port;
 	int ret;
 
 	ret = alloc_port_data(device);
@@ -812,6 +892,20 @@ static int setup_port_data(struct ib_device *device)
 	return 0;
 }
 
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data is to be read. It starts at index 1
+ *        and is valid up to and including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
+{
+	WARN_ON(!rdma_is_port_valid(dev, port));
+	return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
+
 void ib_get_device_fw_str(struct ib_device *dev, char *str)
 {
 	if (dev->ops.get_dev_fw_str)
@@ -832,15 +926,8 @@ static void ib_policy_change_task(struct work_struct *work)
 		rdma_for_each_port (dev, i) {
 			u64 sp;
-			int ret = ib_get_cached_subnet_prefix(dev,
-							      i,
-							      &sp);
-
-			WARN_ONCE(ret,
-				  "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
-				  ret);
-			if (!ret)
-				ib_security_cache_change(dev, i, sp);
+			ib_get_cached_subnet_prefix(dev, i, &sp);
+			ib_security_cache_change(dev, i, sp);
 		}
 	}
 	up_read(&devices_rwsem);
@@ -1166,7 +1253,7 @@ static int assign_name(struct ib_device *device, const char *name)
 		ret = -ENFILE;
 		goto out;
 	}
-	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+	strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
 
 	ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
 			      &last_id, GFP_KERNEL);
@@ -1178,56 +1265,6 @@ out:
 	return ret;
 }
 
-static void setup_dma_device(struct ib_device *device)
-{
-	struct device *parent = device->dev.parent;
-
-	WARN_ON_ONCE(device->dma_device);
-	if (device->dev.dma_ops) {
-		/*
-		 * The caller provided custom DMA operations. Copy the
-		 * DMA-related fields that are used by e.g. dma_alloc_coherent()
-		 * into device->dev.
-		 */
-		device->dma_device = &device->dev;
-		if (!device->dev.dma_mask) {
-			if (parent)
-				device->dev.dma_mask = parent->dma_mask;
-			else
-				WARN_ON_ONCE(true);
-		}
-		if (!device->dev.coherent_dma_mask) {
-			if (parent)
-				device->dev.coherent_dma_mask =
-					parent->coherent_dma_mask;
-			else
-				WARN_ON_ONCE(true);
-		}
-	} else {
-		/*
-		 * The caller did not provide custom DMA operations. Use the
-		 * DMA mapping operations of the parent device.
-		 */
-		WARN_ON_ONCE(!parent);
-		device->dma_device = parent;
-	}
-
-	if (!device->dev.dma_parms) {
-		if (parent) {
-			/*
-			 * The caller did not provide DMA parameters, so
-			 * 'parent' probably represents a PCI device. The PCI
-			 * core sets the maximum segment size to 64
-			 * KB. Increase this parameter to 2 GB.
-			 */
-			device->dev.dma_parms = parent->dma_parms;
-			dma_set_max_seg_size(device->dma_device, SZ_2G);
-		} else {
-			WARN_ON_ONCE(true);
-		}
-	}
-}
-
 /*
  * setup_device() allocates memory and sets up data that requires calling the
  * device ops, this is the only reason these actions are not done during
@@ -1238,7 +1275,6 @@ static int setup_device(struct ib_device *device)
 	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 	int ret;
 
-	setup_dma_device(device);
 	ib_device_check_mandatory(device);
 
 	ret = setup_port_data(device);
@@ -1282,6 +1318,8 @@ static void disable_device(struct ib_device *device)
 		remove_client_context(device, cid);
 	}
 
+	ib_cq_pool_cleanup(device);
+
 	/* Pairs with refcount_set in enable_device */
 	ib_device_put(device);
 	wait_for_completion(&device->unreg_completion);
@@ -1339,11 +1377,49 @@ out:
 	return ret;
 }
 
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
+static void ib_device_notify_register(struct ib_device *device)
+{
+	struct net_device *netdev;
+	u32 port;
+	int ret;
+
+	down_read(&devices_rwsem);
+
+	/* Mark for userspace that device is ready */
+	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+	ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
+	if (ret)
+		goto out;
+
+	rdma_for_each_port(device, port) {
+		netdev = ib_device_get_netdev(device, port);
+		if (!netdev)
+			continue;
+
+		ret = rdma_nl_notify_event(device, port,
+					   RDMA_NETDEV_ATTACH_EVENT);
+		dev_put(netdev);
+		if (ret)
+			goto out;
+	}
+
+out:
+	up_read(&devices_rwsem);
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device: Device to register
  * @name: unique string device name. This may include a '%' which will
- *	  cause a unique index to be added to the passed device name.
+ *	  cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ *	  device will be used. In this case the caller should fully
+ *	  set up the ibdev for DMA. This usually means using dma_virt_ops.
  *
  * Low-level drivers use ib_register_device() to register their
  * devices with the IB core. All registered clients will receive a
@@ -1354,7 +1430,8 @@ out:
  * asynchronously then the device pointer may become freed as soon as this
  * function returns.
  */
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+		       struct device *dma_device)
 {
 	int ret;
 
@@ -1362,6 +1439,14 @@ int ib_register_device(struct ib_device *device, const char *name)
 	if (ret)
 		return ret;
 
+	/*
+	 * If the caller does not provide a DMA capable device then the IB core
+	 * will set up ib_sge and scatterlist structures that stash the kernel
+	 * virtual address into the address field.
+	 */
+	WARN_ON(dma_device && !dma_device->dma_parms);
+	device->dma_device = dma_device;
+
 	ret = setup_device(device);
 	if (ret)
 		return ret;
@@ -1373,6 +1458,12 @@ int ib_register_device(struct ib_device *device, const char *name)
 		return ret;
 	}
 
+	device->groups[0] = &ib_dev_attr_group;
+	device->groups[1] = device->ops.device_group;
+	ret = ib_setup_device_attrs(device);
+	if (ret)
+		goto cache_cleanup;
+
 	ib_device_register_rdmacg(device);
 
 	rdma_counter_init(device);
@@ -1386,18 +1477,14 @@ int ib_register_device(struct ib_device *device, const char *name)
 	if (ret)
 		goto cg_cleanup;
 
-	ret = ib_device_register_sysfs(device);
+	ret = ib_setup_port_attrs(&device->coredev);
 	if (ret) {
 		dev_warn(&device->dev,
 			 "Couldn't register device with driver model\n");
 		goto dev_cleanup;
 	}
 
-	ib_cq_pool_init(device);
 	ret = enable_device_and_get(device);
-	dev_set_uevent_suppress(&device->dev, false);
-	/* Mark for userspace that device is ready */
-	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
 	if (ret) {
 		void (*dealloc_fn)(struct ib_device *);
 
@@ -1409,16 +1496,21 @@ int ib_register_device(struct ib_device *device, const char *name)
 		 * possibility for a parallel unregistration along with this
 		 * error flow. Since we have a refcount here we know any
 		 * parallel flow is stopped in disable_device and will see the
-		 * NULL pointers, causing the responsibility to
+		 * special dealloc_driver pointer, causing the responsibility to
 		 * ib_dealloc_device() to revert back to this thread.
 		 */
 		dealloc_fn = device->ops.dealloc_driver;
-		device->ops.dealloc_driver = NULL;
+		device->ops.dealloc_driver = prevent_dealloc_device;
 		ib_device_put(device);
 		__ib_unregister_device(device);
 		device->ops.dealloc_driver = dealloc_fn;
+		dev_set_uevent_suppress(&device->dev, false);
 		return ret;
 	}
+	dev_set_uevent_suppress(&device->dev, false);
+
+	ib_device_notify_register(device);
+
 	ib_device_put(device);
 
 	return 0;
@@ -1428,6 +1520,7 @@ dev_cleanup:
 cg_cleanup:
 	dev_set_uevent_suppress(&device->dev, false);
 	ib_device_unregister_rdmacg(device);
+cache_cleanup:
 	ib_cache_cleanup_one(device);
 	return ret;
 }
@@ -1436,9 +1529,21 @@ EXPORT_SYMBOL(ib_register_device);
 /* Callers must hold a get on the device. */
 static void __ib_unregister_device(struct ib_device *ib_dev)
 {
+	struct ib_device *sub, *tmp;
+
+	mutex_lock(&ib_dev->subdev_lock);
+	list_for_each_entry_safe_reverse(sub, tmp,
+					 &ib_dev->subdev_list_head,
+					 subdev_list) {
+		list_del(&sub->subdev_list);
+		ib_dev->ops.del_sub_dev(sub);
+		ib_device_put(ib_dev);
+	}
+	mutex_unlock(&ib_dev->subdev_lock);
+
 	/*
 	 * We have a registration lock so that all the calls to unregister are
-	 * fully fenced, once any unregister returns the device is truely
+	 * fully fenced, once any unregister returns the device is truly
 	 * unregistered even if multiple callers are unregistering it at the
 	 * same time. This also interacts with the registration flow and
	 * provides sane semantics if register and unregister are racing.
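Under the reworked registration API, a PCI driver's probe path might look roughly like the following; my_pci_probe, my_dev_ops and the "mydev%d" name pattern are illustrative, not taken from the patch:

static int my_pci_probe(struct pci_dev *pdev,
			const struct pci_device_id *id)
{
	struct my_dev *dev;
	int ret;

	dev = my_dev_alloc(&init_net);	/* sketch from above */
	if (!dev)
		return -ENOMEM;

	ib_set_device_ops(&dev->ibdev, &my_dev_ops);

	/* '%d' asks the core to assign a unique instance number; the PCI
	 * device supplies the DMA capabilities (its dma_parms must be set,
	 * or the WARN_ON() above fires). */
	ret = ib_register_device(&dev->ibdev, "mydev%d", &pdev->dev);
	if (ret)
		ib_dealloc_device(&dev->ibdev);
	return ret;
}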
@@ -1448,12 +1553,12 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
 		goto out;
 
 	disable_device(ib_dev);
-	ib_cq_pool_destroy(ib_dev);
+	rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
 
 	/* Expedite removing unregistered pointers from the hash table */
 	free_netdevs(ib_dev);
 
-	ib_device_unregister_sysfs(ib_dev);
+	ib_free_port_attrs(&ib_dev->coredev);
 	device_del(&ib_dev->dev);
 	ib_device_unregister_rdmacg(ib_dev);
 	ib_cache_cleanup_one(ib_dev);
@@ -1462,7 +1567,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
 	 * Drivers using the new flow may not call ib_dealloc_device except
 	 * in error unwind prior to registration success.
 	 */
-	if (ib_dev->ops.dealloc_driver) {
+	if (ib_dev->ops.dealloc_driver &&
+	    ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
 		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
 		ib_dealloc_device(ib_dev);
 	}
@@ -1578,7 +1684,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
 	WARN_ON(!refcount_read(&ib_dev->refcount));
 	WARN_ON(!ib_dev->ops.dealloc_driver);
 	get_device(&ib_dev->dev);
-	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+	if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
 		put_device(&ib_dev->dev);
 }
 EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -1670,13 +1776,11 @@ int ib_device_set_netns_put(struct sk_buff *skb,
 	}
 
 	/*
-	 * Currently supported only for those providers which support
-	 * disassociation and don't do port specific sysfs init. Once a
-	 * port_cleanup infrastructure is implemented, this limitation will be
-	 * removed.
+	 * All the ib_clients, including uverbs, are reset when the namespace is
+	 * changed and this cannot be blocked waiting for userspace to do
+	 * something, so disassociation is mandatory.
 	 */
-	if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
-	    ib_devices_shared_netns) {
+	if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
 		ret = -EOPNOTSUPP;
 		goto ns_err;
 	}
@@ -1707,7 +1811,7 @@ static int assign_client_id(struct ib_client *client)
 {
 	int ret;
 
-	down_write(&clients_rwsem);
+	lockdep_assert_held(&clients_rwsem);
 	/*
 	 * The add/remove callbacks must be called in FIFO/LIFO order. To
 	 * achieve this we assign client_ids so they are sorted in
@@ -1716,14 +1820,11 @@ static int assign_client_id(struct ib_client *client)
 	client->client_id = highest_client_id;
 	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
 	if (ret)
-		goto out;
+		return ret;
 
 	highest_client_id++;
 	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-
-out:
-	up_write(&clients_rwsem);
-	return ret;
+	return 0;
 }
 
 static void remove_client_id(struct ib_client *client)
@@ -1753,25 +1854,35 @@ int ib_register_client(struct ib_client *client)
 {
 	struct ib_device *device;
 	unsigned long index;
+	bool need_unreg = false;
 	int ret;
 
 	refcount_set(&client->uses, 1);
 	init_completion(&client->uses_zero);
+
+	/*
+	 * The devices_rwsem is held in write mode to ensure that a racing
+	 * ib_register_device() sees a consistent view of clients and devices.
+	 */
+	down_write(&devices_rwsem);
+	down_write(&clients_rwsem);
 	ret = assign_client_id(client);
 	if (ret)
-		return ret;
+		goto out;
 
-	down_read(&devices_rwsem);
+	need_unreg = true;
 	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
 		ret = add_client_context(device, client);
-		if (ret) {
-			up_read(&devices_rwsem);
-			ib_unregister_client(client);
-			return ret;
-		}
+		if (ret)
+			goto out;
 	}
-	up_read(&devices_rwsem);
-	return 0;
+	ret = 0;
+out:
+	up_write(&clients_rwsem);
+	up_write(&devices_rwsem);
+	if (need_unreg && ret)
+		ib_unregister_client(client);
+	return ret;
 }
 EXPORT_SYMBOL(ib_register_client);
 
@@ -1884,9 +1995,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev,
 
 /**
  * ib_get_client_nl_info - Fetch the nl_info from a client
- * @device - IB device
- * @client_name - Name of the client
- * @res - Result of the query
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
  */
 int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
			  struct ib_client_nl_info *res)
@@ -1988,7 +2099,7 @@ void ib_dispatch_event_clients(struct ib_event *event)
 }
 
 static int iw_query_port(struct ib_device *device,
-			 u8 port_num,
+			 u32 port_num,
			 struct ib_port_attr *port_attr)
 {
 	struct in_device *inetdev;
@@ -2027,10 +2138,9 @@ static int iw_query_port(struct ib_device *device,
 }
 
 static int __ib_query_port(struct ib_device *device,
-			   u8 port_num,
+			   u32 port_num,
			   struct ib_port_attr *port_attr)
 {
-	union ib_gid gid = {};
 	int err;
 
 	memset(port_attr, 0, sizeof(*port_attr));
@@ -2043,11 +2153,8 @@ static int __ib_query_port(struct ib_device *device,
 	    IB_LINK_LAYER_INFINIBAND)
 		return 0;
 
-	err = device->ops.query_gid(device, port_num, 0, &gid);
-	if (err)
-		return err;
-
-	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	ib_get_cached_subnet_prefix(device, port_num,
+				    &port_attr->subnet_prefix);
 	return 0;
 }
 
@@ -2061,7 +2168,7 @@ static int __ib_query_port(struct ib_device *device,
  * @port_attr pointer.
  */
 int ib_query_port(struct ib_device *device,
-		  u8 port_num,
+		  u32 port_num,
		  struct ib_port_attr *port_attr)
 {
 	if (!rdma_is_port_valid(device, port_num))
@@ -2113,13 +2220,17 @@ static void add_ndev_hash(struct ib_port_data *pdata)
  * NETDEV_UNREGISTER event.
  */
 int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
-			 unsigned int port)
+			 u32 port)
 {
+	enum rdma_nl_notify_event_type etype;
 	struct net_device *old_ndev;
 	struct ib_port_data *pdata;
 	unsigned long flags;
 	int ret;
 
+	if (!rdma_is_port_valid(ib_dev, port))
+		return -EINVAL;
+
 	/*
	 * Drivers wish to call this before ib_register_driver, so we have to
	 * setup the port data early.
@@ -2128,9 +2239,6 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 	if (ret)
 		return ret;
 
-	if (!rdma_is_port_valid(ib_dev, port))
-		return -EINVAL;
-
 	pdata = &ib_dev->port_data[port];
 	spin_lock_irqsave(&pdata->netdev_lock, flags);
 	old_ndev = rcu_dereference_protected(
@@ -2140,14 +2248,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 		return 0;
 	}
 
-	if (ndev)
-		dev_hold(ndev);
 	rcu_assign_pointer(pdata->netdev, ndev);
+	netdev_put(old_ndev, &pdata->netdev_tracker);
+	netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 
 	add_ndev_hash(pdata);
-	if (old_ndev)
-		dev_put(old_ndev);
+
+	/* Make sure that the device is registered before we send events */
+	if (xa_load(&devices, ib_dev->index) != ib_dev)
+		return 0;
+
+	etype = ndev ?
+		RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
+	rdma_nl_notify_event(ib_dev, port, etype);
 
 	return 0;
 }
@@ -2156,7 +2269,7 @@ EXPORT_SYMBOL(ib_device_set_netdev);
 static void free_netdevs(struct ib_device *ib_dev)
 {
 	unsigned long flags;
-	unsigned int port;
+	u32 port;
 
 	if (!ib_dev->port_data)
 		return;
@@ -2180,14 +2293,14 @@ static void free_netdevs(struct ib_device *ib_dev)
 			 * comparisons after the put
 			 */
 			rcu_assign_pointer(pdata->netdev, NULL);
-			dev_put(ndev);
+			netdev_put(ndev, &pdata->netdev_tracker);
 		}
 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 	}
 }
 
 struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
-					unsigned int port)
+					u32 port)
 {
 	struct ib_port_data *pdata;
 	struct net_device *res;
@@ -2195,6 +2308,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 	if (!rdma_is_port_valid(ib_dev, port))
 		return NULL;
 
+	if (!ib_dev->port_data)
+		return NULL;
+
 	pdata = &ib_dev->port_data[port];
 
 	/*
@@ -2207,22 +2323,40 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 		spin_lock(&pdata->netdev_lock);
 		res = rcu_dereference_protected(
 			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
-		if (res)
-			dev_hold(res);
+		dev_hold(res);
 		spin_unlock(&pdata->netdev_lock);
 	}
 
-	/*
-	 * If we are starting to unregister expedite things by preventing
-	 * propagation of an unregistering netdev.
-	 */
-	if (res && res->reg_state != NETREG_REGISTERED) {
-		dev_put(res);
-		return NULL;
+	return res;
+}
+EXPORT_SYMBOL(ib_device_get_netdev);
+
+/**
+ * ib_query_netdev_port - Query the port number of a net_device
+ * associated with an ibdev
+ * @ibdev: IB device
+ * @ndev: Network device
+ * @port: IB port the net_device is connected to
+ */
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+			 u32 *port)
+{
+	struct net_device *ib_ndev;
+	u32 port_num;
+
+	rdma_for_each_port(ibdev, port_num) {
+		ib_ndev = ib_device_get_netdev(ibdev, port_num);
+		if (ndev == ib_ndev) {
+			*port = port_num;
+			dev_put(ib_ndev);
+			return 0;
+		}
+		dev_put(ib_ndev);
 	}
 
-	return res;
+	return -ENOENT;
 }
+EXPORT_SYMBOL(ib_query_netdev_port);
 
 /**
  * ib_device_get_by_netdev - Find an IB device associated with a netdev
@@ -2274,7 +2408,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_callback cb,
			 void *cookie)
 {
-	unsigned int port;
+	u32 port;
 
 	rdma_for_each_port (ib_dev, port)
 		if (rdma_protocol_roce(ib_dev, port)) {
@@ -2283,9 +2417,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
 
 			if (filter(ib_dev, port, idev, filter_cookie))
 				cb(ib_dev, port, idev, cookie);
-
-			if (idev)
-				dev_put(idev);
+			dev_put(idev);
 		}
 }
 
@@ -2314,7 +2446,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
 	up_read(&devices_rwsem);
 }
 
-/**
+/*
  * ib_enum_all_devs - enumerate all ib_devices
  * @cb: Callback to call for each found ib_device
  *
@@ -2352,11 +2484,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
  * ib_query_pkey() fetches the specified P_Key table entry.
  */
 int ib_query_pkey(struct ib_device *device,
-		  u8 port_num, u16 index, u16 *pkey)
+		  u32 port_num, u16 index, u16 *pkey)
 {
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
+	if (!device->ops.query_pkey)
+		return -EOPNOTSUPP;
+
 	return device->ops.query_pkey(device, port_num, index, pkey);
 }
 EXPORT_SYMBOL(ib_query_pkey);
@@ -2394,7 +2529,7 @@ EXPORT_SYMBOL(ib_modify_device);
 * @port_modify_mask and @port_modify structure.
 */
 int ib_modify_port(struct ib_device *device,
-		   u8 port_num, int port_modify_mask,
+		   u32 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
 {
 	int rc;
@@ -2426,10 +2561,10 @@ EXPORT_SYMBOL(ib_modify_port);
 *   parameter may be NULL.
 */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-		u8 *port_num, u16 *index)
+		u32 *port_num, u16 *index)
 {
 	union ib_gid tmp_gid;
-	unsigned int port;
+	u32 port;
 	int ret, i;
 
 	rdma_for_each_port (device, port) {
@@ -2440,7 +2575,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		     ++i) {
 			ret = rdma_query_gid(device, port, i, &tmp_gid);
 			if (ret)
-				return ret;
+				continue;
+
 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
 				*port_num = port;
 				if (index)
@@ -2463,7 +2599,7 @@ EXPORT_SYMBOL(ib_find_gid);
 * @index: The index into the PKey table where the PKey was found.
 */
 int ib_find_pkey(struct ib_device *device,
-		 u8 port_num, u16 pkey, u16 *index)
+		 u32 port_num, u16 pkey, u16 *index)
 {
 	int ret, i;
 	u16 tmp_pkey;
@@ -2506,7 +2642,7 @@ EXPORT_SYMBOL(ib_find_pkey);
 *
 */
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
-					    u8 port,
+					    u32 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
@@ -2569,9 +2705,12 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
			ops->uverbs_no_driver_id_binding;
 
 	SET_DEVICE_OP(dev_ops, add_gid);
+	SET_DEVICE_OP(dev_ops, add_sub_dev);
 	SET_DEVICE_OP(dev_ops, advise_mr);
 	SET_DEVICE_OP(dev_ops, alloc_dm);
-	SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+	SET_DEVICE_OP(dev_ops, alloc_dmah);
+	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+	SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
 	SET_DEVICE_OP(dev_ops, alloc_mr);
 	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
 	SET_DEVICE_OP(dev_ops, alloc_mw);
@@ -2584,24 +2723,28 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, counter_alloc_stats);
 	SET_DEVICE_OP(dev_ops, counter_bind_qp);
 	SET_DEVICE_OP(dev_ops, counter_dealloc);
+	SET_DEVICE_OP(dev_ops, counter_init);
 	SET_DEVICE_OP(dev_ops, counter_unbind_qp);
 	SET_DEVICE_OP(dev_ops, counter_update_stats);
 	SET_DEVICE_OP(dev_ops, create_ah);
 	SET_DEVICE_OP(dev_ops, create_counters);
 	SET_DEVICE_OP(dev_ops, create_cq);
+	SET_DEVICE_OP(dev_ops, create_cq_umem);
 	SET_DEVICE_OP(dev_ops, create_flow);
-	SET_DEVICE_OP(dev_ops, create_flow_action_esp);
 	SET_DEVICE_OP(dev_ops, create_qp);
 	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
 	SET_DEVICE_OP(dev_ops, create_srq);
+	SET_DEVICE_OP(dev_ops, create_user_ah);
 	SET_DEVICE_OP(dev_ops, create_wq);
 	SET_DEVICE_OP(dev_ops, dealloc_dm);
+	SET_DEVICE_OP(dev_ops, dealloc_dmah);
 	SET_DEVICE_OP(dev_ops, dealloc_driver);
 	SET_DEVICE_OP(dev_ops, dealloc_mw);
 	SET_DEVICE_OP(dev_ops, dealloc_pd);
 	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
 	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
 	SET_DEVICE_OP(dev_ops, del_gid);
+	SET_DEVICE_OP(dev_ops, del_sub_dev);
 	SET_DEVICE_OP(dev_ops, dereg_mr);
 	SET_DEVICE_OP(dev_ops, destroy_ah);
 	SET_DEVICE_OP(dev_ops, destroy_counters);
@@ -2612,24 +2755,33 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
 	SET_DEVICE_OP(dev_ops, destroy_srq);
 	SET_DEVICE_OP(dev_ops, destroy_wq);
+	SET_DEVICE_OP(dev_ops, device_group);
 	SET_DEVICE_OP(dev_ops, detach_mcast);
 	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
 	SET_DEVICE_OP(dev_ops, drain_rq);
 	SET_DEVICE_OP(dev_ops, drain_sq);
 	SET_DEVICE_OP(dev_ops, enable_driver);
-	SET_DEVICE_OP(dev_ops, fill_res_entry);
-	SET_DEVICE_OP(dev_ops, fill_stat_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+	SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+	SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+	SET_DEVICE_OP(dev_ops, fill_res_srq_entry);
+	SET_DEVICE_OP(dev_ops, fill_res_srq_entry_raw);
+	SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
 	SET_DEVICE_OP(dev_ops, get_dma_mr);
 	SET_DEVICE_OP(dev_ops, get_hw_stats);
 	SET_DEVICE_OP(dev_ops, get_link_layer);
 	SET_DEVICE_OP(dev_ops, get_netdev);
+	SET_DEVICE_OP(dev_ops, get_numa_node);
 	SET_DEVICE_OP(dev_ops, get_port_immutable);
 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
 	SET_DEVICE_OP(dev_ops, get_vf_config);
 	SET_DEVICE_OP(dev_ops, get_vf_guid);
 	SET_DEVICE_OP(dev_ops, get_vf_stats);
-	SET_DEVICE_OP(dev_ops, init_port);
 	SET_DEVICE_OP(dev_ops, iw_accept);
 	SET_DEVICE_OP(dev_ops, iw_add_ref);
 	SET_DEVICE_OP(dev_ops, iw_connect);
@@ -2645,13 +2797,16 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, modify_ah);
 	SET_DEVICE_OP(dev_ops, modify_cq);
 	SET_DEVICE_OP(dev_ops, modify_device);
-	SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+	SET_DEVICE_OP(dev_ops, modify_hw_stat);
 	SET_DEVICE_OP(dev_ops, modify_port);
 	SET_DEVICE_OP(dev_ops, modify_qp);
 	SET_DEVICE_OP(dev_ops, modify_srq);
 	SET_DEVICE_OP(dev_ops, modify_wq);
 	SET_DEVICE_OP(dev_ops, peek_cq);
+	SET_DEVICE_OP(dev_ops, pre_destroy_cq);
 	SET_DEVICE_OP(dev_ops, poll_cq);
+	SET_DEVICE_OP(dev_ops, port_groups);
+	SET_DEVICE_OP(dev_ops, post_destroy_cq);
 	SET_DEVICE_OP(dev_ops, post_recv);
 	SET_DEVICE_OP(dev_ops, post_send);
 	SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2663,25 +2818,99 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, query_port);
 	SET_DEVICE_OP(dev_ops, query_qp);
 	SET_DEVICE_OP(dev_ops, query_srq);
+	SET_DEVICE_OP(dev_ops, query_ucontext);
 	SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
 	SET_DEVICE_OP(dev_ops, read_counters);
 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr);
-	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+	SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
 	SET_DEVICE_OP(dev_ops, req_notify_cq);
 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
 	SET_DEVICE_OP(dev_ops, resize_cq);
 	SET_DEVICE_OP(dev_ops, set_vf_guid);
 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
+	SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
+	SET_DEVICE_OP(dev_ops, report_port_event);
 
 	SET_OBJ_SIZE(dev_ops, ib_ah);
+	SET_OBJ_SIZE(dev_ops, ib_counters);
 	SET_OBJ_SIZE(dev_ops, ib_cq);
+	SET_OBJ_SIZE(dev_ops, ib_dmah);
+	SET_OBJ_SIZE(dev_ops, ib_mw);
 	SET_OBJ_SIZE(dev_ops, ib_pd);
+	SET_OBJ_SIZE(dev_ops, ib_qp);
+	SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
 	SET_OBJ_SIZE(dev_ops, ib_srq);
 	SET_OBJ_SIZE(dev_ops, ib_ucontext);
+	SET_OBJ_SIZE(dev_ops, ib_xrcd);
+	SET_OBJ_SIZE(dev_ops, rdma_counter);
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
+int ib_add_sub_device(struct ib_device *parent,
+		      enum rdma_nl_dev_type type,
+		      const char *name)
+{
+	struct ib_device *sub;
+	int ret = 0;
+
+	if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
+		return -EOPNOTSUPP;
+
+	if (!ib_device_try_get(parent))
+		return -EINVAL;
+
+	sub = parent->ops.add_sub_dev(parent, type, name);
+	if (IS_ERR(sub)) {
+		ib_device_put(parent);
+		return PTR_ERR(sub);
+	}
+
+	sub->type = type;
+	sub->parent = parent;
+
+	mutex_lock(&parent->subdev_lock);
+	list_add_tail(&parent->subdev_list_head,
+		      &sub->subdev_list);
+	mutex_unlock(&parent->subdev_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_add_sub_device);
+
+int ib_del_sub_device_and_put(struct ib_device *sub)
+{
+	struct ib_device *parent = sub->parent;
+
+	if (!parent)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&parent->subdev_lock);
+	list_del(&sub->subdev_list);
+	mutex_unlock(&parent->subdev_lock);
+
+	ib_device_put(sub);
+	parent->ops.del_sub_dev(sub);
+	ib_device_put(parent);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_del_sub_device_and_put);
+
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		sg_dma_address(s) = (uintptr_t)sg_virt(s);
+		sg_dma_len(s) = s->length;
+	}
+	return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 	[RDMA_NL_LS_OP_RESOLVE] = {
 		.doit = ib_nl_handle_resolve_resp,
@@ -2697,29 +2926,121 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 	},
 };
 
+void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev)
+{
+	enum ib_port_state curr_state;
+	struct ib_event ibevent = {};
+	u32 port;
+
+	if (ib_query_netdev_port(ibdev, ndev, &port))
+		return;
+
+	curr_state = ib_get_curr_port_state(ndev);
+
+	write_lock_irq(&ibdev->cache_lock);
+	if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+		write_unlock_irq(&ibdev->cache_lock);
+		return;
+	}
+	ibdev->port_data[port].cache.last_port_state = curr_state;
+	write_unlock_irq(&ibdev->cache_lock);
+
+	ibevent.event = (curr_state == IB_PORT_DOWN) ?
+			IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+	ibevent.device = ibdev;
+	ibevent.element.port_num = port;
+	ib_dispatch_event(&ibevent);
+}
+EXPORT_SYMBOL(ib_dispatch_port_state_event);
+
+static void handle_port_event(struct net_device *ndev, unsigned long event)
+{
+	struct ib_device *ibdev;
+
+	/* Currently, link events in bonding scenarios are still
+	 * reported by drivers that support bonding.
+	 */
+	if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev))
+		return;
+
+	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+	if (!ibdev)
+		return;
+
+	if (ibdev->ops.report_port_event) {
+		ibdev->ops.report_port_event(ibdev, ndev, event);
+		goto put_ibdev;
+	}
+
+	ib_dispatch_port_state_event(ibdev, ndev);
+
+put_ibdev:
+	ib_device_put(ibdev);
+};
+
+static int ib_netdevice_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct ib_device *ibdev;
+	u32 port;
+
+	switch (event) {
+	case NETDEV_CHANGENAME:
+		ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+		if (!ibdev)
+			return NOTIFY_DONE;
+
+		if (ib_query_netdev_port(ibdev, ndev, &port)) {
+			ib_device_put(ibdev);
+			break;
+		}
+
+		rdma_nl_notify_event(ibdev, port, RDMA_NETDEV_RENAME_EVENT);
+		ib_device_put(ibdev);
+		break;
+
+	case NETDEV_UP:
+	case NETDEV_CHANGE:
+	case NETDEV_DOWN:
+		handle_port_event(ndev, event);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_netdevice = {
+	.notifier_call = ib_netdevice_event,
+};
+
 static int __init ib_core_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
-	ib_wq = alloc_workqueue("infiniband", 0, 0);
+	ib_wq = alloc_workqueue("infiniband", WQ_PERCPU, 0);
 	if (!ib_wq)
 		return -ENOMEM;
 
-	ib_comp_wq = alloc_workqueue("ib-comp-wq",
-			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-	if (!ib_comp_wq) {
-		ret = -ENOMEM;
+	ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+				      WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_unreg_wq)
 		goto err;
-	}
+
+	ib_comp_wq = alloc_workqueue("ib-comp-wq",
+			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_PERCPU, 0);
+	if (!ib_comp_wq)
+		goto err_unbound;
 
 	ib_comp_unbound_wq =
 		alloc_workqueue("ib-comp-unb-wq",
				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
-	if (!ib_comp_unbound_wq) {
-		ret = -ENOMEM;
+	if (!ib_comp_unbound_wq)
 		goto err_comp;
-	}
 
 	ret = class_register(&ib_class);
 	if (ret) {
@@ -2731,7 +3052,7 @@ static int __init ib_core_init(void)
 
 	ret = addr_init();
 	if (ret) {
-		pr_warn("Could't init IB address resolution\n");
+		pr_warn("Couldn't init IB address resolution\n");
 		goto err_ibnl;
 	}
 
@@ -2761,10 +3082,20 @@ static int __init ib_core_init(void)
 
 	nldev_init();
 	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
-	roce_gid_mgmt_init();
+	ret = roce_gid_mgmt_init();
+	if (ret) {
+		pr_warn("Couldn't init RoCE GID management\n");
+		goto err_parent;
+	}
+
+	register_netdevice_notifier(&nb_netdevice);
 
 	return 0;
 
+err_parent:
+	rdma_nl_unregister(RDMA_NL_LS);
+	nldev_exit();
+	unregister_pernet_device(&rdma_dev_net_ops);
 err_compat:
 	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
 err_sa:
@@ -2779,6 +3110,8 @@ err_comp_unbound:
 	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
+err_unbound:
+	destroy_workqueue(ib_unreg_wq);
 err:
 	destroy_workqueue(ib_wq);
 	return ret;
@@ -2786,9 +3119,10 @@ err:
 
 static void __exit ib_core_cleanup(void)
 {
+	unregister_netdevice_notifier(&nb_netdevice);
 	roce_gid_mgmt_cleanup();
-	nldev_exit();
 	rdma_nl_unregister(RDMA_NL_LS);
+	nldev_exit();
 	unregister_pernet_device(&rdma_dev_net_ops);
 	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
 	ib_sa_cleanup();
@@ -2800,7 +3134,7 @@ static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done.
 	 */
 	destroy_workqueue(ib_wq);
-	flush_workqueue(system_unbound_wq);
+	destroy_workqueue(ib_unreg_wq);
 	WARN_ON(!xa_empty(&clients));
 	WARN_ON(!xa_empty(&devices));
 }
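For reference, the ops table consumed by ib_set_device_ops() is normally a static const structure in the provider. A trimmed, hypothetical example (the my_* callback names and object types are placeholders, and only a subset of the mandatory ops is shown):

static const struct ib_device_ops my_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_UNKNOWN,	/* placeholder id */
	.uverbs_abi_ver = 1,

	/* A few of the entries checked by ib_device_check_mandatory() */
	.query_device = my_query_device,
	.query_port = my_query_port,
	.alloc_pd = my_alloc_pd,
	.dealloc_pd = my_dealloc_pd,
	.reg_user_mr = my_reg_user_mr,		/* now mandatory, see above */
	.dereg_mr = my_dereg_mr,
	/* ... remaining mandatory ops elided for brevity ... */

	/* Core-owned objects are over-allocated so driver private data
	 * can trail the core structure. */
	INIT_RDMA_OBJ_SIZE(ib_pd, my_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_cq, my_cq, ibcq),
};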
