diff options
Diffstat (limited to 'drivers/infiniband/core/verbs.c')
| -rw-r--r-- | drivers/infiniband/core/verbs.c | 1415 |
1 files changed, 954 insertions, 461 deletions
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ac011836bb54..11b1a194de44 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -50,8 +50,10 @@ #include <rdma/ib_cache.h> #include <rdma/ib_addr.h> #include <rdma/rw.h> +#include <rdma/lag.h> #include "core_priv.h" +#include <trace/events/rdma_core.h> static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); @@ -94,10 +96,10 @@ static const char * const wc_statuses[] = { [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", [IB_WC_LOC_PROT_ERR] = "local protection error", [IB_WC_WR_FLUSH_ERR] = "WR flushed", - [IB_WC_MW_BIND_ERR] = "memory management operation error", + [IB_WC_MW_BIND_ERR] = "memory bind operation error", [IB_WC_BAD_RESP_ERR] = "bad response error", [IB_WC_LOC_ACCESS_ERR] = "local access error", - [IB_WC_REM_INV_REQ_ERR] = "invalid request error", + [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error", [IB_WC_REM_ACCESS_ERR] = "remote access error", [IB_WC_REM_OP_ERR] = "remote operation error", [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", @@ -145,6 +147,8 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) case IB_RATE_50_GBPS: return 20; case IB_RATE_400_GBPS: return 160; case IB_RATE_600_GBPS: return 240; + case IB_RATE_800_GBPS: return 320; + case IB_RATE_1600_GBPS: return 640; default: return -1; } } @@ -174,6 +178,8 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) case 20: return IB_RATE_50_GBPS; case 160: return IB_RATE_400_GBPS; case 240: return IB_RATE_600_GBPS; + case 320: return IB_RATE_800_GBPS; + case 640: return IB_RATE_1600_GBPS; default: return IB_RATE_PORT_CURRENT; } } @@ -203,13 +209,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) case IB_RATE_50_GBPS: return 53125; case IB_RATE_400_GBPS: return 425000; case IB_RATE_600_GBPS: return 637500; + case IB_RATE_800_GBPS: return 850000; + case IB_RATE_1600_GBPS: return 1700000; default: return -1; } } EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) +rdma_node_get_transport(unsigned int node_type) { if (node_type == RDMA_NODE_USNIC) @@ -218,12 +226,15 @@ rdma_node_get_transport(enum rdma_node_type node_type) return RDMA_TRANSPORT_USNIC_UDP; if (node_type == RDMA_NODE_RNIC) return RDMA_TRANSPORT_IWARP; + if (node_type == RDMA_NODE_UNSPECIFIED) + return RDMA_TRANSPORT_UNSPECIFIED; return RDMA_TRANSPORT_IB; } EXPORT_SYMBOL(rdma_node_get_transport); -enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) +enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, + u32 port_num) { enum rdma_transport_type lt; if (device->ops.get_link_layer) @@ -240,8 +251,10 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** - * ib_alloc_pd - Allocates an unused protection domain. + * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. @@ -254,18 +267,27 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, { struct ib_pd *pd; int mr_access_flags = 0; + int ret; - pd = device->ops.alloc_pd(device, NULL, NULL); - if (IS_ERR(pd)) - return pd; + pd = rdma_zalloc_drv_obj(device, ib_pd); + if (!pd) + return ERR_PTR(-ENOMEM); pd->device = device; - pd->uobject = NULL; - pd->__internal_mr = NULL; - atomic_set(&pd->usecnt, 0); pd->flags = flags; - if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); + rdma_restrack_set_name(&pd->res, caller); + + ret = device->ops.alloc_pd(pd, NULL); + if (ret) { + rdma_restrack_put(&pd->res); + kfree(pd); + return ERR_PTR(ret); + } + rdma_restrack_add(&pd->res); + + if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; @@ -275,10 +297,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; } - pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_set_task(&pd->res, caller); - rdma_restrack_kadd(&pd->res); - if (mr_access_flags) { struct ib_mr *mr; @@ -290,12 +308,13 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; pd->__internal_mr = mr; - if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -307,34 +326,33 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, EXPORT_SYMBOL(__ib_alloc_pd); /** - * ib_dealloc_pd - Deallocates a protection domain. + * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. + * @udata: Valid user data or NULL for kernel object * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ -void ib_dealloc_pd(struct ib_pd *pd) +int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; if (pd->__internal_mr) { - ret = pd->device->ops.dereg_mr(pd->__internal_mr); + ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL); WARN_ON(ret); pd->__internal_mr = NULL; } - /* uverbs manipulates usecnt with proper locking, while the kabi - requires the caller to guarantee we can't race here. */ - WARN_ON(atomic_read(&pd->usecnt)); + ret = pd->device->ops.dealloc_pd(pd, udata); + if (ret) + return ret; rdma_restrack_del(&pd->res); - /* Making delalloc_pd a void return is a WIP, no driver should return - an error here. */ - ret = pd->device->ops.dealloc_pd(pd); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + kfree(pd); + return ret; } -EXPORT_SYMBOL(ib_dealloc_pd); +EXPORT_SYMBOL(ib_dealloc_pd_user); /* Address handles */ @@ -354,7 +372,7 @@ void rdma_copy_ah_attr(struct rdma_ah_attr *dest, EXPORT_SYMBOL(rdma_copy_ah_attr); /** - * rdma_replace_ah_attr - Replace valid ah_attr with new new one. + * rdma_replace_ah_attr - Replace valid ah_attr with new one. * @old: Pointer to existing ah_attr which needs to be replaced. * old is assumed to be valid or zero'd * @new: Pointer to the new ah_attr. @@ -488,27 +506,45 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata) + struct ib_udata *udata, + struct net_device *xmit_slave) { + struct rdma_ah_init_attr init_attr = {}; + struct ib_device *device = pd->device; struct ib_ah *ah; + int ret; might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!pd->device->ops.create_ah) + if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); - ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata); + ah = rdma_zalloc_drv_obj_gfp( + device, ib_ah, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; - ah->uobject = NULL; - ah->type = ah_attr->type; - ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + ah->device = device; + ah->pd = pd; + ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + init_attr.ah_attr = ah_attr; + init_attr.flags = flags; + init_attr.xmit_slave = xmit_slave; - atomic_inc(&pd->usecnt); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); + if (ret) { + if (ah->sgid_attr) + rdma_put_gid_attr(ah->sgid_attr); + kfree(ah); + return ERR_PTR(ret); } + atomic_inc(&pd->usecnt); return ah; } @@ -527,15 +563,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags) { const struct ib_gid_attr *old_sgid_attr; + struct net_device *slave; struct ib_ah *ah; int ret; ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (ret) return ERR_PTR(ret); - - ah = _rdma_create_ah(pd, ah_attr, flags, NULL); - + slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? + GFP_KERNEL : GFP_ATOMIC); + if (IS_ERR(slave)) { + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ERR_CAST(slave); + } + ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave); + rdma_lag_put_ah_roce_slave(slave); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } @@ -574,7 +617,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, + udata, NULL); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -616,7 +660,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) EXPORT_SYMBOL(ib_get_rdma_header_version); static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, - u8 port_num, + u32 port_num, const struct ib_grh *grh) { int grh_version; @@ -645,20 +689,21 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * -get_sgid_attr_from_eth(struct ib_device *device, u8 port_num, +get_sgid_attr_from_eth(struct ib_device *device, u32 port_num, u16 vlan_id, const union ib_gid *sgid, enum ib_gid_type gid_type) { @@ -693,7 +738,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, (struct in6_addr *)dgid); return 0; } else if (net_type == RDMA_NETWORK_IPV6 || - net_type == RDMA_NETWORK_IB) { + net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) { *dgid = hdr->ibgrh.dgid; *sgid = hdr->ibgrh.sgid; return 0; @@ -705,7 +750,7 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); /* Resolve destination mac address and hop limit for unicast destination * GID entry, considering the source GID entry as well. - * ah_attribute must have have valid port_num, sgid_index. + * ah_attribute must have valid port_num, sgid_index. */ static int ib_resolve_unicast_gid_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) @@ -745,7 +790,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device, * On success the caller is responsible to call rdma_destroy_ah_attr on the * attr. */ -int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { @@ -876,7 +921,7 @@ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) EXPORT_SYMBOL(rdma_destroy_ah_attr); struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, - const struct ib_grh *grh, u8 port_num) + const struct ib_grh *grh, u32 port_num) { struct rdma_ah_attr ah_attr; struct ib_ah *ah; @@ -925,7 +970,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) } EXPORT_SYMBOL(rdma_query_ah); -int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; @@ -934,51 +979,87 @@ int rdma_destroy_ah(struct ib_ah *ah, u32 flags) might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; + ret = ah->device->ops.destroy_ah(ah, flags); - if (!ret) { - atomic_dec(&pd->usecnt); - if (sgid_attr) - rdma_put_gid_attr(sgid_attr); - } + if (ret) + return ret; + + atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + kfree(ah); return ret; } -EXPORT_SYMBOL(rdma_destroy_ah); +EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr) +/** + * ib_create_srq_user - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. + * @uobject: uobject pointer if this is not a kernel SRQ + * @udata: udata pointer if this is not a kernel SRQ + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq_user(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_usrq_object *uobject, + struct ib_udata *udata) { struct ib_srq *srq; + int ret; - if (!pd->device->ops.create_srq) - return ERR_PTR(-EOPNOTSUPP); + srq = rdma_zalloc_drv_obj(pd->device, ib_srq); + if (!srq) + return ERR_PTR(-ENOMEM); - srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL); - - if (!IS_ERR(srq)) { - srq->device = pd->device; - srq->pd = pd; - srq->uobject = NULL; - srq->event_handler = srq_init_attr->event_handler; - srq->srq_context = srq_init_attr->srq_context; - srq->srq_type = srq_init_attr->srq_type; - if (ib_srq_has_cq(srq->srq_type)) { - srq->ext.cq = srq_init_attr->ext.cq; - atomic_inc(&srq->ext.cq->usecnt); - } - if (srq->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + srq->device = pd->device; + srq->pd = pd; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + srq->uobject = uobject; + + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + if (srq->ext.xrc.xrcd) atomic_inc(&srq->ext.xrc.xrcd->usecnt); - } - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); } + atomic_inc(&pd->usecnt); + + rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ); + rdma_restrack_parent_name(&srq->res, &pd->res); + + ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); + if (ret) { + rdma_restrack_put(&srq->res); + atomic_dec(&pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); + return ERR_PTR(ret); + } + + rdma_restrack_add(&srq->res); return srq; } -EXPORT_SYMBOL(ib_create_srq); +EXPORT_SYMBOL(ib_create_srq_user); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, @@ -998,56 +1079,51 @@ int ib_query_srq(struct ib_srq *srq, } EXPORT_SYMBOL(ib_query_srq); -int ib_destroy_srq(struct ib_srq *srq) +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { - struct ib_pd *pd; - enum ib_srq_type srq_type; - struct ib_xrcd *uninitialized_var(xrcd); - struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; - pd = srq->pd; - srq_type = srq->srq_type; - if (ib_srq_has_cq(srq_type)) - cq = srq->ext.cq; - if (srq_type == IB_SRQT_XRC) - xrcd = srq->ext.xrc.xrcd; + ret = srq->device->ops.destroy_srq(srq, udata); + if (ret) + return ret; - ret = srq->device->ops.destroy_srq(srq); - if (!ret) { - atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) - atomic_dec(&xrcd->usecnt); - if (ib_srq_has_cq(srq_type)) - atomic_dec(&cq->usecnt); - } + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + rdma_restrack_del(&srq->res); + kfree(srq); return ret; } -EXPORT_SYMBOL(ib_destroy_srq); +EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ +static void __ib_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = event->element.qp; + + if (event->event == IB_EVENT_QP_LAST_WQE_REACHED) + complete(&qp->srq_completion); + if (qp->registered_event_handler) + qp->registered_event_handler(event, qp->qp_context); +} + static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; unsigned long flags; - spin_lock_irqsave(&qp->device->event_handler_lock, flags); + spin_lock_irqsave(&qp->device->qp_open_list_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); - spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); -} - -static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) -{ - mutex_lock(&xrcd->tgt_qp_mutex); - list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); - mutex_unlock(&xrcd->tgt_qp_mutex); + spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, @@ -1077,9 +1153,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_add(&qp->open_list, &real_qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); return qp; } @@ -1092,24 +1168,24 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) return ERR_PTR(-EINVAL); - qp = ERR_PTR(-EINVAL); - mutex_lock(&xrcd->tgt_qp_mutex); - list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { - if (real_qp->qp_num == qp_open_attr->qp_num) { - qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, - qp_open_attr->qp_context); - break; - } + down_read(&xrcd->tgt_qps_rwsem); + real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num); + if (!real_qp) { + up_read(&xrcd->tgt_qps_rwsem); + return ERR_PTR(-EINVAL); } - mutex_unlock(&xrcd->tgt_qp_mutex); + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + up_read(&xrcd->tgt_qps_rwsem); return qp; } EXPORT_SYMBOL(ib_open_qp); -static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, + struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; + int err; qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -1122,89 +1198,179 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, qp_init_attr->qp_context); - if (!IS_ERR(qp)) - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); - else - real_qp->device->ops.destroy_qp(real_qp); + if (IS_ERR(qp)) + return qp; + + err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, + real_qp, GFP_KERNEL)); + if (err) { + ib_close_qp(qp); + return ERR_PTR(err); + } return qp; } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; + struct ib_udata dummy = {}; struct ib_qp *qp; int ret; - if (qp_init_attr->rwq_ind_tbl && - (qp_init_attr->recv_cq || - qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || - qp_init_attr->cap.max_recv_sge)) - return ERR_PTR(-EINVAL); + if (!dev->ops.create_qp) + return ERR_PTR(-EOPNOTSUPP); + + qp = rdma_zalloc_drv_obj_numa(dev, ib_qp); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->device = dev; + qp->pd = pd; + qp->uobject = uobj; + qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->srq = attr->srq; + qp->event_handler = __ib_qp_event_handler; + qp->registered_event_handler = attr->event_handler; + qp->port = attr->port_num; + qp->qp_context = attr->qp_context; + + spin_lock_init(&qp->mr_lock); + INIT_LIST_HEAD(&qp->rdma_mrs); + INIT_LIST_HEAD(&qp->sig_mrs); + init_completion(&qp->srq_completion); + + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + + rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? NULL : caller); + ret = dev->ops.create_qp(qp, attr, udata); + if (ret) + goto err_create; /* - * If the callers is using the RDMA API calculate the resources - * needed for the RDMA READ/WRITE operations. - * - * Note that these callers need to pass in a port number. + * TODO: The mlx4 internally overwrites send_cq and recv_cq. + * Unfortunately, it is not an easy task to fix that driver. */ - if (qp_init_attr->cap.max_rdma_ctxs) - rdma_rw_init_qp(device, qp_init_attr); + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); - if (IS_ERR(qp)) + ret = ib_create_qp_security(qp, dev); + if (ret) + goto err_security; + + rdma_restrack_add(&qp->res); + return qp; + +err_security: + qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL); +err_create: + rdma_restrack_put(&qp->res); + kfree(qp); + return ERR_PTR(ret); + +} + +/** + * ib_create_qp_user - Creates a QP associated with the specified protection + * domain. + * @dev: IB device + * @pd: The protection domain associated with the QP. + * @attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * @udata: User data + * @uobj: uverbs obect + * @caller: caller's build-time module name + */ +struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) +{ + struct ib_qp *qp, *xrc_qp; + + if (attr->qp_type == IB_QPT_XRC_TGT) + qp = create_qp(dev, pd, attr, NULL, NULL, caller); + else + qp = create_qp(dev, pd, attr, udata, uobj, NULL); + if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp)) return qp; - ret = ib_create_qp_security(qp, device); - if (ret) { + xrc_qp = create_xrc_qp_user(qp, attr); + if (IS_ERR(xrc_qp)) { ib_destroy_qp(qp); - return ERR_PTR(ret); + return xrc_qp; } - qp->real_qp = qp; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; + xrc_qp->uobject = uobj; + return xrc_qp; +} +EXPORT_SYMBOL(ib_create_qp_user); - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; +void ib_qp_usecnt_inc(struct ib_qp *qp) +{ + if (qp->pd) + atomic_inc(&qp->pd->usecnt); + if (qp->send_cq) + atomic_inc(&qp->send_cq->usecnt); + if (qp->recv_cq) + atomic_inc(&qp->recv_cq->usecnt); + if (qp->srq) + atomic_inc(&qp->srq->usecnt); + if (qp->rwq_ind_tbl) + atomic_inc(&qp->rwq_ind_tbl->usecnt); +} +EXPORT_SYMBOL(ib_qp_usecnt_inc); - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) - return ib_create_xrc_qp(qp, qp_init_attr); +void ib_qp_usecnt_dec(struct ib_qp *qp) +{ + if (qp->rwq_ind_tbl) + atomic_dec(&qp->rwq_ind_tbl->usecnt); + if (qp->srq) + atomic_dec(&qp->srq->usecnt); + if (qp->recv_cq) + atomic_dec(&qp->recv_cq->usecnt); + if (qp->send_cq) + atomic_dec(&qp->send_cq->usecnt); + if (qp->pd) + atomic_dec(&qp->pd->usecnt); +} +EXPORT_SYMBOL(ib_qp_usecnt_dec); - qp->event_handler = qp_init_attr->event_handler; - qp->qp_context = qp_init_attr->qp_context; - if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { - qp->recv_cq = NULL; - qp->srq = NULL; - } else { - qp->recv_cq = qp_init_attr->recv_cq; - if (qp_init_attr->recv_cq) - atomic_inc(&qp_init_attr->recv_cq->usecnt); - qp->srq = qp_init_attr->srq; - if (qp->srq) - atomic_inc(&qp_init_attr->srq->usecnt); - } +struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) +{ + struct ib_device *device = pd->device; + struct ib_qp *qp; + int ret; - qp->send_cq = qp_init_attr->send_cq; - qp->xrcd = NULL; + /* + * If the callers is using the RDMA API calculate the resources + * needed for the RDMA READ/WRITE operations. + * + * Note that these callers need to pass in a port number. + */ + if (qp_init_attr->cap.max_rdma_ctxs) + rdma_rw_init_qp(device, qp_init_attr); - atomic_inc(&pd->usecnt); - if (qp_init_attr->send_cq) - atomic_inc(&qp_init_attr->send_cq->usecnt); - if (qp_init_attr->rwq_ind_tbl) - atomic_inc(&qp->rwq_ind_tbl->usecnt); + qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller); + if (IS_ERR(qp)) + return qp; + + ib_qp_usecnt_inc(qp); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); - if (ret) { - pr_err("failed to init MR pool ret= %d\n", ret); - ib_destroy_qp(qp); - return ERR_PTR(ret); - } + if (ret) + goto err; } /* @@ -1215,10 +1381,17 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); + if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) + qp->integrity_en = true; return qp; + +err: + ib_destroy_qp(qp); + return ERR_PTR(ret); + } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_qp_kernel); static const struct { int valid; @@ -1591,22 +1764,48 @@ static bool is_qp_type_connected(const struct ib_qp *qp) qp->qp_type == IB_QPT_XRC_TGT); } -/** +/* * IB core internal function to perform QP attributes modification. */ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; const struct ib_gid_attr *old_sgid_attr_av; const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + attr->xmit_slave = NULL; if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); if (ret) return ret; + + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + struct net_device *slave; + + /* + * If the user provided the qp_attr then we have to + * resolve it. Kerne users have to provide already + * resolved rdma_ah_attr's. + */ + if (udata) { + ret = ib_resolve_eth_dmac(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } + slave = rdma_lag_get_ah_roce_slave(qp->device, + &attr->ah_attr, + GFP_KERNEL); + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); + goto out_av; + } + attr->xmit_slave = slave; + } } if (attr_mask & IB_QP_ALT_PATH) { /* @@ -1628,23 +1827,11 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } - /* - * If the user provided the qp_attr then we have to resolve it. Kernel - * users have to provide already resolved rdma_ah_attr's - */ - if (udata && (attr_mask & IB_QP_AV) && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto out; - } - if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, @@ -1661,6 +1848,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } + /* + * Bind this qp to a counter automatically based on the rdma counter + * rules. This only set in RST2INIT with port specified + */ + if (!qp->counter && (attr_mask & IB_QP_PORT) && + ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT)) + rdma_counter_bind_qp_auto(qp, attr->port_num); + ret = ib_security_modify_qp(qp, attr, attr_mask, udata); if (ret) goto out; @@ -1678,8 +1873,10 @@ out: if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { + rdma_lag_put_ah_roce_slave(attr->xmit_slave); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); + } return ret; } @@ -1701,20 +1898,100 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, + u16 *speed, u8 *width) +{ + if (!lanes) { + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_50000) { + *width = IB_WIDTH_2X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_100000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_200000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_HDR; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_NDR; + } + + return; + } + + switch (lanes) { + case 1: + *width = IB_WIDTH_1X; + break; + case 2: + *width = IB_WIDTH_2X; + break; + case 4: + *width = IB_WIDTH_4X; + break; + case 8: + *width = IB_WIDTH_8X; + break; + case 12: + *width = IB_WIDTH_12X; + break; + default: + *width = IB_WIDTH_1X; + } + + switch (netdev_speed / lanes) { + case SPEED_2500: + *speed = IB_SPEED_SDR; + break; + case SPEED_5000: + *speed = IB_SPEED_DDR; + break; + case SPEED_10000: + *speed = IB_SPEED_FDR10; + break; + case SPEED_14000: + *speed = IB_SPEED_FDR; + break; + case SPEED_25000: + *speed = IB_SPEED_EDR; + break; + case SPEED_50000: + *speed = IB_SPEED_HDR; + break; + case SPEED_100000: + *speed = IB_SPEED_NDR; + break; + default: + *speed = IB_SPEED_SDR; + } +} + +int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) { int rc; u32 netdev_speed; struct net_device *netdev; - struct ethtool_link_ksettings lksettings; + struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; - if (!dev->ops.get_netdev) - return -EOPNOTSUPP; - - netdev = dev->ops.get_netdev(dev, port_num); + netdev = ib_device_get_netdev(dev, port_num); if (!netdev) return -ENODEV; @@ -1724,33 +2001,17 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) dev_put(netdev); - if (!rc) { + if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) { netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; - pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name, - netdev_speed); + if (rc) + pr_warn("%s speed is unknown, defaulting to %u\n", + netdev->name, netdev_speed); } - if (netdev_speed <= SPEED_1000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_SDR; - } else if (netdev_speed <= SPEED_10000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_FDR10; - } else if (netdev_speed <= SPEED_20000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_DDR; - } else if (netdev_speed <= SPEED_25000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_EDR; - } else if (netdev_speed <= SPEED_40000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_FDR10; - } else { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_EDR; - } + ib_get_width_and_speed(netdev_speed, lksettings.lanes, + speed, width); return 0; } @@ -1787,9 +2048,9 @@ int ib_close_qp(struct ib_qp *qp) if (real_qp == qp) return -EINVAL; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_del(&qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); atomic_dec(&real_qp->usecnt); if (qp->qp_sec) @@ -1808,34 +2069,27 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) real_qp = qp->real_qp; xrcd = real_qp->xrcd; - - mutex_lock(&xrcd->tgt_qp_mutex); + down_write(&xrcd->tgt_qps_rwsem); ib_close_qp(qp); if (atomic_read(&real_qp->usecnt) == 0) - list_del(&real_qp->xrcd_list); + xa_erase(&xrcd->tgt_qps, real_qp->qp_num); else real_qp = NULL; - mutex_unlock(&xrcd->tgt_qp_mutex); + up_write(&xrcd->tgt_qps_rwsem); if (real_qp) { ret = ib_destroy_qp(real_qp); if (!ret) atomic_dec(&xrcd->usecnt); - else - __ib_insert_xrcd_qp(xrcd, real_qp); } return 0; } -int ib_destroy_qp(struct ib_qp *qp) +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) { const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; - struct ib_pd *pd; - struct ib_cq *scq, *rcq; - struct ib_srq *srq; - struct ib_rwq_ind_table *ind_tbl; struct ib_qp_security *sec; int ret; @@ -1847,11 +2101,6 @@ int ib_destroy_qp(struct ib_qp *qp) if (qp->real_qp != qp) return __ib_destroy_shared_qp(qp); - pd = qp->pd; - scq = qp->send_cq; - rcq = qp->recv_cq; - srq = qp->srq; - ind_tbl = qp->rwq_ind_tbl; sec = qp->qp_sec; if (sec) ib_destroy_qp_security_begin(sec); @@ -1859,33 +2108,28 @@ int ib_destroy_qp(struct ib_qp *qp) if (!qp->uobject) rdma_rw_cleanup_mrs(qp); - rdma_restrack_del(&qp->res); - ret = qp->device->ops.destroy_qp(qp); - if (!ret) { - if (alt_path_sgid_attr) - rdma_put_gid_attr(alt_path_sgid_attr); - if (av_sgid_attr) - rdma_put_gid_attr(av_sgid_attr); - if (pd) - atomic_dec(&pd->usecnt); - if (scq) - atomic_dec(&scq->usecnt); - if (rcq) - atomic_dec(&rcq->usecnt); - if (srq) - atomic_dec(&srq->usecnt); - if (ind_tbl) - atomic_dec(&ind_tbl->usecnt); - if (sec) - ib_destroy_qp_security_end(sec); - } else { + rdma_counter_unbind_qp(qp, qp->port, true); + ret = qp->device->ops.destroy_qp(qp, udata); + if (ret) { if (sec) ib_destroy_qp_security_abort(sec); + return ret; } + if (alt_path_sgid_attr) + rdma_put_gid_attr(alt_path_sgid_attr); + if (av_sgid_attr) + rdma_put_gid_attr(av_sgid_attr); + + ib_qp_usecnt_dec(qp); + if (sec) + ib_destroy_qp_security_end(sec); + + rdma_restrack_del(&qp->res); + kfree(qp); return ret; } -EXPORT_SYMBOL(ib_destroy_qp); +EXPORT_SYMBOL(ib_destroy_qp_user); /* Completion queues */ @@ -1897,45 +2141,70 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, const char *caller) { struct ib_cq *cq; + int ret; + + cq = rdma_zalloc_drv_obj(device, ib_cq); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->device = device; + cq->uobject = NULL; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); - cq = device->ops.create_cq(device, cq_attr, NULL, NULL); - - if (!IS_ERR(cq)) { - cq->device = device; - cq->uobject = NULL; - cq->comp_handler = comp_handler; - cq->event_handler = event_handler; - cq->cq_context = cq_context; - atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_kadd(&cq->res); + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, caller); + + ret = device->ops.create_cq(cq, cq_attr, NULL); + if (ret) { + rdma_restrack_put(&cq->res); + kfree(cq); + return ERR_PTR(ret); } + rdma_restrack_add(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.modify_cq ? cq->device->ops.modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_set_cq_moderation); -int ib_destroy_cq(struct ib_cq *cq) +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { + int ret; + + if (WARN_ON_ONCE(cq->shared)) + return -EOPNOTSUPP; + if (atomic_read(&cq->usecnt)) return -EBUSY; + ret = cq->device->ops.destroy_cq(cq, udata); + if (ret) + return ret; + rdma_restrack_del(&cq->res); - return cq->device->ops.destroy_cq(cq); + kfree(cq); + return ret; } -EXPORT_SYMBOL(ib_destroy_cq); +EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.resize_cq ? cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } @@ -1943,23 +2212,78 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -int ib_dereg_mr(struct ib_mr *mr) +struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags) +{ + struct ib_mr *mr; + + if (access_flags & IB_ACCESS_ON_DEMAND) { + if (!(pd->device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING)) { + pr_debug("ODP support not available\n"); + return ERR_PTR(-EINVAL); + } + } + + mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr, + access_flags, NULL, NULL); + + if (IS_ERR(mr)) + return mr; + + mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; + mr->pd = pd; + mr->dm = NULL; + atomic_inc(&pd->usecnt); + mr->iova = virt_addr; + mr->length = length; + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); + + return mr; +} +EXPORT_SYMBOL(ib_reg_user_mr); + +int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + if (!pd->device->ops.advise_mr) + return -EOPNOTSUPP; + + if (!num_sge) + return 0; + + return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, + NULL); +} +EXPORT_SYMBOL(ib_advise_mr); + +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; + struct ib_dmah *dmah = mr->dmah; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; + trace_mr_dereg(mr); rdma_restrack_del(&mr->res); - ret = mr->device->ops.dereg_mr(mr); + ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { atomic_dec(&pd->usecnt); if (dm) atomic_dec(&dm->usecnt); + if (dmah) + atomic_dec(&dmah->usecnt); + kfree(sig_attrs); } return ret; } -EXPORT_SYMBOL(ib_dereg_mr); +EXPORT_SYMBOL(ib_dereg_mr_user); /** * ib_alloc_mr() - Allocates a memory region @@ -1973,78 +2297,104 @@ EXPORT_SYMBOL(ib_dereg_mr); * max_num_sg * used_page_size. * */ -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { struct ib_mr *mr; - if (!pd->device->ops.alloc_mr) - return ERR_PTR(-EOPNOTSUPP); + if (!pd->device->ops.alloc_mr) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->dm = NULL; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); + if (mr_type == IB_MR_TYPE_INTEGRITY) { + WARN_ON_ONCE(1); + mr = ERR_PTR(-EINVAL); + goto out; } + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); + if (IS_ERR(mr)) + goto out; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->type = mr_type; + mr->sig_attrs = NULL; + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); +out: + trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr); -/* "Fast" memory regions */ - -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) +/** + * ib_alloc_mr_integrity() - Allocates an integrity memory region + * @pd: protection domain associated with the region + * @max_num_data_sg: maximum data sg entries available for registration + * @max_num_meta_sg: maximum metadata sg entries available for + * registration + * + * Notes: + * Memory registration page/sg lists must not exceed max_num_sg, + * also the integrity page/sg lists must not exceed max_num_meta_sg. + * + */ +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg) { - struct ib_fmr *fmr; - - if (!pd->device->ops.alloc_fmr) - return ERR_PTR(-EOPNOTSUPP); + struct ib_mr *mr; + struct ib_sig_attrs *sig_attrs; - fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr); - if (!IS_ERR(fmr)) { - fmr->device = pd->device; - fmr->pd = pd; - atomic_inc(&pd->usecnt); + if (!pd->device->ops.alloc_mr_integrity || + !pd->device->ops.map_mr_sg_pi) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; } - return fmr; -} -EXPORT_SYMBOL(ib_alloc_fmr); - -int ib_unmap_fmr(struct list_head *fmr_list) -{ - struct ib_fmr *fmr; - - if (list_empty(fmr_list)) - return 0; + if (!max_num_meta_sg) { + mr = ERR_PTR(-EINVAL); + goto out; + } - fmr = list_entry(fmr_list->next, struct ib_fmr, list); - return fmr->device->ops.unmap_fmr(fmr_list); -} -EXPORT_SYMBOL(ib_unmap_fmr); + sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); + if (!sig_attrs) { + mr = ERR_PTR(-ENOMEM); + goto out; + } -int ib_dealloc_fmr(struct ib_fmr *fmr) -{ - struct ib_pd *pd; - int ret; + mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, + max_num_meta_sg); + if (IS_ERR(mr)) { + kfree(sig_attrs); + goto out; + } - pd = fmr->pd; - ret = fmr->device->ops.dealloc_fmr(fmr); - if (!ret) - atomic_dec(&pd->usecnt); + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->type = IB_MR_TYPE_INTEGRITY; + mr->sig_attrs = sig_attrs; - return ret; + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); +out: + trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); + return mr; } -EXPORT_SYMBOL(ib_dealloc_fmr); +EXPORT_SYMBOL(ib_alloc_mr_integrity); /* Multicast groups */ @@ -2053,7 +2403,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) struct ib_qp_init_attr init_attr = {}; struct ib_qp_attr attr = {}; int num_eth_ports = 0; - int port; + unsigned int port; /* If QP state >= init, it is assigned to a port and we can check this * port only. @@ -2068,7 +2418,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) } /* Can't get a quick answer, iterate over all ports */ - for (port = 0; port < qp->device->phys_port_cnt; port++) + rdma_for_each_port(qp->device, port) if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; @@ -2122,44 +2472,61 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) } EXPORT_SYMBOL(ib_detach_mcast); -struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) +/** + * ib_alloc_xrcd_user - Allocates an XRC domain. + * @device: The device on which to allocate the XRC domain. + * @inode: inode to connect XRCD + * @udata: Valid user data or NULL for kernel object + */ +struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, + struct inode *inode, struct ib_udata *udata) { struct ib_xrcd *xrcd; + int ret; if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, NULL, NULL); - if (!IS_ERR(xrcd)) { - xrcd->device = device; - xrcd->inode = NULL; - atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); - } + xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + xrcd->device = device; + xrcd->inode = inode; + atomic_set(&xrcd->usecnt, 0); + init_rwsem(&xrcd->tgt_qps_rwsem); + xa_init(&xrcd->tgt_qps); + ret = device->ops.alloc_xrcd(xrcd, udata); + if (ret) + goto err; return xrcd; +err: + kfree(xrcd); + return ERR_PTR(ret); } -EXPORT_SYMBOL(__ib_alloc_xrcd); +EXPORT_SYMBOL(ib_alloc_xrcd_user); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +/** + * ib_dealloc_xrcd_user - Deallocates an XRC domain. + * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object + */ +int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { - struct ib_qp *qp; int ret; if (atomic_read(&xrcd->usecnt)) return -EBUSY; - while (!list_empty(&xrcd->tgt_qp_list)) { - qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); - ret = ib_destroy_qp(qp); - if (ret) - return ret; - } - - return xrcd->device->ops.dealloc_xrcd(xrcd); + WARN_ON(!xa_empty(&xrcd->tgt_qps)); + ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata); + if (ret) + return ret; + kfree(xrcd); + return ret; } -EXPORT_SYMBOL(ib_dealloc_xrcd); +EXPORT_SYMBOL(ib_dealloc_xrcd_user); /** * ib_create_wq - Creates a WQ associated with the specified protection @@ -2201,109 +2568,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, EXPORT_SYMBOL(ib_create_wq); /** - * ib_destroy_wq - Destroys the specified WQ. + * ib_destroy_wq_user - Destroys the specified user WQ. * @wq: The WQ to destroy. + * @udata: Valid user data */ -int ib_destroy_wq(struct ib_wq *wq) +int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata) { - int err; struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; + int ret; if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->ops.destroy_wq(wq); - if (!err) { - atomic_dec(&pd->usecnt); - atomic_dec(&cq->usecnt); - } - return err; -} -EXPORT_SYMBOL(ib_destroy_wq); - -/** - * ib_modify_wq - Modifies the specified WQ. - * @wq: The WQ to modify. - * @wq_attr: On input, specifies the WQ attributes to modify. - * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ - * are being modified. - * On output, the current values of selected WQ attributes are returned. - */ -int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, - u32 wq_attr_mask) -{ - int err; - - if (!wq->device->ops.modify_wq) - return -EOPNOTSUPP; - - err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL); - return err; -} -EXPORT_SYMBOL(ib_modify_wq); - -/* - * ib_create_rwq_ind_table - Creates a RQ Indirection Table. - * @device: The device on which to create the rwq indirection table. - * @ib_rwq_ind_table_init_attr: A list of initial attributes required to - * create the Indirection Table. - * - * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less - * than the created ib_rwq_ind_table object and the caller is responsible - * for its memory allocation/free. - */ -struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr) -{ - struct ib_rwq_ind_table *rwq_ind_table; - int i; - u32 table_size; - - if (!device->ops.create_rwq_ind_table) - return ERR_PTR(-EOPNOTSUPP); - - table_size = (1 << init_attr->log_ind_tbl_size); - rwq_ind_table = device->ops.create_rwq_ind_table(device, - init_attr, NULL); - if (IS_ERR(rwq_ind_table)) - return rwq_ind_table; - - rwq_ind_table->ind_tbl = init_attr->ind_tbl; - rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; - rwq_ind_table->device = device; - rwq_ind_table->uobject = NULL; - atomic_set(&rwq_ind_table->usecnt, 0); - - for (i = 0; i < table_size; i++) - atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); - - return rwq_ind_table; -} -EXPORT_SYMBOL(ib_create_rwq_ind_table); - -/* - * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. - * @wq_ind_table: The Indirection Table to destroy. -*/ -int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) -{ - int err, i; - u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); - struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; - - if (atomic_read(&rwq_ind_table->usecnt)) - return -EBUSY; - - err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table); - if (!err) { - for (i = 0; i < table_size; i++) - atomic_dec(&ind_tbl[i]->usecnt); - } + ret = wq->device->ops.destroy_wq(wq, udata); + if (ret) + return ret; - return err; + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + return ret; } -EXPORT_SYMBOL(ib_destroy_rwq_ind_table); +EXPORT_SYMBOL(ib_destroy_wq_user); int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) @@ -2315,7 +2601,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, } EXPORT_SYMBOL(ib_check_mr_status); -int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, +int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state) { if (!device->ops.set_vf_link_state) @@ -2325,7 +2611,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, } EXPORT_SYMBOL(ib_set_vf_link_state); -int ib_get_vf_config(struct ib_device *device, int vf, u8 port, +int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info) { if (!device->ops.get_vf_config) @@ -2335,7 +2621,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, } EXPORT_SYMBOL(ib_get_vf_config); -int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, +int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats) { if (!device->ops.get_vf_stats) @@ -2345,7 +2631,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, } EXPORT_SYMBOL(ib_get_vf_stats); -int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, +int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type) { if (!device->ops.set_vf_guid) @@ -2355,6 +2641,53 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); +/** + * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection + * information) and set an appropriate memory region for registration. + * @mr: memory region + * @data_sg: dma mapped scatterlist for data + * @data_sg_nents: number of entries in data_sg + * @data_sg_offset: offset in bytes into data_sg + * @meta_sg: dma mapped scatterlist for metadata + * @meta_sg_nents: number of entries in meta_sg + * @meta_sg_offset: offset in bytes into meta_sg + * @page_size: page vector desired page size + * + * Constraints: + * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. + * + * Return: 0 on success. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size) +{ + if (unlikely(!mr->device->ops.map_mr_sg_pi || + WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) + return -EOPNOTSUPP; + + mr->page_size = page_size; + + return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, + meta_sg_nents, meta_sg_offset); +} +EXPORT_SYMBOL(ib_map_mr_sg_pi); + /** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. @@ -2365,6 +2698,7 @@ EXPORT_SYMBOL(ib_set_vf_guid); * @page_size: page vector desired page size * * Constraints: + * * - The first sg element is allowed to have an offset. * - Each sg element must either be aligned to page_size or virtually * contiguous to the previous element. In case an sg element has a @@ -2398,10 +2732,12 @@ EXPORT_SYMBOL(ib_map_mr_sg); * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg - * @sg_offset_p: IN: start offset in bytes into sg - * OUT: offset in bytes for element n of the sg of the first + * @sg_offset_p: ==== ======================================================= + * IN start offset in bytes into sg + * OUT offset in bytes for element n of the sg of the first * byte that has not been processed where n is the return * value of this function. + * ==== ======================================================= * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest @@ -2566,6 +2902,72 @@ static void __ib_drain_rq(struct ib_qp *qp) wait_for_completion(&rdrain.done); } +/* + * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout + * expires. + * @qp: queue pair associated with SRQ to drain + * + * Quoting 10.3.1 Queue Pair and EE Context States: + * + * Note, for QPs that are associated with an SRQ, the Consumer should take the + * QP through the Error State before invoking a Destroy QP or a Modify QP to the + * Reset State. The Consumer may invoke the Destroy QP without first performing + * a Modify QP to the Error State and waiting for the Affiliated Asynchronous + * Last WQE Reached Event. However, if the Consumer does not wait for the + * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment + * leakage may occur. Therefore, it is good programming practice to tear down a + * QP that is associated with an SRQ by using the following process: + * + * - Put the QP in the Error State + * - Wait for the Affiliated Asynchronous Last WQE Reached Event; + * - either: + * drain the CQ by invoking the Poll CQ verb and either wait for CQ + * to be empty or the number of Poll CQ operations has exceeded + * CQ capacity size; + * - or + * post another WR that completes on the same CQ and wait for this + * WR to return as a WC; + * - and then invoke a Destroy QP or Reset QP. + * + * We use the first option. + */ +static void __ib_drain_srq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_cq *cq; + int n, polled = 0; + int ret; + + if (!qp->srq) { + WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp); + return; + } + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret); + return; + } + + if (ib_srq_has_cq(qp->srq->srq_type)) { + cq = qp->srq->ext.cq; + } else if (qp->recv_cq) { + cq = qp->recv_cq; + } else { + WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp); + return; + } + + if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) { + while (polled != cq->cqe) { + n = ib_process_cq_direct(cq, cq->cqe - polled); + if (!n) + return; + polled += n; + } + } +} + /** * ib_drain_sq() - Block until all SQ CQEs have been consumed by the * application. @@ -2591,6 +2993,7 @@ void ib_drain_sq(struct ib_qp *qp) qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); + trace_cq_drain_complete(qp->send_cq); } EXPORT_SYMBOL(ib_drain_sq); @@ -2619,6 +3022,7 @@ void ib_drain_rq(struct ib_qp *qp) qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); + trace_cq_drain_complete(qp->recv_cq); } EXPORT_SYMBOL(ib_drain_rq); @@ -2642,10 +3046,12 @@ void ib_drain_qp(struct ib_qp *qp) ib_drain_sq(qp); if (!qp->srq) ib_drain_rq(qp); + else + __ib_drain_srq(qp); } EXPORT_SYMBOL(ib_drain_qp); -struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, +struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)) @@ -2671,7 +3077,7 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(rdma_alloc_netdev); -int rdma_init_netdev(struct ib_device *device, u8 port_num, +int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), @@ -2692,3 +3098,90 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num, netdev, params.param); } EXPORT_SYMBOL(rdma_init_netdev); + +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgsz) +{ + memset(biter, 0, sizeof(struct ib_block_iter)); + biter->__sg = sglist; + biter->__sg_nents = nents; + + /* Driver provides best block size to use */ + biter->__pg_bit = __fls(pgsz); +} +EXPORT_SYMBOL(__rdma_block_iter_start); + +bool __rdma_block_iter_next(struct ib_block_iter *biter) +{ + unsigned int block_offset; + unsigned int delta; + + if (!biter->__sg_nents || !biter->__sg) + return false; + + biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; + block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); + delta = BIT_ULL(biter->__pg_bit) - block_offset; + + while (biter->__sg_nents && biter->__sg && + sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) { + delta -= sg_dma_len(biter->__sg) - biter->__sg_advance; + biter->__sg_advance = 0; + biter->__sg = sg_next(biter->__sg); + biter->__sg_nents--; + } + biter->__sg_advance += delta; + + return true; +} +EXPORT_SYMBOL(__rdma_block_iter_next); + +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for the drivers. + * @descs: array of static descriptors + * @num_counters: number of elements in array + * @lifespan: milliseconds between updates + */ +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; + + stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); + if (!stats) + return NULL; + + stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!stats->is_disabled) + goto err; + + stats->descs = descs; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + mutex_init(&stats->lock); + + return stats; + +err: + kfree(stats); + return NULL; +} +EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); + +/** + * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats + * @stats: statistics to release + */ +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) +{ + if (!stats) + return; + + kfree(stats->is_disabled); + kfree(stats); +} +EXPORT_SYMBOL(rdma_free_hw_stats_struct); |
