Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe.c       | 110
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe.h       |   1
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_av.c    |  19
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_comp.c  |   8
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_cq.c    |  20
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_loc.h   |  32
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mcast.c | 542
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mmap.c  |   1
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mr.c    |  15
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mw.c    |  38
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_net.c   |  41
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_pool.c  | 433
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_pool.h  | 105
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_qp.c    |  57
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_queue.c |  10
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_recv.c  |  26
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_req.c   |  71
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_resp.c  | 170
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_verbs.c | 108
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_verbs.h |  27
20 files changed, 866 insertions, 968 deletions
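The two structural changes in this series are: (1) the keyed object pools RXE_TYPE_MC_GRP and RXE_TYPE_MC_ELEM are removed, replaced by a device-private red-black tree of struct rxe_mcg objects keyed on the mgid, and (2) the hand-rolled bitmap-plus-rb-tree index machinery in rxe_pool.c is replaced by an xarray. For orientation, here is a condensed sketch of the memcmp-keyed tree search that the new rxe_mcast.c relies on; the field names follow the patch, but mcg_search() itself is illustrative rather than a literal hunk.

    /* search a red-black tree of mcg's keyed by mgid (cf. __rxe_lookup_mcg) */
    static struct rxe_mcg *mcg_search(struct rb_root *tree, union ib_gid *mgid)
    {
            struct rb_node *node = tree->rb_node;

            while (node) {
                    struct rxe_mcg *mcg = rb_entry(node, struct rxe_mcg, node);
                    int cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

                    if (cmp > 0)
                            node = node->rb_left;
                    else if (cmp < 0)
                            node = node->rb_right;
                    else
                            return mcg;     /* found */
            }

            return NULL;
    }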
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index fab291245366..2dae7538a2ea 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -28,8 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); - rxe_pool_cleanup(&rxe->mc_grp_pool); - rxe_pool_cleanup(&rxe->mc_elem_pool); + + WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); @@ -114,106 +114,37 @@ static void rxe_init_ports(struct rxe_dev *rxe) } /* init pools of managed objects */ -static int rxe_init_pools(struct rxe_dev *rxe) +static void rxe_init_pools(struct rxe_dev *rxe) { - int err; - - err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, - rxe->max_ucontext); - if (err) - goto err1; - - err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, - rxe->attr.max_pd); - if (err) - goto err2; - - err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, - rxe->attr.max_ah); - if (err) - goto err3; - - err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, - rxe->attr.max_srq); - if (err) - goto err4; - - err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, - rxe->attr.max_qp); - if (err) - goto err5; - - err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, - rxe->attr.max_cq); - if (err) - goto err6; - - err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, - rxe->attr.max_mr); - if (err) - goto err7; - - err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, - rxe->attr.max_mw); - if (err) - goto err8; - - err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, - rxe->attr.max_mcast_grp); - if (err) - goto err9; - - err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, - rxe->attr.max_total_mcast_qp_attach); - if (err) - goto err10; - - return 0; - -err10: - rxe_pool_cleanup(&rxe->mc_grp_pool); -err9: - rxe_pool_cleanup(&rxe->mw_pool); -err8: - rxe_pool_cleanup(&rxe->mr_pool); -err7: - rxe_pool_cleanup(&rxe->cq_pool); -err6: - rxe_pool_cleanup(&rxe->qp_pool); -err5: - rxe_pool_cleanup(&rxe->srq_pool); -err4: - rxe_pool_cleanup(&rxe->ah_pool); -err3: - rxe_pool_cleanup(&rxe->pd_pool); -err2: - rxe_pool_cleanup(&rxe->uc_pool); -err1: - return err; + rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC); + rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); + rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); + rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); + rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP); + rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); + rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); + rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ -static int rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe) { - int err; - /* init default device parameters */ rxe_init_device_param(rxe); rxe_init_ports(rxe); - - err = rxe_init_pools(rxe); - if (err) - return err; + rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); - mutex_init(&rxe->usdev_lock); + /* init multicast support */ + spin_lock_init(&rxe->mcg_lock); + rxe->mcg_tree = RB_ROOT; - return 0; + mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) @@ -235,12 +166,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) */ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { - int err; - - err = rxe_init(rxe); - if (err) - return err; - + rxe_init(rxe); 
rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index fb9066e6f5f0..30fbdf3bc76a 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -12,7 +12,6 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> #include <linux/skbuff.h> #include <rdma/ib_verbs.h> diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 38c7b6fb39d7..3b05314ca739 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { struct rxe_ah *ah; u32 ah_num; + if (ahp) + *ahp = NULL; + if (!pkt || !pkt->qp) return NULL; @@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) if (ah_num) { /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); - if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + if (!ah) { pr_warn("Unable to find AH matching ah_num\n"); return NULL; } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_put(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_put(ah); + return &ah->av; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index f363fe3fa414..138b3e7d3a5f 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -526,7 +526,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->sq.queue; while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -548,7 +548,7 @@ static void free_pkt(struct rxe_pkt_info *pkt) struct ib_device *dev = qp->ibqp.device; kfree_skb(skb); - rxe_drop_ref(qp); + rxe_put(qp); ib_device_put(dev); } @@ -562,7 +562,7 @@ int rxe_completer(void *arg) enum comp_state state; int ret = 0; - rxe_add_ref(qp); + rxe_get(qp); if (!qp->valid || qp->req.state == QP_STATE_ERROR || qp->req.state == QP_STATE_RESET) { @@ -761,7 +761,7 @@ int rxe_completer(void *arg) done: if (pkt) free_pkt(pkt); - rxe_drop_ref(qp); + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 6baaaa34458e..642b52539ac3 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -42,13 +42,14 @@ err1: static void rxe_send_complete(struct tasklet_struct *t) { struct rxe_cq *cq = from_tasklet(cq, t, comp_task); + unsigned long flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, flags); if (cq->is_dying) { - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); return; } - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } @@ -107,12 +108,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) struct ib_event ev; int full; void *addr; + unsigned long flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, flags); full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); if (unlikely(full)) { - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); if 
(cq->ibcq.event_handler) { ev.device = cq->ibcq.device; ev.element.cq = &cq->ibcq; @@ -128,7 +130,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); if ((cq->notify == IB_CQ_NEXT_COMP) || (cq->notify == IB_CQ_SOLICITED && solicited)) { @@ -141,9 +143,11 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) void rxe_cq_disable(struct rxe_cq *cq) { - spin_lock_bh(&cq->cq_lock); + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); cq->is_dying = true; - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); } void rxe_cq_cleanup(struct rxe_pool_elem *elem) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index b1e174afb1d4..2ffbe3390668 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -40,18 +40,10 @@ void rxe_cq_disable(struct rxe_cq *cq); void rxe_cq_cleanup(struct rxe_pool_elem *arg); /* rxe_mcast.c */ -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p); - -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp); - -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid); - -void rxe_drop_all_mcast_groups(struct rxe_qp *qp); - -void rxe_mc_cleanup(struct rxe_pool_elem *arg); +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +void rxe_cleanup_mcg(struct kref *kref); /* rxe_mmap.c */ struct rxe_mmap_info { @@ -102,35 +94,27 @@ void rxe_mw_cleanup(struct rxe_pool_elem *arg); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); /* rxe_qp.c */ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); - int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd, struct ib_udata *udata); - int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); - int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata); - int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - void rxe_qp_error(struct rxe_qp *qp); - +int rxe_qp_chk_destroy(struct rxe_qp *qp); void rxe_qp_destroy(struct rxe_qp 
*qp); - void rxe_qp_cleanup(struct rxe_pool_elem *elem); static inline int qp_num(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index bd1ac88b8700..ae8f11cb704a 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -1,178 +1,488 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* + * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ +/* + * rxe_mcast.c implements driver support for multicast transport. + * It is based on two data structures struct rxe_mcg ('mcg') and + * struct rxe_mca ('mca'). An mcg is allocated each time a qp is + * attached to a new mgid for the first time. These are indexed by + * a red-black tree using the mgid. This data structure is searched + * for the mcg when a multicast packet is received and when another + * qp is attached to the same mgid. It is cleaned up when the last qp + * is detached from the mcg. Each time a qp is attached to an mcg an + * mca is created. It holds a pointer to the qp and is added to a list + * of qp's that are attached to the mcg. The qp_list is used to replicate + * mcast packets in the rxe receive path. + */ + #include "rxe.h" -#include "rxe_loc.h" -/* caller should hold mc_grp_pool->pool_lock */ -static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, - struct rxe_pool *pool, - union ib_gid *mgid) +/** + * rxe_mcast_add - add multicast address to rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { - int err; - struct rxe_mc_grp *grp; + unsigned char ll_addr[ETH_ALEN]; - grp = rxe_alloc_locked(&rxe->mc_grp_pool); - if (!grp) - return ERR_PTR(-ENOMEM); + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - rxe_add_key_locked(grp, mgid); + return dev_mc_add(rxe->ndev, ll_addr); +} - err = rxe_mcast_add(rxe, mgid); - if (unlikely(err)) { - rxe_drop_key_locked(grp); - rxe_drop_ref(grp); - return ERR_PTR(err); +/** + * rxe_mcast_delete - delete multicast address from rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +{ + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + + return dev_mc_del(rxe->ndev, ll_addr); +} + +/** + * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) + * @mcg: mcg object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock and + * is responsible to avoid adding the same mcg twice to the tree. 
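An aside before the tree helpers: rxe_mcast_add() and rxe_mcast_delete() above lean on ipv6_eth_mc_map(), which implements the RFC 2464 mapping from an IPv6 multicast address to an Ethernet group address: 0x33, 0x33, then the low-order 32 bits of the address. Open-coded for reference (mcast_gid_to_ll() is a hypothetical name, not part of the patch):

    /* what ipv6_eth_mc_map() computes for a 16-byte multicast GID */
    static void mcast_gid_to_ll(const union ib_gid *mgid,
                                unsigned char ll[ETH_ALEN])
    {
            ll[0] = 0x33;
            ll[1] = 0x33;
            memcpy(&ll[2], &mgid->raw[12], 4);      /* low-order 32 bits */
    }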
+ */ +static void __rxe_insert_mcg(struct rxe_mcg *mcg) +{ + struct rb_root *tree = &mcg->rxe->mcg_tree; + struct rb_node **link = &tree->rb_node; + struct rb_node *node = NULL; + struct rxe_mcg *tmp; + int cmp; + + while (*link) { + node = *link; + tmp = rb_entry(node, struct rxe_mcg, node); + + cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; } - return grp; + rb_link_node(&mcg->node, node, link); + rb_insert_color(&mcg->node, tree); } -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p) +/** + * __rxe_remove_mcg - remove an mcg from red-black tree holding lock + * @mcg: mcast group object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_remove_mcg(struct rxe_mcg *mcg) { - int err; - struct rxe_mc_grp *grp; - struct rxe_pool *pool = &rxe->mc_grp_pool; + rb_erase(&mcg->node, &mcg->rxe->mcg_tree); +} - if (rxe->attr.max_mcast_qp_attach == 0) - return -EINVAL; +/** + * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Context: caller must hold rxe->mcg_lock + * Returns: mcg on success and takes a ref to mcg else NULL + */ +static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, + union ib_gid *mgid) +{ + struct rb_root *tree = &rxe->mcg_tree; + struct rxe_mcg *mcg; + struct rb_node *node; + int cmp; - write_lock_bh(&pool->pool_lock); + node = tree->rb_node; - grp = rxe_pool_get_key_locked(pool, mgid); - if (grp) - goto done; + while (node) { + mcg = rb_entry(node, struct rxe_mcg, node); - grp = create_grp(rxe, pool, mgid); - if (IS_ERR(grp)) { - write_unlock_bh(&pool->pool_lock); - err = PTR_ERR(grp); - return err; + cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; } -done: - write_unlock_bh(&pool->pool_lock); - *grp_p = grp; + if (node) { + kref_get(&mcg->ref_cnt); + return mcg; + } + + return NULL; +} + +/** + * rxe_lookup_mcg - lookup mcg in red-black tree + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Returns: mcg if found else NULL + */ +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct rxe_mcg *mcg; + unsigned long flags; + + spin_lock_irqsave(&rxe->mcg_lock, flags); + mcg = __rxe_lookup_mcg(rxe, mgid); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + + return mcg; +} + +/** + * __rxe_init_mcg - initialize a new mcg + * @rxe: rxe device + * @mgid: multicast address as a gid + * @mcg: new mcg object + * + * Context: caller should hold rxe->mcg lock + * Returns: 0 on success else an error + */ +static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) +{ + int err; + + err = rxe_mcast_add(rxe, mgid); + if (unlikely(err)) + return err; + + kref_init(&mcg->ref_cnt); + memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); + INIT_LIST_HEAD(&mcg->qp_list); + mcg->rxe = rxe; + + /* caller holds a ref on mcg but that will be + * dropped when mcg goes out of scope. We need to take a ref + * on the pointer that will be saved in the red-black tree + * by __rxe_insert_mcg and used to lookup mcg from mgid later. + * Inserting mcg makes it visible to outside so this should + * be done last after the object is ready.
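The block comment above is the heart of the mcg lifetime model: kref_init() leaves the creator holding one reference, and the pointer stored in the tree is backed by a second one taken just before __rxe_insert_mcg(). The same kref pattern in isolation, with placeholder names (struct obj, obj_release() and obj_new() are not patch code):

    struct obj {
            struct kref ref;
    };

    static void obj_release(struct kref *kref)
    {
            kfree(container_of(kref, struct obj, ref));
    }

    static struct obj *obj_new(void)
    {
            struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

            if (o)
                    kref_init(&o->ref);     /* creator's reference */
            return o;
    }

    /* publish: kref_get(&o->ref) before storing o in a shared structure;
     * every unpublish or pointer drop pairs with
     * kref_put(&o->ref, obj_release)
     */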
+ */ + kref_get(&mcg->ref_cnt); + __rxe_insert_mcg(mcg); + return 0; } -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp) +/** + * rxe_get_mcg - lookup or allocate a mcg + * @rxe: rxe device object + * @mgid: multicast IP address as a gid + * + * Returns: mcg on success else ERR_PTR(error) + */ +static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { + struct rxe_mcg *mcg, *tmp; + unsigned long flags; int err; - struct rxe_mc_elem *elem; - /* check to see of the qp is already a member of the group */ - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); - list_for_each_entry(elem, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - err = 0; - goto out; - } - } + if (rxe->attr.max_mcast_grp == 0) + return ERR_PTR(-EINVAL); - if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { - err = -ENOMEM; + /* check to see if mcg already exists */ + mcg = rxe_lookup_mcg(rxe, mgid); + if (mcg) + return mcg; + + /* speculative alloc of new mcg */ + mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); + if (!mcg) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&rxe->mcg_lock, flags); + /* re-check to see if someone else just added it */ + tmp = __rxe_lookup_mcg(rxe, mgid); + if (tmp) { + kfree(mcg); + mcg = tmp; goto out; } - elem = rxe_alloc_locked(&rxe->mc_elem_pool); - if (!elem) { + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { err = -ENOMEM; - goto out; + goto err_dec; + } + + err = __rxe_init_mcg(rxe, mgid, mcg); + if (err) + goto err_dec; +out: + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + return mcg; + +err_dec: + atomic_dec(&rxe->mcg_num); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + kfree(mcg); + return ERR_PTR(err); +} + +/** + * rxe_cleanup_mcg - cleanup mcg for kref_put + * @kref: struct kref embedded in mcg + */ +void rxe_cleanup_mcg(struct kref *kref) +{ + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); + + kfree(mcg); +} + +/** + * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock + * @mcg: the mcg object + * + * Context: caller is holding rxe->mcg_lock + * no qp's are attached to mcg + */ +static void __rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + struct rxe_dev *rxe = mcg->rxe; + + /* remove mcg from red-black tree then drop ref */ + __rxe_remove_mcg(mcg); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + rxe_mcast_delete(mcg->rxe, &mcg->mgid); + atomic_dec(&rxe->mcg_num); +} + +/** + * rxe_destroy_mcg - destroy mcg object + * @mcg: the mcg object + * + * Context: no qp's are attached to mcg + */ +static void rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + unsigned long flags; + + spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); + __rxe_destroy_mcg(mcg); + spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); +} + +/** + * __rxe_init_mca - initialize a new mca holding lock + * @qp: qp object + * @mcg: mcg object + * @mca: empty space for new mca + * + * Context: caller must hold references on qp and mcg, rxe->mcg_lock + * and pass memory for new mca + * + * Returns: 0 on success else an error + */ +static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, + struct rxe_mca *mca) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int n; + + n = atomic_inc_return(&rxe->mcg_attach); + if (n > rxe->attr.max_total_mcast_qp_attach) { + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; } - /* each qp holds a ref on the grp */ - rxe_add_ref(grp); + n = atomic_inc_return(&mcg->qp_num); + if (n > rxe->attr.max_mcast_qp_attach) { + atomic_dec(&mcg->qp_num); +
atomic_dec(&rxe->mcg_attach); + return -ENOMEM; + } + + atomic_inc(&qp->mcg_num); - grp->num_qp++; - elem->qp = qp; - elem->grp = grp; + rxe_get(qp); + mca->qp = qp; - list_add(&elem->qp_list, &grp->qp_list); - list_add(&elem->grp_list, &qp->grp_list); + list_add_tail(&mca->qp_list, &mcg->qp_list); - err = 0; + return 0; +} + +/** + * rxe_attach_mcg - attach qp to mcg if not already attached + * @qp: qp object + * @mcg: mcg object + * + * Context: caller must hold reference on qp and mcg. + * Returns: 0 on success else an error + */ +static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + unsigned long flags; + int err; + + /* check to see if the qp is already a member of the group */ + spin_lock_irqsave(&rxe->mcg_lock, flags); + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + return 0; + } + } + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + + /* speculative alloc new mca without using GFP_ATOMIC */ + mca = kzalloc(sizeof(*mca), GFP_KERNEL); + if (!mca) + return -ENOMEM; + + spin_lock_irqsave(&rxe->mcg_lock, flags); + /* re-check to see if someone else just attached qp */ + list_for_each_entry(tmp, &mcg->qp_list, qp_list) { + if (tmp->qp == qp) { + kfree(mca); + err = 0; + goto out; + } + } + + err = __rxe_init_mca(qp, mcg, mca); + if (err) + kfree(mca); out: - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return err; } -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid) +/** + * __rxe_cleanup_mca - cleanup mca object holding lock + * @mca: mca object + * @mcg: mcg object + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem, *tmp; + list_del(&mca->qp_list); - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); - if (!grp) - goto err1; + atomic_dec(&mcg->qp_num); + atomic_dec(&mcg->rxe->mcg_attach); + atomic_dec(&mca->qp->mcg_num); + rxe_put(mca->qp); - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); + kfree(mca); +} - list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - list_del(&elem->qp_list); - list_del(&elem->grp_list); - grp->num_qp--; +/** + * rxe_detach_mcg - detach qp from mcg + * @mcg: mcg object + * @qp: qp object + * + * Returns: 0 on success else an error if qp is not attached. + */ +static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + unsigned long flags; - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(elem); - rxe_drop_ref(grp); /* ref held by QP */ - rxe_drop_ref(grp); /* ref from get_key */ + spin_lock_irqsave(&rxe->mcg_lock, flags); + list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + __rxe_cleanup_mca(mca, mcg); + + /* if the number of qp's attached to the + * mcast group falls to zero go ahead and + * tear it down. 
This will not free the + * object since we are still holding a ref + * from the caller + */ + if (atomic_read(&mcg->qp_num) <= 0) + __rxe_destroy_mcg(mcg); + + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return 0; } } - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(grp); /* ref from get_key */ -err1: + /* we didn't find the qp on the list */ + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return -EINVAL; } -void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +/** + * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) + * @ibqp: (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem; + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; - while (1) { - spin_lock_bh(&qp->grp_lock); - if (list_empty(&qp->grp_list)) { - spin_unlock_bh(&qp->grp_lock); - break; - } - elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, - grp_list); - list_del(&elem->grp_list); - spin_unlock_bh(&qp->grp_lock); - - grp = elem->grp; - spin_lock_bh(&grp->mcg_lock); - list_del(&elem->qp_list); - grp->num_qp--; - spin_unlock_bh(&grp->mcg_lock); - rxe_drop_ref(grp); - rxe_drop_ref(elem); - } + /* takes a ref on mcg if successful */ + mcg = rxe_get_mcg(rxe, mgid); + if (IS_ERR(mcg)) + return PTR_ERR(mcg); + + err = rxe_attach_mcg(mcg, qp); + + /* if we failed to attach the first qp to mcg tear it down */ + if (atomic_read(&mcg->qp_num) == 0) + rxe_destroy_mcg(mcg); + + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } -void rxe_mc_cleanup(struct rxe_pool_elem *elem) +/** + * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) + * @ibqp: address of (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp = container_of(elem, typeof(*grp), elem); - struct rxe_dev *rxe = grp->rxe; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; + int err; - rxe_drop_key(grp); - rxe_mcast_delete(rxe, &grp->mgid); + mcg = rxe_lookup_mcg(rxe, mgid); + if (!mcg) + return -EINVAL; + + err = rxe_detach_mcg(mcg, qp); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 035f226af133..9149b6095429 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -4,7 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
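rxe_get_mcg() and rxe_attach_mcg() above share one idiom worth calling out: the new object is allocated with GFP_KERNEL before the spinlock is taken (sleeping allocations are illegal under a spinlock, and GFP_ATOMIC is less reliable), then the lookup is repeated under the lock and the allocation is discarded if another thread won the race. A generic sketch, reusing struct obj from the kref sketch above; my_lookup() and my_insert() stand in for the caller's locked data-structure operations:

    static struct obj *my_lookup(void);         /* placeholder, provided elsewhere */
    static void my_insert(struct obj *o);       /* placeholder, provided elsewhere */

    static struct obj *get_or_create(spinlock_t *lock)
    {
            struct obj *o, *tmp;
            unsigned long flags;

            o = kzalloc(sizeof(*o), GFP_KERNEL); /* may sleep: no lock held yet */
            if (!o)
                    return ERR_PTR(-ENOMEM);

            spin_lock_irqsave(lock, flags);
            tmp = my_lookup();                   /* did another thread win? */
            if (tmp) {
                    kfree(o);                    /* lose the race gracefully */
                    o = tmp;
            } else {
                    my_insert(o);
            }
            spin_unlock_irqrestore(lock, flags);

            return o;
    }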
*/ -#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/errno.h> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 453ef3c9d535..60a31b718774 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -459,7 +459,7 @@ int copy_data( if (offset >= sge->length) { if (mr) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } sge++; @@ -504,13 +504,13 @@ int copy_data( dma->resid = resid; if (mr) - rxe_drop_ref(mr); + rxe_put(mr); return 0; err2: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err1: return err; } @@ -569,7 +569,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || mr_pd(mr) != pd || (access && !(access & mr->access)) || mr->state != RXE_MR_STATE_VALID)) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } @@ -613,7 +613,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) ret = 0; err_drop_ref: - rxe_drop_ref(mr); + rxe_put(mr); err: return ret; } @@ -690,9 +690,8 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } mr->state = RXE_MR_STATE_INVALID; - rxe_drop_ref(mr_pd(mr)); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_put(mr_pd(mr)); + rxe_put(mr); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 32dd8c0b8b9e..c86b2efd58f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -12,15 +12,14 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibmw->device); int ret; - rxe_add_ref(pd); + rxe_get(pd); ret = rxe_add_to_pool(&rxe->mw_pool, mw); if (ret) { - rxe_drop_ref(pd); + rxe_put(pd); return ret; } - rxe_add_index(mw); mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1); mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? 
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; @@ -36,14 +35,14 @@ static void rxe_do_dealloc_mw(struct rxe_mw *mw) mw->mr = NULL; atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); + rxe_put(mr); } if (mw->qp) { struct rxe_qp *qp = mw->qp; mw->qp = NULL; - rxe_drop_ref(qp); + rxe_put(qp); } mw->access = 0; @@ -61,8 +60,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) rxe_do_dealloc_mw(mw); spin_unlock_bh(&mw->lock); - rxe_drop_ref(mw); - rxe_drop_ref(pd); + rxe_put(mw); + rxe_put(pd); return 0; } @@ -171,7 +170,7 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, mw->length = wqe->wr.wr.mw.length; if (mw->mr) { - rxe_drop_ref(mw->mr); + rxe_put(mw->mr); atomic_dec(&mw->mr->num_mw); mw->mr = NULL; } @@ -179,11 +178,11 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->length) { mw->mr = mr; atomic_inc(&mr->num_mw); - rxe_add_ref(mr); + rxe_get(mr); } if (mw->ibmw.type == IB_MW_TYPE_2) { - rxe_add_ref(qp); + rxe_get(qp); mw->qp = qp; } } @@ -234,9 +233,9 @@ err_unlock: spin_unlock_bh(&mw->lock); err_drop_mr: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err_drop_mw: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -261,13 +260,13 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw) /* valid type 2 MW will always have a QP pointer */ qp = mw->qp; mw->qp = NULL; - rxe_drop_ref(qp); + rxe_put(qp); /* valid type 2 MW will always have an MR pointer */ mr = mw->mr; mw->mr = NULL; atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); + rxe_put(mr); mw->access = 0; mw->addr = 0; @@ -302,7 +301,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) err_unlock: spin_unlock_bh(&mw->lock); err_drop_ref: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -323,16 +322,9 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) (mw->length == 0) || (access && !(access & mw->access)) || mw->state != RXE_MW_STATE_VALID)) { - rxe_drop_ref(mw); + rxe_put(mw); return NULL; } return mw; } - -void rxe_mw_cleanup(struct rxe_pool_elem *elem) -{ - struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); - - rxe_drop_index(mw); -} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index be72bdbfb4ba..c53f4529f098 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,24 +20,6 @@ static struct rxe_recv_sockets recv_sockets; -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) -{ - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - - return dev_mc_add(rxe->ndev, ll_addr); -} - -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) -{ - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - - return dev_mc_del(rxe->ndev, ll_addr); -} - static struct dst_entry *rxe_find_route4(struct net_device *ndev, struct in_addr *saddr, struct in_addr *daddr) @@ -289,13 +271,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -315,11 +297,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff 
*skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -340,16 +322,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -365,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); - rxe_drop_ref(qp); + rxe_put(qp); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) @@ -375,7 +358,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; - rxe_add_ref(pkt->qp); + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) { @@ -385,7 +368,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else { pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 4cb003885e00..87066d04ed18 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -12,129 +12,93 @@ static const struct rxe_type_info { const char *name; size_t size; size_t elem_offset; - void (*cleanup)(struct rxe_pool_elem *obj); + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_pool_flags flags; u32 min_index; u32 max_index; - size_t key_offset; - size_t key_size; + u32 max_elem; } rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { - .name = "rxe-uc", + .name = "uc", .size = sizeof(struct rxe_ucontext), .elem_offset = offsetof(struct rxe_ucontext, elem), - .flags = RXE_POOL_NO_ALLOC, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_PD] = { - .name = "rxe-pd", + .name = "pd", .size = sizeof(struct rxe_pd), .elem_offset = offsetof(struct rxe_pd, elem), - .flags = RXE_POOL_NO_ALLOC, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_AH] = { - .name = "rxe-ah", + .name = "ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, elem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, + .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, }, [RXE_TYPE_SRQ] = { - .name = "rxe-srq", + .name = "srq", .size = sizeof(struct rxe_srq), .elem_offset = offsetof(struct rxe_srq, elem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, + .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, }, [RXE_TYPE_QP] = { - .name = "rxe-qp", + .name = "qp", .size = sizeof(struct rxe_qp), .elem_offset = 
offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, + .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, }, [RXE_TYPE_CQ] = { - .name = "rxe-cq", + .name = "cq", .size = sizeof(struct rxe_cq), .elem_offset = offsetof(struct rxe_cq, elem), - .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_MR] = { - .name = "rxe-mr", + .name = "mr", .size = sizeof(struct rxe_mr), .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, - .flags = RXE_POOL_INDEX, + .flags = RXE_POOL_ALLOC, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, + .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, }, [RXE_TYPE_MW] = { - .name = "rxe-mw", + .name = "mw", .size = sizeof(struct rxe_mw), .elem_offset = offsetof(struct rxe_mw, elem), - .cleanup = rxe_mw_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, - }, - [RXE_TYPE_MC_GRP] = { - .name = "rxe-mc_grp", - .size = sizeof(struct rxe_mc_grp), - .elem_offset = offsetof(struct rxe_mc_grp, elem), - .cleanup = rxe_mc_cleanup, - .flags = RXE_POOL_KEY, - .key_offset = offsetof(struct rxe_mc_grp, mgid), - .key_size = sizeof(union ib_gid), - }, - [RXE_TYPE_MC_ELEM] = { - .name = "rxe-mc_elem", - .size = sizeof(struct rxe_mc_elem), - .elem_offset = offsetof(struct rxe_mc_elem, elem), + .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, }, }; -static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) -{ - int err = 0; - - if ((max - min + 1) < pool->max_elem) { - pr_warn("not enough indices for max_elem\n"); - err = -EINVAL; - goto out; - } - - pool->index.max_index = max; - pool->index.min_index = min; - - pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); - if (!pool->index.table) { - err = -ENOMEM; - goto out; - } - -out: - return err; -} - -int rxe_pool_init( - struct rxe_dev *rxe, - struct rxe_pool *pool, - enum rxe_elem_type type, - unsigned int max_elem) +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type) { const struct rxe_type_info *info = &rxe_type_info[type]; - int err = 0; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; pool->name = info->name; pool->type = type; - pool->max_elem = max_elem; + pool->max_elem = info->max_elem; pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); pool->elem_offset = info->elem_offset; pool->flags = info->flags; @@ -142,225 +106,31 @@ int rxe_pool_init( atomic_set(&pool->num_elem, 0); - rwlock_init(&pool->pool_lock); - - if (pool->flags & RXE_POOL_INDEX) { - pool->index.tree = RB_ROOT; - err = rxe_pool_init_index(pool, info->max_index, - info->min_index); - if (err) - goto out; - } - - if (pool->flags & RXE_POOL_KEY) { - pool->key.tree = RB_ROOT; - pool->key.key_offset = info->key_offset; - pool->key.key_size = info->key_size; - } - -out: - return err; + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); + pool->limit.min = info->min_index; + pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { - if (atomic_read(&pool->num_elem) > 0) - pr_warn("%s pool destroyed with unfree'd elem\n", - pool->name); - - if (pool->flags & RXE_POOL_INDEX) - bitmap_free(pool->index.table); -} - -static u32 alloc_index(struct rxe_pool *pool) -{ - u32 index; - u32 range = pool->index.max_index - pool->index.min_index + 1; - - index = find_next_zero_bit(pool->index.table, range, 
pool->index.last); - if (index >= range) - index = find_first_zero_bit(pool->index.table, range); - - WARN_ON_ONCE(index >= range); - set_bit(index, pool->index.table); - pool->index.last = index; - return index + pool->index.min_index; -} - -static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_elem *new) -{ - struct rb_node **link = &pool->index.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_elem *elem; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_elem, index_node); - - if (elem->index == new->index) { - pr_warn("element already exists!\n"); - return -EINVAL; - } - - if (elem->index > new->index) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->index_node, parent, link); - rb_insert_color(&new->index_node, &pool->index.tree); - - return 0; -} - -static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_elem *new) -{ - struct rb_node **link = &pool->key.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_elem *elem; - int cmp; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_elem, key_node); - - cmp = memcmp((u8 *)elem + pool->key.key_offset, - (u8 *)new + pool->key.key_offset, - pool->key.key_size); - - if (cmp == 0) { - pr_warn("key already exists!\n"); - return -EINVAL; - } - - if (cmp > 0) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->key_node, parent, link); - rb_insert_color(&new->key_node, &pool->key.tree); - - return 0; -} - -int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - int err; - - memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); - err = rxe_insert_key(pool, elem); - - return err; -} - -int __rxe_add_key(struct rxe_pool_elem *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - int err; - - write_lock_bh(&pool->pool_lock); - err = __rxe_add_key_locked(elem, key); - write_unlock_bh(&pool->pool_lock); - - return err; -} - -void __rxe_drop_key_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - rb_erase(&elem->key_node, &pool->key.tree); -} - -void __rxe_drop_key(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - write_lock_bh(&pool->pool_lock); - __rxe_drop_key_locked(elem); - write_unlock_bh(&pool->pool_lock); -} - -int __rxe_add_index_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - int err; - - elem->index = alloc_index(pool); - err = rxe_insert_index(pool, elem); - - return err; -} - -int __rxe_add_index(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - int err; - - write_lock_bh(&pool->pool_lock); - err = __rxe_add_index_locked(elem); - write_unlock_bh(&pool->pool_lock); - - return err; -} - -void __rxe_drop_index_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - clear_bit(elem->index - pool->index.min_index, pool->index.table); - rb_erase(&elem->index_node, &pool->index.tree); -} - -void __rxe_drop_index(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - write_lock_bh(&pool->pool_lock); - __rxe_drop_index_locked(elem); - write_unlock_bh(&pool->pool_lock); -} - -void *rxe_alloc_locked(struct rxe_pool *pool) -{ - struct rxe_pool_elem *elem; - void *obj; - - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; - - obj = kzalloc(pool->elem_size, GFP_ATOMIC); - if (!obj) - goto out_cnt; - - elem = (struct rxe_pool_elem *)((u8 
*)obj + pool->elem_offset); - - elem->pool = pool; - elem->obj = obj; - kref_init(&elem->ref_cnt); - - return obj; - -out_cnt: - atomic_dec(&pool->num_elem); - return NULL; + WARN_ON(!xa_empty(&pool->xa)); } void *rxe_alloc(struct rxe_pool *pool) { struct rxe_pool_elem *elem; void *obj; + int err; + + if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC))) + return NULL; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; obj = kzalloc(pool->elem_size, GFP_KERNEL); if (!obj) - goto out_cnt; + goto err_cnt; elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); @@ -368,127 +138,86 @@ void *rxe_alloc(struct rxe_pool *pool) elem->obj = obj; kref_init(&elem->ref_cnt); + err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, + &pool->next, GFP_KERNEL); + if (err) + goto err_free; + return obj; -out_cnt: +err_free: + kfree(obj); +err_cnt: atomic_dec(&pool->num_elem); return NULL; } int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem) { + int err; + + if (WARN_ON(pool->flags & RXE_POOL_ALLOC)) + return -EINVAL; + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; elem->pool = pool; elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); + err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, + &pool->next, GFP_KERNEL); + if (err) + goto err_cnt; + return 0; -out_cnt: +err_cnt: atomic_dec(&pool->num_elem); return -EINVAL; } -void rxe_elem_release(struct kref *kref) -{ - struct rxe_pool_elem *elem = - container_of(kref, struct rxe_pool_elem, ref_cnt); - struct rxe_pool *pool = elem->pool; - void *obj; - - if (pool->cleanup) - pool->cleanup(elem); - - if (!(pool->flags & RXE_POOL_NO_ALLOC)) { - obj = elem->obj; - kfree(obj); - } - - atomic_dec(&pool->num_elem); -} - -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rb_node *node; struct rxe_pool_elem *elem; + struct xarray *xa = &pool->xa; + unsigned long flags; void *obj; - node = pool->index.tree.rb_node; - - while (node) { - elem = rb_entry(node, struct rxe_pool_elem, index_node); - - if (elem->index > index) - node = node->rb_left; - else if (elem->index < index) - node = node->rb_right; - else - break; - } - - if (node) { - kref_get(&elem->ref_cnt); + xa_lock_irqsave(xa, flags); + elem = xa_load(xa, index); + if (elem && kref_get_unless_zero(&elem->ref_cnt)) obj = elem->obj; - } else { + else obj = NULL; - } - - return obj; -} - -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) -{ - void *obj; - - read_lock_bh(&pool->pool_lock); - obj = rxe_pool_get_index_locked(pool, index); - read_unlock_bh(&pool->pool_lock); + xa_unlock_irqrestore(xa, flags); return obj; } -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) +static void rxe_elem_release(struct kref *kref) { - struct rb_node *node; - struct rxe_pool_elem *elem; - void *obj; - int cmp; - - node = pool->key.tree.rb_node; + struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); + struct rxe_pool *pool = elem->pool; - while (node) { - elem = rb_entry(node, struct rxe_pool_elem, key_node); + xa_erase(&pool->xa, elem->index); - cmp = memcmp((u8 *)elem + pool->key.key_offset, - key, pool->key.key_size); + if (pool->cleanup) + pool->cleanup(elem); - if (cmp > 0) - node = node->rb_left; - else if (cmp < 0) - node = node->rb_right; - else - break; - } + if (pool->flags & RXE_POOL_ALLOC) + kfree(elem->obj); - if (node) { - 
kref_get(&elem->ref_cnt); - obj = elem->obj; - } else { - obj = NULL; - } - - return obj; + atomic_dec(&pool->num_elem); } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +int __rxe_get(struct rxe_pool_elem *elem) { - void *obj; - - read_lock_bh(&pool->pool_lock); - obj = rxe_pool_get_key_locked(pool, key); - read_unlock_bh(&pool->pool_lock); + return kref_get_unless_zero(&elem->ref_cnt); +} - return obj; +int __rxe_put(struct rxe_pool_elem *elem) +{ + return kref_put(&elem->ref_cnt, rxe_elem_release); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 214279310f4d..24bcc786c1b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -8,9 +8,7 @@ #define RXE_POOL_H enum rxe_pool_flags { - RXE_POOL_INDEX = BIT(1), - RXE_POOL_KEY = BIT(2), - RXE_POOL_NO_ALLOC = BIT(4), + RXE_POOL_ALLOC = BIT(1), }; enum rxe_elem_type { @@ -22,8 +20,6 @@ enum rxe_elem_type { RXE_TYPE_CQ, RXE_TYPE_MR, RXE_TYPE_MW, - RXE_TYPE_MC_GRP, - RXE_TYPE_MC_ELEM, RXE_NUM_TYPES, /* keep me last */ }; @@ -32,20 +28,13 @@ struct rxe_pool_elem { void *obj; struct kref ref_cnt; struct list_head list; - - /* only used if keyed */ - struct rb_node key_node; - - /* only used if indexed */ - struct rb_node index_node; u32 index; }; struct rxe_pool { struct rxe_dev *rxe; const char *name; - rwlock_t pool_lock; /* protects pool add/del/search */ - void (*cleanup)(struct rxe_pool_elem *obj); + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_pool_flags flags; enum rxe_elem_type type; @@ -54,36 +43,22 @@ struct rxe_pool { size_t elem_size; size_t elem_offset; - /* only used if indexed */ - struct { - struct rb_root tree; - unsigned long *table; - u32 last; - u32 max_index; - u32 min_index; - } index; - - /* only used if keyed */ - struct { - struct rb_root tree; - size_t key_offset; - size_t key_size; - } key; + struct xarray xa; + struct xa_limit limit; + u32 next; }; /* initialize a pool of objects with given limit on * number of elements. 
gets parameters from rxe_type_info * pool elements will be allocated out of a slab cache */ -int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, - enum rxe_elem_type type, u32 max_elem); +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type); /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool holding and not holding the pool lock */ -void *rxe_alloc_locked(struct rxe_pool *pool); - +/* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ @@ -91,69 +66,17 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem); #define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem) -/* assign an index to an indexed object and insert object into - * pool's rb tree holding and not holding the pool_lock - */ -int __rxe_add_index_locked(struct rxe_pool_elem *elem); - -#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->elem) - -int __rxe_add_index(struct rxe_pool_elem *elem); - -#define rxe_add_index(obj) __rxe_add_index(&(obj)->elem) - -/* drop an index and remove object from rb tree - * holding and not holding the pool_lock - */ -void __rxe_drop_index_locked(struct rxe_pool_elem *elem); - -#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->elem) - -void __rxe_drop_index(struct rxe_pool_elem *elem); - -#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->elem) - -/* assign a key to a keyed object and insert object into - * pool's rb tree holding and not holding pool_lock - */ -int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key); - -#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->elem, key) - -int __rxe_add_key(struct rxe_pool_elem *elem, void *key); - -#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->elem, key) - -/* remove elem from rb tree holding and not holding the pool_lock */ -void __rxe_drop_key_locked(struct rxe_pool_elem *elem); - -#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->elem) - -void __rxe_drop_key(struct rxe_pool_elem *elem); - -#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->elem) - -/* lookup an indexed object from index holding and not holding the pool_lock. - * takes a reference on object - */ -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); - +/* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key holding and not holding the pool_lock. 
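On the rxe_pool.c side, the xarray now carries the whole index scheme shown above: xa_alloc_cyclic() allocates indices within the per-type xa_limit and resumes after the last index handed out, which delays index reuse, while lookup becomes xa_load() plus kref_get_unless_zero() so an object already on its way to destruction is never handed back. A self-contained sketch of that pairing (the pool_* names are illustrative, not patch code):

    #include <linux/xarray.h>

    static DEFINE_XARRAY_ALLOC(pool_xa);
    static u32 pool_next;

    static int pool_insert(void *obj, u32 *index)
    {
            /* returns 0 on success, 1 if the cyclic counter wrapped,
             * or a negative errno (e.g. -EBUSY when the range is full)
             */
            return xa_alloc_cyclic(&pool_xa, index, obj, XA_LIMIT(1, 1024),
                                   &pool_next, GFP_KERNEL);
    }

    static void *pool_lookup(u32 index)
    {
            /* caller must still take a reference under the xa lock,
             * cf. rxe_pool_get_index() above
             */
            return xa_load(&pool_xa, index);
    }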
- * takes a reference on the objecti - */ -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); +int __rxe_get(struct rxe_pool_elem *elem); -void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +#define rxe_get(obj) __rxe_get(&(obj)->elem) -/* cleanup an object when all references are dropped */ -void rxe_elem_release(struct kref *kref); +int __rxe_put(struct rxe_pool_elem *elem); -/* take a reference on an object */ -#define rxe_add_ref(obj) kref_get(&(obj)->elem.ref_cnt) +#define rxe_put(obj) __rxe_put(&(obj)->elem) -/* drop a reference on an object */ -#define rxe_drop_ref(obj) kref_put(&(obj)->elem.ref_cnt, rxe_elem_release) +#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt) #endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 5018b9387694..62acf890af6c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -135,12 +135,8 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) { - if (res->type == RXE_ATOMIC_MASK) { + if (res->type == RXE_ATOMIC_MASK) kfree_skb(res->atomic.skb); - } else if (res->type == RXE_READ_MASK) { - if (res->read.mr) - rxe_drop_ref(res->read.mr); - } res->type = 0; } @@ -188,9 +184,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, break; } - INIT_LIST_HEAD(&qp->grp_list); - - spin_lock_init(&qp->grp_lock); spin_lock_init(&qp->state_lock); atomic_set(&qp->ssn, 0); @@ -330,11 +323,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - rxe_add_ref(pd); - rxe_add_ref(rcq); - rxe_add_ref(scq); + rxe_get(pd); + rxe_get(rcq); + rxe_get(scq); if (srq) - rxe_add_ref(srq); + rxe_get(srq); qp->pd = pd; qp->rcq = rcq; @@ -366,10 +359,10 @@ err1: qp->srq = NULL; if (srq) - rxe_drop_ref(srq); - rxe_drop_ref(scq); - rxe_drop_ref(rcq); - rxe_drop_ref(pd); + rxe_put(srq); + rxe_put(scq); + rxe_put(rcq); + rxe_put(pd); return err; } @@ -528,7 +521,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -770,6 +763,20 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) return 0; } +int rxe_qp_chk_destroy(struct rxe_qp *qp) +{ + /* See IBA o10-2.2.3 + * An attempt to destroy a QP while attached to a mcast group + * will fail immediately. 
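The other recurring conversion in the series, visible in rxe_cq.c above and in rxe_queue.c and rxe_resp.c below, swaps spin_lock_bh() for spin_lock_irqsave(). The _bh variant only disables softirqs and must not be used where a caller can hold the lock with hard interrupts disabled, so these paths now save and restore the interrupt state. The pattern, with placeholder names:

    static void do_work(spinlock_t *lock)
    {
            unsigned long flags;    /* one local per caller, never shared */

            spin_lock_irqsave(lock, flags);      /* masks IRQs, saves state */
            /* ... critical section ... */
            spin_unlock_irqrestore(lock, flags); /* restores saved state */
    }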
+ */ + if (atomic_read(&qp->mcg_num)) { + pr_debug("Attempt to destroy QP while attached to multicast group\n"); + return -EBUSY; + } + + return 0; +} + /* called by the destroy qp verb */ void rxe_qp_destroy(struct rxe_qp *qp) { @@ -798,28 +805,24 @@ static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); - rxe_drop_all_mcast_groups(qp); - if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) - rxe_drop_ref(qp->srq); + rxe_put(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); if (qp->scq) - rxe_drop_ref(qp->scq); + rxe_put(qp->scq); if (qp->rcq) - rxe_drop_ref(qp->rcq); + rxe_put(qp->rcq); if (qp->pd) - rxe_drop_ref(qp->pd); + rxe_put(qp->pd); - if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); - qp->resp.mr = NULL; - } + if (qp->resp.mr) + rxe_put(qp->resp.mr); if (qp_type(qp) == IB_QPT_RC) sk_dst_reset(qp->sk->sk); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index a1b283dd2d4c..dbd4971039c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -151,6 +151,8 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, struct rxe_queue *new_q; unsigned int num_elem = *num_elem_p; int err; + unsigned long producer_flags; + unsigned long consumer_flags; new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type); if (!new_q) @@ -164,17 +166,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, goto err1; } - spin_lock_bh(consumer_lock); + spin_lock_irqsave(consumer_lock, consumer_flags); if (producer_lock) { - spin_lock_bh(producer_lock); + spin_lock_irqsave(producer_lock, producer_flags); err = resize_finish(q, new_q, num_elem); - spin_unlock_bh(producer_lock); + spin_unlock_irqrestore(producer_lock, producer_flags); } else { err = resize_finish(q, new_q, num_elem); } - spin_unlock_bh(consumer_lock); + spin_unlock_irqrestore(consumer_lock, consumer_flags); rxe_queue_cleanup(new_q); /* new/old dep on err */ if (err) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 6a6cc1fa90e4..d09a8b68c962 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -217,7 +217,7 @@ static int hdr_check(struct rxe_pkt_info *pkt) return 0; err2: - rxe_drop_ref(qp); + rxe_put(qp); err1: return -EINVAL; } @@ -233,8 +233,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb) static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - struct rxe_mc_grp *mcg; - struct rxe_mc_elem *mce; + struct rxe_mcg *mcg; + struct rxe_mca *mca; struct rxe_qp *qp; union ib_gid dgid; int err; @@ -246,19 +246,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); /* lookup mcast group corresponding to mgid, takes a ref */ - mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + mcg = rxe_lookup_mcg(rxe, &dgid); if (!mcg) goto drop; /* mcast group not registered */ - spin_lock_bh(&mcg->mcg_lock); + spin_lock_bh(&rxe->mcg_lock); /* this is unreliable datagram service so we let * failures to deliver a multicast packet to a * single QP happen and just move on and try * the rest of them on the list */ - list_for_each_entry(mce, &mcg->qp_list, qp_list) { - qp = mce->qp; + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + qp = mca->qp; /* validate qp for incoming packet */ err = 
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 6a6cc1fa90e4..d09a8b68c962 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -217,7 +217,7 @@ static int hdr_check(struct rxe_pkt_info *pkt)
 	return 0;
 
 err2:
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 err1:
 	return -EINVAL;
 }
@@ -233,8 +233,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 {
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
-	struct rxe_mc_grp *mcg;
-	struct rxe_mc_elem *mce;
+	struct rxe_mcg *mcg;
+	struct rxe_mca *mca;
 	struct rxe_qp *qp;
 	union ib_gid dgid;
 	int err;
@@ -246,19 +246,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 		memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
 
 	/* lookup mcast group corresponding to mgid, takes a ref */
-	mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
+	mcg = rxe_lookup_mcg(rxe, &dgid);
 	if (!mcg)
 		goto drop;	/* mcast group not registered */
 
-	spin_lock_bh(&mcg->mcg_lock);
+	spin_lock_bh(&rxe->mcg_lock);
 
 	/* this is unreliable datagram service so we let
	 * failures to deliver a multicast packet to a
	 * single QP happen and just move on and try
	 * the rest of them on the list
	 */
-	list_for_each_entry(mce, &mcg->qp_list, qp_list) {
-		qp = mce->qp;
+	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+		qp = mca->qp;
 
 		/* validate qp for incoming packet */
 		err = check_type_state(rxe, pkt, qp);
@@ -273,7 +273,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 		 * skb and pass to the QP. Pass the original skb to
 		 * the last QP in the list.
 		 */
-		if (mce->qp_list.next != &mcg->qp_list) {
+		if (mca->qp_list.next != &mcg->qp_list) {
 			struct sk_buff *cskb;
 			struct rxe_pkt_info *cpkt;
 
@@ -288,19 +288,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 
 			cpkt = SKB_TO_PKT(cskb);
 			cpkt->qp = qp;
-			rxe_add_ref(qp);
+			rxe_get(qp);
 			rxe_rcv_pkt(cpkt, cskb);
 		} else {
 			pkt->qp = qp;
-			rxe_add_ref(qp);
+			rxe_get(qp);
 			rxe_rcv_pkt(pkt, skb);
 			skb = NULL;	/* mark consumed */
 		}
 	}
 
-	spin_unlock_bh(&mcg->mcg_lock);
+	spin_unlock_bh(&rxe->mcg_lock);
 
-	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */
+	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 
 	if (likely(!skb))
 		return;
@@ -397,7 +397,7 @@ void rxe_rcv(struct sk_buff *skb)
 
 drop:
 	if (pkt->qp)
-		rxe_drop_ref(pkt->qp);
+		rxe_put(pkt->qp);
 
 	kfree_skb(skb);
 	ib_device_put(&rxe->ib_dev);
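rxe_lookup_mcg() and rxe_cleanup_mcg() used above live in rxe_mcast.c, which is not part of this section; per the comment, the lookup returns the group with a kref held. A rough sketch of what such a lookup can look like, given the rb_node/kref fields declared in rxe_verbs.h below (treat the exact comparison and locking details as illustrative, not as the real rxe_lookup_mcg()):

	/* Hedged sketch of an mgid-keyed rb-tree lookup; illustrative only. */
	static struct rxe_mcg *example_lookup_mcg(struct rxe_dev *rxe,
						  union ib_gid *mgid)
	{
		struct rb_node *node;
		struct rxe_mcg *mcg;
		int cmp;

		spin_lock_bh(&rxe->mcg_lock);
		node = rxe->mcg_tree.rb_node;
		while (node) {
			mcg = rb_entry(node, struct rxe_mcg, node);
			cmp = memcmp(mgid, &mcg->mgid, sizeof(*mgid));
			if (cmp < 0) {
				node = node->rb_left;
			} else if (cmp > 0) {
				node = node->rb_right;
			} else {
				kref_get(&mcg->ref_cnt);	/* dropped via rxe_cleanup_mcg */
				spin_unlock_bh(&rxe->mcg_lock);
				return mcg;
			}
		}
		spin_unlock_bh(&rxe->mcg_lock);
		return NULL;
	}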
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 5eb89052dd66..ae5fbc79dd5c 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -358,14 +358,14 @@ static inline int get_mtu(struct rxe_qp *qp)
 }
 
 static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+				       struct rxe_av *av,
 				       struct rxe_send_wqe *wqe,
-				       int opcode, int payload,
+				       int opcode, u32 payload,
 				       struct rxe_pkt_info *pkt)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct sk_buff *skb;
 	struct rxe_send_wr *ibwr = &wqe->wr;
-	struct rxe_av *av;
 	int pad = (-payload) & 0x3;
 	int paylen;
 	int solicited;
@@ -374,21 +374,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 
 	/* length from start of bth to end of icrc */
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-
-	/* pkt->hdr, port_num and mask are initialized in ifc layer */
-	pkt->rxe = rxe;
-	pkt->opcode = opcode;
-	pkt->qp = qp;
-	pkt->psn = qp->req.psn;
-	pkt->mask = rxe_opcode[opcode].mask;
-	pkt->paylen = paylen;
-	pkt->wqe = wqe;
+	pkt->paylen = paylen;
 
 	/* init skb */
-	av = rxe_get_av(pkt);
-	if (!av)
-		return NULL;
-
 	skb = rxe_init_packet(rxe, av, paylen, pkt);
 	if (unlikely(!skb))
 		return NULL;
@@ -447,13 +435,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	return skb;
 }
 
-static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			 struct rxe_pkt_info *pkt, struct sk_buff *skb,
-			 int paylen)
+static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
+			 struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
+			 struct sk_buff *skb, u32 paylen)
 {
 	int err;
 
-	err = rxe_prepare(pkt, skb);
+	err = rxe_prepare(av, pkt, skb);
 	if (err)
 		return err;
 
@@ -497,7 +485,7 @@ static void update_wqe_state(struct rxe_qp *qp,
 static void update_wqe_psn(struct rxe_qp *qp,
 			   struct rxe_send_wqe *wqe,
 			   struct rxe_pkt_info *pkt,
-			   int payload)
+			   u32 payload)
 {
 	/* number of packets left to send including current one */
 	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -540,7 +528,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
 }
 
 static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			 struct rxe_pkt_info *pkt, int payload)
+			 struct rxe_pkt_info *pkt)
 {
 	qp->req.opcode = pkt->opcode;
 
@@ -608,19 +596,22 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 int rxe_requester(void *arg)
 {
 	struct rxe_qp *qp = (struct rxe_qp *)arg;
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_pkt_info pkt;
 	struct sk_buff *skb;
 	struct rxe_send_wqe *wqe;
 	enum rxe_hdr_mask mask;
-	int payload;
+	u32 payload;
 	int mtu;
 	int opcode;
 	int ret;
 	struct rxe_send_wqe rollback_wqe;
 	u32 rollback_psn;
 	struct rxe_queue *q = qp->sq.queue;
+	struct rxe_ah *ah;
+	struct rxe_av *av;
 
-	rxe_add_ref(qp);
+	rxe_get(qp);
 
 next_wqe:
 	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -699,20 +690,34 @@ next_wqe:
 			wqe->state = wqe_state_done;
 			wqe->status = IB_WC_SUCCESS;
 			__rxe_do_task(&qp->comp.task);
-			rxe_drop_ref(qp);
+			rxe_put(qp);
 			return 0;
 		}
 		payload = mtu;
 	}
 
-	skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
+	pkt.rxe = rxe;
+	pkt.opcode = opcode;
+	pkt.qp = qp;
+	pkt.psn = qp->req.psn;
+	pkt.mask = rxe_opcode[opcode].mask;
+	pkt.wqe = wqe;
+
+	av = rxe_get_av(&pkt, &ah);
+	if (unlikely(!av)) {
+		pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+		wqe->status = IB_WC_LOC_QP_OP_ERR;
+		goto err_drop_ah;
+	}
+
+	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
 	if (unlikely(!skb)) {
 		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
 		wqe->status = IB_WC_LOC_QP_OP_ERR;
-		goto err;
+		goto err_drop_ah;
 	}
 
-	ret = finish_packet(qp, wqe, &pkt, skb, payload);
+	ret = finish_packet(qp, av, wqe, &pkt, skb, payload);
 	if (unlikely(ret)) {
 		pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
 		if (ret == -EFAULT)
@@ -720,9 +725,12 @@ next_wqe:
 		else
 			wqe->status = IB_WC_LOC_QP_OP_ERR;
 		kfree_skb(skb);
-		goto err;
+		goto err_drop_ah;
 	}
 
+	if (ah)
+		rxe_put(ah);
+
 	/*
 	 * To prevent a race on wqe access between requester and completer,
 	 * wqe members state and psn need to be set before calling
@@ -747,15 +755,18 @@ next_wqe:
 		goto err;
 	}
 
-	update_state(qp, wqe, &pkt, payload);
+	update_state(qp, wqe, &pkt);
 
 	goto next_wqe;
 
+err_drop_ah:
+	if (ah)
+		rxe_put(ah);
 err:
 	wqe->state = wqe_state_error;
 	__rxe_do_task(&qp->comp.task);
 
 exit:
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	return -EAGAIN;
 }
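The requester now resolves the address vector once per packet through rxe_get_av(&pkt, &ah) and, when an AH is involved, holds it until the packet is built (hence the rxe_put(ah) calls above). The contract is only implied by this hunk, so the sketch below is an assumption about its shape, not the body of the real rxe_get_av() in rxe_av.c:

	/* Hedged sketch: connected QPs use qp->pri_av; UD QPs take a ref on the AH. */
	static struct rxe_av *example_get_av(struct rxe_pkt_info *pkt,
					     struct rxe_ah **ahp)
	{
		*ahp = NULL;

		if (!pkt->wqe || qp_type(pkt->qp) != IB_QPT_UD)
			return &pkt->qp->pri_av;	/* AV embedded in the QP */

		/* UD: look the AH up by index and hold it while the packet is built */
		*ahp = rxe_pool_get_index(&pkt->rxe->ah_pool, pkt->wqe->wr.wr.ud.ah_num);
		return *ahp ? &(*ahp)->av : NULL;
	}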
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index e8f435fa6e4d..16fc7ea1298d 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -99,7 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
 
 	if (qp->resp.state == QP_STATE_ERROR) {
 		while ((skb = skb_dequeue(&qp->req_pkts))) {
-			rxe_drop_ref(qp);
+			rxe_put(qp);
 			kfree_skb(skb);
 			ib_device_put(qp->ibqp.device);
 		}
@@ -297,21 +297,22 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 	struct ib_event ev;
 	unsigned int count;
 	size_t size;
+	unsigned long flags;
 
 	if (srq->error)
 		return RESPST_ERR_RNR;
 
-	spin_lock_bh(&srq->rq.consumer_lock);
+	spin_lock_irqsave(&srq->rq.consumer_lock, flags);
 
 	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
 	if (!wqe) {
-		spin_unlock_bh(&srq->rq.consumer_lock);
+		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 		return RESPST_ERR_RNR;
 	}
 
 	/* don't trust user space data */
 	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
-		spin_unlock_bh(&srq->rq.consumer_lock);
+		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 		pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
 		return RESPST_ERR_MALFORMED_WQE;
 	}
@@ -327,11 +328,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 		goto event;
 	}
 
-	spin_unlock_bh(&srq->rq.consumer_lock);
+	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 	return RESPST_CHK_LENGTH;
 
 event:
-	spin_unlock_bh(&srq->rq.consumer_lock);
+	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 	ev.device = qp->ibqp.device;
 	ev.element.srq = qp->ibqp.srq;
 	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
@@ -463,8 +464,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		if (mw->access & IB_ZERO_BASED)
 			qp->resp.offset = mw->addr;
 
-		rxe_drop_ref(mw);
-		rxe_add_ref(mr);
+		rxe_put(mw);
+		rxe_get(mr);
 	} else {
 		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
 		if (!mr) {
@@ -507,9 +508,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 
 err:
 	if (mr)
-		rxe_drop_ref(mr);
+		rxe_put(mr);
 	if (mw)
-		rxe_drop_ref(mw);
+		rxe_put(mw);
 
 	return state;
 }
@@ -632,7 +633,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	if (ack->mask & RXE_ATMACK_MASK)
 		atmack_set_orig(ack, qp->resp.atomic_orig);
 
-	err = rxe_prepare(ack, skb);
+	err = rxe_prepare(&qp->pri_av, ack, skb);
 	if (err) {
 		kfree_skb(skb);
 		return NULL;
@@ -641,6 +642,78 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	return skb;
 }
 
+static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
+					     struct rxe_pkt_info *pkt)
+{
+	struct resp_res *res;
+	u32 pkts;
+
+	res = &qp->resp.resources[qp->resp.res_head];
+	rxe_advance_resp_resource(qp);
+	free_rd_atomic_resource(qp, res);
+
+	res->type = RXE_READ_MASK;
+	res->replay = 0;
+	res->read.va = qp->resp.va + qp->resp.offset;
+	res->read.va_org = qp->resp.va + qp->resp.offset;
+	res->read.resid = qp->resp.resid;
+	res->read.length = qp->resp.resid;
+	res->read.rkey = qp->resp.rkey;
+
+	pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+	res->first_psn = pkt->psn;
+	res->cur_psn = pkt->psn;
+	res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+	res->state = rdatm_res_state_new;
+
+	return res;
+}
+
+/**
+ * rxe_recheck_mr - revalidate MR from rkey and get a reference
+ * @qp: the qp
+ * @rkey: the rkey
+ *
+ * This call allows the MR to have been invalidated or deregistered,
+ * and the MW, if one was used, to have been invalidated or
+ * deallocated. It assumes that the access permissions, if originally
+ * valid, still are, and that the mappings are unchanged.
+ *
+ * Return: mr on success else NULL
+ */
+static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
+{
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct rxe_mr *mr;
+	struct rxe_mw *mw;
+
+	if (rkey_is_mw(rkey)) {
+		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+		if (!mw || mw->rkey != rkey)
+			return NULL;
+
+		if (mw->state != RXE_MW_STATE_VALID) {
+			rxe_put(mw);
+			return NULL;
+		}
+
+		mr = mw->mr;
+		rxe_put(mw);
+	} else {
+		mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+		if (!mr || mr->rkey != rkey)
+			return NULL;
+	}
+
+	if (mr->state != RXE_MR_STATE_VALID) {
+		rxe_put(mr);
+		return NULL;
+	}
+
+	return mr;
+}
+
 /* RDMA read response. If res is not NULL, then we have a current RDMA request
  * being processed or replayed.
  */
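One subtlety worth flagging: the kernel-doc above promises that rxe_recheck_mr() returns the MR with a reference, yet the MW branch returns mw->mr without taking one, while read_reply() below drops a reference per packet with rxe_put(mr). A safer shape would take the MR reference while the MW reference is still held, as in this hedged re-sketch of that branch (same helpers as the patch; the exact checks are illustrative):

	/* Sketch only: hand back an MR reference from the MW branch as documented. */
	static struct rxe_mr *example_recheck_mw(struct rxe_dev *rxe, u32 rkey)
	{
		struct rxe_mw *mw;
		struct rxe_mr *mr;

		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);	/* reference handed back to the caller */
		rxe_put(mw);
		return mr;
	}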
@@ -655,53 +728,26 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	int opcode;
 	int err;
 	struct resp_res *res = qp->resp.res;
+	struct rxe_mr *mr;
 
 	if (!res) {
-		/* This is the first time we process that request. Get a
-		 * resource
-		 */
-		res = &qp->resp.resources[qp->resp.res_head];
-
-		free_rd_atomic_resource(qp, res);
-		rxe_advance_resp_resource(qp);
-
-		res->type = RXE_READ_MASK;
-		res->replay = 0;
-
-		res->read.va = qp->resp.va +
-				qp->resp.offset;
-		res->read.va_org = qp->resp.va +
-				qp->resp.offset;
-
-		res->first_psn = req_pkt->psn;
-
-		if (reth_len(req_pkt)) {
-			res->last_psn = (req_pkt->psn +
-					 (reth_len(req_pkt) + mtu - 1) /
-					 mtu - 1) & BTH_PSN_MASK;
-		} else {
-			res->last_psn = res->first_psn;
-		}
-		res->cur_psn = req_pkt->psn;
-
-		res->read.resid = qp->resp.resid;
-		res->read.length = qp->resp.resid;
-		res->read.rkey = qp->resp.rkey;
-
-		/* note res inherits the reference to mr from qp */
-		res->read.mr = qp->resp.mr;
-		qp->resp.mr = NULL;
-
-		qp->resp.res = res;
-		res->state = rdatm_res_state_new;
+		res = rxe_prepare_read_res(qp, req_pkt);
+		qp->resp.res = res;
 	}
 
 	if (res->state == rdatm_res_state_new) {
+		mr = qp->resp.mr;
+		qp->resp.mr = NULL;
+
 		if (res->read.resid <= mtu)
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
 		else
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 	} else {
+		mr = rxe_recheck_mr(qp, res->read.rkey);
+		if (!mr)
+			return RESPST_ERR_RKEY_VIOLATION;
+
 		if (res->read.resid > mtu)
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
 		else
@@ -717,10 +763,12 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	if (!skb)
 		return RESPST_ERR_RNR;
 
-	err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
+	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 			  payload, RXE_FROM_MR_OBJ);
 	if (err)
 		pr_err("Failed copying memory\n");
+	if (mr)
+		rxe_put(mr);
 
 	if (bth_pad(&ack_pkt)) {
 		u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -814,6 +862,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		return RESPST_ERR_INVALIDATE_RKEY;
 	}
 
+	if (pkt->mask & RXE_END_MASK)
+		/* We successfully processed this new request. */
+		qp->resp.msn++;
+
 	/* next expected psn, read handles this separately */
 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 	qp->resp.ack_psn = qp->resp.psn;
@@ -821,11 +873,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 	qp->resp.opcode = pkt->opcode;
 	qp->resp.status = IB_WC_SUCCESS;
 
-	if (pkt->mask & RXE_COMP_MASK) {
-		/* We successfully processed this new request. */
-		qp->resp.msn++;
+	if (pkt->mask & RXE_COMP_MASK)
 		return RESPST_COMPLETE;
-	} else if (qp_type(qp) == IB_QPT_RC)
+	else if (qp_type(qp) == IB_QPT_RC)
 		return RESPST_ACKNOWLEDGE;
 	else
 		return RESPST_CLEANUP;
@@ -987,7 +1037,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	rc = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (rc) {
 		pr_err_ratelimited("Failed sending ack\n");
-		rxe_drop_ref(qp);
+		rxe_put(qp);
 	}
 out:
 	return rc;
@@ -1016,13 +1066,13 @@ static enum resp_states cleanup(struct rxe_qp *qp,
 
 	if (pkt) {
 		skb = skb_dequeue(&qp->req_pkts);
-		rxe_drop_ref(qp);
+		rxe_put(qp);
 		kfree_skb(skb);
 		ib_device_put(qp->ibqp.device);
 	}
 
 	if (qp->resp.mr) {
-		rxe_drop_ref(qp->resp.mr);
+		rxe_put(qp->resp.mr);
 		qp->resp.mr = NULL;
 	}
 
@@ -1166,7 +1216,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
 	}
 
 	if (qp->resp.mr) {
-		rxe_drop_ref(qp->resp.mr);
+		rxe_put(qp->resp.mr);
 		qp->resp.mr = NULL;
 	}
 
@@ -1180,7 +1230,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
 	struct rxe_queue *q = qp->rq.queue;
 
 	while ((skb = skb_dequeue(&qp->req_pkts))) {
-		rxe_drop_ref(qp);
+		rxe_put(qp);
 		kfree_skb(skb);
 		ib_device_put(qp->ibqp.device);
 	}
@@ -1200,7 +1250,7 @@ int rxe_responder(void *arg)
 	struct rxe_pkt_info *pkt = NULL;
 	int ret = 0;
 
-	rxe_add_ref(qp);
+	rxe_get(qp);
 
 	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
 
@@ -1387,6 +1437,6 @@ int rxe_responder(void *arg)
 exit:
 	ret = -EAGAIN;
 done:
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	return ret;
 }
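The drain loops in get_req(), cleanup() and rxe_drain_req_pkts() above all drop one QP reference and one ib_device reference per dequeued skb; that is the enqueue-side convention from rxe_rcv(), made explicit here as a sketch:

	/* Per-skb reference convention used by the responder queues (illustrative). */
	static void example_drain(struct rxe_qp *qp)
	{
		struct sk_buff *skb;

		while ((skb = skb_dequeue(&qp->req_pkts))) {
			rxe_put(qp);			/* ref taken when the skb was queued */
			kfree_skb(skb);
			ib_device_put(qp->ibqp.device);	/* device ref is also per-skb */
		}
	}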
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 915ad6664321..67184b0281a0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 {
 	struct rxe_ucontext *uc = to_ruc(ibuc);
 
-	rxe_drop_ref(uc);
+	rxe_put(uc);
 }
 
 static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
 	struct rxe_pd *pd = to_rpd(ibpd);
 
-	rxe_drop_ref(pd);
+	rxe_put(pd);
 	return 0;
 }
 
@@ -181,7 +181,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
 		return err;
 
 	/* create index > 0 */
-	rxe_add_index(ah);
 	ah->ah_num = ah->elem.index;
 
 	if (uresp) {
@@ -189,8 +188,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
 		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
 					 sizeof(uresp->ah_num));
 		if (err) {
-			rxe_drop_index(ah);
-			rxe_drop_ref(ah);
+			rxe_put(ah);
 			return -EFAULT;
 		}
 	} else if (ah->is_user) {
@@ -230,8 +228,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 {
 	struct rxe_ah *ah = to_rah(ibah);
 
-	rxe_drop_index(ah);
-	rxe_drop_ref(ah);
+	rxe_put(ah);
 	return 0;
 }
 
@@ -306,7 +303,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	if (err)
 		goto err1;
 
-	rxe_add_ref(pd);
+	rxe_get(pd);
 	srq->pd = pd;
 
 	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
@@ -316,8 +313,8 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 	return 0;
 
 err2:
-	rxe_drop_ref(pd);
-	rxe_drop_ref(srq);
+	rxe_put(pd);
+	rxe_put(srq);
 err1:
 	return err;
 }
@@ -374,8 +371,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 	if (srq->rq.queue)
 		rxe_queue_cleanup(srq->rq.queue);
 
-	rxe_drop_ref(srq->pd);
-	rxe_drop_ref(srq);
+	rxe_put(srq->pd);
+	rxe_put(srq);
 	return 0;
 }
 
@@ -384,8 +381,9 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 {
 	int err = 0;
 	struct rxe_srq *srq = to_rsrq(ibsrq);
+	unsigned long flags;
 
-	spin_lock_bh(&srq->rq.producer_lock);
+	spin_lock_irqsave(&srq->rq.producer_lock, flags);
 
 	while (wr) {
 		err = post_one_recv(&srq->rq, wr);
@@ -394,7 +392,7 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 		wr = wr->next;
 	}
 
-	spin_unlock_bh(&srq->rq.producer_lock);
+	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
 
 	if (err)
 		*bad_wr = wr;
@@ -437,7 +435,6 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
 	if (err)
 		return err;
 
-	rxe_add_index(qp);
 	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
 	if (err)
 		goto qp_init;
@@ -445,8 +442,7 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
 	return 0;
 
 qp_init:
-	rxe_drop_index(qp);
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	return err;
 }
 
@@ -493,10 +489,14 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
 	struct rxe_qp *qp = to_rqp(ibqp);
+	int ret;
+
+	ret = rxe_qp_chk_destroy(qp);
+	if (ret)
+		return ret;
 
 	rxe_qp_destroy(qp);
-	rxe_drop_index(qp);
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	return 0;
 }
 
@@ -638,18 +638,19 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	int err;
 	struct rxe_sq *sq = &qp->sq;
 	struct rxe_send_wqe *send_wqe;
+	unsigned long flags;
 	int full;
 
 	err = validate_send_wr(qp, ibwr, mask, length);
 	if (err)
 		return err;
 
-	spin_lock_bh(&qp->sq.sq_lock);
+	spin_lock_irqsave(&qp->sq.sq_lock, flags);
 
 	full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
 	if (unlikely(full)) {
-		spin_unlock_bh(&qp->sq.sq_lock);
+		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 		return -ENOMEM;
 	}
 
@@ -658,7 +659,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 
 	queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
 
-	spin_unlock_bh(&qp->sq.sq_lock);
+	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 
 	return 0;
 }
@@ -738,6 +739,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 	int err = 0;
 	struct rxe_qp *qp = to_rqp(ibqp);
 	struct rxe_rq *rq = &qp->rq;
+	unsigned long flags;
 
 	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
 		*bad_wr = wr;
@@ -751,7 +753,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 		goto err1;
 	}
 
-	spin_lock_bh(&rq->producer_lock);
+	spin_lock_irqsave(&rq->producer_lock, flags);
 
 	while (wr) {
 		err = post_one_recv(rq, wr);
@@ -762,7 +764,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 		wr = wr->next;
 	}
 
-	spin_unlock_bh(&rq->producer_lock);
+	spin_unlock_irqrestore(&rq->producer_lock, flags);
 
 	if (qp->resp.state == QP_STATE_ERROR)
 		rxe_run_task(&qp->resp.task, 1);
@@ -807,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 
 	rxe_cq_disable(cq);
 
-	rxe_drop_ref(cq);
+	rxe_put(cq);
 	return 0;
 }
 
@@ -843,8 +845,9 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 	int i;
 	struct rxe_cq *cq = to_rcq(ibcq);
 	struct rxe_cqe *cqe;
+	unsigned long flags;
 
-	spin_lock_bh(&cq->cq_lock);
+	spin_lock_irqsave(&cq->cq_lock, flags);
 	for (i = 0; i < num_entries; i++) {
 		cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
 		if (!cqe)
@@ -853,7 +856,7 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
 		queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
 	}
-	spin_unlock_bh(&cq->cq_lock);
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
 
 	return i;
 }
@@ -873,8 +876,9 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	struct rxe_cq *cq = to_rcq(ibcq);
 	int ret = 0;
 	int empty;
+	unsigned long irq_flags;
 
-	spin_lock_bh(&cq->cq_lock);
+	spin_lock_irqsave(&cq->cq_lock, irq_flags);
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = flags & IB_CQ_SOLICITED_MASK;
 
@@ -883,7 +887,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
 		ret = 1;
 
-	spin_unlock_bh(&cq->cq_lock);
+	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
 
 	return ret;
 }
@@ -898,8 +902,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	rxe_add_index(mr);
-	rxe_add_ref(pd);
+	rxe_get(pd);
 	rxe_mr_init_dma(pd, access, mr);
 
 	return &mr->ibmr;
@@ -922,9 +925,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 		goto err2;
 	}
 
-	rxe_add_index(mr);
-	rxe_add_ref(pd);
+	rxe_get(pd);
 
 	err = rxe_mr_init_user(pd, start, length, iova, access, mr);
 	if (err)
@@ -933,9 +935,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 	return &mr->ibmr;
 
 err3:
-	rxe_drop_ref(pd);
-	rxe_drop_index(mr);
-	rxe_drop_ref(mr);
+	rxe_put(pd);
+	rxe_put(mr);
 err2:
 	return ERR_PTR(err);
 }
@@ -957,9 +958,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 		goto err1;
 	}
 
-	rxe_add_index(mr);
-
-	rxe_add_ref(pd);
+	rxe_get(pd);
 
 	err = rxe_mr_init_fast(pd, max_num_sg, mr);
 	if (err)
@@ -968,9 +967,8 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 	return &mr->ibmr;
 
 err2:
-	rxe_drop_ref(pd);
-	rxe_drop_index(mr);
-	rxe_drop_ref(mr);
+	rxe_put(pd);
+	rxe_put(mr);
 err1:
 	return ERR_PTR(err);
 }
@@ -999,32 +997,6 @@ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 	return n;
 }
 
-static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
-{
-	int err;
-	struct rxe_dev *rxe = to_rdev(ibqp->device);
-	struct rxe_qp *qp = to_rqp(ibqp);
-	struct rxe_mc_grp *grp;
-
-	/* takes a ref on grp if successful */
-	err = rxe_mcast_get_grp(rxe, mgid, &grp);
-	if (err)
-		return err;
-
-	err = rxe_mcast_add_grp_elem(rxe, qp, grp);
-
-	rxe_drop_ref(grp);
-	return err;
-}
-
-static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
-{
-	struct rxe_dev *rxe = to_rdev(ibqp->device);
-	struct rxe_qp *qp = to_rqp(ibqp);
-
-	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
-}
-
 static ssize_t parent_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
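The attach/detach verbs removed above are not gone: per the diffstat, their replacements live in rxe_mcast.c, which is outside this section. A hedged sketch of what the new attach bookkeeping amounts to, built only from the fields declared in rxe_verbs.h below (error handling and locking granularity are assumptions, as is the function name):

	/* Illustrative attach path; the real one is in rxe_mcast.c. */
	static int example_attach(struct rxe_dev *rxe, struct rxe_qp *qp,
				  struct rxe_mcg *mcg)
	{
		struct rxe_mca *mca;

		mca = kzalloc(sizeof(*mca), GFP_KERNEL);
		if (!mca)
			return -ENOMEM;

		mca->qp = qp;
		atomic_inc(&qp->mcg_num);	/* makes destroy fail per IBA o10-2.2.3 */
		atomic_inc(&rxe->mcg_attach);	/* device-wide attach count */

		spin_lock_bh(&rxe->mcg_lock);
		list_add(&mca->qp_list, &mcg->qp_list);
		atomic_inc(&mcg->qp_num);
		spin_unlock_bh(&rxe->mcg_lock);

		return 0;
	}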
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e48969e8d4c8..e7eff1ca75e9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -157,7 +157,6 @@ struct resp_res {
 			struct sk_buff	*skb;
 		} atomic;
 		struct {
-			struct rxe_mr	*mr;
 			u64		va_org;
 			u32		rkey;
 			u32		length;
@@ -232,9 +231,7 @@ struct rxe_qp {
 	struct rxe_av		pri_av;
 	struct rxe_av		alt_av;
 
-	/* list of mcast groups qp has joined (for cleanup) */
-	struct list_head	grp_list;
-	spinlock_t		grp_lock; /* guard grp_list */
+	atomic_t		mcg_num;
 
 	struct sk_buff_head	req_pkts;
 	struct sk_buff_head	resp_pkts;
@@ -353,23 +350,20 @@ struct rxe_mw {
 	u64			length;
 };
 
-struct rxe_mc_grp {
-	struct rxe_pool_elem	elem;
-	spinlock_t		mcg_lock; /* guard group */
+struct rxe_mcg {
+	struct rb_node		node;
+	struct kref		ref_cnt;
 	struct rxe_dev		*rxe;
 	struct list_head	qp_list;
 	union ib_gid		mgid;
-	int			num_qp;
+	atomic_t		qp_num;
 	u32			qkey;
 	u16			pkey;
 };
 
-struct rxe_mc_elem {
-	struct rxe_pool_elem	elem;
+struct rxe_mca {
 	struct list_head	qp_list;
-	struct list_head	grp_list;
 	struct rxe_qp		*qp;
-	struct rxe_mc_grp	*grp;
 };
 
 struct rxe_port {
@@ -401,7 +395,12 @@ struct rxe_dev {
 	struct rxe_pool		mr_pool;
 	struct rxe_pool		mw_pool;
 	struct rxe_pool		mc_grp_pool;
-	struct rxe_pool		mc_elem_pool;
+
+	/* multicast support */
+	spinlock_t		mcg_lock;
+	struct rb_root		mcg_tree;
+	atomic_t		mcg_num;
+	atomic_t		mcg_attach;
 
 	spinlock_t		pending_lock; /* guard pending_mmaps */
 	struct list_head	pending_mmaps;
@@ -482,6 +481,4 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
 
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
 
-void rxe_mc_cleanup(struct rxe_pool_elem *elem);
-
 #endif /* RXE_VERBS_H */
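For completeness, inserting a group into the new mcg_tree keyed by mgid pairs with the lookup sketched earlier; again this is an illustration built from the fields above, not the rxe_mcast.c implementation:

	/* Hedged sketch of an rb-tree insert for rxe->mcg_tree. */
	static void example_insert_mcg(struct rxe_dev *rxe, struct rxe_mcg *mcg)
	{
		struct rb_node **link = &rxe->mcg_tree.rb_node;
		struct rb_node *parent = NULL;
		struct rxe_mcg *tmp;

		while (*link) {
			parent = *link;
			tmp = rb_entry(parent, struct rxe_mcg, node);
			if (memcmp(&mcg->mgid, &tmp->mgid, sizeof(mcg->mgid)) < 0)
				link = &(*link)->rb_left;
			else
				link = &(*link)->rb_right;
		}

		rb_link_node(&mcg->node, parent, link);
		rb_insert_color(&mcg->node, &rxe->mcg_tree);
	}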