Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c       | 110
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h       |   1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c    |  19
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c  |   8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c    |  20
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h   |  32
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mcast.c | 542
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mmap.c  |   1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c    |  15
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mw.c    |  38
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c   |  41
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c  | 433
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h  | 105
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c    |  57
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c |  10
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c  |  26
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c   |  71
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c  | 170
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 108
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h |  27
20 files changed, 866 insertions(+), 968 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index fab291245366..2dae7538a2ea 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -28,8 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev)
rxe_pool_cleanup(&rxe->cq_pool);
rxe_pool_cleanup(&rxe->mr_pool);
rxe_pool_cleanup(&rxe->mw_pool);
- rxe_pool_cleanup(&rxe->mc_grp_pool);
- rxe_pool_cleanup(&rxe->mc_elem_pool);
+
+ WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree));
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
@@ -114,106 +114,37 @@ static void rxe_init_ports(struct rxe_dev *rxe)
}
/* init pools of managed objects */
-static int rxe_init_pools(struct rxe_dev *rxe)
+static void rxe_init_pools(struct rxe_dev *rxe)
{
- int err;
-
- err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
- rxe->max_ucontext);
- if (err)
- goto err1;
-
- err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
- rxe->attr.max_pd);
- if (err)
- goto err2;
-
- err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
- rxe->attr.max_ah);
- if (err)
- goto err3;
-
- err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
- rxe->attr.max_srq);
- if (err)
- goto err4;
-
- err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
- rxe->attr.max_qp);
- if (err)
- goto err5;
-
- err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
- rxe->attr.max_cq);
- if (err)
- goto err6;
-
- err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
- rxe->attr.max_mr);
- if (err)
- goto err7;
-
- err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
- rxe->attr.max_mw);
- if (err)
- goto err8;
-
- err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
- rxe->attr.max_mcast_grp);
- if (err)
- goto err9;
-
- err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
- rxe->attr.max_total_mcast_qp_attach);
- if (err)
- goto err10;
-
- return 0;
-
-err10:
- rxe_pool_cleanup(&rxe->mc_grp_pool);
-err9:
- rxe_pool_cleanup(&rxe->mw_pool);
-err8:
- rxe_pool_cleanup(&rxe->mr_pool);
-err7:
- rxe_pool_cleanup(&rxe->cq_pool);
-err6:
- rxe_pool_cleanup(&rxe->qp_pool);
-err5:
- rxe_pool_cleanup(&rxe->srq_pool);
-err4:
- rxe_pool_cleanup(&rxe->ah_pool);
-err3:
- rxe_pool_cleanup(&rxe->pd_pool);
-err2:
- rxe_pool_cleanup(&rxe->uc_pool);
-err1:
- return err;
+ rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC);
+ rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD);
+ rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH);
+ rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ);
+ rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP);
+ rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ);
+ rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR);
+ rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW);
}
/* initialize rxe device state */
-static int rxe_init(struct rxe_dev *rxe)
+static void rxe_init(struct rxe_dev *rxe)
{
- int err;
-
/* init default device parameters */
rxe_init_device_param(rxe);
rxe_init_ports(rxe);
-
- err = rxe_init_pools(rxe);
- if (err)
- return err;
+ rxe_init_pools(rxe);
/* init pending mmap list */
spin_lock_init(&rxe->mmap_offset_lock);
spin_lock_init(&rxe->pending_lock);
INIT_LIST_HEAD(&rxe->pending_mmaps);
- mutex_init(&rxe->usdev_lock);
+ /* init multicast support */
+ spin_lock_init(&rxe->mcg_lock);
+ rxe->mcg_tree = RB_ROOT;
- return 0;
+ mutex_init(&rxe->usdev_lock);
}
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
@@ -235,12 +166,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
*/
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
{
- int err;
-
- err = rxe_init(rxe);
- if (err)
- return err;
-
+ rxe_init(rxe);
rxe_set_mtu(rxe, mtu);
return rxe_register_device(rxe, ibdev_name);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index fb9066e6f5f0..30fbdf3bc76a 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -12,7 +12,6 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
#include <linux/skbuff.h>
#include <rdma/ib_verbs.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 38c7b6fb39d7..3b05314ca739 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
av->network_type = type;
}
-struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
{
struct rxe_ah *ah;
u32 ah_num;
+ if (ahp)
+ *ahp = NULL;
+
if (!pkt || !pkt->qp)
return NULL;
@@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
if (ah_num) {
/* only new user provider or kernel client */
ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
- if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) {
+ if (!ah) {
pr_warn("Unable to find AH matching ah_num\n");
return NULL;
}
+
+ if (rxe_ah_pd(ah) != pkt->qp->pd) {
+ pr_warn("PDs don't match for AH and QP\n");
+ rxe_put(ah);
+ return NULL;
+ }
+
+ if (ahp)
+ *ahp = ah;
+ else
+ rxe_put(ah);
+
return &ah->av;
}
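
With the reworked rxe_get_av(), passing a non-NULL ahp returns the AH with a reference held, which the caller owns and must drop; passing NULL keeps the old borrow-only behavior. A hedged caller-side sketch (this exact call site is illustrative, not part of the patch):

	struct rxe_ah *ah = NULL;
	struct rxe_av *av;

	av = rxe_get_av(pkt, &ah);	/* takes a ref on the AH when one is used */
	if (!av)
		return -EINVAL;

	/* ... build and send the packet using av ... */

	if (ah)
		rxe_put(ah);		/* drop the reference taken by rxe_get_av() */
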
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index f363fe3fa414..138b3e7d3a5f 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -526,7 +526,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
struct rxe_queue *q = qp->sq.queue;
while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -548,7 +548,7 @@ static void free_pkt(struct rxe_pkt_info *pkt)
struct ib_device *dev = qp->ibqp.device;
kfree_skb(skb);
- rxe_drop_ref(qp);
+ rxe_put(qp);
ib_device_put(dev);
}
@@ -562,7 +562,7 @@ int rxe_completer(void *arg)
enum comp_state state;
int ret = 0;
- rxe_add_ref(qp);
+ rxe_get(qp);
if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
qp->req.state == QP_STATE_RESET) {
@@ -761,7 +761,7 @@ int rxe_completer(void *arg)
done:
if (pkt)
free_pkt(pkt);
- rxe_drop_ref(qp);
+ rxe_put(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 6baaaa34458e..642b52539ac3 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -42,13 +42,14 @@ err1:
static void rxe_send_complete(struct tasklet_struct *t)
{
struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
+ unsigned long flags;
- spin_lock_bh(&cq->cq_lock);
+ spin_lock_irqsave(&cq->cq_lock, flags);
if (cq->is_dying) {
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
return;
}
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
@@ -107,12 +108,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
struct ib_event ev;
int full;
void *addr;
+ unsigned long flags;
- spin_lock_bh(&cq->cq_lock);
+ spin_lock_irqsave(&cq->cq_lock, flags);
full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (unlikely(full)) {
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
ev.device = cq->ibcq.device;
ev.element.cq = &cq->ibcq;
@@ -128,7 +130,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
if ((cq->notify == IB_CQ_NEXT_COMP) ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -141,9 +143,11 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
void rxe_cq_disable(struct rxe_cq *cq)
{
- spin_lock_bh(&cq->cq_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
cq->is_dying = true;
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
}
void rxe_cq_cleanup(struct rxe_pool_elem *elem)
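
The _bh to _irqsave conversions in rxe_cq.c (and in rxe_queue.c below) follow the standard idiom for a lock that may now be taken with interrupts already disabled, a context where spin_lock_bh() is not allowed. A generic sketch of the pattern, using a made-up structure:

	static void post_event(struct demo_obj *obj)
	{
		unsigned long flags;

		/* safe from process, softirq and hardirq context alike */
		spin_lock_irqsave(&obj->lock, flags);
		obj->nr_events++;
		spin_unlock_irqrestore(&obj->lock, flags);
	}
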
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index b1e174afb1d4..2ffbe3390668 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
-struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
@@ -40,18 +40,10 @@ void rxe_cq_disable(struct rxe_cq *cq);
void rxe_cq_cleanup(struct rxe_pool_elem *arg);
/* rxe_mcast.c */
-int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mc_grp **grp_p);
-
-int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_mc_grp *grp);
-
-int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- union ib_gid *mgid);
-
-void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
-
-void rxe_mc_cleanup(struct rxe_pool_elem *arg);
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
+int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
+int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
+void rxe_cleanup_mcg(struct kref *kref);
/* rxe_mmap.c */
struct rxe_mmap_info {
@@ -102,35 +94,27 @@ void rxe_mw_cleanup(struct rxe_pool_elem *arg);
/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb);
+int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb);
int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
-int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
-int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
-
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
struct ib_pd *ibpd, struct ib_udata *udata);
-
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
-
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask);
-
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask, struct ib_udata *udata);
-
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
-
void rxe_qp_error(struct rxe_qp *qp);
-
+int rxe_qp_chk_destroy(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
-
void rxe_qp_cleanup(struct rxe_pool_elem *elem);
static inline int qp_num(struct rxe_qp *qp)
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index bd1ac88b8700..ae8f11cb704a 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -1,178 +1,488 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
+ * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
+/*
+ * rxe_mcast.c implements driver support for multicast transport.
+ * It is based on two data structures: struct rxe_mcg ('mcg') and
+ * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
+ * is attached to a new mgid. These are indexed by
+ * a red-black tree using the mgid. This data structure is searched
+ * for the mcg when a multicast packet is received and when another
+ * qp is attached to the same mgid. It is cleaned up when the last qp
+ * is detached from the mcg. Each time a qp is attached to an mcg an
+ * mca is created. It holds a pointer to the qp and is added to a list
+ * of qp's that are attached to the mcg. The qp_list is used to replicate
+ * mcast packets in the rxe receive path.
+ */
+
#include "rxe.h"
-#include "rxe_loc.h"
-/* caller should hold mc_grp_pool->pool_lock */
-static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe,
- struct rxe_pool *pool,
- union ib_gid *mgid)
+/**
+ * rxe_mcast_add - add multicast address to rxe device
+ * @rxe: rxe device object
+ * @mgid: multicast address as a gid
+ *
+ * Returns 0 on success else an error
+ */
+static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
- int err;
- struct rxe_mc_grp *grp;
+ unsigned char ll_addr[ETH_ALEN];
- grp = rxe_alloc_locked(&rxe->mc_grp_pool);
- if (!grp)
- return ERR_PTR(-ENOMEM);
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- INIT_LIST_HEAD(&grp->qp_list);
- spin_lock_init(&grp->mcg_lock);
- grp->rxe = rxe;
- rxe_add_key_locked(grp, mgid);
+ return dev_mc_add(rxe->ndev, ll_addr);
+}
- err = rxe_mcast_add(rxe, mgid);
- if (unlikely(err)) {
- rxe_drop_key_locked(grp);
- rxe_drop_ref(grp);
- return ERR_PTR(err);
+/**
+ * rxe_mcast_delete - delete multicast address from rxe device
+ * @rxe: rxe device object
+ * @mgid: multicast address as a gid
+ *
+ * Returns 0 on success else an error
+ */
+static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ unsigned char ll_addr[ETH_ALEN];
+
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+
+ return dev_mc_del(rxe->ndev, ll_addr);
+}
+
+/**
+ * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
+ * @mcg: mcg object with an embedded red-black tree node
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock and
+ * is responsible for not adding the same mcg to the tree twice.
+ */
+static void __rxe_insert_mcg(struct rxe_mcg *mcg)
+{
+ struct rb_root *tree = &mcg->rxe->mcg_tree;
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *node = NULL;
+ struct rxe_mcg *tmp;
+ int cmp;
+
+ while (*link) {
+ node = *link;
+ tmp = rb_entry(node, struct rxe_mcg, node);
+
+ cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
+ if (cmp > 0)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
}
- return grp;
+ rb_link_node(&mcg->node, node, link);
+ rb_insert_color(&mcg->node, tree);
}
-int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mc_grp **grp_p)
+/**
+ * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
+ * @mcg: mcast group object with an embedded red-black tree node
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
- int err;
- struct rxe_mc_grp *grp;
- struct rxe_pool *pool = &rxe->mc_grp_pool;
+ rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
+}
- if (rxe->attr.max_mcast_qp_attach == 0)
- return -EINVAL;
+/**
+ * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Context: caller must hold rxe->mcg_lock
+ * Returns: mcg (with a reference taken) on success, else NULL
+ */
+static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
+ union ib_gid *mgid)
+{
+ struct rb_root *tree = &rxe->mcg_tree;
+ struct rxe_mcg *mcg;
+ struct rb_node *node;
+ int cmp;
- write_lock_bh(&pool->pool_lock);
+ node = tree->rb_node;
- grp = rxe_pool_get_key_locked(pool, mgid);
- if (grp)
- goto done;
+ while (node) {
+ mcg = rb_entry(node, struct rxe_mcg, node);
- grp = create_grp(rxe, pool, mgid);
- if (IS_ERR(grp)) {
- write_unlock_bh(&pool->pool_lock);
- err = PTR_ERR(grp);
- return err;
+ cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
+
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else
+ break;
}
-done:
- write_unlock_bh(&pool->pool_lock);
- *grp_p = grp;
+ if (node) {
+ kref_get(&mcg->ref_cnt);
+ return mcg;
+ }
+
+ return NULL;
+}
+
+/**
+ * rxe_lookup_mcg - look up mcg in red-black tree
+ * @rxe: rxe device object
+ * @mgid: multicast IP address
+ *
+ * Returns: mcg if found else NULL
+ */
+struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ struct rxe_mcg *mcg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rxe->mcg_lock, flags);
+ mcg = __rxe_lookup_mcg(rxe, mgid);
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+
+ return mcg;
+}
+
+/**
+ * __rxe_init_mcg - initialize a new mcg
+ * @rxe: rxe device
+ * @mgid: multicast address as a gid
+ * @mcg: new mcg object
+ *
+ * Context: caller should hold rxe->mcg_lock
+ * Returns: 0 on success else an error
+ */
+static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mcg *mcg)
+{
+ int err;
+
+ err = rxe_mcast_add(rxe, mgid);
+ if (unlikely(err))
+ return err;
+
+ kref_init(&mcg->ref_cnt);
+ memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
+ INIT_LIST_HEAD(&mcg->qp_list);
+ mcg->rxe = rxe;
+
+ /* caller holds a ref on mcg but that will be
+ * dropped when mcg goes out of scope. We need to take a ref
+ * on the pointer that will be saved in the red-black tree
+ * by __rxe_insert_mcg and used to lookup mcg from mgid later.
+ * Inserting mcg makes it visible to outside so this should
+ * be done last after the object is ready.
+ */
+ kref_get(&mcg->ref_cnt);
+ __rxe_insert_mcg(mcg);
+
return 0;
}
-int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_mc_grp *grp)
+/**
+ * rxe_get_mcg - look up or allocate an mcg
+ * @rxe: rxe device object
+ * @mgid: multicast IP address as a gid
+ *
+ * Returns: mcg on success else ERR_PTR(error)
+ */
+static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
+ struct rxe_mcg *mcg, *tmp;
+ unsigned long flags;
int err;
- struct rxe_mc_elem *elem;
- /* check to see of the qp is already a member of the group */
- spin_lock_bh(&qp->grp_lock);
- spin_lock_bh(&grp->mcg_lock);
- list_for_each_entry(elem, &grp->qp_list, qp_list) {
- if (elem->qp == qp) {
- err = 0;
- goto out;
- }
- }
+ if (rxe->attr.max_mcast_grp == 0)
+ return ERR_PTR(-EINVAL);
- if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
- err = -ENOMEM;
+ /* check to see if mcg already exists */
+ mcg = rxe_lookup_mcg(rxe, mgid);
+ if (mcg)
+ return mcg;
+
+ /* speculative alloc of new mcg */
+ mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
+ if (!mcg)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&rxe->mcg_lock, flags);
+ /* re-check to see if someone else just added it */
+ tmp = __rxe_lookup_mcg(rxe, mgid);
+ if (tmp) {
+ kfree(mcg);
+ mcg = tmp;
goto out;
}
- elem = rxe_alloc_locked(&rxe->mc_elem_pool);
- if (!elem) {
+ if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
err = -ENOMEM;
- goto out;
+ goto err_dec;
+ }
+
+ err = __rxe_init_mcg(rxe, mgid, mcg);
+ if (err)
+ goto err_dec;
+out:
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ return mcg;
+
+err_dec:
+ atomic_dec(&rxe->mcg_num);
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ kfree(mcg);
+ return ERR_PTR(err);
+}
+
+/**
+ * rxe_cleanup_mcg - cleanup mcg for kref_put
+ * @kref: struct kref embedded in mcg
+ */
+void rxe_cleanup_mcg(struct kref *kref)
+{
+ struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
+
+ kfree(mcg);
+}
+
+/**
+ * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
+ * @mcg: the mcg object
+ *
+ * Context: caller is holding rxe->mcg_lock
+ * no qp's are attached to mcg
+ */
+static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+
+ /* remove mcg from red-black tree then drop ref */
+ __rxe_remove_mcg(mcg);
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ rxe_mcast_delete(mcg->rxe, &mcg->mgid);
+ atomic_dec(&rxe->mcg_num);
+}
+
+/**
+ * rxe_destroy_mcg - destroy mcg object
+ * @mcg: the mcg object
+ *
+ * Context: no qp's are attached to mcg
+ */
+static void rxe_destroy_mcg(struct rxe_mcg *mcg)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+ __rxe_destroy_mcg(mcg);
+ spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+}
+
+/**
+ * __rxe_init_mca - initialize a new mca holding lock
+ * @qp: qp object
+ * @mcg: mcg object
+ * @mca: empty space for new mca
+ *
+ * Context: caller must hold references on qp and mcg, rxe->mcg_lock
+ * and pass memory for new mca
+ *
+ * Returns: 0 on success else an error
+ */
+static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
+ struct rxe_mca *mca)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int n;
+
+ n = atomic_inc_return(&rxe->mcg_attach);
+ if (n > rxe->attr.max_total_mcast_qp_attach) {
+ atomic_dec(&rxe->mcg_attach);
+ return -ENOMEM;
}
- /* each qp holds a ref on the grp */
- rxe_add_ref(grp);
+ n = atomic_inc_return(&mcg->qp_num);
+ if (n > rxe->attr.max_mcast_qp_attach) {
+ atomic_dec(&mcg->qp_num);
+ atomic_dec(&rxe->mcg_attach);
+ return -ENOMEM;
+ }
+
+ atomic_inc(&qp->mcg_num);
- grp->num_qp++;
- elem->qp = qp;
- elem->grp = grp;
+ rxe_get(qp);
+ mca->qp = qp;
- list_add(&elem->qp_list, &grp->qp_list);
- list_add(&elem->grp_list, &qp->grp_list);
+ list_add_tail(&mca->qp_list, &mcg->qp_list);
- err = 0;
+ return 0;
+}
+
+/**
+ * rxe_attach_mcg - attach qp to mcg if not already attached
+ * @qp: qp object
+ * @mcg: mcg object
+ *
+ * Context: caller must hold reference on qp and mcg.
+ * Returns: 0 on success else an error
+ */
+static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+ struct rxe_mca *mca, *tmp;
+ unsigned long flags;
+ int err;
+
+ /* check to see if the qp is already a member of the group */
+ spin_lock_irqsave(&rxe->mcg_lock, flags);
+ list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ if (mca->qp == qp) {
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ return 0;
+ }
+ }
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+
+ /* speculative alloc of new mca without using GFP_ATOMIC */
+ mca = kzalloc(sizeof(*mca), GFP_KERNEL);
+ if (!mca)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&rxe->mcg_lock, flags);
+ /* re-check to see if someone else just attached qp */
+ list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
+ if (tmp->qp == qp) {
+ kfree(mca);
+ err = 0;
+ goto out;
+ }
+ }
+
+ err = __rxe_init_mca(qp, mcg, mca);
+ if (err)
+ kfree(mca);
out:
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
return err;
}
-int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
- union ib_gid *mgid)
+/**
+ * __rxe_cleanup_mca - cleanup mca object holding lock
+ * @mca: mca object
+ * @mcg: mcg object
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
- struct rxe_mc_grp *grp;
- struct rxe_mc_elem *elem, *tmp;
+ list_del(&mca->qp_list);
- grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
- if (!grp)
- goto err1;
+ atomic_dec(&mcg->qp_num);
+ atomic_dec(&mcg->rxe->mcg_attach);
+ atomic_dec(&mca->qp->mcg_num);
+ rxe_put(mca->qp);
- spin_lock_bh(&qp->grp_lock);
- spin_lock_bh(&grp->mcg_lock);
+ kfree(mca);
+}
- list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
- if (elem->qp == qp) {
- list_del(&elem->qp_list);
- list_del(&elem->grp_list);
- grp->num_qp--;
+/**
+ * rxe_detach_mcg - detach qp from mcg
+ * @mcg: mcg object
+ * @qp: qp object
+ *
+ * Returns: 0 on success else an error if qp is not attached.
+ */
+static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
+{
+ struct rxe_dev *rxe = mcg->rxe;
+ struct rxe_mca *mca, *tmp;
+ unsigned long flags;
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
- rxe_drop_ref(elem);
- rxe_drop_ref(grp); /* ref held by QP */
- rxe_drop_ref(grp); /* ref from get_key */
+ spin_lock_irqsave(&rxe->mcg_lock, flags);
+ list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
+ if (mca->qp == qp) {
+ __rxe_cleanup_mca(mca, mcg);
+
+ /* if the number of qp's attached to the
+ * mcast group falls to zero go ahead and
+ * tear it down. This will not free the
+ * object since we are still holding a ref
+ * from the caller
+ */
+ if (atomic_read(&mcg->qp_num) <= 0)
+ __rxe_destroy_mcg(mcg);
+
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
return 0;
}
}
- spin_unlock_bh(&grp->mcg_lock);
- spin_unlock_bh(&qp->grp_lock);
- rxe_drop_ref(grp); /* ref from get_key */
-err1:
+ /* we didn't find the qp on the list */
+ spin_unlock_irqrestore(&rxe->mcg_lock, flags);
return -EINVAL;
}
-void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
+/**
+ * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
+ * @ibqp: (IB) qp object
+ * @mgid: multicast IP address
+ * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
+ *
+ * Returns: 0 on success else an errno
+ */
+int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
- struct rxe_mc_grp *grp;
- struct rxe_mc_elem *elem;
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_mcg *mcg;
- while (1) {
- spin_lock_bh(&qp->grp_lock);
- if (list_empty(&qp->grp_list)) {
- spin_unlock_bh(&qp->grp_lock);
- break;
- }
- elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
- grp_list);
- list_del(&elem->grp_list);
- spin_unlock_bh(&qp->grp_lock);
-
- grp = elem->grp;
- spin_lock_bh(&grp->mcg_lock);
- list_del(&elem->qp_list);
- grp->num_qp--;
- spin_unlock_bh(&grp->mcg_lock);
- rxe_drop_ref(grp);
- rxe_drop_ref(elem);
- }
+ /* takes a ref on mcg if successful */
+ mcg = rxe_get_mcg(rxe, mgid);
+ if (IS_ERR(mcg))
+ return PTR_ERR(mcg);
+
+ err = rxe_attach_mcg(mcg, qp);
+
+ /* if we failed to attach the first qp to mcg tear it down */
+ if (atomic_read(&mcg->qp_num) == 0)
+ rxe_destroy_mcg(mcg);
+
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ return err;
}
-void rxe_mc_cleanup(struct rxe_pool_elem *elem)
+/**
+ * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
+ * @ibqp: address of (IB) qp object
+ * @mgid: multicast IP address
+ * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
+ *
+ * Returns: 0 on success else an errno
+ */
+int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
- struct rxe_mc_grp *grp = container_of(elem, typeof(*grp), elem);
- struct rxe_dev *rxe = grp->rxe;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_mcg *mcg;
+ int err;
- rxe_drop_key(grp);
- rxe_mcast_delete(rxe, &grp->mgid);
+ mcg = rxe_lookup_mcg(rxe, mgid);
+ if (!mcg)
+ return -EINVAL;
+
+ err = rxe_detach_mcg(mcg, qp);
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+
+ return err;
}
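
Both rxe_get_mcg() and rxe_attach_mcg() above rely on the same two-phase idiom: allocate speculatively with GFP_KERNEL outside the spinlock, then re-check for a concurrent insertion under the lock and free the loser. A distilled sketch with illustrative types (obj_lookup(), __obj_lookup() and __obj_insert() are stand-ins, not rxe functions):

	struct obj *obj_get_or_create(struct table *tbl, const void *key)
	{
		struct obj *obj, *tmp;
		unsigned long flags;

		obj = obj_lookup(tbl, key);		/* fast path, takes a ref */
		if (obj)
			return obj;

		/* allocate while sleeping is still allowed */
		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
		if (!obj)
			return ERR_PTR(-ENOMEM);

		spin_lock_irqsave(&tbl->lock, flags);
		tmp = __obj_lookup(tbl, key);		/* re-check under the lock */
		if (tmp) {
			kfree(obj);			/* lost the race, use winner */
			obj = tmp;
		} else {
			__obj_insert(tbl, key, obj);
		}
		spin_unlock_irqrestore(&tbl->lock, flags);

		return obj;
	}
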
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 035f226af133..9149b6095429 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -4,7 +4,6 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 453ef3c9d535..60a31b718774 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -459,7 +459,7 @@ int copy_data(
if (offset >= sge->length) {
if (mr) {
- rxe_drop_ref(mr);
+ rxe_put(mr);
mr = NULL;
}
sge++;
@@ -504,13 +504,13 @@ int copy_data(
dma->resid = resid;
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
return 0;
err2:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
err1:
return err;
}
@@ -569,7 +569,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
(type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
mr_pd(mr) != pd || (access && !(access & mr->access)) ||
mr->state != RXE_MR_STATE_VALID)) {
- rxe_drop_ref(mr);
+ rxe_put(mr);
mr = NULL;
}
@@ -613,7 +613,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
ret = 0;
err_drop_ref:
- rxe_drop_ref(mr);
+ rxe_put(mr);
err:
return ret;
}
@@ -690,9 +690,8 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
mr->state = RXE_MR_STATE_INVALID;
- rxe_drop_ref(mr_pd(mr));
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_put(mr_pd(mr));
+ rxe_put(mr);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 32dd8c0b8b9e..c86b2efd58f2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -12,15 +12,14 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
struct rxe_dev *rxe = to_rdev(ibmw->device);
int ret;
- rxe_add_ref(pd);
+ rxe_get(pd);
ret = rxe_add_to_pool(&rxe->mw_pool, mw);
if (ret) {
- rxe_drop_ref(pd);
+ rxe_put(pd);
return ret;
}
- rxe_add_index(mw);
mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1);
mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
@@ -36,14 +35,14 @@ static void rxe_do_dealloc_mw(struct rxe_mw *mw)
mw->mr = NULL;
atomic_dec(&mr->num_mw);
- rxe_drop_ref(mr);
+ rxe_put(mr);
}
if (mw->qp) {
struct rxe_qp *qp = mw->qp;
mw->qp = NULL;
- rxe_drop_ref(qp);
+ rxe_put(qp);
}
mw->access = 0;
@@ -61,8 +60,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
rxe_do_dealloc_mw(mw);
spin_unlock_bh(&mw->lock);
- rxe_drop_ref(mw);
- rxe_drop_ref(pd);
+ rxe_put(mw);
+ rxe_put(pd);
return 0;
}
@@ -171,7 +170,7 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
mw->length = wqe->wr.wr.mw.length;
if (mw->mr) {
- rxe_drop_ref(mw->mr);
+ rxe_put(mw->mr);
atomic_dec(&mw->mr->num_mw);
mw->mr = NULL;
}
@@ -179,11 +178,11 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (mw->length) {
mw->mr = mr;
atomic_inc(&mr->num_mw);
- rxe_add_ref(mr);
+ rxe_get(mr);
}
if (mw->ibmw.type == IB_MW_TYPE_2) {
- rxe_add_ref(qp);
+ rxe_get(qp);
mw->qp = qp;
}
}
@@ -234,9 +233,9 @@ err_unlock:
spin_unlock_bh(&mw->lock);
err_drop_mr:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
err_drop_mw:
- rxe_drop_ref(mw);
+ rxe_put(mw);
err:
return ret;
}
@@ -261,13 +260,13 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw)
/* valid type 2 MW will always have a QP pointer */
qp = mw->qp;
mw->qp = NULL;
- rxe_drop_ref(qp);
+ rxe_put(qp);
/* valid type 2 MW will always have an MR pointer */
mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
- rxe_drop_ref(mr);
+ rxe_put(mr);
mw->access = 0;
mw->addr = 0;
@@ -302,7 +301,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
err_unlock:
spin_unlock_bh(&mw->lock);
err_drop_ref:
- rxe_drop_ref(mw);
+ rxe_put(mw);
err:
return ret;
}
@@ -323,16 +322,9 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
(mw->length == 0) ||
(access && !(access & mw->access)) ||
mw->state != RXE_MW_STATE_VALID)) {
- rxe_drop_ref(mw);
+ rxe_put(mw);
return NULL;
}
return mw;
}
-
-void rxe_mw_cleanup(struct rxe_pool_elem *elem)
-{
- struct rxe_mw *mw = container_of(elem, typeof(*mw), elem);
-
- rxe_drop_index(mw);
-}
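
As seen above, an MW rkey packs the pool index into the upper 24 bits with an 8-bit variable key below it. A worked example with hypothetical values:

	u32 index = 0x12;			/* hypothetical pool index */
	u8 key = 0x5a;				/* from rxe_get_next_key(-1) */
	u32 rkey = (index << 8) | key;		/* == 0x125a */
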
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index be72bdbfb4ba..c53f4529f098 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -20,24 +20,6 @@
static struct rxe_recv_sockets recv_sockets;
-int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
-{
- unsigned char ll_addr[ETH_ALEN];
-
- ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
-
- return dev_mc_add(rxe->ndev, ll_addr);
-}
-
-int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
-{
- unsigned char ll_addr[ETH_ALEN];
-
- ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
-
- return dev_mc_del(rxe->ndev, ll_addr);
-}
-
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
struct in_addr *saddr,
struct in_addr *daddr)
@@ -289,13 +271,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
-static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
- struct rxe_av *av = rxe_get_av(pkt);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
@@ -315,11 +297,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
-static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
- struct rxe_av *av = rxe_get_av(pkt);
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
@@ -340,16 +322,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
int err = 0;
if (skb->protocol == htons(ETH_P_IP))
- err = prepare4(pkt, skb);
+ err = prepare4(av, pkt, skb);
else if (skb->protocol == htons(ETH_P_IPV6))
- err = prepare6(pkt, skb);
+ err = prepare6(av, pkt, skb);
- if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac))
+ if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
pkt->mask |= RXE_LOOPBACK_MASK;
return err;
@@ -365,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
rxe_run_task(&qp->req.task, 1);
- rxe_drop_ref(qp);
+ rxe_put(qp);
}
static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
@@ -375,7 +358,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
skb->destructor = rxe_skb_tx_dtor;
skb->sk = pkt->qp->sk->sk;
- rxe_add_ref(pkt->qp);
+ rxe_get(pkt->qp);
atomic_inc(&pkt->qp->skb_out);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -385,7 +368,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
} else {
pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
atomic_dec(&pkt->qp->skb_out);
- rxe_drop_ref(pkt->qp);
+ rxe_put(pkt->qp);
kfree_skb(skb);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 4cb003885e00..87066d04ed18 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -12,129 +12,93 @@ static const struct rxe_type_info {
const char *name;
size_t size;
size_t elem_offset;
- void (*cleanup)(struct rxe_pool_elem *obj);
+ void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
u32 min_index;
u32 max_index;
- size_t key_offset;
- size_t key_size;
+ u32 max_elem;
} rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
- .name = "rxe-uc",
+ .name = "uc",
.size = sizeof(struct rxe_ucontext),
.elem_offset = offsetof(struct rxe_ucontext, elem),
- .flags = RXE_POOL_NO_ALLOC,
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_PD] = {
- .name = "rxe-pd",
+ .name = "pd",
.size = sizeof(struct rxe_pd),
.elem_offset = offsetof(struct rxe_pd, elem),
- .flags = RXE_POOL_NO_ALLOC,
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_AH] = {
- .name = "rxe-ah",
+ .name = "ah",
.size = sizeof(struct rxe_ah),
.elem_offset = offsetof(struct rxe_ah, elem),
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_AH_INDEX,
.max_index = RXE_MAX_AH_INDEX,
+ .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1,
},
[RXE_TYPE_SRQ] = {
- .name = "rxe-srq",
+ .name = "srq",
.size = sizeof(struct rxe_srq),
.elem_offset = offsetof(struct rxe_srq, elem),
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
+ .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
},
[RXE_TYPE_QP] = {
- .name = "rxe-qp",
+ .name = "qp",
.size = sizeof(struct rxe_qp),
.elem_offset = offsetof(struct rxe_qp, elem),
.cleanup = rxe_qp_cleanup,
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_QP_INDEX,
.max_index = RXE_MAX_QP_INDEX,
+ .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1,
},
[RXE_TYPE_CQ] = {
- .name = "rxe-cq",
+ .name = "cq",
.size = sizeof(struct rxe_cq),
.elem_offset = offsetof(struct rxe_cq, elem),
- .flags = RXE_POOL_NO_ALLOC,
.cleanup = rxe_cq_cleanup,
+ .min_index = 1,
+ .max_index = UINT_MAX,
+ .max_elem = UINT_MAX,
},
[RXE_TYPE_MR] = {
- .name = "rxe-mr",
+ .name = "mr",
.size = sizeof(struct rxe_mr),
.elem_offset = offsetof(struct rxe_mr, elem),
.cleanup = rxe_mr_cleanup,
- .flags = RXE_POOL_INDEX,
+ .flags = RXE_POOL_ALLOC,
.min_index = RXE_MIN_MR_INDEX,
.max_index = RXE_MAX_MR_INDEX,
+ .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
},
[RXE_TYPE_MW] = {
- .name = "rxe-mw",
+ .name = "mw",
.size = sizeof(struct rxe_mw),
.elem_offset = offsetof(struct rxe_mw, elem),
- .cleanup = rxe_mw_cleanup,
- .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.min_index = RXE_MIN_MW_INDEX,
.max_index = RXE_MAX_MW_INDEX,
- },
- [RXE_TYPE_MC_GRP] = {
- .name = "rxe-mc_grp",
- .size = sizeof(struct rxe_mc_grp),
- .elem_offset = offsetof(struct rxe_mc_grp, elem),
- .cleanup = rxe_mc_cleanup,
- .flags = RXE_POOL_KEY,
- .key_offset = offsetof(struct rxe_mc_grp, mgid),
- .key_size = sizeof(union ib_gid),
- },
- [RXE_TYPE_MC_ELEM] = {
- .name = "rxe-mc_elem",
- .size = sizeof(struct rxe_mc_elem),
- .elem_offset = offsetof(struct rxe_mc_elem, elem),
+ .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
},
};
-static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
-{
- int err = 0;
-
- if ((max - min + 1) < pool->max_elem) {
- pr_warn("not enough indices for max_elem\n");
- err = -EINVAL;
- goto out;
- }
-
- pool->index.max_index = max;
- pool->index.min_index = min;
-
- pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL);
- if (!pool->index.table) {
- err = -ENOMEM;
- goto out;
- }
-
-out:
- return err;
-}
-
-int rxe_pool_init(
- struct rxe_dev *rxe,
- struct rxe_pool *pool,
- enum rxe_elem_type type,
- unsigned int max_elem)
+void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+ enum rxe_elem_type type)
{
const struct rxe_type_info *info = &rxe_type_info[type];
- int err = 0;
memset(pool, 0, sizeof(*pool));
pool->rxe = rxe;
pool->name = info->name;
pool->type = type;
- pool->max_elem = max_elem;
+ pool->max_elem = info->max_elem;
pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN);
pool->elem_offset = info->elem_offset;
pool->flags = info->flags;
@@ -142,225 +106,31 @@ int rxe_pool_init(
atomic_set(&pool->num_elem, 0);
- rwlock_init(&pool->pool_lock);
-
- if (pool->flags & RXE_POOL_INDEX) {
- pool->index.tree = RB_ROOT;
- err = rxe_pool_init_index(pool, info->max_index,
- info->min_index);
- if (err)
- goto out;
- }
-
- if (pool->flags & RXE_POOL_KEY) {
- pool->key.tree = RB_ROOT;
- pool->key.key_offset = info->key_offset;
- pool->key.key_size = info->key_size;
- }
-
-out:
- return err;
+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
+ pool->limit.min = info->min_index;
+ pool->limit.max = info->max_index;
}
void rxe_pool_cleanup(struct rxe_pool *pool)
{
- if (atomic_read(&pool->num_elem) > 0)
- pr_warn("%s pool destroyed with unfree'd elem\n",
- pool->name);
-
- if (pool->flags & RXE_POOL_INDEX)
- bitmap_free(pool->index.table);
-}
-
-static u32 alloc_index(struct rxe_pool *pool)
-{
- u32 index;
- u32 range = pool->index.max_index - pool->index.min_index + 1;
-
- index = find_next_zero_bit(pool->index.table, range, pool->index.last);
- if (index >= range)
- index = find_first_zero_bit(pool->index.table, range);
-
- WARN_ON_ONCE(index >= range);
- set_bit(index, pool->index.table);
- pool->index.last = index;
- return index + pool->index.min_index;
-}
-
-static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_elem *new)
-{
- struct rb_node **link = &pool->index.tree.rb_node;
- struct rb_node *parent = NULL;
- struct rxe_pool_elem *elem;
-
- while (*link) {
- parent = *link;
- elem = rb_entry(parent, struct rxe_pool_elem, index_node);
-
- if (elem->index == new->index) {
- pr_warn("element already exists!\n");
- return -EINVAL;
- }
-
- if (elem->index > new->index)
- link = &(*link)->rb_left;
- else
- link = &(*link)->rb_right;
- }
-
- rb_link_node(&new->index_node, parent, link);
- rb_insert_color(&new->index_node, &pool->index.tree);
-
- return 0;
-}
-
-static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_elem *new)
-{
- struct rb_node **link = &pool->key.tree.rb_node;
- struct rb_node *parent = NULL;
- struct rxe_pool_elem *elem;
- int cmp;
-
- while (*link) {
- parent = *link;
- elem = rb_entry(parent, struct rxe_pool_elem, key_node);
-
- cmp = memcmp((u8 *)elem + pool->key.key_offset,
- (u8 *)new + pool->key.key_offset,
- pool->key.key_size);
-
- if (cmp == 0) {
- pr_warn("key already exists!\n");
- return -EINVAL;
- }
-
- if (cmp > 0)
- link = &(*link)->rb_left;
- else
- link = &(*link)->rb_right;
- }
-
- rb_link_node(&new->key_node, parent, link);
- rb_insert_color(&new->key_node, &pool->key.tree);
-
- return 0;
-}
-
-int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
- err = rxe_insert_key(pool, elem);
-
- return err;
-}
-
-int __rxe_add_key(struct rxe_pool_elem *elem, void *key)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- write_lock_bh(&pool->pool_lock);
- err = __rxe_add_key_locked(elem, key);
- write_unlock_bh(&pool->pool_lock);
-
- return err;
-}
-
-void __rxe_drop_key_locked(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- rb_erase(&elem->key_node, &pool->key.tree);
-}
-
-void __rxe_drop_key(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- write_lock_bh(&pool->pool_lock);
- __rxe_drop_key_locked(elem);
- write_unlock_bh(&pool->pool_lock);
-}
-
-int __rxe_add_index_locked(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- elem->index = alloc_index(pool);
- err = rxe_insert_index(pool, elem);
-
- return err;
-}
-
-int __rxe_add_index(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
- int err;
-
- write_lock_bh(&pool->pool_lock);
- err = __rxe_add_index_locked(elem);
- write_unlock_bh(&pool->pool_lock);
-
- return err;
-}
-
-void __rxe_drop_index_locked(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- clear_bit(elem->index - pool->index.min_index, pool->index.table);
- rb_erase(&elem->index_node, &pool->index.tree);
-}
-
-void __rxe_drop_index(struct rxe_pool_elem *elem)
-{
- struct rxe_pool *pool = elem->pool;
-
- write_lock_bh(&pool->pool_lock);
- __rxe_drop_index_locked(elem);
- write_unlock_bh(&pool->pool_lock);
-}
-
-void *rxe_alloc_locked(struct rxe_pool *pool)
-{
- struct rxe_pool_elem *elem;
- void *obj;
-
- if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
-
- obj = kzalloc(pool->elem_size, GFP_ATOMIC);
- if (!obj)
- goto out_cnt;
-
- elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);
-
- elem->pool = pool;
- elem->obj = obj;
- kref_init(&elem->ref_cnt);
-
- return obj;
-
-out_cnt:
- atomic_dec(&pool->num_elem);
- return NULL;
+ WARN_ON(!xa_empty(&pool->xa));
}
void *rxe_alloc(struct rxe_pool *pool)
{
struct rxe_pool_elem *elem;
void *obj;
+ int err;
+
+ if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC)))
+ return NULL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
+ goto err_cnt;
obj = kzalloc(pool->elem_size, GFP_KERNEL);
if (!obj)
- goto out_cnt;
+ goto err_cnt;
elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);
@@ -368,127 +138,86 @@ void *rxe_alloc(struct rxe_pool *pool)
elem->obj = obj;
kref_init(&elem->ref_cnt);
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+ &pool->next, GFP_KERNEL);
+ if (err)
+ goto err_free;
+
return obj;
-out_cnt:
+err_free:
+ kfree(obj);
+err_cnt:
atomic_dec(&pool->num_elem);
return NULL;
}
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
{
+ int err;
+
+ if (WARN_ON(pool->flags & RXE_POOL_ALLOC))
+ return -EINVAL;
+
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_cnt;
+ goto err_cnt;
elem->pool = pool;
elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);
+ err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+ &pool->next, GFP_KERNEL);
+ if (err)
+ goto err_cnt;
+
return 0;
-out_cnt:
+err_cnt:
atomic_dec(&pool->num_elem);
return -EINVAL;
}
-void rxe_elem_release(struct kref *kref)
-{
- struct rxe_pool_elem *elem =
- container_of(kref, struct rxe_pool_elem, ref_cnt);
- struct rxe_pool *pool = elem->pool;
- void *obj;
-
- if (pool->cleanup)
- pool->cleanup(elem);
-
- if (!(pool->flags & RXE_POOL_NO_ALLOC)) {
- obj = elem->obj;
- kfree(obj);
- }
-
- atomic_dec(&pool->num_elem);
-}
-
-void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index)
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
- struct rb_node *node;
struct rxe_pool_elem *elem;
+ struct xarray *xa = &pool->xa;
+ unsigned long flags;
void *obj;
- node = pool->index.tree.rb_node;
-
- while (node) {
- elem = rb_entry(node, struct rxe_pool_elem, index_node);
-
- if (elem->index > index)
- node = node->rb_left;
- else if (elem->index < index)
- node = node->rb_right;
- else
- break;
- }
-
- if (node) {
- kref_get(&elem->ref_cnt);
+ xa_lock_irqsave(xa, flags);
+ elem = xa_load(xa, index);
+ if (elem && kref_get_unless_zero(&elem->ref_cnt))
obj = elem->obj;
- } else {
+ else
obj = NULL;
- }
-
- return obj;
-}
-
-void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
-{
- void *obj;
-
- read_lock_bh(&pool->pool_lock);
- obj = rxe_pool_get_index_locked(pool, index);
- read_unlock_bh(&pool->pool_lock);
+ xa_unlock_irqrestore(xa, flags);
return obj;
}
-void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key)
+static void rxe_elem_release(struct kref *kref)
{
- struct rb_node *node;
- struct rxe_pool_elem *elem;
- void *obj;
- int cmp;
-
- node = pool->key.tree.rb_node;
+ struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
+ struct rxe_pool *pool = elem->pool;
- while (node) {
- elem = rb_entry(node, struct rxe_pool_elem, key_node);
+ xa_erase(&pool->xa, elem->index);
- cmp = memcmp((u8 *)elem + pool->key.key_offset,
- key, pool->key.key_size);
+ if (pool->cleanup)
+ pool->cleanup(elem);
- if (cmp > 0)
- node = node->rb_left;
- else if (cmp < 0)
- node = node->rb_right;
- else
- break;
- }
+ if (pool->flags & RXE_POOL_ALLOC)
+ kfree(elem->obj);
- if (node) {
- kref_get(&elem->ref_cnt);
- obj = elem->obj;
- } else {
- obj = NULL;
- }
-
- return obj;
+ atomic_dec(&pool->num_elem);
}
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+int __rxe_get(struct rxe_pool_elem *elem)
{
- void *obj;
-
- read_lock_bh(&pool->pool_lock);
- obj = rxe_pool_get_key_locked(pool, key);
- read_unlock_bh(&pool->pool_lock);
+ return kref_get_unless_zero(&elem->ref_cnt);
+}
- return obj;
+int __rxe_put(struct rxe_pool_elem *elem)
+{
+ return kref_put(&elem->ref_cnt, rxe_elem_release);
}
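
The xarray now does double duty: xa_alloc_cyclic() replaces the bitmap index allocator, and xa_load() plus kref_get_unless_zero() replaces the rb-tree walk, so a lookup can never revive an object whose last reference is gone. A minimal standalone sketch of the allocation side, assuming a made-up object and index range (this mirrors, but is not, the rxe_alloc() flow above):

	static int demo_store(struct xarray *xa, void *obj, u32 *next)
	{
		struct xa_limit limit = XA_LIMIT(1, 127);	/* hypothetical range */
		u32 index;
		int err;

		/* cyclic search starts at *next, so freed indices are not
		 * immediately reused; returns 0 (or 1 after wrapping) on
		 * success, a negative errno on failure
		 */
		err = xa_alloc_cyclic(xa, &index, obj, limit, next, GFP_KERNEL);
		if (err < 0)
			return err;

		pr_info("stored at index %u\n", index);
		return 0;
	}

On release, xa_erase() runs before kfree() (see rxe_elem_release() above), so no concurrent rxe_pool_get_index() can win a reference against a half-destroyed element.
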
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 214279310f4d..24bcc786c1b3 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -8,9 +8,7 @@
#define RXE_POOL_H
enum rxe_pool_flags {
- RXE_POOL_INDEX = BIT(1),
- RXE_POOL_KEY = BIT(2),
- RXE_POOL_NO_ALLOC = BIT(4),
+ RXE_POOL_ALLOC = BIT(1),
};
enum rxe_elem_type {
@@ -22,8 +20,6 @@ enum rxe_elem_type {
RXE_TYPE_CQ,
RXE_TYPE_MR,
RXE_TYPE_MW,
- RXE_TYPE_MC_GRP,
- RXE_TYPE_MC_ELEM,
RXE_NUM_TYPES, /* keep me last */
};
@@ -32,20 +28,13 @@ struct rxe_pool_elem {
void *obj;
struct kref ref_cnt;
struct list_head list;
-
- /* only used if keyed */
- struct rb_node key_node;
-
- /* only used if indexed */
- struct rb_node index_node;
u32 index;
};
struct rxe_pool {
struct rxe_dev *rxe;
const char *name;
- rwlock_t pool_lock; /* protects pool add/del/search */
- void (*cleanup)(struct rxe_pool_elem *obj);
+ void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
enum rxe_elem_type type;
@@ -54,36 +43,22 @@ struct rxe_pool {
size_t elem_size;
size_t elem_offset;
- /* only used if indexed */
- struct {
- struct rb_root tree;
- unsigned long *table;
- u32 last;
- u32 max_index;
- u32 min_index;
- } index;
-
- /* only used if keyed */
- struct {
- struct rb_root tree;
- size_t key_offset;
- size_t key_size;
- } key;
+ struct xarray xa;
+ struct xa_limit limit;
+ u32 next;
};
/* initialize a pool of objects with given limit on
* number of elements. gets parameters from rxe_type_info
* pool elements will be allocated out of a slab cache
*/
-int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
- enum rxe_elem_type type, u32 max_elem);
+void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+ enum rxe_elem_type type);
/* free resources from object pool */
void rxe_pool_cleanup(struct rxe_pool *pool);
-/* allocate an object from pool holding and not holding the pool lock */
-void *rxe_alloc_locked(struct rxe_pool *pool);
-
+/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
/* connect already allocated object to pool */
@@ -91,69 +66,17 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem);
#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem)
-/* assign an index to an indexed object and insert object into
- * pool's rb tree holding and not holding the pool_lock
- */
-int __rxe_add_index_locked(struct rxe_pool_elem *elem);
-
-#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->elem)
-
-int __rxe_add_index(struct rxe_pool_elem *elem);
-
-#define rxe_add_index(obj) __rxe_add_index(&(obj)->elem)
-
-/* drop an index and remove object from rb tree
- * holding and not holding the pool_lock
- */
-void __rxe_drop_index_locked(struct rxe_pool_elem *elem);
-
-#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->elem)
-
-void __rxe_drop_index(struct rxe_pool_elem *elem);
-
-#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->elem)
-
-/* assign a key to a keyed object and insert object into
- * pool's rb tree holding and not holding pool_lock
- */
-int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key);
-
-#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->elem, key)
-
-int __rxe_add_key(struct rxe_pool_elem *elem, void *key);
-
-#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->elem, key)
-
-/* remove elem from rb tree holding and not holding the pool_lock */
-void __rxe_drop_key_locked(struct rxe_pool_elem *elem);
-
-#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->elem)
-
-void __rxe_drop_key(struct rxe_pool_elem *elem);
-
-#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->elem)
-
-/* lookup an indexed object from index holding and not holding the pool_lock.
- * takes a reference on object
- */
-void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index);
-
+/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
-/* lookup keyed object from key holding and not holding the pool_lock.
- * takes a reference on the objecti
- */
-void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key);
+int __rxe_get(struct rxe_pool_elem *elem);
-void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
+#define rxe_get(obj) __rxe_get(&(obj)->elem)
-/* cleanup an object when all references are dropped */
-void rxe_elem_release(struct kref *kref);
+int __rxe_put(struct rxe_pool_elem *elem);
-/* take a reference on an object */
-#define rxe_add_ref(obj) kref_get(&(obj)->elem.ref_cnt)
+#define rxe_put(obj) __rxe_put(&(obj)->elem)
-/* drop a reference on an object */
-#define rxe_drop_ref(obj) kref_put(&(obj)->elem.ref_cnt, rxe_elem_release)
+#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)
#endif /* RXE_POOL_H */
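
The rename to rxe_get()/rxe_put() matches the kernel-wide kref_get()/kref_put() convention, and both now report an outcome: rxe_get() returns 0 if the object has already begun release, and rxe_put() returns nonzero when it dropped the last reference. An illustrative (not in-tree) call site:

	if (!rxe_get(qp))
		return -EINVAL;		/* qp already going away */

	/* ... use qp ... */

	if (rxe_put(qp))
		pr_debug("dropped the last reference to qp\n");
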
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 5018b9387694..62acf890af6c 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -135,12 +135,8 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
{
- if (res->type == RXE_ATOMIC_MASK) {
+ if (res->type == RXE_ATOMIC_MASK)
kfree_skb(res->atomic.skb);
- } else if (res->type == RXE_READ_MASK) {
- if (res->read.mr)
- rxe_drop_ref(res->read.mr);
- }
res->type = 0;
}
@@ -188,9 +184,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
break;
}
- INIT_LIST_HEAD(&qp->grp_list);
-
- spin_lock_init(&qp->grp_lock);
spin_lock_init(&qp->state_lock);
atomic_set(&qp->ssn, 0);
@@ -330,11 +323,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- rxe_add_ref(pd);
- rxe_add_ref(rcq);
- rxe_add_ref(scq);
+ rxe_get(pd);
+ rxe_get(rcq);
+ rxe_get(scq);
if (srq)
- rxe_add_ref(srq);
+ rxe_get(srq);
qp->pd = pd;
qp->rcq = rcq;
@@ -366,10 +359,10 @@ err1:
qp->srq = NULL;
if (srq)
- rxe_drop_ref(srq);
- rxe_drop_ref(scq);
- rxe_drop_ref(rcq);
- rxe_drop_ref(pd);
+ rxe_put(srq);
+ rxe_put(scq);
+ rxe_put(rcq);
+ rxe_put(pd);
return err;
}
@@ -528,7 +521,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
qp->resp.sent_psn_nak = 0;
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -770,6 +763,20 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
return 0;
}
+int rxe_qp_chk_destroy(struct rxe_qp *qp)
+{
+ /* See IBA o10-2.2.3
+ * An attempt to destroy a QP while attached to a mcast group
+ * will fail immediately.
+ */
+ if (atomic_read(&qp->mcg_num)) {
+ pr_debug("Attempt to destroy QP while attached to multicast group\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
/* called by the destroy qp verb */
void rxe_qp_destroy(struct rxe_qp *qp)
{
@@ -798,28 +805,24 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
- rxe_drop_all_mcast_groups(qp);
-
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
if (qp->srq)
- rxe_drop_ref(qp->srq);
+ rxe_put(qp->srq);
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
if (qp->scq)
- rxe_drop_ref(qp->scq);
+ rxe_put(qp->scq);
if (qp->rcq)
- rxe_drop_ref(qp->rcq);
+ rxe_put(qp->rcq);
if (qp->pd)
- rxe_drop_ref(qp->pd);
+ rxe_put(qp->pd);
- if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
- qp->resp.mr = NULL;
- }
+ if (qp->resp.mr)
+ rxe_put(qp->resp.mr);
if (qp_type(qp) == IB_QPT_RC)
sk_dst_reset(qp->sk->sk);
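
rxe_qp_chk_destroy() is intended to run in the destroy-QP verb before teardown begins. The rxe_verbs.c hunk is not part of this excerpt, so the following caller is an assumed sketch of its shape, not the actual change:

	static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
	{
		struct rxe_qp *qp = to_rqp(ibqp);
		int err;

		err = rxe_qp_chk_destroy(qp);	/* -EBUSY while attached to an mcg */
		if (err)
			return err;

		rxe_qp_destroy(qp);
		rxe_put(qp);
		return 0;
	}
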
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index a1b283dd2d4c..dbd4971039c0 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -151,6 +151,8 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
struct rxe_queue *new_q;
unsigned int num_elem = *num_elem_p;
int err;
+ unsigned long producer_flags;
+ unsigned long consumer_flags;
new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type);
if (!new_q)
@@ -164,17 +166,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
goto err1;
}
- spin_lock_bh(consumer_lock);
+ spin_lock_irqsave(consumer_lock, consumer_flags);
if (producer_lock) {
- spin_lock_bh(producer_lock);
+ spin_lock_irqsave(producer_lock, producer_flags);
err = resize_finish(q, new_q, num_elem);
- spin_unlock_bh(producer_lock);
+ spin_unlock_irqrestore(producer_lock, producer_flags);
} else {
err = resize_finish(q, new_q, num_elem);
}
- spin_unlock_bh(consumer_lock);
+ spin_unlock_irqrestore(consumer_lock, consumer_flags);
rxe_queue_cleanup(new_q); /* new/old dep on err */
if (err)
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 6a6cc1fa90e4..d09a8b68c962 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -217,7 +217,7 @@ static int hdr_check(struct rxe_pkt_info *pkt)
return 0;
err2:
- rxe_drop_ref(qp);
+ rxe_put(qp);
err1:
return -EINVAL;
}
@@ -233,8 +233,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
- struct rxe_mc_grp *mcg;
- struct rxe_mc_elem *mce;
+ struct rxe_mcg *mcg;
+ struct rxe_mca *mca;
struct rxe_qp *qp;
union ib_gid dgid;
int err;
@@ -246,19 +246,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
/* lookup mcast group corresponding to mgid, takes a ref */
- mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
+ mcg = rxe_lookup_mcg(rxe, &dgid);
if (!mcg)
goto drop; /* mcast group not registered */
- spin_lock_bh(&mcg->mcg_lock);
+ spin_lock_bh(&rxe->mcg_lock);
/* this is an unreliable datagram service, so if delivery
 * of a multicast packet to a single QP fails we simply
 * move on and try the rest of the QPs on the list
 */
- list_for_each_entry(mce, &mcg->qp_list, qp_list) {
- qp = mce->qp;
+ list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ qp = mca->qp;
/* validate qp for incoming packet */
err = check_type_state(rxe, pkt, qp);
@@ -273,7 +273,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
* skb and pass to the QP. Pass the original skb to
* the last QP in the list.
*/
- if (mce->qp_list.next != &mcg->qp_list) {
+ if (mca->qp_list.next != &mcg->qp_list) {
struct sk_buff *cskb;
struct rxe_pkt_info *cpkt;
@@ -288,19 +288,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
cpkt = SKB_TO_PKT(cskb);
cpkt->qp = qp;
- rxe_add_ref(qp);
+ rxe_get(qp);
rxe_rcv_pkt(cpkt, cskb);
} else {
pkt->qp = qp;
- rxe_add_ref(qp);
+ rxe_get(qp);
rxe_rcv_pkt(pkt, skb);
skb = NULL; /* mark consumed */
}
}
- spin_unlock_bh(&mcg->mcg_lock);
+ spin_unlock_bh(&rxe->mcg_lock);
- rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+ kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
if (likely(!skb))
return;
@@ -397,7 +397,7 @@ void rxe_rcv(struct sk_buff *skb)
drop:
if (pkt->qp)
- rxe_drop_ref(pkt->qp);
+ rxe_put(pkt->qp);
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
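
The rewritten rxe_rcv_mcast_pkt() above delivers one skb to every attached QP by cloning it for all but the last list entry and handing the original to the last one, so the original is consumed exactly once. A condensed sketch of that pattern (deliver() is a hypothetical stand-in for the per-QP validate/ref/rxe_rcv_pkt() sequence):

	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp_list.next != &mcg->qp_list) {
			/* more receivers follow: give this QP a clone */
			struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);

			if (!cskb)
				continue;	/* best effort on UD service */
			deliver(mca->qp, cskb);
		} else {
			/* last receiver consumes the original skb */
			deliver(mca->qp, skb);
			skb = NULL;
		}
	}
	if (skb)	/* nothing consumed it (empty list) */
		kfree_skb(skb);
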
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 5eb89052dd66..ae5fbc79dd5c 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -358,14 +358,14 @@ static inline int get_mtu(struct rxe_qp *qp)
}
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+ struct rxe_av *av,
struct rxe_send_wqe *wqe,
- int opcode, int payload,
+ int opcode, u32 payload,
struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
struct rxe_send_wr *ibwr = &wqe->wr;
- struct rxe_av *av;
int pad = (-payload) & 0x3;
int paylen;
int solicited;
@@ -374,21 +374,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* length from start of bth to end of icrc */
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-
- /* pkt->hdr, port_num and mask are initialized in ifc layer */
- pkt->rxe = rxe;
- pkt->opcode = opcode;
- pkt->qp = qp;
- pkt->psn = qp->req.psn;
- pkt->mask = rxe_opcode[opcode].mask;
- pkt->paylen = paylen;
- pkt->wqe = wqe;
+ pkt->paylen = paylen;
/* init skb */
- av = rxe_get_av(pkt);
- if (!av)
- return NULL;
-
skb = rxe_init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
@@ -447,13 +435,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
return skb;
}
-static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, struct sk_buff *skb,
- int paylen)
+static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
+ struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 paylen)
{
int err;
- err = rxe_prepare(pkt, skb);
+ err = rxe_prepare(av, pkt, skb);
if (err)
return err;
@@ -497,7 +485,7 @@ static void update_wqe_state(struct rxe_qp *qp,
static void update_wqe_psn(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt,
- int payload)
+ u32 payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -540,7 +528,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
}
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, int payload)
+ struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@@ -608,19 +596,22 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
enum rxe_hdr_mask mask;
- int payload;
+ u32 payload;
int mtu;
int opcode;
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
struct rxe_queue *q = qp->sq.queue;
+ struct rxe_ah *ah;
+ struct rxe_av *av;
- rxe_add_ref(qp);
+ rxe_get(qp);
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -699,20 +690,34 @@ next_wqe:
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
__rxe_do_task(&qp->comp.task);
- rxe_drop_ref(qp);
+ rxe_put(qp);
return 0;
}
payload = mtu;
}
- skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
+ pkt.rxe = rxe;
+ pkt.opcode = opcode;
+ pkt.qp = qp;
+ pkt.psn = qp->req.psn;
+ pkt.mask = rxe_opcode[opcode].mask;
+ pkt.wqe = wqe;
+
+ av = rxe_get_av(&pkt, &ah);
+ if (unlikely(!av)) {
+ pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto err_drop_ah;
+ }
+
+ skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto err;
+ goto err_drop_ah;
}
- ret = finish_packet(qp, wqe, &pkt, skb, payload);
+ ret = finish_packet(qp, av, wqe, &pkt, skb, payload);
if (unlikely(ret)) {
pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
if (ret == -EFAULT)
@@ -720,9 +725,12 @@ next_wqe:
else
wqe->status = IB_WC_LOC_QP_OP_ERR;
kfree_skb(skb);
- goto err;
+ goto err_drop_ah;
}
+ if (ah)
+ rxe_put(ah);
+
/*
* To prevent a race on wqe access between requester and completer,
* wqe members state and psn need to be set before calling
@@ -747,15 +755,18 @@ next_wqe:
goto err;
}
- update_state(qp, wqe, &pkt, payload);
+ update_state(qp, wqe, &pkt);
goto next_wqe;
+err_drop_ah:
+ if (ah)
+ rxe_put(ah);
err:
wqe->state = wqe_state_error;
__rxe_do_task(&qp->comp.task);
exit:
- rxe_drop_ref(qp);
+ rxe_put(qp);
return -EAGAIN;
}
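
The err_drop_ah label introduced above encodes a single rule: once rxe_get_av() may have taken a reference on an AH, every later failure path must release it. The following is not new code, just the control flow from the hunks above condensed in one place (initializing ah to NULL so the rxe_put() guard is safe is an assumption worth making explicit):

	struct rxe_ah *ah = NULL;

	av = rxe_get_av(&pkt, &ah);	/* may take a ref held in ah */
	if (unlikely(!av))
		goto err_drop_ah;

	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
	if (unlikely(!skb))
		goto err_drop_ah;

	ret = finish_packet(qp, av, wqe, &pkt, skb, payload);
	if (unlikely(ret)) {
		kfree_skb(skb);
		goto err_drop_ah;
	}

	if (ah)		/* success: the AV is no longer needed */
		rxe_put(ah);
	/* ... transmit and update state ... */

	err_drop_ah:
		if (ah)
			rxe_put(ah);
		/* falls through to the common error path */
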
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index e8f435fa6e4d..16fc7ea1298d 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -99,7 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
if (qp->resp.state == QP_STATE_ERROR) {
while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -297,21 +297,22 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
struct ib_event ev;
unsigned int count;
size_t size;
+ unsigned long flags;
if (srq->error)
return RESPST_ERR_RNR;
- spin_lock_bh(&srq->rq.consumer_lock);
+ spin_lock_irqsave(&srq->rq.consumer_lock, flags);
wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
if (!wqe) {
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_ERR_RNR;
}
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
return RESPST_ERR_MALFORMED_WQE;
}
@@ -327,11 +328,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
goto event;
}
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_CHK_LENGTH;
event:
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
@@ -463,8 +464,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_drop_ref(mw);
- rxe_add_ref(mr);
+ rxe_put(mw);
+ rxe_get(mr);
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
@@ -507,9 +508,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
err:
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
if (mw)
- rxe_drop_ref(mw);
+ rxe_put(mw);
return state;
}
@@ -632,7 +633,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
if (ack->mask & RXE_ATMACK_MASK)
atmack_set_orig(ack, qp->resp.atomic_orig);
- err = rxe_prepare(ack, skb);
+ err = rxe_prepare(&qp->pri_av, ack, skb);
if (err) {
kfree_skb(skb);
return NULL;
@@ -641,6 +642,78 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
return skb;
}
+static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct resp_res *res;
+ u32 pkts;
+
+ res = &qp->resp.resources[qp->resp.res_head];
+ rxe_advance_resp_resource(qp);
+ free_rd_atomic_resource(qp, res);
+
+ res->type = RXE_READ_MASK;
+ res->replay = 0;
+ res->read.va = qp->resp.va + qp->resp.offset;
+ res->read.va_org = qp->resp.va + qp->resp.offset;
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+ res->first_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+ res->state = rdatm_res_state_new;
+
+ return res;
+}
+
+/**
+ * rxe_recheck_mr - revalidate MR from rkey and get a reference
+ * @qp: the qp
+ * @rkey: the rkey
+ *
+ * This allows the MR, or the MW if one was used, to have been
+ * invalidated, deregistered or deallocated in the meantime.
+ * It is assumed that the access permissions, if originally good,
+ * are still OK and that the mappings are unchanged.
+ *
+ * Return: mr on success else NULL
+ */
+static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *mr;
+ struct rxe_mw *mw;
+
+ if (rkey_is_mw(rkey)) {
+ mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+ if (!mw || mw->rkey != rkey)
+ return NULL;
+
+ if (mw->state != RXE_MW_STATE_VALID) {
+ rxe_put(mw);
+ return NULL;
+ }
+
+ mr = mw->mr;
+ rxe_put(mw);
+ } else {
+ mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ if (!mr || mr->rkey != rkey)
+ return NULL;
+ }
+
+ if (mr->state != RXE_MR_STATE_VALID) {
+ rxe_put(mr);
+ return NULL;
+ }
+
+ return mr;
+}
+
/* RDMA read response. If res is not NULL, then we have a current RDMA request
* being processed or replayed.
*/
@@ -655,53 +728,26 @@ static enum resp_states read_reply(struct rxe_qp *qp,
int opcode;
int err;
struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr;
if (!res) {
- /* This is the first time we process that request. Get a
- * resource
- */
- res = &qp->resp.resources[qp->resp.res_head];
-
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
-
- res->type = RXE_READ_MASK;
- res->replay = 0;
-
- res->read.va = qp->resp.va +
- qp->resp.offset;
- res->read.va_org = qp->resp.va +
- qp->resp.offset;
-
- res->first_psn = req_pkt->psn;
-
- if (reth_len(req_pkt)) {
- res->last_psn = (req_pkt->psn +
- (reth_len(req_pkt) + mtu - 1) /
- mtu - 1) & BTH_PSN_MASK;
- } else {
- res->last_psn = res->first_psn;
- }
- res->cur_psn = req_pkt->psn;
-
- res->read.resid = qp->resp.resid;
- res->read.length = qp->resp.resid;
- res->read.rkey = qp->resp.rkey;
-
- /* note res inherits the reference to mr from qp */
- res->read.mr = qp->resp.mr;
- qp->resp.mr = NULL;
-
- qp->resp.res = res;
- res->state = rdatm_res_state_new;
+ res = rxe_prepare_read_res(qp, req_pkt);
+ qp->resp.res = res;
}
if (res->state == rdatm_res_state_new) {
+ mr = qp->resp.mr;
+ qp->resp.mr = NULL;
+
if (res->read.resid <= mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
else
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
} else {
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
+
if (res->read.resid > mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
else
@@ -717,10 +763,12 @@ static enum resp_states read_reply(struct rxe_qp *qp,
if (!skb)
return RESPST_ERR_RNR;
- err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
+ err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
payload, RXE_FROM_MR_OBJ);
if (err)
pr_err("Failed copying memory\n");
+ if (mr)
+ rxe_put(mr);
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -814,6 +862,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_ERR_INVALIDATE_RKEY;
}
+ if (pkt->mask & RXE_END_MASK)
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
+
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
@@ -821,11 +873,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
- if (pkt->mask & RXE_COMP_MASK) {
- /* We successfully processed this new request. */
- qp->resp.msn++;
+ if (pkt->mask & RXE_COMP_MASK)
return RESPST_COMPLETE;
- } else if (qp_type(qp) == IB_QPT_RC)
+ else if (qp_type(qp) == IB_QPT_RC)
return RESPST_ACKNOWLEDGE;
else
return RESPST_CLEANUP;
@@ -987,7 +1037,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
rc = rxe_xmit_packet(qp, &ack_pkt, skb);
if (rc) {
pr_err_ratelimited("Failed sending ack\n");
- rxe_drop_ref(qp);
+ rxe_put(qp);
}
out:
return rc;
@@ -1016,13 +1066,13 @@ static enum resp_states cleanup(struct rxe_qp *qp,
if (pkt) {
skb = skb_dequeue(&qp->req_pkts);
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1166,7 +1216,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1180,7 +1230,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
struct rxe_queue *q = qp->rq.queue;
while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
@@ -1200,7 +1250,7 @@ int rxe_responder(void *arg)
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
- rxe_add_ref(qp);
+ rxe_get(qp);
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
@@ -1387,6 +1437,6 @@ int rxe_responder(void *arg)
exit:
ret = -EAGAIN;
done:
- rxe_drop_ref(qp);
+ rxe_put(qp);
return ret;
}
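
rxe_recheck_mr() above lets a multi-packet or replayed read response re-acquire the MR per packet instead of pinning a reference in the response resource for the whole transfer. The caller-side pattern, condensed from read_reply() above:

	if (res->state == rdatm_res_state_new) {
		/* first packet: take over the reference resp already holds */
		mr = qp->resp.mr;
		qp->resp.mr = NULL;
	} else {
		/* later or replayed packets: revalidate from the saved rkey */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (mr)
		rxe_put(mr);	/* drop the per-packet reference */
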
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 915ad6664321..67184b0281a0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);
- rxe_drop_ref(uc);
+ rxe_put(uc);
}
static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
- rxe_drop_ref(pd);
+ rxe_put(pd);
return 0;
}
@@ -181,7 +181,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
return err;
/* create index > 0 */
- rxe_add_index(ah);
ah->ah_num = ah->elem.index;
if (uresp) {
@@ -189,8 +188,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
- rxe_drop_index(ah);
- rxe_drop_ref(ah);
+ rxe_put(ah);
return -EFAULT;
}
} else if (ah->is_user) {
@@ -230,8 +228,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
- rxe_drop_index(ah);
- rxe_drop_ref(ah);
+ rxe_put(ah);
return 0;
}
@@ -306,7 +303,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
if (err)
goto err1;
- rxe_add_ref(pd);
+ rxe_get(pd);
srq->pd = pd;
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
@@ -316,8 +313,8 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
return 0;
err2:
- rxe_drop_ref(pd);
- rxe_drop_ref(srq);
+ rxe_put(pd);
+ rxe_put(srq);
err1:
return err;
}
@@ -374,8 +371,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
if (srq->rq.queue)
rxe_queue_cleanup(srq->rq.queue);
- rxe_drop_ref(srq->pd);
- rxe_drop_ref(srq);
+ rxe_put(srq->pd);
+ rxe_put(srq);
return 0;
}
@@ -384,8 +381,9 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
{
int err = 0;
struct rxe_srq *srq = to_rsrq(ibsrq);
+ unsigned long flags;
- spin_lock_bh(&srq->rq.producer_lock);
+ spin_lock_irqsave(&srq->rq.producer_lock, flags);
while (wr) {
err = post_one_recv(&srq->rq, wr);
@@ -394,7 +392,7 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
wr = wr->next;
}
- spin_unlock_bh(&srq->rq.producer_lock);
+ spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
if (err)
*bad_wr = wr;
@@ -437,7 +435,6 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
if (err)
return err;
- rxe_add_index(qp);
err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
if (err)
goto qp_init;
@@ -445,8 +442,7 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
return 0;
qp_init:
- rxe_drop_index(qp);
- rxe_drop_ref(qp);
+ rxe_put(qp);
return err;
}
@@ -493,10 +489,14 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct rxe_qp *qp = to_rqp(ibqp);
+ int ret;
+
+ ret = rxe_qp_chk_destroy(qp);
+ if (ret)
+ return ret;
rxe_qp_destroy(qp);
- rxe_drop_index(qp);
- rxe_drop_ref(qp);
+ rxe_put(qp);
return 0;
}
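
rxe_qp_chk_destroy(), called above before tearing the QP down, relies on qp->mcg_num being kept accurate by the attach and detach paths. A minimal sketch of the counting protocol (the attach/detach sites live in rxe_mcast.c and are not part of this diff, so their placement here is an assumption):

	/* on attach */
	atomic_inc(&qp->mcg_num);

	/* on detach */
	atomic_dec(&qp->mcg_num);

	/* on destroy, per IBA o10-2.2.3 */
	if (atomic_read(&qp->mcg_num))
		return -EBUSY;
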
@@ -638,18 +638,19 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
int err;
struct rxe_sq *sq = &qp->sq;
struct rxe_send_wqe *send_wqe;
+ unsigned long flags;
int full;
err = validate_send_wr(qp, ibwr, mask, length);
if (err)
return err;
- spin_lock_bh(&qp->sq.sq_lock);
+ spin_lock_irqsave(&qp->sq.sq_lock, flags);
full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
- spin_unlock_bh(&qp->sq.sq_lock);
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return -ENOMEM;
}
@@ -658,7 +659,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
- spin_unlock_bh(&qp->sq.sq_lock);
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return 0;
}
@@ -738,6 +739,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int err = 0;
struct rxe_qp *qp = to_rqp(ibqp);
struct rxe_rq *rq = &qp->rq;
+ unsigned long flags;
if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
*bad_wr = wr;
@@ -751,7 +753,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
goto err1;
}
- spin_lock_bh(&rq->producer_lock);
+ spin_lock_irqsave(&rq->producer_lock, flags);
while (wr) {
err = post_one_recv(rq, wr);
@@ -762,7 +764,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
wr = wr->next;
}
- spin_unlock_bh(&rq->producer_lock);
+ spin_unlock_irqrestore(&rq->producer_lock, flags);
if (qp->resp.state == QP_STATE_ERROR)
rxe_run_task(&qp->resp.task, 1);
@@ -807,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
rxe_cq_disable(cq);
- rxe_drop_ref(cq);
+ rxe_put(cq);
return 0;
}
@@ -843,8 +845,9 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int i;
struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_cqe *cqe;
+ unsigned long flags;
- spin_lock_bh(&cq->cq_lock);
+ spin_lock_irqsave(&cq->cq_lock, flags);
for (i = 0; i < num_entries; i++) {
cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
if (!cqe)
@@ -853,7 +856,7 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
}
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
return i;
}
@@ -873,8 +876,9 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
struct rxe_cq *cq = to_rcq(ibcq);
int ret = 0;
int empty;
+ unsigned long irq_flags;
- spin_lock_bh(&cq->cq_lock);
+ spin_lock_irqsave(&cq->cq_lock, irq_flags);
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
@@ -883,7 +887,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
ret = 1;
- spin_unlock_bh(&cq->cq_lock);
+ spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
return ret;
}
@@ -898,8 +902,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
if (!mr)
return ERR_PTR(-ENOMEM);
- rxe_add_index(mr);
- rxe_add_ref(pd);
+ rxe_get(pd);
rxe_mr_init_dma(pd, access, mr);
return &mr->ibmr;
@@ -922,9 +925,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
goto err2;
}
- rxe_add_index(mr);
- rxe_add_ref(pd);
+ rxe_get(pd);
err = rxe_mr_init_user(pd, start, length, iova, access, mr);
if (err)
@@ -933,9 +935,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
return &mr->ibmr;
err3:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_put(pd);
+ rxe_put(mr);
err2:
return ERR_PTR(err);
}
@@ -957,9 +958,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
goto err1;
}
- rxe_add_index(mr);
-
- rxe_add_ref(pd);
+ rxe_get(pd);
err = rxe_mr_init_fast(pd, max_num_sg, mr);
if (err)
@@ -968,9 +967,8 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
return &mr->ibmr;
err2:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
+ rxe_put(pd);
+ rxe_put(mr);
err1:
return ERR_PTR(err);
}
@@ -999,32 +997,6 @@ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
return n;
}
-static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
-{
- int err;
- struct rxe_dev *rxe = to_rdev(ibqp->device);
- struct rxe_qp *qp = to_rqp(ibqp);
- struct rxe_mc_grp *grp;
-
- /* takes a ref on grp if successful */
- err = rxe_mcast_get_grp(rxe, mgid, &grp);
- if (err)
- return err;
-
- err = rxe_mcast_add_grp_elem(rxe, qp, grp);
-
- rxe_drop_ref(grp);
- return err;
-}
-
-static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
-{
- struct rxe_dev *rxe = to_rdev(ibqp->device);
- struct rxe_qp *qp = to_rqp(ibqp);
-
- return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
-}
-
static ssize_t parent_show(struct device *device,
struct device_attribute *attr, char *buf)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e48969e8d4c8..e7eff1ca75e9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -157,7 +157,6 @@ struct resp_res {
struct sk_buff *skb;
} atomic;
struct {
- struct rxe_mr *mr;
u64 va_org;
u32 rkey;
u32 length;
@@ -232,9 +231,7 @@ struct rxe_qp {
struct rxe_av pri_av;
struct rxe_av alt_av;
- /* list of mcast groups qp has joined (for cleanup) */
- struct list_head grp_list;
- spinlock_t grp_lock; /* guard grp_list */
+ atomic_t mcg_num;
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
@@ -353,23 +350,20 @@ struct rxe_mw {
u64 length;
};
-struct rxe_mc_grp {
- struct rxe_pool_elem elem;
- spinlock_t mcg_lock; /* guard group */
+struct rxe_mcg {
+ struct rb_node node;
+ struct kref ref_cnt;
struct rxe_dev *rxe;
struct list_head qp_list;
union ib_gid mgid;
- int num_qp;
+ atomic_t qp_num;
u32 qkey;
u16 pkey;
};
-struct rxe_mc_elem {
- struct rxe_pool_elem elem;
+struct rxe_mca {
struct list_head qp_list;
- struct list_head grp_list;
struct rxe_qp *qp;
- struct rxe_mc_grp *grp;
};
struct rxe_port {
@@ -401,7 +395,12 @@ struct rxe_dev {
struct rxe_pool mr_pool;
struct rxe_pool mw_pool;
struct rxe_pool mc_grp_pool;
- struct rxe_pool mc_elem_pool;
+
+ /* multicast support */
+ spinlock_t mcg_lock;
+ struct rb_root mcg_tree;
+ atomic_t mcg_num;
+ atomic_t mcg_attach;
spinlock_t pending_lock; /* guard pending_mmaps */
struct list_head pending_mmaps;
@@ -482,6 +481,4 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
-void rxe_mc_cleanup(struct rxe_pool_elem *elem);
-
#endif /* RXE_VERBS_H */
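
struct rxe_mcg above is keyed into rxe->mcg_tree by its mgid, with its lifetime kref-managed (see the kref_put() with rxe_cleanup_mcg in rxe_recv.c above). The real rxe_lookup_mcg() is defined in rxe_mcast.c and not shown in this diff, so the following is only a plausible sketch of an mgid-keyed red-black tree search, not the driver's code:

	/* Hypothetical lookup sketch: walk the rb-tree comparing mgids and
	 * return the match with an extra kref the caller must kref_put().
	 * Assumed to run under rxe->mcg_lock or an equivalent guard.
	 */
	static struct rxe_mcg *lookup_mcg_sketch(struct rxe_dev *rxe,
						 union ib_gid *mgid)
	{
		struct rb_node *node = rxe->mcg_tree.rb_node;

		while (node) {
			struct rxe_mcg *mcg = rb_entry(node, struct rxe_mcg, node);
			int cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

			if (cmp > 0)
				node = node->rb_left;
			else if (cmp < 0)
				node = node->rb_right;
			else {
				kref_get(&mcg->ref_cnt);
				return mcg;
			}
		}

		return NULL;
	}
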