summaryrefslogtreecommitdiff
path: root/drivers/infiniband/sw/rxe/rxe_qp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_qp.c')
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c264
1 files changed, 183 insertions, 81 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a569b111a9d2..845bdd03ca28 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -15,6 +15,54 @@
#include "rxe_queue.h"
#include "rxe_task.h"
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * lockdep can detect false positive circular dependencies
+ * when there are user-space socket API users or in kernel
+ * users switching between a tcp and rdma transport.
+ * Maybe also switching between siw and rxe may cause
+ * problems as per default sockets are only classified
+ * by family and not by ip protocol. And there might
+ * be different locks used between the application
+ * and the low level sockets.
+ *
+ * Problems were seen with ksmbd.ko and cifs.ko,
+ * switching transports, use git blame to find
+ * more details.
+ */
+static struct lock_class_key rxe_send_sk_key[2];
+static struct lock_class_key rxe_send_slock_key[2];
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline void rxe_reclassify_send_socket(struct socket *sock)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct sock *sk = sock->sk;
+
+ if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
+ return;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ sock_lock_init_class_and_name(sk,
+ "slock-AF_INET-RDMA-RXE-SEND",
+ &rxe_send_slock_key[0],
+ "sk_lock-AF_INET-RDMA-RXE-SEND",
+ &rxe_send_sk_key[0]);
+ break;
+ case AF_INET6:
+ sock_lock_init_class_and_name(sk,
+ "slock-AF_INET6-RDMA-RXE-SEND",
+ &rxe_send_slock_key[1],
+ "sk_lock-AF_INET6-RDMA-RXE-SEND",
+ &rxe_send_sk_key[1]);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+}
+
static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
@@ -183,17 +231,68 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
atomic_set(&qp->skb_out, 0);
}
+static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+ struct ib_udata *udata,
+ struct rxe_create_qp_resp __user *uresp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int wqe_size;
+ int err;
+
+ qp->sq.max_wr = init->cap.max_send_wr;
+ wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
+ init->cap.max_inline_data);
+ qp->sq.max_sge = wqe_size / sizeof(struct ib_sge);
+ qp->sq.max_inline = wqe_size;
+ wqe_size += sizeof(struct rxe_send_wqe);
+
+ qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size,
+ QUEUE_TYPE_FROM_CLIENT);
+ if (!qp->sq.queue) {
+ rxe_err_qp(qp, "Unable to allocate send queue\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ /* prepare info for caller to mmap send queue if user space qp */
+ err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
+ qp->sq.queue->buf, qp->sq.queue->buf_size,
+ &qp->sq.queue->ip);
+ if (err) {
+ rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err);
+ goto err_free;
+ }
+
+ /* return actual capabilities to caller which may be larger
+ * than requested
+ */
+ init->cap.max_send_wr = qp->sq.max_wr;
+ init->cap.max_send_sge = qp->sq.max_sge;
+ init->cap.max_inline_data = qp->sq.max_inline;
+
+ return 0;
+
+err_free:
+ vfree(qp->sq.queue->buf);
+ kfree(qp->sq.queue);
+ qp->sq.queue = NULL;
+err_out:
+ return err;
+}
+
static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init, struct ib_udata *udata,
struct rxe_create_qp_resp __user *uresp)
{
int err;
- int wqe_size;
- enum queue_type type;
+
+ /* if we don't finish qp create make sure queue is valid */
+ skb_queue_head_init(&qp->req_pkts);
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
return err;
+ rxe_reclassify_send_socket(qp->sk);
qp->sk->sk->sk_user_data = qp;
/* pick a source UDP port number for this QP based on
@@ -204,32 +303,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
* (0xc000 - 0xffff).
*/
qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
- qp->sq.max_wr = init->cap.max_send_wr;
- /* These caps are limited by rxe_qp_chk_cap() done by the caller */
- wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
- init->cap.max_inline_data);
- qp->sq.max_sge = init->cap.max_send_sge =
- wqe_size / sizeof(struct ib_sge);
- qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
- wqe_size += sizeof(struct rxe_send_wqe);
-
- type = QUEUE_TYPE_FROM_CLIENT;
- qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
- wqe_size, type);
- if (!qp->sq.queue)
- return -ENOMEM;
-
- err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
- qp->sq.queue->buf, qp->sq.queue->buf_size,
- &qp->sq.queue->ip);
-
- if (err) {
- vfree(qp->sq.queue->buf);
- kfree(qp->sq.queue);
- qp->sq.queue = NULL;
+ err = rxe_init_sq(qp, init, udata, uresp);
+ if (err)
return err;
- }
qp->req.wqe_index = queue_get_producer(qp->sq.queue,
QUEUE_TYPE_FROM_CLIENT);
@@ -237,8 +314,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- rxe_init_task(&qp->req.task, qp, rxe_requester);
- rxe_init_task(&qp->comp.task, qp, rxe_completer);
+ rxe_init_task(&qp->send_task, qp, rxe_sender);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@@ -248,39 +324,68 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return 0;
}
+static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+ struct ib_udata *udata,
+ struct rxe_create_qp_resp __user *uresp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int wqe_size;
+ int err;
+
+ qp->rq.max_wr = init->cap.max_recv_wr;
+ qp->rq.max_sge = init->cap.max_recv_sge;
+ wqe_size = sizeof(struct rxe_recv_wqe) +
+ qp->rq.max_sge*sizeof(struct ib_sge);
+
+ qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size,
+ QUEUE_TYPE_FROM_CLIENT);
+ if (!qp->rq.queue) {
+ rxe_err_qp(qp, "Unable to allocate recv queue\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ /* prepare info for caller to mmap recv queue if user space qp */
+ err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
+ qp->rq.queue->buf, qp->rq.queue->buf_size,
+ &qp->rq.queue->ip);
+ if (err) {
+ rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err);
+ goto err_free;
+ }
+
+ /* return actual capabilities to caller which may be larger
+ * than requested
+ */
+ init->cap.max_recv_wr = qp->rq.max_wr;
+
+ return 0;
+
+err_free:
+ vfree(qp->rq.queue->buf);
+ kfree(qp->rq.queue);
+ qp->rq.queue = NULL;
+err_out:
+ return err;
+}
+
static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
struct ib_udata *udata,
struct rxe_create_qp_resp __user *uresp)
{
int err;
- int wqe_size;
- enum queue_type type;
+
+ /* if we don't finish qp create make sure queue is valid */
+ skb_queue_head_init(&qp->resp_pkts);
if (!qp->srq) {
- qp->rq.max_wr = init->cap.max_recv_wr;
- qp->rq.max_sge = init->cap.max_recv_sge;
-
- wqe_size = rcv_wqe_size(qp->rq.max_sge);
-
- type = QUEUE_TYPE_FROM_CLIENT;
- qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
- wqe_size, type);
- if (!qp->rq.queue)
- return -ENOMEM;
-
- err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
- qp->rq.queue->buf, qp->rq.queue->buf_size,
- &qp->rq.queue->ip);
- if (err) {
- vfree(qp->rq.queue->buf);
- kfree(qp->rq.queue);
- qp->rq.queue = NULL;
+ err = rxe_init_rq(qp, init, udata, uresp);
+ if (err)
return err;
- }
}
- rxe_init_task(&qp->resp.task, qp, rxe_responder);
+ rxe_init_task(&qp->recv_task, qp, rxe_receiver);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@@ -307,10 +412,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (srq)
rxe_get(srq);
- qp->pd = pd;
- qp->rcq = rcq;
- qp->scq = scq;
- qp->srq = srq;
+ qp->pd = pd;
+ qp->rcq = rcq;
+ qp->scq = scq;
+ qp->srq = srq;
atomic_inc(&rcq->num_wq);
atomic_inc(&scq->num_wq);
@@ -457,14 +562,12 @@ err1:
static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
- rxe_disable_task(&qp->resp.task);
- rxe_disable_task(&qp->comp.task);
- rxe_disable_task(&qp->req.task);
+ rxe_disable_task(&qp->recv_task);
+ rxe_disable_task(&qp->send_task);
/* drain work and packet queuesc */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->rq.queue)
rxe_queue_reset(qp->rq.queue);
@@ -491,9 +594,8 @@ static void rxe_qp_reset(struct rxe_qp *qp)
cleanup_rd_atomic_resources(qp);
/* reenable tasks */
- rxe_enable_task(&qp->resp.task);
- rxe_enable_task(&qp->comp.task);
- rxe_enable_task(&qp->req.task);
+ rxe_enable_task(&qp->recv_task);
+ rxe_enable_task(&qp->send_task);
}
/* move the qp to the error state */
@@ -505,9 +607,8 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_sched_task(&qp->resp.task);
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->recv_task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -518,8 +619,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -724,6 +824,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
* Yield the processor
*/
spin_lock_irqsave(&qp->state_lock, flags);
+ attr->cur_qp_state = qp_state(qp);
if (qp->attr.sq_draining) {
spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
@@ -759,24 +860,25 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
- if (qp_type(qp) == IB_QPT_RC) {
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ /* In the function timer_setup, .function is initialized. If .function
+ * is NULL, it indicates the function timer_setup is not called, the
+ * timer is not initialized. Or else, the timer is initialized.
+ */
+ if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function &&
+ qp->rnr_nak_timer.function) {
+ timer_delete_sync(&qp->retrans_timer);
+ timer_delete_sync(&qp->rnr_nak_timer);
}
- if (qp->resp.task.func)
- rxe_cleanup_task(&qp->resp.task);
-
- if (qp->req.task.func)
- rxe_cleanup_task(&qp->req.task);
+ if (qp->recv_task.func)
+ rxe_cleanup_task(&qp->recv_task);
- if (qp->comp.task.func)
- rxe_cleanup_task(&qp->comp.task);
+ if (qp->send_task.func)
+ rxe_cleanup_task(&qp->send_task);
/* flush out any receive wr's or pending requests */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);