Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_qp.c')

 drivers/infiniband/sw/rxe/rxe_qp.c | 264 +++++++++++++++++++++++----------
 1 file changed, 183 insertions(+), 81 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a569b111a9d2..845bdd03ca28 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -15,6 +15,54 @@
 #include "rxe_queue.h"
 #include "rxe_task.h"
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * lockdep can detect false positive circular dependencies
+ * when there are user-space socket API users or in kernel
+ * users switching between a tcp and rdma transport.
+ * Maybe also switching between siw and rxe may cause
+ * problems as per default sockets are only classified
+ * by family and not by ip protocol. And there might
+ * be different locks used between the application
+ * and the low level sockets.
+ *
+ * Problems were seen with ksmbd.ko and cifs.ko,
+ * switching transports, use git blame to find
+ * more details.
+ */
+static struct lock_class_key rxe_send_sk_key[2];
+static struct lock_class_key rxe_send_slock_key[2];
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline void rxe_reclassify_send_socket(struct socket *sock)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+        struct sock *sk = sock->sk;
+
+        if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
+                return;
+
+        switch (sk->sk_family) {
+        case AF_INET:
+                sock_lock_init_class_and_name(sk,
+                        "slock-AF_INET-RDMA-RXE-SEND",
+                        &rxe_send_slock_key[0],
+                        "sk_lock-AF_INET-RDMA-RXE-SEND",
+                        &rxe_send_sk_key[0]);
+                break;
+        case AF_INET6:
+                sock_lock_init_class_and_name(sk,
+                        "slock-AF_INET6-RDMA-RXE-SEND",
+                        &rxe_send_slock_key[1],
+                        "sk_lock-AF_INET6-RDMA-RXE-SEND",
+                        &rxe_send_sk_key[1]);
+                break;
+        default:
+                WARN_ON_ONCE(1);
+        }
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+}
+
 static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
                           int has_srq)
 {
@@ -183,17 +231,68 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
         atomic_set(&qp->skb_out, 0);
 }
 
+static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+                       struct ib_udata *udata,
+                       struct rxe_create_qp_resp __user *uresp)
+{
+        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+        int wqe_size;
+        int err;
+
+        qp->sq.max_wr = init->cap.max_send_wr;
+        wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
+                         init->cap.max_inline_data);
+        qp->sq.max_sge = wqe_size / sizeof(struct ib_sge);
+        qp->sq.max_inline = wqe_size;
+        wqe_size += sizeof(struct rxe_send_wqe);
+
+        qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size,
+                                      QUEUE_TYPE_FROM_CLIENT);
+        if (!qp->sq.queue) {
+                rxe_err_qp(qp, "Unable to allocate send queue\n");
+                err = -ENOMEM;
+                goto err_out;
+        }
+
+        /* prepare info for caller to mmap send queue if user space qp */
+        err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
+                           qp->sq.queue->buf, qp->sq.queue->buf_size,
+                           &qp->sq.queue->ip);
+        if (err) {
+                rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err);
+                goto err_free;
+        }
+
+        /* return actual capabilities to caller which may be larger
+         * than requested
+         */
+        init->cap.max_send_wr = qp->sq.max_wr;
+        init->cap.max_send_sge = qp->sq.max_sge;
+        init->cap.max_inline_data = qp->sq.max_inline;
+
+        return 0;
+
+err_free:
+        vfree(qp->sq.queue->buf);
+        kfree(qp->sq.queue);
+        qp->sq.queue = NULL;
+err_out:
+        return err;
+}
+
 static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
                            struct ib_qp_init_attr *init,
                            struct ib_udata *udata,
                            struct rxe_create_qp_resp __user *uresp)
 {
         int err;
-        int wqe_size;
-        enum queue_type type;
+
+        /* if we don't finish qp create make sure queue is valid */
+        skb_queue_head_init(&qp->req_pkts);
 
         err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
         if (err < 0)
                 return err;
+        rxe_reclassify_send_socket(qp->sk);
         qp->sk->sk->sk_user_data = qp;
 
         /* pick a source UDP port number for this QP based on
@@ -204,32 +303,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
          * (0xc000 - 0xffff).
          */
         qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
 
-        qp->sq.max_wr = init->cap.max_send_wr;
-        /* These caps are limited by rxe_qp_chk_cap() done by the caller */
-        wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
-                         init->cap.max_inline_data);
-        qp->sq.max_sge = init->cap.max_send_sge =
-                wqe_size / sizeof(struct ib_sge);
-        qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
-        wqe_size += sizeof(struct rxe_send_wqe);
-
-        type = QUEUE_TYPE_FROM_CLIENT;
-        qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
-                                      wqe_size, type);
-        if (!qp->sq.queue)
-                return -ENOMEM;
-
-        err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
-                           qp->sq.queue->buf, qp->sq.queue->buf_size,
-                           &qp->sq.queue->ip);
-
-        if (err) {
-                vfree(qp->sq.queue->buf);
-                kfree(qp->sq.queue);
-                qp->sq.queue = NULL;
+        err = rxe_init_sq(qp, init, udata, uresp);
+        if (err)
                 return err;
-        }
 
         qp->req.wqe_index = queue_get_producer(qp->sq.queue,
                                                QUEUE_TYPE_FROM_CLIENT);
@@ -237,8 +314,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
         qp->req.opcode = -1;
         qp->comp.opcode = -1;
 
-        rxe_init_task(&qp->req.task, qp, rxe_requester);
-        rxe_init_task(&qp->comp.task, qp, rxe_completer);
+        rxe_init_task(&qp->send_task, qp, rxe_sender);
 
         qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
         if (init->qp_type == IB_QPT_RC) {
@@ -248,39 +324,68 @@
         return 0;
 }
 
+static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+                       struct ib_udata *udata,
+                       struct rxe_create_qp_resp __user *uresp)
+{
+        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+        int wqe_size;
+        int err;
+
+        qp->rq.max_wr = init->cap.max_recv_wr;
+        qp->rq.max_sge = init->cap.max_recv_sge;
+        wqe_size = sizeof(struct rxe_recv_wqe) +
+                                qp->rq.max_sge*sizeof(struct ib_sge);
+
+        qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size,
+                                      QUEUE_TYPE_FROM_CLIENT);
+        if (!qp->rq.queue) {
+                rxe_err_qp(qp, "Unable to allocate recv queue\n");
+                err = -ENOMEM;
+                goto err_out;
+        }
+
+        /* prepare info for caller to mmap recv queue if user space qp */
+        err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
+                           qp->rq.queue->buf, qp->rq.queue->buf_size,
+                           &qp->rq.queue->ip);
+        if (err) {
+                rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err);
+                goto err_free;
+        }
+
+        /* return actual capabilities to caller which may be larger
+         * than requested
+         */
+        init->cap.max_recv_wr = qp->rq.max_wr;
+
+        return 0;
+
+err_free:
+        vfree(qp->rq.queue->buf);
+        kfree(qp->rq.queue);
+        qp->rq.queue = NULL;
+err_out:
+        return err;
+}
+
 static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
                             struct ib_qp_init_attr *init,
                             struct ib_udata *udata,
                             struct rxe_create_qp_resp __user *uresp)
 {
         int err;
-        int wqe_size;
-        enum queue_type type;
+
+        /* if we don't finish qp create make sure queue is valid */
+        skb_queue_head_init(&qp->resp_pkts);
 
         if (!qp->srq) {
-                qp->rq.max_wr = init->cap.max_recv_wr;
-                qp->rq.max_sge = init->cap.max_recv_sge;
-
-                wqe_size = rcv_wqe_size(qp->rq.max_sge);
-
-                type = QUEUE_TYPE_FROM_CLIENT;
-                qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
-                                              wqe_size, type);
-                if (!qp->rq.queue)
-                        return -ENOMEM;
-
-                err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
-                                   qp->rq.queue->buf, qp->rq.queue->buf_size,
-                                   &qp->rq.queue->ip);
-                if (err) {
-                        vfree(qp->rq.queue->buf);
-                        kfree(qp->rq.queue);
-                        qp->rq.queue = NULL;
+                err = rxe_init_rq(qp, init, udata, uresp);
+                if (err)
                         return err;
-                }
         }
 
-        rxe_init_task(&qp->resp.task, qp, rxe_responder);
+        rxe_init_task(&qp->recv_task, qp, rxe_receiver);
 
         qp->resp.opcode = OPCODE_NONE;
         qp->resp.msn = 0;
@@ -307,10 +412,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
         if (srq)
                 rxe_get(srq);
 
-        qp->pd                  = pd;
-        qp->rcq                 = rcq;
-        qp->scq                 = scq;
-        qp->srq                 = srq;
+        qp->pd = pd;
+        qp->rcq = rcq;
+        qp->scq = scq;
+        qp->srq = srq;
 
         atomic_inc(&rcq->num_wq);
         atomic_inc(&scq->num_wq);
@@ -457,14 +562,12 @@ err1:
 static void rxe_qp_reset(struct rxe_qp *qp)
 {
         /* stop tasks from running */
-        rxe_disable_task(&qp->resp.task);
-        rxe_disable_task(&qp->comp.task);
-        rxe_disable_task(&qp->req.task);
+        rxe_disable_task(&qp->recv_task);
+        rxe_disable_task(&qp->send_task);
 
         /* drain work and packet queuesc */
-        rxe_requester(qp);
-        rxe_completer(qp);
-        rxe_responder(qp);
+        rxe_sender(qp);
+        rxe_receiver(qp);
 
         if (qp->rq.queue)
                 rxe_queue_reset(qp->rq.queue);
@@ -491,9 +594,8 @@ static void rxe_qp_reset(struct rxe_qp *qp)
         cleanup_rd_atomic_resources(qp);
 
         /* reenable tasks */
-        rxe_enable_task(&qp->resp.task);
-        rxe_enable_task(&qp->comp.task);
-        rxe_enable_task(&qp->req.task);
+        rxe_enable_task(&qp->recv_task);
+        rxe_enable_task(&qp->send_task);
 }
 
 /* move the qp to the error state */
@@ -505,9 +607,8 @@ void rxe_qp_error(struct rxe_qp *qp)
         qp->attr.qp_state = IB_QPS_ERR;
 
         /* drain work and packet queues */
-        rxe_sched_task(&qp->resp.task);
-        rxe_sched_task(&qp->comp.task);
-        rxe_sched_task(&qp->req.task);
+        rxe_sched_task(&qp->recv_task);
+        rxe_sched_task(&qp->send_task);
         spin_unlock_irqrestore(&qp->state_lock, flags);
 }
 
@@ -518,8 +619,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
 
         spin_lock_irqsave(&qp->state_lock, flags);
         qp->attr.sq_draining = 1;
-        rxe_sched_task(&qp->comp.task);
-        rxe_sched_task(&qp->req.task);
+        rxe_sched_task(&qp->send_task);
         spin_unlock_irqrestore(&qp->state_lock, flags);
 }
 
@@ -724,6 +824,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
          * Yield the processor
          */
         spin_lock_irqsave(&qp->state_lock, flags);
+        attr->cur_qp_state = qp_state(qp);
         if (qp->attr.sq_draining) {
                 spin_unlock_irqrestore(&qp->state_lock, flags);
                 cond_resched();
@@ -759,24 +860,25 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
         spin_unlock_irqrestore(&qp->state_lock, flags);
         qp->qp_timeout_jiffies = 0;
 
-        if (qp_type(qp) == IB_QPT_RC) {
-                del_timer_sync(&qp->retrans_timer);
-                del_timer_sync(&qp->rnr_nak_timer);
+        /* In the function timer_setup, .function is initialized. If .function
+         * is NULL, it indicates the function timer_setup is not called, the
+         * timer is not initialized. Or else, the timer is initialized.
+         */
+        if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function &&
+            qp->rnr_nak_timer.function) {
+                timer_delete_sync(&qp->retrans_timer);
+                timer_delete_sync(&qp->rnr_nak_timer);
         }
 
-        if (qp->resp.task.func)
-                rxe_cleanup_task(&qp->resp.task);
-
-        if (qp->req.task.func)
-                rxe_cleanup_task(&qp->req.task);
+        if (qp->recv_task.func)
+                rxe_cleanup_task(&qp->recv_task);
 
-        if (qp->comp.task.func)
-                rxe_cleanup_task(&qp->comp.task);
+        if (qp->send_task.func)
+                rxe_cleanup_task(&qp->send_task);
 
         /* flush out any receive wr's or pending requests */
-        rxe_requester(qp);
-        rxe_completer(qp);
-        rxe_responder(qp);
+        rxe_sender(qp);
+        rxe_receiver(qp);
 
         if (qp->sq.queue)
                 rxe_queue_cleanup(qp->sq.queue);
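A note on the lockdep hunk above: reclassification has to happen immediately after sock_create_kern() and before the socket carries any traffic, since sock_allow_reclassification() only permits re-keying a socket whose locks are not currently held. A minimal sketch of the same pattern, with hypothetical demo_* names that are not part of the patch:

#include <linux/lockdep.h>
#include <net/sock.h>

static struct lock_class_key demo_sk_key;
static struct lock_class_key demo_slock_key;

static int demo_create_send_socket(struct net *net, struct socket **sockp)
{
        int err;

        err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, sockp);
        if (err < 0)
                return err;

        /* re-key both sk_lock and slock so lockdep tracks this socket
         * separately from all other AF_INET sockets
         */
        sock_lock_init_class_and_name((*sockp)->sk,
                                      "slock-AF_INET-DEMO",
                                      &demo_slock_key,
                                      "sk_lock-AF_INET-DEMO",
                                      &demo_sk_key);
        return 0;
}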
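On the new rxe_init_sq(): the scatter/gather list and inline data occupy the same region of a send WQE, so the two requested limits are folded into one size and whichever is larger wins. As a worked example, assuming sizeof(struct ib_sge) == 16: a request for max_send_sge = 4 and max_inline_data = 96 gives wqe_size = max(4 * 16, 96) = 96, so the caller gets back max_send_sge = 96 / 16 = 6 and max_inline_data = 96. Because rxe_queue_init() may also round the requested queue depth up, init->cap is rewritten from qp->sq.* before returning, matching the "may be larger than requested" comment.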
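The source-port selection is a plain range mapping: hash_32(qp_num(qp), 14) produces a 14-bit value in [0, 0x3fff], so adding it to RXE_ROCE_V2_SPORT (0xc000) always yields a UDP source port in the 0xc000 - 0xffff range named in the comment, and distinct QPs tend to hash onto distinct ports so receive-side steering can spread their flows.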
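The timer guard in rxe_qp_do_cleanup() works because timer_setup() is what stores the callback in timer_list.function; a QP torn down before rxe_qp_init_req() set up its RC timers would otherwise hit timer_delete_sync() on an uninitialized timer. The same early-failure concern is why skb_queue_head_init() moved to the top of rxe_qp_init_req()/rxe_qp_init_resp(). The teardown pattern in isolation, again with hypothetical demo_* names:

#include <linux/timer.h>

struct demo_obj {
        struct timer_list retry_timer;
};

static void demo_retry(struct timer_list *t)
{
        /* timer callback body */
}

static void demo_init(struct demo_obj *obj)
{
        /* timer_setup() populates retry_timer.function */
        timer_setup(&obj->retry_timer, demo_retry, 0);
}

static void demo_teardown(struct demo_obj *obj)
{
        /* .function still NULL means timer_setup() never ran */
        if (obj->retry_timer.function)
                timer_delete_sync(&obj->retry_timer);
}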
