diff options
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_qp.c')
| -rw-r--r-- | drivers/infiniband/sw/rxe/rxe_qp.c | 696 |
1 files changed, 387 insertions, 309 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 1ab6af7ddb25..845bdd03ca28 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -15,38 +15,86 @@ #include "rxe_queue.h" #include "rxe_task.h" +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * lockdep can detect false positive circular dependencies + * when there are user-space socket API users or in kernel + * users switching between a tcp and rdma transport. + * Maybe also switching between siw and rxe may cause + * problems as per default sockets are only classified + * by family and not by ip protocol. And there might + * be different locks used between the application + * and the low level sockets. + * + * Problems were seen with ksmbd.ko and cifs.ko, + * switching transports, use git blame to find + * more details. + */ +static struct lock_class_key rxe_send_sk_key[2]; +static struct lock_class_key rxe_send_slock_key[2]; +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ + +static inline void rxe_reclassify_send_socket(struct socket *sock) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, + "slock-AF_INET-RDMA-RXE-SEND", + &rxe_send_slock_key[0], + "sk_lock-AF_INET-RDMA-RXE-SEND", + &rxe_send_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, + "slock-AF_INET6-RDMA-RXE-SEND", + &rxe_send_slock_key[1], + "sk_lock-AF_INET6-RDMA-RXE-SEND", + &rxe_send_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ +} + static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid send wr = %d > %d\n", - cap->max_send_wr, rxe->attr.max_qp_wr); + rxe_dbg_dev(rxe, "invalid send wr = %u > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_send_sge); + rxe_dbg_dev(rxe, "invalid send sge = %u > %d\n", + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid recv wr = %d > %d\n", - cap->max_recv_wr, rxe->attr.max_qp_wr); + rxe_dbg_dev(rxe, "invalid recv wr = %u > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_recv_sge); + rxe_dbg_dev(rxe, "invalid recv sge = %u > %d\n", + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_warn("invalid max inline data = %d > %d\n", - cap->max_inline_data, rxe->max_inline_data); + rxe_dbg_dev(rxe, "invalid max inline data = %u > %d\n", + cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -63,7 +111,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) int port_num = init->port_num; switch (init->qp_type) { - case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_RC: case IB_QPT_UC: @@ -74,28 +121,23 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_warn("missing cq\n"); + rxe_dbg_dev(rxe, "missing cq\n"); goto err1; } if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) goto err1; - if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_warn("invalid port = %d\n", port_num); + rxe_dbg_dev(rxe, "invalid port = %d\n", port_num); goto err1; } port = &rxe->port; - if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { - pr_warn("SMI QP exists for port %d\n", port_num); - goto err1; - } - if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_warn("GSI QP exists for port %d\n", port_num); + rxe_dbg_dev(rxe, "GSI QP exists for port %d\n", port_num); goto err1; } } @@ -126,21 +168,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } kfree(qp->resp.resources); qp->resp.resources = NULL; } } -void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) +void free_rd_atomic_resource(struct resp_res *res) { - if (res->type == RXE_ATOMIC_MASK) { - kfree_skb(res->atomic.skb); - } else if (res->type == RXE_READ_MASK) { - if (res->read.mr) - rxe_drop_ref(res->read.mr); - } res->type = 0; } @@ -152,7 +188,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } } } @@ -167,16 +203,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, qp->attr.path_mtu = 1; qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); - qpn = qp->pelem.index; + qpn = qp->elem.index; port = &rxe->port; switch (init->qp_type) { - case IB_QPT_SMI: - qp->ibqp.qp_num = 0; - port->qp_smi_index = qpn; - qp->attr.port_num = init->port_num; - break; - case IB_QPT_GSI: qp->ibqp.qp_num = 1; port->qp_gsi_index = qpn; @@ -188,28 +218,81 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, break; } - INIT_LIST_HEAD(&qp->grp_list); + spin_lock_init(&qp->state_lock); - skb_queue_head_init(&qp->send_pkts); + spin_lock_init(&qp->sq.sq_lock); + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); - spin_lock_init(&qp->grp_lock); - spin_lock_init(&qp->state_lock); + skb_queue_head_init(&qp->req_pkts); + skb_queue_head_init(&qp->resp_pkts); atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); } +static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->sq.max_wr = init->cap.max_send_wr; + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = wqe_size; + wqe_size += sizeof(struct rxe_send_wqe); + + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->sq.queue) { + rxe_err_qp(qp, "Unable to allocate send queue\n"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap send queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, + qp->sq.queue->buf, qp->sq.queue->buf_size, + &qp->sq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + return 0; + +err_free: + vfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + qp->sq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->req_pkts); err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) return err; + rxe_reclassify_send_socket(qp->sk); qp->sk->sk->sk_user_data = qp; /* pick a source UDP port number for this QP based on @@ -219,53 +302,19 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * the port number must be in the Dynamic Ports range * (0xc000 - 0xffff). */ - qp->src_port = RXE_ROCE_V2_SPORT + - (hash_32_generic(qp_num(qp), 14) & 0x3fff); - qp->sq.max_wr = init->cap.max_send_wr; - - /* These caps are limited by rxe_qp_chk_cap() done by the caller */ - wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), - init->cap.max_inline_data); - qp->sq.max_sge = init->cap.max_send_sge = - wqe_size / sizeof(struct ib_sge); - qp->sq.max_inline = init->cap.max_inline_data = wqe_size; - wqe_size += sizeof(struct rxe_send_wqe); + qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; - qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, - wqe_size, type); - if (!qp->sq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, - qp->sq.queue->buf, qp->sq.queue->buf_size, - &qp->sq.queue->ip); - - if (err) { - vfree(qp->sq.queue->buf); - kfree(qp->sq.queue); - qp->sq.queue = NULL; + err = rxe_init_sq(qp, init, udata, uresp); + if (err) return err; - } - if (qp->is_user) - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_KERNEL); + qp->req.wqe_index = queue_get_producer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); - qp->req.state = QP_STATE_RESET; qp->req.opcode = -1; qp->comp.opcode = -1; - spin_lock_init(&qp->sq.sq_lock); - skb_queue_head_init(&qp->req_pkts); - - rxe_init_task(rxe, &qp->req.task, qp, - rxe_requester, "req"); - rxe_init_task(rxe, &qp->comp.task, qp, - rxe_completer, "comp"); + rxe_init_task(&qp->send_task, qp, rxe_sender); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -275,54 +324,71 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return 0; } +static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + wqe_size = sizeof(struct rxe_recv_wqe) + + qp->rq.max_sge*sizeof(struct ib_sge); + + qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->rq.queue) { + rxe_err_qp(qp, "Unable to allocate recv queue\n"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap recv queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, + qp->rq.queue->buf, qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d\n", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_recv_wr = qp->rq.max_wr; + + return 0; + +err_free: + vfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + qp->rq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->resp_pkts); if (!qp->srq) { - qp->rq.max_wr = init->cap.max_recv_wr; - qp->rq.max_sge = init->cap.max_recv_sge; - - wqe_size = rcv_wqe_size(qp->rq.max_sge); - - pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; - qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, - wqe_size, type); - if (!qp->rq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, - qp->rq.queue->buf, qp->rq.queue->buf_size, - &qp->rq.queue->ip); - if (err) { - vfree(qp->rq.queue->buf); - kfree(qp->rq.queue); - qp->rq.queue = NULL; + err = rxe_init_rq(qp, init, udata, uresp); + if (err) return err; - } } - spin_lock_init(&qp->rq.producer_lock); - spin_lock_init(&qp->rq.consumer_lock); - - qp->rq.is_user = qp->is_user; - - skb_queue_head_init(&qp->resp_pkts); - - rxe_init_task(rxe, &qp->resp.task, qp, - rxe_responder, "resp"); + rxe_init_task(&qp->recv_task, qp, rxe_receiver); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; - qp->resp.state = QP_STATE_RESET; return 0; } @@ -338,17 +404,21 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; + unsigned long flags; - rxe_add_ref(pd); - rxe_add_ref(rcq); - rxe_add_ref(scq); + rxe_get(pd); + rxe_get(rcq); + rxe_get(scq); if (srq) - rxe_add_ref(srq); + rxe_get(srq); + + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; - qp->pd = pd; - qp->rcq = rcq; - qp->scq = scq; - qp->srq = srq; + atomic_inc(&rcq->num_wq); + atomic_inc(&scq->num_wq); rxe_qp_init_misc(rxe, qp, init); @@ -360,24 +430,30 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (err) goto err2; + spin_lock_irqsave(&qp->state_lock, flags); qp->attr.qp_state = IB_QPS_RESET; qp->valid = 1; + spin_unlock_irqrestore(&qp->state_lock, flags); return 0; err2: rxe_queue_cleanup(qp->sq.queue); + qp->sq.queue = NULL; err1: + atomic_dec(&rcq->num_wq); + atomic_dec(&scq->num_wq); + qp->pd = NULL; qp->rcq = NULL; qp->scq = NULL; qp->srq = NULL; if (srq) - rxe_drop_ref(srq); - rxe_drop_ref(scq); - rxe_drop_ref(rcq); - rxe_drop_ref(pd); + rxe_put(srq); + rxe_put(scq); + rxe_put(rcq); + rxe_put(pd); return err; } @@ -408,33 +484,12 @@ int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init) return 0; } -/* called by the modify qp verb, this routine checks all the parameters before - * making any changes - */ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { - enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? - attr->cur_qp_state : qp->attr.qp_state; - enum ib_qp_state new_state = (mask & IB_QP_STATE) ? - attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_warn("invalid mask or state for qp\n"); - goto err1; - } - - if (mask & IB_QP_STATE) { - if (cur_state == IB_QPS_SQD) { - if (qp->req.state == QP_STATE_DRAIN && - new_state != IB_QPS_ERR) - goto err1; - } - } - if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_warn("invalid port %d\n", attr->port_num); + rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num); goto err1; } } @@ -442,19 +497,26 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; - if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) + if (mask & IB_QP_ACCESS_FLAGS) { + if (!(qp_type(qp) == IB_QPT_RC || qp_type(qp) == IB_QPT_UC)) + goto err1; + if (attr->qp_access_flags & ~RXE_ACCESS_SUPPORTED_QP) + goto err1; + } + + if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr)) goto err1; if (mask & IB_QP_ALT_PATH) { - if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + if (rxe_av_chk_attr(qp, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_warn("invalid alt port %d\n", attr->alt_port_num); + rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_warn("invalid QP alt timeout %d > 31\n", - attr->alt_timeout); + rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n", + attr->alt_timeout); goto err1; } } @@ -466,7 +528,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, enum ib_mtu mtu = attr->path_mtu; if (mtu > max_mtu) { - pr_debug("invalid mtu (%d) > (%d)\n", + rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n", ib_mtu_enum_to_int(mtu), ib_mtu_enum_to_int(max_mtu)); goto err1; @@ -475,17 +537,17 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_warn("invalid max_rd_atomic %d > %d\n", - attr->max_rd_atomic, - rxe->attr.max_qp_rd_atom); + rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); goto err1; } } if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_warn("invalid QP timeout %d > 31\n", - attr->timeout); + rxe_dbg_qp(qp, "invalid timeout %d > 31\n", + attr->timeout); goto err1; } } @@ -500,34 +562,23 @@ err1: static void rxe_qp_reset(struct rxe_qp *qp) { /* stop tasks from running */ - rxe_disable_task(&qp->resp.task); - - /* stop request/comp */ - if (qp->sq.queue) { - if (qp_type(qp) == IB_QPT_RC) - rxe_disable_task(&qp->comp.task); - rxe_disable_task(&qp->req.task); - } + rxe_disable_task(&qp->recv_task); + rxe_disable_task(&qp->send_task); - /* move qp to the reset state */ - qp->req.state = QP_STATE_RESET; - qp->resp.state = QP_STATE_RESET; + /* drain work and packet queuesc */ + rxe_sender(qp); + rxe_receiver(qp); - /* let state machines reset themselves drain work and packet queues - * etc. - */ - __rxe_do_task(&qp->resp.task); - - if (qp->sq.queue) { - __rxe_do_task(&qp->comp.task); - __rxe_do_task(&qp->req.task); + if (qp->rq.queue) + rxe_queue_reset(qp->rq.queue); + if (qp->sq.queue) rxe_queue_reset(qp->sq.queue); - } /* cleanup attributes */ atomic_set(&qp->ssn, 0); qp->req.opcode = -1; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; qp->req.noack_pkts = 0; qp->resp.msn = 0; qp->resp.opcode = -1; @@ -536,61 +587,114 @@ static void rxe_qp_reset(struct rxe_qp *qp) qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } cleanup_rd_atomic_resources(qp); /* reenable tasks */ - rxe_enable_task(&qp->resp.task); + rxe_enable_task(&qp->recv_task); + rxe_enable_task(&qp->send_task); +} - if (qp->sq.queue) { - if (qp_type(qp) == IB_QPT_RC) - rxe_enable_task(&qp->comp.task); +/* move the qp to the error state */ +void rxe_qp_error(struct rxe_qp *qp) +{ + unsigned long flags; - rxe_enable_task(&qp->req.task); - } + spin_lock_irqsave(&qp->state_lock, flags); + qp->attr.qp_state = IB_QPS_ERR; + + /* drain work and packet queues */ + rxe_sched_task(&qp->recv_task); + rxe_sched_task(&qp->send_task); + spin_unlock_irqrestore(&qp->state_lock, flags); } -/* drain the send queue */ -static void rxe_qp_drain(struct rxe_qp *qp) +static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask) { - if (qp->sq.queue) { - if (qp->req.state != QP_STATE_DRAINED) { - qp->req.state = QP_STATE_DRAIN; - if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); - else - __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); - } - } + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); + qp->attr.sq_draining = 1; + rxe_sched_task(&qp->send_task); + spin_unlock_irqrestore(&qp->state_lock, flags); } -/* move the qp to the error state */ -void rxe_qp_error(struct rxe_qp *qp) +/* caller should hold qp->state_lock */ +static int __qp_chk_state(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask) { - qp->req.state = QP_STATE_ERROR; - qp->resp.state = QP_STATE_ERROR; - qp->attr.qp_state = IB_QPS_ERR; + enum ib_qp_state cur_state; + enum ib_qp_state new_state; - /* drain work and packet queues */ - rxe_run_task(&qp->resp.task, 1); + cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->attr.qp_state; + new_state = (mask & IB_QP_STATE) ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) + return -EINVAL; + + if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) { + if (qp->attr.sq_draining && new_state != IB_QPS_ERR) + return -EINVAL; + } - if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); - else - __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + return 0; } +static const char *const qps2str[] = { + [IB_QPS_RESET] = "RESET", + [IB_QPS_INIT] = "INIT", + [IB_QPS_RTR] = "RTR", + [IB_QPS_RTS] = "RTS", + [IB_QPS_SQD] = "SQD", + [IB_QPS_SQE] = "SQE", + [IB_QPS_ERR] = "ERR", +}; + /* called by the modify qp verb */ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; + if (mask & IB_QP_CUR_STATE) + qp->attr.cur_qp_state = attr->qp_state; + + if (mask & IB_QP_STATE) { + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); + err = __qp_chk_state(qp, attr, mask); + if (!err) { + qp->attr.qp_state = attr->qp_state; + rxe_dbg_qp(qp, "state -> %s\n", + qps2str[attr->qp_state]); + } + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (err) + return err; + + switch (attr->qp_state) { + case IB_QPS_RESET: + rxe_qp_reset(qp); + break; + case IB_QPS_SQD: + rxe_qp_sqd(qp, attr, mask); + break; + case IB_QPS_ERR: + rxe_qp_error(qp); + break; + default: + break; + } + } + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = attr->max_rd_atomic ? roundup_pow_of_two(attr->max_rd_atomic) : 0; @@ -612,9 +716,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, return err; } - if (mask & IB_QP_CUR_STATE) - qp->attr.cur_qp_state = attr->qp_state; - if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; @@ -660,27 +761,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("qp#%d set retry count = %d\n", qp_num(qp), - attr->retry_cnt); + rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), - attr->rnr_retry); + rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), - qp->resp.psn); + rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), + rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n", attr->min_rnr_timer); } @@ -688,7 +786,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); + rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -697,54 +795,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_DEST_QPN) qp->attr.dest_qp_num = attr->dest_qp_num; - if (mask & IB_QP_STATE) { - qp->attr.qp_state = attr->qp_state; - - switch (attr->qp_state) { - case IB_QPS_RESET: - pr_debug("qp#%d state -> RESET\n", qp_num(qp)); - rxe_qp_reset(qp); - break; - - case IB_QPS_INIT: - pr_debug("qp#%d state -> INIT\n", qp_num(qp)); - qp->req.state = QP_STATE_INIT; - qp->resp.state = QP_STATE_INIT; - break; - - case IB_QPS_RTR: - pr_debug("qp#%d state -> RTR\n", qp_num(qp)); - qp->resp.state = QP_STATE_READY; - break; - - case IB_QPS_RTS: - pr_debug("qp#%d state -> RTS\n", qp_num(qp)); - qp->req.state = QP_STATE_READY; - break; - - case IB_QPS_SQD: - pr_debug("qp#%d state -> SQD\n", qp_num(qp)); - rxe_qp_drain(qp); - break; - - case IB_QPS_SQE: - pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); - /* Not possible from modify_qp. */ - break; - - case IB_QPS_ERR: - pr_debug("qp#%d state -> ERR\n", qp_num(qp)); - rxe_qp_error(qp); - break; - } - } - return 0; } /* called by the query qp verb */ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { + unsigned long flags; + *attr = qp->attr; attr->rq_psn = qp->resp.psn; @@ -762,86 +820,106 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) rxe_av_to_attr(&qp->pri_av, &attr->ah_attr); rxe_av_to_attr(&qp->alt_av, &attr->alt_ah_attr); - if (qp->req.state == QP_STATE_DRAIN) { - attr->sq_draining = 1; - /* applications that get this state - * typically spin on it. yield the - * processor - */ + /* Applications that get this state typically spin on it. + * Yield the processor + */ + spin_lock_irqsave(&qp->state_lock, flags); + attr->cur_qp_state = qp_state(qp); + if (qp->attr.sq_draining) { + spin_unlock_irqrestore(&qp->state_lock, flags); cond_resched(); } else { - attr->sq_draining = 0; + spin_unlock_irqrestore(&qp->state_lock, flags); } - pr_debug("attr->sq_draining = %d\n", attr->sq_draining); - return 0; } -/* called by the destroy qp verb */ -void rxe_qp_destroy(struct rxe_qp *qp) +int rxe_qp_chk_destroy(struct rxe_qp *qp) { - qp->valid = 0; - qp->qp_timeout_jiffies = 0; - rxe_cleanup_task(&qp->resp.task); - - if (qp_type(qp) == IB_QPT_RC) { - del_timer_sync(&qp->retrans_timer); - del_timer_sync(&qp->rnr_nak_timer); + /* See IBA o10-2.2.3 + * An attempt to destroy a QP while attached to a mcast group + * will fail immediately. + */ + if (atomic_read(&qp->mcg_num)) { + rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n"); + return -EBUSY; } - rxe_cleanup_task(&qp->req.task); - rxe_cleanup_task(&qp->comp.task); - - /* flush out any receive wr's or pending requests */ - __rxe_do_task(&qp->req.task); - if (qp->sq.queue) { - __rxe_do_task(&qp->comp.task); - __rxe_do_task(&qp->req.task); - } + return 0; } /* called when the last reference to the qp is dropped */ static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + unsigned long flags; - rxe_drop_all_mcast_groups(qp); + spin_lock_irqsave(&qp->state_lock, flags); + qp->valid = 0; + spin_unlock_irqrestore(&qp->state_lock, flags); + qp->qp_timeout_jiffies = 0; + + /* In the function timer_setup, .function is initialized. If .function + * is NULL, it indicates the function timer_setup is not called, the + * timer is not initialized. Or else, the timer is initialized. + */ + if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function && + qp->rnr_nak_timer.function) { + timer_delete_sync(&qp->retrans_timer); + timer_delete_sync(&qp->rnr_nak_timer); + } + + if (qp->recv_task.func) + rxe_cleanup_task(&qp->recv_task); + + if (qp->send_task.func) + rxe_cleanup_task(&qp->send_task); + + /* flush out any receive wr's or pending requests */ + rxe_sender(qp); + rxe_receiver(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) - rxe_drop_ref(qp->srq); + rxe_put(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); - if (qp->scq) - rxe_drop_ref(qp->scq); - if (qp->rcq) - rxe_drop_ref(qp->rcq); - if (qp->pd) - rxe_drop_ref(qp->pd); + if (qp->scq) { + atomic_dec(&qp->scq->num_wq); + rxe_put(qp->scq); + } - if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); - qp->resp.mr = NULL; + if (qp->rcq) { + atomic_dec(&qp->rcq->num_wq); + rxe_put(qp->rcq); } - if (qp_type(qp) == IB_QPT_RC) - sk_dst_reset(qp->sk->sk); + if (qp->pd) + rxe_put(qp->pd); + + if (qp->resp.mr) + rxe_put(qp->resp.mr); free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } } /* called when the last reference to the qp is dropped */ -void rxe_qp_cleanup(struct rxe_pool_entry *arg) +void rxe_qp_cleanup(struct rxe_pool_elem *elem) { - struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); + struct rxe_qp *qp = container_of(elem, typeof(*qp), elem); execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work); } |
