diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-29 17:21:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-29 17:21:24 -0700 |
commit | af3877265dd88d7e333f94fb37bc09554544adca (patch) | |
tree | 3f84b1a5d1e052039f510cece55f1c06e580ea8b /drivers/infiniband/sw | |
parent | 1ae78a14516b9372e4c90a89ac21b259339a3a3a (diff) | |
parent | 531094dc7164718d28ebb581d729807d7e846363 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Usual wide collection of unrelated items in drivers:
- Driver bug fixes and treewide cleanups in hfi1, siw, qib, mlx5,
rxe, usnic, usnic, bnxt_re, ocrdma, iser:
- remove unnecessary NULL checks
- kmap obsolescence
- pci_enable_pcie_error_reporting() obsolescence
- unused variables and macros
- trace event related warnings
- casting warnings
- Code cleanups for irdm and erdma
- EFA reporting of 128 byte PCIe TLP support
- mlx5 more agressively uses the out of order HW feature
- Big rework of how state machines and tasks work in rxe
- Fix a syzkaller found crash netdev refcount leak in siw
- bnxt_re revises their HW description header
- Congestion control for bnxt_re
- Use mmu_notifiers more safely in hfi1
- mlx5 gets better support for PCIe relaxed ordering inside VMs"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (81 commits)
RDMA/efa: Add rdma write capability to device caps
RDMA/mlx5: Use correct device num_ports when modify DC
RDMA/irdma: Drop spurious WQ_UNBOUND from alloc_ordered_workqueue() call
RDMA/rxe: Fix spinlock recursion deadlock on requester
RDMA/mlx5: Fix flow counter query via DEVX
RDMA/rxe: Protect QP state with qp->state_lock
RDMA/rxe: Move code to check if drained to subroutine
RDMA/rxe: Remove qp->req.state
RDMA/rxe: Remove qp->comp.state
RDMA/rxe: Remove qp->resp.state
RDMA/mlx5: Allow relaxed ordering read in VFs and VMs
net/mlx5: Update relaxed ordering read HCA capabilities
RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR
RDMA/mlx5: Remove pcie_relaxed_ordering_enabled() check for RO write
RDMA: Add ib_virt_dma_to_page()
RDMA/rxe: Fix the error "trying to register non-static key in rxe_cleanup_task"
RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()
RDMA/rxe: Fix incorrect TASKLET_STATE_SCHED check in rxe_task.c
IB/hfi1: Place struct mmu_rb_handler on cache line start
IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests
...
Diffstat (limited to 'drivers/infiniband/sw')
24 files changed, 1369 insertions, 793 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9b4c0389d2c0..dc83d0ac6a38 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -464,8 +464,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi) if (qps_inuse) rvt_pr_err(rdi, "QP memory leak! %u still in use\n", qps_inuse); - if (!rdi->qp_dev) - return; kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); @@ -2040,7 +2038,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe = rvt_get_swqe_ptr(qp, qp->s_head); /* cplen has length from above */ - memcpy(&wqe->wr, wr, cplen); + memcpy(&wqe->ud_wr, wr, cplen); wqe->length = 0; j = 0; diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 136c2efe3466..7a7e713de52d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -160,6 +160,8 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) port->attr.active_mtu = mtu; port->mtu_cap = ib_mtu_enum_to_int(mtu); + + rxe_info_dev(rxe, "Set mtu to %d", port->mtu_cap); } /* called by ifc layer to create new rxe device. @@ -175,26 +177,26 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) { - struct rxe_dev *exists; + struct rxe_dev *rxe; int err = 0; if (is_vlan_dev(ndev)) { - pr_err("rxe creation allowed on top of a real device only\n"); + rxe_err("rxe creation allowed on top of a real device only"); err = -EPERM; goto err; } - exists = rxe_get_dev_from_net(ndev); - if (exists) { - ib_device_put(&exists->ib_dev); - rxe_dbg(exists, "already configured on %s\n", ndev->name); + rxe = rxe_get_dev_from_net(ndev); + if (rxe) { + ib_device_put(&rxe->ib_dev); + rxe_err_dev(rxe, "already configured on %s", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { - rxe_dbg(exists, "failed to add %s\n", ndev->name); + rxe_err("failed to add %s\n", ndev->name); goto err; } err: diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 2415f3704f57..d33dd6cf83d3 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -38,7 +38,8 @@ #define RXE_ROCE_V2_SPORT (0xc000) -#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \ +#define rxe_dbg(fmt, ...) pr_debug("%s: " fmt "\n", __func__, ##__VA_ARGS__) +#define rxe_dbg_dev(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \ "%s: " fmt, __func__, ##__VA_ARGS__) #define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \ "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) @@ -57,6 +58,48 @@ #define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err(fmt, ...) pr_err_ratelimited("%s: " fmt "\n", __func__, \ + ##__VA_ARGS__) +#define rxe_err_dev(rxe, fmt, ...) ibdev_err_ratelimited(&(rxe)->ib_dev, \ + "%s: " fmt, __func__, ##__VA_ARGS__) +#define rxe_err_uc(uc, fmt, ...) ibdev_err_ratelimited((uc)->ibuc.device, \ + "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_pd(pd, fmt, ...) ibdev_err_ratelimited((pd)->ibpd.device, \ + "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_ah(ah, fmt, ...) ibdev_err_ratelimited((ah)->ibah.device, \ + "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_srq(srq, fmt, ...) ibdev_err_ratelimited((srq)->ibsrq.device, \ + "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_qp(qp, fmt, ...) ibdev_err_ratelimited((qp)->ibqp.device, \ + "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_cq(cq, fmt, ...) ibdev_err_ratelimited((cq)->ibcq.device, \ + "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_mr(mr, fmt, ...) ibdev_err_ratelimited((mr)->ibmr.device, \ + "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_err_mw(mw, fmt, ...) ibdev_err_ratelimited((mw)->ibmw.device, \ + "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) + +#define rxe_info(fmt, ...) pr_info_ratelimited("%s: " fmt "\n", __func__, \ + ##__VA_ARGS__) +#define rxe_info_dev(rxe, fmt, ...) ibdev_info_ratelimited(&(rxe)->ib_dev, \ + "%s: " fmt, __func__, ##__VA_ARGS__) +#define rxe_info_uc(uc, fmt, ...) ibdev_info_ratelimited((uc)->ibuc.device, \ + "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_pd(pd, fmt, ...) ibdev_info_ratelimited((pd)->ibpd.device, \ + "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_ah(ah, fmt, ...) ibdev_info_ratelimited((ah)->ibah.device, \ + "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_srq(srq, fmt, ...) ibdev_info_ratelimited((srq)->ibsrq.device, \ + "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_qp(qp, fmt, ...) ibdev_info_ratelimited((qp)->ibqp.device, \ + "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_cq(cq, fmt, ...) ibdev_info_ratelimited((cq)->ibcq.device, \ + "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_mr(mr, fmt, ...) ibdev_info_ratelimited((mr)->ibmr.device, \ + "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_info_mw(mw, fmt, ...) ibdev_info_ratelimited((mw)->ibmw.device, \ + "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) + /* responder states */ enum resp_states { RESPST_NONE, @@ -90,7 +133,6 @@ enum resp_states { RESPST_ERR_LENGTH, RESPST_ERR_CQ_OVERFLOW, RESPST_ERROR, - RESPST_RESET, RESPST_DONE, RESPST_EXIT, }; diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 20737fec392b..db18ace74d2b 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -118,10 +118,12 @@ void retransmit_timer(struct timer_list *t) rxe_dbg_qp(qp, "retransmit timer fired\n"); + spin_lock_bh(&qp->state_lock); if (qp->valid) { qp->comp.timeout = 1; rxe_sched_task(&qp->comp.task); } + spin_unlock_bh(&qp->state_lock); } void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) @@ -322,7 +324,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task); + rxe_sched_task(&qp->req.task); } } return COMPST_ERROR_RETRY; @@ -428,6 +430,10 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, uwc->wc_flags = IB_WC_WITH_IMM; uwc->byte_len = wqe->dma.length; } + } else { + if (wqe->status != IB_WC_WR_FLUSH_ERR) + rxe_err_qp(qp, "non-flush error status = %d", + wqe->status); } } @@ -469,30 +475,16 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task); + rxe_sched_task(&qp->req.task); } } -static inline enum comp_state complete_ack(struct rxe_qp *qp, - struct rxe_pkt_info *pkt, - struct rxe_send_wqe *wqe) +static void comp_check_sq_drain_done(struct rxe_qp *qp) { - if (wqe->has_rd_atomic) { - wqe->has_rd_atomic = 0; - atomic_inc(&qp->req.rd_atomic); - if (qp->req.need_rd_atomic) { - qp->comp.timeout_retry = 0; - qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task); - } - } - - if (unlikely(qp->req.state == QP_STATE_DRAIN)) { - /* state_lock used by requester & completer */ - spin_lock_bh(&qp->state_lock); - if ((qp->req.state == QP_STATE_DRAIN) && - (qp->comp.psn == qp->req.psn)) { - qp->req.state = QP_STATE_DRAINED; + spin_lock_bh(&qp->state_lock); + if (unlikely(qp_state(qp) == IB_QPS_SQD)) { + if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) { + qp->attr.sq_draining = 0; spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { @@ -504,10 +496,27 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } - } else { - spin_unlock_bh(&qp->state_lock); + return; } } + spin_unlock_bh(&qp->state_lock); +} + +static inline enum comp_state complete_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + rxe_sched_task(&qp->req.task); + } + } + + comp_check_sq_drain_done(qp); do_complete(qp, wqe); @@ -538,25 +547,60 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, return COMPST_GET_WQE; } -static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) +/* drain incoming response packet queue */ +static void drain_resp_pkts(struct rxe_qp *qp) { struct sk_buff *skb; - struct rxe_send_wqe *wqe; - struct rxe_queue *q = qp->sq.queue; while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } +} + +/* complete send wqe with flush error */ +static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_cqe cqe = {}; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + int err; + + if (qp->is_user) { + uwc->wr_id = wqe->wr.wr_id; + uwc->status = IB_WC_WR_FLUSH_ERR; + uwc->qp_num = qp->ibqp.qp_num; + } else { + wc->wr_id = wqe->wr.wr_id; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->qp = &qp->ibqp; + } + + err = rxe_cq_post(qp->scq, &cqe, 0); + if (err) + rxe_dbg_cq(qp->scq, "post cq failed, err = %d", err); + + return err; +} + +/* drain and optionally complete the send queue + * if unable to complete a wqe, i.e. cq is full, stop + * completing and flush the remaining wqes + */ +static void flush_send_queue(struct rxe_qp *qp, bool notify) +{ + struct rxe_send_wqe *wqe; + struct rxe_queue *q = qp->sq.queue; + int err; while ((wqe = queue_head(q, q->type))) { if (notify) { - wqe->status = IB_WC_WR_FLUSH_ERR; - do_complete(qp, wqe); - } else { - queue_advance_consumer(q, q->type); + err = flush_send_wqe(qp, wqe); + if (err) + notify = 0; } + queue_advance_consumer(q, q->type); } } @@ -571,9 +615,28 @@ static void free_pkt(struct rxe_pkt_info *pkt) ib_device_put(dev); } -int rxe_completer(void *arg) +/* reset the retry timer if + * - QP is type RC + * - there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * - the timeout parameter is set + * - the QP is alive + */ +static void reset_retry_timer(struct rxe_qp *qp) +{ + if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) { + spin_lock_bh(&qp->state_lock); + if (qp_state(qp) >= IB_QPS_RTS && + psn_compare(qp->req.psn, qp->comp.psn) > 0) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + spin_unlock_bh(&qp->state_lock); + } +} + +int rxe_completer(struct rxe_qp *qp) { - struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_send_wqe *wqe = NULL; struct sk_buff *skb = NULL; @@ -581,15 +644,17 @@ int rxe_completer(void *arg) enum comp_state state; int ret; - if (!rxe_get(qp)) - return -EAGAIN; + spin_lock_bh(&qp->state_lock); + if (!qp->valid || qp_state(qp) == IB_QPS_ERR || + qp_state(qp) == IB_QPS_RESET) { + bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); - if (!qp->valid || qp->comp.state == QP_STATE_ERROR || - qp->comp.state == QP_STATE_RESET) { - rxe_drain_resp_pkts(qp, qp->valid && - qp->comp.state == QP_STATE_ERROR); + drain_resp_pkts(qp); + flush_send_queue(qp, notify); + spin_unlock_bh(&qp->state_lock); goto exit; } + spin_unlock_bh(&qp->state_lock); if (qp->comp.timeout) { qp->comp.timeout_retry = 1; @@ -677,20 +742,7 @@ int rxe_completer(void *arg) break; } - /* re reset the timeout counter if - * (1) QP is type RC - * (2) the QP is alive - * (3) there is a packet sent by the requester that - * might be acked (we still might get spurious - * timeouts but try to keep them as few as possible) - * (4) the timeout parameter is set - */ - if ((qp_type(qp) == IB_QPT_RC) && - (qp->req.state == QP_STATE_READY) && - (psn_compare(qp->req.psn, qp->comp.psn) > 0) && - qp->qp_timeout_jiffies) - mod_timer(&qp->retrans_timer, - jiffies + qp->qp_timeout_jiffies); + reset_retry_timer(qp); goto exit; case COMPST_ERROR_RETRY: @@ -730,7 +782,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task); + rxe_sched_task(&qp->req.task); } goto done; @@ -752,6 +804,7 @@ int rxe_completer(void *arg) */ qp->req.wait_for_rnr_timer = 1; rxe_dbg_qp(qp, "set rnr nak timer\n"); + // TODO who protects from destroy_qp?? mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); @@ -784,7 +837,5 @@ exit: out: if (pkt) free_pkt(pkt); - rxe_put(qp); - return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 1df186534639..20ff0c0c4605 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int count; if (cqe <= 0) { - rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe); + rxe_dbg_dev(rxe, "cqe(%d) <= 0\n", cqe); goto err1; } if (cqe > rxe->attr.max_cqe) { - rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n", + rxe_dbg_dev(rxe, "cqe(%d) > max_cqe(%d)\n", cqe, rxe->attr.max_cqe); goto err1; } @@ -39,21 +39,6 @@ err1: return -EINVAL; } -static void rxe_send_complete(struct tasklet_struct *t) -{ - struct rxe_cq *cq = from_tasklet(cq, t, comp_task); - unsigned long flags; - - spin_lock_irqsave(&cq->cq_lock, flags); - if (cq->is_dying) { - spin_unlock_irqrestore(&cq->cq_lock, flags); - return; - } - spin_unlock_irqrestore(&cq->cq_lock, flags); - - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); -} - int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int comp_vector, struct ib_udata *udata, struct rxe_create_cq_resp __user *uresp) @@ -65,7 +50,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { - rxe_dbg(rxe, "unable to create cq\n"); + rxe_dbg_dev(rxe, "unable to create cq\n"); return -ENOMEM; } @@ -79,10 +64,6 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->is_user = uresp; - cq->is_dying = false; - - tasklet_setup(&cq->comp_task, rxe_send_complete); - spin_lock_init(&cq->cq_lock); cq->ibcq.cqe = cqe; return 0; @@ -103,6 +84,7 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, return err; } +/* caller holds reference to cq */ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) { struct ib_event ev; @@ -114,6 +96,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); if (unlikely(full)) { + rxe_err_cq(cq, "queue full"); spin_unlock_irqrestore(&cq->cq_lock, flags); if (cq->ibcq.event_handler) { ev.device = cq->ibcq.device; @@ -135,21 +118,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) if ((cq->notify == IB_CQ_NEXT_COMP) || (cq->notify == IB_CQ_SOLICITED && solicited)) { cq->notify = 0; - tasklet_schedule(&cq->comp_task); + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } return 0; } -void rxe_cq_disable(struct rxe_cq *cq) -{ - unsigned long flags; - - spin_lock_irqsave(&cq->cq_lock, flags); - cq->is_dying = true; - spin_unlock_irqrestore(&cq->cq_lock, flags); -} - void rxe_cq_cleanup(struct rxe_pool_elem *elem) { struct rxe_cq *cq = container_of(elem, typeof(*cq), elem); diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 71bc2c189588..fdf5f08cd8f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe) tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { - rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n", + rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } @@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) *(__be32 *)shash_desc_ctx(shash) = crc; err = crypto_shash_update(shash, next, len); if (unlikely(err)) { - rxe_dbg(rxe, "failed crc calculation, err: %d\n", err); + rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err); return (__force __be32)crc32_le((__force u32)crc, next, len); } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 1bb0cb479eb1..804b15e929dd 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -80,7 +80,6 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); int rxe_invalidate_mr(struct rxe_qp *qp, u32 key); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); -int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); void rxe_mr_cleanup(struct rxe_pool_elem *elem); /* rxe_mw.c */ @@ -171,9 +170,9 @@ void rxe_srq_cleanup(struct rxe_pool_elem *elem); void rxe_dealloc(struct ib_device *ib_dev); -int rxe_completer(void *arg); -int rxe_requester(void *arg); -int rxe_responder(void *arg); +int rxe_completer(struct rxe_qp *qp); +int rxe_requester(struct rxe_qp *qp); +int rxe_responder(struct rxe_qp *qp); /* rxe_icrc.c */ int rxe_icrc_init(struct rxe_dev *rxe); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index a47d72dbc537..6b7f2bd69879 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /* Don't allow a mmap larger than the object. */ if (size > ip->info.size) { - rxe_dbg(rxe, "mmap region is larger than the object!\n"); + rxe_dbg_dev(rxe, "mmap region is larger than the object!\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) goto found_it; } - rxe_dbg(rxe, "unable to find pending mmap info\n"); + rxe_dbg_dev(rxe, "unable to find pending mmap info\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -98,7 +98,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret); + rxe_dbg_dev(rxe, "err %d from remap_vmalloc_range\n", ret); goto done; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index b10aa1580a64..0e538fafcc20 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -210,10 +210,10 @@ err1: return err; } -static int rxe_set_page(struct ib_mr *ibmr, u64 iova) +static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr) { struct rxe_mr *mr = to_rmr(ibmr); - struct page *page = virt_to_page(iova & mr->page_mask); + struct page *page = ib_virt_dma_to_page(dma_addr); bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT); int err; @@ -279,16 +279,16 @@ static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr, return 0; } -static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr, +static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr, unsigned int length, enum rxe_mr_copy_dir dir) { - unsigned int page_offset = iova & (PAGE_SIZE - 1); + unsigned int page_offset = dma_addr & (PAGE_SIZE - 1); unsigned int bytes; struct page *page; u8 *va; while (length) { - page = virt_to_page(iova & mr->page_mask); + page = ib_virt_dma_to_page(dma_addr); bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset); va = kmap_local_page(page); @@ -300,7 +300,7 @@ static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr, kunmap_local(va); page_offset = 0; - iova += bytes; + dma_addr += bytes; addr += bytes; length -= bytes; } @@ -488,7 +488,7 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, if (mr->ibmr.type == IB_MR_TYPE_DMA) { page_offset = iova & (PAGE_SIZE - 1); - page = virt_to_page(iova & PAGE_MASK); + page = ib_virt_dma_to_page(iova); } else { unsigned long index; int err; @@ -545,7 +545,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) if (mr->ibmr.type == IB_MR_TYPE_DMA) { page_offset = iova & (PAGE_SIZE - 1); - page = virt_to_page(iova & PAGE_MASK); + page = ib_virt_dma_to_page(iova); } else { unsigned long index; int err; @@ -722,19 +722,6 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) return 0; } -int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) -{ - struct rxe_mr *mr = to_rmr(ibmr); - - /* See IBA 10.6.7.2.6 */ - if (atomic_read(&mr->num_mw) > 0) - return -EINVAL; - - rxe_cleanup(mr); - kfree_rcu(mr); - return 0; -} - void rxe_mr_cleanup(struct rxe_pool_elem *elem) { struct rxe_mr *mr = container_of(elem, typeof(*mr), elem); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index e02e1624bcf4..2bc7361152ea 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -413,11 +413,14 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int is_request = pkt->mask & RXE_REQ_MASK; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - if ((is_request && (qp->req.state != QP_STATE_READY)) || - (!is_request && (qp->resp.state != QP_STATE_READY))) { + spin_lock_bh(&qp->state_lock); + if ((is_request && (qp_state(qp) < IB_QPS_RTS)) || + (!is_request && (qp_state(qp) < IB_QPS_RTR))) { + spin_unlock_bh(&qp->state_lock); rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n"); goto drop; } + spin_unlock_bh(&qp->state_lock); rxe_icrc_generate(skb, pkt); @@ -596,7 +599,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: @@ -608,7 +611,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n", + rxe_dbg_dev(rxe, "ignoring netdev event = %ld for %s\n", event, ndev->name); break; } diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index ab72db68b58f..c5451a4488ca 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - rxe_dbg(rxe, "invalid send wr = %u > %d\n", + rxe_dbg_dev(rxe, "invalid send wr = %u > %d\n", cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - rxe_dbg(rxe, "invalid send sge = %u > %d\n", + rxe_dbg_dev(rxe, "invalid send sge = %u > %d\n", cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - rxe_dbg(rxe, "invalid recv wr = %u > %d\n", + rxe_dbg_dev(rxe, "invalid recv wr = %u > %d\n", cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - rxe_dbg(rxe, "invalid recv sge = %u > %d\n", + rxe_dbg_dev(rxe, "invalid recv sge = %u > %d\n", cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - rxe_dbg(rxe, "invalid max inline data = %u > %d\n", + rxe_dbg_dev(rxe, "invalid max inline data = %u > %d\n", cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - rxe_dbg(rxe, "missing cq\n"); + rxe_dbg_dev(rxe, "missing cq\n"); goto err1; } @@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - rxe_dbg(rxe, "invalid port = %d\n", port_num); + rxe_dbg_dev(rxe, "invalid port = %d\n", port_num); goto err1; } port = &rxe->port; if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num); + rxe_dbg_dev(rxe, "GSI QP exists for port %d\n", port_num); goto err1; } } @@ -231,8 +231,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.wqe_index = queue_get_producer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); - qp->req.state = QP_STATE_RESET; - qp->comp.state = QP_STATE_RESET; qp->req.opcode = -1; qp->comp.opcode = -1; @@ -287,7 +285,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; - qp->resp.state = QP_STATE_RESET; return 0; } @@ -328,8 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (err) goto err2; + spin_lock_bh(&qp->state_lock); qp->attr.qp_state = IB_QPS_RESET; qp->valid = 1; + spin_unlock_bh(&qp->state_lock); return 0; @@ -380,30 +379,9 @@ int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init) return 0; } -/* called by the modify qp verb, this routine checks all the parameters before - * making any changes - */ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { - enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? - attr->cur_qp_state : qp->attr.qp_state; - enum ib_qp_state new_state = (mask & IB_QP_STATE) ? - attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - rxe_dbg_qp(qp, "invalid mask or state\n"); - goto err1; - } - - if (mask & IB_QP_STATE) { - if (cur_state == IB_QPS_SQD) { - if (qp->req.state == QP_STATE_DRAIN && - new_state != IB_QPS_ERR) - goto err1; - } - } - if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num); @@ -473,29 +451,18 @@ static void rxe_qp_reset(struct rxe_qp *qp) { /* stop tasks from running */ rxe_disable_task(&qp->resp.task); + rxe_disable_task(&qp->comp.task); + rxe_disable_task(&qp->req.task); - /* stop request/comp */ - if (qp->sq.queue) { - if (qp_type(qp) == IB_QPT_RC) - rxe_disable_task(&qp->comp.task); - rxe_disable_task(&qp->req.task); - } - - /* move qp to the reset state */ - qp->req.state = QP_STATE_RESET; - qp->comp.state = QP_STATE_RESET; - qp->resp.state = QP_STATE_RESET; + /* drain work and packet queuesc */ + rxe_requester(qp); + rxe_completer(qp); + rxe_responder(qp); - /* let state machines reset themselves drain work and packet queues - * etc. - */ - __rxe_do_task(&qp->resp.task); - - if (qp->sq.queue) { - __rxe_do_task(&qp->comp.task); - __rxe_do_task(&qp->req.task); + if (qp->rq.queue) + rxe_queue_reset(qp->rq.queue); + if (qp->sq.queue) rxe_queue_reset(qp->sq.queue); - } /* cleanup attributes */ atomic_set(&qp->ssn, 0); @@ -518,54 +485,103 @@ static void rxe_qp_reset(struct rxe_qp *qp) /* reenable tasks */ rxe_enable_task(&qp->resp.task); - - if (qp->sq.queue) { - if (qp_type(qp) == IB_QPT_RC) - rxe_enable_task(&qp->comp.task); - - rxe_enable_task(&qp->req.task); - } -} - -/* drain the send queue */ -static void rxe_qp_drain(struct rxe_qp *qp) -{ - if (qp->sq.queue) { - if (qp->req.state != QP_STATE_DRAINED) { - qp->req.state = QP_STATE_DRAIN; - if (qp_type(qp) == IB_QPT_RC) - rxe_sched_task(&qp->comp.task); - else - __rxe_do_task(&qp->comp.task); - rxe_sched_task(&qp->req.task); - } - } + rxe_enable_task(&qp->comp.task); + rxe_enable_task(&qp->req.task); } /* move the qp to the error state */ void rxe_qp_error(struct rxe_qp *qp) { - qp->req.state = QP_STATE_ERROR; - qp->resp.state = QP_STATE_ERROR; - qp->comp.state = QP_STATE_ERROR; + spin_lock_bh(&qp->state_lock); qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ rxe_sched_task(&qp->resp.task); + rxe_sched_task(&qp->comp.task); + rxe_sched_task(&qp->req.task); + spin_unlock_bh(&qp->state_lock); +} - if (qp_type(qp) == IB_QPT_RC) - rxe_sched_task(&qp->comp.task); - else - __rxe_do_task(&qp->comp.task); +static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask) +{ + spin_lock_bh(&qp->state_lock); + qp->attr.sq_draining = 1; + rxe_sched_task(&qp->comp.task); rxe_sched_task(&qp->req.task); + spin_unlock_bh(&qp->state_lock); } +/* caller should hold qp->state_lock */ +static int __qp_chk_state(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask) +{ + enum ib_qp_state cur_state; + enum ib_qp_state new_state; + + cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->attr.qp_state; + new_state = (mask & IB_QP_STATE) ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) + return -EINVAL; + + if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) { + if (qp->attr.sq_draining && new_state != IB_QPS_ERR) + return -EINVAL; + } + + return 0; +} + +static const char *const qps2str[] = { + [IB_QPS_RESET] = "RESET", + [IB_QPS_INIT] = "INIT", + [IB_QPS_RTR] = "RTR", + [IB_QPS_RTS] = "RTS", + [IB_QPS_SQD] = "SQD", + [IB_QPS_SQE] = "SQE", + [IB_QPS_ERR] = "ERR", +}; + /* called by the modify qp verb */ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; + if (mask & IB_QP_CUR_STATE) + qp->attr.cur_qp_state = attr->qp_state; + + if (mask & IB_QP_STATE) { + spin_lock_bh(&qp->state_lock); + err = __qp_chk_state(qp, attr, mask); + if (!err) { + qp->attr.qp_state = attr->qp_state; + rxe_dbg_qp(qp, "state -> %s\n", + qps2str[attr->qp_state]); + } + spin_unlock_bh(&qp->state_lock); + + if (err) + return err; + + switch (attr->qp_state) { + case IB_QPS_RESET: + rxe_qp_reset(qp); + break; + case IB_QPS_SQD: + rxe_qp_sqd(qp, attr, mask); + break; + case IB_QPS_ERR: + rxe_qp_error(qp); + break; + default: + break; + } + } + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = attr->max_rd_atomic ? roundup_pow_of_two(attr->max_rd_atomic) : 0; @@ -587,9 +603,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, return err; } - if (mask & IB_QP_CUR_STATE) - qp->attr.cur_qp_state = attr->qp_state; - if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; @@ -669,50 +682,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_DEST_QPN) qp->attr.dest_qp_num = attr->dest_qp_num; - if (mask & IB_QP_STATE) { - qp->attr.qp_state = attr->qp_state; - - switch (attr->qp_state) { - case IB_QPS_RESET: - rxe_dbg_qp(qp, "state -> RESET\n"); - rxe_qp_reset(qp); - break; - - case IB_QPS_INIT: - rxe_dbg_qp(qp, "state -> INIT\n"); - qp->req.state = QP_STATE_INIT; - qp->resp.state = QP_STATE_INIT; - qp->comp.state = QP_STATE_INIT; - break; - - case IB_QPS_RTR: - rxe_dbg_qp(qp, "state -> RTR\n"); - qp->resp.state = QP_STATE_READY; - break; - - case IB_QPS_RTS: - rxe_dbg_qp(qp, "state -> RTS\n"); - qp->req.state = QP_STATE_READY; - qp->comp.state = QP_STATE_READY; - break; - - case IB_QPS_SQD: - rxe_dbg_qp(qp, "state -> SQD\n"); - rxe_qp_drain(qp); - break; - - case IB_QPS_SQE: - rxe_dbg_qp(qp, "state -> SQE !!?\n"); - /* Not possible from modify_qp. */ - break; - - case IB_QPS_ERR: - rxe_dbg_qp(qp, "state -> ERR\n"); - rxe_qp_error(qp); - break; - } - } - return 0; } @@ -736,18 +705,15 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) rxe_av_to_attr(&qp->pri_av, &attr->ah_attr); rxe_av_to_attr(&qp->alt_av, &attr->alt_ah_attr); - if (qp->req.state == QP_STATE_DRAIN) { - attr->sq_draining = 1; - /* applications that get this state - * typically spin on it. yield the - * processor - */ + /* Applications that get this state typically spin on it. + * Yield the processor + */ + spin_lock_bh(&qp->state_lock); + if (qp->attr.sq_draining) { + spin_unlock_bh(&qp->state_lock); cond_resched(); - } else { - attr->sq_draining = 0; } - - rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining); + spin_unlock_bh(&qp->state_lock); return 0; } @@ -771,26 +737,29 @@ static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + spin_lock_bh(&qp->state_lock); qp->valid = 0; + spin_unlock_bh(&qp->state_lock); qp->qp_timeout_jiffies = 0; - rxe_cleanup_task(&qp->resp.task); if (qp_type(qp) == IB_QPT_RC) { del_timer_sync(&qp->retrans_timer); del_timer_sync(&qp->rnr_nak_timer); } - rxe_cleanup_task(&qp->req.task); - rxe_cleanup_task(&qp->comp.task); + if (qp->resp.task.func) + rxe_cleanup_task(&qp->resp.task); - /* flush out any receive wr's or pending requests */ if (qp->req.task.func) - __rxe_do_task(&qp->req.task); + rxe_cleanup_task(&qp->req.task); - if (qp->sq.queue) { - __rxe_do_task(&qp->comp.task); - __rxe_do_task(&qp->req.task); - } + if (qp->comp.task.func) + rxe_cleanup_task(&qp->comp.task); + + /* flush out any receive wr's or pending requests */ + rxe_requester(qp); + rxe_completer(qp); + rxe_responder(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index d6dbf5a0058d..9611ee191a46 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -61,11 +61,11 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, /* num_elem == 0 is allowed, but uninteresting */ if (*num_elem < 0) - goto err1; + return NULL; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) - goto err1; + return NULL; q->rxe = rxe; q->type = type; @@ -100,7 +100,6 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, err2: kfree(q); -err1: return NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 434a693cd4a5..2f953cc74256 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -38,12 +38,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EINVAL; } + spin_lock_bh(&qp->state_lock); if (pkt->mask & RXE_REQ_MASK) { - if (unlikely(qp->resp.state != QP_STATE_READY)) + if (unlikely(qp_state(qp) < IB_QPS_RTR)) { + spin_unlock_bh(&qp->state_lock); return -EINVAL; - } else if (unlikely(qp->req.state < QP_STATE_READY || - qp->req.state > QP_STATE_DRAINED)) - return -EINVAL; + } + } else { + if (unlikely(qp_state(qp) < IB_QPS_RTS)) { + spin_unlock_bh(&qp->state_lock); + return -EINVAL; + } + } + spin_unlock_bh(&qp->state_lock); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 899c8779f800..65134a9aefe7 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -102,44 +102,44 @@ void rnr_nak_timer(struct timer_list *t) rxe_dbg_qp(qp, "nak timer fired\n"); - /* request a send queue retry */ - qp->req.need_retry = 1; - qp->req.wait_for_rnr_timer = 0; - rxe_sched_task(&qp->req.task); + spin_lock_bh(&qp->state_lock); + if (qp->valid) { + /* request a send queue retry */ + qp->req.need_retry = 1; + qp->req.wait_for_rnr_timer = 0; + rxe_sched_task(&qp->req.task); + } + spin_unlock_bh(&qp->state_lock); } -static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) +static void req_check_sq_drain_done(struct rxe_qp *qp) { - struct rxe_send_wqe *wqe; - struct rxe_queue *q = qp->sq.queue; - unsigned int index = qp->req.wqe_index; + struct rxe_queue *q; + unsigned int index; unsigned int cons; - unsigned int prod; + struct rxe_send_wqe *wqe; - wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); - cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); - prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); + spin_lock_bh(&qp->state_lock); + if (qp_state(qp) == IB_QPS_SQD) { + q = qp->sq.queue; + index = qp->req.wqe_index; + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + wqe = queue_addr_from_index(q, cons); - if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* check to see if we are drained; * state_lock used by requester and completer */ - spin_lock_bh(&qp->state_lock); do { - if (qp->req.state != QP_STATE_DRAIN) { + if (!qp->attr.sq_draining) /* comp just finished */ - spin_unlock_bh(&qp->state_lock); break; - } if (wqe && ((index != cons) || - (wqe->state != wqe_state_posted))) { + (wqe->state != wqe_state_posted))) /* comp not done yet */ - spin_unlock_bh(&qp->state_lock); break; - } - qp->req.state = QP_STATE_DRAINED; + qp->attr.sq_draining = 0; spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { @@ -151,18 +151,42 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } + return; } while (0); } + spin_unlock_bh(&qp->state_lock); +} + +static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp) +{ + struct rxe_queue *q = qp->sq.queue; + unsigned int index = qp->req.wqe_index; + unsigned int prod; + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); if (index == prod) return NULL; + else + return queue_addr_from_index(q, index); +} + +static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe; + + req_check_sq_drain_done(qp); - wqe = queue_addr_from_index(q, index); + wqe = __req_next_wqe(qp); + if (wqe == NULL) + return NULL; - if (unlikely((qp->req.state == QP_STATE_DRAIN || - qp->req.state == QP_STATE_DRAINED) && - (wqe->state != wqe_state_processing))) + spin_lock_bh(&qp->state_lock); + if (unlikely((qp_state(qp) == IB_QPS_SQD) && + (wqe->state != wqe_state_processing))) { + spin_unlock_bh(&qp->state_lock); return NULL; + } + spin_unlock_bh(&qp->state_lock); wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); return wqe; @@ -635,9 +659,8 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) return 0; } -int rxe_requester(void *arg) +int rxe_requester(struct rxe_qp *qp) { - struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; @@ -654,24 +677,22 @@ int rxe_requester(void *arg) struct rxe_ah *ah; struct rxe_av *av; - if (!rxe_get(qp)) - return -EAGAIN; - - if (unlikely(!qp->valid)) + spin_lock_bh(&qp->state_lock); + if (unlikely(!qp->valid)) { + spin_unlock_bh(&qp->state_lock); goto exit; + } - if (unlikely(qp->req.state == QP_STATE_ERROR)) { - wqe = req_next_wqe(qp); + if (unlikely(qp_state(qp) == IB_QPS_ERR)) { + wqe = __req_next_wqe(qp); + spin_unlock_bh(&qp->state_lock); if (wqe) - /* - * Generate an error completion for error qp state - */ goto err; else goto exit; } - if (unlikely(qp->req.state == QP_STATE_RESET)) { + if (unlikely(qp_state(qp) == IB_QPS_RESET)) { qp->req.wqe_index = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); qp->req.opcode = -1; @@ -679,8 +700,10 @@ int rxe_requester(void *arg) qp->req.wait_psn = 0; qp->req.need_retry = 0; qp->req.wait_for_rnr_timer = 0; + spin_unlock_bh(&qp->state_lock); goto exit; } + spin_unlock_bh(&qp->state_lock); /* we come here if the retransmit timer has fired * or if the rnr timer has fired. If the retransmit @@ -757,7 +780,7 @@ int rxe_requester(void *arg) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_run_task(&qp->comp.task); + rxe_sched_task(&qp->comp.task); goto done; } payload = mtu; @@ -840,12 +863,9 @@ err: /* update wqe_index for each wqe completion */ qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; - qp->req.state = QP_STATE_ERROR; - rxe_run_task(&qp->comp.task); + rxe_qp_error(qp); exit: ret = -EAGAIN; out: - rxe_put(qp); - return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 0cc1ba91d48c..68f6cd188d8e 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -42,7 +42,6 @@ static char *resp_state_name[] = { [RESPST_ERR_LENGTH] = "ERR_LENGTH", [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", [RESPST_ERROR] = "ERROR", - [RESPST_RESET] = "RESET", [RESPST_DONE] = "DONE", [RESPST_EXIT] = "EXIT", }; @@ -69,17 +68,6 @@ static inline enum resp_states get_req(struct rxe_qp *qp, { struct sk_buff *skb; - if (qp->resp.state == QP_STATE_ERROR) { - while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_put(qp); - kfree_skb(skb); - ib_device_put(qp->ibqp.device); - } - - /* go drain recv wr queue */ - return RESPST_CHK_RESOURCE; - } - skb = skb_peek(&qp->req_pkts); if (!skb) return RESPST_EXIT; @@ -334,24 +322,6 @@ static enum resp_states check_resource(struct rxe_qp *qp, { struct rxe_srq *srq = qp->srq; - if (qp->resp.state == QP_STATE_ERROR) { - if (qp->resp.wqe) { - qp->resp.status = IB_WC_WR_FLUSH_ERR; - return RESPST_COMPLETE; - } else if (!srq) { - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_CLIENT); - if (qp->resp.wqe) { - qp->resp.status = IB_WC_WR_FLUSH_ERR; - return RESPST_COMPLETE; - } else { - return RESPST_EXIT; - } - } else { - return RESPST_EXIT; - } - } - if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { /* it is the requesters job to not send * too many read/atomic ops, we just @@ -1151,6 +1121,10 @@ static enum resp_states do_complete(struct rxe_qp *qp, wc->port_num = qp->attr.port_num; } + } else { + if (wc->status != IB_WC_WR_FLUSH_ERR) + rxe_err_qp(qp, "non-flush error status = %d", + wc->status); } /* have copy for srq and reference for !srq */ @@ -1163,8 +1137,13 @@ static enum resp_states do_complete(struct rxe_qp *qp, return RESPST_ERR_CQ_OVERFLOW; finish: - if (unlikely(qp->resp.state == QP_STATE_ERROR)) + spin_lock_bh(&qp->state_lock); + if (unlikely(qp_state(qp) == IB_QPS_ERR)) { + spin_unlock_bh(&qp->state_lock); return RESPST_CHK_RESOURCE; + } + spin_unlock_bh(&qp->state_lock); + if (unlikely(!pkt)) return RESPST_DONE; if (qp_type(qp) == IB_QPT_RC) @@ -1421,49 +1400,90 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } } -static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) +/* drain incoming request packet queue */ +static void drain_req_pkts(struct rxe_qp *qp) { struct sk_buff *skb; - struct rxe_queue *q = qp->rq.queue; while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } +} + +/* complete receive wqe with flush error */ +static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe) +{ + struct rxe_cqe cqe = {}; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + int err; + + if (qp->rcq->is_user) { + uwc->wr_id = wqe->wr_id; + uwc->status = IB_WC_WR_FLUSH_ERR; + uwc->qp_num = qp_num(qp); + } else { + wc->wr_id = wqe->wr_id; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->qp = &qp->ibqp; + } - if (notify) + err = rxe_cq_post(qp->rcq, &cqe, 0); + if (err) + rxe_dbg_cq(qp->rcq, "post cq failed err = %d", err); + + return err; +} + +/* drain and optionally complete the recive queue + * if unable to complete a wqe stop completing and + * just flush the remaining wqes + */ +static void flush_recv_queue(struct rxe_qp *qp, bool notify) +{ + struct rxe_queue *q = qp->rq.queue; + struct rxe_recv_wqe *wqe; + int err; + + if (qp->srq) return; - while (!qp->srq && q && queue_head(q, q->type)) + while ((wqe = queue_head(q, q->type))) { + if (notify) { + err = flush_recv_wqe(qp, wqe); + if (err) + notify = 0; + } queue_advance_consumer(q, q->type); + } + + qp->resp.wqe = NULL; } -int rxe_responder(void *arg) +int rxe_responder(struct rxe_qp *qp) { - struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); enum resp_states state; struct rxe_pkt_info *pkt = NULL; int ret; - if (!rxe_get(qp)) - return -EAGAIN; + spin_lock_bh(&qp->state_lock); + if (!qp->valid || qp_state(qp) == IB_QPS_ERR || + qp_state(qp) == IB_QPS_RESET) { + bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); - qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; - - if (!qp->valid) + drain_req_pkts(qp); + flush_recv_queue(qp, notify); + spin_unlock_bh(&qp->state_lock); goto exit; + } + spin_unlock_bh(&qp->state_lock); - switch (qp->resp.state) { - case QP_STATE_RESET: - state = RESPST_RESET; - break; + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; - default: - state = RESPST_GET_REQ; - break; - } + state = RESPST_GET_REQ; while (1) { rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); @@ -1622,11 +1642,6 @@ int rxe_responder(void *arg) goto exit; - case RESPST_RESET: - rxe_drain_req_pkts(qp, false); - qp->resp.wqe = NULL; - goto exit; - case RESPST_ERROR: qp->resp.goto_error = 0; rxe_dbg_qp(qp, "moved to error state\n"); @@ -1648,6 +1663,5 @@ done: exit: ret = -EAGAIN; out: - rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 82e37a41ced4..27ca82ec0826 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) struct ib_srq_attr *attr = &init->attr; if (attr->max_wr > rxe->attr.max_srq_wr) { - rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg_dev(rxe, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg_dev(rxe, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } @@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) attr->max_wr = RXE_MIN_SRQ_WR; if (attr->max_sge > rxe->attr.max_srq_sge) { - rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n", + rxe_dbg_dev(rxe, "max_sge(%d) > max_srq_sge(%d)\n", attr->max_sge, rxe->attr.max_srq_sge); goto err1; } diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 60b90e33a884..fb9a6bc8e620 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -6,69 +6,128 @@ #include "rxe.h" -int __rxe_do_task(struct rxe_task *task) +/* Check if task is idle i.e. not running, not scheduled in + * tasklet queue and not draining. If so move to busy to + * reserve a slot in do_task() by setting to busy and taking + * a qp reference to cover the gap from now until the task finishes. + * state will move out of busy if task returns a non zero value + * in do_task(). If state is already busy it is raised to armed + * to indicate to do_task that additional pass should be made + * over the task. + * Context: caller should hold task->lock. + * Returns: true if state transitioned from idle to busy else false. + */ +static bool __reserve_if_idle(struct rxe_task *task) +{ + WARN_ON(rxe_read(task->qp) <= 0); + + if (task->tasklet.state & BIT(TASKLET_STATE_SCHED)) + return false; + + if (task->state == TASK_STATE_IDLE) { + rxe_get(task->qp); + task->state = TASK_STATE_BUSY; + task->num_sched++; + return true; + } + + if (task->state == TASK_STATE_BUSY) + task->state = TASK_STATE_ARMED; + return false; +} + +/* check if task is idle or drained and not currently + * scheduled in the tasklet queue. This routine is + * called by rxe_cleanup_task or rxe_disable_task to + * see if the queue is empty. + * Context: caller should hold task->lock. + * Returns true if done else false. + */ +static bool __is_done(struct rxe_task *task) { - int ret; + if (task->tasklet.state & BIT(TASKLET_STATE_SCHED)) + return false; - while ((ret = task->func(task->arg)) == 0) - ; + if (task->state == TASK_STATE_IDLE || + task->state == TASK_STATE_DRAINED) { + return true; + } - task->ret = ret; + return false; +} - return ret; +/* a locked version of __is_done */ +static bool is_done(struct rxe_task *task) +{ + unsigned long flags; + int done; + + spin_lock_irqsave(&task->lock, flags); + done = __is_done(task); + spin_unlock_irqrestore(&task->lock, flags); + + return done; } -/* - * this locking is due to a potential race where - * a second caller finds the task already running - * but looks just after the last call to func +/* do_task is a wrapper for the three tasks (requester, + * completer, responder) and calls them in a loop until + * they return a non-zero value. It is called either + * directly by rxe_run_task or indirectly if rxe_sched_task + * schedules the task. They must call __reserve_if_idle to + * move the task to busy before calling or scheduling. + * The task can also be moved to drained or invalid + * by calls to rxe-cleanup_task or rxe_disable_task. + * In that case tasks which get here are not executed but + * just flushed. The tasks are designed to look to see if + * there is work to do and do part of it before returning + * here with a return value of zero until all the work + * has been consumed then it retuens a non-zero value. + * The number of times the task can be run is limited by + * max iterations so one task cannot hold the cpu forever. */ static void do_task(struct tasklet_struct *t) { int cont; int ret; struct rxe_task *task = from_tasklet(task, t, tasklet); - struct rxe_qp *qp = (struct rxe_qp *)task->arg; - unsigned int iterations = RXE_MAX_ITERATIONS; + unsigned int iterations; + unsigned long flags; + int resched = 0; - spin_lock_bh(&task->lock); - switch (task->state) { - case TASK_STATE_START: - task->state = TASK_STATE_BUSY; - spin_unlock_bh(&task->lock); - break; - - case TASK_STATE_BUSY: - task->state = TASK_STATE_ARMED; - fallthrough; - case TASK_STATE_ARMED: - spin_unlock_bh(&task->lock); - return; + WARN_ON(rxe_read(task->qp) <= 0); - default: - spin_unlock_bh(&task->lock); - rxe_dbg_qp(qp, "failed with bad state %d\n", task->state); + spin_lock_irqsave(&task->lock, flags); + if (task->state >= TASK_STATE_DRAINED) { + rxe_put(task->qp); + task->num_done++; + spin_unlock_irqrestore(&task->lock, flags); return; } + spin_unlock_irqrestore(&task->lock, flags); do { + iterations = RXE_MAX_ITERATIONS; cont = 0; - ret = task->func(task->arg); - spin_lock_bh(&task->lock); + do { + ret = task->func(task->qp); + } while (ret == 0 && iterations-- > 0); + + spin_lock_irqsave(&task->lock, flags); switch (task->state) { case TASK_STATE_BUSY: if (ret) { - task->state = TASK_STATE_START; - } else if (iterations--) { - cont = 1; + task->state = TASK_STATE_IDLE; } else { - /* reschedule the tasklet and exit + /* This can happen if the client + * can add work faster than the + * tasklet can finish it. + * Reschedule the tasklet and exit * the loop to give up the cpu */ - tasklet_schedule(&task->tasklet); - task->state = TASK_STATE_START; + task->state = TASK_STATE_IDLE; + resched = 1; } break; @@ -81,71 +140,158 @@ static void do_task(struct tasklet_struct *t) cont = 1; break; + case TASK_STATE_DRAINING: + if (ret) + task->state = TASK_STATE_DRAINED; + else + cont = 1; + break; + default: - rxe_dbg_qp(qp, "failed with bad state %d\n", - task->state); + WARN_ON(1); + rxe_info_qp(task->qp, "unexpected task state = %d", task->state); + } + + if (!cont) { + task->num_done++; + if (WARN_ON(task->num_done != task->num_sched)) + rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done", + task->num_sched, task->num_done); } - spin_unlock_bh(&task->lock); + spin_unlock_irqrestore(&task->lock, flags); } while (cont); task->ret = ret; + + if (resched) + rxe_sched_task(task); + + rxe_put(task->qp); } -int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) +int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp, + int (*func)(struct rxe_qp *)) { - task->arg = arg; - task->func = func; - task->destroyed = false; + WARN_ON(rxe_read(qp) <= 0); + + task->qp = qp; + task->func = func; tasklet_setup(&task->tasklet, do_task); - task->state = TASK_STATE_START; + task->state = TASK_STATE_IDLE; spin_lock_init(&task->lock); return 0; } +/* rxe_cleanup_task is only called from rxe_do_qp_cleanup in + * process context. The qp is already completed with no + * remaining references. Once the queue is drained the + * task is moved to invalid and returns. The qp cleanup + * code then calls the task functions directly without + * using the task struct to drain any late arriving packets + * or work requests. + */ void rxe_cleanup_task(struct rxe_task *task) { - bool idle; + unsigned long flags; - /* - * Mark the task, then wait for it to finish. It might be - * running in a non-tasklet (direct call) context. - */ - task->destroyed = true; + spin_lock_irqsave(&task->lock, flags); + if (!__is_done(task) && task->state < TASK_STATE_DRAINED) { + task->state = TASK_STATE_DRAINING; + } else { + task->state = TASK_STATE_INVALID; + spin_unlock_irqrestore(&task->lock, flags); + return; + } + spin_unlock_irqrestore(&task->lock, flags); - do { - spin_lock_bh(&task->lock); - idle = (task->state == TASK_STATE_START); - spin_unlock_bh(&task->lock); - } while (!idle); + /* now the task cannot be scheduled or run just wait + * for the previously scheduled tasks to finish. + */ + while (!is_done(task)) + cond_resched(); tasklet_kill(&task->tasklet); + + spin_lock_irqsave(&task->lock, flags); + task->state = TASK_STATE_INVALID; + spin_unlock_irqrestore(&task->lock, flags); } +/* run the task inline if it is currently idle + * cannot call do_task holding the lock + */ void rxe_run_task(struct rxe_task *task) { - if (task->destroyed) - return; + unsigned long flags; + int run; + + WARN_ON(rxe_read(task->qp) <= 0); + + spin_lock_irqsave(&task->lock, flags); + run = __reserve_if_idle(task); + spin_unlock_irqrestore(&task->lock, flags); - do_task(&task->tasklet); + if (run) + do_task(&task->tasklet); } +/* schedule the task to run later as a tasklet. + * the tasklet)schedule call can be called holding + * the lock. + */ void rxe_sched_task(struct rxe_task *task) { - if (task->destroyed) - return; + unsigned long flags; - tasklet_schedule(&task->tasklet); + WARN_ON(rxe_read(task->qp) <= 0); + + spin_lock_irqsave(&task->lock, flags); + if (__reserve_if_idle(task)) + tasklet_schedule(&task->tasklet); + spin_unlock_irqrestore(&task->lock, flags); } +/* rxe_disable/enable_task are only called from + * rxe_modify_qp in process context. Task is moved + * to the drained state by do_task. + */ void rxe_disable_task(struct rxe_task *task) { + unsigned long flags; + + WARN_ON(rxe_read(task->qp) <= 0); + + spin_lock_irqsave(&task->lock, flags); + if (!__is_done(task) && task->state < TASK_STATE_DRAINED) { + task->state = TASK_STATE_DRAINING; + } else { + task->state = TASK_STATE_DRAINED; + spin_unlock_irqrestore(&task->lock, flags); + return; + } + spin_unlock_irqrestore(&task->lock, flags); + + while (!is_done(task)) + cond_resched(); + tasklet_disable(&task->tasklet); } void rxe_enable_task(struct rxe_task *task) { + unsigned long flags; + + WARN_ON(rxe_read(task->qp) <= 0); + + spin_lock_irqsave(&task->lock, flags); + if (task->state == TASK_STATE_INVALID) { + spin_unlock_irqrestore(&task->lock, flags); + return; + } + task->state = TASK_STATE_IDLE; tasklet_enable(&task->tasklet); + spin_unlock_irqrestore(&task->lock, flags); } diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 7b88129702ac..facb7c8e3729 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -8,9 +8,12 @@ #define RXE_TASK_H enum { - TASK_STATE_START = 0, + TASK_STATE_IDLE = 0, TASK_STATE_BUSY = 1, TASK_STATE_ARMED = 2, + TASK_STATE_DRAINING = 3, + TASK_STATE_DRAINED = 4, + TASK_STATE_INVALID = 5, }; /* @@ -22,28 +25,24 @@ struct rxe_task { struct tasklet_struct tasklet; int state; spinlock_t lock; - void *arg; - int (*func)(void *arg); + struct rxe_qp *qp; + int (*func)(struct rxe_qp *qp); int ret; - bool destroyed; + long num_sched; + long num_done; }; /* * init rxe_task structure - * arg => parameter to pass to fcn + * qp => parameter to pass to func * func => function to call until it returns != 0 */ -int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)); +int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp, + int (*func)(struct rxe_qp *)); /* cleanup task */ void rxe_cleanup_task(struct rxe_task *task); -/* - * raw call to func in loop without any checking - * can call when tasklets are disabled - */ -int __rxe_do_task(struct rxe_task *task); - void rxe_run_task(struct rxe_task *task); void rxe_sched_task(struct rxe_task *task); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e14050a69276..dea605b7f683 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -12,31 +12,48 @@ #include "rxe_queue.h" #include "rxe_hw_counters.h" -static int rxe_query_device(struct ib_device *dev, +static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr); + +/* dev */ +static int rxe_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, - struct ib_udata *uhw) + struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(dev); + struct rxe_dev *rxe = to_rdev(ibdev); + int err; - if (uhw->inlen || uhw->outlen) - return -EINVAL; + if (udata->inlen || udata->outlen) { + rxe_dbg_dev(rxe, "malformed udata"); + err = -EINVAL; + goto err_out; + } + + memcpy(attr, &rxe->attr, sizeof(*attr)); - *attr = rxe->attr; return 0; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_query_port(struct ib_device *dev, +static int rxe_query_port(struct ib_device *ibdev, u32 port_num, struct ib_port_attr *attr) { - struct rxe_dev *rxe = to_rdev(dev); - int rc; + struct rxe_dev *rxe = to_rdev(ibdev); + int err, ret; + + if (port_num != 1) { + err = -EINVAL; + rxe_dbg_dev(rxe, "bad port_num = %d", port_num); + goto err_out; + } - /* *attr being zeroed by the caller, avoid zeroing it here */ - *attr = rxe->port.attr; + memcpy(attr, &rxe->port.attr, sizeof(*attr)); mutex_lock(&rxe->usdev_lock); - rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, - &attr->active_width); + ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed, + &attr->active_width); if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; @@ -47,27 +64,45 @@ static int rxe_query_port(struct ib_device *dev, mutex_unlock(&rxe->usdev_lock); - return rc; + return ret; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_query_pkey(struct ib_device *device, +static int rxe_query_pkey(struct ib_device *ibdev, u32 port_num, u16 index, u16 *pkey) { - if (index > 0) - return -EINVAL; + struct rxe_dev *rxe = to_rdev(ibdev); + int err; + + if (index != 0) { + err = -EINVAL; + rxe_dbg_dev(rxe, "bad pkey index = %d", index); + goto err_out; + } *pkey = IB_DEFAULT_PKEY_FULL; return 0; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_modify_device(struct ib_device *dev, +static int rxe_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *attr) { - struct rxe_dev *rxe = to_rdev(dev); + struct rxe_dev *rxe = to_rdev(ibdev); + int err; if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | - IB_DEVICE_MODIFY_NODE_DESC)) - return -EOPNOTSUPP; + IB_DEVICE_MODIFY_NODE_DESC)) { + err = -EOPNOTSUPP; + rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask); + goto err_out; + } if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); @@ -78,16 +113,33 @@ static int rxe_modify_device(struct ib_device *dev, } return 0; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_modify_port(struct ib_device *dev, - u32 port_num, int mask, struct ib_port_modify *attr) +static int rxe_modify_port(struct ib_device *ibdev, u32 port_num, + int mask, struct ib_port_modify *attr) { - struct rxe_dev *rxe = to_rdev(dev); + struct rxe_dev *rxe = to_rdev(ibdev); struct rxe_port *port; + int err; - port = &rxe->port; + if (port_num != 1) { + err = -EINVAL; + rxe_dbg_dev(rxe, "bad port_num = %d", port_num); + goto err_out; + } + //TODO is shutdown useful + if (mask & ~(IB_PORT_RESET_QKEY_CNTR)) { + err = -EOPNOTSUPP; + rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask); + goto err_out; + } + + port = &rxe->port; port->attr.port_cap_flags |= attr->set_port_cap_mask; port->attr.port_cap_flags &= ~attr->clr_port_cap_mask; @@ -95,73 +147,125 @@ static int rxe_modify_port(struct ib_device *dev, port->attr.qkey_viol_cntr = 0; return 0; -} -static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, - u32 port_num) -{ - return IB_LINK_LAYER_ETHERNET; +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) +static enum rdma_link_layer rxe_get_link_layer(struct ib_device *ibdev, + u32 port_num) { - struct rxe_dev *rxe = to_rdev(ibuc->device); - struct rxe_ucontext *uc = to_ruc(ibuc); + struct rxe_dev *rxe = to_rdev(ibdev); + int err; - return rxe_add_to_pool(&rxe->uc_pool, uc); -} + if (port_num != 1) { + err = -EINVAL; + rxe_dbg_dev(rxe, "bad port_num = %d", port_num); + goto err_out; + } -static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) -{ - struct rxe_ucontext *uc = to_ruc(ibuc); + return IB_LINK_LAYER_ETHERNET; - rxe_cleanup(uc); +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } -static int rxe_port_immutable(struct ib_device *dev, u32 port_num, +static int rxe_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { + struct rxe_dev *rxe = to_rdev(ibdev); + struct ib_port_attr attr = {}; int err; - struct ib_port_attr attr; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + if (port_num != 1) { + err = -EINVAL; + rxe_dbg_dev(rxe, "bad port_num = %d", port_num); + goto err_out; + } - err = ib_query_port(dev, port_num, &attr); + err = ib_query_port(ibdev, port_num, &attr); if (err) - return err; + goto err_out; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } +/* uc */ +static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) +{ + struct rxe_dev *rxe = to_rdev(ibuc->device); + struct rxe_ucontext *uc = to_ruc(ibuc); + int err; + + err = rxe_add_to_pool(&rxe->uc_pool, uc); + if (err) + rxe_err_dev(rxe, "unable to create uc"); + + return err; +} + +static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) +{ + struct rxe_ucontext *uc = to_ruc(ibuc); + int err; + + err = rxe_cleanup(uc); + if (err) + rxe_err_uc(uc, "cleanup failed, err = %d", err); +} + +/* pd */ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); + int err; - return rxe_add_to_pool(&rxe->pd_pool, pd); + err = rxe_add_to_pool(&rxe->pd_pool, pd); + if (err) { + rxe_dbg_dev(rxe, "unable to alloc pd"); + goto err_out; + } + + return 0; + +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); + int err; + + err = rxe_cleanup(pd); + if (err) + rxe_err_pd(pd, "cleanup failed, err = %d", err); - rxe_cleanup(pd); return 0; } +/* ah */ static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) - { struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); struct rxe_create_ah_resp __user *uresp = NULL; - int err; + int err, cleanup_err; if (udata) { /* test if new user provider */ @@ -174,16 +278,18 @@ static int rxe_create_ah(struct ib_ah *ibah, err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); - if (err) - return err; + if (err) { + rxe_dbg_dev(rxe, "unable to create ah"); + goto err_out; + } /* create index > 0 */ ah->ah_num = ah->elem.index; err = rxe_ah_chk_attr(ah, init_attr->ah_attr); if (err) { - rxe_cleanup(ah); - return err; + rxe_dbg_ah(ah, "bad attr"); + goto err_cleanup; } if (uresp) { @@ -191,8 +297,9 @@ static int rxe_create_ah(struct ib_ah *ibah, err = copy_to_user(&uresp->ah_num, &ah->ah_num, sizeof(uresp->ah_num)); if (err) { - rxe_cleanup(ah); - return -EFAULT; + err = -EFAULT; + rxe_dbg_ah(ah, "unable to copy to user"); + goto err_cleanup; } } else if (ah->is_user) { /* only if old user provider */ @@ -203,19 +310,34 @@ static int rxe_create_ah(struct ib_ah *ibah, rxe_finalize(ah); return 0; + +err_cleanup: + cleanup_err = rxe_cleanup(ah); + if (cleanup_err) + rxe_err_ah(ah, "cleanup failed, err = %d", cleanup_err); +err_out: + rxe_err_ah(ah, "returned err = %d", err); + return err; } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { - int err; struct rxe_ah *ah = to_rah(ibah); + int err; err = rxe_ah_chk_attr(ah, attr); - if (err) - return err; + if (err) { + rxe_dbg_ah(ah, "bad attr"); + goto err_out; + } rxe_init_av(attr, &ah->av); + return 0; + +err_out: + rxe_err_ah(ah, "returned err = %d", err); + return err; } static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) @@ -225,92 +347,77 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) memset(attr, 0, sizeof(*attr)); attr->type = ibah->type; rxe_av_to_attr(&ah->av, attr); + return 0; } static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); + int err; - rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE); - - return 0; -} - -static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) -{ - int i; - u32 length; - struct rxe_recv_wqe *recv_wqe; - int num_sge = ibwr->num_sge; - int full; - - full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP); - if (unlikely(full)) - return -ENOMEM; - - if (unlikely(num_sge > rq->max_sge)) - return -EINVAL; - - length = 0; - for (i = 0; i < num_sge; i++) - length += ibwr->sg_list[i].length; - - recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP); - recv_wqe->wr_id = ibwr->wr_id; - - memcpy(recv_wqe->dma.sge, ibwr->sg_list, - num_sge * sizeof(struct ib_sge)); - - recv_wqe->dma.length = length; - recv_wqe->dma.resid = length; - recv_wqe->dma.num_sge = num_sge; - recv_wqe->dma.cur_sge = 0; - recv_wqe->dma.sge_offset = 0; - - queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP); + err = rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE); + if (err) + rxe_err_ah(ah, "cleanup failed, err = %d", err); return 0; } +/* srq */ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibsrq->device); struct rxe_pd *pd = to_rpd(ibsrq->pd); struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; + int err, cleanup_err; if (udata) { - if (udata->outlen < sizeof(*uresp)) - return -EINVAL; + if (udata->outlen < sizeof(*uresp)) { + err = -EINVAL; + rxe_err_dev(rxe, "malformed udata"); + goto err_out; + } uresp = udata->outbuf; } - if (init->srq_type != IB_SRQT_BASIC) - return -EOPNOTSUPP; + if (init->srq_type != IB_SRQT_BASIC) { + err = -EOPNOTSUPP; + rxe_dbg_dev(rxe, "srq type = %d, not supported", + init->srq_type); + goto err_out; + } err = rxe_srq_chk_init(rxe, init); - if (err) - return err; + if (err) { + rxe_dbg_dev(rxe, "invalid init attributes"); + goto err_out; + } err = rxe_add_to_pool(&rxe->srq_pool, srq); - if (err) - return err; + if (err) { + rxe_dbg_dev(rxe, "unable to create srq, err = %d", err); + goto err_out; + } rxe_get(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); - if (err) + if (err) { + rxe_dbg_srq(srq, "create srq failed, err = %d", err); goto err_cleanup; + } return 0; err_cleanup: - rxe_cleanup(srq); - + cleanup_err = rxe_cleanup(srq); + if (cleanup_err) + rxe_err_srq(srq, "cleanup failed, err = %d", cleanup_err); +err_out: + rxe_err_dev(rxe, "returned err = %d", err); return err; } @@ -318,46 +425,64 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct ib_udata *udata) { - int err; struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_dev *rxe = to_rdev(ibsrq->device); - struct rxe_modify_srq_cmd ucmd = {}; + struct rxe_modify_srq_cmd cmd = {}; + int err; if (udata) { - if (udata->inlen < sizeof(ucmd)) - return -EINVAL; + if (udata->inlen < sizeof(cmd)) { + err = -EINVAL; + rxe_dbg_srq(srq, "malformed udata"); + goto err_out; + } - err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); - if (err) - return err; + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); + if (err) { + err = -EFAULT; + rxe_dbg_srq(srq, "unable to read udata"); + goto err_out; + } } err = rxe_srq_chk_attr(rxe, srq, attr, mask); - if (err) - return err; + if (err) { + rxe_dbg_srq(srq, "bad init attributes"); + goto err_out; + } + + err = rxe_srq_from_attr(rxe, srq, attr, mask, &cmd, udata); + if (err) { + rxe_dbg_srq(srq, "bad attr"); + goto err_out; + } + + return 0; - return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); +err_out: + rxe_err_srq(srq, "returned err = %d", err); + return err; } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct rxe_srq *srq = to_rsrq(ibsrq); + int err; - if (srq->error) - return -EINVAL; + if (srq->error) { + err = -EINVAL; + rxe_dbg_srq(srq, "srq in error state"); + goto err_out; + } attr->max_wr = srq->rq.queue->buf->index_mask; attr->max_sge = srq->rq.max_sge; attr->srq_limit = srq->limit; return 0; -} -static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) -{ - struct rxe_srq *srq = to_rsrq(ibsrq); - - rxe_cleanup(srq); - return 0; +err_out: + rxe_err_srq(srq, "returned err = %d", err); + return err; } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, @@ -378,76 +503,116 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&srq->rq.producer_lock, flags); - if (err) + if (err) { *bad_wr = wr; + rxe_err_srq(srq, "returned err = %d", err); + } return err; } +static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +{ + struct rxe_srq *srq = to_rsrq(ibsrq); + int err; + + err = rxe_cleanup(srq); + if (err) + rxe_err_srq(srq, "cleanup failed, err = %d", err); + + return 0; +} + +/* qp */ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_pd *pd = to_rpd(ibqp->pd); struct rxe_qp *qp = to_rqp(ibqp); struct rxe_create_qp_resp __user *uresp = NULL; + int err, cleanup_err; if (udata) { - if (udata->outlen < sizeof(*uresp)) - return -EINVAL; - uresp = udata->outbuf; - } - - if (init->create_flags) - return -EOPNOTSUPP; - - err = rxe_qp_chk_init(rxe, init); - if (err) - return err; + if (udata->inlen) { + err = -EINVAL; + rxe_dbg_dev(rxe, "malformed udata, err = %d", err); + goto err_out; + } - if (udata) { - if (udata->inlen) - return -EINVAL; + if (udata->outlen < sizeof(*uresp)) { + err = -EINVAL; + rxe_dbg_dev(rxe, "malformed udata, err = %d", err); + goto err_out; + } qp->is_user = true; + uresp = udata->outbuf; } else { qp->is_user = false; } + if (init->create_flags) { + err = -EOPNOTSUPP; + rxe_dbg_dev(rxe, "unsupported create_flags, err = %d", err); + goto err_out; + } + + err = rxe_qp_chk_init(rxe, init); + if (err) { + rxe_dbg_dev(rxe, "bad init attr, err = %d", err); + goto err_out; + } + err = rxe_add_to_pool(&rxe->qp_pool, qp); - if (err) - return err; + if (err) { + rxe_dbg_dev(rxe, "unable to create qp, err = %d", err); + goto err_out; + } err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata); - if (err) - goto qp_init; + if (err) { + rxe_dbg_qp(qp, "create qp failed, err = %d", err); + goto err_cleanup; + } rxe_finalize(qp); return 0; -qp_init: - rxe_cleanup(qp); +err_cleanup: + cleanup_err = rxe_cleanup(qp); + if (cleanup_err) + rxe_err_qp(qp, "cleanup failed, err = %d", cleanup_err); +err_out: + rxe_err_dev(rxe, "returned err = %d", err); return err; } static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + int err; - if (mask & ~IB_QP_ATTR_STANDARD_BITS) - return -EOPNOTSUPP; + if (mask & ~IB_QP_ATTR_STANDARD_BITS) { + err = -EOPNOTSUPP; + rxe_dbg_qp(qp, "unsupported mask = 0x%x, err = %d", + mask, err); + goto err_out; + } err = rxe_qp_chk_attr(rxe, qp, attr, mask); - if (err) - return err; + if (err) { + rxe_dbg_qp(qp, "bad mask/attr, err = %d", err); + goto err_out; + } err = rxe_qp_from_attr(qp, attr, mask, udata); - if (err) - return err; + if (err) { + rxe_dbg_qp(qp, "modify qp failed, err = %d", err); + goto err_out; + } if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, @@ -455,6 +620,10 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->attr.dest_qp_num); return 0; + +err_out: + rxe_err_qp(qp, "returned err = %d", err); + return err; } static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -471,41 +640,90 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); - int ret; + int err; - ret = rxe_qp_chk_destroy(qp); - if (ret) - return ret; + err = rxe_qp_chk_destroy(qp); + if (err) { + rxe_dbg_qp(qp, "unable to destroy qp, err = %d", err); + goto err_out; + } + + err = rxe_cleanup(qp); + if (err) + rxe_err_qp(qp, "cleanup failed, err = %d", err); - rxe_cleanup(qp); return 0; + +err_out: + rxe_err_qp(qp, "returned err = %d", err); + return err; } +/* send wr */ + +/* sanity check incoming send work request */ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, - unsigned int mask, unsigned int length) + unsigned int *maskp, unsigned int *lengthp) { int num_sge = ibwr->num_sge; struct rxe_sq *sq = &qp->sq; + unsigned int mask = 0; + unsigned long length = 0; + int err = -EINVAL; + int i; - if (unlikely(num_sge > sq->max_sge)) - return -EINVAL; + do { + mask = wr_opcode_mask(ibwr->opcode, qp); + if (!mask) { + rxe_err_qp(qp, "bad wr opcode for qp type"); + break; + } - if (unlikely(mask & WR_ATOMIC_MASK)) { - if (length < 8) - return -EINVAL; + if (num_sge > sq->max_sge) { + rxe_err_qp(qp, "num_sge > max_sge"); + break; + } - if (atomic_wr(ibwr)->remote_addr & 0x7) - return -EINVAL; - } + length = 0; + for (i = 0; i < ibwr->num_sge; i++) + length += ibwr->sg_list[i].length; - if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && - (length > sq->max_inline))) - return -EINVAL; + if (length > (1UL << 31)) { + rxe_err_qp(qp, "message length too long"); + break; + } - return 0; + if (mask & WR_ATOMIC_MASK) { + if (length != 8) { + rxe_err_qp(qp, "atomic length != 8"); + break; + } + if (atomic_wr(ibwr)->remote_addr & 0x7) { + rxe_err_qp(qp, "misaligned atomic address"); + break; + } + } + if (ibwr->send_flags & IB_SEND_INLINE) { + if (!(mask & WR_INLINE_MASK)) { + rxe_err_qp(qp, "opcode doesn't support inline data"); + break; + } + if (length > sq->max_inline) { + rxe_err_qp(qp, "inline length too big"); + break; + } + } + + err = 0; + } while (0); + + *maskp = mask; + *lengthp = (int)length; + + return err; } -static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, +static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; @@ -521,8 +739,18 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, wr->wr.ud.ah_num = to_rah(ibah)->ah_num; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; - if (wr->opcode == IB_WR_SEND_WITH_IMM) + + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; + break; + case IB_WR_SEND: + break; + default: + rxe_err_qp(qp, "bad wr opcode %d for UD/GSI QP", + wr->opcode); + return -EINVAL; + } } else { switch (wr->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: @@ -539,6 +767,11 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, case IB_WR_SEND_WITH_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; break; + case IB_WR_RDMA_READ_WITH_INV: + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; + wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; + wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; + break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: wr->wr.atomic.remote_addr = @@ -550,16 +783,26 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, break; case IB_WR_LOCAL_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; - break; + break; case IB_WR_REG_MR: wr->wr.reg.mr = reg_wr(ibwr)->mr; wr->wr.reg.key = reg_wr(ibwr)->key; wr->wr.reg.access = reg_wr(ibwr)->access; - break; + break; + case IB_WR_SEND: + case IB_WR_BIND_MW: + case IB_WR_FLUSH: + case IB_WR_ATOMIC_WRITE: + break; default: + rxe_err_qp(qp, "unsupported wr opcode %d", + wr->opcode); + return -EINVAL; break; } } + + return 0; } static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, @@ -570,24 +813,27 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, int i; for (i = 0; i < ibwr->num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length); p += sge->length; } } -static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, +static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; + int err; - init_send_wr(qp, &wqe->wr, ibwr); + err = init_send_wr(qp, &wqe->wr, ibwr); + if (err) + return err; /* local operation */ if (unlikely(mask & WR_LOCAL_OP_MASK)) { wqe->mask = mask; wqe->state = wqe_state_posted; - return; + return 0; } if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) @@ -606,82 +852,62 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); + + return 0; } -static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, - unsigned int mask, u32 length) +static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr) { int err; struct rxe_sq *sq = &qp->sq; struct rxe_send_wqe *send_wqe; - unsigned long flags; + unsigned int mask; + unsigned int length; int full; - err = validate_send_wr(qp, ibwr, mask, length); + err = validate_send_wr(qp, ibwr, &mask, &length); if (err) return err; - spin_lock_irqsave(&qp->sq.sq_lock, flags); - full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP); - if (unlikely(full)) { - spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + rxe_err_qp(qp, "send queue full"); return -ENOMEM; } send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP); - init_send_wqe(qp, ibwr, mask, length, send_wqe); + err = init_send_wqe(qp, ibwr, mask, length, send_wqe); + if (!err) + queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP); - queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP); - - spin_unlock_irqrestore(&qp->sq.sq_lock, flags); - - return 0; + return err; } -static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, +static int rxe_post_send_kernel(struct rxe_qp *qp, + const struct ib_send_wr *ibwr, const struct ib_send_wr **bad_wr) { int err = 0; - unsigned int mask; - unsigned int length = 0; - int i; - struct ib_send_wr *next; - - while (wr) { - mask = wr_opcode_mask(wr->opcode, qp); - if (unlikely(!mask)) { - err = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely((wr->send_flags & IB_SEND_INLINE) && - !(mask & WR_INLINE_MASK))) { - err = -EINVAL; - *bad_wr = wr; - break; - } - - next = wr->next; - - length = 0; - for (i = 0; i < wr->num_sge; i++) - length += wr->sg_list[i].length; - - err = post_one_send(qp, wr, mask, length); + unsigned long flags; + spin_lock_irqsave(&qp->sq.sq_lock, flags); + while (ibwr) { + err = post_one_send(qp, ibwr); if (err) { - *bad_wr = wr; + *bad_wr = ibwr; break; } - wr = next; + ibwr = ibwr->next; } + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); - rxe_sched_task(&qp->req.task); - if (unlikely(qp->req.state == QP_STATE_ERROR)) + if (!err) + rxe_sched_task(&qp->req.task); + + spin_lock_bh(&qp->state_lock); + if (qp_state(qp) == IB_QPS_ERR) rxe_sched_task(&qp->comp.task); + spin_unlock_bh(&qp->state_lock); return err; } @@ -690,23 +916,88 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct rxe_qp *qp = to_rqp(ibqp); + int err; - if (unlikely(!qp->valid)) { - *bad_wr = wr; + spin_lock_bh(&qp->state_lock); + /* caller has already called destroy_qp */ + if (WARN_ON_ONCE(!qp->valid)) { + spin_unlock_bh(&qp->state_lock); + rxe_err_qp(qp, "qp has been destroyed"); return -EINVAL; } - if (unlikely(qp->req.state < QP_STATE_READY)) { + if (unlikely(qp_state(qp) < IB_QPS_RTS)) { + spin_unlock_bh(&qp->state_lock); *bad_wr = wr; + rxe_err_qp(qp, "qp not ready to send"); return -EINVAL; } + spin_unlock_bh(&qp->state_lock); if (qp->is_user) { /* Utilize process context to do protocol processing */ rxe_run_task(&qp->req.task); - return 0; - } else - return rxe_post_send_kernel(qp, wr, bad_wr); + } else { + err = rxe_post_send_kernel(qp, wr, bad_wr); + if (err) + return err; + } + + return 0; +} + +/* recv wr */ +static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) +{ + int i; + unsigned long length; + struct rxe_recv_wqe *recv_wqe; + int num_sge = ibwr->num_sge; + int full; + int err; + + full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP); + if (unlikely(full)) { + err = -ENOMEM; + rxe_dbg("queue full"); + goto err_out; + } + + if (unlikely(num_sge > rq->max_sge)) { + err = -EINVAL; + rxe_dbg("bad num_sge > max_sge"); + goto err_out; + } + + length = 0; + for (i = 0; i < num_sge; i++) + length += ibwr->sg_list[i].length; + + /* IBA max message size is 2^31 */ + if (length >= (1UL<<31)) { + err = -EINVAL; + rxe_dbg("message length too long"); + goto err_out; + } + + recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP); + + recv_wqe->wr_id = ibwr->wr_id; + recv_wqe->dma.length = length; + recv_wqe->dma.resid = length; + recv_wqe->dma.num_sge = num_sge; + recv_wqe->dma.cur_sge = 0; + recv_wqe->dma.sge_offset = 0; + memcpy(recv_wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP); + + return 0; + +err_out: + rxe_dbg("returned err = %d", err); + return err; } static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, @@ -717,13 +1008,26 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, struct rxe_rq *rq = &qp->rq; unsigned long flags; - if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { + spin_lock_bh(&qp->state_lock); + /* caller has already called destroy_qp */ + if (WARN_ON_ONCE(!qp->valid)) { + spin_unlock_bh(&qp->state_lock); + rxe_err_qp(qp, "qp has been destroyed"); + return -EINVAL; + } + + /* see C10-97.2.1 */ + if (unlikely((qp_state(qp) < IB_QPS_INIT))) { + spin_unlock_bh(&qp->state_lock); *bad_wr = wr; + rxe_dbg_qp(qp, "qp not ready to post recv"); return -EINVAL; } + spin_unlock_bh(&qp->state_lock); if (unlikely(qp->srq)) { *bad_wr = wr; + rxe_dbg_qp(qp, "qp has srq, use post_srq_recv instead"); return -EINVAL; } @@ -740,76 +1044,102 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); - if (qp->resp.state == QP_STATE_ERROR) + spin_lock_bh(&qp->state_lock); + if (qp_state(qp) == IB_QPS_ERR) rxe_sched_task(&qp->resp.task); + spin_unlock_bh(&qp->state_lock); return err; } +/* cq */ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { - int err; struct ib_device *dev = ibcq->device; struct rxe_dev *rxe = to_rdev(dev); struct rxe_cq *cq = to_rcq(ibcq); struct rxe_create_cq_resp __user *uresp = NULL; + int err, cleanup_err; if (udata) { - if (udata->outlen < sizeof(*uresp)) - return -EINVAL; + if (udata->outlen < sizeof(*uresp)) { + err = -EINVAL; + rxe_dbg_dev(rxe, "malformed udata, err = %d", err); + goto err_out; + } uresp = udata->outbuf; } - if (attr->flags) - return -EOPNOTSUPP; + if (attr->flags) { + err = -EOPNOTSUPP; + rxe_dbg_dev(rxe, "bad attr->flags, err = %d", err); + goto err_out; + } err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); - if (err) - return err; + if (err) { + rxe_dbg_dev(rxe, "bad init attributes, err = %d", err); + goto err_out; + } + + err = rxe_add_to_pool(&rxe->cq_pool, cq); + if (err) { + rxe_dbg_dev(rxe, "unable to create cq, err = %d", err); + goto err_out; + } err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata, uresp); - if (err) - return err; - - return rxe_add_to_pool(&rxe->cq_pool, cq); -} - -static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) -{ - struct rxe_cq *cq = to_rcq(ibcq); - - /* See IBA C11-17: The CI shall return an error if this Verb is - * invoked while a Work Queue is still associated with the CQ. - */ - if (atomic_read(&cq->num_wq)) - return -EINVAL; - - rxe_cq_disable(cq); + if (err) { + rxe_dbg_cq(cq, "create cq failed, err = %d", err); + goto err_cleanup; + } - rxe_cleanup(cq); return 0; + +err_cleanup: + cleanup_err = rxe_cleanup(cq); + if (cleanup_err) + rxe_err_cq(cq, "cleanup failed, err = %d", cleanup_err); +err_out: + rxe_err_dev(rxe, "returned err = %d", err); + return err; } static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { - int err; struct rxe_cq *cq = to_rcq(ibcq); struct rxe_dev *rxe = to_rdev(ibcq->device); struct rxe_resize_cq_resp __user *uresp = NULL; + int err; if (udata) { - if (udata->outlen < sizeof(*uresp)) - return -EINVAL; + if (udata->outlen < sizeof(*uresp)) { + err = -EINVAL; + rxe_dbg_cq(cq, "malformed udata"); + goto err_out; + } uresp = udata->outbuf; } err = rxe_cq_chk_attr(rxe, cq, cqe, 0); - if (err) - return err; + if (err) { + rxe_dbg_cq(cq, "bad attr, err = %d", err); + goto err_out; + } - return rxe_cq_resize_queue(cq, cqe, uresp, udata); + err = rxe_cq_resize_queue(cq, cqe, uresp, udata); + if (err) { + rxe_dbg_cq(cq, "resize cq failed, err = %d", err); + goto err_out; + } + + return 0; + +err_out: + rxe_err_cq(cq, "returned err = %d", err); + return err; } static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) @@ -823,7 +1153,7 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) for (i = 0; i < num_entries; i++) { cqe = queue_head(cq->queue, QUEUE_TYPE_TO_ULP); if (!cqe) - break; + break; /* queue empty */ memcpy(wc++, &cqe->ibwc, sizeof(*wc)); queue_advance_consumer(cq->queue, QUEUE_TYPE_TO_ULP); @@ -864,6 +1194,32 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) return ret; } +static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct rxe_cq *cq = to_rcq(ibcq); + int err; + + /* See IBA C11-17: The CI shall return an error if this Verb is + * invoked while a Work Queue is still associated with the CQ. + */ + if (atomic_read(&cq->num_wq)) { + err = -EINVAL; + rxe_dbg_cq(cq, "still in use"); + goto err_out; + } + + err = rxe_cleanup(cq); + if (err) + rxe_err_cq(cq, "cleanup failed, err = %d", err); + + return 0; + +err_out: + rxe_err_cq(cq, "returned err = %d", err); + return err; +} + +/* mr */ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) { struct rxe_dev *rxe = to_rdev(ibpd->device); @@ -872,14 +1228,14 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); err = rxe_add_to_pool(&rxe->mr_pool, mr); - if (err) + if (err) { + rxe_dbg_dev(rxe, "unable to create mr"); goto err_free; + } rxe_get(pd); mr->ibmr.pd = ibpd; @@ -891,47 +1247,49 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) err_free: kfree(mr); -err_out: + rxe_err_pd(pd, "returned err = %d", err); return ERR_PTR(err); } -static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, - u64 start, - u64 length, - u64 iova, - int access, struct ib_udata *udata) +static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start, + u64 length, u64 iova, int access, + struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mr *mr; + int err, cleanup_err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); err = rxe_add_to_pool(&rxe->mr_pool, mr); - if (err) + if (err) { + rxe_dbg_pd(pd, "unable to create mr"); goto err_free; + } rxe_get(pd); mr->ibmr.pd = ibpd; mr->ibmr.device = ibpd->device; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); - if (err) + if (err) { + rxe_dbg_mr(mr, "reg_user_mr failed, err = %d", err); goto err_cleanup; + } rxe_finalize(mr); return &mr->ibmr; err_cleanup: - rxe_cleanup(mr); + cleanup_err = rxe_cleanup(mr); + if (cleanup_err) + rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err); err_free: kfree(mr); -err_out: + rxe_err_pd(pd, "returned err = %d", err); return ERR_PTR(err); } @@ -941,17 +1299,19 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mr *mr; - int err; + int err, cleanup_err; - if (mr_type != IB_MR_TYPE_MEM_REG) - return ERR_PTR(-EINVAL); - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; + if (mr_type != IB_MR_TYPE_MEM_REG) { + err = -EINVAL; + rxe_dbg_pd(pd, "mr type %d not supported, err = %d", + mr_type, err); goto err_out; } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + err = rxe_add_to_pool(&rxe->mr_pool, mr); if (err) goto err_free; @@ -961,20 +1321,49 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, mr->ibmr.device = ibpd->device; err = rxe_mr_init_fast(max_num_sg, mr); - if (err) + if (err) { + rxe_dbg_mr(mr, "alloc_mr failed, err = %d", err); goto err_cleanup; + } rxe_finalize(mr); return &mr->ibmr; err_cleanup: - rxe_cleanup(mr); + cleanup_err = rxe_cleanup(mr); + if (cleanup_err) + rxe_err_mr(mr, "cleanup failed, err = %d", err); err_free: kfree(mr); err_out: + rxe_err_pd(pd, "returned err = %d", err); return ERR_PTR(err); } +static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct rxe_mr *mr = to_rmr(ibmr); + int err, cleanup_err; + + /* See IBA 10.6.7.2.6 */ + if (atomic_read(&mr->num_mw) > 0) { + err = -EINVAL; + rxe_dbg_mr(mr, "mr has mw's bound"); + goto err_out; + } + + cleanup_err = rxe_cleanup(mr); + if (cleanup_err) + rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err); + + kfree_rcu(mr); + return 0; + +err_out: + rxe_err_mr(mr, "returned err = %d", err); + return err; +} + static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { @@ -1095,7 +1484,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) err = ib_register_device(dev, ibdev_name, NULL); if (err) - rxe_dbg(rxe, "failed with error %d\n", err); + rxe_dbg_dev(rxe, "failed with error %d\n", err); /* * Note that rxe may be invalid at this point if another thread diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index c269ae2a3224..26a20f088692 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -63,9 +63,7 @@ struct rxe_cq { struct rxe_queue *queue; spinlock_t cq_lock; u8 notify; - bool is_dying; bool is_user; - struct tasklet_struct comp_task; atomic_t num_wq; }; @@ -104,17 +102,7 @@ struct rxe_srq { int error; }; -enum rxe_qp_state { - QP_STATE_RESET, - QP_STATE_INIT, - QP_STATE_READY, - QP_STATE_DRAIN, /* req only */ - QP_STATE_DRAINED, /* req only */ - QP_STATE_ERROR -}; - struct rxe_req_info { - enum rxe_qp_state state; int wqe_index; u32 psn; int opcode; @@ -129,7 +117,6 @@ struct rxe_req_info { }; struct rxe_comp_info { - enum rxe_qp_state state; u32 psn; int opcode; int timeout; @@ -175,7 +162,6 @@ struct resp_res { }; struct rxe_resp_info { - enum rxe_qp_state state; u32 msn; u32 psn; u32 ack_psn; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index dacc174604bf..65b5cda5457b 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -437,9 +437,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, dev_dbg(&netdev->dev, "siw: event %lu\n", event); - if (dev_net(netdev) != &init_net) - return NOTIFY_OK; - base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW); if (!base_dev) return NOTIFY_OK; diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index fd721cc19682..58bbf738e4e5 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf_addr), bytes) == bytes) { copied += bytes; offset += bytes; @@ -487,7 +487,7 @@ int siw_proc_send(struct siw_qp *qp) mem_p = *mem; if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(uintptr_t)(sge->laddr + frx->sge_off), + ib_virt_dma_to_ptr(sge->laddr + frx->sge_off), sge_bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, @@ -852,7 +852,7 @@ int siw_proc_rresp(struct siw_qp *qp) if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(uintptr_t)(sge->laddr + wqe->processed), + ib_virt_dma_to_ptr(sge->laddr + wqe->processed), bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 05052b49107f..4b292e0504f1 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) - return virt_to_page((void *)(uintptr_t)paddr); + return ib_virt_dma_to_page(paddr); return NULL; } @@ -56,8 +56,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) if (!mem->mem_obj) { /* Kernel client using kva */ - memcpy(paddr, - (const void *)(uintptr_t)sge->laddr, bytes); + memcpy(paddr, ib_virt_dma_to_ptr(sge->laddr), bytes); } else if (c_tx->in_syscall) { if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), bytes)) @@ -477,7 +476,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * or memory region with assigned kernel buffer */ iov[seg].iov_base = - (void *)(uintptr_t)(sge->laddr + sge_off); + ib_virt_dma_to_ptr(sge->laddr + sge_off); iov[seg].iov_len = sge_len; if (do_crc) @@ -537,19 +536,13 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * Cast to an uintptr_t to preserve all 64 bits * in sge->laddr. */ - uintptr_t va = (uintptr_t)(sge->laddr + sge_off); + u64 va = sge->laddr + sge_off; - /* - * virt_to_page() takes a (void *) pointer - * so cast to a (void *) meaning it will be 64 - * bits on a 64 bit platform and 32 bits on a - * 32 bit platform. - */ - page_array[seg] = virt_to_page((void *)(va & PAGE_MASK)); + page_array[seg] = ib_virt_dma_to_page(va); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)va, + ib_virt_dma_to_ptr(va), plen); } @@ -558,7 +551,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) data_len -= plen; fp_off = 0; - if (++seg > (int)MAX_ARRAY) { + if (++seg >= (int)MAX_ARRAY) { siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); siw_unmap_pages(iov, kmap_mask, seg-1); wqe->processed -= c_tx->bytes_unsent; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 906fde1a2a0d..398ec13db624 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -660,7 +660,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, bytes = -EINVAL; break; } - memcpy(kbuf, (void *)(uintptr_t)core_sge->addr, + memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr), core_sge->length); kbuf += core_sge->length; @@ -1523,7 +1523,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, } siw_dbg_mem(mem, "sge[%d], size %u, addr 0x%p, total %lu\n", - i, pble->size, (void *)(uintptr_t)pble->addr, + i, pble->size, ib_virt_dma_to_ptr(pble->addr), pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); |