summaryrefslogtreecommitdiff
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 21:01:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 21:01:17 -0700
commit7ede5f78a0d74b574791c7eb0e2ca6e54b80c93c (patch)
tree25801e614cc8fdf3569d6deb8af8d9ec3a380703 /drivers/infiniband/sw
parent31929ae00890d921618b0b449722dcdf4a4416cc (diff)
parent5f004bcaee4cb552cf1b46a505f18f08777db7e5 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "This cycle saw a focus on rxe and bnxt_re drivers: - Code cleanups for irdma, rxe, rtrs, hns, vmw_pvrdma - rxe uses workqueues instead of tasklets - rxe has better compliance around access checks for MRs and rereg_mr - mana supports the 'v2' FW interface for RX coalescing - hfi1 bug fix for stale cache entries in its MR cache - mlx5 bug fix to handle FW failures when destroying QPs - erdma HW has a new doorbell allocation mechanism for uverbs that is secure - Lots of small cleanups and rework in bnxt_re: - Use the common mmap functions - Support disassociation - Improve FW command flow - support for 'low latency push'" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits) RDMA/bnxt_re: Fix an IS_ERR() vs NULL check RDMA/bnxt_re: Fix spelling mistake "priviledged" -> "privileged" RDMA/bnxt_re: Remove duplicated include in bnxt_re/main.c RDMA/bnxt_re: Refactor code around bnxt_qplib_map_rc() RDMA/bnxt_re: Remove incorrect return check from slow path RDMA/bnxt_re: Enable low latency push RDMA/bnxt_re: Reorg the bar mapping RDMA/bnxt_re: Move the interface version to chip context structure RDMA/bnxt_re: Query function capabilities from firmware RDMA/bnxt_re: Optimize the bnxt_re_init_hwrm_hdr usage RDMA/bnxt_re: Add disassociate ucontext support RDMA/bnxt_re: Use the common mmap helper functions RDMA/bnxt_re: Initialize opcode while sending message RDMA/cma: Remove NULL check before dev_{put, hold} RDMA/rxe: Simplify cq->notify code RDMA/rxe: Fixes mr access supported list RDMA/bnxt_re: optimize the parameters passed to helper functions RDMA/bnxt_re: remove redundant cmdq_bitmap RDMA/bnxt_re: use firmware provided max request timeout RDMA/bnxt_re: cancel all control path command waiters upon error ...
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c21
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c22
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c25
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c110
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c45
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h21
15 files changed, 198 insertions, 84 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7a7e713de52d..54c723a6edda 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -212,10 +212,16 @@ static int __init rxe_module_init(void)
{
int err;
- err = rxe_net_init();
+ err = rxe_alloc_wq();
if (err)
return err;
+ err = rxe_net_init();
+ if (err) {
+ rxe_destroy_wq();
+ return err;
+ }
+
rdma_link_register(&rxe_link_ops);
pr_info("loaded\n");
return 0;
@@ -226,6 +232,7 @@ static void __exit rxe_module_exit(void)
rdma_link_unregister(&rxe_link_ops);
ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
+ rxe_destroy_wq();
pr_info("unloaded\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index f46c5a5fd0ae..5111735aafae 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -832,7 +832,7 @@ int rxe_completer(struct rxe_qp *qp)
}
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_completer
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 6ca2a05b6a2a..d5486cbb3f10 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -113,10 +113,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
- if ((cq->notify == IB_CQ_NEXT_COMP) ||
- (cq->notify == IB_CQ_SOLICITED && solicited)) {
+ if ((cq->notify & IB_CQ_NEXT_COMP) ||
+ (cq->notify & IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
-
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 804b15e929dd..666e06a82bc9 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -31,8 +31,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
-void rxe_cq_disable(struct rxe_cq *cq);
-
void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 0e538fafcc20..f54042e9aeb2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -45,22 +45,17 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
}
}
-#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
- | IB_ACCESS_REMOTE_WRITE \
- | IB_ACCESS_REMOTE_ATOMIC)
-
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
- u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+ u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
/* set ibmr->l/rkey and also copy into private l/rkey
* for user MRs these will always be the same
* for cases where caller 'owns' the key portion
* they may be different until REG_MR WQE is executed.
*/
- mr->lkey = mr->ibmr.lkey = lkey;
- mr->rkey = mr->ibmr.rkey = rkey;
+ mr->lkey = mr->ibmr.lkey = key;
+ mr->rkey = mr->ibmr.rkey = key;
mr->access = access;
mr->ibmr.page_size = PAGE_SIZE;
@@ -195,7 +190,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
int err;
/* always allow remote access for FMRs */
- rxe_mr_init(IB_ACCESS_REMOTE, mr);
+ rxe_mr_init(RXE_ACCESS_REMOTE, mr);
err = rxe_mr_alloc(mr, max_pages);
if (err)
@@ -644,6 +639,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mr *mr;
+ int remote;
int ret;
mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
@@ -653,9 +649,10 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
goto err;
}
- if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
+ remote = mr->access & RXE_ACCESS_REMOTE;
+ if (remote ? (key != mr->rkey) : (key != mr->lkey)) {
rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
- key, (mr->rkey ? mr->rkey : mr->lkey));
+ key, (remote ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
@@ -715,7 +712,7 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
mr->access = access;
mr->lkey = key;
- mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
+ mr->rkey = key;
mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
mr->state = RXE_MR_STATE_VALID;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index afa5ce1a7116..d8a43d87de93 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -48,7 +48,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
}
static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_mw *mw, struct rxe_mr *mr)
+ struct rxe_mw *mw, struct rxe_mr *mr, int access)
{
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
@@ -58,7 +58,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* o10-36.2.2 */
- if (unlikely((mw->access & IB_ZERO_BASED))) {
+ if (unlikely((access & IB_ZERO_BASED))) {
rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}
@@ -104,7 +104,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* C10-74 */
- if (unlikely((mw->access &
+ if (unlikely((access &
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
rxe_dbg_mw(mw,
@@ -113,7 +113,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
/* C10-75 */
- if (mw->access & IB_ZERO_BASED) {
+ if (access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
rxe_dbg_mw(mw,
"attempt to bind a ZB MW outside of the MR\n");
@@ -133,12 +133,12 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_mw *mw, struct rxe_mr *mr)
+ struct rxe_mw *mw, struct rxe_mr *mr, int access)
{
u32 key = wqe->wr.wr.mw.rkey & 0xff;
mw->rkey = (mw->rkey & ~0xff) | key;
- mw->access = wqe->wr.wr.mw.access;
+ mw->access = access;
mw->state = RXE_MW_STATE_VALID;
mw->addr = wqe->wr.wr.mw.addr;
mw->length = wqe->wr.wr.mw.length;
@@ -169,6 +169,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
+ int access = wqe->wr.wr.mw.access;
mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
if (unlikely(!mw)) {
@@ -196,13 +197,18 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
mr = NULL;
}
+ if (access & ~RXE_ACCESS_SUPPORTED_MW) {
+ rxe_err_mw(mw, "access %#x not supported", access);
+ return -EOPNOTSUPP;
+ }
+
spin_lock_bh(&mw->lock);
- ret = rxe_check_bind_mw(qp, wqe, mw, mr);
+ ret = rxe_check_bind_mw(qp, wqe, mw, mr, access);
if (ret)
goto err_unlock;
- rxe_do_bind_mw(qp, wqe, mw, mr);
+ rxe_do_bind_mw(qp, wqe, mw, mr, access);
err_unlock:
spin_unlock_bh(&mw->lock);
err_drop_mr:
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index cea4e0a63919..5686b691d6b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -91,6 +91,9 @@ enum rxe_hdr_mask {
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
+ RXE_RDMA_OP_MASK = (RXE_READ_MASK | RXE_WRITE_MASK |
+ RXE_ATOMIC_WRITE_MASK | RXE_FLUSH_MASK |
+ RXE_ATOMIC_MASK),
};
#define OPCODE_NONE (-1)
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 7b41d79e72b2..d2f57ead78ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -112,7 +112,7 @@ enum rxe_device_param {
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
- /* Max number of interations of each tasklet
+ /* Max number of interations of each work item
* before yielding the cpu to let other
* work make progress
*/
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a0f206431cf8..a569b111a9d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -392,6 +392,13 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
+ if (mask & IB_QP_ACCESS_FLAGS) {
+ if (!(qp_type(qp) == IB_QPT_RC || qp_type(qp) == IB_QPT_UC))
+ goto err1;
+ if (attr->qp_access_flags & ~RXE_ACCESS_SUPPORTED_QP)
+ goto err1;
+ }
+
if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 5fe7cbae3031..2171f19494bc 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -857,7 +857,7 @@ int rxe_requester(struct rxe_qp *qp)
update_state(qp, &pkt);
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_requester
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ee68306555b9..64c64f5f36a8 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -387,7 +387,10 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
}
}
- return RESPST_CHK_RKEY;
+ if (pkt->mask & RXE_RDMA_OP_MASK)
+ return RESPST_CHK_RKEY;
+ else
+ return RESPST_EXECUTE;
}
/* if the reth length field is zero we can assume nothing
@@ -434,6 +437,10 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
enum resp_states state;
int access = 0;
+ /* parse RETH or ATMETH header for first/only packets
+ * for va, length, rkey, etc. or use current value for
+ * middle/last packets.
+ */
if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (pkt->mask & RXE_RETH_MASK)
qp_resp_from_reth(qp, pkt);
@@ -454,7 +461,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
- return RESPST_EXECUTE;
+ /* shouldn't happen */
+ WARN_ON(1);
}
/* A zero-byte read or write op is not required to
@@ -1449,8 +1457,17 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
struct rxe_recv_wqe *wqe;
int err;
- if (qp->srq)
+ if (qp->srq) {
+ if (notify && qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
return;
+ }
while ((wqe = queue_head(q, q->type))) {
if (notify) {
@@ -1657,7 +1674,7 @@ int rxe_responder(struct rxe_qp *qp)
}
/* A non-zero return value will cause rxe_do_task to
- * exit its loop and end the tasklet. A zero return
+ * exit its loop and end the work item. A zero return
* will continue looping and return to rxe_responder
*/
done:
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index fb9a6bc8e620..1501120d4f52 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -6,8 +6,24 @@
#include "rxe.h"
+static struct workqueue_struct *rxe_wq;
+
+int rxe_alloc_wq(void)
+{
+ rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
+ if (!rxe_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void rxe_destroy_wq(void)
+{
+ destroy_workqueue(rxe_wq);
+}
+
/* Check if task is idle i.e. not running, not scheduled in
- * tasklet queue and not draining. If so move to busy to
+ * work queue and not draining. If so move to busy to
* reserve a slot in do_task() by setting to busy and taking
* a qp reference to cover the gap from now until the task finishes.
* state will move out of busy if task returns a non zero value
@@ -21,9 +37,6 @@ static bool __reserve_if_idle(struct rxe_task *task)
{
WARN_ON(rxe_read(task->qp) <= 0);
- if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
- return false;
-
if (task->state == TASK_STATE_IDLE) {
rxe_get(task->qp);
task->state = TASK_STATE_BUSY;
@@ -38,7 +51,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
}
/* check if task is idle or drained and not currently
- * scheduled in the tasklet queue. This routine is
+ * scheduled in the work queue. This routine is
* called by rxe_cleanup_task or rxe_disable_task to
* see if the queue is empty.
* Context: caller should hold task->lock.
@@ -46,7 +59,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
*/
static bool __is_done(struct rxe_task *task)
{
- if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
+ if (work_pending(&task->work))
return false;
if (task->state == TASK_STATE_IDLE ||
@@ -77,23 +90,23 @@ static bool is_done(struct rxe_task *task)
* schedules the task. They must call __reserve_if_idle to
* move the task to busy before calling or scheduling.
* The task can also be moved to drained or invalid
- * by calls to rxe-cleanup_task or rxe_disable_task.
+ * by calls to rxe_cleanup_task or rxe_disable_task.
* In that case tasks which get here are not executed but
* just flushed. The tasks are designed to look to see if
- * there is work to do and do part of it before returning
+ * there is work to do and then do part of it before returning
* here with a return value of zero until all the work
- * has been consumed then it retuens a non-zero value.
+ * has been consumed then it returns a non-zero value.
* The number of times the task can be run is limited by
* max iterations so one task cannot hold the cpu forever.
+ * If the limit is hit and work remains the task is rescheduled.
*/
-static void do_task(struct tasklet_struct *t)
+static void do_task(struct rxe_task *task)
{
- int cont;
- int ret;
- struct rxe_task *task = from_tasklet(task, t, tasklet);
unsigned int iterations;
unsigned long flags;
int resched = 0;
+ int cont;
+ int ret;
WARN_ON(rxe_read(task->qp) <= 0);
@@ -115,25 +128,22 @@ static void do_task(struct tasklet_struct *t)
} while (ret == 0 && iterations-- > 0);
spin_lock_irqsave(&task->lock, flags);
+ /* we're not done yet but we ran out of iterations.
+ * yield the cpu and reschedule the task
+ */
+ if (!ret) {
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
+ goto exit;
+ }
+
switch (task->state) {
case TASK_STATE_BUSY:
- if (ret) {
- task->state = TASK_STATE_IDLE;
- } else {
- /* This can happen if the client
- * can add work faster than the
- * tasklet can finish it.
- * Reschedule the tasklet and exit
- * the loop to give up the cpu
- */
- task->state = TASK_STATE_IDLE;
- resched = 1;
- }
+ task->state = TASK_STATE_IDLE;
break;
- /* someone tried to run the task since the last time we called
- * func, so we will call one more time regardless of the
- * return value
+ /* someone tried to schedule the task while we
+ * were running, keep going
*/
case TASK_STATE_ARMED:
task->state = TASK_STATE_BUSY;
@@ -141,22 +151,24 @@ static void do_task(struct tasklet_struct *t)
break;
case TASK_STATE_DRAINING:
- if (ret)
- task->state = TASK_STATE_DRAINED;
- else
- cont = 1;
+ task->state = TASK_STATE_DRAINED;
break;
default:
WARN_ON(1);
- rxe_info_qp(task->qp, "unexpected task state = %d", task->state);
+ rxe_dbg_qp(task->qp, "unexpected task state = %d",
+ task->state);
+ task->state = TASK_STATE_IDLE;
}
+exit:
if (!cont) {
task->num_done++;
if (WARN_ON(task->num_done != task->num_sched))
- rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done",
- task->num_sched, task->num_done);
+ rxe_dbg_qp(
+ task->qp,
+ "%ld tasks scheduled, %ld tasks done",
+ task->num_sched, task->num_done);
}
spin_unlock_irqrestore(&task->lock, flags);
} while (cont);
@@ -169,6 +181,12 @@ static void do_task(struct tasklet_struct *t)
rxe_put(task->qp);
}
+/* wrapper around do_task to fix argument for work queue */
+static void do_work(struct work_struct *work)
+{
+ do_task(container_of(work, struct rxe_task, work));
+}
+
int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
int (*func)(struct rxe_qp *))
{
@@ -176,11 +194,9 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
task->qp = qp;
task->func = func;
-
- tasklet_setup(&task->tasklet, do_task);
-
task->state = TASK_STATE_IDLE;
spin_lock_init(&task->lock);
+ INIT_WORK(&task->work, do_work);
return 0;
}
@@ -213,8 +229,6 @@ void rxe_cleanup_task(struct rxe_task *task)
while (!is_done(task))
cond_resched();
- tasklet_kill(&task->tasklet);
-
spin_lock_irqsave(&task->lock, flags);
task->state = TASK_STATE_INVALID;
spin_unlock_irqrestore(&task->lock, flags);
@@ -226,7 +240,7 @@ void rxe_cleanup_task(struct rxe_task *task)
void rxe_run_task(struct rxe_task *task)
{
unsigned long flags;
- int run;
+ bool run;
WARN_ON(rxe_read(task->qp) <= 0);
@@ -235,11 +249,11 @@ void rxe_run_task(struct rxe_task *task)
spin_unlock_irqrestore(&task->lock, flags);
if (run)
- do_task(&task->tasklet);
+ do_task(task);
}
-/* schedule the task to run later as a tasklet.
- * the tasklet)schedule call can be called holding
+/* schedule the task to run later as a work queue entry.
+ * the queue_work call can be called holding
* the lock.
*/
void rxe_sched_task(struct rxe_task *task)
@@ -250,7 +264,7 @@ void rxe_sched_task(struct rxe_task *task)
spin_lock_irqsave(&task->lock, flags);
if (__reserve_if_idle(task))
- tasklet_schedule(&task->tasklet);
+ queue_work(rxe_wq, &task->work);
spin_unlock_irqrestore(&task->lock, flags);
}
@@ -277,7 +291,9 @@ void rxe_disable_task(struct rxe_task *task)
while (!is_done(task))
cond_resched();
- tasklet_disable(&task->tasklet);
+ spin_lock_irqsave(&task->lock, flags);
+ task->state = TASK_STATE_DRAINED;
+ spin_unlock_irqrestore(&task->lock, flags);
}
void rxe_enable_task(struct rxe_task *task)
@@ -291,7 +307,7 @@ void rxe_enable_task(struct rxe_task *task)
spin_unlock_irqrestore(&task->lock, flags);
return;
}
+
task->state = TASK_STATE_IDLE;
- tasklet_enable(&task->tasklet);
spin_unlock_irqrestore(&task->lock, flags);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index facb7c8e3729..a63e258b3d66 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -22,7 +22,7 @@ enum {
* called again.
*/
struct rxe_task {
- struct tasklet_struct tasklet;
+ struct work_struct work;
int state;
spinlock_t lock;
struct rxe_qp *qp;
@@ -32,6 +32,10 @@ struct rxe_task {
long num_done;
};
+int rxe_alloc_wq(void);
+
+void rxe_destroy_wq(void);
+
/*
* init rxe_task structure
* qp => parameter to pass to func
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 83093e16b6c6..903f0b71447e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1182,9 +1182,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
unsigned long irq_flags;
spin_lock_irqsave(&cq->cq_lock, irq_flags);
- if (cq->notify != IB_CQ_NEXT_COMP)
- cq->notify = flags & IB_CQ_SOLICITED_MASK;
-
+ cq->notify |= flags & IB_CQ_SOLICITED_MASK;
empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
@@ -1261,6 +1259,12 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
struct rxe_mr *mr;
int err, cleanup_err;
+ if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+ rxe_err_pd(pd, "access = %#x not supported (%#x)", access,
+ RXE_ACCESS_SUPPORTED_MR);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -1294,6 +1298,40 @@ err_free:
return ERR_PTR(err);
}
+static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
+ u64 start, u64 length, u64 iova,
+ int access, struct ib_pd *ibpd,
+ struct ib_udata *udata)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+ struct rxe_pd *old_pd = to_rpd(ibmr->pd);
+ struct rxe_pd *pd = to_rpd(ibpd);
+
+ /* for now only support the two easy cases:
+ * rereg_pd and rereg_access
+ */
+ if (flags & ~RXE_MR_REREG_SUPPORTED) {
+ rxe_err_mr(mr, "flags = %#x not supported", flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (flags & IB_MR_REREG_PD) {
+ rxe_put(old_pd);
+ rxe_get(pd);
+ mr->ibmr.pd = ibpd;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+ rxe_err_mr(mr, "access = %#x not supported", access);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ mr->access = access;
+ }
+
+ return NULL;
+}
+
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
@@ -1446,6 +1484,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.query_srq = rxe_query_srq,
.reg_user_mr = rxe_reg_user_mr,
.req_notify_cq = rxe_req_notify_cq,
+ .rereg_user_mr = rxe_rereg_user_mr,
.resize_cq = rxe_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 26a20f088692..ccb9d19ffe8a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -253,6 +253,22 @@ struct rxe_qp {
struct execute_work cleanup_work;
};
+enum {
+ RXE_ACCESS_REMOTE = IB_ACCESS_REMOTE_READ
+ | IB_ACCESS_REMOTE_WRITE
+ | IB_ACCESS_REMOTE_ATOMIC,
+ RXE_ACCESS_SUPPORTED_MR = RXE_ACCESS_REMOTE
+ | IB_ACCESS_LOCAL_WRITE
+ | IB_ACCESS_MW_BIND
+ | IB_ACCESS_ON_DEMAND
+ | IB_ACCESS_FLUSH_GLOBAL
+ | IB_ACCESS_FLUSH_PERSISTENT
+ | IB_ACCESS_OPTIONAL,
+ RXE_ACCESS_SUPPORTED_QP = RXE_ACCESS_SUPPORTED_MR,
+ RXE_ACCESS_SUPPORTED_MW = RXE_ACCESS_SUPPORTED_MR
+ | IB_ZERO_BASED,
+};
+
enum rxe_mr_state {
RXE_MR_STATE_INVALID,
RXE_MR_STATE_FREE,
@@ -269,6 +285,11 @@ enum rxe_mr_lookup_type {
RXE_LOOKUP_REMOTE,
};
+enum rxe_rereg {
+ RXE_MR_REREG_SUPPORTED = IB_MR_REREG_PD
+ | IB_MR_REREG_ACCESS,
+};
+
static inline int rkey_is_mw(u32 rkey)
{
u32 index = rkey >> 8;