Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.c | 6
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c | 6
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c | 13
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_rvt.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig | 5
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 56
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h | 41
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 32
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hdr.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_icrc.c | 40
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 63
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mcast.c | 22
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 71
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 114
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_odp.c | 420
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c | 15
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 58
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 96
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 54
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.c | 40
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 76
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 58
-rw-r--r--  drivers/infiniband/sw/siw/Kconfig | 5
-rw-r--r--  drivers/infiniband/sw/siw/siw.h | 54
-rw-r--r--  drivers/infiniband/sw/siw/siw_cm.c | 27
-rw-r--r--  drivers/infiniband/sw/siw/siw_cq.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 83
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.c | 28
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.h | 1
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp.c | 54
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_rx.c | 31
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 46
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 50
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.h | 2
42 files changed, 1031 insertions, 666 deletions
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 82c3f5932249..0ca2743f1075 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"
@@ -149,15 +150,16 @@ static void send_complete(struct work_struct *work)
* rvt_create_cq - create a completion queue
* @ibcq: Allocated CQ
* @attr: creation attributes
- * @udata: user data for libibverbs.so
+ * @attrs: uverbs bundle
*
* Called by ib_create_cq() in the generic verbs code.
*
* Return: 0 on success
*/
int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index d49b6d1a26cb..4028702a7b2f 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -10,7 +10,7 @@
#include <rdma/rdmavt_cq.h>
int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
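The two hunks above track the rdma core passing the full uverbs attribute bundle into the create_cq verb; the driver recovers the classic udata from attrs->driver_udata, as rvt_create_cq() now does. A minimal sketch of the same adaptation in a hypothetical provider (the "foo_" names are made up; only the udata extraction is taken from this patch):

	static int foo_create_cq(struct ib_cq *ibcq,
				 const struct ib_cq_init_attr *attr,
				 struct uverbs_attr_bundle *attrs)
	{
		/* the old ib_udata argument now lives inside the bundle */
		struct ib_udata *udata = &attrs->driver_udata;

		return foo_create_cq_udata(ibcq, attr, udata);
	}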
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 7a9afd5231d5..5ed5cfc2b280 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -348,13 +348,13 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
umem = ib_umem_get(pd->device, start, length, mr_access_flags);
if (IS_ERR(umem))
- return (void *)umem;
+ return ERR_CAST(umem);
n = ib_umem_num_pages(umem);
mr = __rvt_alloc_mr(n, pd);
if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
+ ret = ERR_CAST(mr);
goto bail_umem;
}
@@ -542,7 +542,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
mr = __rvt_alloc_mr(max_num_sg, pd);
if (IS_ERR(mr))
- return (struct ib_mr *)mr;
+ return ERR_CAST(mr);
return &mr->ibmr;
}
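The three conversions above are about intent: ERR_CAST() forwards a pointer-encoded errno across pointer types instead of hiding it behind an open-coded cast. A stand-alone sketch of the convention (the inner/outer types and the helpers passed in are hypothetical):

	#include <linux/err.h>

	struct inner;
	struct outer;

	/* The point is only the ERR_CAST() on the failure path. */
	static struct outer *get_outer(struct inner *(*alloc_inner)(void),
				       struct outer *(*wrap)(struct inner *))
	{
		struct inner *in = alloc_inner();

		if (IS_ERR(in))
			return ERR_CAST(in);	/* keeps PTR_ERR(in) intact */

		return wrap(in);
	}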
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index e6203e26cc06..583debe4b9a2 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1107,9 +1107,8 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
}
/* initialize timers needed for rc qp */
timer_setup(&qp->s_timer, rvt_rc_timeout, 0);
- hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- qp->s_rnr_timer.function = rvt_rc_rnr_retry;
+ hrtimer_setup(&qp->s_rnr_timer, rvt_rc_rnr_retry, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
/*
* Driver needs to set up its private QP structure and do any
@@ -1298,7 +1297,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
}
if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
@@ -2547,7 +2546,7 @@ void rvt_stop_rc_timers(struct rvt_qp *qp)
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
hrtimer_try_to_cancel(&qp->s_rnr_timer);
}
}
@@ -2576,7 +2575,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp)
*/
void rvt_del_timers_sync(struct rvt_qp *qp)
{
- del_timer_sync(&qp->s_timer);
+ timer_delete_sync(&qp->s_timer);
hrtimer_cancel(&qp->s_rnr_timer);
}
EXPORT_SYMBOL(rvt_del_timers_sync);
@@ -2597,7 +2596,7 @@ static void rvt_rc_timeout(struct timer_list *t)
qp->s_flags &= ~RVT_S_TIMER;
rvp->n_rc_timeouts++;
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
if (rdi->driver_f.notify_restart_rc)
rdi->driver_f.notify_restart_rc(qp,
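Two independent timer-API migrations meet in the qp.c hunks: hrtimer_setup() folds the old hrtimer_init() plus open-coded ".function =" pair into one call, and del_timer()/del_timer_sync() are now spelled timer_delete()/timer_delete_sync(). A small before/after sketch (the struct and callbacks are made up; the calls mirror the hunks above):

	#include <linux/timer.h>
	#include <linux/hrtimer.h>

	struct demo {
		struct timer_list tl;
		struct hrtimer hrt;
	};

	static void demo_tl_fn(struct timer_list *t) { }
	static enum hrtimer_restart demo_hrt_fn(struct hrtimer *h)
	{
		return HRTIMER_NORESTART;
	}

	static void demo_init(struct demo *d)
	{
		timer_setup(&d->tl, demo_tl_fn, 0);
		/* was: hrtimer_init(&d->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		 *      d->hrt.function = demo_hrt_fn;
		 */
		hrtimer_setup(&d->hrt, demo_hrt_fn, CLOCK_MONOTONIC,
			      HRTIMER_MODE_REL);
	}

	static void demo_fini(struct demo *d)
	{
		timer_delete_sync(&d->tl);	/* was: del_timer_sync() */
		hrtimer_cancel(&d->hrt);
	}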
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 4341965a5ea7..bdb6b9326b64 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -4,7 +4,7 @@
*/
#define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi))
-#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rvt_get_ibdev_name(rdi))
+#define RDI_DEV_ASSIGN(rdi) __assign_str(dev)
#include "trace_rvt.h"
#include "trace_qp.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
index df33c2ca9710..a00489e66ddf 100644
--- a/drivers/infiniband/sw/rdmavt/trace_rvt.h
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -24,7 +24,7 @@ TRACE_EVENT(rvt_dbg,
),
TP_fast_assign(
RDI_DEV_ASSIGN(rdi);
- __assign_str(msg, msg);
+ __assign_str(msg);
),
TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
);
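The __assign_str() hunks track the tracing core dropping the second (source) argument: the source is taken from the matching __string() declaration, so repeating it is no longer accepted. A minimal event showing the current pairing (fragment only; the usual trace-header boilerplate around it is omitted):

	TRACE_EVENT(demo_msg,
		TP_PROTO(const char *dev_name, const char *msg),
		TP_ARGS(dev_name, msg),
		TP_STRUCT__entry(
			__string(dev, dev_name)
			__string(msg, msg)
		),
		TP_fast_assign(
			__assign_str(dev);	/* source comes from __string(dev, ...) */
			__assign_str(msg);
		),
		TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
	);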
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 06b8dc5093f7..1ed5b63f8afc 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -1,11 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
- depends on INET && PCI && INFINIBAND
+ depends on INET && PCI && INFINIBAND && 64BIT
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
- select CRYPTO
- select CRYPTO_CRC32
+ select CRC32
help
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 5395a581f4bb..93134f1d1d0c 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -23,3 +23,5 @@ rdma_rxe-y := \
rxe_task.o \
rxe_net.o \
rxe_hw_counters.o
+
+rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 255677bc12b2..3a77d6db1720 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,14 +31,11 @@ void rxe_dealloc(struct ib_device *ib_dev)
WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree));
- if (rxe->tfm)
- crypto_free_shash(rxe->tfm);
-
mutex_destroy(&rxe->usdev_lock);
}
/* initialize rxe device parameters */
-static void rxe_init_device_param(struct rxe_dev *rxe)
+static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
{
rxe->max_inline_data = RXE_MAX_INLINE_DATA;
@@ -71,10 +68,42 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
rxe->attr.max_pkeys = RXE_MAX_PKEYS;
rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
+
+ if (ndev->addr_len) {
+ memcpy(rxe->raw_gid, ndev->dev_addr,
+ min_t(unsigned int, ndev->addr_len, ETH_ALEN));
+ } else {
+ /*
+ * This device does not have a HW address, but
+ * connection management requires a unique gid.
+ */
+ eth_random_addr(rxe->raw_gid);
+ }
+
addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
- rxe->ndev->dev_addr);
+ rxe->raw_gid);
rxe->max_ucontext = RXE_MAX_UCONTEXT;
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
+
+ /* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
+ rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
+
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_FLUSH;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC_WRITE;
+ }
}
/* initialize port attributes */
@@ -106,13 +135,13 @@ static void rxe_init_port_param(struct rxe_port *port)
/* initialize port state, note IB convention that HCA ports are always
* numbered from 1
*/
-static void rxe_init_ports(struct rxe_dev *rxe)
+static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev)
{
struct rxe_port *port = &rxe->port;
rxe_init_port_param(port);
addrconf_addr_eui48((unsigned char *)&port->port_guid,
- rxe->ndev->dev_addr);
+ rxe->raw_gid);
spin_lock_init(&port->port_lock);
}
@@ -130,12 +159,12 @@ static void rxe_init_pools(struct rxe_dev *rxe)
}
/* initialize rxe device state */
-static void rxe_init(struct rxe_dev *rxe)
+static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev)
{
/* init default device parameters */
- rxe_init_device_param(rxe);
+ rxe_init_device_param(rxe, ndev);
- rxe_init_ports(rxe);
+ rxe_init_ports(rxe, ndev);
rxe_init_pools(rxe);
/* init pending mmap list */
@@ -167,12 +196,13 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
/* called by ifc layer to create new rxe device.
* The caller should allocate memory for rxe by calling ib_alloc_device.
*/
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
+ struct net_device *ndev)
{
- rxe_init(rxe);
+ rxe_init(rxe, ndev);
rxe_set_mtu(rxe, mtu);
- return rxe_register_device(rxe, ibdev_name);
+ return rxe_register_device(rxe, ibdev_name, ndev);
}
static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
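Once the ODP capability bits above are advertised, userspace can check for them before asking for an on-demand MR. A minimal libibverbs sketch, not part of this patch (error handling trimmed; the flag selection is only an example):

	#include <infiniband/verbs.h>

	static int rc_odp_supported(struct ibv_context *ctx)
	{
		struct ibv_device_attr_ex attr = {};

		if (ibv_query_device_ex(ctx, NULL, &attr))
			return 0;

		if (!(attr.odp_caps.general_caps & IBV_ODP_SUPPORT))
			return 0;

		return !!(attr.odp_caps.per_transport_caps.rc_odp_caps &
			  (IBV_ODP_SUPPORT_SEND | IBV_ODP_SUPPORT_RECV));
	}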
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index d8fb2c7af30a..ff8cd53f5f28 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -21,7 +21,6 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
-#include <crypto/hash.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -100,46 +99,10 @@
#define rxe_info_mw(mw, fmt, ...) ibdev_info_ratelimited((mw)->ibmw.device, \
"mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
-/* responder states */
-enum resp_states {
- RESPST_NONE,
- RESPST_GET_REQ,
- RESPST_CHK_PSN,
- RESPST_CHK_OP_SEQ,
- RESPST_CHK_OP_VALID,
- RESPST_CHK_RESOURCE,
- RESPST_CHK_LENGTH,
- RESPST_CHK_RKEY,
- RESPST_EXECUTE,
- RESPST_READ_REPLY,
- RESPST_ATOMIC_REPLY,
- RESPST_ATOMIC_WRITE_REPLY,
- RESPST_PROCESS_FLUSH,
- RESPST_COMPLETE,
- RESPST_ACKNOWLEDGE,
- RESPST_CLEANUP,
- RESPST_DUPLICATE_REQUEST,
- RESPST_ERR_MALFORMED_WQE,
- RESPST_ERR_UNSUPPORTED_OPCODE,
- RESPST_ERR_MISALIGNED_ATOMIC,
- RESPST_ERR_PSN_OUT_OF_SEQ,
- RESPST_ERR_MISSING_OPCODE_FIRST,
- RESPST_ERR_MISSING_OPCODE_LAST_C,
- RESPST_ERR_MISSING_OPCODE_LAST_D1E,
- RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
- RESPST_ERR_RNR,
- RESPST_ERR_RKEY_VIOLATION,
- RESPST_ERR_INVALIDATE_RKEY,
- RESPST_ERR_LENGTH,
- RESPST_ERR_CQ_OVERFLOW,
- RESPST_ERROR,
- RESPST_DONE,
- RESPST_EXIT,
-};
-
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
+ struct net_device *ndev);
void rxe_rcv(struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index b78b8c0856ab..d48af2180745 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -122,25 +122,16 @@ void retransmit_timer(struct timer_list *t)
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
- int must_sched;
-
+ rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
skb_queue_tail(&qp->resp_pkts, skb);
-
- must_sched = skb_queue_len(&qp->resp_pkts) > 1;
- if (must_sched != 0)
- rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
-
- if (must_sched)
- rxe_sched_task(&qp->comp.task);
- else
- rxe_run_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -325,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
}
return COMPST_ERROR_RETRY;
@@ -476,7 +467,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
}
@@ -515,7 +506,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
}
@@ -541,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
}
@@ -654,6 +645,8 @@ int rxe_completer(struct rxe_qp *qp)
int ret;
unsigned long flags;
+ qp->req.again = 0;
+
spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
@@ -737,7 +730,7 @@ int rxe_completer(struct rxe_qp *qp)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
state = COMPST_DONE;
@@ -792,7 +785,7 @@ int rxe_completer(struct rxe_qp *qp)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_sched_task(&qp->req.task);
+ qp->req.again = 1;
}
goto done;
@@ -843,8 +836,9 @@ done:
ret = 0;
goto out;
exit:
- ret = -EAGAIN;
+ ret = (qp->req.again) ? 0 : -EAGAIN;
out:
+ qp->req.again = 0;
if (pkt)
free_pkt(pkt);
return ret;
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index fec87c9030ab..fffd144d509e 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata,
cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
- if (err) {
- vfree(cq->queue->buf);
- kfree(cq->queue);
+ if (err)
return err;
- }
cq->is_user = uresp;
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index 46f82b27fcd2..1f0322491d8c 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -234,7 +234,7 @@ static inline void __bth_set_resv6a(void *arg)
{
struct rxe_bth *bth = arg;
- bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK);
+ bth->qpn &= cpu_to_be32(~BTH_RESV6A_MASK);
}
static inline int __bth_ack(void *arg)
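The __bth_set_resv6a() change is a fix rather than a cleanup: the qpn dword packs the reserved bits together with the QPN and ECN bits, so clearing the reserved field must be a masked AND; plain assignment also wiped out the rest of the dword. Generic form of the intended operation (mask layout assumed to match rxe_hdr.h):

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* Clear only the bits covered by field_mask in a big-endian dword,
	 * leaving every other bit in the same word untouched.
	 */
	static inline void clear_be32_field(__be32 *dword, u32 field_mask)
	{
		*dword &= cpu_to_be32(~field_mask);
	}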
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index a012522b577a..437917a7d8f2 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -14,7 +14,7 @@ static const struct rdma_stat_desc rxe_counter_descs[] = {
[RXE_CNT_RCV_RNR].name = "rcvd_rnr_err",
[RXE_CNT_SND_RNR].name = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred",
+ [RXE_CNT_SENDER_SCHED].name = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY].name = "completer_retry_err",
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index 71f4d4fa9dc8..051f9e1c3852 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -18,7 +18,7 @@ enum rxe_counters {
RXE_CNT_RCV_RNR,
RXE_CNT_SND_RNR,
RXE_CNT_RCV_SEQ_ERR,
- RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_SENDER_SCHED,
RXE_CNT_RETRY_EXCEEDED,
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index fdf5f08cd8f1..76d760fbe7ea 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -10,28 +10,6 @@
#include "rxe_loc.h"
/**
- * rxe_icrc_init() - Initialize crypto function for computing crc32
- * @rxe: rdma_rxe device object
- *
- * Return: 0 on success else an error
- */
-int rxe_icrc_init(struct rxe_dev *rxe)
-{
- struct crypto_shash *tfm;
-
- tfm = crypto_alloc_shash("crc32", 0, 0);
- if (IS_ERR(tfm)) {
- rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n",
- PTR_ERR(tfm));
- return PTR_ERR(tfm);
- }
-
- rxe->tfm = tfm;
-
- return 0;
-}
-
-/**
* rxe_crc32() - Compute cumulative crc32 for a contiguous segment
* @rxe: rdma_rxe device object
* @crc: starting crc32 value from previous segments
@@ -42,23 +20,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
*/
static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
{
- __be32 icrc;
- int err;
-
- SHASH_DESC_ON_STACK(shash, rxe->tfm);
-
- shash->tfm = rxe->tfm;
- *(__be32 *)shash_desc_ctx(shash) = crc;
- err = crypto_shash_update(shash, next, len);
- if (unlikely(err)) {
- rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err);
- return (__force __be32)crc32_le((__force u32)crc, next, len);
- }
-
- icrc = *(__be32 *)shash_desc_ctx(shash);
- barrier_data(shash_desc_ctx(shash));
-
- return icrc;
+ return (__force __be32)crc32_le((__force u32)crc, next, len);
}
/**
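With CRC32 selected straight from Kconfig, the per-segment helper reduces to crc32_le(); the ICRC is still accumulated across several segments by feeding each intermediate value back in as the seed of the next call. A sketch of that chaining (the buffers and the all-ones starting seed are illustrative, not lifted from the patch):

	#include <linux/crc32.h>

	static u32 crc_over_two_segments(const void *a, size_t alen,
					 const void *b, size_t blen)
	{
		u32 crc = crc32_le(~0U, a, alen);	/* start value for segment 1 */

		return crc32_le(crc, b, blen);		/* continue over segment 2 */
	}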
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 746110898a0e..876702058c84 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -58,6 +58,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
u8 rxe_get_next_key(u32 last_key);
+void rxe_mr_init(int access, struct rxe_mr *mr);
void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
int access, struct rxe_mr *mr);
@@ -69,9 +70,9 @@ int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
void *addr, int length, enum rxe_mr_copy_dir dir);
int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
-int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
- u64 compare, u64 swap_add, u64 *orig_val);
-int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
+enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val);
+enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
enum rxe_mr_lookup_type type);
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
@@ -80,6 +81,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
+/* defined in rxe_mr.c; used in rxe_mr.c and rxe_odp.c */
+extern spinlock_t atomic_ops_lock;
+
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
@@ -136,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp)
return IB_MTU_4096;
}
+static inline bool is_odp_mr(struct rxe_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_odp;
+}
+
void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
@@ -164,10 +174,10 @@ void rxe_dealloc(struct ib_device *ib_dev);
int rxe_completer(struct rxe_qp *qp);
int rxe_requester(struct rxe_qp *qp);
-int rxe_responder(struct rxe_qp *qp);
+int rxe_sender(struct rxe_qp *qp);
+int rxe_receiver(struct rxe_qp *qp);
/* rxe_icrc.c */
-int rxe_icrc_init(struct rxe_dev *rxe);
int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt);
void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt);
@@ -180,4 +190,47 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
+/* rxe_odp.c */
+extern const struct mmu_interval_notifier_ops rxe_mn_ops;
+
+#if defined CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+ u64 iova, int access_flags, struct rxe_mr *mr);
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+ enum rxe_mr_copy_dir dir);
+enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val);
+int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
+ unsigned int length);
+enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int
+rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
+ int access_flags, struct rxe_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
+static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
+ int length, enum rxe_mr_copy_dir dir)
+{
+ return -EOPNOTSUPP;
+}
+static inline enum resp_states
+rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
+{
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+}
+static inline int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
+ unsigned int length)
+{
+ return -EOPNOTSUPP;
+}
+static inline enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr,
+ u64 iova, u64 value)
+{
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+}
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
#endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 86cc2e18a7fd..07ff47bae31d 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -31,10 +31,19 @@
static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
unsigned char ll_addr[ETH_ALEN];
+ struct net_device *ndev;
+ int ret;
+
+ ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+ if (!ndev)
+ return -ENODEV;
ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- return dev_mc_add(rxe->ndev, ll_addr);
+ ret = dev_mc_add(ndev, ll_addr);
+ dev_put(ndev);
+
+ return ret;
}
/**
@@ -47,10 +56,19 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
unsigned char ll_addr[ETH_ALEN];
+ struct net_device *ndev;
+ int ret;
+
+ ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+ if (!ndev)
+ return -ENODEV;
ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
- return dev_mc_del(rxe->ndev, ll_addr);
+ ret = dev_mc_del(ndev, ll_addr);
+ dev_put(ndev);
+
+ return ret;
}
/**
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index da3dee520876..bcb97b3ea58a 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -45,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
}
}
-static void rxe_mr_init(int access, struct rxe_mr *mr)
+void rxe_mr_init(int access, struct rxe_mr *mr)
{
u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
return err;
}
- return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
+ if (is_odp_mr(mr))
+ return rxe_odp_mr_copy(mr, iova, addr, length, dir);
+ else
+ return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
}
/* copy data in or out of a wqe, i.e. sg list
@@ -421,7 +424,7 @@ err1:
return err;
}
-int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
+static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
{
unsigned int page_offset;
unsigned long index;
@@ -430,16 +433,6 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
int err;
u8 *va;
- /* mr must be valid even if length is zero */
- if (WARN_ON(!mr))
- return -EINVAL;
-
- if (length == 0)
- return 0;
-
- if (mr->ibmr.type == IB_MR_TYPE_DMA)
- return -EFAULT;
-
err = mr_check_range(mr, iova, length);
if (err)
return err;
@@ -451,7 +444,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
if (!page)
return -EFAULT;
bytes = min_t(unsigned int, length,
- mr_page_size(mr) - page_offset);
+ mr_page_size(mr) - page_offset);
va = kmap_local_page(page);
arch_wb_cache_pmem(va + page_offset, bytes);
@@ -465,11 +458,33 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
return 0;
}
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 start, unsigned int length)
+{
+ int err;
+
+ /* mr must be valid even if length is zero */
+ if (WARN_ON(!mr))
+ return -EINVAL;
+
+ if (length == 0)
+ return 0;
+
+ if (mr->ibmr.type == IB_MR_TYPE_DMA)
+ return -EFAULT;
+
+ if (is_odp_mr(mr))
+ err = rxe_odp_flush_pmem_iova(mr, start, length);
+ else
+ err = rxe_mr_flush_pmem_iova(mr, start, length);
+
+ return err;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
-static DEFINE_SPINLOCK(atomic_ops_lock);
+DEFINE_SPINLOCK(atomic_ops_lock);
-int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
- u64 compare, u64 swap_add, u64 *orig_val)
+enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
{
unsigned int page_offset;
struct page *page;
@@ -521,23 +536,15 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
kunmap_local(va);
- return 0;
+ return RESPST_NONE;
}
-#if defined CONFIG_64BIT
-/* only implemented or called for 64 bit architectures */
-int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
+enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
{
unsigned int page_offset;
struct page *page;
u64 *va;
- /* See IBA oA19-28 */
- if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
- rxe_dbg_mr(mr, "mr not in valid state\n");
- return RESPST_ERR_RKEY_VIOLATION;
- }
-
if (mr->ibmr.type == IB_MR_TYPE_DMA) {
page_offset = iova & (PAGE_SIZE - 1);
page = ib_virt_dma_to_page(iova);
@@ -565,20 +572,12 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
}
va = kmap_local_page(page);
-
/* Do atomic write after all prior operations have completed */
smp_store_release(&va[page_offset >> 3], value);
-
kunmap_local(va);
- return 0;
-}
-#else
-int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
-{
- return RESPST_ERR_UNSUPPORTED_OPCODE;
+ return RESPST_NONE;
}
-#endif
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index cd59666158b1..132a87e52d5c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -345,46 +345,52 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
static void rxe_skb_tx_dtor(struct sk_buff *skb)
{
- struct sock *sk = skb->sk;
- struct rxe_qp *qp = sk->sk_user_data;
- int skb_out = atomic_dec_return(&qp->skb_out);
+ struct net_device *ndev = skb->dev;
+ struct rxe_dev *rxe;
+ unsigned int qp_index;
+ struct rxe_qp *qp;
+ int skb_out;
+
+ rxe = rxe_get_dev_from_net(ndev);
+ if (!rxe && is_vlan_dev(ndev))
+ rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
+ if (WARN_ON(!rxe))
+ return;
- if (unlikely(qp->need_req_skb &&
- skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
- rxe_sched_task(&qp->req.task);
+ qp_index = (int)(uintptr_t)skb->sk->sk_user_data;
+ if (!qp_index)
+ return;
+
+ qp = rxe_pool_get_index(&rxe->qp_pool, qp_index);
+ if (!qp)
+ goto put_dev;
+
+ skb_out = atomic_dec_return(&qp->skb_out);
+ if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)
+ rxe_sched_task(&qp->send_task);
rxe_put(qp);
+put_dev:
+ ib_device_put(&rxe->ib_dev);
+ sock_put(skb->sk);
}
static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
{
int err;
+ struct sock *sk = pkt->qp->sk->sk;
+ sock_hold(sk);
+ skb->sk = sk;
skb->destructor = rxe_skb_tx_dtor;
- skb->sk = pkt->qp->sk->sk;
-
- rxe_get(pkt->qp);
atomic_inc(&pkt->qp->skb_out);
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (skb->protocol == htons(ETH_P_IP))
err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ else
err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
- } else {
- rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
- skb->protocol);
- atomic_dec(&pkt->qp->skb_out);
- rxe_put(pkt->qp);
- kfree_skb(skb);
- return -EINVAL;
- }
-
- if (unlikely(net_xmit_eval(err))) {
- rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
- return -EAGAIN;
- }
- return 0;
+ return err;
}
/* fix up a send packet to match the packets
@@ -392,8 +398,15 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
*/
static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
{
+ struct sock *sk = pkt->qp->sk->sk;
+
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
+ sock_hold(sk);
+ skb->sk = sk;
+ skb->destructor = rxe_skb_tx_dtor;
+ atomic_inc(&pkt->qp->skb_out);
+
if (skb->protocol == htons(ETH_P_IP))
skb_pull(skb, sizeof(struct iphdr));
else
@@ -440,12 +453,6 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
return err;
}
- if ((qp_type(qp) != IB_QPT_RC) &&
- (pkt->mask & RXE_END_MASK)) {
- pkt->wqe->state = wqe_state_done;
- rxe_sched_task(&qp->comp.task);
- }
-
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
goto done;
@@ -517,7 +524,16 @@ out:
*/
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
{
- return rxe->ndev->name;
+ struct net_device *ndev;
+ char *ndev_name;
+
+ ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+ if (!ndev)
+ return NULL;
+ ndev_name = ndev->name;
+ dev_put(ndev);
+
+ return ndev_name;
}
int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
@@ -529,9 +545,9 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
if (!rxe)
return -ENOMEM;
- rxe->ndev = ndev;
+ ib_mark_name_assigned_by_user(&rxe->ib_dev);
- err = rxe_add(rxe, ndev->mtu, ibdev_name);
+ err = rxe_add(rxe, ndev->mtu, ibdev_name, ndev);
if (err) {
ib_dealloc_device(&rxe->ib_dev);
return err;
@@ -555,11 +571,6 @@ static void rxe_port_event(struct rxe_dev *rxe,
/* Caller must hold net_info_lock */
void rxe_port_up(struct rxe_dev *rxe)
{
- struct rxe_port *port;
-
- port = &rxe->port;
- port->attr.state = IB_PORT_ACTIVE;
-
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
dev_info(&rxe->ib_dev.dev, "set active\n");
}
@@ -567,11 +578,6 @@ void rxe_port_up(struct rxe_dev *rxe)
/* Caller must hold net_info_lock */
void rxe_port_down(struct rxe_dev *rxe)
{
- struct rxe_port *port;
-
- port = &rxe->port;
- port->attr.state = IB_PORT_DOWN;
-
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
dev_info(&rxe->ib_dev.dev, "set down\n");
@@ -579,10 +585,18 @@ void rxe_port_down(struct rxe_dev *rxe)
void rxe_set_port_state(struct rxe_dev *rxe)
{
- if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev))
+ struct net_device *ndev;
+
+ ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+ if (!ndev)
+ return;
+
+ if (ib_get_curr_port_state(ndev) == IB_PORT_ACTIVE)
rxe_port_up(rxe);
else
rxe_port_down(rxe);
+
+ dev_put(ndev);
}
static int rxe_notify(struct notifier_block *not_blk,
@@ -599,18 +613,14 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_UNREGISTER:
ib_unregister_device_queued(&rxe->ib_dev);
break;
- case NETDEV_UP:
- rxe_port_up(rxe);
- break;
- case NETDEV_DOWN:
- rxe_port_down(rxe);
- break;
case NETDEV_CHANGEMTU:
rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
+ case NETDEV_DOWN:
case NETDEV_CHANGE:
- rxe_set_port_state(rxe);
+ if (ib_get_curr_port_state(ndev) == IB_PORT_DOWN)
+ rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
break;
case NETDEV_REBOOT:
case NETDEV_GOING_DOWN:
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
new file mode 100644
index 000000000000..dbc5a5600eb7
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2022-2023 Fujitsu Ltd. All rights reserved.
+ */
+
+#include <linux/hmm.h>
+#include <linux/libnvdimm.h>
+
+#include <rdma/ib_umem_odp.h>
+
+#include "rxe.h"
+
+static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct ib_umem_odp *umem_odp =
+ container_of(mni, struct ib_umem_odp, notifier);
+ unsigned long start, end;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&umem_odp->umem_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ start = max_t(u64, ib_umem_start(umem_odp), range->start);
+ end = min_t(u64, ib_umem_end(umem_odp), range->end);
+
+ /* update umem_odp->map.pfn_list */
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
+
+ mutex_unlock(&umem_odp->umem_mutex);
+ return true;
+}
+
+const struct mmu_interval_notifier_ops rxe_mn_ops = {
+ .invalidate = rxe_ib_invalidate_range,
+};
+
+#define RXE_PAGEFAULT_DEFAULT 0
+#define RXE_PAGEFAULT_RDONLY BIT(0)
+#define RXE_PAGEFAULT_SNAPSHOT BIT(1)
+static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
+ u64 access_mask = 0;
+ int np;
+
+ if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
+ access_mask |= HMM_PFN_WRITE;
+
+ /*
+ * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
+ * Callers must release the lock later to let the invalidation
+ * handler do its work again.
+ */
+ np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
+ access_mask, fault);
+ return np;
+}
+
+static int rxe_odp_init_pages(struct rxe_mr *mr)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ int ret;
+
+ ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
+ mr->umem->length,
+ RXE_PAGEFAULT_SNAPSHOT);
+
+ if (ret >= 0)
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return ret >= 0 ? 0 : ret;
+}
+
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+ u64 iova, int access_flags, struct rxe_mr *mr)
+{
+ struct ib_umem_odp *umem_odp;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return -EOPNOTSUPP;
+
+ rxe_mr_init(access_flags, mr);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return -EINVAL;
+ if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return -EINVAL;
+
+ /* Never reached: implicit ODP is not implemented. */
+ }
+
+ umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
+ &rxe_mn_ops);
+ if (IS_ERR(umem_odp)) {
+ rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
+ (int)PTR_ERR(umem_odp));
+ return PTR_ERR(umem_odp);
+ }
+
+ umem_odp->private = mr;
+
+ mr->umem = &umem_odp->umem;
+ mr->access = access_flags;
+ mr->ibmr.length = length;
+ mr->ibmr.iova = iova;
+ mr->page_offset = ib_umem_offset(&umem_odp->umem);
+
+ err = rxe_odp_init_pages(mr);
+ if (err) {
+ ib_umem_odp_release(umem_odp);
+ return err;
+ }
+
+ mr->state = RXE_MR_STATE_VALID;
+ mr->ibmr.type = IB_MR_TYPE_USER;
+
+ return err;
+}
+
+static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp, u64 iova,
+ int length)
+{
+ bool need_fault = false;
+ u64 addr;
+ int idx;
+
+ addr = iova & (~(BIT(umem_odp->page_shift) - 1));
+
+ /* Skim through all pages that are to be accessed. */
+ while (addr < iova + length) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
+
+ if (!(umem_odp->map.pfn_list[idx] & HMM_PFN_VALID)) {
+ need_fault = true;
+ break;
+ }
+
+ addr += BIT(umem_odp->page_shift);
+ }
+ return need_fault;
+}
+
+static unsigned long rxe_odp_iova_to_index(struct ib_umem_odp *umem_odp, u64 iova)
+{
+ return (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
+}
+
+static unsigned long rxe_odp_iova_to_page_offset(struct ib_umem_odp *umem_odp, u64 iova)
+{
+ return iova & (BIT(umem_odp->page_shift) - 1);
+}
+
+static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u32 flags)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ bool need_fault;
+ int err;
+
+ if (unlikely(length < 1))
+ return -EINVAL;
+
+ mutex_lock(&umem_odp->umem_mutex);
+
+ need_fault = rxe_check_pagefault(umem_odp, iova, length);
+ if (need_fault) {
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ /* umem_mutex is locked on success. */
+ err = rxe_odp_do_pagefault_and_lock(mr, iova, length,
+ flags);
+ if (err < 0)
+ return err;
+
+ need_fault = rxe_check_pagefault(umem_odp, iova, length);
+ if (need_fault)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
+ int length, enum rxe_mr_copy_dir dir)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ struct page *page;
+ int idx, bytes;
+ size_t offset;
+ u8 *user_va;
+
+ idx = rxe_odp_iova_to_index(umem_odp, iova);
+ offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
+
+ while (length > 0) {
+ u8 *src, *dest;
+
+ page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
+ user_va = kmap_local_page(page);
+ if (!user_va)
+ return -EFAULT;
+
+ src = (dir == RXE_TO_MR_OBJ) ? addr : user_va;
+ dest = (dir == RXE_TO_MR_OBJ) ? user_va : addr;
+
+ bytes = BIT(umem_odp->page_shift) - offset;
+ if (bytes > length)
+ bytes = length;
+
+ memcpy(dest, src, bytes);
+ kunmap_local(user_va);
+
+ length -= bytes;
+ idx++;
+ offset = 0;
+ }
+
+ return 0;
+}
+
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+ enum rxe_mr_copy_dir dir)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ u32 flags = RXE_PAGEFAULT_DEFAULT;
+ int err;
+
+ if (length == 0)
+ return 0;
+
+ if (unlikely(!mr->umem->is_odp))
+ return -EOPNOTSUPP;
+
+ switch (dir) {
+ case RXE_TO_MR_OBJ:
+ break;
+
+ case RXE_FROM_MR_OBJ:
+ flags |= RXE_PAGEFAULT_RDONLY;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ err = rxe_odp_map_range_and_lock(mr, iova, length, flags);
+ if (err)
+ return err;
+
+ err = __rxe_odp_mr_copy(mr, iova, addr, length, dir);
+
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return err;
+}
+
+static enum resp_states rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova,
+ int opcode, u64 compare,
+ u64 swap_add, u64 *orig_val)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ unsigned int page_offset;
+ struct page *page;
+ unsigned int idx;
+ u64 value;
+ u64 *va;
+ int err;
+
+ if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+ rxe_dbg_mr(mr, "mr not in valid state\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ err = mr_check_range(mr, iova, sizeof(value));
+ if (err) {
+ rxe_dbg_mr(mr, "iova out of range\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ idx = rxe_odp_iova_to_index(umem_odp, iova);
+ page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
+ page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
+ if (!page)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ if (unlikely(page_offset & 0x7)) {
+ rxe_dbg_mr(mr, "iova not aligned\n");
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+ }
+
+ va = kmap_local_page(page);
+
+ spin_lock_bh(&atomic_ops_lock);
+ value = *orig_val = va[page_offset >> 3];
+
+ if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+ if (value == compare)
+ va[page_offset >> 3] = swap_add;
+ } else {
+ value += swap_add;
+ va[page_offset >> 3] = value;
+ }
+ spin_unlock_bh(&atomic_ops_lock);
+
+ kunmap_local(va);
+
+ return RESPST_NONE;
+}
+
+enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ int err;
+
+ err = rxe_odp_map_range_and_lock(mr, iova, sizeof(char),
+ RXE_PAGEFAULT_DEFAULT);
+ if (err < 0)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ err = rxe_odp_do_atomic_op(mr, iova, opcode, compare, swap_add,
+ orig_val);
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return err;
+}
+
+int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
+ unsigned int length)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ unsigned int page_offset;
+ unsigned long index;
+ struct page *page;
+ unsigned int bytes;
+ int err;
+ u8 *va;
+
+ err = rxe_odp_map_range_and_lock(mr, iova, length,
+ RXE_PAGEFAULT_DEFAULT);
+ if (err)
+ return err;
+
+ while (length > 0) {
+ index = rxe_odp_iova_to_index(umem_odp, iova);
+ page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
+
+ page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
+ if (!page) {
+ mutex_unlock(&umem_odp->umem_mutex);
+ return -EFAULT;
+ }
+
+ bytes = min_t(unsigned int, length,
+ mr_page_size(mr) - page_offset);
+
+ va = kmap_local_page(page);
+ arch_wb_cache_pmem(va + page_offset, bytes);
+ kunmap_local(va);
+
+ length -= bytes;
+ iova += bytes;
+ page_offset = 0;
+ }
+
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return 0;
+}
+
+enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ unsigned int page_offset;
+ unsigned long index;
+ struct page *page;
+ int err;
+ u64 *va;
+
+ /* See IBA oA19-28 */
+ err = mr_check_range(mr, iova, sizeof(value));
+ if (unlikely(err)) {
+ rxe_dbg_mr(mr, "iova out of range\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ err = rxe_odp_map_range_and_lock(mr, iova, sizeof(value),
+ RXE_PAGEFAULT_DEFAULT);
+ if (err)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
+ index = rxe_odp_iova_to_index(umem_odp, iova);
+ page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
+ if (!page) {
+ mutex_unlock(&umem_odp->umem_mutex);
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+ /* See IBA A19.4.2 */
+ if (unlikely(page_offset & 0x7)) {
+ mutex_unlock(&umem_odp->umem_mutex);
+ rxe_dbg_mr(mr, "misaligned address\n");
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+ }
+
+ va = kmap_local_page(page);
+ /* Do atomic write after all prior operations have completed */
+ smp_store_release(&va[page_offset >> 3], value);
+ kunmap_local(va);
+
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return RESPST_NONE;
+}
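The new rxe_odp.c path is reached when userspace registers a region with the on-demand-paging access flag; pages are then faulted in through the hooks above instead of being pinned at registration time. A minimal libibverbs sketch (the access-flag combination is only an example):

	#include <stddef.h>
	#include <infiniband/verbs.h>

	static struct ibv_mr *reg_odp_mr(struct ibv_pd *pd, void *buf, size_t len)
	{
		return ibv_reg_mr(pd, buf, len,
				  IBV_ACCESS_LOCAL_WRITE |
				  IBV_ACCESS_REMOTE_READ |
				  IBV_ACCESS_REMOTE_WRITE |
				  IBV_ACCESS_ON_DEMAND);
	}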
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index d2f57ead78ad..767870568372 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -53,12 +53,9 @@ enum rxe_device_param {
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_FLUSH_GLOBAL
| IB_DEVICE_FLUSH_PERSISTENT
-#ifdef CONFIG_64BIT
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_ATOMIC_WRITE,
-#else
- | IB_DEVICE_MEM_WINDOW_TYPE_2B,
-#endif /* CONFIG_64BIT */
+
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
sizeof(struct ib_sge) * RXE_MAX_SGE,
@@ -129,7 +126,7 @@ enum rxe_device_param {
enum rxe_port_param {
RXE_PORT_GID_TBL_LEN = 1024,
RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP,
- RXE_PORT_MAX_MSG_SZ = 0x800000,
+ RXE_PORT_MAX_MSG_SZ = (1UL << 31),
RXE_PORT_BAD_PKEY_CNTR = 0,
RXE_PORT_QKEY_VIOL_CNTR = 0,
RXE_PORT_LID = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 6215c6de3a84..d9cb682fd71f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -119,7 +119,7 @@ void rxe_pool_cleanup(struct rxe_pool *pool)
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
bool sleepable)
{
- int err;
+ int err = -EINVAL;
gfp_t gfp_flags;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
@@ -147,7 +147,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
err_cnt:
atomic_dec(&pool->num_elem);
- return -EINVAL;
+ return err;
}
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
@@ -178,7 +178,6 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
{
struct rxe_pool *pool = elem->pool;
struct xarray *xa = &pool->xa;
- static int timeout = RXE_POOL_TIMEOUT;
int ret, err = 0;
void *xa_ret;
@@ -202,19 +201,19 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
* return to rdma-core
*/
if (sleepable) {
- if (!completion_done(&elem->complete) && timeout) {
+ if (!completion_done(&elem->complete)) {
ret = wait_for_completion_timeout(&elem->complete,
- timeout);
+ msecs_to_jiffies(50000));
/* Shouldn't happen. There are still references to
* the object but, rather than deadlock, free the
* object or pass back to rdma-core.
*/
if (WARN_ON(!ret))
- err = -EINVAL;
+ err = -ETIMEDOUT;
}
} else {
- unsigned long until = jiffies + timeout;
+ unsigned long until = jiffies + RXE_POOL_TIMEOUT;
/* AH objects are unique in that the destroy_ah verb
* can be called in atomic context. This delay
@@ -226,7 +225,7 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
mdelay(1);
if (WARN_ON(!completion_done(&elem->complete)))
- err = -EINVAL;
+ err = -ETIMEDOUT;
}
if (pool->cleanup)
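For the rxe_pool.c hunks: wait_for_completion_timeout() takes its timeout in jiffies and returns 0 on timeout (or the remaining jiffies otherwise), which is why the 50-second wait is written as msecs_to_jiffies(50000) and the !ret case now maps to -ETIMEDOUT. A tiny sketch of that return convention:

	#include <linux/completion.h>
	#include <linux/jiffies.h>
	#include <linux/errno.h>

	static int wait_up_to_50s(struct completion *done)
	{
		unsigned long left;

		left = wait_for_completion_timeout(done, msecs_to_jiffies(50000));

		return left ? 0 : -ETIMEDOUT;	/* 0 from the API means timed out */
	}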
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index e3589c02013e..f2af3e0aef35 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
return err;
- qp->sk->sk->sk_user_data = qp;
+ qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index;
/* pick a source UDP port number for this QP based on
* the source QPN. this spreads traffic for different QPs
@@ -265,8 +265,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- rxe_init_task(&qp->req.task, qp, rxe_requester);
- rxe_init_task(&qp->comp.task, qp, rxe_completer);
+ rxe_init_task(&qp->send_task, qp, rxe_sender);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@@ -337,7 +336,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
- rxe_init_task(&qp->resp.task, qp, rxe_responder);
+ rxe_init_task(&qp->recv_task, qp, rxe_receiver);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@@ -514,14 +513,12 @@ err1:
static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
- rxe_disable_task(&qp->resp.task);
- rxe_disable_task(&qp->comp.task);
- rxe_disable_task(&qp->req.task);
+ rxe_disable_task(&qp->recv_task);
+ rxe_disable_task(&qp->send_task);
/* drain work and packet queues */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->rq.queue)
rxe_queue_reset(qp->rq.queue);
@@ -548,9 +545,8 @@ static void rxe_qp_reset(struct rxe_qp *qp)
cleanup_rd_atomic_resources(qp);
/* reenable tasks */
- rxe_enable_task(&qp->resp.task);
- rxe_enable_task(&qp->comp.task);
- rxe_enable_task(&qp->req.task);
+ rxe_enable_task(&qp->recv_task);
+ rxe_enable_task(&qp->send_task);
}
/* move the qp to the error state */
@@ -562,9 +558,8 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_sched_task(&qp->resp.task);
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->recv_task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -575,8 +570,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -781,6 +775,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
* Yield the processor
*/
spin_lock_irqsave(&qp->state_lock, flags);
+ attr->cur_qp_state = qp_state(qp);
if (qp->attr.sq_draining) {
spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
@@ -816,24 +811,25 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
- if (qp_type(qp) == IB_QPT_RC) {
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ /* timer_setup() initializes .function, so a NULL .function means the
+ * timer was never set up and there is nothing to delete.  Otherwise
+ * the timer has been initialized and is deleted synchronously here.
+ */
+ if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function &&
+ qp->rnr_nak_timer.function) {
+ timer_delete_sync(&qp->retrans_timer);
+ timer_delete_sync(&qp->rnr_nak_timer);
}
- if (qp->resp.task.func)
- rxe_cleanup_task(&qp->resp.task);
-
- if (qp->req.task.func)
- rxe_cleanup_task(&qp->req.task);
+ if (qp->recv_task.func)
+ rxe_cleanup_task(&qp->recv_task);
- if (qp->comp.task.func)
- rxe_cleanup_task(&qp->comp.task);
+ if (qp->send_task.func)
+ rxe_cleanup_task(&qp->send_task);
/* flush out any receive wr's or pending requests */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index d8c41fd626a9..9d0392df8a92 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -5,7 +5,6 @@
*/
#include <linux/skbuff.h>
-#include <crypto/hash.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -108,7 +107,7 @@ void rnr_nak_timer(struct timer_list *t)
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -424,7 +423,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
int paylen;
int solicited;
u32 qp_num;
- int ack_req;
+ int ack_req = 0;
/* length from start of bth to end of icrc */
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
@@ -445,8 +444,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
qp->attr.dest_qp_num;
- ack_req = ((pkt->mask & RXE_END_MASK) ||
- (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
+ if (qp_type(qp) != IB_QPT_UD && qp_type(qp) != IB_QPT_UC)
+ ack_req = ((pkt->mask & RXE_END_MASK) ||
+ (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
if (ack_req)
qp->req.noack_pkts = 0;
@@ -545,6 +545,8 @@ static void update_wqe_state(struct rxe_qp *qp,
if (pkt->mask & RXE_END_MASK) {
if (qp_type(qp) == IB_QPT_RC)
wqe->state = wqe_state_pending;
+ else
+ wqe->state = wqe_state_done;
} else {
wqe->state = wqe_state_processing;
}
@@ -573,30 +575,6 @@ static void update_wqe_psn(struct rxe_qp *qp,
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}
-static void save_state(struct rxe_send_wqe *wqe,
- struct rxe_qp *qp,
- struct rxe_send_wqe *rollback_wqe,
- u32 *rollback_psn)
-{
- rollback_wqe->state = wqe->state;
- rollback_wqe->first_psn = wqe->first_psn;
- rollback_wqe->last_psn = wqe->last_psn;
- rollback_wqe->dma = wqe->dma;
- *rollback_psn = qp->req.psn;
-}
-
-static void rollback_state(struct rxe_send_wqe *wqe,
- struct rxe_qp *qp,
- struct rxe_send_wqe *rollback_wqe,
- u32 rollback_psn)
-{
- wqe->state = rollback_wqe->state;
- wqe->first_psn = rollback_wqe->first_psn;
- wqe->last_psn = rollback_wqe->last_psn;
- wqe->dma = rollback_wqe->dma;
- qp->req.psn = rollback_psn;
-}
-
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@@ -655,12 +633,6 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
- /* There is no ack coming for local work requests
- * which can lead to a deadlock. So go ahead and complete
- * it now.
- */
- rxe_sched_task(&qp->comp.task);
-
return 0;
}
@@ -676,8 +648,6 @@ int rxe_requester(struct rxe_qp *qp)
int opcode;
int err;
int ret;
- struct rxe_send_wqe rollback_wqe;
- u32 rollback_psn;
struct rxe_queue *q = qp->sq.queue;
struct rxe_ah *ah;
struct rxe_av *av;
@@ -692,10 +662,12 @@ int rxe_requester(struct rxe_qp *qp)
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
spin_unlock_irqrestore(&qp->state_lock, flags);
- if (wqe)
+ if (wqe) {
+ wqe->status = IB_WC_WR_FLUSH_ERR;
goto err;
- else
+ } else {
goto exit;
+ }
}
if (unlikely(qp_state(qp) == IB_QPS_RESET)) {
@@ -786,7 +758,6 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_sched_task(&qp->comp.task);
goto done;
}
payload = mtu;
@@ -799,9 +770,6 @@ int rxe_requester(struct rxe_qp *qp)
pkt.mask = rxe_opcode[opcode].mask;
pkt.wqe = wqe;
- /* save wqe state before we build and send packet */
- save_state(wqe, qp, &rollback_wqe, &rollback_psn);
-
av = rxe_get_av(&pkt, &ah);
if (unlikely(!av)) {
rxe_dbg_qp(qp, "Failed no address vector\n");
@@ -834,31 +802,14 @@ int rxe_requester(struct rxe_qp *qp)
if (ah)
rxe_put(ah);
- /* update wqe state as though we had sent it */
- update_wqe_state(qp, wqe, &pkt);
- update_wqe_psn(qp, wqe, &pkt, payload);
-
err = rxe_xmit_packet(qp, &pkt, skb);
if (err) {
- if (err != -EAGAIN) {
- wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto err;
- }
-
- /* the packet was dropped so reset wqe to the state
- * before we sent it so we can try to resend
- */
- rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
-
- /* force a delay until the dropped packet is freed and
- * the send queue is drained below the low water mark
- */
- qp->need_req_skb = 1;
-
- rxe_sched_task(&qp->req.task);
- goto exit;
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
}
+ update_wqe_state(qp, wqe, &pkt);
+ update_wqe_psn(qp, wqe, &pkt, payload);
update_state(qp, &pkt);
/* A non-zero return value will cause rxe_do_task to
@@ -878,3 +829,20 @@ exit:
out:
return ret;
}
+
+int rxe_sender(struct rxe_qp *qp)
+{
+ int req_ret;
+ int comp_ret;
+
+ /* process the send queue */
+ req_ret = rxe_requester(qp);
+
+ /* process the response queue */
+ comp_ret = rxe_completer(qp);
+
+ /* exit the task loop if both the requester and the completer
+ * indicate there is nothing left to do
+ */
+ return (req_ret && comp_ret) ? -EAGAIN : 0;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 963382f625d7..711f73e0bbb1 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -49,18 +49,8 @@ static char *resp_state_name[] = {
/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
- int must_sched;
- struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
-
skb_queue_tail(&qp->req_pkts, skb);
-
- must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
- (skb_queue_len(&qp->req_pkts) > 1);
-
- if (must_sched)
- rxe_sched_task(&qp->resp.task);
- else
- rxe_run_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@@ -351,9 +341,22 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
/*
* See IBA C9-92
* For UD QPs we only check if the packet will fit in the
- * receive buffer later. For rmda operations additional
+ * receive buffer later. For RDMA operations additional
* length checks are performed in check_rkey.
*/
+ if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
+ unsigned int payload = payload_size(pkt);
+ unsigned int recv_buffer_len = 0;
+ int i;
+
+ for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
+ recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
+ if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) {
+ rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+
if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
(qp_type(qp) == IB_QPT_UC))) {
unsigned int mtu = qp->mtu;
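
The UD check added to rxe_resp_check_length() above sums the SGE lengths of the receive WQE and rejects the packet when the payload plus the network header would not fit. A standalone sketch of the same arithmetic, using invented SGE sizes and assuming the 40-byte size of union rdma_network_hdr (an IB GRH):

#include <stdbool.h>
#include <stdio.h>

/* Assumed size of union rdma_network_hdr (an IB GRH is 40 bytes). */
#define RDMA_NETWORK_HDR_LEN 40u

/* Return true when a UD payload of 'payload' bytes fits into a receive WQE
 * whose scatter list is described by sge_len[0..num_sge-1], applying the same
 * "payload + network header must not exceed the summed SGE lengths" rule.
 */
static bool ud_payload_fits(unsigned int payload,
			    const unsigned int *sge_len, int num_sge)
{
	unsigned int recv_buffer_len = 0;
	int i;

	for (i = 0; i < num_sge; i++)
		recv_buffer_len += sge_len[i];

	return payload + RDMA_NETWORK_HDR_LEN <= recv_buffer_len;
}

int main(void)
{
	unsigned int sges[] = { 256, 256 };	/* 512-byte receive buffer */

	printf("472-byte payload fits: %d\n",
	       ud_payload_fits(472, sges, 2));	/* 472 + 40 == 512 -> yes */
	printf("473-byte payload fits: %d\n",
	       ud_payload_fits(473, sges, 2));	/* 513 > 512 -> no */
	return 0;
}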
@@ -699,10 +702,16 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
if (!res->replay) {
u64 iova = qp->resp.va + qp->resp.offset;
- err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
- atmeth_comp(pkt),
- atmeth_swap_add(pkt),
- &res->atomic.orig_val);
+ if (is_odp_mr(mr))
+ err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
+ else
+ err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
if (err)
return err;
@@ -740,7 +749,16 @@ static enum resp_states atomic_write_reply(struct rxe_qp *qp,
value = *(u64 *)payload_addr(pkt);
iova = qp->resp.va + qp->resp.offset;
- err = rxe_mr_do_atomic_write(mr, iova, value);
+ /* See IBA oA19-28 */
+ if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+ rxe_dbg_mr(mr, "mr not in valid state\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ if (is_odp_mr(mr))
+ err = rxe_odp_do_atomic_write(mr, iova, value);
+ else
+ err = rxe_mr_do_atomic_write(mr, iova, value);
if (err)
return err;
@@ -1485,7 +1503,7 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
qp->resp.wqe = NULL;
}
-int rxe_responder(struct rxe_qp *qp)
+int rxe_receiver(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 80332638d9e3..6f8f353e9583 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -85,17 +85,17 @@ static bool is_done(struct rxe_task *task)
/* do_task is a wrapper for the three tasks (requester,
* completer, responder) and calls them in a loop until
- * they return a non-zero value. It is called either
- * directly by rxe_run_task or indirectly if rxe_sched_task
- * schedules the task. They must call __reserve_if_idle to
- * move the task to busy before calling or scheduling.
- * The task can also be moved to drained or invalid
- * by calls to rxe_cleanup_task or rxe_disable_task.
- * In that case tasks which get here are not executed but
- * just flushed. The tasks are designed to look to see if
- * there is work to do and then do part of it before returning
- * here with a return value of zero until all the work
- * has been consumed then it returns a non-zero value.
+ * they return a non-zero value. It is called indirectly
+ * when rxe_sched_task schedules the task. Callers must
+ * use __reserve_if_idle to move the task to busy before
+ * calling or scheduling it. The task can also be moved to
+ * drained or invalid by calls to rxe_cleanup_task or
+ * rxe_disable_task; in that case tasks which get here
+ * are not executed but just flushed. Each task checks
+ * whether there is work to do, does part of it, and
+ * returns zero until all the work has been consumed, at
+ * which point it returns a non-zero value.
* The number of times the task can be run is limited by
* max iterations so one task cannot hold the cpu forever.
* If the limit is hit and work remains the task is rescheduled.
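
The reworded comment above describes a cooperative scheme: each task consumes a slice of work per call, returns zero while more remains, and is requeued if it hits the iteration cap. A minimal userspace model of that contract (MAX_ITERATIONS and the pending counter are illustrative, not driver symbols):

#include <stdio.h>

#define MAX_ITERATIONS 8	/* illustrative cap, not the driver's value */

static int pending = 20;	/* pretend units of queued work */

/* Task body: consume one unit of work, return 0 while more remains and a
 * non-zero value once everything has been drained.
 */
static int task_func(void)
{
	if (pending == 0)
		return 1;
	pending--;
	return 0;
}

/* Simplified do_task(): run the task until it reports "done" or the iteration
 * cap is hit; in the latter case a real implementation would requeue itself
 * on the work queue instead of holding the CPU.
 */
static void do_task_model(void)
{
	int iterations = 0;

	while (task_func() == 0) {
		if (++iterations >= MAX_ITERATIONS) {
			printf("cap hit with work left: reschedule\n");
			return;
		}
	}
	printf("task drained after %d iterations\n", iterations);
}

int main(void)
{
	while (pending > 0)
		do_task_model();	/* models repeated scheduling */
	return 0;
}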
@@ -234,24 +234,6 @@ void rxe_cleanup_task(struct rxe_task *task)
spin_unlock_irqrestore(&task->lock, flags);
}
-/* run the task inline if it is currently idle
- * cannot call do_task holding the lock
- */
-void rxe_run_task(struct rxe_task *task)
-{
- unsigned long flags;
- bool run;
-
- WARN_ON(rxe_read(task->qp) <= 0);
-
- spin_lock_irqsave(&task->lock, flags);
- run = __reserve_if_idle(task);
- spin_unlock_irqrestore(&task->lock, flags);
-
- if (run)
- do_task(task);
-}
-
/* schedule the task to run later as a work queue entry.
* the queue_work call can be called holding
* the lock.
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index a63e258b3d66..a8c9a77b6027 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -47,8 +47,6 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
-void rxe_run_task(struct rxe_task *task);
-
void rxe_sched_task(struct rxe_task *task);
/* keep a task from scheduling */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 614581989b38..2331e698a65b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -41,6 +41,7 @@ static int rxe_query_port(struct ib_device *ibdev,
u32 port_num, struct ib_port_attr *attr)
{
struct rxe_dev *rxe = to_rdev(ibdev);
+ struct net_device *ndev;
int err, ret;
if (port_num != 1) {
@@ -49,21 +50,29 @@ static int rxe_query_port(struct ib_device *ibdev,
goto err_out;
}
+ ndev = rxe_ib_device_get_netdev(ibdev);
+ if (!ndev) {
+ err = -ENODEV;
+ goto err_out;
+ }
+
memcpy(attr, &rxe->port.attr, sizeof(*attr));
mutex_lock(&rxe->usdev_lock);
ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed,
&attr->active_width);
+ attr->state = ib_get_curr_port_state(ndev);
if (attr->state == IB_PORT_ACTIVE)
attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
- else if (dev_get_flags(rxe->ndev) & IFF_UP)
+ else if (dev_get_flags(ndev) & IFF_UP)
attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
else
attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
mutex_unlock(&rxe->usdev_lock);
+ dev_put(ndev);
return ret;
err_out:
@@ -71,6 +80,18 @@ err_out:
return err;
}
+static int rxe_query_gid(struct ib_device *ibdev, u32 port, int idx,
+ union ib_gid *gid)
+{
+ struct rxe_dev *rxe = to_rdev(ibdev);
+
+ /* subnet_prefix == interface_id == 0; */
+ memset(gid, 0, sizeof(*gid));
+ memcpy(gid->raw, rxe->raw_gid, ETH_ALEN);
+
+ return 0;
+}
+
static int rxe_query_pkey(struct ib_device *ibdev,
u32 port_num, u16 index, u16 *pkey)
{
@@ -688,7 +709,7 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
for (i = 0; i < ibwr->num_sge; i++)
length += ibwr->sg_list[i].length;
- if (length > (1UL << 31)) {
+ if (length > RXE_PORT_MAX_MSG_SZ) {
rxe_err_qp(qp, "message length too long\n");
break;
}
@@ -812,7 +833,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
int i;
for (i = 0; i < ibwr->num_sge; i++, sge++) {
- memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length);
+ memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length);
p += sge->length;
}
}
@@ -888,6 +909,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
{
int err = 0;
unsigned long flags;
+ int good = 0;
spin_lock_irqsave(&qp->sq.sq_lock, flags);
while (ibwr) {
@@ -895,18 +917,16 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
if (err) {
*bad_wr = ibwr;
break;
+ } else {
+ good++;
}
ibwr = ibwr->next;
}
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
- if (!err)
- rxe_sched_task(&qp->req.task);
-
- spin_lock_irqsave(&qp->state_lock, flags);
- if (qp_state(qp) == IB_QPS_ERR)
- rxe_sched_task(&qp->comp.task);
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ /* kick off processing of any posted wqes */
+ if (good)
+ rxe_sched_task(&qp->send_task);
return err;
}
@@ -936,7 +956,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->is_user) {
/* Utilize process context to do protocol processing */
- rxe_run_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
} else {
err = rxe_post_send_kernel(qp, wr, bad_wr);
if (err)
@@ -973,8 +993,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
for (i = 0; i < num_sge; i++)
length += ibwr->sg_list[i].length;
- /* IBA max message size is 2^31 */
- if (length >= (1UL<<31)) {
+ if (length > RXE_PORT_MAX_MSG_SZ) {
err = -EINVAL;
rxe_dbg("message length too long\n");
goto err_out;
@@ -1046,7 +1065,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
- rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
@@ -1054,8 +1073,9 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
/* cq */
static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *dev = ibcq->device;
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_cq *cq = to_rcq(ibcq);
@@ -1278,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
- err = rxe_mr_init_user(rxe, start, length, access, mr);
+ if (access & IB_ACCESS_ON_DEMAND)
+ err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
+ else
+ err = rxe_mr_init_user(rxe, start, length, access, mr);
if (err) {
rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
goto err_cleanup;
@@ -1425,9 +1448,16 @@ static const struct attribute_group rxe_attr_group = {
static int rxe_enable_driver(struct ib_device *ib_dev)
{
struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+ struct net_device *ndev;
+
+ ndev = rxe_ib_device_get_netdev(ib_dev);
+ if (!ndev)
+ return -ENODEV;
rxe_set_port_state(rxe);
- dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
+ dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(ndev));
+
+ dev_put(ndev);
return 0;
}
@@ -1478,6 +1508,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.query_ah = rxe_query_ah,
.query_device = rxe_query_device,
.query_pkey = rxe_query_pkey,
+ .query_gid = rxe_query_gid,
.query_port = rxe_query_port,
.query_qp = rxe_query_qp,
.query_srq = rxe_query_srq,
@@ -1495,7 +1526,8 @@ static const struct ib_device_ops rxe_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};
-int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
+ struct net_device *ndev)
{
int err;
struct ib_device *dev = &rxe->ib_dev;
@@ -1507,17 +1539,13 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
dev->num_comp_vectors = num_possible_cpus();
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
- rxe->ndev->dev_addr);
+ rxe->raw_gid);
dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
ib_set_device_ops(dev, &rxe_dev_ops);
- err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
- if (err)
- return err;
-
- err = rxe_icrc_init(rxe);
+ err = ib_device_set_netdev(&rxe->ib_dev, ndev, 1);
if (err)
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ccb9d19ffe8a..fd48075810dd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -113,7 +113,7 @@ struct rxe_req_info {
int need_retry;
int wait_for_rnr_timer;
int noack_pkts;
- struct rxe_task task;
+ int again;
};
struct rxe_comp_info {
@@ -124,7 +124,43 @@ struct rxe_comp_info {
int started_retry;
u32 retry_cnt;
u32 rnr_retry;
- struct rxe_task task;
+};
+
+/* responder states */
+enum resp_states {
+ RESPST_NONE,
+ RESPST_GET_REQ,
+ RESPST_CHK_PSN,
+ RESPST_CHK_OP_SEQ,
+ RESPST_CHK_OP_VALID,
+ RESPST_CHK_RESOURCE,
+ RESPST_CHK_LENGTH,
+ RESPST_CHK_RKEY,
+ RESPST_EXECUTE,
+ RESPST_READ_REPLY,
+ RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
+ RESPST_PROCESS_FLUSH,
+ RESPST_COMPLETE,
+ RESPST_ACKNOWLEDGE,
+ RESPST_CLEANUP,
+ RESPST_DUPLICATE_REQUEST,
+ RESPST_ERR_MALFORMED_WQE,
+ RESPST_ERR_UNSUPPORTED_OPCODE,
+ RESPST_ERR_MISALIGNED_ATOMIC,
+ RESPST_ERR_PSN_OUT_OF_SEQ,
+ RESPST_ERR_MISSING_OPCODE_FIRST,
+ RESPST_ERR_MISSING_OPCODE_LAST_C,
+ RESPST_ERR_MISSING_OPCODE_LAST_D1E,
+ RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
+ RESPST_ERR_RNR,
+ RESPST_ERR_RKEY_VIOLATION,
+ RESPST_ERR_INVALIDATE_RKEY,
+ RESPST_ERR_LENGTH,
+ RESPST_ERR_CQ_OVERFLOW,
+ RESPST_ERROR,
+ RESPST_DONE,
+ RESPST_EXIT,
};
enum rdatm_res_state {
@@ -196,7 +232,6 @@ struct rxe_resp_info {
unsigned int res_head;
unsigned int res_tail;
struct resp_res *res;
- struct rxe_task task;
};
struct rxe_qp {
@@ -229,6 +264,9 @@ struct rxe_qp {
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
+ struct rxe_task send_task;
+ struct rxe_task recv_task;
+
struct rxe_req_info req;
struct rxe_comp_info comp;
struct rxe_resp_info resp;
@@ -369,14 +407,15 @@ struct rxe_port {
u32 qp_gsi_index;
};
+#define RXE_PORT 1
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
int max_ucontext;
int max_inline_data;
- struct mutex usdev_lock;
+ struct mutex usdev_lock;
- struct net_device *ndev;
+ char raw_gid[ETH_ALEN];
struct rxe_pool uc_pool;
struct rxe_pool pd_pool;
@@ -402,9 +441,13 @@ struct rxe_dev {
atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
- struct crypto_shash *tfm;
};
+static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev)
+{
+ return ib_device_get_netdev(dev, RXE_PORT);
+}
+
static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
{
atomic64_inc(&rxe->stats_counters[index]);
@@ -470,6 +513,7 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
return to_rpd(mw->ibmw.pd);
}
-int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
+ struct net_device *ndev);
#endif /* RXE_VERBS_H */
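
rxe_ib_device_get_netdev() above returns a referenced net_device, so every caller, as in rxe_query_port() and rxe_enable_driver(), must pair it with dev_put() on every exit path. A small userspace sketch of that acquire/use/release discipline, using an invented refcounted handle rather than real netdev APIs:

#include <stdio.h>

/* Hypothetical refcounted handle standing in for a net_device reference. */
struct handle {
	int refcount;
	const char *name;
};

static struct handle global = { .refcount = 1, .name = "eth0" };

/* get: take an extra reference, or fail (models a missing netdev). */
static struct handle *handle_get(void)
{
	if (global.refcount <= 0)
		return NULL;
	global.refcount++;
	return &global;
}

/* put: drop the reference taken by handle_get(). */
static void handle_put(struct handle *h)
{
	if (h)
		h->refcount--;
}

/* Every exit path, success or error, drops the reference it took. */
static int query_name(char *buf, size_t len)
{
	struct handle *h = handle_get();
	int err = 0;

	if (!h)
		return -1;	/* nothing acquired, nothing to release */

	if (snprintf(buf, len, "%s", h->name) >= (int)len)
		err = -1;	/* error path still falls through to the put */

	handle_put(h);
	return err;
}

int main(void)
{
	char name[8];

	if (query_name(name, sizeof(name)) == 0)
		printf("queried %s, refcount back to %d\n", name, global.refcount);
	return 0;
}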
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index 81b70a3eeb87..186f182b80e7 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -2,9 +2,8 @@ config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
depends on INET && INFINIBAND
depends on INFINIBAND_VIRT_DMA
- select LIBCRC32C
- select CRYPTO
- select CRYPTO_CRC32C
+ select CRC32
+ select NET_CRC32C
help
This driver implements the iWARP RDMA transport over
the Linux TCP/IP network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 75253f2b3e3d..f5fd71717b80 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -10,9 +10,9 @@
#include <rdma/restrack.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
-#include <crypto/hash.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
+#include <linux/unaligned.h>
#include <rdma/siw-abi.h>
#include "iwarp.h"
@@ -46,6 +46,9 @@
*/
#define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
+/* There is always only a port 1 per siw device */
+#define SIW_PORT 1
+
struct siw_dev_cap {
int max_qp;
int max_qp_wr;
@@ -69,16 +72,12 @@ struct siw_pd {
struct siw_device {
struct ib_device base_dev;
- struct net_device *netdev;
struct siw_dev_cap attrs;
u32 vendor_part_id;
int numa_node;
char raw_gid[ETH_ALEN];
- /* physical port state (only one port per device) */
- enum ib_port_state state;
-
spinlock_t lock;
struct xarray qp_xa;
@@ -94,8 +93,6 @@ struct siw_device {
atomic_t num_mr;
atomic_t num_srq;
atomic_t num_ctx;
-
- struct work_struct netdev_down;
};
struct siw_ucontext {
@@ -292,7 +289,8 @@ struct siw_rx_stream {
union iwarp_hdr hdr;
struct mpa_trailer trailer;
- struct shash_desc *mpa_crc_hd;
+ u32 mpa_crc;
+ bool mpa_crc_enabled;
/*
* For each FPDU, main RX loop runs through 3 stages:
@@ -393,7 +391,8 @@ struct siw_iwarp_tx {
int burst;
int bytes_unsent; /* ddp payload bytes */
- struct shash_desc *mpa_crc_hd;
+ u32 mpa_crc;
+ bool mpa_crc_enabled;
u8 do_crc : 1; /* do crc for segment */
u8 use_sendpage : 1; /* send w/o copy */
@@ -499,7 +498,6 @@ extern u_char mpa_version;
extern const bool peer_to_peer;
extern struct task_struct *siw_tx_thread[];
-extern struct crypto_shash *siw_crypto_shash;
extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1];
/* QP general functions */
@@ -671,29 +669,33 @@ static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp)
return NULL;
}
-static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
+static inline void siw_crc_init(u32 *crc)
{
- return (__force __wsum)crc32c((__force __u32)sum, buff, len);
+ *crc = ~0;
}
-static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
- int len)
+static inline void siw_crc_update(u32 *crc, const void *data, size_t len)
{
- return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
- (__force __u32)csum2, len);
+ *crc = crc32c(*crc, data, len);
}
-static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
+static inline void siw_crc_final(u32 *crc, u8 out[4])
{
- const struct skb_checksum_ops siw_cs_ops = {
- .update = siw_csum_update,
- .combine = siw_csum_combine,
- };
- __wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd);
+ put_unaligned_le32(~*crc, out);
+}
- crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc,
- &siw_cs_ops);
- *(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc;
+static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4])
+{
+ u32 crc;
+
+ siw_crc_init(&crc);
+ siw_crc_update(&crc, data, len);
+ siw_crc_final(&crc, out);
+}
+
+static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
+{
+ srx->mpa_crc = skb_crc32c(srx->skb, srx->skb_offset, len, srx->mpa_crc);
}
#define siw_dbg(ibdev, fmt, ...) \
@@ -716,7 +718,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
"MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__)
#define siw_dbg_cep(cep, fmt, ...) \
- ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \
+ ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \
cep, __func__, ##__VA_ARGS__)
void siw_cq_flush(struct siw_cq *cq);
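
The siw_crc_* helpers above encode MPA's CRC32C convention: seed with all ones, fold data in with crc32c(), invert, and store the result little-endian. The self-contained sketch below models that init/update/final sequence with a bitwise CRC32C so it runs outside the kernel; it illustrates the convention and is not the kernel's crc32c() implementation:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC32C (Castagnoli, reflected polynomial 0x82F63B78); slow but
 * self-contained, standing in for the kernel's crc32c() library call.
 */
static uint32_t crc32c_update(uint32_t crc, const void *data, size_t len)
{
	const uint8_t *p = data;
	size_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= p[i];
		for (bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return crc;
}

/* Same shape as siw_crc_init()/siw_crc_update()/siw_crc_final(). */
static void crc_init(uint32_t *crc)
{
	*crc = ~0u;		/* seed with all ones */
}

static void crc_update(uint32_t *crc, const void *data, size_t len)
{
	*crc = crc32c_update(*crc, data, len);
}

static void crc_final(uint32_t *crc, uint8_t out[4])
{
	uint32_t v = ~*crc;	/* final inversion */

	out[0] = v & 0xff;	/* emit little-endian, as put_unaligned_le32() does */
	out[1] = (v >> 8) & 0xff;
	out[2] = (v >> 16) & 0xff;
	out[3] = (v >> 24) & 0xff;
}

int main(void)
{
	const char msg[] = "123456789";	/* standard CRC test vector */
	uint32_t crc;
	uint8_t trailer[4];

	crc_init(&crc);
	crc_update(&crc, msg, strlen(msg));
	crc_final(&crc, trailer);

	/* CRC32C("123456789") is 0xE3069283; printed bytes are its LE encoding */
	printf("%02x %02x %02x %02x\n",
	       trailer[0], trailer[1], trailer[2], trailer[3]);
	return 0;
}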
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 86323918a570..708b13993fdf 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
{
struct socket *s;
struct siw_cep *cep = NULL;
+ struct net_device *ndev = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
int addr_family = id->local_addr.ss_family;
int rv = 0;
@@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
/* For wildcard addr, limit binding to current device only */
- if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
- s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
+ ndev = ib_device_get_netdev(id->device, SIW_PORT);
+ if (ndev) {
+ s->sk->sk_bound_dev_if = ndev->ifindex;
+ } else {
+ rv = -ENODEV;
+ goto error;
+ }
+ }
rv = s->ops->bind(s, (struct sockaddr *)laddr,
sizeof(struct sockaddr_in));
} else {
@@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
}
/* For wildcard addr, limit binding to current device only */
- if (ipv6_addr_any(&laddr->sin6_addr))
- s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+ if (ipv6_addr_any(&laddr->sin6_addr)) {
+ ndev = ib_device_get_netdev(id->device, SIW_PORT);
+ if (ndev) {
+ s->sk->sk_bound_dev_if = ndev->ifindex;
+ } else {
+ rv = -ENODEV;
+ goto error;
+ }
+ }
rv = s->ops->bind(s, (struct sockaddr *)laddr,
sizeof(struct sockaddr_in6));
}
@@ -1860,6 +1873,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
}
list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
cep->state = SIW_EPSTATE_LISTENING;
+ dev_put(ndev);
siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
@@ -1879,6 +1893,7 @@ error:
siw_cep_set_free_and_put(cep);
}
sock_release(s);
+ dev_put(ndev);
return rv;
}
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index f3c2226aff94..25b3c741b66b 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -72,7 +72,7 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
wc->opcode = map_wc_opcode[cqe->opcode];
wc->status = map_cqe_status[cqe->status].ib;
siw_dbg_cq(cq,
- "idx %u, type %d, flags %2x, id 0x%pK\n",
+ "idx %u, type %d, flags %2x, id 0x%p\n",
cq->cq_get % cq->num_cqe, cqe->opcode,
cqe->flags, (void *)(uintptr_t)cqe->id);
} else {
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 61ad8ca3d1a2..5168307229a9 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -59,7 +59,6 @@ u_char mpa_version = MPA_REVISION_2;
const bool peer_to_peer;
struct task_struct *siw_tx_thread[NR_CPUS];
-struct crypto_shash *siw_crypto_shash;
static int siw_device_register(struct siw_device *sdev, const char *name)
{
@@ -287,7 +286,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
return NULL;
base_dev = &sdev->base_dev;
- sdev->netdev = netdev;
if (netdev->addr_len) {
memcpy(sdev->raw_gid, netdev->dev_addr,
@@ -364,39 +362,6 @@ error:
return NULL;
}
-/*
- * Network link becomes unavailable. Mark all
- * affected QP's accordingly.
- */
-static void siw_netdev_down(struct work_struct *work)
-{
- struct siw_device *sdev =
- container_of(work, struct siw_device, netdev_down);
-
- struct siw_qp_attrs qp_attrs;
- struct list_head *pos, *tmp;
-
- memset(&qp_attrs, 0, sizeof(qp_attrs));
- qp_attrs.state = SIW_QP_STATE_ERROR;
-
- list_for_each_safe(pos, tmp, &sdev->qp_list) {
- struct siw_qp *qp = list_entry(pos, struct siw_qp, devq);
-
- down_write(&qp->state_lock);
- WARN_ON(siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE));
- up_write(&qp->state_lock);
- }
- ib_device_put(&sdev->base_dev);
-}
-
-static void siw_device_goes_down(struct siw_device *sdev)
-{
- if (ib_device_try_get(&sdev->base_dev)) {
- INIT_WORK(&sdev->netdev_down, siw_netdev_down);
- schedule_work(&sdev->netdev_down);
- }
-}
-
static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
void *arg)
{
@@ -413,20 +378,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
sdev = to_siw_dev(base_dev);
switch (event) {
- case NETDEV_UP:
- sdev->state = IB_PORT_ACTIVE;
- siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
- break;
-
- case NETDEV_GOING_DOWN:
- siw_device_goes_down(sdev);
- break;
-
- case NETDEV_DOWN:
- sdev->state = IB_PORT_DOWN;
- siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
- break;
-
case NETDEV_REGISTER:
/*
* Device registration now handled only by
@@ -444,12 +395,8 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
break;
/*
- * Todo: Below netdev events are currently not handled.
+ * All other events are not handled
*/
- case NETDEV_CHANGEMTU:
- case NETDEV_CHANGE:
- break;
-
default:
break;
}
@@ -479,12 +426,7 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
sdev = siw_device_create(netdev);
if (sdev) {
dev_dbg(&netdev->dev, "siw: new device\n");
-
- if (netif_running(netdev) && netif_carrier_ok(netdev))
- sdev->state = IB_PORT_ACTIVE;
- else
- sdev->state = IB_PORT_DOWN;
-
+ ib_mark_name_assigned_by_user(&sdev->base_dev);
rv = siw_device_register(sdev, basedev_name);
if (rv)
ib_dealloc_device(&sdev->base_dev);
@@ -524,20 +466,7 @@ static __init int siw_init_module(void)
rv = -ENOMEM;
goto out_error;
}
- /*
- * Locate CRC32 algorithm. If unsuccessful, fail
- * loading siw only, if CRC is required.
- */
- siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(siw_crypto_shash)) {
- pr_info("siw: Loading CRC32c failed: %ld\n",
- PTR_ERR(siw_crypto_shash));
- siw_crypto_shash = NULL;
- if (mpa_crc_required) {
- rv = -EOPNOTSUPP;
- goto out_error;
- }
- }
+
rv = register_netdevice_notifier(&siw_netdev_nb);
if (rv)
goto out_error;
@@ -550,9 +479,6 @@ static __init int siw_init_module(void)
out_error:
siw_stop_tx_threads();
- if (siw_crypto_shash)
- crypto_free_shash(siw_crypto_shash);
-
pr_info("SoftIWARP attach failed. Error: %d\n", rv);
siw_cm_exit();
@@ -573,9 +499,6 @@ static void __exit siw_exit_module(void)
siw_destroy_cpulist(siw_cpu_info.num_nodes);
- if (siw_crypto_shash)
- crypto_free_shash(siw_crypto_shash);
-
pr_info("SoftiWARP detached\n");
}
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index dcb963607c8b..d5ddeb17bd22 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -18,30 +18,6 @@
#define SIW_STAG_MAX_INDEX 0x00ffffff
/*
- * The code avoids special Stag of zero and tries to randomize
- * STag values between 1 and SIW_STAG_MAX_INDEX.
- */
-int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
-{
- struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
- u32 id, next;
-
- get_random_bytes(&next, 4);
- next &= SIW_STAG_MAX_INDEX;
-
- if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
- GFP_KERNEL) < 0)
- return -ENOMEM;
-
- /* Set the STag index part */
- m->stag = id << 8;
-
- siw_dbg_mem(m, "new MEM object\n");
-
- return 0;
-}
-
-/*
* siw_mem_id2obj()
*
* resolves memory from stag given by id. might be called from:
@@ -181,10 +157,10 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
*/
if (addr < mem->va || addr + len > mem->va + mem->len) {
siw_dbg_pd(pd, "MEM interval len %d\n", len);
- siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
+ siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n",
(void *)(uintptr_t)addr,
(void *)(uintptr_t)(addr + len));
- siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
+ siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n",
(void *)(uintptr_t)mem->va,
(void *)(uintptr_t)(mem->va + mem->len),
mem->stag);
diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h
index e74cfcd6dbc1..8e769d30e2ac 100644
--- a/drivers/infiniband/sw/siw/siw_mem.h
+++ b/drivers/infiniband/sw/siw/siw_mem.h
@@ -12,7 +12,6 @@ void siw_umem_release(struct siw_umem *umem);
struct siw_pbl *siw_pbl_alloc(u32 num_buf);
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
-int siw_mem_add(struct siw_device *sdev, struct siw_mem *m);
int siw_invalidate_stag(struct ib_pd *pd, u32 stag);
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
enum ib_access_flags perms, int len);
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index da92cfa2073d..c1e6e7d6e32f 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -226,33 +226,6 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
return 0;
}
-static int siw_qp_enable_crc(struct siw_qp *qp)
-{
- struct siw_rx_stream *c_rx = &qp->rx_stream;
- struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
- int size;
-
- if (siw_crypto_shash == NULL)
- return -ENOENT;
-
- size = crypto_shash_descsize(siw_crypto_shash) +
- sizeof(struct shash_desc);
-
- c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
- kfree(c_tx->mpa_crc_hd);
- kfree(c_rx->mpa_crc_hd);
- c_tx->mpa_crc_hd = NULL;
- c_rx->mpa_crc_hd = NULL;
- return -ENOMEM;
- }
- c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
- c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
-
- return 0;
-}
-
/*
* Send a non signalled READ or WRITE to peer side as negotiated
* with MPAv2 P2P setup protocol. The work request is only created
@@ -583,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp)
term->ctrl.mpa_len =
cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
- if (qp->tx_ctx.mpa_crc_hd) {
- crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[0].iov_base,
- iov[0].iov_len))
- goto out;
-
+ if (qp->tx_ctx.mpa_crc_enabled) {
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[0].iov_base, iov[0].iov_len);
if (num_frags == 3) {
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[1].iov_base,
- iov[1].iov_len))
- goto out;
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[1].iov_base, iov[1].iov_len);
}
- crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
+ siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc);
}
rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
@@ -604,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp)
rv == len_terminate ? "success" : "failure",
__rdmap_term_layer(term), __rdmap_term_etype(term),
__rdmap_term_ecode(term), rv);
-out:
kfree(term);
kfree(err_hdr);
}
@@ -643,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
switch (attrs->state) {
case SIW_QP_STATE_RTS:
if (attrs->flags & SIW_MPA_CRC) {
- rv = siw_qp_enable_crc(qp);
- if (rv)
- break;
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ qp->tx_ctx.mpa_crc_enabled = true;
+ siw_crc_init(&qp->rx_stream.mpa_crc);
+ qp->rx_stream.mpa_crc_enabled = true;
}
if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
siw_dbg_qp(qp, "no socket\n");
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index ed4fc39718b4..a10820e33887 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -38,7 +38,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
p = siw_get_upage(umem, dest_addr);
if (unlikely(!p)) {
- pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
+ pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
__func__, qp_id(rx_qp(srx)),
(void *)(uintptr_t)dest_addr,
(void *)(uintptr_t)umem->fp_addr);
@@ -51,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
pg_off = dest_addr & ~PAGE_MASK;
bytes = min(len, (int)PAGE_SIZE - pg_off);
- siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
+ siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);
dest = kmap_atomic(p);
rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
@@ -67,10 +67,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
return -EFAULT;
}
- if (srx->mpa_crc_hd) {
+ if (srx->mpa_crc_enabled) {
if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
- crypto_shash_update(srx->mpa_crc_hd,
- (u8 *)(dest + pg_off), bytes);
+ siw_crc_update(&srx->mpa_crc, dest + pg_off,
+ bytes);
kunmap_atomic(dest);
} else {
kunmap_atomic(dest);
@@ -105,17 +105,17 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
{
int rv;
- siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
+ siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);
rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
if (unlikely(rv)) {
- pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
+ pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
qp_id(rx_qp(srx)), __func__, len, kva, rv);
return rv;
}
- if (srx->mpa_crc_hd)
- crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
+ if (srx->mpa_crc_enabled)
+ siw_crc_update(&srx->mpa_crc, kva, len);
srx->skb_offset += len;
srx->skb_copied += len;
@@ -966,16 +966,16 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
if (srx->fpdu_part_rem)
return -EAGAIN;
- if (!srx->mpa_crc_hd)
+ if (!srx->mpa_crc_enabled)
return 0;
if (srx->pad)
- crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
+ siw_crc_update(&srx->mpa_crc, tbuf, srx->pad);
/*
* CRC32 is computed, transmitted and received directly in NBO,
* so there's never a reason to convert byte order.
*/
- crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
+ siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own);
crc_in = (__force __wsum)srx->trailer.crc;
if (unlikely(crc_in != crc_own)) {
@@ -1093,13 +1093,12 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
* (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
* but not by a READ RESPONSE etc.
*/
- if (srx->mpa_crc_hd) {
+ if (srx->mpa_crc_enabled) {
/*
* Restart CRC computation
*/
- crypto_shash_init(srx->mpa_crc_hd);
- crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
- srx->fpdu_part_rcvd);
+ siw_crc_init(&srx->mpa_crc);
+ siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd);
}
if (frx->more_ddp_segs) {
frx->first_ddp_seg = 0;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 64ad9e0895bd..6432bce7d083 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -248,10 +248,8 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
/*
* Do complete CRC if enabled and short packet
*/
- if (c_tx->mpa_crc_hd &&
- crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
- c_tx->ctrl_len, (u8 *)crc) != 0)
- return -EINVAL;
+ if (c_tx->mpa_crc_enabled)
+ siw_crc_oneshot(&c_tx->pkt, c_tx->ctrl_len, (u8 *)crc);
c_tx->ctrl_len += MPA_CRC_SIZE;
return PKT_COMPLETE;
@@ -331,6 +329,8 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
msg.msg_flags &= ~MSG_MORE;
tcp_rate_check_app_limited(sk);
+ if (!sendpage_ok(page[i]))
+ msg.msg_flags &= ~MSG_SPLICE_PAGES;
bvec_set_page(&bvec, page[i], bytes, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
@@ -480,9 +480,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
iov[seg].iov_len = sge_len;
if (do_crc)
- crypto_shash_update(c_tx->mpa_crc_hd,
- iov[seg].iov_base,
- sge_len);
+ siw_crc_update(&c_tx->mpa_crc,
+ iov[seg].iov_base, sge_len);
sge_off += sge_len;
data_len -= sge_len;
seg++;
@@ -514,15 +513,14 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
iov[seg].iov_len = plen;
if (do_crc)
- crypto_shash_update(
- c_tx->mpa_crc_hd,
+ siw_crc_update(
+ &c_tx->mpa_crc,
iov[seg].iov_base,
plen);
} else if (do_crc) {
kaddr = kmap_local_page(p);
- crypto_shash_update(c_tx->mpa_crc_hd,
- kaddr + fp_off,
- plen);
+ siw_crc_update(&c_tx->mpa_crc,
+ kaddr + fp_off, plen);
kunmap_local(kaddr);
}
} else {
@@ -534,10 +532,9 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
page_array[seg] = ib_virt_dma_to_page(va);
if (do_crc)
- crypto_shash_update(
- c_tx->mpa_crc_hd,
- ib_virt_dma_to_ptr(va),
- plen);
+ siw_crc_update(&c_tx->mpa_crc,
+ ib_virt_dma_to_ptr(va),
+ plen);
}
sge_len -= plen;
@@ -574,14 +571,14 @@ sge_done:
if (c_tx->pad) {
*(u32 *)c_tx->trailer.pad = 0;
if (do_crc)
- crypto_shash_update(c_tx->mpa_crc_hd,
- (u8 *)&c_tx->trailer.crc - c_tx->pad,
- c_tx->pad);
+ siw_crc_update(&c_tx->mpa_crc,
+ (u8 *)&c_tx->trailer.crc - c_tx->pad,
+ c_tx->pad);
}
- if (!c_tx->mpa_crc_hd)
+ if (!c_tx->mpa_crc_enabled)
c_tx->trailer.crc = 0;
else if (do_crc)
- crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
+ siw_crc_final(&c_tx->mpa_crc, (u8 *)&c_tx->trailer.crc);
data_len = c_tx->bytes_unsent;
@@ -734,10 +731,9 @@ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
/*
* Init MPA CRC computation
*/
- if (c_tx->mpa_crc_hd) {
- crypto_shash_init(c_tx->mpa_crc_hd);
- crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
- c_tx->ctrl_len);
+ if (c_tx->mpa_crc_enabled) {
+ siw_crc_init(&c_tx->mpa_crc);
+ siw_crc_update(&c_tx->mpa_crc, &c_tx->pkt, c_tx->ctrl_len);
c_tx->do_crc = 1;
}
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index ecf0444666b4..2b2a7b8e93b0 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -171,21 +171,28 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
int siw_query_port(struct ib_device *base_dev, u32 port,
struct ib_port_attr *attr)
{
- struct siw_device *sdev = to_siw_dev(base_dev);
+ struct net_device *ndev;
int rv;
memset(attr, 0, sizeof(*attr));
rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
&attr->active_width);
+ if (rv)
+ return rv;
+
+ ndev = ib_device_get_netdev(base_dev, SIW_PORT);
+ if (!ndev)
+ return -ENODEV;
+
attr->gid_tbl_len = 1;
attr->max_msg_sz = -1;
- attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
- attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
- attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+ attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
+ attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
+ attr->state = ib_get_curr_port_state(ndev);
+ attr->phys_state = attr->state == IB_PORT_ACTIVE ?
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
- attr->state = sdev->state;
/*
* All zero
*
@@ -199,6 +206,7 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
* attr->subnet_timeout = 0;
* attr->init_type_repy = 0;
*/
+ dev_put(ndev);
return rv;
}
@@ -505,21 +513,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
struct siw_qp *qp;
- struct siw_device *sdev;
+ struct net_device *ndev;
- if (base_qp && qp_attr && qp_init_attr) {
+ if (base_qp && qp_attr && qp_init_attr)
qp = to_siw_qp(base_qp);
- sdev = to_siw_dev(base_qp->device);
- } else {
+ else
return -EINVAL;
- }
+
+ ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
+ if (!ndev)
+ return -ENODEV;
+
qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
qp_attr->cap.max_send_wr = qp->attrs.sq_size;
qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
- qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+ qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
qp_attr->max_rd_atomic = qp->attrs.irq_size;
qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
@@ -534,6 +545,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
+ dev_put(ndev);
return 0;
}
@@ -619,9 +631,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
}
up_write(&qp->state_lock);
- kfree(qp->tx_ctx.mpa_crc_hd);
- kfree(qp->rx_stream.mpa_crc_hd);
-
qp->scq = qp->rcq = NULL;
siw_qp_put(qp);
@@ -927,7 +936,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
rv = -EINVAL;
break;
}
- siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n",
+ siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n",
sqe->opcode, sqe->flags,
(void *)(uintptr_t)sqe->id);
@@ -1093,7 +1102,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
siw_dbg_qp(qp, "error %d\n", rv);
*bad_wr = wr;
}
- return rv > 0 ? 0 : rv;
+ return rv;
}
int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
@@ -1124,12 +1133,13 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
*
* @base_cq: CQ as allocated by RDMA midlayer
* @attr: Initial CQ attributes
- * @udata: relates to user context
+ * @attrs: uverbs bundle
*/
int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct siw_device *sdev = to_siw_dev(base_cq->device);
struct siw_cq *cq = to_siw_cq(base_cq);
int rv, size = attr->cqe;
@@ -1322,7 +1332,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_device *sdev = to_siw_dev(pd->device);
int rv;
- siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
+ siw_dbg_pd(pd, "start: 0x%p, va: 0x%p, len: %llu\n",
(void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va,
(unsigned long long)len);
@@ -1515,7 +1525,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
mem->len = base_mr->length;
mem->va = base_mr->iova;
siw_dbg_mem(mem,
- "%llu bytes, start 0x%pK, %u SLE to %u entries\n",
+ "%llu bytes, start 0x%p, %u SLE to %u entries\n",
mem->len, (void *)(uintptr_t)mem->va, num_sle,
pbl->num_buf);
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index 4b57a4fb7237..1f1a305540af 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -43,7 +43,7 @@ int siw_get_port_immutable(struct ib_device *base_dev, u32 port,
int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
struct ib_udata *udata);
int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int siw_query_port(struct ib_device *base_dev, u32 port,
struct ib_port_attr *attr);
int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,