Diffstat (limited to 'drivers/infiniband/sw')
42 files changed, 1031 insertions, 666 deletions
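A recurring change in the rdmavt hunks below is the move from the old timer API names (del_timer()/del_timer_sync(), and hrtimer_init() followed by a manual .function assignment) to timer_delete()/timer_delete_sync() and hrtimer_setup(). The sketch that follows is illustrative only and not part of the patch: the struct and the example_* names are hypothetical stand-ins, showing the conversion pattern applied throughout drivers/infiniband/sw/rdmavt/qp.c.

#include <linux/timer.h>
#include <linux/hrtimer.h>

struct example_qp {
	struct timer_list s_timer;      /* classic timer, jiffies based */
	struct hrtimer s_rnr_timer;     /* high-resolution RNR timer */
};

static enum hrtimer_restart example_rnr_retry(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void example_timeout(struct timer_list *t)
{
}

static void example_init(struct example_qp *qp)
{
	/* old style:
	 *   hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	 *   qp->s_rnr_timer.function = example_rnr_retry;
	 * new style: callback is passed directly to hrtimer_setup()
	 */
	hrtimer_setup(&qp->s_rnr_timer, example_rnr_retry, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
	timer_setup(&qp->s_timer, example_timeout, 0);
}

static void example_teardown(struct example_qp *qp)
{
	/* same semantics as the old del_timer_sync()/del_timer(), new names */
	timer_delete_sync(&qp->s_timer);
	hrtimer_cancel(&qp->s_rnr_timer);
}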
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 82c3f5932249..0ca2743f1075 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> +#include <rdma/uverbs_ioctl.h> #include "cq.h" #include "vt.h" #include "trace.h" @@ -149,15 +150,16 @@ static void send_complete(struct work_struct *work) * rvt_create_cq - create a completion queue * @ibcq: Allocated CQ * @attr: creation attributes - * @udata: user data for libibverbs.so + * @attrs: uverbs bundle * * Called by ib_create_cq() in the generic verbs code. * * Return: 0 on success */ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata) + struct uverbs_attr_bundle *attrs) { + struct ib_udata *udata = &attrs->driver_udata; struct ib_device *ibdev = ibcq->device; struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index d49b6d1a26cb..4028702a7b2f 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -10,7 +10,7 @@ #include <rdma/rdmavt_cq.h> int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 7a9afd5231d5..5ed5cfc2b280 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -348,13 +348,13 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, umem = ib_umem_get(pd->device, start, length, mr_access_flags); if (IS_ERR(umem)) - return (void *)umem; + return ERR_CAST(umem); n = ib_umem_num_pages(umem); mr = __rvt_alloc_mr(n, pd); if (IS_ERR(mr)) { - ret = (struct ib_mr *)mr; + ret = ERR_CAST(mr); goto bail_umem; } @@ -542,7 +542,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr = __rvt_alloc_mr(max_num_sg, pd); if (IS_ERR(mr)) - return (struct ib_mr *)mr; + return ERR_CAST(mr); return &mr->ibmr; } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e6203e26cc06..583debe4b9a2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1107,9 +1107,8 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, } /* initialize timers needed for rc qp */ timer_setup(&qp->s_timer, rvt_rc_timeout, 0); - hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - qp->s_rnr_timer.function = rvt_rc_rnr_retry; + hrtimer_setup(&qp->s_rnr_timer, rvt_rc_rnr_retry, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); /* * Driver needs to set up it's private QP structure and do any @@ -1298,7 +1297,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); + timer_delete(&qp->s_timer); } if (qp->s_flags & RVT_S_ANY_WAIT_SEND) @@ -2547,7 +2546,7 @@ void rvt_stop_rc_timers(struct rvt_qp *qp) /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); + 
timer_delete(&qp->s_timer); hrtimer_try_to_cancel(&qp->s_rnr_timer); } } @@ -2576,7 +2575,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) */ void rvt_del_timers_sync(struct rvt_qp *qp) { - del_timer_sync(&qp->s_timer); + timer_delete_sync(&qp->s_timer); hrtimer_cancel(&qp->s_rnr_timer); } EXPORT_SYMBOL(rvt_del_timers_sync); @@ -2597,7 +2596,7 @@ static void rvt_rc_timeout(struct timer_list *t) qp->s_flags &= ~RVT_S_TIMER; rvp->n_rc_timeouts++; - del_timer(&qp->s_timer); + timer_delete(&qp->s_timer); trace_rvt_rc_timeout(qp, qp->s_last_psn + 1); if (rdi->driver_f.notify_restart_rc) rdi->driver_f.notify_restart_rc(qp, diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 4341965a5ea7..bdb6b9326b64 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -4,7 +4,7 @@ */ #define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi)) -#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rvt_get_ibdev_name(rdi)) +#define RDI_DEV_ASSIGN(rdi) __assign_str(dev) #include "trace_rvt.h" #include "trace_qp.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h index df33c2ca9710..a00489e66ddf 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rvt.h +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -24,7 +24,7 @@ TRACE_EVENT(rvt_dbg, ), TP_fast_assign( RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); + __assign_str(msg); ), TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) ); diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 06b8dc5093f7..1ed5b63f8afc 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -1,11 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config RDMA_RXE tristate "Software RDMA over Ethernet (RoCE) driver" - depends on INET && PCI && INFINIBAND + depends on INET && PCI && INFINIBAND && 64BIT depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL - select CRYPTO - select CRYPTO_CRC32 + select CRC32 help This driver implements the InfiniBand RDMA transport over the Linux network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile index 5395a581f4bb..93134f1d1d0c 100644 --- a/drivers/infiniband/sw/rxe/Makefile +++ b/drivers/infiniband/sw/rxe/Makefile @@ -23,3 +23,5 @@ rdma_rxe-y := \ rxe_task.o \ rxe_net.o \ rxe_hw_counters.o + +rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 255677bc12b2..3a77d6db1720 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -31,14 +31,11 @@ void rxe_dealloc(struct ib_device *ib_dev) WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); - if (rxe->tfm) - crypto_free_shash(rxe->tfm); - mutex_destroy(&rxe->usdev_lock); } /* initialize rxe device parameters */ -static void rxe_init_device_param(struct rxe_dev *rxe) +static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; @@ -71,10 +68,42 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + + if (ndev->addr_len) { + memcpy(rxe->raw_gid, ndev->dev_addr, + min_t(unsigned int, ndev->addr_len, ETH_ALEN)); + } else { + /* + * This device does not have a HW address, but + * connection mangagement requires a unique gid. + */ + eth_random_addr(rxe->raw_gid); + } + addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, - rxe->ndev->dev_addr); + rxe->raw_gid); rxe->max_ucontext = RXE_MAX_UCONTEXT; + + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING; + + /* IB_ODP_SUPPORT_IMPLICIT is not supported right now. 
*/ + rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT; + + rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV; + rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_FLUSH; + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC_WRITE; + } } /* initialize port attributes */ @@ -106,13 +135,13 @@ static void rxe_init_port_param(struct rxe_port *port) /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ -static void rxe_init_ports(struct rxe_dev *rxe) +static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); addrconf_addr_eui48((unsigned char *)&port->port_guid, - rxe->ndev->dev_addr); + rxe->raw_gid); spin_lock_init(&port->port_lock); } @@ -130,12 +159,12 @@ static void rxe_init_pools(struct rxe_dev *rxe) } /* initialize rxe device state */ -static void rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev) { /* init default device parameters */ - rxe_init_device_param(rxe); + rxe_init_device_param(rxe, ndev); - rxe_init_ports(rxe); + rxe_init_ports(rxe, ndev); rxe_init_pools(rxe); /* init pending mmap list */ @@ -167,12 +196,13 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) /* called by ifc layer to create new rxe device. * The caller should allocate memory for rxe by calling ib_alloc_device. */ -int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) +int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, + struct net_device *ndev) { - rxe_init(rxe); + rxe_init(rxe, ndev); rxe_set_mtu(rxe, mtu); - return rxe_register_device(rxe, ibdev_name); + return rxe_register_device(rxe, ibdev_name, ndev); } static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index d8fb2c7af30a..ff8cd53f5f28 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -21,7 +21,6 @@ #include <rdma/ib_umem.h> #include <rdma/ib_cache.h> #include <rdma/ib_addr.h> -#include <crypto/hash.h> #include "rxe_net.h" #include "rxe_opcode.h" @@ -100,46 +99,10 @@ #define rxe_info_mw(mw, fmt, ...) 
ibdev_info_ratelimited((mw)->ibmw.device, \ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) -/* responder states */ -enum resp_states { - RESPST_NONE, - RESPST_GET_REQ, - RESPST_CHK_PSN, - RESPST_CHK_OP_SEQ, - RESPST_CHK_OP_VALID, - RESPST_CHK_RESOURCE, - RESPST_CHK_LENGTH, - RESPST_CHK_RKEY, - RESPST_EXECUTE, - RESPST_READ_REPLY, - RESPST_ATOMIC_REPLY, - RESPST_ATOMIC_WRITE_REPLY, - RESPST_PROCESS_FLUSH, - RESPST_COMPLETE, - RESPST_ACKNOWLEDGE, - RESPST_CLEANUP, - RESPST_DUPLICATE_REQUEST, - RESPST_ERR_MALFORMED_WQE, - RESPST_ERR_UNSUPPORTED_OPCODE, - RESPST_ERR_MISALIGNED_ATOMIC, - RESPST_ERR_PSN_OUT_OF_SEQ, - RESPST_ERR_MISSING_OPCODE_FIRST, - RESPST_ERR_MISSING_OPCODE_LAST_C, - RESPST_ERR_MISSING_OPCODE_LAST_D1E, - RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, - RESPST_ERR_RNR, - RESPST_ERR_RKEY_VIOLATION, - RESPST_ERR_INVALIDATE_RKEY, - RESPST_ERR_LENGTH, - RESPST_ERR_CQ_OVERFLOW, - RESPST_ERROR, - RESPST_DONE, - RESPST_EXIT, -}; - void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); -int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); +int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, + struct net_device *ndev); void rxe_rcv(struct sk_buff *skb); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index b78b8c0856ab..d48af2180745 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -122,25 +122,16 @@ void retransmit_timer(struct timer_list *t) spin_lock_irqsave(&qp->state_lock, flags); if (qp->valid) { qp->comp.timeout = 1; - rxe_sched_task(&qp->comp.task); + rxe_sched_task(&qp->send_task); } spin_unlock_irqrestore(&qp->state_lock, flags); } void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - + rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED); skb_queue_tail(&qp->resp_pkts, skb); - - must_sched = skb_queue_len(&qp->resp_pkts) > 1; - if (must_sched != 0) - rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - - if (must_sched) - rxe_sched_task(&qp->comp.task); - else - rxe_run_task(&qp->comp.task); + rxe_sched_task(&qp->send_task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -325,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } return COMPST_ERROR_RETRY; @@ -476,7 +467,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -515,7 +506,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -541,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } } @@ -654,6 +645,8 @@ int rxe_completer(struct rxe_qp *qp) int ret; unsigned long flags; + qp->req.again = 0; + spin_lock_irqsave(&qp->state_lock, flags); if (!qp->valid || qp_state(qp) == IB_QPS_ERR || qp_state(qp) == IB_QPS_RESET) { @@ -737,7 +730,7 @@ int rxe_completer(struct rxe_qp *qp) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } state = COMPST_DONE; @@ -792,7 +785,7 @@ int rxe_completer(struct rxe_qp *qp) 
RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_sched_task(&qp->req.task); + qp->req.again = 1; } goto done; @@ -843,8 +836,9 @@ done: ret = 0; goto out; exit: - ret = -EAGAIN; + ret = (qp->req.again) ? 0 : -EAGAIN; out: + qp->req.again = 0; if (pkt) free_pkt(pkt); return ret; diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030ab..fffd144d509e 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 46f82b27fcd2..1f0322491d8c 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -234,7 +234,7 @@ static inline void __bth_set_resv6a(void *arg) { struct rxe_bth *bth = arg; - bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); + bth->qpn &= cpu_to_be32(~BTH_RESV6A_MASK); } static inline int __bth_ack(void *arg) diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index a012522b577a..437917a7d8f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -14,7 +14,7 @@ static const struct rdma_stat_desc rxe_counter_descs[] = { [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", [RXE_CNT_SND_RNR].name = "send_rnr_err", [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", + [RXE_CNT_SENDER_SCHED].name = "ack_deferred", [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", [RXE_CNT_COMP_RETRY].name = "completer_retry_err", diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index 71f4d4fa9dc8..051f9e1c3852 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -18,7 +18,7 @@ enum rxe_counters { RXE_CNT_RCV_RNR, RXE_CNT_SND_RNR, RXE_CNT_RCV_SEQ_ERR, - RXE_CNT_COMPLETER_SCHED, + RXE_CNT_SENDER_SCHED, RXE_CNT_RETRY_EXCEEDED, RXE_CNT_RNR_RETRY_EXCEEDED, RXE_CNT_COMP_RETRY, diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index fdf5f08cd8f1..76d760fbe7ea 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -10,28 +10,6 @@ #include "rxe_loc.h" /** - * rxe_icrc_init() - Initialize crypto function for computing crc32 - * @rxe: rdma_rxe device object - * - * Return: 0 on success else an error - */ -int rxe_icrc_init(struct rxe_dev *rxe) -{ - struct crypto_shash *tfm; - - tfm = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(tfm)) { - rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n", - PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - - rxe->tfm = tfm; - - return 0; -} - -/** * rxe_crc32() - Compute cumulative crc32 for a contiguous segment * @rxe: rdma_rxe device object * @crc: starting crc32 value from previous segments @@ -42,23 +20,7 @@ int rxe_icrc_init(struct rxe_dev *rxe) */ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) { - __be32 icrc; - int err; - - SHASH_DESC_ON_STACK(shash, rxe->tfm); - - shash->tfm = rxe->tfm; - *(__be32 *)shash_desc_ctx(shash) = 
crc; - err = crypto_shash_update(shash, next, len); - if (unlikely(err)) { - rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err); - return (__force __be32)crc32_le((__force u32)crc, next, len); - } - - icrc = *(__be32 *)shash_desc_ctx(shash); - barrier_data(shash_desc_ctx(shash)); - - return icrc; + return (__force __be32)crc32_le((__force u32)crc, next, len); } /** diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 746110898a0e..876702058c84 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -58,6 +58,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); /* rxe_mr.c */ u8 rxe_get_next_key(u32 last_key); +void rxe_mr_init(int access, struct rxe_mr *mr); void rxe_mr_init_dma(int access, struct rxe_mr *mr); int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, int access, struct rxe_mr *mr); @@ -69,9 +70,9 @@ int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, void *addr, int length, enum rxe_mr_copy_dir dir); int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, - u64 compare, u64 swap_add, u64 *orig_val); -int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value); +enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, + u64 compare, u64 swap_add, u64 *orig_val); +enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value); struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, enum rxe_mr_lookup_type type); int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); @@ -80,6 +81,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); void rxe_mr_cleanup(struct rxe_pool_elem *elem); +/* defined in rxe_mr.c; used in rxe_mr.c and rxe_odp.c */ +extern spinlock_t atomic_ops_lock; + /* rxe_mw.c */ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); int rxe_dealloc_mw(struct ib_mw *ibmw); @@ -136,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } +static inline bool is_odp_mr(struct rxe_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) @@ -164,10 +174,10 @@ void rxe_dealloc(struct ib_device *ib_dev); int rxe_completer(struct rxe_qp *qp); int rxe_requester(struct rxe_qp *qp); -int rxe_responder(struct rxe_qp *qp); +int rxe_sender(struct rxe_qp *qp); +int rxe_receiver(struct rxe_qp *qp); /* rxe_icrc.c */ -int rxe_icrc_init(struct rxe_dev *rxe); int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt); void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt); @@ -180,4 +190,47 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; } +/* rxe_odp.c */ +extern const struct mmu_interval_notifier_ops rxe_mn_ops; + +#if defined CONFIG_INFINIBAND_ON_DEMAND_PAGING +int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, + u64 iova, int access_flags, struct rxe_mr *mr); +int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, + enum rxe_mr_copy_dir dir); +enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, + u64 compare, u64 swap_add, u64 *orig_val); +int 
rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova, + unsigned int length); +enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value); +#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +static inline int +rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, + int access_flags, struct rxe_mr *mr) +{ + return -EOPNOTSUPP; +} +static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, + int length, enum rxe_mr_copy_dir dir) +{ + return -EOPNOTSUPP; +} +static inline enum resp_states +rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, + u64 compare, u64 swap_add, u64 *orig_val) +{ + return RESPST_ERR_UNSUPPORTED_OPCODE; +} +static inline int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova, + unsigned int length) +{ + return -EOPNOTSUPP; +} +static inline enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, + u64 iova, u64 value) +{ + return RESPST_ERR_UNSUPPORTED_OPCODE; +} +#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ + #endif /* RXE_LOC_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 86cc2e18a7fd..07ff47bae31d 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -31,10 +31,19 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { unsigned char ll_addr[ETH_ALEN]; + struct net_device *ndev; + int ret; + + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); + if (!ndev) + return -ENODEV; ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - return dev_mc_add(rxe->ndev, ll_addr); + ret = dev_mc_add(ndev, ll_addr); + dev_put(ndev); + + return ret; } /** @@ -47,10 +56,19 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) { unsigned char ll_addr[ETH_ALEN]; + struct net_device *ndev; + int ret; + + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); + if (!ndev) + return -ENODEV; ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - return dev_mc_del(rxe->ndev, ll_addr); + ret = dev_mc_del(ndev, ll_addr); + dev_put(ndev); + + return ret; } /** diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index da3dee520876..bcb97b3ea58a 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -45,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) } } -static void rxe_mr_init(int access, struct rxe_mr *mr) +void rxe_mr_init(int access, struct rxe_mr *mr) { u32 key = mr->elem.index << 8 | rxe_get_next_key(-1); @@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - return rxe_mr_copy_xarray(mr, iova, addr, length, dir); + if (is_odp_mr(mr)) + return rxe_odp_mr_copy(mr, iova, addr, length, dir); + else + return rxe_mr_copy_xarray(mr, iova, addr, length, dir); } /* copy data in or out of a wqe, i.e. 
sg list @@ -421,7 +424,7 @@ err1: return err; } -int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length) +static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length) { unsigned int page_offset; unsigned long index; @@ -430,16 +433,6 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length) int err; u8 *va; - /* mr must be valid even if length is zero */ - if (WARN_ON(!mr)) - return -EINVAL; - - if (length == 0) - return 0; - - if (mr->ibmr.type == IB_MR_TYPE_DMA) - return -EFAULT; - err = mr_check_range(mr, iova, length); if (err) return err; @@ -451,7 +444,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length) if (!page) return -EFAULT; bytes = min_t(unsigned int, length, - mr_page_size(mr) - page_offset); + mr_page_size(mr) - page_offset); va = kmap_local_page(page); arch_wb_cache_pmem(va + page_offset, bytes); @@ -465,11 +458,33 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length) return 0; } +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 start, unsigned int length) +{ + int err; + + /* mr must be valid even if length is zero */ + if (WARN_ON(!mr)) + return -EINVAL; + + if (length == 0) + return 0; + + if (mr->ibmr.type == IB_MR_TYPE_DMA) + return -EFAULT; + + if (is_odp_mr(mr)) + err = rxe_odp_flush_pmem_iova(mr, start, length); + else + err = rxe_mr_flush_pmem_iova(mr, start, length); + + return err; +} + /* Guarantee atomicity of atomic operations at the machine level. */ -static DEFINE_SPINLOCK(atomic_ops_lock); +DEFINE_SPINLOCK(atomic_ops_lock); -int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, - u64 compare, u64 swap_add, u64 *orig_val) +enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, + u64 compare, u64 swap_add, u64 *orig_val) { unsigned int page_offset; struct page *page; @@ -521,23 +536,15 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, kunmap_local(va); - return 0; + return RESPST_NONE; } -#if defined CONFIG_64BIT -/* only implemented or called for 64 bit architectures */ -int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) +enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) { unsigned int page_offset; struct page *page; u64 *va; - /* See IBA oA19-28 */ - if (unlikely(mr->state != RXE_MR_STATE_VALID)) { - rxe_dbg_mr(mr, "mr not in valid state\n"); - return RESPST_ERR_RKEY_VIOLATION; - } - if (mr->ibmr.type == IB_MR_TYPE_DMA) { page_offset = iova & (PAGE_SIZE - 1); page = ib_virt_dma_to_page(iova); @@ -565,20 +572,12 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) } va = kmap_local_page(page); - /* Do atomic write after all prior operations have completed */ smp_store_release(&va[page_offset >> 3], value); - kunmap_local(va); - return 0; -} -#else -int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) -{ - return RESPST_ERR_UNSUPPORTED_OPCODE; + return RESPST_NONE; } -#endif int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index cd59666158b1..132a87e52d5c 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,46 +345,52 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, static void rxe_skb_tx_dtor(struct sk_buff *skb) { - struct sock *sk = skb->sk; - struct rxe_qp *qp = sk->sk_user_data; - int skb_out = atomic_dec_return(&qp->skb_out); + struct net_device *ndev = 
skb->dev; + struct rxe_dev *rxe; + unsigned int qp_index; + struct rxe_qp *qp; + int skb_out; + + rxe = rxe_get_dev_from_net(ndev); + if (!rxe && is_vlan_dev(ndev)) + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); + if (WARN_ON(!rxe)) + return; - if (unlikely(qp->need_req_skb && - skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) - rxe_sched_task(&qp->req.task); + qp_index = (int)(uintptr_t)skb->sk->sk_user_data; + if (!qp_index) + return; + + qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); + if (!qp) + goto put_dev; + + skb_out = atomic_dec_return(&qp->skb_out); + if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) + rxe_sched_task(&qp->send_task); rxe_put(qp); +put_dev: + ib_device_put(&rxe->ib_dev); + sock_put(skb->sk); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) { int err; + struct sock *sk = pkt->qp->sk->sk; + sock_hold(sk); + skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; - skb->sk = pkt->qp->sk->sk; - - rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); - } else if (skb->protocol == htons(ETH_P_IPV6)) { + else err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); - } else { - rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n", - skb->protocol); - atomic_dec(&pkt->qp->skb_out); - rxe_put(pkt->qp); - kfree_skb(skb); - return -EINVAL; - } - - if (unlikely(net_xmit_eval(err))) { - rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); - return -EAGAIN; - } - return 0; + return err; } /* fix up a send packet to match the packets @@ -392,8 +398,15 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) */ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) { + struct sock *sk = pkt->qp->sk->sk; + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + sock_hold(sk); + skb->sk = sk; + skb->destructor = rxe_skb_tx_dtor; + atomic_inc(&pkt->qp->skb_out); + if (skb->protocol == htons(ETH_P_IP)) skb_pull(skb, sizeof(struct iphdr)); else @@ -440,12 +453,6 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, return err; } - if ((qp_type(qp) != IB_QPT_RC) && - (pkt->mask & RXE_END_MASK)) { - pkt->wqe->state = wqe_state_done; - rxe_sched_task(&qp->comp.task); - } - rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); goto done; @@ -517,7 +524,16 @@ out: */ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) { - return rxe->ndev->name; + struct net_device *ndev; + char *ndev_name; + + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); + if (!ndev) + return NULL; + ndev_name = ndev->name; + dev_put(ndev); + + return ndev_name; } int rxe_net_add(const char *ibdev_name, struct net_device *ndev) @@ -529,9 +545,9 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev) if (!rxe) return -ENOMEM; - rxe->ndev = ndev; + ib_mark_name_assigned_by_user(&rxe->ib_dev); - err = rxe_add(rxe, ndev->mtu, ibdev_name); + err = rxe_add(rxe, ndev->mtu, ibdev_name, ndev); if (err) { ib_dealloc_device(&rxe->ib_dev); return err; @@ -555,11 +571,6 @@ static void rxe_port_event(struct rxe_dev *rxe, /* Caller must hold net_info_lock */ void rxe_port_up(struct rxe_dev *rxe) { - struct rxe_port *port; - - port = &rxe->port; - port->attr.state = IB_PORT_ACTIVE; - rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); dev_info(&rxe->ib_dev.dev, "set active\n"); } @@ -567,11 +578,6 @@ void rxe_port_up(struct rxe_dev *rxe) /* Caller must hold net_info_lock */ void rxe_port_down(struct rxe_dev *rxe) { - 
struct rxe_port *port; - - port = &rxe->port; - port->attr.state = IB_PORT_DOWN; - rxe_port_event(rxe, IB_EVENT_PORT_ERR); rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); dev_info(&rxe->ib_dev.dev, "set down\n"); @@ -579,10 +585,18 @@ void rxe_port_down(struct rxe_dev *rxe) void rxe_set_port_state(struct rxe_dev *rxe) { - if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev)) + struct net_device *ndev; + + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); + if (!ndev) + return; + + if (ib_get_curr_port_state(ndev) == IB_PORT_ACTIVE) rxe_port_up(rxe); else rxe_port_down(rxe); + + dev_put(ndev); } static int rxe_notify(struct notifier_block *not_blk, @@ -599,18 +613,14 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_UNREGISTER: ib_unregister_device_queued(&rxe->ib_dev); break; - case NETDEV_UP: - rxe_port_up(rxe); - break; - case NETDEV_DOWN: - rxe_port_down(rxe); - break; case NETDEV_CHANGEMTU: rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; + case NETDEV_DOWN: case NETDEV_CHANGE: - rxe_set_port_state(rxe); + if (ib_get_curr_port_state(ndev) == IB_PORT_DOWN) + rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); break; case NETDEV_REBOOT: case NETDEV_GOING_DOWN: diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c new file mode 100644 index 000000000000..dbc5a5600eb7 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_odp.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2022-2023 Fujitsu Ltd. All rights reserved. + */ + +#include <linux/hmm.h> +#include <linux/libnvdimm.h> + +#include <rdma/ib_umem_odp.h> + +#include "rxe.h" + +static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct ib_umem_odp *umem_odp = + container_of(mni, struct ib_umem_odp, notifier); + unsigned long start, end; + + if (!mmu_notifier_range_blockable(range)) + return false; + + mutex_lock(&umem_odp->umem_mutex); + mmu_interval_set_seq(mni, cur_seq); + + start = max_t(u64, ib_umem_start(umem_odp), range->start); + end = min_t(u64, ib_umem_end(umem_odp), range->end); + + /* update umem_odp->map.pfn_list */ + ib_umem_odp_unmap_dma_pages(umem_odp, start, end); + + mutex_unlock(&umem_odp->umem_mutex); + return true; +} + +const struct mmu_interval_notifier_ops rxe_mn_ops = { + .invalidate = rxe_ib_invalidate_range, +}; + +#define RXE_PAGEFAULT_DEFAULT 0 +#define RXE_PAGEFAULT_RDONLY BIT(0) +#define RXE_PAGEFAULT_SNAPSHOT BIT(1) +static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT); + u64 access_mask = 0; + int np; + + if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY)) + access_mask |= HMM_PFN_WRITE; + + /* + * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success. + * Callers must release the lock later to let invalidation handler + * do its work again. + */ + np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt, + access_mask, fault); + return np; +} + +static int rxe_odp_init_pages(struct rxe_mr *mr) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + int ret; + + ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address, + mr->umem->length, + RXE_PAGEFAULT_SNAPSHOT); + + if (ret >= 0) + mutex_unlock(&umem_odp->umem_mutex); + + return ret >= 0 ? 
0 : ret; +} + +int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, + u64 iova, int access_flags, struct rxe_mr *mr) +{ + struct ib_umem_odp *umem_odp; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return -EOPNOTSUPP; + + rxe_mr_init(access_flags, mr); + + if (!start && length == U64_MAX) { + if (iova != 0) + return -EINVAL; + if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return -EINVAL; + + /* Never reach here, for implicit ODP is not implemented. */ + } + + umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags, + &rxe_mn_ops); + if (IS_ERR(umem_odp)) { + rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n", + (int)PTR_ERR(umem_odp)); + return PTR_ERR(umem_odp); + } + + umem_odp->private = mr; + + mr->umem = &umem_odp->umem; + mr->access = access_flags; + mr->ibmr.length = length; + mr->ibmr.iova = iova; + mr->page_offset = ib_umem_offset(&umem_odp->umem); + + err = rxe_odp_init_pages(mr); + if (err) { + ib_umem_odp_release(umem_odp); + return err; + } + + mr->state = RXE_MR_STATE_VALID; + mr->ibmr.type = IB_MR_TYPE_USER; + + return err; +} + +static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp, u64 iova, + int length) +{ + bool need_fault = false; + u64 addr; + int idx; + + addr = iova & (~(BIT(umem_odp->page_shift) - 1)); + + /* Skim through all pages that are to be accessed. */ + while (addr < iova + length) { + idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; + + if (!(umem_odp->map.pfn_list[idx] & HMM_PFN_VALID)) { + need_fault = true; + break; + } + + addr += BIT(umem_odp->page_shift); + } + return need_fault; +} + +static unsigned long rxe_odp_iova_to_index(struct ib_umem_odp *umem_odp, u64 iova) +{ + return (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift; +} + +static unsigned long rxe_odp_iova_to_page_offset(struct ib_umem_odp *umem_odp, u64 iova) +{ + return iova & (BIT(umem_odp->page_shift) - 1); +} + +static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u32 flags) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + bool need_fault; + int err; + + if (unlikely(length < 1)) + return -EINVAL; + + mutex_lock(&umem_odp->umem_mutex); + + need_fault = rxe_check_pagefault(umem_odp, iova, length); + if (need_fault) { + mutex_unlock(&umem_odp->umem_mutex); + + /* umem_mutex is locked on success. */ + err = rxe_odp_do_pagefault_and_lock(mr, iova, length, + flags); + if (err < 0) + return err; + + need_fault = rxe_check_pagefault(umem_odp, iova, length); + if (need_fault) + return -EFAULT; + } + + return 0; +} + +static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, + int length, enum rxe_mr_copy_dir dir) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + struct page *page; + int idx, bytes; + size_t offset; + u8 *user_va; + + idx = rxe_odp_iova_to_index(umem_odp, iova); + offset = rxe_odp_iova_to_page_offset(umem_odp, iova); + + while (length > 0) { + u8 *src, *dest; + + page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]); + user_va = kmap_local_page(page); + if (!user_va) + return -EFAULT; + + src = (dir == RXE_TO_MR_OBJ) ? addr : user_va; + dest = (dir == RXE_TO_MR_OBJ) ? 
user_va : addr; + + bytes = BIT(umem_odp->page_shift) - offset; + if (bytes > length) + bytes = length; + + memcpy(dest, src, bytes); + kunmap_local(user_va); + + length -= bytes; + idx++; + offset = 0; + } + + return 0; +} + +int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, + enum rxe_mr_copy_dir dir) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + u32 flags = RXE_PAGEFAULT_DEFAULT; + int err; + + if (length == 0) + return 0; + + if (unlikely(!mr->umem->is_odp)) + return -EOPNOTSUPP; + + switch (dir) { + case RXE_TO_MR_OBJ: + break; + + case RXE_FROM_MR_OBJ: + flags |= RXE_PAGEFAULT_RDONLY; + break; + + default: + return -EINVAL; + } + + err = rxe_odp_map_range_and_lock(mr, iova, length, flags); + if (err) + return err; + + err = __rxe_odp_mr_copy(mr, iova, addr, length, dir); + + mutex_unlock(&umem_odp->umem_mutex); + + return err; +} + +static enum resp_states rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, + int opcode, u64 compare, + u64 swap_add, u64 *orig_val) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + unsigned int page_offset; + struct page *page; + unsigned int idx; + u64 value; + u64 *va; + int err; + + if (unlikely(mr->state != RXE_MR_STATE_VALID)) { + rxe_dbg_mr(mr, "mr not in valid state\n"); + return RESPST_ERR_RKEY_VIOLATION; + } + + err = mr_check_range(mr, iova, sizeof(value)); + if (err) { + rxe_dbg_mr(mr, "iova out of range\n"); + return RESPST_ERR_RKEY_VIOLATION; + } + + idx = rxe_odp_iova_to_index(umem_odp, iova); + page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova); + page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]); + if (!page) + return RESPST_ERR_RKEY_VIOLATION; + + if (unlikely(page_offset & 0x7)) { + rxe_dbg_mr(mr, "iova not aligned\n"); + return RESPST_ERR_MISALIGNED_ATOMIC; + } + + va = kmap_local_page(page); + + spin_lock_bh(&atomic_ops_lock); + value = *orig_val = va[page_offset >> 3]; + + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { + if (value == compare) + va[page_offset >> 3] = swap_add; + } else { + value += swap_add; + va[page_offset >> 3] = value; + } + spin_unlock_bh(&atomic_ops_lock); + + kunmap_local(va); + + return RESPST_NONE; +} + +enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, + u64 compare, u64 swap_add, u64 *orig_val) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + int err; + + err = rxe_odp_map_range_and_lock(mr, iova, sizeof(char), + RXE_PAGEFAULT_DEFAULT); + if (err < 0) + return RESPST_ERR_RKEY_VIOLATION; + + err = rxe_odp_do_atomic_op(mr, iova, opcode, compare, swap_add, + orig_val); + mutex_unlock(&umem_odp->umem_mutex); + + return err; +} + +int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova, + unsigned int length) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + unsigned int page_offset; + unsigned long index; + struct page *page; + unsigned int bytes; + int err; + u8 *va; + + err = rxe_odp_map_range_and_lock(mr, iova, length, + RXE_PAGEFAULT_DEFAULT); + if (err) + return err; + + while (length > 0) { + index = rxe_odp_iova_to_index(umem_odp, iova); + page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova); + + page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]); + if (!page) { + mutex_unlock(&umem_odp->umem_mutex); + return -EFAULT; + } + + bytes = min_t(unsigned int, length, + mr_page_size(mr) - page_offset); + + va = kmap_local_page(page); + arch_wb_cache_pmem(va + page_offset, bytes); + kunmap_local(va); + + length -= bytes; + iova += bytes; + page_offset = 0; + } + + 
mutex_unlock(&umem_odp->umem_mutex); + + return 0; +} + +enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) +{ + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); + unsigned int page_offset; + unsigned long index; + struct page *page; + int err; + u64 *va; + + /* See IBA oA19-28 */ + err = mr_check_range(mr, iova, sizeof(value)); + if (unlikely(err)) { + rxe_dbg_mr(mr, "iova out of range\n"); + return RESPST_ERR_RKEY_VIOLATION; + } + + err = rxe_odp_map_range_and_lock(mr, iova, sizeof(value), + RXE_PAGEFAULT_DEFAULT); + if (err) + return RESPST_ERR_RKEY_VIOLATION; + + page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova); + index = rxe_odp_iova_to_index(umem_odp, iova); + page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]); + if (!page) { + mutex_unlock(&umem_odp->umem_mutex); + return RESPST_ERR_RKEY_VIOLATION; + } + /* See IBA A19.4.2 */ + if (unlikely(page_offset & 0x7)) { + mutex_unlock(&umem_odp->umem_mutex); + rxe_dbg_mr(mr, "misaligned address\n"); + return RESPST_ERR_MISALIGNED_ATOMIC; + } + + va = kmap_local_page(page); + /* Do atomic write after all prior operations have completed */ + smp_store_release(&va[page_offset >> 3], value); + kunmap_local(va); + + mutex_unlock(&umem_odp->umem_mutex); + + return RESPST_NONE; +} diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index d2f57ead78ad..767870568372 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -53,12 +53,9 @@ enum rxe_device_param { | IB_DEVICE_MEM_WINDOW | IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT -#ifdef CONFIG_64BIT | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_ATOMIC_WRITE, -#else - | IB_DEVICE_MEM_WINDOW_TYPE_2B, -#endif /* CONFIG_64BIT */ + RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + sizeof(struct ib_sge) * RXE_MAX_SGE, @@ -129,7 +126,7 @@ enum rxe_device_param { enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, - RXE_PORT_MAX_MSG_SZ = 0x800000, + RXE_PORT_MAX_MSG_SZ = (1UL << 31), RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, RXE_PORT_LID = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6215c6de3a84..d9cb682fd71f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -119,7 +119,7 @@ void rxe_pool_cleanup(struct rxe_pool *pool) int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, bool sleepable) { - int err; + int err = -EINVAL; gfp_t gfp_flags; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) @@ -147,7 +147,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, err_cnt: atomic_dec(&pool->num_elem); - return -EINVAL; + return err; } void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) @@ -178,7 +178,6 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { struct rxe_pool *pool = elem->pool; struct xarray *xa = &pool->xa; - static int timeout = RXE_POOL_TIMEOUT; int ret, err = 0; void *xa_ret; @@ -202,19 +201,19 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) * return to rdma-core */ if (sleepable) { - if (!completion_done(&elem->complete) && timeout) { + if (!completion_done(&elem->complete)) { ret = wait_for_completion_timeout(&elem->complete, - timeout); + msecs_to_jiffies(50000)); /* Shouldn't happen. There are still references to * the object but, rather than deadlock, free the * object or pass back to rdma-core. 
*/ if (WARN_ON(!ret)) - err = -EINVAL; + err = -ETIMEDOUT; } } else { - unsigned long until = jiffies + timeout; + unsigned long until = jiffies + RXE_POOL_TIMEOUT; /* AH objects are unique in that the destroy_ah verb * can be called in atomic context. This delay @@ -226,7 +225,7 @@ int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) mdelay(1); if (WARN_ON(!completion_done(&elem->complete))) - err = -EINVAL; + err = -ETIMEDOUT; } if (pool->cleanup) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index e3589c02013e..f2af3e0aef35 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) return err; - qp->sk->sk->sk_user_data = qp; + qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index; /* pick a source UDP port number for this QP based on * the source QPN. this spreads traffic for different QPs @@ -265,8 +265,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.opcode = -1; qp->comp.opcode = -1; - rxe_init_task(&qp->req.task, qp, rxe_requester); - rxe_init_task(&qp->comp.task, qp, rxe_completer); + rxe_init_task(&qp->send_task, qp, rxe_sender); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -337,7 +336,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - rxe_init_task(&qp->resp.task, qp, rxe_responder); + rxe_init_task(&qp->recv_task, qp, rxe_receiver); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; @@ -514,14 +513,12 @@ err1: static void rxe_qp_reset(struct rxe_qp *qp) { /* stop tasks from running */ - rxe_disable_task(&qp->resp.task); - rxe_disable_task(&qp->comp.task); - rxe_disable_task(&qp->req.task); + rxe_disable_task(&qp->recv_task); + rxe_disable_task(&qp->send_task); /* drain work and packet queuesc */ - rxe_requester(qp); - rxe_completer(qp); - rxe_responder(qp); + rxe_sender(qp); + rxe_receiver(qp); if (qp->rq.queue) rxe_queue_reset(qp->rq.queue); @@ -548,9 +545,8 @@ static void rxe_qp_reset(struct rxe_qp *qp) cleanup_rd_atomic_resources(qp); /* reenable tasks */ - rxe_enable_task(&qp->resp.task); - rxe_enable_task(&qp->comp.task); - rxe_enable_task(&qp->req.task); + rxe_enable_task(&qp->recv_task); + rxe_enable_task(&qp->send_task); } /* move the qp to the error state */ @@ -562,9 +558,8 @@ void rxe_qp_error(struct rxe_qp *qp) qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ - rxe_sched_task(&qp->resp.task); - rxe_sched_task(&qp->comp.task); - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->recv_task); + rxe_sched_task(&qp->send_task); spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -575,8 +570,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr, spin_lock_irqsave(&qp->state_lock, flags); qp->attr.sq_draining = 1; - rxe_sched_task(&qp->comp.task); - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->send_task); spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -781,6 +775,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) * Yield the processor */ spin_lock_irqsave(&qp->state_lock, flags); + attr->cur_qp_state = qp_state(qp); if (qp->attr.sq_draining) { spin_unlock_irqrestore(&qp->state_lock, flags); cond_resched(); @@ -816,24 +811,25 @@ static void rxe_qp_do_cleanup(struct work_struct *work) 
spin_unlock_irqrestore(&qp->state_lock, flags); qp->qp_timeout_jiffies = 0; - if (qp_type(qp) == IB_QPT_RC) { - del_timer_sync(&qp->retrans_timer); - del_timer_sync(&qp->rnr_nak_timer); + /* In the function timer_setup, .function is initialized. If .function + * is NULL, it indicates the function timer_setup is not called, the + * timer is not initialized. Or else, the timer is initialized. + */ + if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function && + qp->rnr_nak_timer.function) { + timer_delete_sync(&qp->retrans_timer); + timer_delete_sync(&qp->rnr_nak_timer); } - if (qp->resp.task.func) - rxe_cleanup_task(&qp->resp.task); - - if (qp->req.task.func) - rxe_cleanup_task(&qp->req.task); + if (qp->recv_task.func) + rxe_cleanup_task(&qp->recv_task); - if (qp->comp.task.func) - rxe_cleanup_task(&qp->comp.task); + if (qp->send_task.func) + rxe_cleanup_task(&qp->send_task); /* flush out any receive wr's or pending requests */ - rxe_requester(qp); - rxe_completer(qp); - rxe_responder(qp); + rxe_sender(qp); + rxe_receiver(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d8c41fd626a9..9d0392df8a92 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -5,7 +5,6 @@ */ #include <linux/skbuff.h> -#include <crypto/hash.h> #include "rxe.h" #include "rxe_loc.h" @@ -108,7 +107,7 @@ void rnr_nak_timer(struct timer_list *t) /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; - rxe_sched_task(&qp->req.task); + rxe_sched_task(&qp->send_task); } spin_unlock_irqrestore(&qp->state_lock, flags); } @@ -424,7 +423,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, int paylen; int solicited; u32 qp_num; - int ack_req; + int ack_req = 0; /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; @@ -445,8 +444,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, qp_num = (pkt->mask & RXE_DETH_MASK) ? 
ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; - ack_req = ((pkt->mask & RXE_END_MASK) || - (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); + if (qp_type(qp) != IB_QPT_UD && qp_type(qp) != IB_QPT_UC) + ack_req = ((pkt->mask & RXE_END_MASK) || + (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); if (ack_req) qp->req.noack_pkts = 0; @@ -545,6 +545,8 @@ static void update_wqe_state(struct rxe_qp *qp, if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; + else + wqe->state = wqe_state_done; } else { wqe->state = wqe_state_processing; } @@ -573,30 +575,6 @@ static void update_wqe_psn(struct rxe_qp *qp, qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; } -static void save_state(struct rxe_send_wqe *wqe, - struct rxe_qp *qp, - struct rxe_send_wqe *rollback_wqe, - u32 *rollback_psn) -{ - rollback_wqe->state = wqe->state; - rollback_wqe->first_psn = wqe->first_psn; - rollback_wqe->last_psn = wqe->last_psn; - rollback_wqe->dma = wqe->dma; - *rollback_psn = qp->req.psn; -} - -static void rollback_state(struct rxe_send_wqe *wqe, - struct rxe_qp *qp, - struct rxe_send_wqe *rollback_wqe, - u32 rollback_psn) -{ - wqe->state = rollback_wqe->state; - wqe->first_psn = rollback_wqe->first_psn; - wqe->last_psn = rollback_wqe->last_psn; - wqe->dma = rollback_wqe->dma; - qp->req.psn = rollback_psn; -} - static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; @@ -655,12 +633,6 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->status = IB_WC_SUCCESS; qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); - /* There is no ack coming for local work requests - * which can lead to a deadlock. So go ahead and complete - * it now. - */ - rxe_sched_task(&qp->comp.task); - return 0; } @@ -676,8 +648,6 @@ int rxe_requester(struct rxe_qp *qp) int opcode; int err; int ret; - struct rxe_send_wqe rollback_wqe; - u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; struct rxe_ah *ah; struct rxe_av *av; @@ -692,10 +662,12 @@ int rxe_requester(struct rxe_qp *qp) if (unlikely(qp_state(qp) == IB_QPS_ERR)) { wqe = __req_next_wqe(qp); spin_unlock_irqrestore(&qp->state_lock, flags); - if (wqe) + if (wqe) { + wqe->status = IB_WC_WR_FLUSH_ERR; goto err; - else + } else { goto exit; + } } if (unlikely(qp_state(qp) == IB_QPS_RESET)) { @@ -786,7 +758,6 @@ int rxe_requester(struct rxe_qp *qp) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_sched_task(&qp->comp.task); goto done; } payload = mtu; @@ -799,9 +770,6 @@ int rxe_requester(struct rxe_qp *qp) pkt.mask = rxe_opcode[opcode].mask; pkt.wqe = wqe; - /* save wqe state before we build and send packet */ - save_state(wqe, qp, &rollback_wqe, &rollback_psn); - av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { rxe_dbg_qp(qp, "Failed no address vector\n"); @@ -834,31 +802,14 @@ int rxe_requester(struct rxe_qp *qp) if (ah) rxe_put(ah); - /* update wqe state as though we had sent it */ - update_wqe_state(qp, wqe, &pkt); - update_wqe_psn(qp, wqe, &pkt, payload); - err = rxe_xmit_packet(qp, &pkt, skb); if (err) { - if (err != -EAGAIN) { - wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; - } - - /* the packet was dropped so reset wqe to the state - * before we sent it so we can try to resend - */ - rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - - /* force a delay until the dropped packet is freed and - * the send queue is drained below the low water mark - */ - qp->need_req_skb = 1; - - rxe_sched_task(&qp->req.task); - goto exit; + 
wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; } + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); update_state(qp, &pkt); /* A non-zero return value will cause rxe_do_task to @@ -878,3 +829,20 @@ exit: out: return ret; } + +int rxe_sender(struct rxe_qp *qp) +{ + int req_ret; + int comp_ret; + + /* process the send queue */ + req_ret = rxe_requester(qp); + + /* process the response queue */ + comp_ret = rxe_completer(qp); + + /* exit the task loop if both requester and completer + * are ready + */ + return (req_ret && comp_ret) ? -EAGAIN : 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 963382f625d7..711f73e0bbb1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -49,18 +49,8 @@ static char *resp_state_name[] = { /* rxe_recv calls here to add a request packet to the input queue */ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - skb_queue_tail(&qp->req_pkts, skb); - - must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || - (skb_queue_len(&qp->req_pkts) > 1); - - if (must_sched) - rxe_sched_task(&qp->resp.task); - else - rxe_run_task(&qp->resp.task); + rxe_sched_task(&qp->recv_task); } static inline enum resp_states get_req(struct rxe_qp *qp, @@ -351,9 +341,22 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, /* * See IBA C9-92 * For UD QPs we only check if the packet will fit in the - * receive buffer later. For rmda operations additional + * receive buffer later. For RDMA operations additional * length checks are performed in check_rkey. */ + if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) { + unsigned int payload = payload_size(pkt); + unsigned int recv_buffer_len = 0; + int i; + + for (i = 0; i < qp->resp.wqe->dma.num_sge; i++) + recv_buffer_len += qp->resp.wqe->dma.sge[i].length; + if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) { + rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n"); + return RESPST_ERR_LENGTH; + } + } + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))) { unsigned int mtu = qp->mtu; @@ -699,10 +702,16 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, if (!res->replay) { u64 iova = qp->resp.va + qp->resp.offset; - err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode, - atmeth_comp(pkt), - atmeth_swap_add(pkt), - &res->atomic.orig_val); + if (is_odp_mr(mr)) + err = rxe_odp_atomic_op(mr, iova, pkt->opcode, + atmeth_comp(pkt), + atmeth_swap_add(pkt), + &res->atomic.orig_val); + else + err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode, + atmeth_comp(pkt), + atmeth_swap_add(pkt), + &res->atomic.orig_val); if (err) return err; @@ -740,7 +749,16 @@ static enum resp_states atomic_write_reply(struct rxe_qp *qp, value = *(u64 *)payload_addr(pkt); iova = qp->resp.va + qp->resp.offset; - err = rxe_mr_do_atomic_write(mr, iova, value); + /* See IBA oA19-28 */ + if (unlikely(mr->state != RXE_MR_STATE_VALID)) { + rxe_dbg_mr(mr, "mr not in valid state\n"); + return RESPST_ERR_RKEY_VIOLATION; + } + + if (is_odp_mr(mr)) + err = rxe_odp_do_atomic_write(mr, iova, value); + else + err = rxe_mr_do_atomic_write(mr, iova, value); if (err) return err; @@ -1485,7 +1503,7 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify) qp->resp.wqe = NULL; } -int rxe_responder(struct rxe_qp *qp) +int rxe_receiver(struct rxe_qp *qp) { struct rxe_dev *rxe = 
to_rdev(qp->ibqp.device); enum resp_states state; diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 80332638d9e3..6f8f353e9583 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -85,17 +85,17 @@ static bool is_done(struct rxe_task *task) /* do_task is a wrapper for the three tasks (requester, * completer, responder) and calls them in a loop until - * they return a non-zero value. It is called either - * directly by rxe_run_task or indirectly if rxe_sched_task - * schedules the task. They must call __reserve_if_idle to - * move the task to busy before calling or scheduling. - * The task can also be moved to drained or invalid - * by calls to rxe_cleanup_task or rxe_disable_task. - * In that case tasks which get here are not executed but - * just flushed. The tasks are designed to look to see if - * there is work to do and then do part of it before returning - * here with a return value of zero until all the work - * has been consumed then it returns a non-zero value. + * they return a non-zero value. It is called indirectly + * when rxe_sched_task schedules the task. They must + * call __reserve_if_idle to move the task to busy before + * calling or scheduling. The task can also be moved to + * drained or invalid by calls to rxe_cleanup_task or + * rxe_disable_task. In that case tasks which get here + * are not executed but just flushed. The tasks are + * designed to look to see if there is work to do and + * then do part of it before returning here with a return + * value of zero until all the work has been consumed then + * it returns a non-zero value. * The number of times the task can be run is limited by * max iterations so one task cannot hold the cpu forever. * If the limit is hit and work remains the task is rescheduled. @@ -234,24 +234,6 @@ void rxe_cleanup_task(struct rxe_task *task) spin_unlock_irqrestore(&task->lock, flags); } -/* run the task inline if it is currently idle - * cannot call do_task holding the lock - */ -void rxe_run_task(struct rxe_task *task) -{ - unsigned long flags; - bool run; - - WARN_ON(rxe_read(task->qp) <= 0); - - spin_lock_irqsave(&task->lock, flags); - run = __reserve_if_idle(task); - spin_unlock_irqrestore(&task->lock, flags); - - if (run) - do_task(task); -} - /* schedule the task to run later as a work queue entry. * the queue_work call can be called holding * the lock. 
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index a63e258b3d66..a8c9a77b6027 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -47,8 +47,6 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp, /* cleanup task */ void rxe_cleanup_task(struct rxe_task *task); -void rxe_run_task(struct rxe_task *task); - void rxe_sched_task(struct rxe_task *task); /* keep a task from scheduling */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 614581989b38..2331e698a65b 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -41,6 +41,7 @@ static int rxe_query_port(struct ib_device *ibdev, u32 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(ibdev); + struct net_device *ndev; int err, ret; if (port_num != 1) { @@ -49,21 +50,29 @@ static int rxe_query_port(struct ib_device *ibdev, goto err_out; } + ndev = rxe_ib_device_get_netdev(ibdev); + if (!ndev) { + err = -ENODEV; + goto err_out; + } + memcpy(attr, &rxe->port.attr, sizeof(*attr)); mutex_lock(&rxe->usdev_lock); ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed, &attr->active_width); + attr->state = ib_get_curr_port_state(ndev); if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - else if (dev_get_flags(rxe->ndev) & IFF_UP) + else if (dev_get_flags(ndev) & IFF_UP) attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; mutex_unlock(&rxe->usdev_lock); + dev_put(ndev); return ret; err_out: @@ -71,6 +80,18 @@ err_out: return err; } +static int rxe_query_gid(struct ib_device *ibdev, u32 port, int idx, + union ib_gid *gid) +{ + struct rxe_dev *rxe = to_rdev(ibdev); + + /* subnet_prefix == interface_id == 0; */ + memset(gid, 0, sizeof(*gid)); + memcpy(gid->raw, rxe->raw_gid, ETH_ALEN); + + return 0; +} + static int rxe_query_pkey(struct ib_device *ibdev, u32 port_num, u16 index, u16 *pkey) { @@ -688,7 +709,7 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, for (i = 0; i < ibwr->num_sge; i++) length += ibwr->sg_list[i].length; - if (length > (1UL << 31)) { + if (length > RXE_PORT_MAX_MSG_SZ) { rxe_err_qp(qp, "message length too long\n"); break; } @@ -812,7 +833,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, int i; for (i = 0; i < ibwr->num_sge; i++, sge++) { - memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length); + memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length); p += sge->length; } } @@ -888,6 +909,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, { int err = 0; unsigned long flags; + int good = 0; spin_lock_irqsave(&qp->sq.sq_lock, flags); while (ibwr) { @@ -895,18 +917,16 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, if (err) { *bad_wr = ibwr; break; + } else { + good++; } ibwr = ibwr->next; } spin_unlock_irqrestore(&qp->sq.sq_lock, flags); - if (!err) - rxe_sched_task(&qp->req.task); - - spin_lock_irqsave(&qp->state_lock, flags); - if (qp_state(qp) == IB_QPS_ERR) - rxe_sched_task(&qp->comp.task); - spin_unlock_irqrestore(&qp->state_lock, flags); + /* kickoff processing of any posted wqes */ + if (good) + rxe_sched_task(&qp->send_task); return err; } @@ -936,7 +956,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->is_user) { /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task); + rxe_sched_task(&qp->send_task); } else { err = 
rxe_post_send_kernel(qp, wr, bad_wr); if (err) @@ -973,8 +993,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) for (i = 0; i < num_sge; i++) length += ibwr->sg_list[i].length; - /* IBA max message size is 2^31 */ - if (length >= (1UL<<31)) { + if (length > RXE_PORT_MAX_MSG_SZ) { err = -EINVAL; rxe_dbg("message length too long\n"); goto err_out; @@ -1046,7 +1065,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) == IB_QPS_ERR) - rxe_sched_task(&qp->resp.task); + rxe_sched_task(&qp->recv_task); spin_unlock_irqrestore(&qp->state_lock, flags); return err; @@ -1054,8 +1073,9 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /* cq */ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata) + struct uverbs_attr_bundle *attrs) { + struct ib_udata *udata = &attrs->driver_udata; struct ib_device *dev = ibcq->device; struct rxe_dev *rxe = to_rdev(dev); struct rxe_cq *cq = to_rcq(ibcq); @@ -1278,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start, mr->ibmr.pd = ibpd; mr->ibmr.device = ibpd->device; - err = rxe_mr_init_user(rxe, start, length, access, mr); + if (access & IB_ACCESS_ON_DEMAND) + err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr); + else + err = rxe_mr_init_user(rxe, start, length, access, mr); if (err) { rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err); goto err_cleanup; @@ -1425,9 +1448,16 @@ static const struct attribute_group rxe_attr_group = { static int rxe_enable_driver(struct ib_device *ib_dev) { struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); + struct net_device *ndev; + + ndev = rxe_ib_device_get_netdev(ib_dev); + if (!ndev) + return -ENODEV; rxe_set_port_state(rxe); - dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev)); + dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(ndev)); + + dev_put(ndev); return 0; } @@ -1478,6 +1508,7 @@ static const struct ib_device_ops rxe_dev_ops = { .query_ah = rxe_query_ah, .query_device = rxe_query_device, .query_pkey = rxe_query_pkey, + .query_gid = rxe_query_gid, .query_port = rxe_query_port, .query_qp = rxe_query_qp, .query_srq = rxe_query_srq, @@ -1495,7 +1526,8 @@ static const struct ib_device_ops rxe_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw), }; -int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) +int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name, + struct net_device *ndev) { int err; struct ib_device *dev = &rxe->ib_dev; @@ -1507,17 +1539,13 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->num_comp_vectors = num_possible_cpus(); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, - rxe->ndev->dev_addr); + rxe->raw_gid); dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); - err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); - if (err) - return err; - - err = rxe_icrc_init(rxe); + err = ib_device_set_netdev(&rxe->ib_dev, ndev, 1); if (err) return err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ccb9d19ffe8a..fd48075810dd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -113,7 +113,7 @@ struct rxe_req_info { int need_retry; int wait_for_rnr_timer; int noack_pkts; - struct 
rxe_task task; + int again; }; struct rxe_comp_info { @@ -124,7 +124,43 @@ struct rxe_comp_info { int started_retry; u32 retry_cnt; u32 rnr_retry; - struct rxe_task task; +}; + +/* responder states */ +enum resp_states { + RESPST_NONE, + RESPST_GET_REQ, + RESPST_CHK_PSN, + RESPST_CHK_OP_SEQ, + RESPST_CHK_OP_VALID, + RESPST_CHK_RESOURCE, + RESPST_CHK_LENGTH, + RESPST_CHK_RKEY, + RESPST_EXECUTE, + RESPST_READ_REPLY, + RESPST_ATOMIC_REPLY, + RESPST_ATOMIC_WRITE_REPLY, + RESPST_PROCESS_FLUSH, + RESPST_COMPLETE, + RESPST_ACKNOWLEDGE, + RESPST_CLEANUP, + RESPST_DUPLICATE_REQUEST, + RESPST_ERR_MALFORMED_WQE, + RESPST_ERR_UNSUPPORTED_OPCODE, + RESPST_ERR_MISALIGNED_ATOMIC, + RESPST_ERR_PSN_OUT_OF_SEQ, + RESPST_ERR_MISSING_OPCODE_FIRST, + RESPST_ERR_MISSING_OPCODE_LAST_C, + RESPST_ERR_MISSING_OPCODE_LAST_D1E, + RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, + RESPST_ERR_RNR, + RESPST_ERR_RKEY_VIOLATION, + RESPST_ERR_INVALIDATE_RKEY, + RESPST_ERR_LENGTH, + RESPST_ERR_CQ_OVERFLOW, + RESPST_ERROR, + RESPST_DONE, + RESPST_EXIT, }; enum rdatm_res_state { @@ -196,7 +232,6 @@ struct rxe_resp_info { unsigned int res_head; unsigned int res_tail; struct resp_res *res; - struct rxe_task task; }; struct rxe_qp { @@ -229,6 +264,9 @@ struct rxe_qp { struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; + struct rxe_task send_task; + struct rxe_task recv_task; + struct rxe_req_info req; struct rxe_comp_info comp; struct rxe_resp_info resp; @@ -369,14 +407,15 @@ struct rxe_port { u32 qp_gsi_index; }; +#define RXE_PORT 1 struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; int max_ucontext; int max_inline_data; - struct mutex usdev_lock; + struct mutex usdev_lock; - struct net_device *ndev; + char raw_gid[ETH_ALEN]; struct rxe_pool uc_pool; struct rxe_pool pd_pool; @@ -402,9 +441,13 @@ struct rxe_dev { atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; - struct crypto_shash *tfm; }; +static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev) +{ + return ib_device_get_netdev(dev, RXE_PORT); +} + static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) { atomic64_inc(&rxe->stats_counters[index]); @@ -470,6 +513,7 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) return to_rpd(mw->ibmw.pd); } -int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); +int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name, + struct net_device *ndev); #endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 81b70a3eeb87..186f182b80e7 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,9 +2,8 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND depends on INFINIBAND_VIRT_DMA - select LIBCRC32C - select CRYPTO - select CRYPTO_CRC32C + select CRC32 + select NET_CRC32C help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 75253f2b3e3d..f5fd71717b80 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -10,9 +10,9 @@ #include <rdma/restrack.h> #include <linux/socket.h> #include <linux/skbuff.h> -#include <crypto/hash.h> #include <linux/crc32.h> #include <linux/crc32c.h> +#include <linux/unaligned.h> #include <rdma/siw-abi.h> #include "iwarp.h" @@ -46,6 +46,9 @@ */ #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4 +/* There is always only a port 1 per siw device */ +#define SIW_PORT 1 + struct siw_dev_cap { int max_qp; int max_qp_wr; @@ -69,16 +72,12 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct net_device *netdev; struct siw_dev_cap attrs; u32 vendor_part_id; int numa_node; char raw_gid[ETH_ALEN]; - /* physical port state (only one port per device) */ - enum ib_port_state state; - spinlock_t lock; struct xarray qp_xa; @@ -94,8 +93,6 @@ struct siw_device { atomic_t num_mr; atomic_t num_srq; atomic_t num_ctx; - - struct work_struct netdev_down; }; struct siw_ucontext { @@ -292,7 +289,8 @@ struct siw_rx_stream { union iwarp_hdr hdr; struct mpa_trailer trailer; - struct shash_desc *mpa_crc_hd; + u32 mpa_crc; + bool mpa_crc_enabled; /* * For each FPDU, main RX loop runs through 3 stages: @@ -393,7 +391,8 @@ struct siw_iwarp_tx { int burst; int bytes_unsent; /* ddp payload bytes */ - struct shash_desc *mpa_crc_hd; + u32 mpa_crc; + bool mpa_crc_enabled; u8 do_crc : 1; /* do crc for segment */ u8 use_sendpage : 1; /* send w/o copy */ @@ -499,7 +498,6 @@ extern u_char mpa_version; extern const bool peer_to_peer; extern struct task_struct *siw_tx_thread[]; -extern struct crypto_shash *siw_crypto_shash; extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1]; /* QP general functions */ @@ -671,29 +669,33 @@ static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp) return NULL; } -static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum) +static inline void siw_crc_init(u32 *crc) { - return (__force __wsum)crc32c((__force __u32)sum, buff, len); + *crc = ~0; } -static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset, - int len) +static inline void siw_crc_update(u32 *crc, const void *data, size_t len) { - return (__force __wsum)__crc32c_le_combine((__force __u32)csum, - (__force __u32)csum2, len); + *crc = crc32c(*crc, data, len); } -static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) +static inline void siw_crc_final(u32 *crc, u8 out[4]) { - const struct skb_checksum_ops siw_cs_ops = { - .update = siw_csum_update, - .combine = siw_csum_combine, - }; - __wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd); + put_unaligned_le32(~*crc, out); +} - crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc, - &siw_cs_ops); - *(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc; +static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4]) +{ + u32 crc; + + siw_crc_init(&crc); + siw_crc_update(&crc, data, len); + return siw_crc_final(&crc, out); +} + +static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) +{ + srx->mpa_crc = skb_crc32c(srx->skb, srx->skb_offset, len, srx->mpa_crc); } #define siw_dbg(ibdev, fmt, ...) \ @@ -716,7 +718,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__) #define siw_dbg_cep(cep, fmt, ...) 
\ - ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \ + ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \ cep, __func__, ##__VA_ARGS__) void siw_cq_flush(struct siw_cq *cq); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 86323918a570..708b13993fdf 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) { struct socket *s; struct siw_cep *cep = NULL; + struct net_device *ndev = NULL; struct siw_device *sdev = to_siw_dev(id->device); int addr_family = id->local_addr.ss_family; int rv = 0; @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); /* For wildcard addr, limit binding to current device only */ - if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; - + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) { + ndev = ib_device_get_netdev(id->device, SIW_PORT); + if (ndev) { + s->sk->sk_bound_dev_if = ndev->ifindex; + } else { + rv = -ENODEV; + goto error; + } + } rv = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in)); } else { @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } /* For wildcard addr, limit binding to current device only */ - if (ipv6_addr_any(&laddr->sin6_addr)) - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; - + if (ipv6_addr_any(&laddr->sin6_addr)) { + ndev = ib_device_get_netdev(id->device, SIW_PORT); + if (ndev) { + s->sk->sk_bound_dev_if = ndev->ifindex; + } else { + rv = -ENODEV; + goto error; + } + } rv = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in6)); } @@ -1860,6 +1873,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; + dev_put(ndev); siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr); @@ -1879,6 +1893,7 @@ error: siw_cep_set_free_and_put(cep); } sock_release(s); + dev_put(ndev); return rv; } diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index f3c2226aff94..25b3c741b66b 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -72,7 +72,7 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->opcode = map_wc_opcode[cqe->opcode]; wc->status = map_cqe_status[cqe->status].ib; siw_dbg_cq(cq, - "idx %u, type %d, flags %2x, id 0x%pK\n", + "idx %u, type %d, flags %2x, id 0x%p\n", cq->cq_get % cq->num_cqe, cqe->opcode, cqe->flags, (void *)(uintptr_t)cqe->id); } else { diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 61ad8ca3d1a2..5168307229a9 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -59,7 +59,6 @@ u_char mpa_version = MPA_REVISION_2; const bool peer_to_peer; struct task_struct *siw_tx_thread[NR_CPUS]; -struct crypto_shash *siw_crypto_shash; static int siw_device_register(struct siw_device *sdev, const char *name) { @@ -287,7 +286,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; if (netdev->addr_len) { memcpy(sdev->raw_gid, netdev->dev_addr, @@ -364,39 +362,6 @@ error: return NULL; } -/* - * Network link becomes unavailable. Mark all - * affected QP's accordingly. 
- */ -static void siw_netdev_down(struct work_struct *work) -{ - struct siw_device *sdev = - container_of(work, struct siw_device, netdev_down); - - struct siw_qp_attrs qp_attrs; - struct list_head *pos, *tmp; - - memset(&qp_attrs, 0, sizeof(qp_attrs)); - qp_attrs.state = SIW_QP_STATE_ERROR; - - list_for_each_safe(pos, tmp, &sdev->qp_list) { - struct siw_qp *qp = list_entry(pos, struct siw_qp, devq); - - down_write(&qp->state_lock); - WARN_ON(siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE)); - up_write(&qp->state_lock); - } - ib_device_put(&sdev->base_dev); -} - -static void siw_device_goes_down(struct siw_device *sdev) -{ - if (ib_device_try_get(&sdev->base_dev)) { - INIT_WORK(&sdev->netdev_down, siw_netdev_down); - schedule_work(&sdev->netdev_down); - } -} - static int siw_netdev_event(struct notifier_block *nb, unsigned long event, void *arg) { @@ -413,20 +378,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, sdev = to_siw_dev(base_dev); switch (event) { - case NETDEV_UP: - sdev->state = IB_PORT_ACTIVE; - siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); - break; - - case NETDEV_GOING_DOWN: - siw_device_goes_down(sdev); - break; - - case NETDEV_DOWN: - sdev->state = IB_PORT_DOWN; - siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); - break; - case NETDEV_REGISTER: /* * Device registration now handled only by @@ -444,12 +395,8 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); break; /* - * Todo: Below netdev events are currently not handled. + * All other events are not handled */ - case NETDEV_CHANGEMTU: - case NETDEV_CHANGE: - break; - default: break; } @@ -479,12 +426,7 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev) sdev = siw_device_create(netdev); if (sdev) { dev_dbg(&netdev->dev, "siw: new device\n"); - - if (netif_running(netdev) && netif_carrier_ok(netdev)) - sdev->state = IB_PORT_ACTIVE; - else - sdev->state = IB_PORT_DOWN; - + ib_mark_name_assigned_by_user(&sdev->base_dev); rv = siw_device_register(sdev, basedev_name); if (rv) ib_dealloc_device(&sdev->base_dev); @@ -524,20 +466,7 @@ static __init int siw_init_module(void) rv = -ENOMEM; goto out_error; } - /* - * Locate CRC32 algorithm. If unsuccessful, fail - * loading siw only, if CRC is required. - */ - siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(siw_crypto_shash)) { - pr_info("siw: Loading CRC32c failed: %ld\n", - PTR_ERR(siw_crypto_shash)); - siw_crypto_shash = NULL; - if (mpa_crc_required) { - rv = -EOPNOTSUPP; - goto out_error; - } - } + rv = register_netdevice_notifier(&siw_netdev_nb); if (rv) goto out_error; @@ -550,9 +479,6 @@ static __init int siw_init_module(void) out_error: siw_stop_tx_threads(); - if (siw_crypto_shash) - crypto_free_shash(siw_crypto_shash); - pr_info("SoftIWARP attach failed. Error: %d\n", rv); siw_cm_exit(); @@ -573,9 +499,6 @@ static void __exit siw_exit_module(void) siw_destroy_cpulist(siw_cpu_info.num_nodes); - if (siw_crypto_shash) - crypto_free_shash(siw_crypto_shash); - pr_info("SoftiWARP detached\n"); } diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index dcb963607c8b..d5ddeb17bd22 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -18,30 +18,6 @@ #define SIW_STAG_MAX_INDEX 0x00ffffff /* - * The code avoids special Stag of zero and tries to randomize - * STag values between 1 and SIW_STAG_MAX_INDEX. 
- */ -int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) -{ - struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); - u32 id, next; - - get_random_bytes(&next, 4); - next &= SIW_STAG_MAX_INDEX; - - if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, - GFP_KERNEL) < 0) - return -ENOMEM; - - /* Set the STag index part */ - m->stag = id << 8; - - siw_dbg_mem(m, "new MEM object\n"); - - return 0; -} - -/* * siw_mem_id2obj() * * resolves memory from stag given by id. might be called from: @@ -181,10 +157,10 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, */ if (addr < mem->va || addr + len > mem->va + mem->len) { siw_dbg_pd(pd, "MEM interval len %d\n", len); - siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n", + siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n", (void *)(uintptr_t)addr, (void *)(uintptr_t)(addr + len)); - siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n", + siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n", (void *)(uintptr_t)mem->va, (void *)(uintptr_t)(mem->va + mem->len), mem->stag); diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index e74cfcd6dbc1..8e769d30e2ac 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -12,7 +12,6 @@ void siw_umem_release(struct siw_umem *umem); struct siw_pbl *siw_pbl_alloc(u32 num_buf); dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); -int siw_mem_add(struct siw_device *sdev, struct siw_mem *m); int siw_invalidate_stag(struct ib_pd *pd, u32 stag); int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, enum ib_access_flags perms, int len); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index da92cfa2073d..c1e6e7d6e32f 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -226,33 +226,6 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) return 0; } -static int siw_qp_enable_crc(struct siw_qp *qp) -{ - struct siw_rx_stream *c_rx = &qp->rx_stream; - struct siw_iwarp_tx *c_tx = &qp->tx_ctx; - int size; - - if (siw_crypto_shash == NULL) - return -ENOENT; - - size = crypto_shash_descsize(siw_crypto_shash) + - sizeof(struct shash_desc); - - c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); - c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); - if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) { - kfree(c_tx->mpa_crc_hd); - kfree(c_rx->mpa_crc_hd); - c_tx->mpa_crc_hd = NULL; - c_rx->mpa_crc_hd = NULL; - return -ENOMEM; - } - c_tx->mpa_crc_hd->tfm = siw_crypto_shash; - c_rx->mpa_crc_hd->tfm = siw_crypto_shash; - - return 0; -} - /* * Send a non signalled READ or WRITE to peer side as negotiated * with MPAv2 P2P setup protocol. 
The work request is only created @@ -583,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp) term->ctrl.mpa_len = cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE)); - if (qp->tx_ctx.mpa_crc_hd) { - crypto_shash_init(qp->tx_ctx.mpa_crc_hd); - if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, - (u8 *)iov[0].iov_base, - iov[0].iov_len)) - goto out; - + if (qp->tx_ctx.mpa_crc_enabled) { + siw_crc_init(&qp->tx_ctx.mpa_crc); + siw_crc_update(&qp->tx_ctx.mpa_crc, + iov[0].iov_base, iov[0].iov_len); if (num_frags == 3) { - if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, - (u8 *)iov[1].iov_base, - iov[1].iov_len)) - goto out; + siw_crc_update(&qp->tx_ctx.mpa_crc, + iov[1].iov_base, iov[1].iov_len); } - crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc); + siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc); } rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate); @@ -604,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp) rv == len_terminate ? "success" : "failure", __rdmap_term_layer(term), __rdmap_term_etype(term), __rdmap_term_ecode(term), rv); -out: kfree(term); kfree(err_hdr); } @@ -643,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp, switch (attrs->state) { case SIW_QP_STATE_RTS: if (attrs->flags & SIW_MPA_CRC) { - rv = siw_qp_enable_crc(qp); - if (rv) - break; + siw_crc_init(&qp->tx_ctx.mpa_crc); + qp->tx_ctx.mpa_crc_enabled = true; + siw_crc_init(&qp->rx_stream.mpa_crc); + qp->rx_stream.mpa_crc_enabled = true; } if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) { siw_dbg_qp(qp, "no socket\n"); diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index ed4fc39718b4..a10820e33887 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -38,7 +38,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, p = siw_get_upage(umem, dest_addr); if (unlikely(!p)) { - pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n", + pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", __func__, qp_id(rx_qp(srx)), (void *)(uintptr_t)dest_addr, (void *)(uintptr_t)umem->fp_addr); @@ -51,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, pg_off = dest_addr & ~PAGE_MASK; bytes = min(len, (int)PAGE_SIZE - pg_off); - siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes); + siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); dest = kmap_atomic(p); rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, @@ -67,10 +67,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, return -EFAULT; } - if (srx->mpa_crc_hd) { + if (srx->mpa_crc_enabled) { if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) { - crypto_shash_update(srx->mpa_crc_hd, - (u8 *)(dest + pg_off), bytes); + siw_crc_update(&srx->mpa_crc, dest + pg_off, + bytes); kunmap_atomic(dest); } else { kunmap_atomic(dest); @@ -105,17 +105,17 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) { int rv; - siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len); + siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); if (unlikely(rv)) { - pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n", + pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", qp_id(rx_qp(srx)), __func__, len, kva, rv); return rv; } - if (srx->mpa_crc_hd) - crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len); + if (srx->mpa_crc_enabled) + siw_crc_update(&srx->mpa_crc, kva, len); srx->skb_offset += len; srx->skb_copied += len; @@ 
-966,16 +966,16 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) if (srx->fpdu_part_rem) return -EAGAIN; - if (!srx->mpa_crc_hd) + if (!srx->mpa_crc_enabled) return 0; if (srx->pad) - crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); + siw_crc_update(&srx->mpa_crc, tbuf, srx->pad); /* * CRC32 is computed, transmitted and received directly in NBO, * so there's never a reason to convert byte order. */ - crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own); + siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own); crc_in = (__force __wsum)srx->trailer.crc; if (unlikely(crc_in != crc_own)) { @@ -1093,13 +1093,12 @@ static int siw_get_hdr(struct siw_rx_stream *srx) * (tagged/untagged). E.g., a WRITE can get intersected by a SEND, * but not by a READ RESPONSE etc. */ - if (srx->mpa_crc_hd) { + if (srx->mpa_crc_enabled) { /* * Restart CRC computation */ - crypto_shash_init(srx->mpa_crc_hd); - crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr, - srx->fpdu_part_rcvd); + siw_crc_init(&srx->mpa_crc); + siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd); } if (frx->more_ddp_segs) { frx->first_ddp_seg = 0; diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 64ad9e0895bd..6432bce7d083 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -248,10 +248,8 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) /* * Do complete CRC if enabled and short packet */ - if (c_tx->mpa_crc_hd && - crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, - c_tx->ctrl_len, (u8 *)crc) != 0) - return -EINVAL; + if (c_tx->mpa_crc_enabled) + siw_crc_oneshot(&c_tx->pkt, c_tx->ctrl_len, (u8 *)crc); c_tx->ctrl_len += MPA_CRC_SIZE; return PKT_COMPLETE; @@ -331,6 +329,8 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, msg.msg_flags &= ~MSG_MORE; tcp_rate_check_app_limited(sk); + if (!sendpage_ok(page[i])) + msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); @@ -480,9 +480,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) iov[seg].iov_len = sge_len; if (do_crc) - crypto_shash_update(c_tx->mpa_crc_hd, - iov[seg].iov_base, - sge_len); + siw_crc_update(&c_tx->mpa_crc, + iov[seg].iov_base, sge_len); sge_off += sge_len; data_len -= sge_len; seg++; @@ -514,15 +513,14 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) iov[seg].iov_len = plen; if (do_crc) - crypto_shash_update( - c_tx->mpa_crc_hd, + siw_crc_update( + &c_tx->mpa_crc, iov[seg].iov_base, plen); } else if (do_crc) { kaddr = kmap_local_page(p); - crypto_shash_update(c_tx->mpa_crc_hd, - kaddr + fp_off, - plen); + siw_crc_update(&c_tx->mpa_crc, + kaddr + fp_off, plen); kunmap_local(kaddr); } } else { @@ -534,10 +532,9 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) page_array[seg] = ib_virt_dma_to_page(va); if (do_crc) - crypto_shash_update( - c_tx->mpa_crc_hd, - ib_virt_dma_to_ptr(va), - plen); + siw_crc_update(&c_tx->mpa_crc, + ib_virt_dma_to_ptr(va), + plen); } sge_len -= plen; @@ -574,14 +571,14 @@ sge_done: if (c_tx->pad) { *(u32 *)c_tx->trailer.pad = 0; if (do_crc) - crypto_shash_update(c_tx->mpa_crc_hd, - (u8 *)&c_tx->trailer.crc - c_tx->pad, - c_tx->pad); + siw_crc_update(&c_tx->mpa_crc, + (u8 *)&c_tx->trailer.crc - c_tx->pad, + c_tx->pad); } - if (!c_tx->mpa_crc_hd) + if (!c_tx->mpa_crc_enabled) c_tx->trailer.crc = 0; else if (do_crc) - 
crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc); + siw_crc_final(&c_tx->mpa_crc, (u8 *)&c_tx->trailer.crc); data_len = c_tx->bytes_unsent; @@ -734,10 +731,9 @@ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe) /* * Init MPA CRC computation */ - if (c_tx->mpa_crc_hd) { - crypto_shash_init(c_tx->mpa_crc_hd); - crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, - c_tx->ctrl_len); + if (c_tx->mpa_crc_enabled) { + siw_crc_init(&c_tx->mpa_crc); + siw_crc_update(&c_tx->mpa_crc, &c_tx->pkt, c_tx->ctrl_len); c_tx->do_crc = 1; } } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index ecf0444666b4..2b2a7b8e93b0 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -171,21 +171,28 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr) { - struct siw_device *sdev = to_siw_dev(base_dev); + struct net_device *ndev; int rv; memset(attr, 0, sizeof(*attr)); rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, &attr->active_width); + if (rv) + return rv; + + ndev = ib_device_get_netdev(base_dev, SIW_PORT); + if (!ndev) + return -ENODEV; + attr->gid_tbl_len = 1; attr->max_msg_sz = -1; - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? + attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu); + attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); + attr->state = ib_get_curr_port_state(ndev); + attr->phys_state = attr->state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; - attr->state = sdev->state; /* * All zero * @@ -199,6 +206,7 @@ int siw_query_port(struct ib_device *base_dev, u32 port, * attr->subnet_timeout = 0; * attr->init_type_repy = 0; */ + dev_put(ndev); return rv; } @@ -505,21 +513,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct siw_qp *qp; - struct siw_device *sdev; + struct net_device *ndev; - if (base_qp && qp_attr && qp_init_attr) { + if (base_qp && qp_attr && qp_init_attr) qp = to_siw_qp(base_qp); - sdev = to_siw_dev(base_qp->device); - } else { + else return -EINVAL; - } + + ndev = ib_device_get_netdev(base_qp->device, SIW_PORT); + if (!ndev) + return -ENODEV; + qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; qp_attr->cap.max_recv_wr = qp->attrs.rq_size; qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); qp_attr->max_rd_atomic = qp->attrs.irq_size; qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; @@ -534,6 +545,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; + dev_put(ndev); return 0; } @@ -619,9 +631,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) } up_write(&qp->state_lock); - kfree(qp->tx_ctx.mpa_crc_hd); - kfree(qp->rx_stream.mpa_crc_hd); - qp->scq = qp->rcq = NULL; siw_qp_put(qp); @@ -927,7 +936,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, rv = -EINVAL; 
break; } - siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n", + siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", sqe->opcode, sqe->flags, (void *)(uintptr_t)sqe->id); @@ -1093,7 +1102,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, siw_dbg_qp(qp, "error %d\n", rv); *bad_wr = wr; } - return rv > 0 ? 0 : rv; + return rv; } int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) @@ -1124,12 +1133,13 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) * * @base_cq: CQ as allocated by RDMA midlayer * @attr: Initial CQ attributes - * @udata: relates to user context + * @attrs: uverbs bundle */ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata) + struct uverbs_attr_bundle *attrs) { + struct ib_udata *udata = &attrs->driver_udata; struct siw_device *sdev = to_siw_dev(base_cq->device); struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; @@ -1322,7 +1332,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, struct siw_device *sdev = to_siw_dev(pd->device); int rv; - siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", + siw_dbg_pd(pd, "start: 0x%p, va: 0x%p, len: %llu\n", (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); @@ -1515,7 +1525,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, - "%llu bytes, start 0x%pK, %u SLE to %u entries\n", + "%llu bytes, start 0x%p, %u SLE to %u entries\n", mem->len, (void *)(uintptr_t)mem->va, num_sle, pbl->num_buf); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 4b57a4fb7237..1f1a305540af 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -43,7 +43,7 @@ int siw_get_port_immutable(struct ib_device *base_dev, u32 port, int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, struct ib_udata *udata); int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr); int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, |
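A pattern repeated across the rxe_verbs.c, siw_cm.c and siw_verbs.c hunks above is the removal of the cached net_device pointer (rxe->ndev, sdev->netdev) in favour of per-call lookups through ib_device_get_netdev(), which returns a referenced netdev that the caller must release with dev_put(). A minimal sketch of that lookup-and-put pattern as the query paths use it; sketch_query_mtu and SKETCH_PORT are illustrative names, not part of the patch:

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

#define SKETCH_PORT 1	/* the soft RDMA devices expose a single port */

static int sketch_query_mtu(struct ib_device *ibdev, unsigned int *mtu)
{
	struct net_device *ndev;

	ndev = ib_device_get_netdev(ibdev, SKETCH_PORT);
	if (!ndev)
		return -ENODEV;		/* netdev already gone */

	*mtu = READ_ONCE(ndev->mtu);	/* sample while the reference is held */

	dev_put(ndev);			/* drop the reference the lookup took */
	return 0;
}

Looking the netdev up on demand, instead of keeping a raw pointer in the device struct, leaves no stale pointer behind once the underlying interface is unregistered, which appears to be the motivation for dropping the rxe->ndev and sdev->netdev fields.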