summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mana
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mana')
-rw-r--r--drivers/infiniband/hw/mana/Makefile2
-rw-r--r--drivers/infiniband/hw/mana/ah.c58
-rw-r--r--drivers/infiniband/hw/mana/counters.c105
-rw-r--r--drivers/infiniband/hw/mana/counters.h44
-rw-r--r--drivers/infiniband/hw/mana/cq.c319
-rw-r--r--drivers/infiniband/hw/mana/device.c190
-rw-r--r--drivers/infiniband/hw/mana/main.c674
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h515
-rw-r--r--drivers/infiniband/hw/mana/mr.c142
-rw-r--r--drivers/infiniband/hw/mana/qp.c636
-rw-r--r--drivers/infiniband/hw/mana/shadow_queue.h115
-rw-r--r--drivers/infiniband/hw/mana/wq.c31
-rw-r--r--drivers/infiniband/hw/mana/wr.c168
13 files changed, 2641 insertions, 358 deletions
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..921c05e08b11 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 83ebd070535a..28e154bbb50f 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -6,69 +6,102 @@
#include "mana_ib.h"
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_create_cq_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
- struct gdma_context *gc;
+ bool is_rnic_cq;
+ u32 doorbell;
+ u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
- if (udata->inlen < sizeof(ucmd))
- return -EINVAL;
+ cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+ cq->cq_handle = INVALID_MANA_HANDLE;
- if (attr->comp_vector > gc->max_num_queues)
- return -EINVAL;
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
- cq->comp_vector = attr->comp_vector;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
- if (attr->cqe > mdev->adapter_caps.max_qp_wr) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
- cq->cqe = attr->cqe;
- cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(cq->umem)) {
- err = PTR_ERR(cq->umem);
- ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
- err);
- return err;
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = mdev->gdma_dev->doorbell;
}
- err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to create dma region for create cq, %d\n",
- err);
- goto err_release_umem;
+ if (is_rnic_cq) {
+ err = mana_ib_gd_create_cq(mdev, cq, doorbell);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+ goto err_destroy_queue;
+ }
+
+ err = mana_ib_install_cq_cb(mdev, cq);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+ goto err_destroy_rnic_cq;
+ }
}
- ibdev_dbg(ibdev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, cq->gdma_region);
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
+ }
- /*
- * The CQ ID is not known at this time. The ID is generated at create_qp
- */
- cq->id = INVALID_QUEUE_ID;
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
return 0;
-err_release_umem:
- ib_umem_release(cq->umem);
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, cq);
+err_destroy_rnic_cq:
+ mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
return err;
}
@@ -77,33 +110,207 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
struct ib_device *ibdev = ibcq->device;
struct mana_ib_dev *mdev;
- struct gdma_context *gc;
- int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
- err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to destroy dma region, %d\n", err);
- return err;
- }
+ mana_ib_remove_cq_cb(mdev, cq);
- if (cq->id != INVALID_QUEUE_ID) {
- kfree(gc->cq_table[cq->id]);
- gc->cq_table[cq->id] = NULL;
- }
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_cq(mdev, cq);
- ib_umem_release(cq->umem);
+ mana_ib_destroy_queue(mdev, &cq->queue);
return 0;
}
-void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+static void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
{
struct mana_ib_cq *cq = ctx;
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue *gdma_cq;
+
+ if (cq->queue.id >= gc->max_num_cqs)
+ return -EINVAL;
+ /* Create CQ table entry */
+ WARN_ON(gc->cq_table[cq->queue.id]);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq)
+ return -ENOMEM;
+
+ gdma_cq->cq.context = cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = cq->queue.id;
+ gc->cq_table[cq->queue.id] = gdma_cq;
+ return 0;
+}
+
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+
+ if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
+ return;
+
+ if (cq->queue.kmem)
+ /* Then it will be cleaned and removed by the mana */
+ return;
+
+ kfree(gc->cq_table[cq->queue.id]);
+ gc->cq_table[cq->queue.id] = NULL;
+}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 6fa902ee80a6..165c0a1e67d1 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -5,39 +5,53 @@
#include "mana_ib.h"
#include <net/mana/mana_auxiliary.h>
+#include <net/addrconf.h>
MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
MODULE_LICENSE("GPL");
-MODULE_IMPORT_NS(NET_MANA);
+MODULE_IMPORT_NS("NET_MANA");
static const struct ib_device_ops mana_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MANA,
.uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+ .add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
.create_wq = mana_ib_create_wq,
.dealloc_pd = mana_ib_dealloc_pd,
.dealloc_ucontext = mana_ib_dealloc_ucontext,
+ .del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
+ .get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
+ .query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -46,64 +60,156 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+
+ /* Only process events from our parent device */
+ if (event_dev != mc->ports[0])
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ /*
+ * RDMA core will setup GID based on updated netdev.
+ * It's not possible to race with the core as rtnl lock is being
+ * held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
struct gdma_dev *mdev = madev->mdev;
- struct mana_context *mc;
+ struct net_device *ndev;
struct mana_ib_dev *dev;
+ u8 mac_addr[ETH_ALEN];
int ret;
- mc = mdev->driver_data;
-
dev = ib_alloc_device(mana_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
-
- dev->ib_dev.phys_port_cnt = mc->num_ports;
-
- ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
- mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
-
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
-
- /*
- * num_comp_vectors needs to set to the max MSIX index
- * when interrupts and event queues are implemented
- */
- dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dev.parent = mdev->gdma_context->dev;
-
- ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
- ret);
- goto free_ib_device;
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
+
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+ goto free_ib_device;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
}
- dev->gdma_dev = &mdev->gdma_context->mana_ib;
- ret = mana_ib_gd_query_adapter_caps(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
- ret);
- goto deregister_device;
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto destroy_rnic;
}
- ret = ib_register_device(&dev->ib_dev, "mana_%d",
- mdev->gdma_context->dev);
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
if (ret)
- goto deregister_device;
+ goto deallocate_pool;
dev_set_drvdata(&adev->dev, dev);
return 0;
-deregister_device:
- mana_gd_deregister_device(dev->gdma_dev);
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
+destroy_rnic:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
+destroy_eqs:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
ib_dealloc_device(&dev->ib_dev);
return ret;
}
@@ -113,23 +219,25 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
-
- mana_gd_deregister_device(dev->gdma_dev);
-
+ dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
+ }
+ xa_destroy(&dev->qp_table_wq);
ib_dealloc_device(&dev->ib_dev);
}
static const struct auxiliary_device_id mana_id_table[] = {
- {
- .name = "mana.rdma",
- },
+ { .name = "mana.rdma", },
+ { .name = "mana.eth", },
{},
};
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
static struct auxiliary_driver mana_driver = {
- .name = "rdma",
.probe = mana_ib_probe,
.remove = mana_ib_remove,
.id_table = mana_id_table,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index faca092456fa..41a24a186f9d 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -4,17 +4,15 @@
*/
#include "mana_ib.h"
+#include "linux/pci.h"
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port)
{
- struct gdma_dev *gd = &dev->gdma_dev->gdma_context->mana;
struct mana_port_context *mpc;
struct net_device *ndev;
- struct mana_context *mc;
- mc = gd->driver_data;
- ndev = mc->ports[port];
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
@@ -31,14 +29,11 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
u32 doorbell_id)
{
- struct gdma_dev *mdev = &dev->gdma_dev->gdma_context->mana;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
int err;
- mc = mdev->driver_data;
- ndev = mc->ports[port];
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
@@ -79,17 +74,20 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct gdma_create_pd_req req = {};
enum gdma_pd_flags flags = 0;
struct mana_ib_dev *dev;
- struct gdma_dev *mdev;
+ struct gdma_context *gc;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- mdev = dev->gdma_dev;
+ gc = mdev_to_gc(dev);
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
req.flags = flags;
- err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+ err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
@@ -119,17 +117,17 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct gdma_destory_pd_resp resp = {};
struct gdma_destroy_pd_req req = {};
struct mana_ib_dev *dev;
- struct gdma_dev *mdev;
+ struct gdma_context *gc;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- mdev = dev->gdma_dev;
+ gc = mdev_to_gc(dev);
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
sizeof(resp));
req.pd_handle = pd->pd_handle;
- err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+ err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
@@ -180,7 +178,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
req.num_resources = 1;
- req.alignment = 1;
+ req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
/* Have GDMA start searching from 0 */
req.allocated_resources = 0;
@@ -206,13 +204,11 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
struct ib_device *ibdev = ibcontext->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
- struct gdma_dev *dev;
int doorbell_page;
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- dev = mdev->gdma_dev;
- gc = dev->gdma_context;
+ gc = mdev_to_gc(mdev);
/* Allocate a doorbell page index */
ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
@@ -238,13 +234,76 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
+ gc = mdev_to_gc(mdev);
ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
if (ret)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* take ownership into mana_ib from mana */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue)
+{
+ struct ib_umem *umem;
+ int err;
+
+ queue->umem = NULL;
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+
+ umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ err = PTR_ERR(umem);
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %d\n", err);
+ return err;
+ }
+
+ err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
+ goto free_umem;
+ }
+ queue->umem = umem;
+
+ ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+
+ return 0;
+free_umem:
+ ib_umem_release(umem);
+ return err;
+}
+
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
+{
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
+ ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
+}
+
static int
mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
struct gdma_context *gc,
@@ -309,8 +368,8 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
return 0;
}
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
- mana_handle_t *gdma_region)
+static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, unsigned long page_sz)
{
struct gdma_dma_region_add_pages_req *add_req = NULL;
size_t num_pages_processed = 0, num_pages_to_handle;
@@ -322,23 +381,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
size_t max_pgs_create_cmd;
struct gdma_context *gc;
size_t num_pages_total;
- struct gdma_dev *mdev;
- unsigned long page_sz;
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
- int err;
+ int err = 0;
- mdev = dev->gdma_dev;
- gc = mdev->gdma_context;
+ gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
- /* Hardware requires dma region to align to chosen page size */
- page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
- if (!page_sz) {
- ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
- return -ENOMEM;
- }
num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
max_pgs_create_cmd =
@@ -358,8 +408,8 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
sizeof(struct gdma_create_dma_region_resp));
create_req->length = umem->length;
- create_req->offset_in_page = umem->address & (page_sz - 1);
- create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+ create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz);
+ create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT;
create_req->page_count = num_pages_total;
ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
@@ -424,12 +474,39 @@ out:
return err;
}
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt)
+{
+ unsigned long page_sz;
+
+ page_sz = ib_umem_find_best_pgsz(umem, dev->adapter_caps.page_size_cap, virt);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region)
+{
+ unsigned long page_sz;
+
+ /* Hardware requires dma region to align to chosen page size */
+ page_sz = ib_umem_find_best_pgoff(umem, dev->adapter_caps.page_size_cap, 0);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
{
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
- gc = mdev->gdma_context;
ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
return mana_gd_destroy_dma_region(gc, gdma_region);
@@ -447,7 +524,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
+ gc = mdev_to_gc(mdev);
if (vma->vm_pgoff != 0) {
ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
@@ -460,13 +537,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
PAGE_SHIFT;
prot = pgprot_writecombine(vma->vm_page_prot);
- ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
+ ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot,
NULL);
if (ret)
ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
else
- ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
- pfn, gc->db_page_size, ret);
+ ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n",
+ pfn, PAGE_SIZE, ret);
return ret;
}
@@ -474,11 +551,23 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
- /*
- * This version only support RAW_PACKET
- * other values need to be filled for other types
- */
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ if (mana_ib_is_rnic(dev)) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ }
return 0;
}
@@ -486,17 +575,34 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
- struct mana_ib_dev *dev = container_of(ibdev,
- struct mana_ib_dev, ib_dev);
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
+ memset(props, 0, sizeof(*props));
+ props->vendor_id = pdev->vendor;
+ props->vendor_part_id = dev->gdma_dev->dev_id.type;
+ props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+ props->page_size_cap = dev->adapter_caps.page_size_cap;
props->max_qp = dev->adapter_caps.max_qp_count;
props->max_qp_wr = dev->adapter_caps.max_qp_wr;
+ props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = dev->adapter_caps.max_send_sge_count;
+ props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_sge_rd = dev->adapter_caps.max_recv_sge_count;
props->max_cq = dev->adapter_caps.max_cq_count;
props->max_cqe = dev->adapter_caps.max_qp_wr;
props->max_mr = dev->adapter_caps.max_mr_count;
- props->max_mr_size = MANA_IB_MAX_MR_SIZE;
- props->max_send_sge = dev->adapter_caps.max_send_sge_count;
- props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_pd = dev->adapter_caps.max_pd_count;
+ props->max_qp_rd_atom = dev->adapter_caps.max_inbound_read_limit;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_qp_init_rd_atom = dev->adapter_caps.max_outbound_read_limit;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_ah = INT_MAX;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev))
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
return 0;
}
@@ -504,7 +610,46 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
- /* This version doesn't return port properties */
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
+
+ if (!ndev)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+
+ if (netif_carrier_ok(ndev) && netif_running(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_EDR;
+ props->pkey_tbl_len = 1;
+ if (mana_ib_is_rnic(dev)) {
+ props->gid_tbl_len = 16;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ }
+
+ return 0;
+}
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index != 0)
+ return -EINVAL;
+ *pkey = IB_DEFAULT_PKEY_FULL;
return 0;
}
@@ -528,10 +673,10 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
sizeof(resp));
- req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
req.hdr.dev_id = dev->gdma_dev->dev_id;
- err = mana_gd_send_request(dev->gdma_dev->gdma_context, sizeof(req),
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
&req, sizeof(resp), &resp);
if (err) {
@@ -557,6 +702,427 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
caps->max_inline_data_size = resp.max_inline_data_size;
caps->max_send_sge_count = resp.max_send_sge_count;
caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
+
+ caps->page_size_cap = PAGE_SZ_BM;
+ if (mdev_to_gc(dev)->pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB)
+ caps->page_size_cap |= (SZ_4M | SZ_1G | SZ_2G);
+
+ return 0;
+}
+
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+ caps->page_size_cap = PAGE_SZ_BM;
+
+ return 0;
+}
+
+static void
+mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
+{
+ struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx;
+ struct mana_ib_qp *qp;
+ struct ib_event ev;
+ u32 qpn;
+
+ switch (event->type) {
+ case GDMA_EQE_RNIC_QP_FATAL:
+ qpn = event->details[0];
+ qp = mana_get_qp_ref(mdev, qpn, false);
+ if (!qp)
+ break;
+ if (qp->ibqp.event_handler) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ mana_put_qp_ref(qp);
+ break;
+ default:
+ break;
+ }
+}
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err, i;
+
+ spec.type = GDMA_EQ;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = EQ_SIZE;
+ spec.eq.callback = mana_ib_event_handler;
+ spec.eq.context = mdev;
+ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_index = 0;
+
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
+ if (err)
+ return err;
+
+ mdev->eqs = kcalloc(mdev->ib_dev.num_comp_vectors, sizeof(struct gdma_queue *),
+ GFP_KERNEL);
+ if (!mdev->eqs) {
+ err = -ENOMEM;
+ goto destroy_fatal_eq;
+ }
+ spec.eq.callback = NULL;
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
+ spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
+ if (err)
+ goto destroy_eqs;
+ }
+
+ return 0;
+
+destroy_eqs:
+ while (i-- > 0)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+ kfree(mdev->eqs);
+destroy_fatal_eq:
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+ return err;
+}
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int i;
+
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+
+ kfree(mdev->eqs);
+}
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_create_adapter_resp resp = {};
+ struct mana_rnic_create_adapter_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.req.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.notify_eq_id = mdev->fatal_err_eq->id;
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
+ return err;
+ }
+ mdev->adapter_handle = resp.adapter;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_destroy_adapter_resp resp = {};
+ struct mana_rnic_destroy_adapter_req req = {};
+ struct gdma_context *gc;
+ int err;
+
+ gc = mdev_to_gc(mdev);
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy RNIC adapter err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_ADD;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_REMOVE;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac)
+{
+ struct mana_rnic_config_mac_addr_resp resp = {};
+ struct mana_rnic_config_mac_addr_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = op;
+ copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config Mac addr err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_cq_resp resp = {};
+ struct mana_rnic_create_cq_req req = {};
+ int err;
+
+ if (!mdev->eqs)
+ return -EINVAL;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.gdma_region = cq->queue.gdma_region;
+ req.eq_id = mdev->eqs[cq->comp_vector]->id;
+ req.doorbell_page = doorbell;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create cq err %d", err);
+ return err;
+ }
+
+ cq->queue.id = resp.cq_id;
+ cq->cq_handle = resp.cq_handle;
+ /* The GDMA region is now owned by the CQ handle */
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_destroy_cq_resp resp = {};
+ struct mana_rnic_destroy_cq_req req = {};
+ int err;
+
+ if (cq->cq_handle == INVALID_MANA_HANDLE)
+ return 0;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.cq_handle = cq->cq_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy cq err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_qp_resp resp = {};
+ struct mana_rnic_create_qp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->rc_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.flags = flags;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.rc_qp_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++) {
+ qp->rc_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_rc_qp_resp resp = {0};
+ struct mana_rnic_destroy_rc_qp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.rc_qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy rc qp err %d", err);
+ return err;
+ }
+ return 0;
+}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 6bdc0f5498d5..42bebd6cd4f7 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -11,8 +11,11 @@
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -21,12 +24,25 @@
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
/*
* The hardware limit of number of MRs is greater than maximum number of MRs
* that can possibly represent in 24 bits
*/
#define MANA_IB_MAX_MR 0xFFFFFFu
+/*
+ * The CA timeout is approx. 260ms (4us * 2^(DELAY))
+ */
+#define MANA_CA_ACK_DELAY 16
+
+/*
+ * The buffer used for writing AV
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -43,21 +59,35 @@ struct mana_ib_adapter_caps {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
+ u64 page_size_cap;
+};
+
+struct mana_ib_queue {
+ struct ib_umem *umem;
+ struct gdma_queue *kmem;
+ u64 gdma_region;
+ u64 id;
};
struct mana_ib_dev {
struct ib_device ib_dev;
struct gdma_dev *gdma_dev;
+ mana_handle_t adapter_handle;
+ struct gdma_queue *fatal_err_eq;
+ struct gdma_queue **eqs;
+ struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
};
struct mana_ib_wq {
struct ib_wq ibwq;
- struct ib_umem *umem;
+ struct mana_ib_queue queue;
int wqe;
u32 wq_buf_size;
- u64 gdma_region;
- u64 id;
mana_handle_t rx_object;
};
@@ -74,6 +104,25 @@ struct mana_ib_pd {
u32 tx_vp_offset;
};
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -82,25 +131,60 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
- struct ib_umem *umem;
+ struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
int cqe;
- u64 gdma_region;
- u64 id;
u32 comp_vector;
+ mana_handle_t cq_handle;
+};
+
+enum mana_rc_queue_type {
+ MANA_RC_SEND_QUEUE_REQUESTER = 0,
+ MANA_RC_SEND_QUEUE_RESPONDER,
+ MANA_RC_SEND_QUEUE_FMR,
+ MANA_RC_RECV_QUEUE_REQUESTER,
+ MANA_RC_RECV_QUEUE_RESPONDER,
+ MANA_RC_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_rc_qp {
+ struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
+};
+
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
};
struct mana_ib_qp {
struct ib_qp ibqp;
- /* Work queue info */
- struct ib_umem *sq_umem;
- int sqe;
- u64 sq_gdma_region;
- u64 sq_id;
- mana_handle_t tx_object;
+ mana_handle_t qp_handle;
+ union {
+ struct mana_ib_queue raw_sq;
+ struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
+ };
/* The port on the IB device, starting with 1 */
u32 port;
+
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
+ refcount_t refcount;
+ struct completion free;
};
struct mana_ib_ucontext {
@@ -114,12 +198,28 @@ struct mana_ib_rwq_ind_table {
enum mana_ib_command_code {
MANA_IB_GET_ADAPTER_CAP = 0x30001,
+ MANA_IB_CREATE_ADAPTER = 0x30002,
+ MANA_IB_DESTROY_ADAPTER = 0x30003,
+ MANA_IB_CONFIG_IP_ADDR = 0x30004,
+ MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
+ MANA_IB_CREATE_CQ = 0x30008,
+ MANA_IB_DESTROY_CQ = 0x30009,
+ MANA_IB_CREATE_RC_QP = 0x3000a,
+ MANA_IB_DESTROY_RC_QP = 0x3000b,
+ MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
struct gdma_req_hdr hdr;
}; /*HW Data */
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+};
+
struct mana_ib_query_adapter_caps_resp {
struct gdma_resp_hdr hdr;
u32 max_sq_id;
@@ -140,14 +240,352 @@ struct mana_ib_query_adapter_caps_resp {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
+}; /* HW Data */
+
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /*HW Data */
+
+struct mana_rnic_create_adapter_req {
+ struct gdma_req_hdr hdr;
+ u32 notify_eq_id;
+ u32 reserved;
+ u64 feature_flags;
+}; /*HW Data */
+
+struct mana_rnic_create_adapter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /*HW Data */
+
+struct mana_rnic_destroy_adapter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_ib_addr_op {
+ ADDR_OP_ADD = 1,
+ ADDR_OP_REMOVE = 2,
+};
+
+enum sgid_entry_type {
+ SGID_TYPE_IPV4 = 1,
+ SGID_TYPE_IPV6 = 2,
+};
+
+struct mana_rnic_config_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ enum sgid_entry_type sgid_type;
+ u8 ip_addr[16];
+}; /* HW Data */
+
+struct mana_rnic_config_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ u8 mac_addr[ETH_ALEN];
+ u8 reserved[6];
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ u64 gdma_region;
+ u32 eq_id;
+ u32 doorbell_page;
}; /* HW Data */
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
- mana_handle_t *gdma_region);
+struct mana_rnic_create_cq_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t cq_handle;
+ u32 cq_id;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t cq_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_rnic_create_rc_flags {
+ MANA_RC_FLAG_NO_FMR = 2,
+};
+
+struct mana_rnic_create_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_RC_QUEUE_TYPE_MAX];
+ u64 deprecated[2];
+ u64 flags;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_create_qp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t rc_qp_handle;
+ u32 queue_ids[MANA_RC_QUEUE_TYPE_MAX];
+ u32 reserved;
+}; /* HW Data*/
+
+struct mana_rnic_destroy_rc_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t rc_qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data*/
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_ah_attr {
+ u8 src_addr[16];
+ u8 dest_addr[16];
+ u8 src_mac[ETH_ALEN];
+ u8 dest_mac[ETH_ALEN];
+ u8 src_addr_type;
+ u8 dest_addr_type;
+ u8 hop_limit;
+ u8 traffic_class;
+ u16 src_port;
+ u16 dest_port;
+ u32 reserved;
+};
+
+struct mana_rnic_set_qp_state_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+ u64 attr_mask;
+ u32 qp_state;
+ u32 path_mtu;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qpn;
+ u32 max_dest_rd_atomic;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ u32 min_rnr_timer;
+ u32 reserved;
+ struct mana_ib_ah_attr ah_attr;
+}; /* HW Data */
+
+struct mana_rnic_set_qp_state_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
+static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->gdma_context;
+}
+
+static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
+ u32 qid, bool is_sq)
+{
+ struct mana_ib_qp *qp;
+ unsigned long flag;
+
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
+ xa_lock_irqsave(&mdev->qp_table_wq, flag);
+ qp = xa_load(&mdev->qp_table_wq, qid);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&mdev->qp_table_wq, flag);
+ return qp;
+}
+
+static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
+static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_context *mc = gc->mana.driver_data;
+
+ if (port < 1 || port > mc->num_ports)
+ return NULL;
+ return mc->ports[port - 1];
+}
+
+static inline void copy_in_reverse(u8 *dst, const u8 *src, u32 size)
+{
+ u32 i;
+
+ for (i = 0; i < size; i++)
+ dst[size - 1 - i] = src[i];
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region);
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt);
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue);
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
+
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
@@ -185,7 +623,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port);
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
@@ -210,6 +648,51 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev);
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num);
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac);
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell);
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq);
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 351207c60eb6..6d974d0a8400 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -5,8 +5,10 @@
#include "mana_ib.h"
-#define VALID_MR_FLAGS \
- (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+#define VALID_MR_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC | IB_ZERO_BASED)
+
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
@@ -22,6 +24,9 @@ mana_ib_verbs_to_gdma_access_flags(int access_flags)
if (access_flags & IB_ACCESS_REMOTE_READ)
flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_ATOMIC;
+
return flags;
}
@@ -30,24 +35,26 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
{
struct gdma_create_mr_response resp = {};
struct gdma_create_mr_request req = {};
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
int err;
- gc = mdev->gdma_context;
-
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
sizeof(resp));
req.pd_handle = mr_params->pd_handle;
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
req.gva.access_flags = mr_params->gva.access_flags;
break;
-
+ case GDMA_MR_TYPE_ZBVA:
+ req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
+ req.zbva.access_flags = mr_params->zbva.access_flags;
+ break;
default:
ibdev_dbg(&dev->ib_dev,
"invalid param (GDMA_MR_TYPE) passed, type %d\n",
@@ -77,12 +84,9 @@ static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
{
struct gdma_destroy_mr_response resp = {};
struct gdma_destroy_mr_request req = {};
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
int err;
- gc = mdev->gdma_context;
-
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
sizeof(resp));
@@ -118,6 +122,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
"start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
start, iova, length, access_flags);
+ access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~VALID_MR_FLAGS)
return ERR_PTR(-EINVAL);
@@ -133,7 +138,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
goto err_free;
}
- err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
if (err) {
ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
err);
@@ -141,10 +146,86 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
}
ibdev_dbg(ibdev,
- "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
+ "created dma region for user-mr 0x%llx\n",
dma_region_handle);
mr_params.pd_handle = pd->pd_handle;
+ if (access_flags & IB_ZERO_BASED) {
+ mr_params.mr_type = GDMA_MR_TYPE_ZBVA;
+ mr_params.zbva.dma_region_handle = dma_region_handle;
+ mr_params.zbva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ } else {
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ }
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
mr_params.mr_type = GDMA_MR_TYPE_GVA;
mr_params.gva.dma_region_handle = dma_region_handle;
mr_params.gva.virtual_address = iova;
@@ -164,8 +245,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
return &mr->ibmr;
err_dma_region:
- mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
- dma_region_handle);
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
err_umem:
ib_umem_release(mr->umem);
@@ -175,6 +255,38 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 21ac9fcadf3f..14fd7d6c54a2 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -15,17 +15,13 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
struct mana_port_context *mpc = netdev_priv(ndev);
struct mana_cfg_rx_steer_req_v2 *req;
struct mana_cfg_rx_steer_resp resp = {};
- mana_handle_t *req_indir_tab;
struct gdma_context *gc;
- struct gdma_dev *mdev;
u32 req_buf_size;
int i, err;
- gc = dev->gdma_dev->gdma_context;
- mdev = &gc->mana;
+ gc = mdev_to_gc(dev);
- req_buf_size =
- sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+ req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -39,27 +35,27 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
req->rx_enable = 1;
req->update_default_rxobj = 1;
req->default_rxobj = default_rxobj;
- req->hdr.dev_id = mdev->dev_id;
+ req->hdr.dev_id = gc->mana.dev_id;
/* If there are more than 1 entries in indirection table, enable RSS */
if (log_ind_tbl_size)
req->rss_enable = true;
- req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
- req->indir_tab_offset = sizeof(*req);
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
+ req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
+ indir_tab);
req->update_indir_tab = true;
req->cqe_coalescing_enable = 1;
- req_indir_tab = (mana_handle_t *)(req + 1);
/* The ind table passed to the hardware must have
- * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
* ind_table to MANA_INDIRECT_TABLE_SIZE if required
*/
ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
- req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
+ req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
- req_indir_tab[i]);
+ req->indir_tab[i]);
}
req->update_hashkey = true;
@@ -102,17 +98,12 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
struct mana_ib_create_qp_rss_resp resp = {};
struct mana_ib_create_qp_rss ucmd = {};
- struct gdma_queue **gdma_cq_allocated;
mana_handle_t *mana_ind_table;
struct mana_port_context *mpc;
- struct gdma_queue *gdma_cq;
unsigned int ind_tbl_size;
- struct mana_context *mc;
struct net_device *ndev;
- struct gdma_context *gc;
struct mana_ib_cq *cq;
struct mana_ib_wq *wq;
- struct gdma_dev *gd;
struct mana_eq *eq;
struct ib_cq *ibcq;
struct ib_wq *ibwq;
@@ -120,10 +111,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
u32 port;
int ret;
- gc = mdev->gdma_dev->gdma_context;
- gd = &gc->mana;
- mc = gd->driver_data;
-
if (!udata || udata->inlen < sizeof(ucmd))
return -EINVAL;
@@ -150,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
}
ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
- if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
ibdev_dbg(&mdev->ib_dev,
"Indirect table size %d exceeding limit\n",
ind_tbl_size);
@@ -166,12 +153,12 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
/* IB ports start with 1, MANA start with 0 */
port = ucmd.port;
- if (port < 1 || port > mc->num_ports) {
+ ndev = mana_ib_get_netdev(pd->device, port);
+ if (!ndev) {
ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
port);
return -EINVAL;
}
- ndev = mc->ports[port - 1];
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
@@ -184,13 +171,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
goto fail;
}
- gdma_cq_allocated = kcalloc(ind_tbl_size, sizeof(*gdma_cq_allocated),
- GFP_KERNEL);
- if (!gdma_cq_allocated) {
- ret = -ENOMEM;
- goto fail;
- }
-
qp->port = port;
for (i = 0; i < ind_tbl_size; i++) {
@@ -203,13 +183,13 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
ibcq = ibwq->cq;
cq = container_of(ibcq, struct mana_ib_cq, ibcq);
- wq_spec.gdma_region = wq->gdma_region;
+ wq_spec.gdma_region = wq->queue.gdma_region;
wq_spec.queue_size = wq->wq_buf_size;
- cq_spec.gdma_region = cq->gdma_region;
+ cq_spec.gdma_region = cq->queue.gdma_region;
cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- eq = &mc->eqs[cq->comp_vector % gc->max_num_queues];
+ eq = &mpc->ac->eqs[cq->comp_vector];
cq_spec.attached_eq = eq->eq->id;
ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
@@ -221,35 +201,25 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
}
/* The GDMA regions are now owned by the WQ object */
- wq->gdma_region = GDMA_INVALID_DMA_REGION;
- cq->gdma_region = GDMA_INVALID_DMA_REGION;
+ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
- wq->id = wq_spec.queue_index;
- cq->id = cq_spec.queue_index;
+ wq->queue.id = wq_spec.queue_index;
+ cq->queue.id = cq_spec.queue_index;
ibdev_dbg(&mdev->ib_dev,
- "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
- ret, wq->rx_object, wq->id, cq->id);
+ "rx_object 0x%llx wq id %llu cq id %llu\n",
+ wq->rx_object, wq->queue.id, cq->queue.id);
- resp.entries[i].cqid = cq->id;
- resp.entries[i].wqid = wq->id;
+ resp.entries[i].cqid = cq->queue.id;
+ resp.entries[i].wqid = wq->queue.id;
mana_ind_table[i] = wq->rx_object;
/* Create CQ table entry */
- WARN_ON(gc->cq_table[cq->id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
- if (!gdma_cq) {
- ret = -ENOMEM;
+ ret = mana_ib_install_cq_cb(mdev, cq);
+ if (ret)
goto fail;
- }
- gdma_cq_allocated[i] = gdma_cq;
-
- gdma_cq->cq.context = cq;
- gdma_cq->type = GDMA_CQ;
- gdma_cq->cq.callback = mana_ib_cq_handler;
- gdma_cq->id = cq->id;
- gc->cq_table[cq->id] = gdma_cq;
}
resp.num_entries = i;
@@ -269,7 +239,6 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
goto fail;
}
- kfree(gdma_cq_allocated);
kfree(mana_ind_table);
return 0;
@@ -281,13 +250,10 @@ fail:
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
cq = container_of(ibcq, struct mana_ib_cq, ibcq);
- gc->cq_table[cq->id] = NULL;
- kfree(gdma_cq_allocated[i]);
-
+ mana_ib_remove_cq_cb(mdev, cq);
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
}
- kfree(gdma_cq_allocated);
kfree(mana_ind_table);
return ret;
@@ -306,23 +272,17 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
struct mana_ib_ucontext *mana_ucontext =
rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
ibucontext);
- struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana;
struct mana_ib_create_qp_resp resp = {};
struct mana_ib_create_qp ucmd = {};
- struct gdma_queue *gdma_cq = NULL;
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
- struct ib_umem *umem;
struct mana_eq *eq;
int eq_vec;
u32 port;
int err;
- mc = gd->driver_data;
-
if (!mana_ucontext || udata->inlen < sizeof(ucmd))
return -EINVAL;
@@ -333,11 +293,6 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
return err;
}
- /* IB ports start with 1, MANA Ethernet ports start with 0 */
- port = ucmd.port;
- if (port < 1 || port > mc->num_ports)
- return -EINVAL;
-
if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_send_wr %d exceeding limit\n",
@@ -352,11 +307,17 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
return -EINVAL;
}
- ndev = mc->ports[port - 1];
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(ibpd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
- err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+ err = mana_ib_cfg_vport(mdev, port, pd, mana_ucontext->doorbell);
if (err)
return -ENODEV;
@@ -365,77 +326,51 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
ucmd.sq_buf_addr, ucmd.port);
- umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
- ibdev_dbg(&mdev->ib_dev,
- "Failed to get umem for create qp-raw, err %d\n",
- err);
- goto err_free_vport;
- }
- qp->sq_umem = umem;
-
- err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
- &qp->sq_gdma_region);
+ err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
if (err) {
ibdev_dbg(&mdev->ib_dev,
- "Failed to create dma region for create qp-raw, %d\n",
- err);
- goto err_release_umem;
+ "Failed to create queue for create qp-raw, err %d\n", err);
+ goto err_free_vport;
}
- ibdev_dbg(&mdev->ib_dev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, qp->sq_gdma_region);
-
/* Create a WQ on the same port handle used by the Ethernet */
- wq_spec.gdma_region = qp->sq_gdma_region;
+ wq_spec.gdma_region = qp->raw_sq.gdma_region;
wq_spec.queue_size = ucmd.sq_buf_size;
- cq_spec.gdma_region = send_cq->gdma_region;
+ cq_spec.gdma_region = send_cq->queue.gdma_region;
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues;
- eq = &mc->eqs[eq_vec];
+ eq_vec = send_cq->comp_vector;
+ eq = &mpc->ac->eqs[eq_vec];
cq_spec.attached_eq = eq->eq->id;
err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
- &cq_spec, &qp->tx_object);
+ &cq_spec, &qp->qp_handle);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create wq for create raw-qp, err %d\n",
err);
- goto err_destroy_dma_region;
+ goto err_destroy_queue;
}
/* The GDMA regions are now owned by the WQ object */
- qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
- send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
+ qp->raw_sq.gdma_region = GDMA_INVALID_DMA_REGION;
+ send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
- qp->sq_id = wq_spec.queue_index;
- send_cq->id = cq_spec.queue_index;
+ qp->raw_sq.id = wq_spec.queue_index;
+ send_cq->queue.id = cq_spec.queue_index;
/* Create CQ table entry */
- WARN_ON(gd->gdma_context->cq_table[send_cq->id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
- if (!gdma_cq) {
- err = -ENOMEM;
+ err = mana_ib_install_cq_cb(mdev, send_cq);
+ if (err)
goto err_destroy_wq_obj;
- }
-
- gdma_cq->cq.context = send_cq;
- gdma_cq->type = GDMA_CQ;
- gdma_cq->cq.callback = mana_ib_cq_handler;
- gdma_cq->id = send_cq->id;
- gd->gdma_context->cq_table[send_cq->id] = gdma_cq;
ibdev_dbg(&mdev->ib_dev,
- "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
- qp->tx_object, qp->sq_id, send_cq->id);
+ "qp->qp_handle 0x%llx sq id %llu cq id %llu\n",
+ qp->qp_handle, qp->raw_sq.id, send_cq->queue.id);
- resp.sqid = qp->sq_id;
- resp.cqid = send_cq->id;
+ resp.sqid = qp->raw_sq.id;
+ resp.cqid = send_cq->queue.id;
resp.tx_vp_offset = pd->tx_vp_offset;
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -443,27 +378,323 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev,
"Failed copy udata for create qp-raw, %d\n",
err);
- goto err_release_gdma_cq;
+ goto err_remove_cq_cb;
}
return 0;
-err_release_gdma_cq:
- kfree(gdma_cq);
- gd->gdma_context->cq_table[send_cq->id] = NULL;
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, send_cq);
err_destroy_wq_obj:
- mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
-
-err_destroy_dma_region:
- mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
-err_release_umem:
- ib_umem_release(umem);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
err_free_vport:
- mana_ib_uncfg_vport(mdev, pd, port - 1);
+ mana_ib_uncfg_vport(mdev, pd, port);
+
+ return err;
+}
+
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
+static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
+}
+
+static void mana_table_remove_qp(struct mana_ib_dev *mdev,
+ struct mana_ib_qp *qp)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
+ mana_put_qp_ref(qp);
+ wait_for_completion(&qp->free);
+}
+
+static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_create_rc_qp_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct mana_ib_create_rc_qp ucmd = {};
+ int i, err, j;
+ u64 flags = 0;
+ u32 doorbell;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ flags = MANA_RC_FLAG_NO_FMR;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err);
+ return err;
+ }
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ /* skip FMR for user-level RC QPs */
+ if (i == MANA_RC_SEND_QUEUE_FMR) {
+ qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ continue;
+ }
+ err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
+ &qp->rc_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
+ goto destroy_queues;
+ }
+ j++;
+ }
+
+ err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err);
+ goto destroy_queues;
+ }
+ qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id;
+ qp->port = attr->port_num;
+
+ if (udata) {
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ if (i == MANA_RC_SEND_QUEUE_FMR)
+ continue;
+ resp.queue_id[j] = qp->rc_qp.queues[i].id;
+ j++;
+ }
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto destroy_qp;
+ }
+ }
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+ return err;
+}
+
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
return err;
}
@@ -478,8 +709,12 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
udata);
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_RC:
+ return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
default:
- /* Creating QP other than IB_QPT_RAW_PACKET is not supported */
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
}
@@ -487,11 +722,81 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return -EINVAL;
}
+static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibqp->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_rnic_set_qp_state_resp resp = {};
+ struct mana_rnic_set_qp_state_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ req.qp_state = attr->qp_state;
+ req.attr_mask = attr_mask;
+ req.path_mtu = attr->path_mtu;
+ req.rq_psn = attr->rq_psn;
+ req.sq_psn = attr->sq_psn;
+ req.dest_qpn = attr->dest_qp_num;
+ req.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ req.retry_cnt = attr->retry_cnt;
+ req.rnr_retry = attr->rnr_retry;
+ req.min_rnr_timer = attr->min_rnr_timer;
+ if (attr_mask & IB_QP_AV) {
+ ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ ibqp->port, ibqp->qp_num);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ copy_in_reverse(req.ah_attr.src_mac, mpc->mac_addr, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.dest_mac, attr->ah_attr.roce.dmac, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.src_addr, attr->ah_attr.grh.sgid_attr->gid.raw,
+ sizeof(union ib_gid));
+ copy_in_reverse(req.ah_attr.dest_addr, attr->ah_attr.grh.dgid.raw,
+ sizeof(union ib_gid));
+ if (rdma_gid_attr_network_type(attr->ah_attr.grh.sgid_attr) == RDMA_NETWORK_IPV4) {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV4;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV4;
+ } else {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV6;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV6;
+ }
+ req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
+ req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ ibqp->qp_num, attr->dest_qp_num);
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
+ req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed modify qp err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- /* modify_qp is not supported by this version of the driver */
- return -EOPNOTSUPP;
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
}
static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
@@ -500,16 +805,13 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
- struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_wq *wq;
struct ib_wq *ibwq;
int i;
- mc = gd->driver_data;
- ndev = mc->ports[qp->port - 1];
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
mpc = netdev_priv(ndev);
for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
@@ -527,26 +829,60 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
- struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana;
struct ib_pd *ibpd = qp->ibqp.pd;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_pd *pd;
- mc = gd->driver_data;
- ndev = mc->ports[qp->port - 1];
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
mpc = netdev_priv(ndev);
pd = container_of(ibpd, struct mana_ib_pd, ibpd);
- mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
- if (qp->sq_umem) {
- mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
- ib_umem_release(qp->sq_umem);
- }
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
- mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
+ mana_ib_uncfg_vport(mdev, pd, qp->port);
+
+ return 0;
+}
+
+static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_table_remove_qp(mdev, qp);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+
+ return 0;
+}
+
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
return 0;
}
@@ -562,7 +898,11 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
udata);
return mana_ib_destroy_qp_raw(qp, udata);
-
+ case IB_QPT_RC:
+ return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, Incremented on wqe posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, Incremented on cq polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
index 372d361510e0..f959f4b9244f 100644
--- a/drivers/infiniband/hw/mana/wq.c
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -13,7 +13,6 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
container_of(pd->device, struct mana_ib_dev, ib_dev);
struct mana_ib_create_wq ucmd = {};
struct mana_ib_wq *wq;
- struct ib_umem *umem;
int err;
if (udata->inlen < sizeof(ucmd))
@@ -32,39 +31,18 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
- umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
+ err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+ if (err) {
ibdev_dbg(&mdev->ib_dev,
- "Failed to get umem for create wq, err %d\n", err);
+ "Failed to create queue for create wq, %d\n", err);
goto err_free_wq;
}
- wq->umem = umem;
wq->wqe = init_attr->max_wr;
wq->wq_buf_size = ucmd.wq_buf_size;
wq->rx_object = INVALID_MANA_HANDLE;
-
- err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
- if (err) {
- ibdev_dbg(&mdev->ib_dev,
- "Failed to create dma region for create wq, %d\n",
- err);
- goto err_release_umem;
- }
-
- ibdev_dbg(&mdev->ib_dev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, wq->gdma_region);
-
- /* WQ ID is returned at wq_create time, doesn't know the value yet */
-
return &wq->ibwq;
-err_release_umem:
- ib_umem_release(umem);
-
err_free_wq:
kfree(wq);
@@ -86,8 +64,7 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
- mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
- ib_umem_release(wq->umem);
+ mana_ib_destroy_queue(mdev, &wq->queue);
kfree(wq);
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int err;
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}