Diffstat (limited to 'drivers/infiniband/hw/efa')
-rw-r--r--  drivers/infiniband/hw/efa/efa.h                  |  11
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h  |  77
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_defs.h       |   4
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c              |  30
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.h              |   6
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c          |  10
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h          |   6
-rw-r--r--  drivers/infiniband/hw/efa/efa_io_defs.h          | 106
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c             |  98
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c            |  81
10 files changed, 334 insertions, 95 deletions
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index e2bdec32ae80..838182d0409c 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -57,14 +57,15 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
- int admin_msix_vector_idx;
+ u32 num_irq_vectors;
+ u32 admin_msix_vector_idx;
struct efa_irq admin_irq;
struct efa_stats stats;
/* Array of completion EQs */
struct efa_eq *eqs;
- unsigned int neqs;
+ u32 neqs;
/* Only stores CQs with interrupts enabled */
struct xarray cqs_xa;
@@ -160,14 +161,14 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
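Note: with this change, efa_create_cq() and efa_reg_user_mr_dmabuf() receive a struct uverbs_attr_bundle instead of a bare struct ib_udata; the driver-private udata stays reachable through the bundle. A minimal sketch of the new calling convention (body elided; the real call sites are in the efa_verbs.c hunks below):

    int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                      struct uverbs_attr_bundle *attrs)
    {
            /* driver_udata replaces the former udata parameter */
            struct ib_udata *udata = &attrs->driver_udata;

            /* ... parse the EFA-specific create_cq command from udata ... */
            return 0;
    }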
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 7377c8a9f4d5..fe0b6aec7839 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -30,7 +30,8 @@ enum efa_admin_aq_opcode {
EFA_ADMIN_DEALLOC_UAR = 17,
EFA_ADMIN_CREATE_EQ = 18,
EFA_ADMIN_DESTROY_EQ = 19,
- EFA_ADMIN_MAX_OPCODE = 19,
+ EFA_ADMIN_ALLOC_MR = 20,
+ EFA_ADMIN_MAX_OPCODE = 20,
};
enum efa_admin_aq_feature_id {
@@ -110,7 +111,10 @@ struct efa_admin_create_qp_cmd {
* virtual (IOVA returned by MR registration)
* 1 : rq_virt - If set, RQ ring base address is
* virtual (IOVA returned by MR registration)
- * 7:2 : reserved - MBZ
+ * 2 : unsolicited_write_recv - If set, work requests
+ * will not be consumed for incoming RDMA write with
+ * immediate
+ * 7:3 : reserved - MBZ
*/
u8 flags;
@@ -147,8 +151,11 @@ struct efa_admin_create_qp_cmd {
/* UAR number */
u16 uar;
+ /* Requested service level for the QP, 0 is the default SL */
+ u8 sl;
+
/* MBZ */
- u16 reserved;
+ u8 reserved;
/* MBZ */
u32 reserved2;
@@ -456,6 +463,41 @@ struct efa_admin_dereg_mr_resp {
struct efa_admin_acq_common_desc acq_common_desc;
};
+/*
+ * Allocation of MemoryRegion, required for QP working with Virtual
+ * Addresses in kernel verbs semantics, ready for fast registration use.
+ */
+struct efa_admin_alloc_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved1;
+
+ /* Maximum number of pages this MR supports. */
+ u32 max_pages;
+};
+
+struct efa_admin_alloc_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+};
+
struct efa_admin_create_cq_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
@@ -480,8 +522,8 @@ struct efa_admin_create_cq_cmd {
*/
u8 cq_caps_2;
- /* completion queue depth in # of entries. must be power of 2 */
- u16 cq_depth;
+ /* Sub completion queue depth in # of entries. must be power of 2 */
+ u16 sub_cq_depth;
/* EQ number assigned to this cq */
u16 eqn;
@@ -516,8 +558,8 @@ struct efa_admin_create_cq_resp {
u16 cq_idx;
- /* actual cq depth in number of entries */
- u16 cq_actual_depth;
+ /* actual sub cq depth in number of entries */
+ u16 sub_cq_actual_depth;
/* CQ doorbell address, as offset to PCIe DB BAR */
u32 db_offset;
@@ -575,6 +617,8 @@ struct efa_admin_basic_stats {
u64 rx_pkts;
u64 rx_drops;
+
+ u64 qkey_viol;
};
struct efa_admin_messages_stats {
@@ -663,12 +707,26 @@ struct efa_admin_feature_device_attr_desc {
* polling is supported
* 3 : rdma_write - If set, RDMA Write is supported
* on TX queues
- * 31:4 : reserved - MBZ
+ * 4 : unsolicited_write_recv - If set, unsolicited
+ * write with imm. receive is supported
+ * 31:5 : reserved - MBZ
*/
u32 device_caps;
/* Max RDMA transfer size in bytes */
u32 max_rdma_size;
+
+ /* Unique global ID for an EFA device */
+ u64 guid;
+
+ /* The device maximum link speed in Gbit/sec */
+ u16 max_link_speed_gbps;
+
+ /* MBZ */
+ u16 reserved0;
+
+ /* MBZ */
+ u32 reserved1;
};
struct efa_admin_feature_queue_attr_desc {
@@ -1009,6 +1067,7 @@ struct efa_admin_host_info {
/* create_qp_cmd */
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+#define EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV_MASK BIT(2)
/* modify_qp_cmd */
#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
@@ -1044,10 +1103,10 @@ struct efa_admin_host_info {
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4)
/* create_eq_cmd */
#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
-#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0)
/* host_info */
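Note: the *_MASK defines above follow the driver's sub-field convention: fields are read and written through the EFA_GET()/EFA_SET() helpers (FIELD_GET()/FIELD_PREP() wrappers keyed by the mask name, defined in efa_common_defs.h). A sketch of how the new unsolicited-write-recv bits are exercised, mirroring the efa_com_cmd.c and efa_verbs.c hunks below:

    /* Device advertises the capability; the driver requests it per QP */
    if (EFA_GET(&dev_attr->device_caps,
                EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV))
            EFA_SET(&create_qp_cmd.flags,
                    EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);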
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
index 83f20c38a840..35700c93e639 100644
--- a/drivers/infiniband/hw/efa/efa_admin_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_H_
@@ -96,7 +96,7 @@ struct efa_admin_acq_entry {
struct efa_admin_aenq_common_desc {
u16 group;
- u16 syndrom;
+ u16 syndrome;
/*
* 0 : phase
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 16a24a05fc2a..bafd210dd43e 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -406,8 +406,8 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
return comp_ctx;
}
-static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
- struct efa_admin_acq_entry *cqe)
+static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
{
struct efa_comp_ctx *comp_ctx;
u16 cmd_id;
@@ -416,11 +416,11 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
- if (!comp_ctx) {
+ if (comp_ctx->status != EFA_CMD_SUBMITTED) {
ibdev_err(aq->efa_dev,
- "comp_ctx is NULL. Changing the admin queue running state\n");
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- return;
+ "Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
+ cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ return -EINVAL;
}
comp_ctx->status = EFA_CMD_COMPLETED;
@@ -428,14 +428,17 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
complete(&comp_ctx->wait_event);
+
+ return 0;
}
static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
{
struct efa_admin_acq_entry *cqe;
u16 queue_size_mask;
- u16 comp_num = 0;
+ u16 comp_cmds = 0;
u8 phase;
+ int err;
u16 ci;
queue_size_mask = aq->depth - 1;
@@ -453,10 +456,12 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
* phase bit was validated
*/
dma_rmb();
- efa_com_handle_single_admin_completion(aq, cqe);
+ err = efa_com_handle_single_admin_completion(aq, cqe);
+ if (!err)
+ comp_cmds++;
+ aq->cq.cc++;
ci++;
- comp_num++;
if (ci == aq->depth) {
ci = 0;
phase = !phase;
@@ -465,10 +470,9 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
cqe = &aq->cq.entries[ci];
}
- aq->cq.cc += comp_num;
aq->cq.phase = phase;
- aq->sq.cc += comp_num;
- atomic64_add(comp_num, &aq->stats.completed_cmd);
+ aq->sq.cc += comp_cmds;
+ atomic64_add(comp_cmds, &aq->stats.completed_cmd);
}
static int efa_com_comp_status_to_errno(u8 comp_status)
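Note: the admin CQ is consumed with the phase-bit convention: the device flips the phase bit each time it wraps the ring, so an entry is valid only when its phase matches the consumer's expected phase, and dma_rmb() keeps the payload read from being reordered before that check. After this patch cq.cc advances for every polled entry, while sq.cc (which frees submission slots) advances only for completions matched to a submitted command. A condensed sketch of the loop, with cqe_phase() standing in for the real flag extraction:

    u16 ci = aq->cq.cc & (aq->depth - 1);
    u8 phase = aq->cq.phase;
    u16 comp_cmds = 0;

    while (cqe_phase(&aq->cq.entries[ci]) == phase) {
            dma_rmb();                      /* phase bit first, payload after */
            if (!efa_com_handle_single_admin_completion(aq, &aq->cq.entries[ci]))
                    comp_cmds++;            /* matched a submitted command */
            aq->cq.cc++;                    /* entry consumed either way */
            if (++ci == aq->depth) {        /* wrapped: expect flipped phase */
                    ci = 0;
                    phase = !phase;
            }
    }
    aq->cq.phase = phase;
    aq->sq.cc += comp_cmds;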
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index 77282234ce68..4d9ca97e4296 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_H_
@@ -65,7 +65,7 @@ struct efa_com_admin_queue {
u16 depth;
struct efa_com_admin_cq cq;
struct efa_com_admin_sq sq;
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
unsigned long state;
@@ -89,7 +89,7 @@ struct efa_com_aenq {
struct efa_aenq_handlers *aenq_handlers;
dma_addr_t dma_addr;
u32 cc; /* consumer counter */
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
u16 depth;
u8 phase;
};
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index d3398c7b0bd0..c6b89c45fdc9 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -31,6 +31,10 @@ int efa_com_create_qp(struct efa_com_dev *edev,
create_qp_cmd.qp_alloc_size.recv_queue_depth =
params->rq_depth;
create_qp_cmd.uar = params->uarn;
+ create_qp_cmd.sl = params->sl;
+
+ if (params->unsolicited_write_recv)
+ EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&create_qp_cmd,
@@ -160,7 +164,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
EFA_SET(&create_cmd.cq_caps_2,
EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
params->entry_size_in_bytes / 4);
- create_cmd.cq_depth = params->cq_depth;
+ create_cmd.sub_cq_depth = params->sub_cq_depth;
create_cmd.num_sub_cqs = params->num_sub_cqs;
create_cmd.uar = params->uarn;
if (params->interrupt_mode_enabled) {
@@ -188,7 +192,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
}
result->cq_idx = cmd_completion.cq_idx;
- result->actual_depth = params->cq_depth;
+ result->actual_depth = params->sub_cq_depth;
result->db_off = cmd_completion.db_offset;
result->db_valid = EFA_GET(&cmd_completion.flags,
EFA_ADMIN_CREATE_CQ_RESP_DB_VALID);
@@ -462,6 +466,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->db_bar = resp.u.device_attr.db_bar;
result->max_rdma_size = resp.u.device_attr.max_rdma_size;
result->device_caps = resp.u.device_attr.device_caps;
+ result->guid = resp.u.device_attr.guid;
+ result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps;
if (result->admin_api_version < 1) {
ibdev_err_ratelimited(
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 720a99ba0f7d..5511355b700d 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -27,6 +27,8 @@ struct efa_com_create_qp_params {
u16 pd;
u16 uarn;
u8 qp_type;
+ u8 sl;
+ u8 unsolicited_write_recv : 1;
};
struct efa_com_create_qp_result {
@@ -70,7 +72,7 @@ struct efa_com_create_cq_params {
/* cq physical base address in OS memory */
dma_addr_t dma_addr;
/* completion queue depth in # of entries */
- u16 cq_depth;
+ u16 sub_cq_depth;
u16 num_sub_cqs;
u16 uarn;
u16 eqn;
@@ -111,6 +113,7 @@ struct efa_com_get_device_attr_result {
u8 addr[EFA_GID_SIZE];
u64 page_size_cap;
u64 max_mr_pages;
+ u64 guid;
u32 mtu;
u32 fw_version;
u32 admin_api_version;
@@ -139,6 +142,7 @@ struct efa_com_get_device_attr_result {
u16 max_wr_rdma_sge;
u16 max_tx_batch;
u16 min_sq_depth;
+ u16 max_link_speed_gbps;
u8 db_bar;
};
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
index 2d8eb96eaa81..a4c9fd33da38 100644
--- a/drivers/infiniband/hw/efa/efa_io_defs.h
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_IO_H_
@@ -10,6 +10,7 @@
#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
enum efa_io_queue_type {
/* send queue (of a QP) */
@@ -25,6 +26,10 @@ enum efa_io_send_op_type {
EFA_IO_RDMA_READ = 1,
/* RDMA write */
EFA_IO_RDMA_WRITE = 2,
+ /* Fast MR registration */
+ EFA_IO_FAST_REG = 3,
+ /* Fast MR invalidation */
+ EFA_IO_FAST_INV = 4,
};
enum efa_io_comp_status {
@@ -34,15 +39,15 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_FLUSHED = 1,
/* Internal QP error */
EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
- /* Bad operation type */
- EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3,
+ /* Unsupported operation */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3,
/* Bad AH */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
/* LKEY not registered or does not match IOVA */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
/* Message too long */
EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
- /* Destination ENI is down or does not run EFA */
+ /* RKEY not registered or does not match remote IOVA */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
/* Connection was reset by remote side */
EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
@@ -54,8 +59,17 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
/* Unexpected status returned by responder */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
- /* Unresponsive remote - detected locally */
+ /* Unresponsive remote - was previously responsive */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+ /* No valid AH at remote side (required for RDMA operations) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
+ /* Unreachable remote - never received a response */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
+};
+
+enum efa_io_frwr_pbl_mode {
+ EFA_IO_FRWR_INLINE_PBL = 0,
+ EFA_IO_FRWR_DIRECT_PBL = 1,
};
struct efa_io_tx_meta_desc {
@@ -95,13 +109,13 @@ struct efa_io_tx_meta_desc {
/*
* If inline_msg bit is set, length of inline message in bytes,
- * otherwise length of SGL (number of buffers).
+ * otherwise length of SGL (number of buffers).
*/
u16 length;
/*
- * immediate data: if has_imm is set, then this field is included
- * within Tx message and reported in remote Rx completion.
+ * immediate data: if has_imm is set, then this field is included within
+ * Tx message and reported in remote Rx completion.
*/
u32 immediate_data;
@@ -158,6 +172,63 @@ struct efa_io_rdma_req {
struct efa_io_tx_buf_desc local_mem[1];
};
+struct efa_io_fast_mr_reg_req {
+ /* Updated local key of the MR after lkey/rkey increment */
+ u32 lkey;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
+ */
+ u8 permissions;
+
+ /*
+ * control flags
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift)
+ * 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode
+ * 7 : reserved - MBZ
+ */
+ u8 flags;
+
+ /* MBZ */
+ u8 reserved[2];
+
+ /* IO Virtual Address associated with this MR */
+ u64 iova;
+
+ /* Memory region length, in bytes */
+ u64 mr_length;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical page addresses (optimization
+ * for short region activation).
+ */
+ u64 inline_array[1];
+
+ /* points to PBL (Currently only direct) */
+ u64 dma_addr;
+ } pbl;
+};
+
+struct efa_io_fast_mr_inv_req {
+ /* Local key of the MR to invalidate */
+ u32 lkey;
+
+ /* MBZ */
+ u8 reserved[28];
+};
+
/*
* Tx WQE, composed of tx meta descriptors followed by either tx buffer
* descriptors or inline data
@@ -174,6 +245,12 @@ struct efa_io_tx_wqe {
/* RDMA local and remote memory addresses */
struct efa_io_rdma_req rdma_req;
+
+ /* Fast registration */
+ struct efa_io_fast_mr_reg_req reg_mr_req;
+
+ /* Fast invalidation */
+ struct efa_io_fast_mr_inv_req inv_mr_req;
} data;
};
@@ -208,7 +285,7 @@ struct efa_io_rx_desc {
struct efa_io_cdesc_common {
/*
* verbs-generated request ID, as provided in the completed tx or rx
- * descriptor.
+ * descriptor.
*/
u16 req_id;
@@ -221,7 +298,8 @@ struct efa_io_cdesc_common {
* 3 : has_imm - indicates that immediate data is
* present - for RX completions only
* 6:4 : op_type - enum efa_io_send_op_type
- * 7 : reserved31 - MBZ
+ * 7 : unsolicited - indicates that there is no
+ * matching request - for RDMA with imm. RX only
*/
u8 flags;
@@ -291,6 +369,13 @@ struct efa_io_rx_cdesc_ex {
/* tx_buf_desc */
#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+/* fast_mr_reg_req */
+#define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2)
+#define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5)
+
/* rx_desc */
#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
@@ -301,5 +386,6 @@ struct efa_io_rx_cdesc_ex {
#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)
+#define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7)
#endif /* _EFA_IO_H_ */
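Note: a fast-registration WQE carries its physical buffer list (PBL) either inline in the descriptor (EFA_IO_FRWR_INLINE_PBL, up to EFA_IO_TX_DESC_INLINE_PBL_SIZE entries, i.e. a single page) or as a DMA pointer to an external list (EFA_IO_FRWR_DIRECT_PBL). A hypothetical helper showing how efa_io_fast_mr_reg_req would be filled from the structs and masks defined above (the helper itself is illustrative, not driver code):

    static void efa_fill_fast_reg(struct efa_io_fast_mr_reg_req *req,
                                  u32 lkey, u64 iova, u64 length,
                                  u64 *pages, u32 npages, u8 page_shift,
                                  dma_addr_t pbl_dma)
    {
            req->lkey = lkey;
            req->iova = iova;
            req->mr_length = length;
            EFA_SET(&req->permissions,
                    EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE, 1);
            EFA_SET(&req->flags,
                    EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT, page_shift);

            if (npages <= EFA_IO_TX_DESC_INLINE_PBL_SIZE) {
                    /* Short region: page address rides inline in the WQE */
                    EFA_SET(&req->flags, EFA_IO_FAST_MR_REG_REQ_PBL_MODE,
                            EFA_IO_FRWR_INLINE_PBL);
                    req->pbl.inline_array[0] = pages[0];
            } else {
                    /* Larger region: point the device at a DMA-mapped PBL */
                    EFA_SET(&req->flags, EFA_IO_FAST_MR_REG_REQ_PBL_MODE,
                            EFA_IO_FRWR_DIRECT_PBL);
                    req->pbl.dma_addr = pbl_dma;
            }
    }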
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 7b1910a86216..4f03c0ec819f 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -16,11 +16,13 @@
#define PCI_DEV_ID_EFA0_VF 0xefa0
#define PCI_DEV_ID_EFA1_VF 0xefa1
#define PCI_DEV_ID_EFA2_VF 0xefa2
+#define PCI_DEV_ID_EFA3_VF 0xefa3
static const struct pci_device_id efa_pci_tbl[] = {
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA3_VF) },
{ }
};
@@ -139,8 +141,7 @@ static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq)
return 0;
}
-static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq,
- int vector)
+static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector)
{
u32 cpu;
@@ -190,15 +191,23 @@ static int efa_request_doorbell_bar(struct efa_dev *dev)
{
u8 db_bar_idx = dev->dev_attr.db_bar;
struct pci_dev *pdev = dev->pdev;
- int bars;
+ int pci_mem_bars;
+ int db_bar;
int err;
- if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+ db_bar = BIT(db_bar_idx);
+ if (!(db_bar & EFA_BASE_BAR_MASK)) {
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (db_bar & ~pci_mem_bars) {
+ dev_err(&pdev->dev,
+ "Doorbells BAR unavailable. Requested %#x, available %#x\n",
+ db_bar, pci_mem_bars);
+ return -ENODEV;
+ }
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME);
if (err) {
- dev_err(&dev->pdev->dev,
+ dev_err(&pdev->dev,
"pci_request_selected_regions for bar %d failed %d\n",
db_bar_idx, err);
return err;
@@ -295,7 +304,7 @@ static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq)
efa_free_irq(dev, &eq->irq);
}
-static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec)
+static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec)
{
int err;
@@ -318,19 +327,17 @@ err_free_comp_irq:
static int efa_create_eqs(struct efa_dev *dev)
{
- unsigned int neqs = dev->dev_attr.max_eq;
- int err;
- int i;
+ u32 neqs = dev->dev_attr.max_eq;
+ int err, i;
- neqs = min_t(unsigned int, neqs, num_online_cpus());
+ neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
return -ENOMEM;
for (i = 0; i < neqs; i++) {
- err = efa_create_eq(dev, &dev->eqs[i],
- i + EFA_COMP_EQS_VEC_BASE);
+ err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE);
if (err)
goto err_destroy_eqs;
}
@@ -429,6 +436,7 @@ static int efa_ib_device_add(struct efa_dev *dev)
efa_set_host_info(dev);
dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+ dev->ibdev.node_guid = dev->dev_attr.guid;
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->neqs ?: 1;
dev->ibdev.dev.parent = &pdev->dev;
@@ -457,7 +465,6 @@ static void efa_ib_device_remove(struct efa_dev *dev)
ibdev_info(&dev->ibdev, "Unregister ib device\n");
ib_unregister_device(&dev->ibdev);
efa_destroy_eqs(dev);
- efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL);
efa_release_doorbell_bar(dev);
}
@@ -468,34 +475,30 @@ static void efa_disable_msix(struct efa_dev *dev)
static int efa_enable_msix(struct efa_dev *dev)
{
- int msix_vecs, irq_num;
+ int max_vecs, num_vecs;
/*
* Reserve the max msix vectors we might need, one vector is reserved
* for admin.
*/
- msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
- num_online_cpus() + 1);
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
- msix_vecs);
+ max_vecs);
dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
- irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
- msix_vecs, PCI_IRQ_MSIX);
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
- if (irq_num < 0) {
- dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
- irq_num);
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
return -ENOSPC;
}
- if (irq_num != msix_vecs) {
- efa_disable_msix(dev);
- dev_err(&dev->pdev->dev,
- "Allocated %d MSI-X (out of %d requested)\n",
- irq_num, msix_vecs);
- return -ENOSPC;
- }
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
return 0;
}
@@ -533,7 +536,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
{
struct efa_com_dev *edev;
struct efa_dev *dev;
- int bars;
+ int pci_mem_bars;
int err;
err = pci_enable_device_mem(pdev);
@@ -558,8 +561,14 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
dev->pdev = pdev;
xa_init(&dev->cqs_xa);
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
+ dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
+ (int)EFA_BASE_BAR_MASK, pci_mem_bars);
+ err = -ENODEV;
+ goto err_ibdev_destroy;
+ }
+ err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME);
if (err) {
dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
err);
@@ -628,12 +637,14 @@ err_disable_device:
return ERR_PTR(err);
}
-static void efa_remove_device(struct pci_dev *pdev)
+static void efa_remove_device(struct pci_dev *pdev,
+ enum efa_regs_reset_reason_types reset_reason)
{
struct efa_dev *dev = pci_get_drvdata(pdev);
struct efa_com_dev *edev;
edev = &dev->edev;
+ efa_com_dev_reset(edev, reset_reason);
efa_com_admin_destroy(edev);
efa_free_irq(dev, &dev->admin_irq);
efa_disable_msix(dev);
@@ -661,7 +672,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
err_remove_device:
- efa_remove_device(pdev);
+ efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR);
return err;
}
@@ -670,7 +681,17 @@ static void efa_remove(struct pci_dev *pdev)
struct efa_dev *dev = pci_get_drvdata(pdev);
efa_ib_device_remove(dev);
- efa_remove_device(pdev);
+ efa_remove_device(pdev, EFA_REGS_RESET_NORMAL);
+}
+
+static void efa_shutdown(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_destroy_eqs(dev);
+ efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN);
+ efa_free_irq(dev, &dev->admin_irq);
+ efa_disable_msix(dev);
}
static struct pci_driver efa_pci_driver = {
@@ -678,6 +699,7 @@ static struct pci_driver efa_pci_driver = {
.id_table = efa_pci_tbl,
.probe = efa_probe,
.remove = efa_remove,
+ .shutdown = efa_shutdown,
};
module_pci_driver(efa_pci_driver);
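Note: MSI-X setup now tolerates a partial grant: pci_alloc_irq_vectors() is called with a minimum of one vector (enough for the admin queue), the actual grant is recorded in num_irq_vectors, and efa_create_eqs() sizes the completion-EQ array from it (num_irq_vectors - EFA_COMP_EQS_VEC_BASE) instead of from num_online_cpus(). The pattern, reduced to a sketch:

    /* pci_alloc_irq_vectors() returns the number of vectors granted,
     * anywhere in [min, max], or a negative errno.
     */
    num_vecs = pci_alloc_irq_vectors(pdev, 1, max_vecs, PCI_IRQ_MSIX);
    if (num_vecs < 0)
            return num_vecs;                /* not even one vector */

    dev->num_irq_vectors = num_vecs;        /* completion EQs sized from this */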
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 2f412db2edcd..a8645a40730f 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -26,10 +26,6 @@ enum {
EFA_MMAP_IO_NC,
};
-#define EFA_AENQ_ENABLED_GROUPS \
- (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
- BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-
struct efa_user_mmap_entry {
struct rdma_user_mmap_entry rdma_entry;
u64 address;
@@ -89,6 +85,8 @@ static const struct rdma_stat_desc efa_port_stats_descs[] = {
EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};
+#define EFA_DEFAULT_LINK_SPEED_GBPS 100
+
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
@@ -263,6 +261,9 @@ int efa_query_device(struct ib_device *ibdev,
if (EFA_DEV_CAP(dev, RDMA_WRITE))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;
+
if (dev->neqs)
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
@@ -278,10 +279,47 @@ int efa_query_device(struct ib_device *ibdev,
return 0;
}
+static void efa_link_gbps_to_speed_and_width(u16 gbps,
+ enum ib_port_speed *speed,
+ enum ib_port_width *width)
+{
+ if (gbps >= 400) {
+ *width = IB_WIDTH_8X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 200) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 120) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 100) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (gbps >= 60) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_DDR;
+ } else if (gbps >= 50) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 40) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 30) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_SDR;
+ } else {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ }
+}
+
int efa_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct efa_dev *dev = to_edev(ibdev);
+ enum ib_port_speed link_speed;
+ enum ib_port_width link_width;
+ u16 link_gbps;
props->lmc = 1;
@@ -289,8 +327,10 @@ int efa_query_port(struct ib_device *ibdev, u32 port,
props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->active_speed = IB_SPEED_EDR;
- props->active_width = IB_WIDTH_4X;
+ link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
+ efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
+ props->active_speed = link_speed;
+ props->active_width = link_width;
props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->max_msg_sz = dev->dev_attr.mtu;
@@ -521,7 +561,7 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
address = dev->mem_bar_addr + resp->llq_desc_offset;
length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK));
+ offset_in_page(resp->llq_desc_offset));
qp->llq_desc_mmap_entry =
efa_user_mmap_entry_insert(&ucontext->ibucontext,
@@ -639,6 +679,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
struct efa_ibv_create_qp cmd = {};
struct efa_qp *qp = to_eqp(ibqp);
struct efa_ucontext *ucontext;
+ u16 supported_efa_flags = 0;
int err;
ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
@@ -676,13 +717,23 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
goto err_out;
}
- if (cmd.comp_mask) {
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
goto err_out;
}
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV;
+
+ if (cmd.flags & ~supported_efa_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n",
+ cmd.flags, supported_efa_flags);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
create_qp_params.uarn = ucontext->uarn;
create_qp_params.pd = to_epd(ibqp->pd)->pdn;
@@ -722,6 +773,11 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
create_qp_params.rq_base_addr = qp->rq_dma_addr;
}
+ create_qp_params.sl = cmd.sl;
+
+ if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
+ create_qp_params.unsolicited_write_recv = true;
+
err = efa_com_create_qp(&dev->edev, &create_qp_params,
&create_qp_resp);
if (err)
@@ -1067,8 +1123,9 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
}
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct efa_ucontext, ibucontext);
struct efa_com_create_cq_params params = {};
@@ -1153,7 +1210,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
params.uarn = cq->ucontext->uarn;
- params.cq_depth = entries;
+ params.sub_cq_depth = entries;
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;
@@ -1670,14 +1727,14 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct ib_umem_dmabuf *umem_dmabuf;
struct efa_mr *mr;
int err;
- mr = efa_alloc_mr(ibpd, access_flags, udata);
+ mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto err_out;
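Note: the efa_link_gbps_to_speed_and_width() mapping above reports standard InfiniBand lane rates; as a worked example, 100 Gb/s becomes 4X EDR (4 lanes x 25 Gb/s), 200 Gb/s becomes 4X HDR (4 x 50), and 400 Gb/s becomes 8X HDR (8 x 50). A device that does not report max_link_speed_gbps falls back to EFA_DEFAULT_LINK_SPEED_GBPS (100), preserving the previous hard-coded 4X EDR report.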