summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/efa
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/efa')
-rw-r--r--drivers/infiniband/hw/efa/Kconfig15
-rw-r--r--drivers/infiniband/hw/efa/Makefile9
-rw-r--r--drivers/infiniband/hw/efa/efa.h163
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h794
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_defs.h136
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c1160
-rw-r--r--drivers/infiniband/hw/efa/efa_com.h144
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c692
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h270
-rw-r--r--drivers/infiniband/hw/efa/efa_common_defs.h18
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c533
-rw-r--r--drivers/infiniband/hw/efa/efa_regs_defs.h113
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c1825
13 files changed, 5872 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/efa/Kconfig b/drivers/infiniband/hw/efa/Kconfig
new file mode 100644
index 000000000000..457e18ba1d57
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Amazon fabric device configuration
+#
+
+config INFINIBAND_EFA
+ tristate "Amazon Elastic Fabric Adapter (EFA) support"
+ depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN
+ depends on INFINIBAND_USER_ACCESS
+ help
+ This driver supports Amazon Elastic Fabric Adapter (EFA).
+
+ To compile this driver as a module, choose M here.
+ The module will be called efa.
diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile
new file mode 100644
index 000000000000..6e83083af0bc
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver.
+#
+
+obj-$(CONFIG_INFINIBAND_EFA) += efa.o
+
+efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
new file mode 100644
index 000000000000..9e3cc3239c13
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_H_
+#define _EFA_H_
+
+#include <linux/bitops.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+
+#include <rdma/efa-abi.h>
+#include <rdma/ib_verbs.h>
+
+#include "efa_com_cmd.h"
+
+#define DRV_MODULE_NAME "efa"
+#define DEVICE_NAME "Elastic Fabric Adapter (EFA)"
+
+#define EFA_IRQNAME_SIZE 40
+
+/* 1 for AENQ + ADMIN */
+#define EFA_NUM_MSIX_VEC 1
+#define EFA_MGMNT_MSIX_VEC_IDX 0
+
+struct efa_irq {
+ irq_handler_t handler;
+ void *data;
+ int cpu;
+ u32 vector;
+ cpumask_t affinity_hint_mask;
+ char name[EFA_IRQNAME_SIZE];
+};
+
+struct efa_sw_stats {
+ atomic64_t alloc_pd_err;
+ atomic64_t create_qp_err;
+ atomic64_t create_cq_err;
+ atomic64_t reg_mr_err;
+ atomic64_t alloc_ucontext_err;
+ atomic64_t create_ah_err;
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_stats {
+ struct efa_sw_stats sw_stats;
+ atomic64_t keep_alive_rcvd;
+};
+
+struct efa_dev {
+ struct ib_device ibdev;
+ struct efa_com_dev edev;
+ struct pci_dev *pdev;
+ struct efa_com_get_device_attr_result dev_attr;
+
+ u64 reg_bar_addr;
+ u64 reg_bar_len;
+ u64 mem_bar_addr;
+ u64 mem_bar_len;
+ u64 db_bar_addr;
+ u64 db_bar_len;
+ u8 addr[EFA_GID_SIZE];
+ u32 mtu;
+
+ int admin_msix_vector_idx;
+ struct efa_irq admin_irq;
+
+ struct efa_stats stats;
+};
+
+struct efa_ucontext {
+ struct ib_ucontext ibucontext;
+ struct xarray mmap_xa;
+ u32 mmap_xa_page;
+ u16 uarn;
+};
+
+struct efa_pd {
+ struct ib_pd ibpd;
+ u16 pdn;
+};
+
+struct efa_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+};
+
+struct efa_cq {
+ struct ib_cq ibcq;
+ struct efa_ucontext *ucontext;
+ dma_addr_t dma_addr;
+ void *cpu_addr;
+ size_t size;
+ u16 cq_idx;
+};
+
+struct efa_qp {
+ struct ib_qp ibqp;
+ dma_addr_t rq_dma_addr;
+ void *rq_cpu_addr;
+ size_t rq_size;
+ enum ib_qp_state state;
+ u32 qp_handle;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
+struct efa_ah {
+ struct ib_ah ibah;
+ u16 ah;
+ /* dest_addr */
+ u8 id[EFA_GID_SIZE];
+};
+
+int efa_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata);
+int efa_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey);
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+struct ib_cq *efa_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable);
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
+int efa_mmap(struct ib_ucontext *ibucontext,
+ struct vm_area_struct *vma);
+int efa_create_ah(struct ib_ah *ibah,
+ struct rdma_ah_attr *ah_attr,
+ u32 flags,
+ struct ib_udata *udata);
+void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+ u8 port_num);
+
+#endif /* _EFA_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
new file mode 100644
index 000000000000..2be0469d545f
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -0,0 +1,794 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_CMDS_H_
+#define _EFA_ADMIN_CMDS_H_
+
+#define EFA_ADMIN_API_VERSION_MAJOR 0
+#define EFA_ADMIN_API_VERSION_MINOR 1
+
+/* EFA admin queue opcodes */
+enum efa_admin_aq_opcode {
+ EFA_ADMIN_CREATE_QP = 1,
+ EFA_ADMIN_MODIFY_QP = 2,
+ EFA_ADMIN_QUERY_QP = 3,
+ EFA_ADMIN_DESTROY_QP = 4,
+ EFA_ADMIN_CREATE_AH = 5,
+ EFA_ADMIN_DESTROY_AH = 6,
+ EFA_ADMIN_REG_MR = 7,
+ EFA_ADMIN_DEREG_MR = 8,
+ EFA_ADMIN_CREATE_CQ = 9,
+ EFA_ADMIN_DESTROY_CQ = 10,
+ EFA_ADMIN_GET_FEATURE = 11,
+ EFA_ADMIN_SET_FEATURE = 12,
+ EFA_ADMIN_GET_STATS = 13,
+ EFA_ADMIN_ALLOC_PD = 14,
+ EFA_ADMIN_DEALLOC_PD = 15,
+ EFA_ADMIN_ALLOC_UAR = 16,
+ EFA_ADMIN_DEALLOC_UAR = 17,
+ EFA_ADMIN_MAX_OPCODE = 17,
+};
+
+enum efa_admin_aq_feature_id {
+ EFA_ADMIN_DEVICE_ATTR = 1,
+ EFA_ADMIN_AENQ_CONFIG = 2,
+ EFA_ADMIN_NETWORK_ATTR = 3,
+ EFA_ADMIN_QUEUE_ATTR = 4,
+ EFA_ADMIN_HW_HINTS = 5,
+ EFA_ADMIN_FEATURES_OPCODE_NUM = 8,
+};
+
+/* QP transport type */
+enum efa_admin_qp_type {
+ /* Unreliable Datagram */
+ EFA_ADMIN_QP_TYPE_UD = 1,
+ /* Scalable Reliable Datagram */
+ EFA_ADMIN_QP_TYPE_SRD = 2,
+};
+
+/* QP state */
+enum efa_admin_qp_state {
+ EFA_ADMIN_QP_STATE_RESET = 0,
+ EFA_ADMIN_QP_STATE_INIT = 1,
+ EFA_ADMIN_QP_STATE_RTR = 2,
+ EFA_ADMIN_QP_STATE_RTS = 3,
+ EFA_ADMIN_QP_STATE_SQD = 4,
+ EFA_ADMIN_QP_STATE_SQE = 5,
+ EFA_ADMIN_QP_STATE_ERR = 6,
+};
+
+enum efa_admin_get_stats_type {
+ EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+};
+
+enum efa_admin_get_stats_scope {
+ EFA_ADMIN_GET_STATS_SCOPE_ALL = 0,
+ EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1,
+};
+
+enum efa_admin_modify_qp_mask_bits {
+ EFA_ADMIN_QP_STATE_BIT = 0,
+ EFA_ADMIN_CUR_QP_STATE_BIT = 1,
+ EFA_ADMIN_QKEY_BIT = 2,
+ EFA_ADMIN_SQ_PSN_BIT = 3,
+ EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4,
+};
+
+/*
+ * QP allocation sizes, converted by fabric QueuePair (QP) create command
+ * from QP capabilities.
+ */
+struct efa_admin_qp_alloc_size {
+ /* Send descriptor ring size in bytes */
+ u32 send_queue_ring_size;
+
+ /* Max number of WQEs that can be outstanding on send queue. */
+ u32 send_queue_depth;
+
+ /*
+ * Recv descriptor ring size in bytes, sufficient for user-provided
+ * number of WQEs
+ */
+ u32 recv_queue_ring_size;
+
+ /* Max number of WQEs that can be outstanding on recv queue */
+ u32 recv_queue_depth;
+};
+
+struct efa_admin_create_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain associated with this QP */
+ u16 pd;
+
+ /* QP type */
+ u8 qp_type;
+
+ /*
+ * 0 : sq_virt - If set, SQ ring base address is
+ * virtual (IOVA returned by MR registration)
+ * 1 : rq_virt - If set, RQ ring base address is
+ * virtual (IOVA returned by MR registration)
+ * 7:2 : reserved - MBZ
+ */
+ u8 flags;
+
+ /*
+ * Send queue (SQ) ring base physical address. This field is not
+ * used if this is a Low Latency Queue(LLQ).
+ */
+ u64 sq_base_addr;
+
+ /* Receive queue (RQ) ring base address. */
+ u64 rq_base_addr;
+
+ /* Index of CQ to be associated with Send Queue completions */
+ u32 send_cq_idx;
+
+ /* Index of CQ to be associated with Recv Queue completions */
+ u32 recv_cq_idx;
+
+ /*
+ * Memory registration key for the SQ ring, used only when not in
+ * LLQ mode and base address is virtual
+ */
+ u32 sq_l_key;
+
+ /*
+ * Memory registration key for the RQ ring, used only when base
+ * address is virtual
+ */
+ u32 rq_l_key;
+
+ /* Requested QP allocation sizes */
+ struct efa_admin_qp_alloc_size qp_alloc_size;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* MBZ */
+ u32 reserved2;
+};
+
+struct efa_admin_create_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* Opaque handle to be used for consequent operations on the QP */
+ u32 qp_handle;
+
+ /* QP number in the given EFA virtual device */
+ u16 qp_num;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* Index of sub-CQ for Send Queue completions */
+ u16 send_sub_cq_idx;
+
+ /* Index of sub-CQ for Receive Queue completions */
+ u16 recv_sub_cq_idx;
+
+ /* SQ doorbell address, as offset to PCIe DB BAR */
+ u32 sq_db_offset;
+
+ /* RQ doorbell address, as offset to PCIe DB BAR */
+ u32 rq_db_offset;
+
+ /*
+ * low latency send queue ring base address as an offset to PCIe
+ * MMIO LLQ_MEM BAR
+ */
+ u32 llq_descriptors_offset;
+};
+
+struct efa_admin_modify_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /*
+ * Mask indicating which fields should be updated see enum
+ * efa_admin_modify_qp_mask_bits
+ */
+ u32 modify_mask;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+
+ /* QP state */
+ u32 qp_state;
+
+ /* Override current QP state (before applying the transition) */
+ u32 cur_qp_state;
+
+ /* QKey */
+ u32 qkey;
+
+ /* SQ PSN */
+ u32 sq_psn;
+
+ /* Enable async notification when SQ is drained */
+ u8 sq_drained_async_notify;
+
+ /* MBZ */
+ u8 reserved1;
+
+ /* MBZ */
+ u16 reserved2;
+};
+
+struct efa_admin_modify_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_query_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+};
+
+struct efa_admin_query_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* QP state */
+ u32 qp_state;
+
+ /* QKey */
+ u32 qkey;
+
+ /* SQ PSN */
+ u32 sq_psn;
+
+ /* Indicates that draining is in progress */
+ u8 sq_draining;
+
+ /* MBZ */
+ u8 reserved1;
+
+ /* MBZ */
+ u16 reserved2;
+};
+
+struct efa_admin_destroy_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+};
+
+struct efa_admin_destroy_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Create Address Handle command parameters. Must not be called more than
+ * once for the same destination
+ */
+struct efa_admin_create_ah_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Destination address in network byte order */
+ u8 dest_addr[16];
+
+ /* PD number */
+ u16 pd;
+
+ u16 reserved;
+};
+
+struct efa_admin_create_ah_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* Target interface address handle (opaque) */
+ u16 ah;
+
+ u16 reserved;
+};
+
+struct efa_admin_destroy_ah_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Target interface address handle (opaque) */
+ u16 ah;
+
+ /* PD number */
+ u16 pd;
+};
+
+struct efa_admin_destroy_ah_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Registration of MemoryRegion, required for QP working with Virtual
+ * Addresses. In standard verbs semantics, region length is limited to 2GB
+ * space, but EFA offers larger MR support for large memory space, to ease
+ * on users working with very large datasets (i.e. full GPU memory mapping).
+ */
+struct efa_admin_reg_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved16_w1;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of guest-physical page addresses of user
+ * memory pages (optimization for short region
+ * registrations)
+ */
+ u64 inline_pbl_array[4];
+
+ /* points to PBL (direct or indirect, chained if needed) */
+ struct efa_admin_ctrl_buff_info pbl;
+ } pbl;
+
+ /* Memory region length, in bytes. */
+ u64 mr_length;
+
+ /*
+ * flags and page size
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift). Page size is used for
+ * building the Virtual to Physical address mapping
+ * 6:5 : reserved - MBZ
+ * 7 : mem_addr_phy_mode_en - Enable bit for physical
+ * memory registration (no translation), can be used
+ * only by privileged clients. If set, PBL must
+ * contain a single entry.
+ */
+ u8 flags;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Write permissions: value
+ * of 1 needed for RQ buffers and for RDMA write
+ * 7:1 : reserved1 - remote access flags, etc
+ */
+ u8 permissions;
+
+ u16 reserved16_w5;
+
+ /* number of pages in PBL (redundant, could be calculated) */
+ u32 page_num;
+
+ /*
+ * IO Virtual Address associated with this MR. If
+ * mem_addr_phy_mode_en is set, contains the physical address of
+ * the region.
+ */
+ u64 iova;
+};
+
+struct efa_admin_reg_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+};
+
+struct efa_admin_dereg_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* L_Key, memory region's l_key */
+ u32 l_key;
+};
+
+struct efa_admin_dereg_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_create_cq_cmd {
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /*
+ * 4:0 : reserved5
+ * 5 : interrupt_mode_enabled - if set, cq operates
+ * in interrupt mode (i.e. CQ events and MSI-X are
+ * generated), otherwise - polling
+ * 6 : virt - If set, ring base address is virtual
+ * (IOVA returned by MR registration)
+ * 7 : reserved6
+ */
+ u8 cq_caps_1;
+
+ /*
+ * 4:0 : cq_entry_size_words - size of CQ entry in
+ * 32-bit words, valid values: 4, 8.
+ * 7:5 : reserved7
+ */
+ u8 cq_caps_2;
+
+ /* completion queue depth in # of entries. must be power of 2 */
+ u16 cq_depth;
+
+ /* msix vector assigned to this cq */
+ u32 msix_vector_idx;
+
+ /*
+ * CQ ring base address, virtual or physical depending on 'virt'
+ * flag
+ */
+ struct efa_common_mem_addr cq_ba;
+
+ /*
+ * Memory registration key for the ring, used only when base
+ * address is virtual
+ */
+ u32 l_key;
+
+ /*
+ * number of sub cqs - must be equal to sub_cqs_per_cq of queue
+ * attributes.
+ */
+ u16 num_sub_cqs;
+
+ /* UAR number */
+ u16 uar;
+};
+
+struct efa_admin_create_cq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ u16 cq_idx;
+
+ /* actual cq depth in number of entries */
+ u16 cq_actual_depth;
+};
+
+struct efa_admin_destroy_cq_cmd {
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ u16 cq_idx;
+
+ u16 reserved1;
+};
+
+struct efa_admin_destroy_cq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * EFA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct efa_admin_aq_get_stats_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ /* command specific inline data */
+ u32 inline_data_w1[3];
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ /* stats type as defined in enum efa_admin_get_stats_type */
+ u8 type;
+
+ /* stats scope defined in enum efa_admin_get_stats_scope */
+ u8 scope;
+
+ u16 scope_modifier;
+};
+
+struct efa_admin_basic_stats {
+ u64 tx_bytes;
+
+ u64 tx_pkts;
+
+ u64 rx_bytes;
+
+ u64 rx_pkts;
+
+ u64 rx_drops;
+};
+
+struct efa_admin_acq_get_stats_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ struct efa_admin_basic_stats basic_stats;
+};
+
+struct efa_admin_get_set_feature_common_desc {
+ /*
+ * 1:0 : select - 0x1 - current value; 0x3 - default
+ * value
+ * 7:3 : reserved3
+ */
+ u8 flags;
+
+ /* as appears in efa_admin_aq_feature_id */
+ u8 feature_id;
+
+ /* MBZ */
+ u16 reserved16;
+};
+
+struct efa_admin_feature_device_attr_desc {
+ /* Bitmap of efa_admin_aq_feature_id */
+ u64 supported_features;
+
+ /* Bitmap of supported page sizes in MR registrations */
+ u64 page_size_cap;
+
+ u32 fw_version;
+
+ u32 admin_api_version;
+
+ u32 device_version;
+
+ /* Bar used for SQ and RQ doorbells */
+ u16 db_bar;
+
+ /* Indicates how many bits are used physical address access */
+ u8 phys_addr_width;
+
+ /* Indicates how many bits are used virtual address access */
+ u8 virt_addr_width;
+};
+
+struct efa_admin_feature_queue_attr_desc {
+ /* The maximum number of queue pairs supported */
+ u32 max_qp;
+
+ u32 max_sq_depth;
+
+ /* max send wr used in inline-buf */
+ u32 inline_buf_size;
+
+ u32 max_rq_depth;
+
+ /* The maximum number of completion queues supported per VF */
+ u32 max_cq;
+
+ u32 max_cq_depth;
+
+ /* Number of sub-CQs to be created for each CQ */
+ u16 sub_cqs_per_cq;
+
+ u16 reserved;
+
+ /*
+ * Maximum number of SGEs (buffs) allowed for a single send work
+ * queue element (WQE)
+ */
+ u16 max_wr_send_sges;
+
+ /* Maximum number of SGEs allowed for a single recv WQE */
+ u16 max_wr_recv_sges;
+
+ /* The maximum number of memory regions supported */
+ u32 max_mr;
+
+ /* The maximum number of pages can be registered */
+ u32 max_mr_pages;
+
+ /* The maximum number of protection domains supported */
+ u32 max_pd;
+
+ /* The maximum number of address handles supported */
+ u32 max_ah;
+
+ /* The maximum size of LLQ in bytes */
+ u32 max_llq_size;
+};
+
+struct efa_admin_feature_aenq_desc {
+ /* bitmask for AENQ groups the device can report */
+ u32 supported_groups;
+
+ /* bitmask for AENQ groups to report */
+ u32 enabled_groups;
+};
+
+struct efa_admin_feature_network_attr_desc {
+ /* Raw address data in network byte order */
+ u8 addr[16];
+
+ u32 mtu;
+};
+
+/*
+ * When hint value is 0, hints capabilities are not supported or driver
+ * should use its own predefined value
+ */
+struct efa_admin_hw_hints {
+ /* value in ms */
+ u16 mmio_read_timeout;
+
+ /* value in ms */
+ u16 driver_watchdog_timeout;
+
+ /* value in ms */
+ u16 admin_completion_timeout;
+
+ /* poll interval in ms */
+ u16 poll_interval;
+};
+
+struct efa_admin_get_feature_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+
+ struct efa_admin_get_set_feature_common_desc feature_common;
+
+ u32 raw[11];
+};
+
+struct efa_admin_get_feature_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+
+ struct efa_admin_feature_device_attr_desc device_attr;
+
+ struct efa_admin_feature_aenq_desc aenq;
+
+ struct efa_admin_feature_network_attr_desc network_attr;
+
+ struct efa_admin_feature_queue_attr_desc queue_attr;
+
+ struct efa_admin_hw_hints hw_hints;
+ } u;
+};
+
+struct efa_admin_set_feature_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+
+ struct efa_admin_get_set_feature_common_desc feature_common;
+
+ union {
+ u32 raw[11];
+
+ /* AENQ configuration */
+ struct efa_admin_feature_aenq_desc aenq;
+ } u;
+};
+
+struct efa_admin_set_feature_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+ } u;
+};
+
+struct efa_admin_alloc_pd_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_pd_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* PD number */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* PD number */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_alloc_uar_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_uar_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/* asynchronous event notification groups */
+enum efa_admin_aenq_group {
+ EFA_ADMIN_FATAL_ERROR = 1,
+ EFA_ADMIN_WARNING = 2,
+ EFA_ADMIN_NOTIFICATION = 3,
+ EFA_ADMIN_KEEP_ALIVE = 4,
+ EFA_ADMIN_AENQ_GROUPS_NUM = 5,
+};
+
+enum efa_admin_aenq_notification_syndrom {
+ EFA_ADMIN_SUSPEND = 0,
+ EFA_ADMIN_RESUME = 1,
+ EFA_ADMIN_UPDATE_HINTS = 2,
+};
+
+struct efa_admin_mmio_req_read_less_resp {
+ u16 req_id;
+
+ u16 reg_off;
+
+ /* value is valid when poll is cleared */
+ u32 reg_val;
+};
+
+/* create_qp_cmd */
+#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
+#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1
+#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+
+/* reg_mr_cmd */
+#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7
+#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
+#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0)
+
+/* create_cq_cmd */
+#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6
+#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
+#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+
+#endif /* _EFA_ADMIN_CMDS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
new file mode 100644
index 000000000000..c8e0c8b905be
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_H_
+#define _EFA_ADMIN_H_
+
+enum efa_admin_aq_completion_status {
+ EFA_ADMIN_SUCCESS = 0,
+ EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1,
+ EFA_ADMIN_BAD_OPCODE = 2,
+ EFA_ADMIN_UNSUPPORTED_OPCODE = 3,
+ EFA_ADMIN_MALFORMED_REQUEST = 4,
+ /* Additional status is provided in ACQ entry extended_status */
+ EFA_ADMIN_ILLEGAL_PARAMETER = 5,
+ EFA_ADMIN_UNKNOWN_ERROR = 6,
+ EFA_ADMIN_RESOURCE_BUSY = 7,
+};
+
+struct efa_admin_aq_common_desc {
+ /*
+ * 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command_id;
+
+ /* as appears in efa_admin_aq_opcode */
+ u8 opcode;
+
+ /*
+ * 0 : phase
+ * 1 : ctrl_data - control buffer address valid
+ * 2 : ctrl_data_indirect - control buffer address
+ * points to list of pages with addresses of control
+ * buffers
+ * 7:3 : reserved3
+ */
+ u8 flags;
+};
+
+/*
+ * used in efa_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct efa_admin_ctrl_buff_info {
+ u32 length;
+
+ struct efa_common_mem_addr address;
+};
+
+struct efa_admin_aq_entry {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ u32 inline_data_w1[3];
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ u32 inline_data_w4[12];
+};
+
+struct efa_admin_acq_common_desc {
+ /*
+ * command identifier to associate it with the aq descriptor
+ * 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command;
+
+ u8 status;
+
+ /*
+ * 0 : phase
+ * 7:1 : reserved1
+ */
+ u8 flags;
+
+ u16 extended_status;
+
+ /*
+ * indicates to the driver which AQ entry has been consumed by the
+ * device and could be reused
+ */
+ u16 sq_head_indx;
+};
+
+struct efa_admin_acq_entry {
+ struct efa_admin_acq_common_desc acq_common_descriptor;
+
+ u32 response_specific_data[14];
+};
+
+struct efa_admin_aenq_common_desc {
+ u16 group;
+
+ u16 syndrom;
+
+ /*
+ * 0 : phase
+ * 7:1 : reserved - MBZ
+ */
+ u8 flags;
+
+ u8 reserved1[3];
+
+ u32 timestamp_low;
+
+ u32 timestamp_high;
+};
+
+struct efa_admin_aenq_entry {
+ struct efa_admin_aenq_common_desc aenq_common_desc;
+
+ /* command specific inline data */
+ u32 inline_data_w4[12];
+};
+
+/* aq_common_desc */
+#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+
+/* acq_common_desc */
+#define EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_common_desc */
+#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+#endif /* _EFA_ADMIN_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
new file mode 100644
index 000000000000..a5c788741a04
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -0,0 +1,1160 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_com.h"
+#include "efa_regs_defs.h"
+
+#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */
+
+#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
+#define EFA_MMIO_READ_INVALID 0xffffffff
+
+#define EFA_POLL_INTERVAL_MS 100 /* msecs */
+
+#define EFA_ASYNC_QUEUE_DEPTH 16
+#define EFA_ADMIN_QUEUE_DEPTH 32
+
+#define MIN_EFA_VER\
+ ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
+ (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))
+
+#define EFA_CTRL_MAJOR 0
+#define EFA_CTRL_MINOR 0
+#define EFA_CTRL_SUB_MINOR 1
+
+#define MIN_EFA_CTRL_VER \
+ (((EFA_CTRL_MAJOR) << \
+ (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+ ((EFA_CTRL_MINOR) << \
+ (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+ (EFA_CTRL_SUB_MINOR))
+
+#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x)))
+#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32))
+
+#define EFA_REGS_ADMIN_INTR_MASK 1
+
+enum efa_cmd_status {
+ EFA_CMD_SUBMITTED,
+ EFA_CMD_COMPLETED,
+ /* Abort - canceled by the driver */
+ EFA_CMD_ABORTED,
+};
+
+struct efa_comp_ctx {
+ struct completion wait_event;
+ struct efa_admin_acq_entry *user_cqe;
+ u32 comp_size;
+ enum efa_cmd_status status;
+ /* status from the device */
+ u8 comp_status;
+ u8 cmd_opcode;
+ u8 occupied;
+};
+
+static const char *efa_com_cmd_str(u8 cmd)
+{
+#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
+
+ switch (cmd) {
+ EFA_CMD_STR_CASE(CREATE_QP);
+ EFA_CMD_STR_CASE(MODIFY_QP);
+ EFA_CMD_STR_CASE(QUERY_QP);
+ EFA_CMD_STR_CASE(DESTROY_QP);
+ EFA_CMD_STR_CASE(CREATE_AH);
+ EFA_CMD_STR_CASE(DESTROY_AH);
+ EFA_CMD_STR_CASE(REG_MR);
+ EFA_CMD_STR_CASE(DEREG_MR);
+ EFA_CMD_STR_CASE(CREATE_CQ);
+ EFA_CMD_STR_CASE(DESTROY_CQ);
+ EFA_CMD_STR_CASE(GET_FEATURE);
+ EFA_CMD_STR_CASE(SET_FEATURE);
+ EFA_CMD_STR_CASE(GET_STATS);
+ EFA_CMD_STR_CASE(ALLOC_PD);
+ EFA_CMD_STR_CASE(DEALLOC_PD);
+ EFA_CMD_STR_CASE(ALLOC_UAR);
+ EFA_CMD_STR_CASE(DEALLOC_UAR);
+ default: return "unknown command opcode";
+ }
+#undef EFA_CMD_STR_CASE
+}
+
+static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+ struct efa_admin_mmio_req_read_less_resp *read_resp;
+ unsigned long exp_time;
+ u32 mmio_read_reg;
+ u32 err;
+
+ read_resp = mmio_read->read_resp;
+
+ spin_lock(&mmio_read->lock);
+ mmio_read->seq_num++;
+
+ /* trash DMA req_id to identify when hardware is done */
+ read_resp->req_id = mmio_read->seq_num + 0x9aL;
+ mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+ EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+ mmio_read_reg |= mmio_read->seq_num &
+ EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+ writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
+
+ exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
+ do {
+ if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
+ break;
+ udelay(1);
+ } while (time_is_after_jiffies(exp_time));
+
+ if (read_resp->req_id != mmio_read->seq_num) {
+ ibdev_err(edev->efa_dev,
+ "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+ mmio_read->seq_num, offset, read_resp->req_id,
+ read_resp->reg_off);
+ err = EFA_MMIO_READ_INVALID;
+ goto out;
+ }
+
+ if (read_resp->reg_off != offset) {
+ ibdev_err(edev->efa_dev,
+ "Reading register failed: wrong offset provided\n");
+ err = EFA_MMIO_READ_INVALID;
+ goto out;
+ }
+
+ err = read_resp->reg_val;
+out:
+ spin_unlock(&mmio_read->lock);
+ return err;
+}
+
+static int efa_com_admin_init_sq(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_admin_sq *sq = &aq->sq;
+ u16 size = aq->depth * sizeof(*sq->entries);
+ u32 addr_high;
+ u32 addr_low;
+ u32 aq_caps;
+
+ sq->entries =
+ dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
+ if (!sq->entries)
+ return -ENOMEM;
+
+ spin_lock_init(&sq->lock);
+
+ sq->cc = 0;
+ sq->pc = 0;
+ sq->phase = 1;
+
+ sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
+
+ addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
+ addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
+
+ aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+ aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
+ EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+ EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+ writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
+
+ return 0;
+}
+
+static int efa_com_admin_init_cq(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_admin_cq *cq = &aq->cq;
+ u16 size = aq->depth * sizeof(*cq->entries);
+ u32 addr_high;
+ u32 addr_low;
+ u32 acq_caps;
+
+ cq->entries =
+ dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
+ if (!cq->entries)
+ return -ENOMEM;
+
+ spin_lock_init(&cq->lock);
+
+ cq->cc = 0;
+ cq->phase = 1;
+
+ addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
+ addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
+
+ acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+ acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
+ EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+ EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+ acq_caps |= (aq->msix_vector_idx <<
+ EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
+ EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
+
+ writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
+
+ return 0;
+}
+
+static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers)
+{
+ struct efa_com_aenq *aenq = &edev->aenq;
+ u32 addr_low, addr_high, aenq_caps;
+ u16 size;
+
+ if (!aenq_handlers) {
+ ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
+ return -EINVAL;
+ }
+
+ size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
+ aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
+ GFP_KERNEL);
+ if (!aenq->entries)
+ return -ENOMEM;
+
+ aenq->aenq_handlers = aenq_handlers;
+ aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
+ aenq->cc = 0;
+ aenq->phase = 1;
+
+ addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+ addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
+
+ aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+ aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
+ EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+ EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+ aenq_caps |= (aenq->msix_vector_idx
+ << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
+ EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
+ writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
+
+ /*
+ * Init cons_db to mark that all entries in the queue
+ * are initially available
+ */
+ writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+
+ return 0;
+}
+
+/* ID to be used with efa_com_get_comp_ctx */
+static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
+{
+ u16 ctx_id;
+
+ spin_lock(&aq->comp_ctx_lock);
+ ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
+ aq->comp_ctx_pool_next++;
+ spin_unlock(&aq->comp_ctx_lock);
+
+ return ctx_id;
+}
+
+static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
+ u16 ctx_id)
+{
+ spin_lock(&aq->comp_ctx_lock);
+ aq->comp_ctx_pool_next--;
+ aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
+ spin_unlock(&aq->comp_ctx_lock);
+}
+
+static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
+ struct efa_comp_ctx *comp_ctx)
+{
+ u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command &
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id);
+ comp_ctx->occupied = 0;
+ efa_com_dealloc_ctx_id(aq, comp_id);
+}
+
+static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
+ u16 command_id, bool capture)
+{
+ if (command_id >= aq->depth) {
+ ibdev_err(aq->efa_dev,
+ "command id is larger than the queue size. cmd_id: %u queue size %d\n",
+ command_id, aq->depth);
+ return NULL;
+ }
+
+ if (aq->comp_ctx[command_id].occupied && capture) {
+ ibdev_err(aq->efa_dev, "Completion context is occupied\n");
+ return NULL;
+ }
+
+ if (capture) {
+ aq->comp_ctx[command_id].occupied = 1;
+ ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n",
+ command_id);
+ }
+
+ return &aq->comp_ctx[command_id];
+}
+
+static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ struct efa_comp_ctx *comp_ctx;
+ u16 queue_size_mask;
+ u16 ctx_id;
+ u16 pi;
+
+ queue_size_mask = aq->depth - 1;
+ pi = aq->sq.pc & queue_size_mask;
+
+ ctx_id = efa_com_alloc_ctx_id(aq);
+
+ cmd->aq_common_descriptor.flags |= aq->sq.phase &
+ EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+ cmd->aq_common_descriptor.command_id |= ctx_id &
+ EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true);
+ if (!comp_ctx) {
+ efa_com_dealloc_ctx_id(aq, ctx_id);
+ return ERR_PTR(-EINVAL);
+ }
+
+ comp_ctx->status = EFA_CMD_SUBMITTED;
+ comp_ctx->comp_size = comp_size_in_bytes;
+ comp_ctx->user_cqe = comp;
+ comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+ reinit_completion(&comp_ctx->wait_event);
+
+ memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
+
+ aq->sq.pc++;
+ atomic64_inc(&aq->stats.submitted_cmd);
+
+ if ((aq->sq.pc & queue_size_mask) == 0)
+ aq->sq.phase = !aq->sq.phase;
+
+ /* barrier not needed in case of writel */
+ writel(aq->sq.pc, aq->sq.db_addr);
+
+ return comp_ctx;
+}
+
+static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
+{
+ size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
+ size_t size = aq->depth * sizeof(struct efa_comp_ctx);
+ struct efa_comp_ctx *comp_ctx;
+ u16 i;
+
+ aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
+ aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
+ if (!aq->comp_ctx || !aq->comp_ctx_pool) {
+ devm_kfree(aq->dmadev, aq->comp_ctx_pool);
+ devm_kfree(aq->dmadev, aq->comp_ctx);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < aq->depth; i++) {
+ comp_ctx = efa_com_get_comp_ctx(aq, i, false);
+ if (comp_ctx)
+ init_completion(&comp_ctx->wait_event);
+
+ aq->comp_ctx_pool[i] = i;
+ }
+
+ spin_lock_init(&aq->comp_ctx_lock);
+
+ aq->comp_ctx_pool_next = 0;
+
+ return 0;
+}
+
+static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ struct efa_comp_ctx *comp_ctx;
+
+ spin_lock(&aq->sq.lock);
+ if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
+ ibdev_err(aq->efa_dev, "Admin queue is closed\n");
+ spin_unlock(&aq->sq.lock);
+ return ERR_PTR(-ENODEV);
+ }
+
+ comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
+ comp_size_in_bytes);
+ spin_unlock(&aq->sq.lock);
+ if (IS_ERR(comp_ctx))
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ return comp_ctx;
+}
+
+static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
+{
+ struct efa_comp_ctx *comp_ctx;
+ u16 cmd_id;
+
+ cmd_id = cqe->acq_common_descriptor.command &
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
+ if (!comp_ctx) {
+ ibdev_err(aq->efa_dev,
+ "comp_ctx is NULL. Changing the admin queue running state\n");
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ return;
+ }
+
+ comp_ctx->status = EFA_CMD_COMPLETED;
+ comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+ if (comp_ctx->user_cqe)
+ memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
+
+ if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+ complete(&comp_ctx->wait_event);
+}
+
+static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
+{
+ struct efa_admin_acq_entry *cqe;
+ u16 queue_size_mask;
+ u16 comp_num = 0;
+ u8 phase;
+ u16 ci;
+
+ queue_size_mask = aq->depth - 1;
+
+ ci = aq->cq.cc & queue_size_mask;
+ phase = aq->cq.phase;
+
+ cqe = &aq->cq.entries[ci];
+
+ /* Go over all the completions */
+ while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
+ EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+ efa_com_handle_single_admin_completion(aq, cqe);
+
+ ci++;
+ comp_num++;
+ if (ci == aq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+
+ cqe = &aq->cq.entries[ci];
+ }
+
+ aq->cq.cc += comp_num;
+ aq->cq.phase = phase;
+ aq->sq.cc += comp_num;
+ atomic64_add(comp_num, &aq->stats.completed_cmd);
+}
+
+static int efa_com_comp_status_to_errno(u8 comp_status)
+{
+ switch (comp_status) {
+ case EFA_ADMIN_SUCCESS:
+ return 0;
+ case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+ return -ENOMEM;
+ case EFA_ADMIN_UNSUPPORTED_OPCODE:
+ return -EOPNOTSUPP;
+ case EFA_ADMIN_BAD_OPCODE:
+ case EFA_ADMIN_MALFORMED_REQUEST:
+ case EFA_ADMIN_ILLEGAL_PARAMETER:
+ case EFA_ADMIN_UNKNOWN_ERROR:
+ return -EINVAL;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ unsigned long timeout;
+ unsigned long flags;
+ int err;
+
+ timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
+
+ while (1) {
+ spin_lock_irqsave(&aq->cq.lock, flags);
+ efa_com_handle_admin_completion(aq);
+ spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+ if (comp_ctx->status != EFA_CMD_SUBMITTED)
+ break;
+
+ if (time_is_before_jiffies(timeout)) {
+ ibdev_err(aq->efa_dev,
+ "Wait for completion (polling) timeout\n");
+ /* EFA didn't have any completion */
+ atomic64_inc(&aq->stats.no_completion);
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ err = -ETIME;
+ goto out;
+ }
+
+ msleep(aq->poll_interval);
+ }
+
+ if (comp_ctx->status == EFA_CMD_ABORTED) {
+ ibdev_err(aq->efa_dev, "Command was aborted\n");
+ atomic64_inc(&aq->stats.aborted_cmd);
+ err = -ENODEV;
+ goto out;
+ }
+
+ WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED,
+ "Invalid completion status %d\n", comp_ctx->status);
+
+ err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+out:
+ efa_com_put_comp_ctx(aq, comp_ctx);
+ return err;
+}
+
+static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ unsigned long flags;
+ int err;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ usecs_to_jiffies(aq->completion_timeout));
+
+ /*
+ * In case the command wasn't completed find out the root cause.
+ * There might be 2 kinds of errors
+ * 1) No completion (timeout reached)
+ * 2) There is completion but the device didn't get any msi-x interrupt.
+ */
+ if (comp_ctx->status == EFA_CMD_SUBMITTED) {
+ spin_lock_irqsave(&aq->cq.lock, flags);
+ efa_com_handle_admin_completion(aq);
+ spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+ atomic64_inc(&aq->stats.no_completion);
+
+ if (comp_ctx->status == EFA_CMD_COMPLETED)
+ ibdev_err(aq->efa_dev,
+ "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ else
+ ibdev_err(aq->efa_dev,
+ "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ err = -ETIME;
+ goto out;
+ }
+
+ err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+out:
+ efa_com_put_comp_ctx(aq, comp_ctx);
+ return err;
+}
+
+/*
+ * There are two types to wait for completion.
+ * Polling mode - wait until the completion is available.
+ * Async mode - wait on wait queue until the completion is ready
+ * (or the timeout expired).
+ * It is expected that the IRQ called efa_com_handle_admin_completion
+ * to mark the completions.
+ */
+static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+ return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
+
+ return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
+}
+
+/**
+ * efa_com_cmd_exec - Execute admin command
+ * @aq: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @comp: command completion return entry.
+ * @comp_size: command completion size.
+ * Submit an admin command and then wait until the device will return a
+ * completion.
+ * The completion will be copied into comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size)
+{
+ struct efa_comp_ctx *comp_ctx;
+ int err;
+
+ might_sleep();
+
+ /* In case of queue FULL */
+ down(&aq->avail_cmds);
+
+ ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode);
+ comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
+ if (IS_ERR(comp_ctx)) {
+ ibdev_err(aq->efa_dev,
+ "Failed to submit command %s (opcode %u) err %ld\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+
+ up(&aq->avail_cmds);
+ return PTR_ERR(comp_ctx);
+ }
+
+ err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
+ if (err)
+ ibdev_err(aq->efa_dev,
+ "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode,
+ comp_ctx->comp_status, err);
+
+ up(&aq->avail_cmds);
+
+ return err;
+}
+
+/**
+ * efa_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @edev: EFA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call efa_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */
+static void efa_com_abort_admin_commands(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_comp_ctx *comp_ctx;
+ unsigned long flags;
+ u16 i;
+
+ spin_lock(&aq->sq.lock);
+ spin_lock_irqsave(&aq->cq.lock, flags);
+ for (i = 0; i < aq->depth; i++) {
+ comp_ctx = efa_com_get_comp_ctx(aq, i, false);
+ if (!comp_ctx)
+ break;
+
+ comp_ctx->status = EFA_CMD_ABORTED;
+
+ complete(&comp_ctx->wait_event);
+ }
+ spin_unlock_irqrestore(&aq->cq.lock, flags);
+ spin_unlock(&aq->sq.lock);
+}
+
+/**
+ * efa_com_wait_for_abort_completion - Wait for admin commands abort.
+ * @edev: EFA communication layer struct
+ *
+ * This method wait until all the outstanding admin commands will be completed.
+ */
+static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int i;
+
+ /* all mine */
+ for (i = 0; i < aq->depth; i++)
+ down(&aq->avail_cmds);
+
+ /* let it go */
+ for (i = 0; i < aq->depth; i++)
+ up(&aq->avail_cmds);
+}
+
+static void efa_com_admin_flush(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ efa_com_abort_admin_commands(edev);
+ efa_com_wait_for_abort_completion(edev);
+}
+
+/**
+ * efa_com_admin_destroy - Destroy the admin and the async events queues.
+ * @edev: EFA communication layer struct
+ */
+void efa_com_admin_destroy(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_aenq *aenq = &edev->aenq;
+ struct efa_com_admin_cq *cq = &aq->cq;
+ struct efa_com_admin_sq *sq = &aq->sq;
+ u16 size;
+
+ efa_com_admin_flush(edev);
+
+ devm_kfree(edev->dmadev, aq->comp_ctx_pool);
+ devm_kfree(edev->dmadev, aq->comp_ctx);
+
+ size = aq->depth * sizeof(*sq->entries);
+ dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
+
+ size = aq->depth * sizeof(*cq->entries);
+ dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
+
+ size = aenq->depth * sizeof(*aenq->entries);
+ dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
+}
+
+/**
+ * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @edev: EFA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
+{
+ u32 mask_value = 0;
+
+ if (polling)
+ mask_value = EFA_REGS_ADMIN_INTR_MASK;
+
+ writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
+ if (polling)
+ set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+ else
+ clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+}
+
+static void efa_com_stats_init(struct efa_com_dev *edev)
+{
+ atomic64_t *s = (atomic64_t *)&edev->aq.stats;
+ int i;
+
+ for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
+ atomic64_set(s, 0);
+}
+
+/**
+ * efa_com_admin_init - Init the admin and the async queues
+ * @edev: EFA communication layer struct
+ * @aenq_handlers: Those handlers to be called upon event.
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_admin_init(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ u32 timeout;
+ u32 dev_sts;
+ u32 cap;
+ int err;
+
+ dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+ if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
+ ibdev_err(edev->efa_dev,
+ "Device isn't ready, abort com init %#x\n", dev_sts);
+ return -ENODEV;
+ }
+
+ aq->depth = EFA_ADMIN_QUEUE_DEPTH;
+
+ aq->dmadev = edev->dmadev;
+ aq->efa_dev = edev->efa_dev;
+ set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
+
+ sema_init(&aq->avail_cmds, aq->depth);
+
+ efa_com_stats_init(edev);
+
+ err = efa_com_init_comp_ctxt(aq);
+ if (err)
+ return err;
+
+ err = efa_com_admin_init_sq(edev);
+ if (err)
+ goto err_destroy_comp_ctxt;
+
+ err = efa_com_admin_init_cq(edev);
+ if (err)
+ goto err_destroy_sq;
+
+ efa_com_set_admin_polling_mode(edev, false);
+
+ err = efa_com_admin_init_aenq(edev, aenq_handlers);
+ if (err)
+ goto err_destroy_cq;
+
+ cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+ timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+ EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ aq->completion_timeout = timeout * 100000;
+ else
+ aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+ aq->poll_interval = EFA_POLL_INTERVAL_MS;
+
+ set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ return 0;
+
+err_destroy_cq:
+ dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
+ aq->cq.entries, aq->cq.dma_addr);
+err_destroy_sq:
+ dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
+ aq->sq.entries, aq->sq.dma_addr);
+err_destroy_comp_ctxt:
+ devm_kfree(edev->dmadev, aq->comp_ctx);
+
+ return err;
+}
+
+/**
+ * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @edev: EFA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up
+ * all the pending threads that wait on the commands wait event.
+ *
+ * @note: Should be called after MSI-X interrupt.
+ */
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&edev->aq.cq.lock, flags);
+ efa_com_handle_admin_completion(&edev->aq);
+ spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
+}
+
+/*
+ * efa_handle_specific_aenq_event:
+ * return the handler that is relevant to the specific event group
+ */
+static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
+ u16 group)
+{
+ struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
+
+ if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
+ return aenq_handlers->handlers[group];
+
+ return aenq_handlers->unimplemented_handler;
+}
+
+/**
+ * efa_com_aenq_intr_handler - AENQ interrupt handler
+ * @edev: EFA communication layer struct
+ * @data: Data of interrupt handler.
+ *
+ * Go over the async event notification queue and call the proper aenq handler.
+ */
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
+{
+ struct efa_admin_aenq_common_desc *aenq_common;
+ struct efa_com_aenq *aenq = &edev->aenq;
+ struct efa_admin_aenq_entry *aenq_e;
+ efa_aenq_handler handler_cb;
+ u32 processed = 0;
+ u8 phase;
+ u32 ci;
+
+ ci = aenq->cc & (aenq->depth - 1);
+ phase = aenq->phase;
+ aenq_e = &aenq->entries[ci]; /* Get first entry */
+ aenq_common = &aenq_e->aenq_common_desc;
+
+ /* Go over all the events */
+ while ((READ_ONCE(aenq_common->flags) &
+ EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+
+ /* Handle specific event*/
+ handler_cb = efa_com_get_specific_aenq_cb(edev,
+ aenq_common->group);
+ handler_cb(data, aenq_e); /* call the actual event handler*/
+
+ /* Get next event entry */
+ ci++;
+ processed++;
+
+ if (ci == aenq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+ aenq_e = &aenq->entries[ci];
+ aenq_common = &aenq_e->aenq_common_desc;
+ }
+
+ aenq->cc += processed;
+ aenq->phase = phase;
+
+ /* Don't update aenq doorbell if there weren't any processed events */
+ if (!processed)
+ return;
+
+ /* barrier not needed in case of writel */
+ writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+}
+
+static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+ u32 addr_high;
+ u32 addr_low;
+
+ /* dma_addr_bits is unknown at this point */
+ addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
+ addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
+
+ writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
+ writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
+}
+
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+ spin_lock_init(&mmio_read->lock);
+ mmio_read->read_resp =
+ dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+ &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+ if (!mmio_read->read_resp)
+ return -ENOMEM;
+
+ efa_com_mmio_reg_read_resp_addr_init(edev);
+
+ mmio_read->read_resp->req_id = 0;
+ mmio_read->seq_num = 0;
+ mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
+
+ return 0;
+}
+
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+ dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+ mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+}
+
+int efa_com_validate_version(struct efa_com_dev *edev)
+{
+ u32 ctrl_ver_masked;
+ u32 ctrl_ver;
+ u32 ver;
+
+ /*
+ * Make sure the EFA version and the controller version are at least
+ * as the driver expects
+ */
+ ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
+ ctrl_ver = efa_com_reg_read32(edev,
+ EFA_REGS_CONTROLLER_VERSION_OFF);
+
+ ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
+ (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+ EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+ ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
+
+ if (ver < MIN_EFA_VER) {
+ ibdev_err(edev->efa_dev,
+ "EFA version is lower than the minimal version the driver supports\n");
+ return -EOPNOTSUPP;
+ }
+
+ ibdev_dbg(edev->efa_dev,
+ "efa controller version: %d.%d.%d implementation version %d\n",
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+ EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+ EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+ EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+ ctrl_ver_masked =
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+ (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+ /* Validate the ctrl version without the implementation ID */
+ if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
+ ibdev_err(edev->efa_dev,
+ "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * efa_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @edev: EFA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int efa_com_get_dma_width(struct efa_com_dev *edev)
+{
+ u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+ int width;
+
+ width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+ EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+ ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
+
+ if (width < 32 || width > 64) {
+ ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
+ return -EINVAL;
+ }
+
+ edev->dma_addr_bits = width;
+
+ return width;
+}
+
+static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
+ u16 exp_state)
+{
+ u32 val, i;
+
+ for (i = 0; i < timeout; i++) {
+ val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+
+ if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+ exp_state)
+ return 0;
+
+ ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
+ msleep(EFA_POLL_INTERVAL_MS);
+ }
+
+ return -ETIME;
+}
+
+/**
+ * efa_com_dev_reset - Perform device FLR to the device.
+ * @edev: EFA communication layer struct
+ * @reset_reason: Specify what is the trigger for the reset in case of an error.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_dev_reset(struct efa_com_dev *edev,
+ enum efa_regs_reset_reason_types reset_reason)
+{
+ u32 stat, timeout, cap, reset_val;
+ int err;
+
+ stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+ cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+
+ if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
+ ibdev_err(edev->efa_dev,
+ "Device isn't ready, can't reset device\n");
+ return -EINVAL;
+ }
+
+ timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+ EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+ if (!timeout) {
+ ibdev_err(edev->efa_dev, "Invalid timeout value\n");
+ return -EINVAL;
+ }
+
+ /* start reset */
+ reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
+ reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
+ EFA_REGS_DEV_CTL_RESET_REASON_MASK;
+ writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+
+ /* reset clears the mmio readless address, restore it */
+ efa_com_mmio_reg_read_resp_addr_init(edev);
+
+ err = wait_for_reset_state(edev, timeout,
+ EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
+ return err;
+ }
+
+ /* reset done */
+ writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+ err = wait_for_reset_state(edev, timeout, 0);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
+ return err;
+ }
+
+ timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+ EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ edev->aq.completion_timeout = timeout * 100000;
+ else
+ edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
new file mode 100644
index 000000000000..84d96724a74b
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_H_
+#define _EFA_COM_H_
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/semaphore.h>
+#include <linux/sched.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "efa_common_defs.h"
+#include "efa_admin_defs.h"
+#include "efa_admin_cmds_defs.h"
+#include "efa_regs_defs.h"
+
+#define EFA_MAX_HANDLERS 256
+
+struct efa_com_admin_cq {
+ struct efa_admin_acq_entry *entries;
+ dma_addr_t dma_addr;
+ spinlock_t lock; /* Protects ACQ */
+
+ u16 cc; /* consumer counter */
+ u8 phase;
+};
+
+struct efa_com_admin_sq {
+ struct efa_admin_aq_entry *entries;
+ dma_addr_t dma_addr;
+ spinlock_t lock; /* Protects ASQ */
+
+ u32 __iomem *db_addr;
+
+ u16 cc; /* consumer counter */
+ u16 pc; /* producer counter */
+ u8 phase;
+
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_com_stats_admin {
+ atomic64_t aborted_cmd;
+ atomic64_t submitted_cmd;
+ atomic64_t completed_cmd;
+ atomic64_t no_completion;
+};
+
+enum {
+ EFA_AQ_STATE_RUNNING_BIT = 0,
+ EFA_AQ_STATE_POLLING_BIT = 1,
+};
+
+struct efa_com_admin_queue {
+ void *dmadev;
+ void *efa_dev;
+ struct efa_comp_ctx *comp_ctx;
+ u32 completion_timeout; /* usecs */
+ u16 poll_interval; /* msecs */
+ u16 depth;
+ struct efa_com_admin_cq cq;
+ struct efa_com_admin_sq sq;
+ u16 msix_vector_idx;
+
+ unsigned long state;
+
+ /* Count the number of available admin commands */
+ struct semaphore avail_cmds;
+
+ struct efa_com_stats_admin stats;
+
+ spinlock_t comp_ctx_lock; /* Protects completion context pool */
+ u32 *comp_ctx_pool;
+ u16 comp_ctx_pool_next;
+};
+
+struct efa_aenq_handlers;
+
+struct efa_com_aenq {
+ struct efa_admin_aenq_entry *entries;
+ struct efa_aenq_handlers *aenq_handlers;
+ dma_addr_t dma_addr;
+ u32 cc; /* consumer counter */
+ u16 msix_vector_idx;
+ u16 depth;
+ u8 phase;
+};
+
+struct efa_com_mmio_read {
+ struct efa_admin_mmio_req_read_less_resp *read_resp;
+ dma_addr_t read_resp_dma_addr;
+ u16 seq_num;
+ u16 mmio_read_timeout; /* usecs */
+ /* serializes mmio reads */
+ spinlock_t lock;
+};
+
+struct efa_com_dev {
+ struct efa_com_admin_queue aq;
+ struct efa_com_aenq aenq;
+ u8 __iomem *reg_bar;
+ void *dmadev;
+ void *efa_dev;
+ u32 supported_features;
+ u32 dma_addr_bits;
+
+ struct efa_com_mmio_read mmio_read;
+};
+
+typedef void (*efa_aenq_handler)(void *data,
+ struct efa_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct efa_aenq_handlers {
+ efa_aenq_handler handlers[EFA_MAX_HANDLERS];
+ efa_aenq_handler unimplemented_handler;
+};
+
+int efa_com_admin_init(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers);
+void efa_com_admin_destroy(struct efa_com_dev *edev);
+int efa_com_dev_reset(struct efa_com_dev *edev,
+ enum efa_regs_reset_reason_types reset_reason);
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling);
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev);
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev);
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev);
+
+int efa_com_validate_version(struct efa_com_dev *edev);
+int efa_com_get_dma_width(struct efa_com_dev *edev);
+
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size);
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data);
+
+#endif /* _EFA_COM_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
new file mode 100644
index 000000000000..14227725521c
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa.h"
+#include "efa_com.h"
+#include "efa_com_cmd.h"
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
+{
+ *addr_low = lower_32_bits(addr);
+ *addr_high = upper_32_bits(addr);
+}
+
+int efa_com_create_qp(struct efa_com_dev *edev,
+ struct efa_com_create_qp_params *params,
+ struct efa_com_create_qp_result *res)
+{
+ struct efa_admin_create_qp_cmd create_qp_cmd = {};
+ struct efa_admin_create_qp_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ create_qp_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_QP;
+
+ create_qp_cmd.pd = params->pd;
+ create_qp_cmd.qp_type = params->qp_type;
+ create_qp_cmd.rq_base_addr = params->rq_base_addr;
+ create_qp_cmd.send_cq_idx = params->send_cq_idx;
+ create_qp_cmd.recv_cq_idx = params->recv_cq_idx;
+ create_qp_cmd.qp_alloc_size.send_queue_ring_size =
+ params->sq_ring_size_in_bytes;
+ create_qp_cmd.qp_alloc_size.send_queue_depth =
+ params->sq_depth;
+ create_qp_cmd.qp_alloc_size.recv_queue_ring_size =
+ params->rq_ring_size_in_bytes;
+ create_qp_cmd.qp_alloc_size.recv_queue_depth =
+ params->rq_depth;
+ create_qp_cmd.uar = params->uarn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&create_qp_cmd,
+ sizeof(create_qp_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err);
+ return err;
+ }
+
+ res->qp_handle = cmd_completion.qp_handle;
+ res->qp_num = cmd_completion.qp_num;
+ res->sq_db_offset = cmd_completion.sq_db_offset;
+ res->rq_db_offset = cmd_completion.rq_db_offset;
+ res->llq_descriptors_offset = cmd_completion.llq_descriptors_offset;
+ res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx;
+ res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx;
+
+ return err;
+}
+
+int efa_com_modify_qp(struct efa_com_dev *edev,
+ struct efa_com_modify_qp_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_modify_qp_cmd cmd = {};
+ struct efa_admin_modify_qp_resp resp;
+ int err;
+
+ cmd.aq_common_desc.opcode = EFA_ADMIN_MODIFY_QP;
+ cmd.modify_mask = params->modify_mask;
+ cmd.qp_handle = params->qp_handle;
+ cmd.qp_state = params->qp_state;
+ cmd.cur_qp_state = params->cur_qp_state;
+ cmd.qkey = params->qkey;
+ cmd.sq_psn = params->sq_psn;
+ cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev,
+ "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+ cmd.qp_handle, cmd.modify_mask, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_query_qp(struct efa_com_dev *edev,
+ struct efa_com_query_qp_params *params,
+ struct efa_com_query_qp_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_query_qp_cmd cmd = {};
+ struct efa_admin_query_qp_resp resp;
+ int err;
+
+ cmd.aq_common_desc.opcode = EFA_ADMIN_QUERY_QP;
+ cmd.qp_handle = params->qp_handle;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n",
+ cmd.qp_handle, err);
+ return err;
+ }
+
+ result->qp_state = resp.qp_state;
+ result->qkey = resp.qkey;
+ result->sq_draining = resp.sq_draining;
+ result->sq_psn = resp.sq_psn;
+
+ return 0;
+}
+
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+ struct efa_com_destroy_qp_params *params)
+{
+ struct efa_admin_destroy_qp_resp cmd_completion;
+ struct efa_admin_destroy_qp_cmd qp_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ qp_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_QP;
+ qp_cmd.qp_handle = params->qp_handle;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&qp_cmd,
+ sizeof(qp_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err)
+ ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n",
+ qp_cmd.qp_handle, err);
+
+ return 0;
+}
+
+int efa_com_create_cq(struct efa_com_dev *edev,
+ struct efa_com_create_cq_params *params,
+ struct efa_com_create_cq_result *result)
+{
+ struct efa_admin_create_cq_resp cmd_completion;
+ struct efa_admin_create_cq_cmd create_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ;
+ create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) &
+ EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+ create_cmd.cq_depth = params->cq_depth;
+ create_cmd.num_sub_cqs = params->num_sub_cqs;
+ create_cmd.uar = params->uarn;
+
+ efa_com_set_dma_addr(params->dma_addr,
+ &create_cmd.cq_ba.mem_addr_high,
+ &create_cmd.cq_ba.mem_addr_low);
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&create_cmd,
+ sizeof(create_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err);
+ return err;
+ }
+
+ result->cq_idx = cmd_completion.cq_idx;
+ result->actual_depth = params->cq_depth;
+
+ return err;
+}
+
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+ struct efa_com_destroy_cq_params *params)
+{
+ struct efa_admin_destroy_cq_cmd destroy_cmd = {};
+ struct efa_admin_destroy_cq_resp destroy_resp;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ destroy_cmd.cq_idx = params->cq_idx;
+ destroy_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_CQ;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&destroy_cmd,
+ sizeof(destroy_cmd),
+ (struct efa_admin_acq_entry *)&destroy_resp,
+ sizeof(destroy_resp));
+
+ if (err)
+ ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n",
+ params->cq_idx, err);
+
+ return 0;
+}
+
+int efa_com_register_mr(struct efa_com_dev *edev,
+ struct efa_com_reg_mr_params *params,
+ struct efa_com_reg_mr_result *result)
+{
+ struct efa_admin_reg_mr_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_reg_mr_cmd mr_cmd = {};
+ int err;
+
+ mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR;
+ mr_cmd.pd = params->pd;
+ mr_cmd.mr_length = params->mr_length_in_bytes;
+ mr_cmd.flags |= params->page_shift &
+ EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
+ mr_cmd.iova = params->iova;
+ mr_cmd.permissions |= params->permissions &
+ EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
+
+ if (params->inline_pbl) {
+ memcpy(mr_cmd.pbl.inline_pbl_array,
+ params->pbl.inline_pbl_array,
+ sizeof(mr_cmd.pbl.inline_pbl_array));
+ } else {
+ mr_cmd.pbl.pbl.length = params->pbl.pbl.length;
+ mr_cmd.pbl.pbl.address.mem_addr_low =
+ params->pbl.pbl.address.mem_addr_low;
+ mr_cmd.pbl.pbl.address.mem_addr_high =
+ params->pbl.pbl.address.mem_addr_high;
+ mr_cmd.aq_common_desc.flags |=
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;
+ if (params->indirect)
+ mr_cmd.aq_common_desc.flags |=
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ }
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&mr_cmd,
+ sizeof(mr_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err);
+ return err;
+ }
+
+ result->l_key = cmd_completion.l_key;
+ result->r_key = cmd_completion.r_key;
+
+ return 0;
+}
+
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+ struct efa_com_dereg_mr_params *params)
+{
+ struct efa_admin_dereg_mr_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dereg_mr_cmd mr_cmd = {};
+ int err;
+
+ mr_cmd.aq_common_desc.opcode = EFA_ADMIN_DEREG_MR;
+ mr_cmd.l_key = params->l_key;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&mr_cmd,
+ sizeof(mr_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err)
+ ibdev_err(edev->efa_dev,
+ "Failed to de-register mr(lkey-%u) [%d]\n",
+ mr_cmd.l_key, err);
+
+ return 0;
+}
+
+int efa_com_create_ah(struct efa_com_dev *edev,
+ struct efa_com_create_ah_params *params,
+ struct efa_com_create_ah_result *result)
+{
+ struct efa_admin_create_ah_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_create_ah_cmd ah_cmd = {};
+ int err;
+
+ ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH;
+
+ memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr));
+ ah_cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&ah_cmd,
+ sizeof(ah_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err);
+ return err;
+ }
+
+ result->ah = cmd_completion.ah;
+
+ return 0;
+}
+
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+ struct efa_com_destroy_ah_params *params)
+{
+ struct efa_admin_destroy_ah_resp cmd_completion;
+ struct efa_admin_destroy_ah_cmd ah_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH;
+ ah_cmd.ah = params->ah;
+ ah_cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&ah_cmd,
+ sizeof(ah_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err)
+ ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n",
+ ah_cmd.ah, ah_cmd.pd, err);
+
+ return 0;
+}
+
+static bool
+efa_com_check_supported_feature_id(struct efa_com_dev *edev,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ u32 feature_mask = 1 << feature_id;
+
+ /* Device attributes is always supported */
+ if (feature_id != EFA_ADMIN_DEVICE_ATTR &&
+ !(edev->supported_features & feature_mask))
+ return false;
+
+ return true;
+}
+
+static int efa_com_get_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_get_feature_resp *get_resp,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size)
+{
+ struct efa_admin_get_feature_cmd get_cmd = {};
+ struct efa_com_admin_queue *aq;
+ int err;
+
+ if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+ ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
+ feature_id);
+ return -EOPNOTSUPP;
+ }
+
+ aq = &edev->aq;
+
+ get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
+
+ if (control_buff_size)
+ get_cmd.aq_common_descriptor.flags =
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+
+
+ efa_com_set_dma_addr(control_buf_dma_addr,
+ &get_cmd.control_buffer.address.mem_addr_high,
+ &get_cmd.control_buffer.address.mem_addr_low);
+
+ get_cmd.control_buffer.length = control_buff_size;
+ get_cmd.feature_common.feature_id = feature_id;
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)
+ &get_cmd,
+ sizeof(get_cmd),
+ (struct efa_admin_acq_entry *)
+ get_resp,
+ sizeof(*get_resp));
+
+ if (err)
+ ibdev_err(edev->efa_dev,
+ "Failed to submit get_feature command %d [%d]\n",
+ feature_id, err);
+
+ return 0;
+}
+
+static int efa_com_get_feature(struct efa_com_dev *edev,
+ struct efa_admin_get_feature_resp *get_resp,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
+}
+
+int efa_com_get_network_attr(struct efa_com_dev *edev,
+ struct efa_com_get_network_attr_result *result)
+{
+ struct efa_admin_get_feature_resp resp;
+ int err;
+
+ err = efa_com_get_feature(edev, &resp,
+ EFA_ADMIN_NETWORK_ATTR);
+ if (err) {
+ ibdev_err(edev->efa_dev,
+ "Failed to get network attributes %d\n", err);
+ return err;
+ }
+
+ memcpy(result->addr, resp.u.network_attr.addr,
+ sizeof(resp.u.network_attr.addr));
+ result->mtu = resp.u.network_attr.mtu;
+
+ return 0;
+}
+
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+ struct efa_com_get_device_attr_result *result)
+{
+ struct efa_admin_get_feature_resp resp;
+ int err;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n",
+ err);
+ return err;
+ }
+
+ result->page_size_cap = resp.u.device_attr.page_size_cap;
+ result->fw_version = resp.u.device_attr.fw_version;
+ result->admin_api_version = resp.u.device_attr.admin_api_version;
+ result->device_version = resp.u.device_attr.device_version;
+ result->supported_features = resp.u.device_attr.supported_features;
+ result->phys_addr_width = resp.u.device_attr.phys_addr_width;
+ result->virt_addr_width = resp.u.device_attr.virt_addr_width;
+ result->db_bar = resp.u.device_attr.db_bar;
+
+ if (result->admin_api_version < 1) {
+ ibdev_err(edev->efa_dev,
+ "Failed to get device attr api version [%u < 1]\n",
+ result->admin_api_version);
+ return -EINVAL;
+ }
+
+ edev->supported_features = resp.u.device_attr.supported_features;
+ err = efa_com_get_feature(edev, &resp,
+ EFA_ADMIN_QUEUE_ATTR);
+ if (err) {
+ ibdev_err(edev->efa_dev,
+ "Failed to get network attributes %d\n", err);
+ return err;
+ }
+
+ result->max_qp = resp.u.queue_attr.max_qp;
+ result->max_sq_depth = resp.u.queue_attr.max_sq_depth;
+ result->max_rq_depth = resp.u.queue_attr.max_rq_depth;
+ result->max_cq = resp.u.queue_attr.max_cq;
+ result->max_cq_depth = resp.u.queue_attr.max_cq_depth;
+ result->inline_buf_size = resp.u.queue_attr.inline_buf_size;
+ result->max_sq_sge = resp.u.queue_attr.max_wr_send_sges;
+ result->max_rq_sge = resp.u.queue_attr.max_wr_recv_sges;
+ result->max_mr = resp.u.queue_attr.max_mr;
+ result->max_mr_pages = resp.u.queue_attr.max_mr_pages;
+ result->max_pd = resp.u.queue_attr.max_pd;
+ result->max_ah = resp.u.queue_attr.max_ah;
+ result->max_llq_size = resp.u.queue_attr.max_llq_size;
+ result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+
+ return 0;
+}
+
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+ struct efa_com_get_hw_hints_result *result)
+{
+ struct efa_admin_get_feature_resp resp;
+ int err;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err);
+ return err;
+ }
+
+ result->admin_completion_timeout = resp.u.hw_hints.admin_completion_timeout;
+ result->driver_watchdog_timeout = resp.u.hw_hints.driver_watchdog_timeout;
+ result->mmio_read_timeout = resp.u.hw_hints.mmio_read_timeout;
+ result->poll_interval = resp.u.hw_hints.poll_interval;
+
+ return 0;
+}
+
+static int efa_com_set_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size)
+{
+ struct efa_com_admin_queue *aq;
+ int err;
+
+ if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+ ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
+ feature_id);
+ return -EOPNOTSUPP;
+ }
+
+ aq = &edev->aq;
+
+ set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE;
+ if (control_buff_size) {
+ set_cmd->aq_common_descriptor.flags =
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ efa_com_set_dma_addr(control_buf_dma_addr,
+ &set_cmd->control_buffer.address.mem_addr_high,
+ &set_cmd->control_buffer.address.mem_addr_low);
+ }
+
+ set_cmd->control_buffer.length = control_buff_size;
+ set_cmd->feature_common.feature_id = feature_id;
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)set_cmd,
+ sizeof(*set_cmd),
+ (struct efa_admin_acq_entry *)set_resp,
+ sizeof(*set_resp));
+
+ if (err)
+ ibdev_err(edev->efa_dev,
+ "Failed to submit set_feature command %d error: %d\n",
+ feature_id, err);
+
+ return 0;
+}
+
+static int efa_com_set_feature(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ return efa_com_set_feature_ex(edev, set_resp, set_cmd, feature_id,
+ 0, 0);
+}
+
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
+{
+ struct efa_admin_get_feature_resp get_resp;
+ struct efa_admin_set_feature_resp set_resp;
+ struct efa_admin_set_feature_cmd cmd = {};
+ int err;
+
+ ibdev_dbg(edev->efa_dev, "Configuring aenq with groups[%#x]\n", groups);
+
+ err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n",
+ err);
+ return err;
+ }
+
+ ibdev_dbg(edev->efa_dev,
+ "Get aenq groups: supported[%#x] enabled[%#x]\n",
+ get_resp.u.aenq.supported_groups,
+ get_resp.u.aenq.enabled_groups);
+
+ if ((get_resp.u.aenq.supported_groups & groups) != groups) {
+ ibdev_err(edev->efa_dev,
+ "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+ groups, get_resp.u.aenq.supported_groups);
+ return -EOPNOTSUPP;
+ }
+
+ cmd.u.aenq.enabled_groups = groups;
+ err = efa_com_set_feature(edev, &set_resp, &cmd,
+ EFA_ADMIN_AENQ_CONFIG);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+ struct efa_com_alloc_pd_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_alloc_pd_cmd cmd = {};
+ struct efa_admin_alloc_pd_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err);
+ return err;
+ }
+
+ result->pdn = resp.pd;
+
+ return 0;
+}
+
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+ struct efa_com_dealloc_pd_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dealloc_pd_cmd cmd = {};
+ struct efa_admin_dealloc_pd_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_PD;
+ cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n",
+ cmd.pd, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+ struct efa_com_alloc_uar_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_alloc_uar_cmd cmd = {};
+ struct efa_admin_alloc_uar_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_UAR;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err);
+ return err;
+ }
+
+ result->uarn = resp.uar;
+
+ return 0;
+}
+
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+ struct efa_com_dealloc_uar_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dealloc_uar_cmd cmd = {};
+ struct efa_admin_dealloc_uar_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_UAR;
+ cmd.uar = params->uarn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n",
+ cmd.uar, err);
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
new file mode 100644
index 000000000000..a1174380462c
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_CMD_H_
+#define _EFA_COM_CMD_H_
+
+#include "efa_com.h"
+
+#define EFA_GID_SIZE 16
+
+struct efa_com_create_qp_params {
+ u64 rq_base_addr;
+ u32 send_cq_idx;
+ u32 recv_cq_idx;
+ /*
+ * Send descriptor ring size in bytes,
+ * sufficient for user-provided number of WQEs and SGL size
+ */
+ u32 sq_ring_size_in_bytes;
+ /* Max number of WQEs that will be posted on send queue */
+ u32 sq_depth;
+ /* Recv descriptor ring size in bytes */
+ u32 rq_ring_size_in_bytes;
+ u32 rq_depth;
+ u16 pd;
+ u16 uarn;
+ u8 qp_type;
+};
+
+struct efa_com_create_qp_result {
+ u32 qp_handle;
+ u32 qp_num;
+ u32 sq_db_offset;
+ u32 rq_db_offset;
+ u32 llq_descriptors_offset;
+ u16 send_sub_cq_idx;
+ u16 recv_sub_cq_idx;
+};
+
+struct efa_com_modify_qp_params {
+ u32 modify_mask;
+ u32 qp_handle;
+ u32 qp_state;
+ u32 cur_qp_state;
+ u32 qkey;
+ u32 sq_psn;
+ u8 sq_drained_async_notify;
+};
+
+struct efa_com_query_qp_params {
+ u32 qp_handle;
+};
+
+struct efa_com_query_qp_result {
+ u32 qp_state;
+ u32 qkey;
+ u32 sq_draining;
+ u32 sq_psn;
+};
+
+struct efa_com_destroy_qp_params {
+ u32 qp_handle;
+};
+
+struct efa_com_create_cq_params {
+ /* cq physical base address in OS memory */
+ dma_addr_t dma_addr;
+ /* completion queue depth in # of entries */
+ u16 cq_depth;
+ u16 num_sub_cqs;
+ u16 uarn;
+ u8 entry_size_in_bytes;
+};
+
+struct efa_com_create_cq_result {
+ /* cq identifier */
+ u16 cq_idx;
+ /* actual cq depth in # of entries */
+ u16 actual_depth;
+};
+
+struct efa_com_destroy_cq_params {
+ u16 cq_idx;
+};
+
+struct efa_com_create_ah_params {
+ u16 pdn;
+ /* Destination address in network byte order */
+ u8 dest_addr[EFA_GID_SIZE];
+};
+
+struct efa_com_create_ah_result {
+ u16 ah;
+};
+
+struct efa_com_destroy_ah_params {
+ u16 ah;
+ u16 pdn;
+};
+
+struct efa_com_get_network_attr_result {
+ u8 addr[EFA_GID_SIZE];
+ u32 mtu;
+};
+
+struct efa_com_get_device_attr_result {
+ u64 page_size_cap;
+ u64 max_mr_pages;
+ u32 fw_version;
+ u32 admin_api_version;
+ u32 device_version;
+ u32 supported_features;
+ u32 phys_addr_width;
+ u32 virt_addr_width;
+ u32 max_qp;
+ u32 max_sq_depth; /* wqes */
+ u32 max_rq_depth; /* wqes */
+ u32 max_cq;
+ u32 max_cq_depth; /* cqes */
+ u32 inline_buf_size;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_ah;
+ u32 max_llq_size;
+ u16 sub_cqs_per_cq;
+ u16 max_sq_sge;
+ u16 max_rq_sge;
+ u8 db_bar;
+};
+
+struct efa_com_get_hw_hints_result {
+ u16 mmio_read_timeout;
+ u16 driver_watchdog_timeout;
+ u16 admin_completion_timeout;
+ u16 poll_interval;
+ u32 reserved[4];
+};
+
+struct efa_com_mem_addr {
+ u32 mem_addr_low;
+ u32 mem_addr_high;
+};
+
+/* Used at indirect mode page list chunks for chaining */
+struct efa_com_ctrl_buff_info {
+ /* indicates length of the buffer pointed by control_buffer_address. */
+ u32 length;
+ /* points to control buffer (direct or indirect) */
+ struct efa_com_mem_addr address;
+};
+
+struct efa_com_reg_mr_params {
+ /* Memory region length, in bytes. */
+ u64 mr_length_in_bytes;
+ /* IO Virtual Address associated with this MR. */
+ u64 iova;
+ /* words 8:15: Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical addresses of app pages
+ * (optimization for short region reservations)
+ */
+ u64 inline_pbl_array[4];
+ /*
+ * Describes the next physically contiguous chunk of indirect
+ * page list. A page list contains physical addresses of command
+ * data pages. Data pages are 4KB; page list chunks are
+ * variable-sized.
+ */
+ struct efa_com_ctrl_buff_info pbl;
+ } pbl;
+ /* number of pages in PBL (redundant, could be calculated) */
+ u32 page_num;
+ /* Protection Domain */
+ u16 pd;
+ /*
+ * phys_page_size_shift - page size is (1 << phys_page_size_shift)
+ * Page size is used for building the Virtual to Physical
+ * address mapping
+ */
+ u8 page_shift;
+ /*
+ * permissions
+ * 0: local_write_enable - Write permissions: value of 1 needed
+ * for RQ buffers and for RDMA write:1: reserved1 - remote
+ * access flags, etc
+ */
+ u8 permissions;
+ u8 inline_pbl;
+ u8 indirect;
+};
+
+struct efa_com_reg_mr_result {
+ /*
+ * To be used in conjunction with local buffers references in SQ and
+ * RQ WQE
+ */
+ u32 l_key;
+ /*
+ * To be used in incoming RDMA semantics messages to refer to remotely
+ * accessed memory region
+ */
+ u32 r_key;
+};
+
+struct efa_com_dereg_mr_params {
+ u32 l_key;
+};
+
+struct efa_com_alloc_pd_result {
+ u16 pdn;
+};
+
+struct efa_com_dealloc_pd_params {
+ u16 pdn;
+};
+
+struct efa_com_alloc_uar_result {
+ u16 uarn;
+};
+
+struct efa_com_dealloc_uar_params {
+ u16 uarn;
+};
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
+int efa_com_create_qp(struct efa_com_dev *edev,
+ struct efa_com_create_qp_params *params,
+ struct efa_com_create_qp_result *res);
+int efa_com_modify_qp(struct efa_com_dev *edev,
+ struct efa_com_modify_qp_params *params);
+int efa_com_query_qp(struct efa_com_dev *edev,
+ struct efa_com_query_qp_params *params,
+ struct efa_com_query_qp_result *result);
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+ struct efa_com_destroy_qp_params *params);
+int efa_com_create_cq(struct efa_com_dev *edev,
+ struct efa_com_create_cq_params *params,
+ struct efa_com_create_cq_result *result);
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+ struct efa_com_destroy_cq_params *params);
+int efa_com_register_mr(struct efa_com_dev *edev,
+ struct efa_com_reg_mr_params *params,
+ struct efa_com_reg_mr_result *result);
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+ struct efa_com_dereg_mr_params *params);
+int efa_com_create_ah(struct efa_com_dev *edev,
+ struct efa_com_create_ah_params *params,
+ struct efa_com_create_ah_result *result);
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+ struct efa_com_destroy_ah_params *params);
+int efa_com_get_network_attr(struct efa_com_dev *edev,
+ struct efa_com_get_network_attr_result *result);
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+ struct efa_com_get_device_attr_result *result);
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+ struct efa_com_get_hw_hints_result *result);
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups);
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+ struct efa_com_alloc_pd_result *result);
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+ struct efa_com_dealloc_pd_params *params);
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+ struct efa_com_alloc_uar_result *result);
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+ struct efa_com_dealloc_uar_params *params);
+
+#endif /* _EFA_COM_CMD_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h
new file mode 100644
index 000000000000..c559ec08898e
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_common_defs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COMMON_H_
+#define _EFA_COMMON_H_
+
+#define EFA_COMMON_SPEC_VERSION_MAJOR 2
+#define EFA_COMMON_SPEC_VERSION_MINOR 0
+
+struct efa_common_mem_addr {
+ u32 mem_addr_low;
+
+ u32 mem_addr_high;
+};
+
+#endif /* _EFA_COMMON_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
new file mode 100644
index 000000000000..db974caf1eb1
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_user_verbs.h>
+
+#include "efa.h"
+
+#define PCI_DEV_ID_EFA_VF 0xefa0
+
+static const struct pci_device_id efa_pci_tbl[] = {
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) },
+ { }
+};
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
+
+#define EFA_REG_BAR 0
+#define EFA_MEM_BAR 2
+#define EFA_BASE_BAR_MASK (BIT(EFA_REG_BAR) | BIT(EFA_MEM_BAR))
+
+#define EFA_AENQ_ENABLED_GROUPS \
+ (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
+ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+
+static void efa_update_network_attr(struct efa_dev *dev,
+ struct efa_com_get_network_attr_result *network_attr)
+{
+ memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr));
+ dev->mtu = network_attr->mtu;
+
+ dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr);
+}
+
+/* This handler will called for unknown event group or unimplemented handlers */
+static void unimplemented_aenq_handler(void *data,
+ struct efa_admin_aenq_entry *aenq_e)
+{
+ struct efa_dev *dev = (struct efa_dev *)data;
+
+ ibdev_err(&dev->ibdev,
+ "Unknown event was received or event with unimplemented handler\n");
+}
+
+static void efa_keep_alive(void *data, struct efa_admin_aenq_entry *aenq_e)
+{
+ struct efa_dev *dev = (struct efa_dev *)data;
+
+ atomic64_inc(&dev->stats.keep_alive_rcvd);
+}
+
+static struct efa_aenq_handlers aenq_handlers = {
+ .handlers = {
+ [EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive,
+ },
+ .unimplemented_handler = unimplemented_aenq_handler
+};
+
+static void efa_release_bars(struct efa_dev *dev, int bars_mask)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int release_bars;
+
+ release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & bars_mask;
+ pci_release_selected_regions(pdev, release_bars);
+}
+
+static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data)
+{
+ struct efa_dev *dev = data;
+
+ efa_com_admin_q_comp_intr_handler(&dev->edev);
+ efa_com_aenq_intr_handler(&dev->edev, data);
+
+ return IRQ_HANDLED;
+}
+
+static int efa_request_mgmnt_irq(struct efa_dev *dev)
+{
+ struct efa_irq *irq;
+ int err;
+
+ irq = &dev->admin_irq;
+ err = request_irq(irq->vector, irq->handler, 0, irq->name,
+ irq->data);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n",
+ err);
+ return err;
+ }
+
+ dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n",
+ nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector);
+ irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+ return err;
+}
+
+static void efa_setup_mgmnt_irq(struct efa_dev *dev)
+{
+ u32 cpu;
+
+ snprintf(dev->admin_irq.name, EFA_IRQNAME_SIZE,
+ "efa-mgmnt@pci:%s", pci_name(dev->pdev));
+ dev->admin_irq.handler = efa_intr_msix_mgmnt;
+ dev->admin_irq.data = dev;
+ dev->admin_irq.vector =
+ pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx);
+ cpu = cpumask_first(cpu_online_mask);
+ dev->admin_irq.cpu = cpu;
+ cpumask_set_cpu(cpu,
+ &dev->admin_irq.affinity_hint_mask);
+ dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n",
+ &dev->admin_irq,
+ dev->admin_irq.vector,
+ dev->admin_irq.name);
+}
+
+static void efa_free_mgmnt_irq(struct efa_dev *dev)
+{
+ struct efa_irq *irq;
+
+ irq = &dev->admin_irq;
+ irq_set_affinity_hint(irq->vector, NULL);
+ free_irq(irq->vector, irq->data);
+}
+
+static int efa_set_mgmnt_irq(struct efa_dev *dev)
+{
+ efa_setup_mgmnt_irq(dev);
+
+ return efa_request_mgmnt_irq(dev);
+}
+
+static int efa_request_doorbell_bar(struct efa_dev *dev)
+{
+ u8 db_bar_idx = dev->dev_attr.db_bar;
+ struct pci_dev *pdev = dev->pdev;
+ int bars;
+ int err;
+
+ if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+
+ err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (err) {
+ dev_err(&dev->pdev->dev,
+ "pci_request_selected_regions for bar %d failed %d\n",
+ db_bar_idx, err);
+ return err;
+ }
+ }
+
+ dev->db_bar_addr = pci_resource_start(dev->pdev, db_bar_idx);
+ dev->db_bar_len = pci_resource_len(dev->pdev, db_bar_idx);
+
+ return 0;
+}
+
+static void efa_release_doorbell_bar(struct efa_dev *dev)
+{
+ if (!(BIT(dev->dev_attr.db_bar) & EFA_BASE_BAR_MASK))
+ efa_release_bars(dev, BIT(dev->dev_attr.db_bar));
+}
+
+static void efa_update_hw_hints(struct efa_dev *dev,
+ struct efa_com_get_hw_hints_result *hw_hints)
+{
+ struct efa_com_dev *edev = &dev->edev;
+
+ if (hw_hints->mmio_read_timeout)
+ edev->mmio_read.mmio_read_timeout =
+ hw_hints->mmio_read_timeout * 1000;
+
+ if (hw_hints->poll_interval)
+ edev->aq.poll_interval = hw_hints->poll_interval;
+
+ if (hw_hints->admin_completion_timeout)
+ edev->aq.completion_timeout =
+ hw_hints->admin_completion_timeout;
+}
+
+static void efa_stats_init(struct efa_dev *dev)
+{
+ atomic64_t *s = (atomic64_t *)&dev->stats;
+ int i;
+
+ for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++)
+ atomic64_set(s, 0);
+}
+
+static const struct ib_device_ops efa_dev_ops = {
+ .alloc_pd = efa_alloc_pd,
+ .alloc_ucontext = efa_alloc_ucontext,
+ .create_ah = efa_create_ah,
+ .create_cq = efa_create_cq,
+ .create_qp = efa_create_qp,
+ .dealloc_pd = efa_dealloc_pd,
+ .dealloc_ucontext = efa_dealloc_ucontext,
+ .dereg_mr = efa_dereg_mr,
+ .destroy_ah = efa_destroy_ah,
+ .destroy_cq = efa_destroy_cq,
+ .destroy_qp = efa_destroy_qp,
+ .get_link_layer = efa_port_link_layer,
+ .get_port_immutable = efa_get_port_immutable,
+ .mmap = efa_mmap,
+ .modify_qp = efa_modify_qp,
+ .query_device = efa_query_device,
+ .query_gid = efa_query_gid,
+ .query_pkey = efa_query_pkey,
+ .query_port = efa_query_port,
+ .query_qp = efa_query_qp,
+ .reg_user_mr = efa_reg_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
+};
+
+static int efa_ib_device_add(struct efa_dev *dev)
+{
+ struct efa_com_get_network_attr_result network_attr;
+ struct efa_com_get_hw_hints_result hw_hints;
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ efa_stats_init(dev);
+
+ err = efa_com_get_device_attr(&dev->edev, &dev->dev_attr);
+ if (err)
+ return err;
+
+ dev_dbg(&dev->pdev->dev, "Doorbells bar (%d)\n", dev->dev_attr.db_bar);
+ err = efa_request_doorbell_bar(dev);
+ if (err)
+ return err;
+
+ err = efa_com_get_network_attr(&dev->edev, &network_attr);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ efa_update_network_attr(dev, &network_attr);
+
+ err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ efa_update_hw_hints(dev, &hw_hints);
+
+ /* Try to enable all the available aenq groups */
+ err = efa_com_set_aenq_config(&dev->edev, EFA_AENQ_ENABLED_GROUPS);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ dev->ibdev.owner = THIS_MODULE;
+ dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.dev.parent = &pdev->dev;
+ dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION;
+
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+
+ dev->ibdev.uverbs_ex_cmd_mask =
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+
+ dev->ibdev.driver_id = RDMA_DRIVER_EFA;
+ ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
+
+ err = ib_register_device(&dev->ibdev, "efa_%d");
+ if (err)
+ goto err_release_doorbell_bar;
+
+ ibdev_info(&dev->ibdev, "IB device registered\n");
+
+ return 0;
+
+err_release_doorbell_bar:
+ efa_release_doorbell_bar(dev);
+ return err;
+}
+
+static void efa_ib_device_remove(struct efa_dev *dev)
+{
+ efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL);
+ ibdev_info(&dev->ibdev, "Unregister ib device\n");
+ ib_unregister_device(&dev->ibdev);
+ efa_release_doorbell_bar(dev);
+}
+
+static void efa_disable_msix(struct efa_dev *dev)
+{
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int efa_enable_msix(struct efa_dev *dev)
+{
+ int msix_vecs, irq_num;
+
+ /* Reserve the max msix vectors we might need */
+ msix_vecs = EFA_NUM_MSIX_VEC;
+ dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
+ msix_vecs);
+
+ dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
+ irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
+ msix_vecs, PCI_IRQ_MSIX);
+
+ if (irq_num < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
+ irq_num);
+ return -ENOSPC;
+ }
+
+ if (irq_num != msix_vecs) {
+ dev_err(&dev->pdev->dev,
+ "Allocated %d MSI-X (out of %d requested)\n",
+ irq_num, msix_vecs);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
+{
+ int dma_width;
+ int err;
+
+ err = efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL);
+ if (err)
+ return err;
+
+ err = efa_com_validate_version(edev);
+ if (err)
+ return err;
+
+ dma_width = efa_com_get_dma_width(edev);
+ if (dma_width < 0) {
+ err = dma_width;
+ return err;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err);
+ return err;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+ if (err) {
+ dev_err(&pdev->dev,
+ "err_pci_set_consistent_dma_mask failed %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
+{
+ struct efa_com_dev *edev;
+ struct efa_dev *dev;
+ int bars;
+ int err;
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+ return ERR_PTR(err);
+ }
+
+ pci_set_master(pdev);
+
+ dev = ib_alloc_device(efa_dev, ibdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "Device alloc failed\n");
+ err = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ pci_set_drvdata(pdev, dev);
+ edev = &dev->edev;
+ edev->efa_dev = dev;
+ edev->dmadev = &pdev->dev;
+ dev->pdev = pdev;
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
+ err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+ err);
+ goto err_ibdev_destroy;
+ }
+
+ dev->reg_bar_addr = pci_resource_start(pdev, EFA_REG_BAR);
+ dev->reg_bar_len = pci_resource_len(pdev, EFA_REG_BAR);
+ dev->mem_bar_addr = pci_resource_start(pdev, EFA_MEM_BAR);
+ dev->mem_bar_len = pci_resource_len(pdev, EFA_MEM_BAR);
+
+ edev->reg_bar = devm_ioremap(&pdev->dev,
+ dev->reg_bar_addr,
+ dev->reg_bar_len);
+ if (!edev->reg_bar) {
+ dev_err(&pdev->dev, "Failed to remap register bar\n");
+ err = -EFAULT;
+ goto err_release_bars;
+ }
+
+ err = efa_com_mmio_reg_read_init(edev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init readless MMIO\n");
+ goto err_iounmap;
+ }
+
+ err = efa_device_init(edev, pdev);
+ if (err) {
+ dev_err(&pdev->dev, "EFA device init failed\n");
+ if (err == -ETIME)
+ err = -EPROBE_DEFER;
+ goto err_reg_read_destroy;
+ }
+
+ err = efa_enable_msix(dev);
+ if (err)
+ goto err_reg_read_destroy;
+
+ edev->aq.msix_vector_idx = dev->admin_msix_vector_idx;
+ edev->aenq.msix_vector_idx = dev->admin_msix_vector_idx;
+
+ err = efa_set_mgmnt_irq(dev);
+ if (err)
+ goto err_disable_msix;
+
+ err = efa_com_admin_init(edev, &aenq_handlers);
+ if (err)
+ goto err_free_mgmnt_irq;
+
+ return dev;
+
+err_free_mgmnt_irq:
+ efa_free_mgmnt_irq(dev);
+err_disable_msix:
+ efa_disable_msix(dev);
+err_reg_read_destroy:
+ efa_com_mmio_reg_read_destroy(edev);
+err_iounmap:
+ devm_iounmap(&pdev->dev, edev->reg_bar);
+err_release_bars:
+ efa_release_bars(dev, EFA_BASE_BAR_MASK);
+err_ibdev_destroy:
+ ib_dealloc_device(&dev->ibdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ return ERR_PTR(err);
+}
+
+static void efa_remove_device(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+ struct efa_com_dev *edev;
+
+ edev = &dev->edev;
+ efa_com_admin_destroy(edev);
+ efa_free_mgmnt_irq(dev);
+ efa_disable_msix(dev);
+ efa_com_mmio_reg_read_destroy(edev);
+ devm_iounmap(&pdev->dev, edev->reg_bar);
+ efa_release_bars(dev, EFA_BASE_BAR_MASK);
+ ib_dealloc_device(&dev->ibdev);
+ pci_disable_device(pdev);
+}
+
+static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct efa_dev *dev;
+ int err;
+
+ dev = efa_probe_device(pdev);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
+ err = efa_ib_device_add(dev);
+ if (err)
+ goto err_remove_device;
+
+ return 0;
+
+err_remove_device:
+ efa_remove_device(pdev);
+ return err;
+}
+
+static void efa_remove(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_ib_device_remove(dev);
+ efa_remove_device(pdev);
+}
+
+static struct pci_driver efa_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = efa_pci_tbl,
+ .probe = efa_probe,
+ .remove = efa_remove,
+};
+
+module_pci_driver(efa_pci_driver);
diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h
new file mode 100644
index 000000000000..bb9cad3d6a15
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_regs_defs.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_REGS_H_
+#define _EFA_REGS_H_
+
+enum efa_regs_reset_reason_types {
+ EFA_REGS_RESET_NORMAL = 0,
+ /* Keep alive timeout */
+ EFA_REGS_RESET_KEEP_ALIVE_TO = 1,
+ EFA_REGS_RESET_ADMIN_TO = 2,
+ EFA_REGS_RESET_INIT_ERR = 3,
+ EFA_REGS_RESET_DRIVER_INVALID_STATE = 4,
+ EFA_REGS_RESET_OS_TRIGGER = 5,
+ EFA_REGS_RESET_SHUTDOWN = 6,
+ EFA_REGS_RESET_USER_TRIGGER = 7,
+ EFA_REGS_RESET_GENERIC = 8,
+};
+
+/* efa_registers offsets */
+
+/* 0 base */
+#define EFA_REGS_VERSION_OFF 0x0
+#define EFA_REGS_CONTROLLER_VERSION_OFF 0x4
+#define EFA_REGS_CAPS_OFF 0x8
+#define EFA_REGS_AQ_BASE_LO_OFF 0x10
+#define EFA_REGS_AQ_BASE_HI_OFF 0x14
+#define EFA_REGS_AQ_CAPS_OFF 0x18
+#define EFA_REGS_ACQ_BASE_LO_OFF 0x20
+#define EFA_REGS_ACQ_BASE_HI_OFF 0x24
+#define EFA_REGS_ACQ_CAPS_OFF 0x28
+#define EFA_REGS_AQ_PROD_DB_OFF 0x2c
+#define EFA_REGS_AENQ_CAPS_OFF 0x34
+#define EFA_REGS_AENQ_BASE_LO_OFF 0x38
+#define EFA_REGS_AENQ_BASE_HI_OFF 0x3c
+#define EFA_REGS_AENQ_CONS_DB_OFF 0x40
+#define EFA_REGS_INTR_MASK_OFF 0x4c
+#define EFA_REGS_DEV_CTL_OFF 0x54
+#define EFA_REGS_DEV_STS_OFF 0x58
+#define EFA_REGS_MMIO_REG_READ_OFF 0x5c
+#define EFA_REGS_MMIO_RESP_LO_OFF 0x60
+#define EFA_REGS_MMIO_RESP_HI_OFF 0x64
+
+/* version register */
+#define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff
+#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8
+#define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
+
+/* controller_version register */
+#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
+#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8
+#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
+#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16
+#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
+#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24
+#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
+
+/* caps register */
+#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
+#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1
+#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
+#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
+#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
+#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
+
+/* aq_caps register */
+#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
+
+/* acq_caps register */
+#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
+#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16
+#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000
+#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24
+#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000
+
+/* aenq_caps register */
+#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
+#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16
+#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000
+#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24
+#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000
+
+/* dev_ctl register */
+#define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
+#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1
+#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
+#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28
+#define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
+
+/* dev_sts register */
+#define EFA_REGS_DEV_STS_READY_MASK 0x1
+#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
+#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
+#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
+#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
+#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
+#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
+#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4
+#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
+#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5
+#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
+
+/* mmio_reg_read register */
+#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
+#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16
+#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
+
+#endif /* _EFA_REGS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
new file mode 100644
index 000000000000..6d6886c9009f
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -0,0 +1,1825 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "efa.h"
+
+#define EFA_MMAP_FLAG_SHIFT 56
+#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
+#define EFA_MMAP_INVALID U64_MAX
+
+enum {
+ EFA_MMAP_DMA_PAGE = 0,
+ EFA_MMAP_IO_WC,
+ EFA_MMAP_IO_NC,
+};
+
+#define EFA_AENQ_ENABLED_GROUPS \
+ (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
+ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+
+struct efa_mmap_entry {
+ void *obj;
+ u64 address;
+ u64 length;
+ u32 mmap_page;
+ u8 mmap_flag;
+};
+
+static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
+{
+ return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
+ ((u64)efa->mmap_page << PAGE_SHIFT);
+}
+
+#define EFA_CHUNK_PAYLOAD_SHIFT 12
+#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
+#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
+
+#define EFA_CHUNK_SHIFT 12
+#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT)
+#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info)
+
+#define EFA_PTRS_PER_CHUNK \
+ ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
+
+#define EFA_CHUNK_USED_SIZE \
+ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
+
+#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
+
+struct pbl_chunk {
+ dma_addr_t dma_addr;
+ u64 *buf;
+ u32 length;
+};
+
+struct pbl_chunk_list {
+ struct pbl_chunk *chunks;
+ unsigned int size;
+};
+
+struct pbl_context {
+ union {
+ struct {
+ dma_addr_t dma_addr;
+ } continuous;
+ struct {
+ u32 pbl_buf_size_in_pages;
+ struct scatterlist *sgl;
+ int sg_dma_cnt;
+ struct pbl_chunk_list chunk_list;
+ } indirect;
+ } phys;
+ u64 *pbl_buf;
+ u32 pbl_buf_size_in_bytes;
+ u8 physically_continuous;
+};
+
+static inline struct efa_dev *to_edev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct efa_dev, ibdev);
+}
+
+static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct efa_ucontext, ibucontext);
+}
+
+static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct efa_pd, ibpd);
+}
+
+static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct efa_mr, ibmr);
+}
+
+static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct efa_qp, ibqp);
+}
+
+static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct efa_cq, ibcq);
+}
+
+static inline struct efa_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct efa_ah, ibah);
+}
+
+#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
+ sizeof(((typeof(x) *)0)->fld) <= (sz))
+
+#define is_reserved_cleared(reserved) \
+ !memchr_inv(reserved, 0, sizeof(reserved))
+
+static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ void *addr;
+
+ addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+ if (!addr)
+ return NULL;
+
+ *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
+ if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
+ ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
+ free_pages_exact(addr, size);
+ return NULL;
+ }
+
+ return addr;
+}
+
+/*
+ * This is only called when the ucontext is destroyed and there can be no
+ * concurrent query via mmap or allocate on the xarray, thus we can be sure no
+ * other thread is using the entry pointer. We also know that all the BAR
+ * pages have either been zap'd or munmaped at this point. Normal pages are
+ * refcounted and will be freed at the proper time.
+ */
+static void mmap_entries_remove_free(struct efa_dev *dev,
+ struct efa_ucontext *ucontext)
+{
+ struct efa_mmap_entry *entry;
+ unsigned long mmap_page;
+
+ xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
+ xa_erase(&ucontext->mmap_xa, mmap_page);
+
+ ibdev_dbg(
+ &dev->ibdev,
+ "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+ entry->obj, get_mmap_key(entry), entry->address,
+ entry->length);
+ if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+ /* DMA mapping is already gone, now free the pages */
+ free_pages_exact(phys_to_virt(entry->address),
+ entry->length);
+ kfree(entry);
+ }
+}
+
+static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
+ struct efa_ucontext *ucontext,
+ u64 key, u64 len)
+{
+ struct efa_mmap_entry *entry;
+ u64 mmap_page;
+
+ mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
+ if (mmap_page > U32_MAX)
+ return NULL;
+
+ entry = xa_load(&ucontext->mmap_xa, mmap_page);
+ if (!entry || get_mmap_key(entry) != key || entry->length != len)
+ return NULL;
+
+ ibdev_dbg(&dev->ibdev,
+ "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+ entry->obj, key, entry->address, entry->length);
+
+ return entry;
+}
+
+/*
+ * Note this locking scheme cannot support removal of entries, except during
+ * ucontext destruction when the core code guarentees no concurrency.
+ */
+static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
+ void *obj, u64 address, u64 length, u8 mmap_flag)
+{
+ struct efa_mmap_entry *entry;
+ int err;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return EFA_MMAP_INVALID;
+
+ entry->obj = obj;
+ entry->address = address;
+ entry->length = length;
+ entry->mmap_flag = mmap_flag;
+
+ xa_lock(&ucontext->mmap_xa);
+ entry->mmap_page = ucontext->mmap_xa_page;
+ ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE);
+ err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
+ GFP_KERNEL);
+ xa_unlock(&ucontext->mmap_xa);
+ if (err){
+ kfree(entry);
+ return EFA_MMAP_INVALID;
+ }
+
+ ibdev_dbg(
+ &dev->ibdev,
+ "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
+ entry->obj, entry->address, entry->length, get_mmap_key(entry));
+
+ return get_mmap_key(entry);
+}
+
+int efa_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata)
+{
+ struct efa_com_get_device_attr_result *dev_attr;
+ struct efa_ibv_ex_query_device_resp resp = {};
+ struct efa_dev *dev = to_edev(ibdev);
+ int err;
+
+ if (udata && udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ return -EINVAL;
+ }
+
+ dev_attr = &dev->dev_attr;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
+ props->page_size_cap = dev_attr->page_size_cap;
+ props->vendor_id = dev->pdev->vendor;
+ props->vendor_part_id = dev->pdev->device;
+ props->hw_ver = dev->pdev->subsystem_device;
+ props->max_qp = dev_attr->max_qp;
+ props->max_cq = dev_attr->max_cq;
+ props->max_pd = dev_attr->max_pd;
+ props->max_mr = dev_attr->max_mr;
+ props->max_ah = dev_attr->max_ah;
+ props->max_cqe = dev_attr->max_cq_depth;
+ props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
+ dev_attr->max_rq_depth);
+ props->max_send_sge = dev_attr->max_sq_sge;
+ props->max_recv_sge = dev_attr->max_rq_sge;
+
+ if (udata && udata->outlen) {
+ resp.max_sq_sge = dev_attr->max_sq_sge;
+ resp.max_rq_sge = dev_attr->max_rq_sge;
+ resp.max_sq_wr = dev_attr->max_sq_depth;
+ resp.max_rq_wr = dev_attr->max_rq_depth;
+
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to copy udata for query_device\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int efa_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct efa_dev *dev = to_edev(ibdev);
+
+ props->lmc = 1;
+
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->active_speed = IB_SPEED_EDR;
+ props->active_width = IB_WIDTH_4X;
+ props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
+ props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
+ props->max_msg_sz = dev->mtu;
+ props->max_vl_num = 1;
+
+ return 0;
+}
+
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct efa_dev *dev = to_edev(ibqp->device);
+ struct efa_com_query_qp_params params = {};
+ struct efa_com_query_qp_result result;
+ struct efa_qp *qp = to_eqp(ibqp);
+ int err;
+
+#define EFA_QUERY_QP_SUPP_MASK \
+ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
+ IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+
+ if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+ qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
+ return -EOPNOTSUPP;
+ }
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ params.qp_handle = qp->qp_handle;
+ err = efa_com_query_qp(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ qp_attr->qp_state = result.qp_state;
+ qp_attr->qkey = result.qkey;
+ qp_attr->sq_psn = result.sq_psn;
+ qp_attr->sq_draining = result.sq_draining;
+ qp_attr->port_num = 1;
+
+ qp_attr->cap.max_send_wr = qp->max_send_wr;
+ qp_attr->cap.max_recv_wr = qp->max_recv_wr;
+ qp_attr->cap.max_send_sge = qp->max_send_sge;
+ qp_attr->cap.max_recv_sge = qp->max_recv_sge;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+ qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->recv_cq = ibqp->recv_cq;
+ qp_init_attr->send_cq = ibqp->send_cq;
+ qp_init_attr->qp_context = ibqp->qp_context;
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct efa_dev *dev = to_edev(ibdev);
+
+ memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+
+ return 0;
+}
+
+int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 0)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
+{
+ struct efa_com_dealloc_pd_params params = {
+ .pdn = pdn,
+ };
+
+ return efa_com_dealloc_pd(&dev->edev, &params);
+}
+
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_ibv_alloc_pd_resp resp = {};
+ struct efa_com_alloc_pd_result result;
+ struct efa_pd *pd = to_epd(ibpd);
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = efa_com_alloc_pd(&dev->edev, &result);
+ if (err)
+ goto err_out;
+
+ pd->pdn = result.pdn;
+ resp.pdn = result.pdn;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for alloc_pd\n");
+ goto err_dealloc_pd;
+ }
+ }
+
+ ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
+
+ return 0;
+
+err_dealloc_pd:
+ efa_pd_dealloc(dev, result.pdn);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+ return err;
+}
+
+void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_pd *pd = to_epd(ibpd);
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ return;
+ }
+
+ ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
+ efa_pd_dealloc(dev, pd->pdn);
+}
+
+static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
+{
+ struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
+
+ return efa_com_destroy_qp(&dev->edev, &params);
+}
+
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibqp->pd->device);
+ struct efa_qp *qp = to_eqp(ibqp);
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ return -EINVAL;
+ }
+
+ ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
+ err = efa_destroy_qp_handle(dev, qp->qp_handle);
+ if (err)
+ return err;
+
+ if (qp->rq_cpu_addr) {
+ ibdev_dbg(&dev->ibdev,
+ "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
+ qp->rq_cpu_addr, qp->rq_size,
+ &qp->rq_dma_addr);
+ dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
+ DMA_TO_DEVICE);
+ }
+
+ kfree(qp);
+ return 0;
+}
+
+static int qp_mmap_entries_setup(struct efa_qp *qp,
+ struct efa_dev *dev,
+ struct efa_ucontext *ucontext,
+ struct efa_com_create_qp_params *params,
+ struct efa_ibv_create_qp_resp *resp)
+{
+ /*
+ * Once an entry is inserted it might be mmapped, hence cannot be
+ * cleaned up until dealloc_ucontext.
+ */
+ resp->sq_db_mmap_key =
+ mmap_entry_insert(dev, ucontext, qp,
+ dev->db_bar_addr + resp->sq_db_offset,
+ PAGE_SIZE, EFA_MMAP_IO_NC);
+ if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+ return -ENOMEM;
+
+ resp->sq_db_offset &= ~PAGE_MASK;
+
+ resp->llq_desc_mmap_key =
+ mmap_entry_insert(dev, ucontext, qp,
+ dev->mem_bar_addr + resp->llq_desc_offset,
+ PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ (resp->llq_desc_offset & ~PAGE_MASK)),
+ EFA_MMAP_IO_WC);
+ if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
+ return -ENOMEM;
+
+ resp->llq_desc_offset &= ~PAGE_MASK;
+
+ if (qp->rq_size) {
+ resp->rq_db_mmap_key =
+ mmap_entry_insert(dev, ucontext, qp,
+ dev->db_bar_addr + resp->rq_db_offset,
+ PAGE_SIZE, EFA_MMAP_IO_NC);
+ if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
+ return -ENOMEM;
+
+ resp->rq_db_offset &= ~PAGE_MASK;
+
+ resp->rq_mmap_key =
+ mmap_entry_insert(dev, ucontext, qp,
+ virt_to_phys(qp->rq_cpu_addr),
+ qp->rq_size, EFA_MMAP_DMA_PAGE);
+ if (resp->rq_mmap_key == EFA_MMAP_INVALID)
+ return -ENOMEM;
+
+ resp->rq_mmap_size = qp->rq_size;
+ }
+
+ return 0;
+}
+
+static int efa_qp_validate_cap(struct efa_dev *dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested send wr[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_send_wr,
+ dev->dev_attr.max_sq_depth);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested receive wr[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_recv_wr,
+ dev->dev_attr.max_rq_depth);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested sge send[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested sge recv[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested inline data[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_inline_data,
+ dev->dev_attr.inline_buf_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int efa_qp_validate_attr(struct efa_dev *dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ if (init_attr->qp_type != IB_QPT_DRIVER &&
+ init_attr->qp_type != IB_QPT_UD) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp type %d\n", init_attr->qp_type);
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->srq) {
+ ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->create_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct efa_com_create_qp_params create_qp_params = {};
+ struct efa_com_create_qp_result create_qp_resp;
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_ibv_create_qp_resp resp = {};
+ struct efa_ibv_create_qp cmd = {};
+ bool rq_entry_inserted = false;
+ struct efa_ucontext *ucontext;
+ struct efa_qp *qp;
+ int err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
+ ibucontext);
+
+ err = efa_qp_validate_cap(dev, init_attr);
+ if (err)
+ goto err_out;
+
+ err = efa_qp_validate_attr(dev, init_attr);
+ if (err)
+ goto err_out;
+
+ if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, no input udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (udata->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(udata, sizeof(cmd),
+ udata->inlen - sizeof(cmd))) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for create_qp\n");
+ goto err_out;
+ }
+
+ if (cmd.comp_mask) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ create_qp_params.uarn = ucontext->uarn;
+ create_qp_params.pd = to_epd(ibpd)->pdn;
+
+ if (init_attr->qp_type == IB_QPT_UD) {
+ create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
+ } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
+ create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
+ } else {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp type %d driver qp type %d\n",
+ init_attr->qp_type, cmd.driver_qp_type);
+ err = -EOPNOTSUPP;
+ goto err_free_qp;
+ }
+
+ ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
+ init_attr->qp_type, cmd.driver_qp_type);
+ create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
+ create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
+ create_qp_params.sq_depth = init_attr->cap.max_send_wr;
+ create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
+
+ create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
+ create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
+ qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
+ if (qp->rq_size) {
+ qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
+ if (!qp->rq_cpu_addr) {
+ err = -ENOMEM;
+ goto err_free_qp;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
+ qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
+ create_qp_params.rq_base_addr = qp->rq_dma_addr;
+ }
+
+ err = efa_com_create_qp(&dev->edev, &create_qp_params,
+ &create_qp_resp);
+ if (err)
+ goto err_free_mapped;
+
+ resp.sq_db_offset = create_qp_resp.sq_db_offset;
+ resp.rq_db_offset = create_qp_resp.rq_db_offset;
+ resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
+ resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
+ resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
+
+ err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
+ &resp);
+ if (err)
+ goto err_destroy_qp;
+
+ rq_entry_inserted = true;
+ qp->qp_handle = create_qp_resp.qp_handle;
+ qp->ibqp.qp_num = create_qp_resp.qp_num;
+ qp->ibqp.qp_type = init_attr->qp_type;
+ qp->max_send_wr = init_attr->cap.max_send_wr;
+ qp->max_recv_wr = init_attr->cap.max_recv_wr;
+ qp->max_send_sge = init_attr->cap.max_send_sge;
+ qp->max_recv_sge = init_attr->cap.max_recv_sge;
+ qp->max_inline_data = init_attr->cap.max_inline_data;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for qp[%u]\n",
+ create_qp_resp.qp_num);
+ goto err_destroy_qp;
+ }
+ }
+
+ ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
+
+ return &qp->ibqp;
+
+err_destroy_qp:
+ efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
+err_free_mapped:
+ if (qp->rq_size) {
+ dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
+ DMA_TO_DEVICE);
+ if (!rq_entry_inserted)
+ free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
+ }
+err_free_qp:
+ kfree(qp);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.create_qp_err);
+ return ERR_PTR(err);
+}
+
+static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+#define EFA_MODIFY_QP_SUPP_MASK \
+ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+
+ if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+ qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
+ return -EOPNOTSUPP;
+ }
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+ qp_attr_mask)) {
+ ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
+ return -EINVAL;
+ }
+
+ if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
+ ibdev_dbg(&dev->ibdev, "Can't change port num\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
+ ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibqp->device);
+ struct efa_com_modify_qp_params params = {};
+ struct efa_qp *qp = to_eqp(ibqp);
+ enum ib_qp_state cur_state;
+ enum ib_qp_state new_state;
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ return -EINVAL;
+ }
+
+ cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
+ qp->state;
+ new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
+
+ err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
+ new_state);
+ if (err)
+ return err;
+
+ params.qp_handle = qp->qp_handle;
+
+ if (qp_attr_mask & IB_QP_STATE) {
+ params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
+ BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
+ params.cur_qp_state = qp_attr->cur_qp_state;
+ params.qp_state = qp_attr->qp_state;
+ }
+
+ if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+ params.modify_mask |=
+ BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+ params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
+ }
+
+ if (qp_attr_mask & IB_QP_QKEY) {
+ params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+ params.qkey = qp_attr->qkey;
+ }
+
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+ params.sq_psn = qp_attr->sq_psn;
+ }
+
+ err = efa_com_modify_qp(&dev->edev, &params);
+ if (err)
+ return err;
+
+ qp->state = new_state;
+
+ return 0;
+}
+
+static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
+{
+ struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
+
+ return efa_com_destroy_cq(&dev->edev, &params);
+}
+
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibcq->device);
+ struct efa_cq *cq = to_ecq(ibcq);
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ return -EINVAL;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
+ cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
+
+ err = efa_destroy_cq_idx(dev, cq->cq_idx);
+ if (err)
+ return err;
+
+ dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+
+ kfree(cq);
+ return 0;
+}
+
+static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
+ struct efa_ibv_create_cq_resp *resp)
+{
+ resp->q_mmap_size = cq->size;
+ resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE);
+ if (resp->q_mmap_key == EFA_MMAP_INVALID)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *ibucontext,
+ struct ib_udata *udata)
+{
+ struct efa_ibv_create_cq_resp resp = {};
+ struct efa_com_create_cq_params params;
+ struct efa_com_create_cq_result result;
+ struct efa_dev *dev = to_edev(ibdev);
+ struct efa_ibv_create_cq cmd = {};
+ bool cq_entry_inserted = false;
+ struct efa_cq *cq;
+ int err;
+
+ ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
+
+ if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
+ ibdev_dbg(ibdev,
+ "cq: requested entries[%u] non-positive or greater than max[%u]\n",
+ entries, dev->dev_attr.max_cq_depth);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, no input udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (udata->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(udata, sizeof(cmd),
+ udata->inlen - sizeof(cmd))) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
+ goto err_out;
+ }
+
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (!cmd.cq_entry_size) {
+ ibdev_dbg(ibdev,
+ "Invalid entry size [%u]\n", cmd.cq_entry_size);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
+ ibdev_dbg(ibdev,
+ "Invalid number of sub cqs[%u] expected[%u]\n",
+ cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ cq->ucontext = to_eucontext(ibucontext);
+ cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
+ cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+ if (!cq->cpu_addr) {
+ err = -ENOMEM;
+ goto err_free_cq;
+ }
+
+ params.uarn = cq->ucontext->uarn;
+ params.cq_depth = entries;
+ params.dma_addr = cq->dma_addr;
+ params.entry_size_in_bytes = cmd.cq_entry_size;
+ params.num_sub_cqs = cmd.num_sub_cqs;
+ err = efa_com_create_cq(&dev->edev, &params, &result);
+ if (err)
+ goto err_free_mapped;
+
+ resp.cq_idx = result.cq_idx;
+ cq->cq_idx = result.cq_idx;
+ cq->ibcq.cqe = result.actual_depth;
+ WARN_ON_ONCE(entries != result.actual_depth);
+
+ err = cq_mmap_entries_setup(dev, cq, &resp);
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Could not setup cq[%u] mmap entries\n", cq->cq_idx);
+ goto err_destroy_cq;
+ }
+
+ cq_entry_inserted = true;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to copy udata for create_cq\n");
+ goto err_destroy_cq;
+ }
+ }
+
+ ibdev_dbg(ibdev,
+ "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
+ cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
+
+ return &cq->ibcq;
+
+err_destroy_cq:
+ efa_destroy_cq_idx(dev, cq->cq_idx);
+err_free_mapped:
+ dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+ if (!cq_entry_inserted)
+ free_pages_exact(cq->cpu_addr, cq->size);
+err_free_cq:
+ kfree(cq);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+ return ERR_PTR(err);
+}
+
+struct ib_cq *efa_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+ struct efa_ucontext,
+ ibucontext);
+
+ return do_create_cq(ibdev, attr->cqe, attr->comp_vector,
+ &ucontext->ibucontext, udata);
+}
+
+static int umem_to_page_list(struct efa_dev *dev,
+ struct ib_umem *umem,
+ u64 *page_list,
+ u32 hp_cnt,
+ u8 hp_shift)
+{
+ u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
+ struct sg_dma_page_iter sg_iter;
+ unsigned int page_idx = 0;
+ unsigned int hp_idx = 0;
+
+ ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
+ hp_cnt, pages_in_hp);
+
+ for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+ if (page_idx % pages_in_hp == 0) {
+ page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
+ hp_idx++;
+ }
+
+ page_idx++;
+ }
+
+ return 0;
+}
+
+static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
+{
+ struct scatterlist *sglist;
+ struct page *pg;
+ int i;
+
+ sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+ if (!sglist)
+ return NULL;
+ sg_init_table(sglist, page_cnt);
+ for (i = 0; i < page_cnt; i++) {
+ pg = vmalloc_to_page(buf);
+ if (!pg)
+ goto err;
+ sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
+ buf += PAGE_SIZE / sizeof(*buf);
+ }
+ return sglist;
+
+err:
+ kfree(sglist);
+ return NULL;
+}
+
+/*
+ * create a chunk list of physical pages dma addresses from the supplied
+ * scatter gather list
+ */
+static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx;
+ struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+ int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
+ struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
+ int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
+ struct efa_com_ctrl_buff_info *ctrl_buf;
+ u64 *cur_chunk_buf, *prev_chunk_buf;
+ struct scatterlist *sg;
+ dma_addr_t dma_addr;
+ int i;
+
+ /* allocate a chunk list that consists of 4KB chunks */
+ chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
+
+ chunk_list->size = chunk_list_size;
+ chunk_list->chunks = kcalloc(chunk_list_size,
+ sizeof(*chunk_list->chunks),
+ GFP_KERNEL);
+ if (!chunk_list->chunks)
+ return -ENOMEM;
+
+ ibdev_dbg(&dev->ibdev,
+ "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
+ page_cnt);
+
+ /* allocate chunk buffers: */
+ for (i = 0; i < chunk_list_size; i++) {
+ chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
+ if (!chunk_list->chunks[i].buf)
+ goto chunk_list_dealloc;
+
+ chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
+ }
+ chunk_list->chunks[chunk_list_size - 1].length =
+ ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
+ EFA_CHUNK_PTR_SIZE;
+
+ /* fill the dma addresses of sg list pages to chunks: */
+ chunk_idx = 0;
+ payload_idx = 0;
+ cur_chunk_buf = chunk_list->chunks[0].buf;
+ for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
+ payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
+ for (i = 0; i < payloads_in_sg; i++) {
+ cur_chunk_buf[payload_idx++] =
+ (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
+ (EFA_CHUNK_PAYLOAD_SIZE * i);
+
+ if (payload_idx == EFA_PTRS_PER_CHUNK) {
+ chunk_idx++;
+ cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+ payload_idx = 0;
+ }
+ }
+ }
+
+ /* map chunks to dma and fill chunks next ptrs */
+ for (i = chunk_list_size - 1; i >= 0; i--) {
+ dma_addr = dma_map_single(&dev->pdev->dev,
+ chunk_list->chunks[i].buf,
+ chunk_list->chunks[i].length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+ ibdev_err(&dev->ibdev,
+ "chunk[%u] dma_map_failed\n", i);
+ goto chunk_list_unmap;
+ }
+
+ chunk_list->chunks[i].dma_addr = dma_addr;
+ ibdev_dbg(&dev->ibdev,
+ "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
+
+ if (!i)
+ break;
+
+ prev_chunk_buf = chunk_list->chunks[i - 1].buf;
+
+ ctrl_buf = (struct efa_com_ctrl_buff_info *)
+ &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
+ ctrl_buf->length = chunk_list->chunks[i].length;
+
+ efa_com_set_dma_addr(dma_addr,
+ &ctrl_buf->address.mem_addr_high,
+ &ctrl_buf->address.mem_addr_low);
+ }
+
+ return 0;
+
+chunk_list_unmap:
+ for (; i < chunk_list_size; i++) {
+ dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+ chunk_list->chunks[i].length, DMA_TO_DEVICE);
+ }
+chunk_list_dealloc:
+ for (i = 0; i < chunk_list_size; i++)
+ kfree(chunk_list->chunks[i].buf);
+
+ kfree(chunk_list->chunks);
+ return -ENOMEM;
+}
+
+static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+ int i;
+
+ for (i = 0; i < chunk_list->size; i++) {
+ dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+ chunk_list->chunks[i].length, DMA_TO_DEVICE);
+ kfree(chunk_list->chunks[i].buf);
+ }
+
+ kfree(chunk_list->chunks);
+}
+
+/* initialize pbl continuous mode: map pbl buffer to a dma address. */
+static int pbl_continuous_initialize(struct efa_dev *dev,
+ struct pbl_context *pbl)
+{
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
+ pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+ ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
+ return -ENOMEM;
+ }
+
+ pbl->phys.continuous.dma_addr = dma_addr;
+ ibdev_dbg(&dev->ibdev,
+ "pbl continuous - dma_addr = %pad, size[%u]\n",
+ &dma_addr, pbl->pbl_buf_size_in_bytes);
+
+ return 0;
+}
+
+/*
+ * initialize pbl indirect mode:
+ * create a chunk list out of the dma addresses of the physical pages of
+ * pbl buffer.
+ */
+static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+ struct scatterlist *sgl;
+ int sg_dma_cnt, err;
+
+ BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
+ sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
+ if (!sgl)
+ return -ENOMEM;
+
+ sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+ if (!sg_dma_cnt) {
+ err = -EINVAL;
+ goto err_map;
+ }
+
+ pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
+ pbl->phys.indirect.sgl = sgl;
+ pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
+ err = pbl_chunk_list_create(dev, pbl);
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "chunk_list creation failed[%d]\n", err);
+ goto err_chunk;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "pbl indirect - size[%u], chunks[%u]\n",
+ pbl->pbl_buf_size_in_bytes,
+ pbl->phys.indirect.chunk_list.size);
+
+ return 0;
+
+err_chunk:
+ dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+err_map:
+ kfree(sgl);
+ return err;
+}
+
+static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ pbl_chunk_list_destroy(dev, pbl);
+ dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
+ pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
+ kfree(pbl->phys.indirect.sgl);
+}
+
+/* create a page buffer list from a mapped user memory region */
+static int pbl_create(struct efa_dev *dev,
+ struct pbl_context *pbl,
+ struct ib_umem *umem,
+ int hp_cnt,
+ u8 hp_shift)
+{
+ int err;
+
+ pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
+ pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes,
+ GFP_KERNEL | __GFP_NOWARN);
+ if (pbl->pbl_buf) {
+ pbl->physically_continuous = 1;
+ err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+ hp_shift);
+ if (err)
+ goto err_continuous;
+ err = pbl_continuous_initialize(dev, pbl);
+ if (err)
+ goto err_continuous;
+ } else {
+ pbl->physically_continuous = 0;
+ pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes);
+ if (!pbl->pbl_buf)
+ return -ENOMEM;
+
+ err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+ hp_shift);
+ if (err)
+ goto err_indirect;
+ err = pbl_indirect_initialize(dev, pbl);
+ if (err)
+ goto err_indirect;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "user_pbl_created: user_pages[%u], continuous[%u]\n",
+ hp_cnt, pbl->physically_continuous);
+
+ return 0;
+
+err_continuous:
+ kfree(pbl->pbl_buf);
+ return err;
+err_indirect:
+ vfree(pbl->pbl_buf);
+ return err;
+}
+
+static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ if (pbl->physically_continuous) {
+ dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
+ pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+ kfree(pbl->pbl_buf);
+ } else {
+ pbl_indirect_terminate(dev, pbl);
+ vfree(pbl->pbl_buf);
+ }
+}
+
+static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
+ struct efa_com_reg_mr_params *params)
+{
+ int err;
+
+ params->inline_pbl = 1;
+ err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
+ params->page_num, params->page_shift);
+ if (err)
+ return err;
+
+ ibdev_dbg(&dev->ibdev,
+ "inline_pbl_array - pages[%u]\n", params->page_num);
+
+ return 0;
+}
+
+static int efa_create_pbl(struct efa_dev *dev,
+ struct pbl_context *pbl,
+ struct efa_mr *mr,
+ struct efa_com_reg_mr_params *params)
+{
+ int err;
+
+ err = pbl_create(dev, pbl, mr->umem, params->page_num,
+ params->page_shift);
+ if (err) {
+ ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
+ return err;
+ }
+
+ params->inline_pbl = 0;
+ params->indirect = !pbl->physically_continuous;
+ if (pbl->physically_continuous) {
+ params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
+
+ efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
+ &params->pbl.pbl.address.mem_addr_high,
+ &params->pbl.pbl.address.mem_addr_low);
+ } else {
+ params->pbl.pbl.length =
+ pbl->phys.indirect.chunk_list.chunks[0].length;
+
+ efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
+ &params->pbl.pbl.address.mem_addr_high,
+ &params->pbl.pbl.address.mem_addr_low);
+ }
+
+ return 0;
+}
+
+static void efa_cont_pages(struct ib_umem *umem, u64 addr,
+ unsigned long max_page_shift,
+ int *count, u8 *shift, u32 *ncont)
+{
+ struct scatterlist *sg;
+ u64 base = ~0, p = 0;
+ unsigned long tmp;
+ unsigned long m;
+ u64 len, pfn;
+ int i = 0;
+ int entry;
+
+ addr = addr >> PAGE_SHIFT;
+ tmp = (unsigned long)addr;
+ m = find_first_bit(&tmp, BITS_PER_LONG);
+ if (max_page_shift)
+ m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE);
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ if (base + p != pfn) {
+ /*
+ * If either the offset or the new
+ * base are unaligned update m
+ */
+ tmp = (unsigned long)(pfn | p);
+ if (!IS_ALIGNED(tmp, 1 << m))
+ m = find_first_bit(&tmp, BITS_PER_LONG);
+
+ base = pfn;
+ p = 0;
+ }
+
+ p += len;
+ i += len;
+ }
+
+ if (i) {
+ m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
+ *ncont = DIV_ROUND_UP(i, (1 << m));
+ } else {
+ m = 0;
+ *ncont = 0;
+ }
+
+ *shift = PAGE_SHIFT + m;
+ *count = i;
+}
+
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_com_reg_mr_params params = {};
+ struct efa_com_reg_mr_result result = {};
+ unsigned long max_page_shift;
+ struct pbl_context pbl;
+ struct efa_mr *mr;
+ int inline_size;
+ int npages;
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported access flags[%#x], supported[%#x]\n",
+ access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ ibdev_dbg(&dev->ibdev,
+ "Failed to pin and map user space memory[%d]\n", err);
+ goto err_free;
+ }
+
+ params.pd = to_epd(ibpd)->pdn;
+ params.iova = virt_addr;
+ params.mr_length_in_bytes = length;
+ params.permissions = access_flags & 0x1;
+ max_page_shift = fls64(dev->dev_attr.page_size_cap);
+
+ efa_cont_pages(mr->umem, start, max_page_shift, &npages,
+ &params.page_shift, &params.page_num);
+ ibdev_dbg(&dev->ibdev,
+ "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n",
+ start, length, npages, params.page_shift, params.page_num);
+
+ inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
+ if (params.page_num <= inline_size) {
+ err = efa_create_inline_pbl(dev, mr, &params);
+ if (err)
+ goto err_unmap;
+
+ err = efa_com_register_mr(&dev->edev, &params, &result);
+ if (err)
+ goto err_unmap;
+ } else {
+ err = efa_create_pbl(dev, &pbl, mr, &params);
+ if (err)
+ goto err_unmap;
+
+ err = efa_com_register_mr(&dev->edev, &params, &result);
+ pbl_destroy(dev, &pbl);
+
+ if (err)
+ goto err_unmap;
+ }
+
+ mr->ibmr.lkey = result.l_key;
+ mr->ibmr.rkey = result.r_key;
+ mr->ibmr.length = length;
+ ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
+
+ return &mr->ibmr;
+
+err_unmap:
+ ib_umem_release(mr->umem);
+err_free:
+ kfree(mr);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+ return ERR_PTR(err);
+}
+
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibmr->device);
+ struct efa_com_dereg_mr_params params;
+ struct efa_mr *mr = to_emr(ibmr);
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ return -EINVAL;
+ }
+
+ ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
+
+ if (mr->umem) {
+ params.l_key = mr->ibmr.lkey;
+ err = efa_com_dereg_mr(&dev->edev, &params);
+ if (err)
+ return err;
+ ib_umem_release(mr->umem);
+ }
+
+ kfree(mr);
+
+ return 0;
+}
+
+int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err) {
+ ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
+ return err;
+ }
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
+{
+ struct efa_com_dealloc_uar_params params = {
+ .uarn = uarn,
+ };
+
+ return efa_com_dealloc_uar(&dev->edev, &params);
+}
+
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ struct efa_ibv_alloc_ucontext_resp resp = {};
+ struct efa_com_alloc_uar_result result;
+ int err;
+
+ /*
+ * it's fine if the driver does not know all request fields,
+ * we will ack input fields in our response.
+ */
+
+ err = efa_com_alloc_uar(&dev->edev, &result);
+ if (err)
+ goto err_out;
+
+ ucontext->uarn = result.uarn;
+ xa_init(&ucontext->mmap_xa);
+
+ resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
+ resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
+ resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
+ resp.inline_buf_size = dev->dev_attr.inline_buf_size;
+ resp.max_llq_size = dev->dev_attr.max_llq_size;
+
+ if (udata && udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err)
+ goto err_dealloc_uar;
+ }
+
+ return 0;
+
+err_dealloc_uar:
+ efa_dealloc_uar(dev, result.uarn);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+ return err;
+}
+
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+
+ mmap_entries_remove_free(dev, ucontext);
+ efa_dealloc_uar(dev, ucontext->uarn);
+}
+
+static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
+ struct vm_area_struct *vma, u64 key, u64 length)
+{
+ struct efa_mmap_entry *entry;
+ unsigned long va;
+ u64 pfn;
+ int err;
+
+ entry = mmap_entry_get(dev, ucontext, key, length);
+ if (!entry) {
+ ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
+ key);
+ return -EINVAL;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
+ entry->address, length, entry->mmap_flag);
+
+ pfn = entry->address >> PAGE_SHIFT;
+ switch (entry->mmap_flag) {
+ case EFA_MMAP_IO_NC:
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
+ pgprot_noncached(vma->vm_page_prot));
+ break;
+ case EFA_MMAP_IO_WC:
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
+ pgprot_writecombine(vma->vm_page_prot));
+ break;
+ case EFA_MMAP_DMA_PAGE:
+ for (va = vma->vm_start; va < vma->vm_end;
+ va += PAGE_SIZE, pfn++) {
+ err = vm_insert_page(vma, va, pfn_to_page(pfn));
+ if (err)
+ break;
+ }
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (err)
+ ibdev_dbg(
+ &dev->ibdev,
+ "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
+ entry->address, length, entry->mmap_flag, err);
+
+ return err;
+}
+
+int efa_mmap(struct ib_ucontext *ibucontext,
+ struct vm_area_struct *vma)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ u64 length = vma->vm_end - vma->vm_start;
+ u64 key = vma->vm_pgoff << PAGE_SHIFT;
+
+ ibdev_dbg(&dev->ibdev,
+ "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
+ vma->vm_start, vma->vm_end, length, key);
+
+ if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
+ ibdev_dbg(&dev->ibdev,
+ "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
+ length, PAGE_SIZE, vma->vm_flags);
+ return -EINVAL;
+ }
+
+ if (vma->vm_flags & VM_EXEC) {
+ ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
+ return -EPERM;
+ }
+ vma->vm_flags &= ~VM_MAYEXEC;
+
+ return __efa_mmap(dev, ucontext, vma, key, length);
+}
+
+static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
+{
+ struct efa_com_destroy_ah_params params = {
+ .ah = ah->ah,
+ .pdn = to_epd(ah->ibah.pd)->pdn,
+ };
+
+ return efa_com_destroy_ah(&dev->edev, &params);
+}
+
+int efa_create_ah(struct ib_ah *ibah,
+ struct rdma_ah_attr *ah_attr,
+ u32 flags,
+ struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibah->device);
+ struct efa_com_create_ah_params params = {};
+ struct efa_ibv_create_ah_resp resp = {};
+ struct efa_com_create_ah_result result;
+ struct efa_ah *ah = to_eah(ibah);
+ int err;
+
+ if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
+ ibdev_dbg(&dev->ibdev,
+ "Create address handle is not supported in atomic context\n");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
+ sizeof(params.dest_addr));
+ params.pdn = to_epd(ibah->pd)->pdn;
+ err = efa_com_create_ah(&dev->edev, &params, &result);
+ if (err)
+ goto err_out;
+
+ memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
+ ah->ah = result.ah;
+
+ resp.efa_address_handle = result.ah;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for create_ah response\n");
+ goto err_destroy_ah;
+ }
+ }
+ ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
+
+ return 0;
+
+err_destroy_ah:
+ efa_ah_destroy(dev, ah);
+err_out:
+ atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+ return err;
+}
+
+void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct efa_dev *dev = to_edev(ibah->pd->device);
+ struct efa_ah *ah = to_eah(ibah);
+
+ ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
+
+ if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
+ ibdev_dbg(&dev->ibdev,
+ "Destroy address handle is not supported in atomic context\n");
+ return;
+ }
+
+ efa_ah_destroy(dev, ah);
+}
+
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_UNSPECIFIED;
+}
+