summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/Kconfig4
-rw-r--r--drivers/acpi/riscv/Kconfig7
-rw-r--r--drivers/acpi/riscv/Makefile1
-rw-r--r--drivers/acpi/riscv/init.c2
-rw-r--r--drivers/acpi/riscv/init.h1
-rw-r--r--drivers/acpi/riscv/rimt.c520
-rw-r--r--drivers/acpi/scan.c4
-rw-r--r--drivers/crypto/ccp/sev-dev.c10
-rw-r--r--drivers/fwctl/mlx5/main.c9
-rw-r--r--drivers/fwctl/pds/main.c18
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/addr.c83
-rw-r--r--drivers/infiniband/core/agent.c3
-rw-r--r--drivers/infiniband/core/cm.c4
-rw-r--r--drivers/infiniband/core/cma.c136
-rw-r--r--drivers/infiniband/core/cma_priv.h4
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/core/sa_query.c283
-rw-r--r--drivers/infiniband/core/ucma.c120
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h19
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.c37
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c109
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h26
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c156
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h10
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c378
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c13
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c10
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c38
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h21
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c98
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h6
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h44
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c5
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c18
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c6
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c110
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.h4
-rw-r--r--drivers/infiniband/hw/hfi1/device.c4
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c2
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c8
-rw-r--r--drivers/infiniband/hw/ionic/Kconfig15
-rw-r--r--drivers/infiniband/hw/ionic/Makefile9
-rw-r--r--drivers/infiniband/hw/ionic/ionic_admin.c1229
-rw-r--r--drivers/infiniband/hw/ionic/ionic_controlpath.c2679
-rw-r--r--drivers/infiniband/hw/ionic/ionic_datapath.c1399
-rw-r--r--drivers/infiniband/hw/ionic/ionic_fw.h1029
-rw-r--r--drivers/infiniband/hw/ionic/ionic_hw_stats.c484
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.c440
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.h517
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.c111
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.h66
-rw-r--r--drivers/infiniband/hw/ionic/ionic_pgtbl.c143
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.c52
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.h234
-rw-r--r--drivers/infiniband/hw/ionic/ionic_res.h154
-rw-r--r--drivers/infiniband/hw/irdma/Kconfig7
-rw-r--r--drivers/infiniband/hw/irdma/Makefile4
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c1440
-rw-r--r--drivers/infiniband/hw/irdma/defs.h264
-rw-r--r--drivers/infiniband/hw/irdma/hmc.c18
-rw-r--r--drivers/infiniband/hw/irdma/hmc.h19
-rw-r--r--drivers/infiniband/hw/irdma/hw.c363
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c2
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.h2
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_if.c3
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c3
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.h5
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_if.c343
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.c170
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.h32
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_if.c232
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h22
-rw-r--r--drivers/infiniband/hw/irdma/main.c371
-rw-r--r--drivers/infiniband/hw/irdma/main.h35
-rw-r--r--drivers/infiniband/hw/irdma/pble.c20
-rw-r--r--drivers/infiniband/hw/irdma/protos.h1
-rw-r--r--drivers/infiniband/hw/irdma/puda.h4
-rw-r--r--drivers/infiniband/hw/irdma/type.h221
-rw-r--r--drivers/infiniband/hw/irdma/uda_d.h5
-rw-r--r--drivers/infiniband/hw/irdma/uk.c303
-rw-r--r--drivers/infiniband/hw/irdma/user.h267
-rw-r--r--drivers/infiniband/hw/irdma/utils.c112
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c834
-rw-r--r--drivers/infiniband/hw/irdma/verbs.h50
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.c618
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.h176
-rw-r--r--drivers/infiniband/hw/mana/cq.c26
-rw-r--r--drivers/infiniband/hw/mana/device.c3
-rw-r--r--drivers/infiniband/hw/mana/main.c5
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h14
-rw-r--r--drivers/infiniband/hw/mana/mr.c6
-rw-r--r--drivers/infiniband/hw/mana/qp.c9
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c8
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c3
-rw-r--r--drivers/infiniband/hw/mlx5/data_direct.c2
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c15
-rw-r--r--drivers/infiniband/hw/mlx5/main.c113
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h7
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c11
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c8
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c25
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c21
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c16
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h5
-rw-r--r--drivers/iommu/amd/init.c284
-rw-r--r--drivers/iommu/amd/iommu.c5
-rw-r--r--drivers/iommu/apple-dart.c55
-rw-r--r--drivers/iommu/dma-iommu.c61
-rw-r--r--drivers/iommu/intel/debugfs.c29
-rw-r--r--drivers/iommu/intel/iommu.c2
-rw-r--r--drivers/iommu/intel/iommu.h7
-rw-r--r--drivers/iommu/intel/perf.c10
-rw-r--r--drivers/iommu/intel/perf.h5
-rw-r--r--drivers/iommu/intel/prq.c7
-rw-r--r--drivers/iommu/io-pgtable-dart.c139
-rw-r--r--drivers/iommu/iommu-priv.h2
-rw-r--r--drivers/iommu/iommu.c26
-rw-r--r--drivers/iommu/iommufd/selftest.c2
-rw-r--r--drivers/iommu/omap-iommu.c2
-rw-r--r--drivers/iommu/riscv/iommu-platform.c17
-rw-r--r--drivers/iommu/riscv/iommu.c10
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-bufio.c10
-rw-r--r--drivers/md/dm-cache-policy-smq.c2
-rw-r--r--drivers/md/dm-core.h2
-rw-r--r--drivers/md/dm-ima.c70
-rw-r--r--drivers/md/dm-integrity.c359
-rw-r--r--drivers/md/dm-log-writes.c2
-rw-r--r--drivers/md/dm-pcache/Kconfig17
-rw-r--r--drivers/md/dm-pcache/Makefile3
-rw-r--r--drivers/md/dm-pcache/backing_dev.c374
-rw-r--r--drivers/md/dm-pcache/backing_dev.h127
-rw-r--r--drivers/md/dm-pcache/cache.c445
-rw-r--r--drivers/md/dm-pcache/cache.h635
-rw-r--r--drivers/md/dm-pcache/cache_dev.c303
-rw-r--r--drivers/md/dm-pcache/cache_dev.h70
-rw-r--r--drivers/md/dm-pcache/cache_gc.c170
-rw-r--r--drivers/md/dm-pcache/cache_key.c888
-rw-r--r--drivers/md/dm-pcache/cache_req.c836
-rw-r--r--drivers/md/dm-pcache/cache_segment.c305
-rw-r--r--drivers/md/dm-pcache/cache_writeback.c261
-rw-r--r--drivers/md/dm-pcache/dm_pcache.c497
-rw-r--r--drivers/md/dm-pcache/dm_pcache.h67
-rw-r--r--drivers/md/dm-pcache/pcache_internal.h117
-rw-r--r--drivers/md/dm-pcache/segment.c61
-rw-r--r--drivers/md/dm-pcache/segment.h74
-rw-r--r--drivers/md/dm-raid.c13
-rw-r--r--drivers/md/dm-region-hash.c2
-rw-r--r--drivers/md/dm-switch.c4
-rw-r--r--drivers/md/dm-target.c3
-rw-r--r--drivers/md/dm-thin.c4
-rw-r--r--drivers/md/dm-vdo/data-vio.c17
-rw-r--r--drivers/md/dm-vdo/indexer/volume-index.c4
-rw-r--r--drivers/md/dm.c45
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c6
-rw-r--r--drivers/net/ethernet/pensando/Kconfig1
-rw-r--r--drivers/net/ethernet/pensando/ionic/Makefile2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h7
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_api.h131
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_aux.c102
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_aux.h10
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c7
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c270
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h28
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h118
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c47
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h3
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c4
-rw-r--r--drivers/scsi/aic94xx/aic94xx_task.c1
-rw-r--r--drivers/scsi/bfa/bfa_core.c1
-rw-r--r--drivers/scsi/csiostor/csio_wr.c4
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c2
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v2_hw.c6
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c6
-rw-r--r--drivers/scsi/hpsa.c53
-rw-r--r--drivers/scsi/ipr.c8
-rw-r--r--drivers/scsi/isci/remote_device.c2
-rw-r--r--drivers/scsi/libfc/fc_encode.h2
-rw-r--r--drivers/scsi/libsas/sas_expander.c5
-rw-r--r--drivers/scsi/lpfc/lpfc.h52
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c632
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.h5
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c23
-rw-r--r--drivers/scsi/lpfc/lpfc_hw.h3
-rw-r--r--drivers/scsi/lpfc/lpfc_hw4.h6
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c12
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c25
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c8
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c14
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c21
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h38
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_pci.h2
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_sas.h1
-rw-r--r--drivers/scsi/mpi3mr/mpi/mpi30_transport.h2
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr.h8
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c13
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_os.c28
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_transport.c11
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c8
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h4
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_transport.c11
-rw-r--r--drivers/scsi/mvsas/mv_sas.c2
-rw-r--r--drivers/scsi/myrs.c8
-rw-r--r--drivers/scsi/pm8001/pm8001_ctl.c24
-rw-r--r--drivers/scsi/pm8001/pm8001_hwi.c11
-rw-r--r--drivers/scsi/pm8001/pm8001_hwi.h4
-rw-r--r--drivers/scsi/pm8001/pm8001_init.c1
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c34
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.h5
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c10
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.h4
-rw-r--r--drivers/scsi/qla2xxx/qla_bsg.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h10
-rw-r--r--drivers/scsi/qla2xxx/qla_edif.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c17
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c13
-rw-r--r--drivers/scsi/scsi_debug.c17
-rw-r--r--drivers/scsi/sd.c58
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c17
-rw-r--r--drivers/scsi/storvsc_drv.c4
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c6
-rw-r--r--drivers/target/iscsi/iscsi_target_tmr.c3
-rw-r--r--drivers/ufs/core/ufs-mcq.c11
-rw-r--r--drivers/ufs/core/ufs-sysfs.c2
-rw-r--r--drivers/ufs/core/ufs_trace.h1
-rw-r--r--drivers/ufs/core/ufs_trace_types.h24
-rw-r--r--drivers/ufs/core/ufshcd.c60
-rw-r--r--drivers/ufs/host/ufs-exynos.c10
-rw-r--r--drivers/ufs/host/ufs-mediatek.c352
-rw-r--r--drivers/ufs/host/ufs-mediatek.h1
-rw-r--r--drivers/ufs/host/ufs-qcom.c226
-rw-r--r--drivers/ufs/host/ufs-qcom.h28
-rw-r--r--drivers/ufs/host/ufshcd-pltfrm.c33
-rw-r--r--drivers/ufs/host/ufshcd-pltfrm.h1
-rw-r--r--drivers/virtio/virtio_ring.c4
-rw-r--r--drivers/xen/swiotlb-xen.c21
249 files changed, 23602 insertions, 2814 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b594780a57d7..2cdbd08b30e4 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -547,6 +547,10 @@ if ARM64
source "drivers/acpi/arm64/Kconfig"
endif
+if RISCV
+source "drivers/acpi/riscv/Kconfig"
+endif
+
config ACPI_PPTT
bool
diff --git a/drivers/acpi/riscv/Kconfig b/drivers/acpi/riscv/Kconfig
new file mode 100644
index 000000000000..046296a18d00
--- /dev/null
+++ b/drivers/acpi/riscv/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ACPI Configuration for RISC-V
+#
+
+config ACPI_RIMT
+ bool
diff --git a/drivers/acpi/riscv/Makefile b/drivers/acpi/riscv/Makefile
index a96fdf1e2cb8..1284a076fa88 100644
--- a/drivers/acpi/riscv/Makefile
+++ b/drivers/acpi/riscv/Makefile
@@ -2,3 +2,4 @@
obj-y += rhct.o init.o irq.o
obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o
+obj-$(CONFIG_ACPI_RIMT) += rimt.o
diff --git a/drivers/acpi/riscv/init.c b/drivers/acpi/riscv/init.c
index 673e4d5dd752..7c00f7995e86 100644
--- a/drivers/acpi/riscv/init.c
+++ b/drivers/acpi/riscv/init.c
@@ -10,4 +10,6 @@
void __init acpi_arch_init(void)
{
riscv_acpi_init_gsi_mapping();
+ if (IS_ENABLED(CONFIG_ACPI_RIMT))
+ riscv_acpi_rimt_init();
}
diff --git a/drivers/acpi/riscv/init.h b/drivers/acpi/riscv/init.h
index 0b9a07e4031f..1680aa2aaf23 100644
--- a/drivers/acpi/riscv/init.h
+++ b/drivers/acpi/riscv/init.h
@@ -2,3 +2,4 @@
#include <linux/init.h>
void __init riscv_acpi_init_gsi_mapping(void);
+void __init riscv_acpi_rimt_init(void);
diff --git a/drivers/acpi/riscv/rimt.c b/drivers/acpi/riscv/rimt.c
new file mode 100644
index 000000000000..683fcfe35c31
--- /dev/null
+++ b/drivers/acpi/riscv/rimt.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025, Ventana Micro Systems Inc
+ * Author: Sunil V L <sunilvl@ventanamicro.com>
+ *
+ */
+
+#define pr_fmt(fmt) "ACPI: RIMT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/acpi_rimt.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include "init.h"
+
+struct rimt_fwnode {
+ struct list_head list;
+ struct acpi_rimt_node *rimt_node;
+ struct fwnode_handle *fwnode;
+};
+
+static LIST_HEAD(rimt_fwnode_list);
+static DEFINE_SPINLOCK(rimt_fwnode_lock);
+
+#define RIMT_TYPE_MASK(type) (1 << (type))
+#define RIMT_IOMMU_TYPE BIT(0)
+
+/* Root pointer to the mapped RIMT table */
+static struct acpi_table_header *rimt_table;
+
+/**
+ * rimt_set_fwnode() - Create rimt_fwnode and use it to register
+ * iommu data in the rimt_fwnode_list
+ *
+ * @rimt_node: RIMT table node associated with the IOMMU
+ * @fwnode: fwnode associated with the RIMT node
+ *
+ * Returns: 0 on success
+ * <0 on failure
+ */
+static int rimt_set_fwnode(struct acpi_rimt_node *rimt_node,
+ struct fwnode_handle *fwnode)
+{
+ struct rimt_fwnode *np;
+
+ np = kzalloc(sizeof(*np), GFP_ATOMIC);
+
+ if (WARN_ON(!np))
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&np->list);
+ np->rimt_node = rimt_node;
+ np->fwnode = fwnode;
+
+ spin_lock(&rimt_fwnode_lock);
+ list_add_tail(&np->list, &rimt_fwnode_list);
+ spin_unlock(&rimt_fwnode_lock);
+
+ return 0;
+}
+
+/**
+ * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node
+ *
+ * @node: RIMT table node to be looked-up
+ *
+ * Returns: fwnode_handle pointer on success, NULL on failure
+ */
+static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node)
+{
+ struct fwnode_handle *fwnode = NULL;
+ struct rimt_fwnode *curr;
+
+ spin_lock(&rimt_fwnode_lock);
+ list_for_each_entry(curr, &rimt_fwnode_list, list) {
+ if (curr->rimt_node == node) {
+ fwnode = curr->fwnode;
+ break;
+ }
+ }
+ spin_unlock(&rimt_fwnode_lock);
+
+ return fwnode;
+}
+
+static acpi_status rimt_match_node_callback(struct acpi_rimt_node *node,
+ void *context)
+{
+ acpi_status status = AE_NOT_FOUND;
+ struct device *dev = context;
+
+ if (node->type == ACPI_RIMT_NODE_TYPE_IOMMU) {
+ struct acpi_rimt_iommu *iommu_node = (struct acpi_rimt_iommu *)&node->node_data;
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev;
+ u16 bdf;
+
+ pdev = to_pci_dev(dev);
+ bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
+ if ((pci_domain_nr(pdev->bus) == iommu_node->pcie_segment_number) &&
+ bdf == iommu_node->pcie_bdf) {
+ status = AE_OK;
+ } else {
+ status = AE_NOT_FOUND;
+ }
+ } else {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (res && res->start == iommu_node->base_address)
+ status = AE_OK;
+ else
+ status = AE_NOT_FOUND;
+ }
+ } else if (node->type == ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX) {
+ struct acpi_rimt_pcie_rc *pci_rc;
+ struct pci_bus *bus;
+
+ bus = to_pci_bus(dev);
+ pci_rc = (struct acpi_rimt_pcie_rc *)node->node_data;
+
+ /*
+ * It is assumed that PCI segment numbers map one-to-one
+ * with root complexes. Each segment number can represent only
+ * one root complex.
+ */
+ status = pci_rc->pcie_segment_number == pci_domain_nr(bus) ?
+ AE_OK : AE_NOT_FOUND;
+ } else if (node->type == ACPI_RIMT_NODE_TYPE_PLAT_DEVICE) {
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_rimt_platform_device *ncomp;
+ struct device *plat_dev = dev;
+ struct acpi_device *adev;
+
+ /*
+ * Walk the device tree to find a device with an
+ * ACPI companion; there is no point in scanning
+ * RIMT for a device matching a platform device if
+ * the device does not have an ACPI companion to
+ * start with.
+ */
+ do {
+ adev = ACPI_COMPANION(plat_dev);
+ if (adev)
+ break;
+
+ plat_dev = plat_dev->parent;
+ } while (plat_dev);
+
+ if (!adev)
+ return status;
+
+ status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf);
+ if (ACPI_FAILURE(status)) {
+ dev_warn(plat_dev, "Can't get device full path name\n");
+ return status;
+ }
+
+ ncomp = (struct acpi_rimt_platform_device *)node->node_data;
+ status = !strcmp(ncomp->device_name, buf.pointer) ?
+ AE_OK : AE_NOT_FOUND;
+ acpi_os_free(buf.pointer);
+ }
+
+ return status;
+}
+
+static struct acpi_rimt_node *rimt_scan_node(enum acpi_rimt_node_type type,
+ void *context)
+{
+ struct acpi_rimt_node *rimt_node, *rimt_end;
+ struct acpi_table_rimt *rimt;
+ int i;
+
+ if (!rimt_table)
+ return NULL;
+
+ /* Get the first RIMT node */
+ rimt = (struct acpi_table_rimt *)rimt_table;
+ rimt_node = ACPI_ADD_PTR(struct acpi_rimt_node, rimt,
+ rimt->node_offset);
+ rimt_end = ACPI_ADD_PTR(struct acpi_rimt_node, rimt_table,
+ rimt_table->length);
+
+ for (i = 0; i < rimt->num_nodes; i++) {
+ if (WARN_TAINT(rimt_node >= rimt_end, TAINT_FIRMWARE_WORKAROUND,
+ "RIMT node pointer overflows, bad table!\n"))
+ return NULL;
+
+ if (rimt_node->type == type &&
+ ACPI_SUCCESS(rimt_match_node_callback(rimt_node, context)))
+ return rimt_node;
+
+ rimt_node = ACPI_ADD_PTR(struct acpi_rimt_node, rimt_node,
+ rimt_node->length);
+ }
+
+ return NULL;
+}
+
+static bool rimt_pcie_rc_supports_ats(struct acpi_rimt_node *node)
+{
+ struct acpi_rimt_pcie_rc *pci_rc;
+
+ pci_rc = (struct acpi_rimt_pcie_rc *)node->node_data;
+ return pci_rc->flags & ACPI_RIMT_PCIE_ATS_SUPPORTED;
+}
+
+static int rimt_iommu_xlate(struct device *dev, struct acpi_rimt_node *node, u32 deviceid)
+{
+ struct fwnode_handle *rimt_fwnode;
+
+ if (!node)
+ return -ENODEV;
+
+ rimt_fwnode = rimt_get_fwnode(node);
+
+ /*
+ * The IOMMU drivers may not be probed yet.
+ * Defer the IOMMU configuration
+ */
+ if (!rimt_fwnode)
+ return -EPROBE_DEFER;
+
+ return acpi_iommu_fwspec_init(dev, deviceid, rimt_fwnode);
+}
+
+struct rimt_pci_alias_info {
+ struct device *dev;
+ struct acpi_rimt_node *node;
+ const struct iommu_ops *ops;
+};
+
+static int rimt_id_map(struct acpi_rimt_id_mapping *map, u8 type, u32 rid_in, u32 *rid_out)
+{
+ if (rid_in < map->source_id_base ||
+ (rid_in > map->source_id_base + map->num_ids))
+ return -ENXIO;
+
+ *rid_out = map->dest_id_base + (rid_in - map->source_id_base);
+ return 0;
+}
+
+static struct acpi_rimt_node *rimt_node_get_id(struct acpi_rimt_node *node,
+ u32 *id_out, int index)
+{
+ struct acpi_rimt_platform_device *plat_node;
+ u32 id_mapping_offset, num_id_mapping;
+ struct acpi_rimt_pcie_rc *pci_node;
+ struct acpi_rimt_id_mapping *map;
+ struct acpi_rimt_node *parent;
+
+ if (node->type == ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX) {
+ pci_node = (struct acpi_rimt_pcie_rc *)&node->node_data;
+ id_mapping_offset = pci_node->id_mapping_offset;
+ num_id_mapping = pci_node->num_id_mappings;
+ } else if (node->type == ACPI_RIMT_NODE_TYPE_PLAT_DEVICE) {
+ plat_node = (struct acpi_rimt_platform_device *)&node->node_data;
+ id_mapping_offset = plat_node->id_mapping_offset;
+ num_id_mapping = plat_node->num_id_mappings;
+ } else {
+ return NULL;
+ }
+
+ if (!id_mapping_offset || !num_id_mapping || index >= num_id_mapping)
+ return NULL;
+
+ map = ACPI_ADD_PTR(struct acpi_rimt_id_mapping, node,
+ id_mapping_offset + index * sizeof(*map));
+
+ /* Firmware bug! */
+ if (!map->dest_offset) {
+ pr_err(FW_BUG "[node %p type %d] ID map has NULL parent reference\n",
+ node, node->type);
+ return NULL;
+ }
+
+ parent = ACPI_ADD_PTR(struct acpi_rimt_node, rimt_table, map->dest_offset);
+
+ if (node->type == ACPI_RIMT_NODE_TYPE_PLAT_DEVICE ||
+ node->type == ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX) {
+ *id_out = map->dest_id_base;
+ return parent;
+ }
+
+ return NULL;
+}
+
+/*
+ * RISC-V supports IOMMU as a PCI device or a platform device.
+ * When it is a platform device, there should be a namespace device as
+ * well along with RIMT. To create the link between RIMT information and
+ * the platform device, the IOMMU driver should register itself with the
+ * RIMT module. This is true for PCI based IOMMU as well.
+ */
+int rimt_iommu_register(struct device *dev)
+{
+ struct fwnode_handle *rimt_fwnode;
+ struct acpi_rimt_node *node;
+
+ node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev);
+ if (!node) {
+ pr_err("Could not find IOMMU node in RIMT\n");
+ return -ENODEV;
+ }
+
+ if (dev_is_pci(dev)) {
+ rimt_fwnode = acpi_alloc_fwnode_static();
+ if (!rimt_fwnode)
+ return -ENOMEM;
+
+ rimt_fwnode->dev = dev;
+ if (!dev->fwnode)
+ dev->fwnode = rimt_fwnode;
+
+ rimt_set_fwnode(node, rimt_fwnode);
+ } else {
+ rimt_set_fwnode(node, dev->fwnode);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+
+static struct acpi_rimt_node *rimt_node_map_id(struct acpi_rimt_node *node,
+ u32 id_in, u32 *id_out,
+ u8 type_mask)
+{
+ struct acpi_rimt_platform_device *plat_node;
+ u32 id_mapping_offset, num_id_mapping;
+ struct acpi_rimt_pcie_rc *pci_node;
+ u32 id = id_in;
+
+ /* Parse the ID mapping tree to find specified node type */
+ while (node) {
+ struct acpi_rimt_id_mapping *map;
+ int i, rc = 0;
+ u32 map_id = id;
+
+ if (RIMT_TYPE_MASK(node->type) & type_mask) {
+ if (id_out)
+ *id_out = id;
+ return node;
+ }
+
+ if (node->type == ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX) {
+ pci_node = (struct acpi_rimt_pcie_rc *)&node->node_data;
+ id_mapping_offset = pci_node->id_mapping_offset;
+ num_id_mapping = pci_node->num_id_mappings;
+ } else if (node->type == ACPI_RIMT_NODE_TYPE_PLAT_DEVICE) {
+ plat_node = (struct acpi_rimt_platform_device *)&node->node_data;
+ id_mapping_offset = plat_node->id_mapping_offset;
+ num_id_mapping = plat_node->num_id_mappings;
+ } else {
+ goto fail_map;
+ }
+
+ if (!id_mapping_offset || !num_id_mapping)
+ goto fail_map;
+
+ map = ACPI_ADD_PTR(struct acpi_rimt_id_mapping, node,
+ id_mapping_offset);
+
+ /* Firmware bug! */
+ if (!map->dest_offset) {
+ pr_err(FW_BUG "[node %p type %d] ID map has NULL parent reference\n",
+ node, node->type);
+ goto fail_map;
+ }
+
+ /* Do the ID translation */
+ for (i = 0; i < num_id_mapping; i++, map++) {
+ rc = rimt_id_map(map, node->type, map_id, &id);
+ if (!rc)
+ break;
+ }
+
+ if (i == num_id_mapping)
+ goto fail_map;
+
+ node = ACPI_ADD_PTR(struct acpi_rimt_node, rimt_table,
+ rc ? 0 : map->dest_offset);
+ }
+
+fail_map:
+ /* Map input ID to output ID unchanged on mapping failure */
+ if (id_out)
+ *id_out = id_in;
+
+ return NULL;
+}
+
+static struct acpi_rimt_node *rimt_node_map_platform_id(struct acpi_rimt_node *node, u32 *id_out,
+ u8 type_mask, int index)
+{
+ struct acpi_rimt_node *parent;
+ u32 id;
+
+ parent = rimt_node_get_id(node, &id, index);
+ if (!parent)
+ return NULL;
+
+ if (!(RIMT_TYPE_MASK(parent->type) & type_mask))
+ parent = rimt_node_map_id(parent, id, id_out, type_mask);
+ else
+ if (id_out)
+ *id_out = id;
+
+ return parent;
+}
+
+static int rimt_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct rimt_pci_alias_info *info = data;
+ struct acpi_rimt_node *parent;
+ u32 deviceid;
+
+ parent = rimt_node_map_id(info->node, alias, &deviceid, RIMT_IOMMU_TYPE);
+ return rimt_iommu_xlate(info->dev, parent, deviceid);
+}
+
+static int rimt_plat_iommu_map(struct device *dev, struct acpi_rimt_node *node)
+{
+ struct acpi_rimt_node *parent;
+ int err = -ENODEV, i = 0;
+ u32 deviceid = 0;
+
+ do {
+ parent = rimt_node_map_platform_id(node, &deviceid,
+ RIMT_IOMMU_TYPE,
+ i++);
+
+ if (parent)
+ err = rimt_iommu_xlate(dev, parent, deviceid);
+ } while (parent && !err);
+
+ return err;
+}
+
+static int rimt_plat_iommu_map_id(struct device *dev,
+ struct acpi_rimt_node *node,
+ const u32 *in_id)
+{
+ struct acpi_rimt_node *parent;
+ u32 deviceid;
+
+ parent = rimt_node_map_id(node, *in_id, &deviceid, RIMT_IOMMU_TYPE);
+ if (parent)
+ return rimt_iommu_xlate(dev, parent, deviceid);
+
+ return -ENODEV;
+}
+
+/**
+ * rimt_iommu_configure_id - Set-up IOMMU configuration for a device.
+ *
+ * @dev: device to configure
+ * @id_in: optional input id const value pointer
+ *
+ * Returns: 0 on success, <0 on failure
+ */
+int rimt_iommu_configure_id(struct device *dev, const u32 *id_in)
+{
+ struct acpi_rimt_node *node;
+ int err = -ENODEV;
+
+ if (dev_is_pci(dev)) {
+ struct iommu_fwspec *fwspec;
+ struct pci_bus *bus = to_pci_dev(dev)->bus;
+ struct rimt_pci_alias_info info = { .dev = dev };
+
+ node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX, &bus->dev);
+ if (!node)
+ return -ENODEV;
+
+ info.node = node;
+ err = pci_for_each_dma_alias(to_pci_dev(dev),
+ rimt_pci_iommu_init, &info);
+
+ fwspec = dev_iommu_fwspec_get(dev);
+ if (fwspec && rimt_pcie_rc_supports_ats(node))
+ fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS;
+ } else {
+ node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_PLAT_DEVICE, dev);
+ if (!node)
+ return -ENODEV;
+
+ err = id_in ? rimt_plat_iommu_map_id(dev, node, id_in) :
+ rimt_plat_iommu_map(dev, node);
+ }
+
+ return err;
+}
+
+#endif
+
+void __init riscv_acpi_rimt_init(void)
+{
+ acpi_status status;
+
+ /* rimt_table will be used at runtime after the rimt init,
+ * so we don't need to call acpi_put_table() to release
+ * the RIMT table mapping.
+ */
+ status = acpi_get_table(ACPI_SIG_RIMT, 0, &rimt_table);
+ if (ACPI_FAILURE(status)) {
+ if (status != AE_NOT_FOUND) {
+ const char *msg = acpi_format_exception(status);
+
+ pr_err("Failed to get table, %s\n", msg);
+ }
+
+ return;
+ }
+}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 880a544d73cd..065abe56f440 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
+#include <linux/acpi_rimt.h>
#include <linux/acpi_viot.h>
#include <linux/iommu.h>
#include <linux/signal.h>
@@ -1631,7 +1632,10 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in)
err = iort_iommu_configure_id(dev, id_in);
if (err && err != -EPROBE_DEFER)
+ err = rimt_iommu_configure_id(dev, id_in);
+ if (err && err != -EPROBE_DEFER)
err = viot_iommu_configure(dev);
+
mutex_unlock(&iommu_probe_device_lock);
return err;
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 65d6d0af140a..8dff5c2c40fd 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -28,6 +28,7 @@
#include <linux/fs_struct.h>
#include <linux/psp.h>
#include <linux/amd-iommu.h>
+#include <linux/crash_dump.h>
#include <asm/smp.h>
#include <asm/cacheflush.h>
@@ -1526,6 +1527,15 @@ static int _sev_platform_init_locked(struct sev_platform_init_args *args)
if (!psp_master || !psp_master->sev_data)
return -ENODEV;
+ /*
+ * Skip SNP/SEV initialization under a kdump kernel as SEV/SNP
+ * may already be initialized in the previous kernel. Since no
+ * SNP/SEV guests are run under a kdump kernel, there is no
+ * need to initialize SNP or SEV during kdump boot.
+ */
+ if (is_kdump_kernel())
+ return 0;
+
sev = psp_master->sev_data;
if (sev->state == SEV_STATE_INIT)
diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
index f93aa0cecdb9..3dacccf7855c 100644
--- a/drivers/fwctl/mlx5/main.c
+++ b/drivers/fwctl/mlx5/main.c
@@ -58,6 +58,9 @@ enum {
MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716,
MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722,
MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT = 0x728,
+ MLX5_CMD_OP_QUERY_ADJACENT_FUNCTIONS_ID = 0x730,
+ MLX5_CMD_OP_DELEGATE_VHCA_MANAGEMENT = 0x731,
+ MLX5_CMD_OP_QUERY_DELEGATED_VHCA = 0x732,
MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813,
MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819,
MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820,
@@ -188,6 +191,7 @@ static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
* filter commands manually for now.
*/
switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_CONG_STATUS:
case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT:
case MLX5_CMD_OP_QUERY_ADAPTER:
case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
@@ -196,6 +200,7 @@ static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
case MLX5_CMD_OP_QUERY_OTHER_HCA_CAP:
case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
case MLX5_CMD_OPCODE_QUERY_VUID:
+ case MLX5_CMD_OP_DELEGATE_VHCA_MANAGEMENT:
/*
* FW limits SET_HCA_CAP on the tools UID to only the other function
* mode which is used for function pre-configuration
@@ -281,6 +286,8 @@ static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
case MLX5_CMD_OP_QUERY_XRQ:
case MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY:
case MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_ADJACENT_FUNCTIONS_ID:
+ case MLX5_CMD_OP_QUERY_DELEGATED_VHCA:
return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS:
@@ -345,7 +352,7 @@ static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
*/
if (ret && ret != -EREMOTEIO) {
if (rpc_out != rpc_in)
- kfree(rpc_out);
+ kvfree(rpc_out);
return ERR_PTR(ret);
}
return rpc_out;
diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c
index 9b9d1f6b5556..1809853f6353 100644
--- a/drivers/fwctl/pds/main.c
+++ b/drivers/fwctl/pds/main.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/bitfield.h>
+#include <linux/string.h>
#include <uapi/fwctl/fwctl.h>
#include <uapi/fwctl/pds.h>
@@ -366,18 +367,10 @@ static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
return ERR_PTR(err);
if (rpc->in.len > 0) {
- in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
- if (!in_payload) {
- dev_err(dev, "Failed to allocate in_payload\n");
- err = -ENOMEM;
- goto err_out;
- }
-
- if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
- rpc->in.len)) {
+ in_payload = memdup_user(u64_to_user_ptr(rpc->in.payload), rpc->in.len);
+ if (IS_ERR(in_payload)) {
dev_dbg(dev, "Failed to copy in_payload from user\n");
- err = -EFAULT;
- goto err_in_payload;
+ return in_payload;
}
in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
@@ -453,7 +446,6 @@ err_out_payload:
rpc->in.len, DMA_TO_DEVICE);
err_in_payload:
kfree(in_payload);
-err_out:
if (err)
return ERR_PTR(err);
@@ -481,7 +473,7 @@ static int pdsfc_probe(struct auxiliary_device *adev,
pdsfc = fwctl_alloc_device(&padev->vf_pdev->dev, &pdsfc_ops,
struct pdsfc_dev, fwctl);
if (!pdsfc)
- return dev_err_probe(dev, -ENOMEM, "Failed to allocate fwctl device struct\n");
+ return -ENOMEM;
pdsfc->padev = padev;
err = pdsfc_identify(pdsfc);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 3a394cd772f6..f0323f1d6f01 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
+source "drivers/infiniband/hw/ionic/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mana/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index be0743dac3ff..61596cda2b65 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -446,63 +446,41 @@ static int addr6_resolve(struct sockaddr *src_sock,
}
#endif
+/*
+ * Tell whether the route behind @dst terminates on this host.
+ *
+ * IPv4: rt_type carries one RTN_* route type value, not a set of flag bits,
+ * so it is compared for equality. Masking it with RTN_LOCAL would also match
+ * every other route type whose numeric value shares that bit (for example
+ * RTN_BROADCAST).
+ * IPv6: RTF_LOCAL is a flag bit inside rt6i_flags, so a mask test is right.
+ *
+ * Any other address family is reported as not local.
+ */
+static bool is_dst_local(const struct dst_entry *dst)
+{
+	if (dst->ops->family == AF_INET)
+		return dst_rtable(dst)->rt_type == RTN_LOCAL;
+	else if (dst->ops->family == AF_INET6)
+		return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL);
+	else
+		return false;
+}
+
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
- unsigned int ndev_flags,
u32 seq)
{
- int ret = 0;
-
- if (ndev_flags & IFF_LOOPBACK) {
+ if (is_dst_local(dst)) {
+ /* When the destination is local entry, source and destination
+ * are same. Skip the neighbour lookup.
+ */
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- } else {
- if (!(ndev_flags & IFF_NOARP)) {
- /* If the device doesn't do ARP internally */
- ret = fetch_ha(dst, addr, dst_in, seq);
- }
+ return 0;
}
- return ret;
-}
-
-static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
- const struct sockaddr *dst_in,
- const struct dst_entry *dst,
- const struct net_device *ndev)
-{
- int ret = 0;
-
- if (dst->dev->flags & IFF_LOOPBACK)
- ret = rdma_translate_ip(dst_in, dev_addr);
- else
- rdma_copy_src_l2_addr(dev_addr, dst->dev);
-
- /*
- * If there's a gateway and type of device not ARPHRD_INFINIBAND,
- * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
- * network type accordingly.
- */
- if (has_gateway(dst, dst_in->sa_family) &&
- ndev->type != ARPHRD_INFINIBAND)
- dev_addr->network = dst_in->sa_family == AF_INET ?
- RDMA_NETWORK_IPV4 :
- RDMA_NETWORK_IPV6;
- else
- dev_addr->network = RDMA_NETWORK_IB;
- return ret;
+ return fetch_ha(dst, addr, dst_in, seq);
}
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
- unsigned int *ndev_flags,
const struct sockaddr *dst_in,
const struct dst_entry *dst)
{
struct net_device *ndev = READ_ONCE(dst->dev);
- *ndev_flags = ndev->flags;
/* A physical device must be the RDMA device to use */
- if (ndev->flags & IFF_LOOPBACK) {
+ if (is_dst_local(dst)) {
+ int ret;
/*
* RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
* loopback IP address. So if route is resolved to loopback
@@ -512,9 +490,27 @@ static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
if (IS_ERR(ndev))
return -ENODEV;
+ ret = rdma_translate_ip(dst_in, dev_addr);
+ if (ret)
+ return ret;
+ } else {
+ rdma_copy_src_l2_addr(dev_addr, dst->dev);
}
- return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
+ /*
+ * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+ * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+ * network type accordingly.
+ */
+ if (has_gateway(dst, dst_in->sa_family) &&
+ ndev->type != ARPHRD_INFINIBAND)
+ dev_addr->network = dst_in->sa_family == AF_INET ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ else
+ dev_addr->network = RDMA_NETWORK_IB;
+
+ return 0;
}
static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
@@ -551,7 +547,6 @@ static int addr_resolve(struct sockaddr *src_in,
u32 seq)
{
struct dst_entry *dst = NULL;
- unsigned int ndev_flags = 0;
struct rtable *rt = NULL;
int ret;
@@ -588,7 +583,7 @@ static int addr_resolve(struct sockaddr *src_in,
rcu_read_unlock();
goto done;
}
- ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+ ret = rdma_set_src_addr_rcu(addr, dst_in, dst);
rcu_read_unlock();
/*
@@ -596,7 +591,7 @@ static int addr_resolve(struct sockaddr *src_in,
* only if src addr translation didn't fail.
*/
if (!ret && resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
+ ret = addr_resolve_neigh(dst, dst_in, addr, seq);
if (src_in->sa_family == AF_INET)
ip_rt_put(rt);
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 3bb46696731e..25a060a28301 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -110,8 +110,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
- PTR_ERR(ah));
+ dev_err(&device->dev, "ib_create_ah_from_wc error %pe\n", ah);
return;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 92678e438ff4..01bede8ba105 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1049,8 +1049,8 @@ static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
struct cm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
- cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
+ pr_err_ratelimited("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
+ cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
}
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9b471548e7ae..5b2d3ae3f9fc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2076,6 +2076,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
kfree(id_priv->id.route.path_rec);
kfree(id_priv->id.route.path_rec_inbound);
kfree(id_priv->id.route.path_rec_outbound);
+ kfree(id_priv->id.route.service_recs);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
@@ -3382,13 +3383,18 @@ err1:
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
+ enum rdma_cm_state state;
int ret;
if (!timeout_ms)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
+ state = id_priv->state;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY) &&
+ !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED,
+ RDMA_CM_ROUTE_QUERY))
return -EINVAL;
cma_id_get(id_priv);
@@ -3409,7 +3415,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state);
cma_id_put(id_priv);
return ret;
}
@@ -5506,3 +5512,129 @@ static void __exit cma_cleanup(void)
module_init(cma_init);
module_exit(cma_cleanup);
+
+/*
+ * Completion callback handed to ib_sa_service_rec_get().
+ *
+ * @status:   0 on success, negative errno otherwise
+ * @recs:     records returned by the query; they are copied here, the
+ *            pointer itself is not kept
+ * @num_recs: number of entries in @recs
+ * @context:  the struct cma_work queued by cma_resolve_ib_service()
+ *
+ * On success the records are cached on the id and the first record seeds the
+ * AF_IB destination address, DGID and P_Key. On any failure the work item is
+ * rewritten to report RDMA_CM_EVENT_ADDRINFO_ERROR and to move the id from
+ * ADDRINFO_QUERY back to ADDR_BOUND. The work item is queued in both cases.
+ */
+static void cma_query_ib_service_handler(int status,
+					 struct sa_service_rec *recs,
+					 unsigned int num_recs, void *context)
+{
+	struct cma_work *work = context;
+	struct rdma_id_private *id_priv = work->id;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct sockaddr_ib *dst;
+
+	/* First failing check wins; later checks are skipped once set. */
+	if (!status && !num_recs)
+		status = -ENOENT;
+	if (!status && id_priv->id.route.service_recs)
+		status = -EALREADY;
+	if (!status) {
+		id_priv->id.route.service_recs =
+			kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL);
+		if (!id_priv->id.route.service_recs)
+			status = -ENOMEM;
+	}
+
+	if (status) {
+		work->old_state = RDMA_CM_ADDRINFO_QUERY;
+		work->new_state = RDMA_CM_ADDR_BOUND;
+		work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR;
+		work->event.status = status;
+		pr_debug_ratelimited(
+			"RDMA CM: SERVICE_ERROR: failed to query service record. status %d\n",
+			status);
+	} else {
+		id_priv->id.route.num_service_recs = num_recs;
+		memcpy(id_priv->id.route.service_recs, recs,
+		       sizeof(*recs) * num_recs);
+
+		/* Destination address is taken from the first record only. */
+		dst = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr;
+		dst->sib_family = AF_IB;
+		dst->sib_addr = *(struct ib_addr *)&recs->gid;
+		dst->sib_pkey = recs->pkey;
+		dst->sib_sid = recs->id;
+		rdma_addr_set_dgid(dev_addr, (union ib_gid *)&dst->sib_addr);
+		ib_addr_set_pkey(dev_addr, ntohs(dst->sib_pkey));
+	}
+
+	queue_work(cma_wq, &work->work);
+}
+
+/*
+ * Issue an SA Service Record query for @id_priv.
+ *
+ * The query is keyed by service id and/or service name, depending on which
+ * RDMA_USER_CM_IB_SERVICE_FLAG_* bits are set in @ibs->flags. A cma_work item
+ * is pre-loaded with the success transition (ADDRINFO_QUERY ->
+ * ADDRINFO_RESOLVED) and passed to the SA layer as the callback context.
+ *
+ * Returns 0 when the query was submitted, -ENOMEM if the work item cannot be
+ * allocated, or the negative value returned by ib_sa_service_rec_get().
+ * An id reference is held while the query is outstanding and dropped here
+ * if submission fails.
+ */
+static int cma_resolve_ib_service(struct rdma_id_private *id_priv,
+				  struct rdma_ucm_ib_service *ibs)
+{
+	struct sa_service_rec sr = {};
+	ib_sa_comp_mask mask = 0;
+	struct cma_work *work;
+	int query_id;
+
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	/* Build the lookup key from whichever selectors the caller set. */
+	if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) {
+		sr.id = cpu_to_be64(ibs->service_id);
+		mask |= IB_SA_SERVICE_REC_SERVICE_ID;
+	}
+	if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) {
+		strscpy(sr.name, ibs->service_name, sizeof(sr.name));
+		mask |= IB_SA_SERVICE_REC_SERVICE_NAME;
+	}
+
+	cma_id_get(id_priv);
+
+	INIT_WORK(&work->work, cma_work_handler);
+	work->id = id_priv;
+	work->old_state = RDMA_CM_ADDRINFO_QUERY;
+	work->new_state = RDMA_CM_ADDRINFO_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED;
+
+	id_priv->query_id = ib_sa_service_rec_get(&sa_client,
+						  id_priv->id.device,
+						  id_priv->id.port_num,
+						  &sr, mask,
+						  2000, GFP_KERNEL,
+						  cma_query_ib_service_handler,
+						  work, &id_priv->query);
+	if (id_priv->query_id >= 0)
+		return 0;
+
+	/* Submission failed: undo the reference and free the work item. */
+	cma_id_put(id_priv);
+	kfree(work);
+	query_id = id_priv->query_id;
+	return query_id;
+}
+
+/*
+ * rdma_resolve_ib_service - resolve an IB service id/name through the SA
+ * @id:  CM id; must have a cma device attached and be in ADDR_BOUND state
+ * @ibs: service selectors supplied by the caller
+ *
+ * Moves the id to ADDRINFO_QUERY and starts the lookup. If the port has no
+ * IB SA capability, or the lookup cannot be started, the id is moved back to
+ * ADDR_BOUND.
+ *
+ * Returns 0 once the query is in flight, -EINVAL when the id has no device
+ * or is not in ADDR_BOUND, -EOPNOTSUPP without IB SA support, or the error
+ * from cma_resolve_ib_service().
+ */
+int rdma_resolve_ib_service(struct rdma_cm_id *id,
+			    struct rdma_ucm_ib_service *ibs)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+	int ret = -EOPNOTSUPP;
+
+	if (!id_priv->cma_dev)
+		return -EINVAL;
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY))
+		return -EINVAL;
+
+	if (rdma_cap_ib_sa(id->device, id->port_num))
+		ret = cma_resolve_ib_service(id_priv, ibs);
+
+	/* Roll the state back so the caller may retry after a failure. */
+	if (ret)
+		cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY,
+			      RDMA_CM_ADDR_BOUND);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ib_service);
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index b7354c94cf1b..c604b601f4d9 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -47,7 +47,9 @@ enum rdma_cm_state {
RDMA_CM_ADDR_BOUND,
RDMA_CM_LISTEN,
RDMA_CM_DEVICE_REMOVAL,
- RDMA_CM_DESTROYING
+ RDMA_CM_DESTROYING,
+ RDMA_CM_ADDRINFO_QUERY,
+ RDMA_CM_ADDRINFO_RESOLVED
};
struct rdma_id_private {
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 3145cb34a1d2..b4f3c835844a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1543,7 +1543,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
/*
* We have a registration lock so that all the calls to unregister are
- * fully fenced, once any unregister returns the device is truely
+ * fully fenced, once any unregister returns the device is truly
* unregistered even if multiple callers are unregistering it at the
* same time. This also interacts with the registration flow and
* provides sane semantics if register and unregister are racing.
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 53571e6b3162..c23e9c847314 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -107,6 +107,8 @@ struct ib_sa_device {
struct ib_sa_query {
void (*callback)(struct ib_sa_query *sa_query, int status,
struct ib_sa_mad *mad);
+ void (*rmpp_callback)(struct ib_sa_query *sa_query, int status,
+ struct ib_mad_recv_wc *mad);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
@@ -150,6 +152,13 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
+/*
+ * State for one Service Record query: the caller's completion callback and
+ * its opaque context, plus the embedded generic ib_sa_query. The service
+ * callback and release hooks recover this structure from the embedded
+ * sa_query with container_of().
+ */
+struct ib_sa_service_query {
+ void (*callback)(int status, struct sa_service_rec *rec,
+ unsigned int num_services, void *context);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
@@ -684,6 +693,58 @@ static const struct ib_field guidinfo_rec_table[] = {
.size_bits = 512 },
};
+/*
+ * Designated-initializer helper: fills in the in-memory offset, size and a
+ * printable name for @field of struct sa_service_rec.
+ */
+#define SERVICE_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct sa_service_rec, field), \
+ .struct_size_bytes = sizeof_field(struct sa_service_rec, field), \
+ .field_name = "sa_service_rec:" #field
+
+/*
+ * Layout of a packed Service Record attribute, used by ib_sa_pack_service()
+ * and ib_sa_unpack_service(). Offsets are in 32-bit words plus bits; the last
+ * entry ends at word 44, i.e. 176 bytes in total. The RESERVED entry marks a
+ * 16-bit gap after pkey that has no struct sa_service_rec field behind it.
+ */
+static const struct ib_field service_rec_table[] = {
+ { SERVICE_REC_FIELD(id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { SERVICE_REC_FIELD(gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(pkey),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 6,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { SERVICE_REC_FIELD(lease),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { SERVICE_REC_FIELD(key),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(name),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 512 },
+ { SERVICE_REC_FIELD(data_8),
+ .offset_words = 28,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_16),
+ .offset_words = 32,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_32),
+ .offset_words = 36,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_64),
+ .offset_words = 40,
+ .offset_bits = 0,
+ .size_bits = 128 },
+};
+
#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
@@ -1013,6 +1074,8 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+
delta = timeout - sa_local_svc_timeout_ms;
if (delta < 0)
abs_delta = -delta;
@@ -1020,7 +1083,6 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
abs_delta = delta;
if (delta != 0) {
- spin_lock_irqsave(&ib_nl_request_lock, flags);
sa_local_svc_timeout_ms = timeout;
list_for_each_entry(query, &ib_nl_request_list, list) {
if (delta < 0 && abs_delta > query->timeout)
@@ -1038,9 +1100,10 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (delay)
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
(unsigned long)delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+
settimeout_out:
return 0;
}
@@ -1390,6 +1453,20 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
}
EXPORT_SYMBOL(ib_sa_pack_path);
+/*
+ * ib_sa_pack_service - serialize a service record into a MAD attribute
+ * @rec:       in-memory record to read from
+ * @attribute: destination buffer for the packed form
+ *
+ * Thin wrapper around ib_pack() driven by service_rec_table.
+ */
+void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute)
+{
+ ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec,
+ attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_service);
+
+/*
+ * ib_sa_unpack_service - parse a packed service record attribute
+ * @attribute: packed form to read from
+ * @rec:       in-memory record to fill
+ *
+ * Thin wrapper around ib_unpack() driven by service_rec_table.
+ */
+void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec)
+{
+ ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), attribute,
+ rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_service);
+
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
struct ib_sa_device *sa_dev,
u32 port_num)
@@ -1479,6 +1556,68 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
}
}
+/*
+ * IB_SA_DATA_OFFS is subtracted from the received MAD length before it is
+ * divided into records (presumably the SA MAD header length - confirm).
+ * IB_SERVICE_REC_SZ is the size in bytes of one packed service record.
+ */
+#define IB_SA_DATA_OFFS 56
+#define IB_SERVICE_REC_SZ 176
+
+/*
+ * Reassemble packed service records from the segments on @mad_wc->rmpp_list
+ * and unpack up to @num_services of them into @rec.
+ *
+ * A packed record may straddle two segments, so bytes are gathered into a
+ * staging buffer until a full IB_SERVICE_REC_SZ is available, and only then
+ * unpacked. Trailing bytes that do not complete a record are dropped.
+ */
+static void ib_unpack_service_rmpp(struct sa_service_rec *rec,
+ struct ib_mad_recv_wc *mad_wc,
+ int num_services)
+{
+ unsigned int cp_sz, data_i, data_size, rec_i = 0, buf_i = 0;
+ struct ib_mad_recv_buf *mad_buf;
+ u8 buf[IB_SERVICE_REC_SZ];
+ u8 *data;
+
+ /* sizeof does not evaluate its operand, so mad_buf is not read here */
+ data_size = sizeof(((struct ib_sa_mad *) mad_buf->mad)->data);
+
+ list_for_each_entry(mad_buf, &mad_wc->rmpp_list, list) {
+ data = ((struct ib_sa_mad *) mad_buf->mad)->data;
+ data_i = 0;
+ while (data_i < data_size && rec_i < num_services) {
+ /* take whichever is smaller: rest of record or rest of segment */
+ cp_sz = min(IB_SERVICE_REC_SZ - buf_i,
+ data_size - data_i);
+ memcpy(buf + buf_i, data + data_i, cp_sz);
+ data_i += cp_sz;
+ buf_i += cp_sz;
+ if (buf_i == IB_SERVICE_REC_SZ) {
+ ib_sa_unpack_service(buf, rec + rec_i);
+ buf_i = 0;
+ rec_i++;
+ }
+ }
+ }
+}
+
+/*
+ * Completion hook for Service Record queries (installed as rmpp_callback).
+ *
+ * @sa_query: generic query embedded in a struct ib_sa_service_query
+ * @status:   result reported by the MAD send/receive handlers
+ * @mad_wc:   receive completion, or NULL when no response was received
+ *
+ * Without a response the user callback is invoked with @status and no
+ * records. Otherwise the number of whole records is derived from the MAD
+ * length, the records are unpacked into a temporary array and passed to the
+ * user callback. The array is freed as soon as the callback returns, so the
+ * callback must copy anything it wants to keep.
+ *
+ * The user callback sees -ENODATA when the response holds no complete
+ * record and -ENOMEM when the temporary array cannot be allocated.
+ */
+static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status,
+				       struct ib_mad_recv_wc *mad_wc)
+{
+	struct ib_sa_service_query *query =
+		container_of(sa_query, struct ib_sa_service_query, sa_query);
+	struct sa_service_rec *rec;
+	int num_services;
+
+	if (!mad_wc || !mad_wc->recv_buf.mad) {
+		query->callback(status, NULL, 0, query->context);
+		return;
+	}
+
+	num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) / IB_SERVICE_REC_SZ;
+	if (!num_services) {
+		query->callback(-ENODATA, NULL, 0, query->context);
+		return;
+	}
+
+	/*
+	 * Zero the array: unpacking writes only the fields described by
+	 * service_rec_table, which has a RESERVED gap, and consumers copy
+	 * whole records onward. Uninitialized heap bytes must not leak
+	 * through them.
+	 */
+	rec = kcalloc(num_services, sizeof(*rec), GFP_KERNEL);
+	if (!rec) {
+		query->callback(-ENOMEM, NULL, 0, query->context);
+		return;
+	}
+
+	ib_unpack_service_rmpp(rec, mad_wc, num_services);
+	query->callback(status, rec, num_services, query->context);
+	kfree(rec);
+}
+
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
struct ib_sa_path_query *query =
@@ -1488,6 +1627,14 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(query);
}
+/* Release hook: free the service query that embeds @sa_query. */
+static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
+{
+	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
+}
+
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
@@ -1618,6 +1765,101 @@ err1:
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
+/**
+ * ib_sa_service_rec_get - Start a Service Record get-table query
+ * @client: SA client
+ * @device: device to send query on
+ * @port_num: port number to send query on
+ * @rec: Service Record to send in query
+ * @comp_mask: component mask to send in query
+ * @timeout_ms: time to wait for response
+ * @gfp_mask: GFP mask to use for internal allocations
+ * @callback: function called when query completes, times out or is
+ * canceled
+ * @context: opaque user context passed to callback
+ * @sa_query: query context, used to cancel query
+ *
+ * Send a Service Record GetTable query to the SA to look up service
+ * records. The callback function will be called when the query
+ * completes (or fails); status is 0 for a successful response, -EINTR
+ * if the query is canceled, -ETIMEDOUT if the query timed out, or -EIO
+ * if an error occurred sending the query. The callback may also see
+ * -ENODATA when the response carries no complete record and -ENOMEM
+ * when the records cannot be unpacked. The resp parameter of the
+ * callback is only valid if status is 0, and only for the duration of
+ * the callback: the record array is freed once the callback returns.
+ *
+ * If the return value of ib_sa_service_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_service_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u32 port_num,
+ struct sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct sa_service_rec *resp,
+ unsigned int num_services,
+ void *context),
+ void *context, struct ib_sa_query **sa_query)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_service_query *query;
+ struct ib_mad_agent *agent;
+ struct ib_sa_port *port;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kzalloc(sizeof(*query), gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(&query->sa_query, agent);
+
+ /* responses are delivered through rmpp_callback, not callback */
+ query->sa_query.rmpp_callback = callback ? ib_sa_service_rec_callback :
+ NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_sa_pack_service(rec, mad->data);
+
+ *sa_query = &query->sa_query;
+ /*
+ * NOTE(review): this keeps the caller's @rec pointer in the send buffer.
+ * Nothing here shows who reads context[1] later - confirm it is never
+ * dereferenced after the caller's record goes out of scope.
+ */
+ query->sa_query.mad_buf->context[1] = rec;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+err1:
+ kfree(query);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_service_rec_get);
+
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
int status, struct ib_sa_mad *mad)
{
@@ -1987,23 +2229,29 @@ static void send_handler(struct ib_mad_agent *agent,
{
struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
+ int status = 0;
- if (query->callback)
+ if (query->callback || query->rmpp_callback) {
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
- query->callback(query, -ETIMEDOUT, NULL);
+ status = -ETIMEDOUT;
break;
case IB_WC_WR_FLUSH_ERR:
- query->callback(query, -EINTR, NULL);
+ status = -EINTR;
break;
default:
- query->callback(query, -EIO, NULL);
+ status = -EIO;
break;
}
+ if (status)
+ query->callback ? query->callback(query, status, NULL) :
+ query->rmpp_callback(query, status, NULL);
+ }
+
xa_lock_irqsave(&queries, flags);
__xa_erase(&queries, query->id);
xa_unlock_irqrestore(&queries, flags);
@@ -2019,17 +2267,25 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
+ struct ib_mad *mad;
+
if (!send_buf)
return;
query = send_buf->context[0];
- if (query->callback) {
+ mad = mad_recv_wc->recv_buf.mad;
+
+ if (query->rmpp_callback) {
+ if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
+ query->rmpp_callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, mad_recv_wc);
+ else
+ query->rmpp_callback(query, -EIO, NULL);
+ } else if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
- query->callback(query,
- mad_recv_wc->recv_buf.mad->mad_hdr.status ?
- -EINVAL : 0,
- (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+ query->callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, (struct ib_sa_mad *)mad);
else
query->callback(query, -EIO, NULL);
}
@@ -2181,8 +2437,9 @@ static int ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
- NULL, 0, send_handler,
- recv_handler, sa_dev, 0);
+ NULL, IB_MGMT_RMPP_VERSION,
+ send_handler, recv_handler,
+ sa_dev, 0);
if (IS_ERR(sa_dev->port[i].agent)) {
ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6e700b974033..f86ece701db6 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -282,6 +282,10 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
}
uevent->resp.event = event->event;
uevent->resp.status = event->status;
+
+ if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED)
+ goto out;
+
if (ctx->cm_id->qp_type == IB_QPT_UD)
ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
&event->param.ud);
@@ -289,6 +293,7 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
+out:
uevent->resp.ece.vendor_id = event->ece.vendor_id;
uevent->resp.ece.attr_mod = event->ece.attr_mod;
return uevent;
@@ -728,6 +733,28 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
return ret;
}
+/*
+ * RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE handler.
+ *
+ * Copies the command from user space, looks up the context named by cmd.id
+ * and starts the service resolution under the context mutex.
+ *
+ * Returns the result of rdma_resolve_ib_service(), -EFAULT if the command
+ * cannot be copied in, or the error encoded by ucma_get_ctx().
+ */
+static ssize_t ucma_resolve_ib_service(struct ucma_file *file,
+				       const char __user *inbuf, int in_len,
+				       int out_len)
+{
+	struct rdma_ucm_resolve_ib_service cmd;
+	struct ucma_context *ctx;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (!IS_ERR(ctx)) {
+		mutex_lock(&ctx->mutex);
+		ret = rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs);
+		mutex_unlock(&ctx->mutex);
+
+		ucma_put_ctx(ctx);
+		return ret;
+	}
+
+	return PTR_ERR(ctx);
+}
+
static ssize_t ucma_resolve_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -994,6 +1021,43 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
return ret;
}
+/*
+ * RDMA_USER_CM_QUERY_IB_SERVICE: hand the service records cached on the CM id
+ * back to user space.
+ *
+ * @ctx:      context whose cm_id holds the records
+ * @response: user buffer to fill
+ * @out_len:  size of @response in bytes
+ *
+ * Returns 0 on success, -ENOSPC if @out_len cannot hold the fixed response
+ * header, -ENODATA if no records are cached, -ENOMEM on allocation failure
+ * and -EFAULT if the copy to user space fails.
+ */
+static ssize_t ucma_query_ib_service(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_ib_service_resp *resp;
+ int n, ret = 0;
+
+ if (out_len < sizeof(struct rdma_ucm_query_ib_service_resp))
+ return -ENOSPC;
+
+ if (!ctx->cm_id->route.service_recs)
+ return -ENODATA;
+
+ resp = kzalloc(out_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ /* always report the full count, even if fewer records fit below */
+ resp->num_service_recs = ctx->cm_id->route.num_service_recs;
+
+ /* number of whole records that fit behind the header */
+ n = (out_len - sizeof(struct rdma_ucm_query_ib_service_resp)) /
+ sizeof(struct ib_user_service_rec);
+
+ /*
+ * NOTE(review): with room for the header only, this returns 0 without
+ * copying anything out, so the caller never sees num_service_recs -
+ * confirm this is intended.
+ */
+ if (!n)
+ goto out;
+
+ if (n > ctx->cm_id->route.num_service_recs)
+ n = ctx->cm_id->route.num_service_recs;
+
+ /*
+ * NOTE(review): raw copy assumes the cached records and resp->recs
+ * elements share the same layout - confirm.
+ */
+ memcpy(resp->recs, ctx->cm_id->route.service_recs,
+ sizeof(*resp->recs) * n);
+ if (copy_to_user(response, resp, struct_size(resp, recs, n)))
+ ret = -EFAULT;
+
+out:
+ kfree(resp);
+ return ret;
+}
+
static ssize_t ucma_query(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -1022,6 +1086,9 @@ static ssize_t ucma_query(struct ucma_file *file,
case RDMA_USER_CM_QUERY_GID:
ret = ucma_query_gid(ctx, response, out_len);
break;
+ case RDMA_USER_CM_QUERY_IB_SERVICE:
+ ret = ucma_query_ib_service(ctx, response, out_len);
+ break;
default:
ret = -ENOSYS;
break;
@@ -1678,6 +1745,55 @@ err_unlock:
return ret;
}
+/*
+ * RDMA_USER_CM_CMD_WRITE_CM_EVENT handler: let user space inject an event
+ * into the event queue of the file that owns the target context.
+ *
+ * Only RDMA_CM_EVENT_USER and RDMA_CM_EVENT_INTERNAL are accepted. The event
+ * carries the caller's status and argument, is appended to the file's event
+ * list, and pollers are woken.
+ *
+ * Returns 0 on success, -EFAULT if the command cannot be copied in, -EINVAL
+ * for any other event type, the error encoded by ucma_get_ctx(), or -ENOMEM.
+ */
+static ssize_t ucma_write_cm_event(struct ucma_file *file,
+				   const char __user *inbuf, int in_len,
+				   int out_len)
+{
+	struct rdma_cm_event event = {};
+	struct rdma_ucm_write_cm_event cmd;
+	struct ucma_context *ctx;
+	struct ucma_event *uevent;
+	int ret = -ENOMEM;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	switch (cmd.event) {
+	case RDMA_CM_EVENT_USER:
+	case RDMA_CM_EVENT_INTERNAL:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+	if (!uevent)
+		goto put_ctx;
+
+	event.event = cmd.event;
+	event.status = cmd.status;
+	event.param.arg = cmd.param.arg;
+
+	uevent->ctx = ctx;
+	uevent->resp.uid = ctx->uid;
+	uevent->resp.id = ctx->id;
+	uevent->resp.event = event.event;
+	uevent->resp.status = event.status;
+	memcpy(uevent->resp.param.arg32, &event.param.arg,
+	       sizeof(event.param.arg));
+
+	/* Queue under the file mutex, then wake anyone polling the file. */
+	mutex_lock(&ctx->file->mut);
+	list_add_tail(&uevent->list, &ctx->file->event_list);
+	mutex_unlock(&ctx->file->mut);
+	wake_up_interruptible(&ctx->file->poll_wait);
+	ret = 0;
+
+put_ctx:
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
@@ -1703,7 +1819,9 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_QUERY] = ucma_query,
[RDMA_USER_CM_CMD_BIND] = ucma_bind,
[RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
- [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service,
+ [RDMA_USER_CM_CMD_WRITE_CM_EVENT] = ucma_write_cm_event,
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index df61b2299ec0..b706dc0d0263 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic/
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 6df5a2738c95..3485e495ac6a 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -172,9 +172,9 @@ struct bnxt_re_dev {
struct list_head list;
unsigned long flags;
#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_STATS_CTX3_ALLOC 1
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
-#define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
#define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17
@@ -187,9 +187,6 @@ struct bnxt_re_dev {
int id;
- struct delayed_work worker;
- u8 cur_prio_map;
-
/* RCFW Channel */
struct bnxt_qplib_rcfw rcfw;
@@ -227,6 +224,13 @@ struct bnxt_re_dev {
struct workqueue_struct *dcb_wq;
struct dentry *cc_config;
struct bnxt_re_dbg_cc_config_params *cc_config_params;
+#define BNXT_VPD_FLD_LEN 32
+ char board_partno[BNXT_VPD_FLD_LEN];
+ /* RoCE mirror */
+ u16 mirror_vnic_id;
+ union ib_gid ugid;
+ u32 ugid_index;
+ u8 sniffer_flow_created : 1;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -243,6 +247,10 @@ int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *ou
int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
struct ib_mad *out_mad);
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id);
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
@@ -276,4 +284,7 @@ static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev)
#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192
#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192
+/*
+ * Scale the chip context's hwrm_cmd_max_timeout by 1000.
+ * NOTE(review): presumably a seconds-to-milliseconds conversion - confirm
+ * the units against the users of this macro.
+ */
+#define BNXT_RE_HWRM_CMD_TIMEOUT(rdev) \
+ ((rdev)->chip_ctx->hwrm_cmd_max_timeout * 1000)
+
#endif
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
index e632f1661b92..be5e9b5ca2f0 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.c
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -8,6 +8,7 @@
#include <linux/debugfs.h>
#include <linux/pci.h>
+#include <linux/seq_file.h>
#include <rdma/ib_addr.h>
#include "bnxt_ulp.h"
@@ -314,6 +315,40 @@ static const struct file_operations bnxt_re_cc_config_ops = {
.write = bnxt_re_cc_config_set,
};
+/*
+ * seq_file show routine behind the debugfs "info" file.
+ *
+ * Prints the device name, the resource watermarks and the CQ resize count
+ * from rdev->stats.res. When doorbell pacing is enabled it also prints the
+ * pacing counters and the current value of the DB FIFO register.
+ * Always returns 0.
+ */
+static int info_show(struct seq_file *m, void *unused)
+{
+ struct bnxt_re_dev *rdev = m->private;
+ struct bnxt_re_res_cntrs *res_s = &rdev->stats.res;
+
+ seq_puts(m, "Info:\n");
+ seq_printf(m, "Device Name\t\t: %s\n", dev_name(&rdev->ibdev.dev));
+ seq_printf(m, "PD Watermark\t\t: %llu\n", res_s->pd_watermark);
+ seq_printf(m, "AH Watermark\t\t: %llu\n", res_s->ah_watermark);
+ seq_printf(m, "QP Watermark\t\t: %llu\n", res_s->qp_watermark);
+ seq_printf(m, "RC QP Watermark\t\t: %llu\n", res_s->rc_qp_watermark);
+ seq_printf(m, "UD QP Watermark\t\t: %llu\n", res_s->ud_qp_watermark);
+ seq_printf(m, "SRQ Watermark\t\t: %llu\n", res_s->srq_watermark);
+ seq_printf(m, "CQ Watermark\t\t: %llu\n", res_s->cq_watermark);
+ seq_printf(m, "MR Watermark\t\t: %llu\n", res_s->mr_watermark);
+ seq_printf(m, "MW Watermark\t\t: %llu\n", res_s->mw_watermark);
+ seq_printf(m, "CQ Resize Count\t\t: %d\n", atomic_read(&res_s->resize_count));
+ /* pacing statistics are only shown when doorbell pacing is active */
+ if (rdev->pacing.dbr_pacing) {
+ seq_printf(m, "DB Pacing Reschedule\t: %llu\n", rdev->stats.pacing.resched);
+ seq_printf(m, "DB Pacing Complete\t: %llu\n", rdev->stats.pacing.complete);
+ seq_printf(m, "DB Pacing Alerts\t: %llu\n", rdev->stats.pacing.alerts);
+ /* read live from the device through BAR0 on every show */
+ seq_printf(m, "DB FIFO Register\t: 0x%x\n",
+ readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off));
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(info);
+
+/* Create the owner-read-only (0400) "info" file under the device's debugfs root. */
+static void bnxt_re_debugfs_add_info(struct bnxt_re_dev *rdev)
+{
+ debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops);
+}
+
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
@@ -325,6 +360,8 @@ void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+ bnxt_re_debugfs_add_info(rdev);
+
rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 44bb082e0a60..651cf9d0e0c7 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -51,25 +51,6 @@
#include "hw_counters.h"
static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
- [BNXT_RE_ACTIVE_PD].name = "active_pds",
- [BNXT_RE_ACTIVE_AH].name = "active_ahs",
- [BNXT_RE_ACTIVE_QP].name = "active_qps",
- [BNXT_RE_ACTIVE_RC_QP].name = "active_rc_qps",
- [BNXT_RE_ACTIVE_UD_QP].name = "active_ud_qps",
- [BNXT_RE_ACTIVE_SRQ].name = "active_srqs",
- [BNXT_RE_ACTIVE_CQ].name = "active_cqs",
- [BNXT_RE_ACTIVE_MR].name = "active_mrs",
- [BNXT_RE_ACTIVE_MW].name = "active_mws",
- [BNXT_RE_WATERMARK_PD].name = "watermark_pds",
- [BNXT_RE_WATERMARK_AH].name = "watermark_ahs",
- [BNXT_RE_WATERMARK_QP].name = "watermark_qps",
- [BNXT_RE_WATERMARK_RC_QP].name = "watermark_rc_qps",
- [BNXT_RE_WATERMARK_UD_QP].name = "watermark_ud_qps",
- [BNXT_RE_WATERMARK_SRQ].name = "watermark_srqs",
- [BNXT_RE_WATERMARK_CQ].name = "watermark_cqs",
- [BNXT_RE_WATERMARK_MR].name = "watermark_mrs",
- [BNXT_RE_WATERMARK_MW].name = "watermark_mws",
- [BNXT_RE_RESIZE_CQ_CNT].name = "resize_cq_cnt",
[BNXT_RE_RX_PKTS].name = "rx_pkts",
[BNXT_RE_RX_BYTES].name = "rx_bytes",
[BNXT_RE_TX_PKTS].name = "tx_pkts",
@@ -79,22 +60,22 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
[BNXT_RE_TX_DISCARDS].name = "tx_roce_discards",
[BNXT_RE_RX_ERRORS].name = "rx_roce_errors",
[BNXT_RE_RX_DISCARDS].name = "rx_roce_discards",
- [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits",
- [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "seq_err_naks_rcvd",
- [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded",
- [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_naks_rcvd",
- [BNXT_RE_MISSING_RESP].name = "missing_resp",
+ [BNXT_RE_TO_RETRANSMITS].name = "local_ack_timeout_err",
+ [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "packet_seq_err",
+ [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded",
+ [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_nak_retry_err",
+ [BNXT_RE_MISSING_RESP].name = "implied_nak_seq_err",
[BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err",
[BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err",
[BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err",
[BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err",
[BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err",
- [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err",
- [BNXT_RE_REMOTE_ACCESS_ERR].name = "remote_access_err",
+ [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "req_remote_invalid_request",
+ [BNXT_RE_REMOTE_ACCESS_ERR].name = "req_remote_access_errors",
[BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err",
- [BNXT_RE_DUP_REQ].name = "dup_req",
+ [BNXT_RE_DUP_REQ].name = "duplicate_request",
[BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max",
- [BNXT_RE_RES_LENGTH_MISMATCH].name = "res_length_mismatch",
+ [BNXT_RE_RES_LENGTH_MISMATCH].name = "resp_local_length_error",
[BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe",
[BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err",
[BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey",
@@ -118,7 +99,7 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
[BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err",
[BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err",
- [BNXT_RE_OUT_OF_SEQ_ERR].name = "oos_drop_count",
+ [BNXT_RE_OUT_OF_SEQ_ERR].name = "out_of_sequence",
[BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req",
[BNXT_RE_TX_READ_REQ].name = "tx_read_req",
[BNXT_RE_TX_READ_RES].name = "tx_read_resp",
@@ -126,23 +107,22 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
[BNXT_RE_TX_SEND_REQ].name = "tx_send_req",
[BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts",
[BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes",
- [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req",
- [BNXT_RE_RX_READ_REQ].name = "rx_read_req",
+ [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_requests",
+ [BNXT_RE_RX_READ_REQ].name = "rx_read_requests",
[BNXT_RE_RX_READ_RESP].name = "rx_read_resp",
- [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req",
+ [BNXT_RE_RX_WRITE_REQ].name = "rx_write_requests",
[BNXT_RE_RX_SEND_REQ].name = "rx_send_req",
[BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts",
[BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes",
[BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts",
[BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes",
- [BNXT_RE_OOB].name = "rx_out_of_buffer",
- [BNXT_RE_TX_CNP].name = "tx_cnp_pkts",
- [BNXT_RE_RX_CNP].name = "rx_cnp_pkts",
- [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts",
- [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule",
- [BNXT_RE_PACING_CMPL].name = "pacing_complete",
- [BNXT_RE_PACING_ALERT].name = "pacing_alerts",
- [BNXT_RE_DB_FIFO_REG].name = "db_fifo_register",
+ [BNXT_RE_OOB].name = "out_of_buffer",
+ [BNXT_RE_TX_CNP].name = "np_cnp_pkts",
+ [BNXT_RE_RX_CNP].name = "rp_cnp_handled",
+ [BNXT_RE_RX_ECN].name = "np_ecn_marked_roce_packets",
+ [BNXT_RE_REQ_CQE_ERROR].name = "req_cqe_error",
+ [BNXT_RE_RESP_CQE_ERROR].name = "resp_cqe_error",
+ [BNXT_RE_RESP_REMOTE_ACCESS_ERRS].name = "resp_remote_access_errors",
};
static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
@@ -273,18 +253,20 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
err_s->res_rx_pci_err;
stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
err_s->res_oos_drop_count;
-}
-
-static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
- struct rdma_hw_stats *stats)
-{
- struct bnxt_re_db_pacing_stats *pacing_s = &rdev->stats.pacing;
-
- stats->value[BNXT_RE_PACING_RESCHED] = pacing_s->resched;
- stats->value[BNXT_RE_PACING_CMPL] = pacing_s->complete;
- stats->value[BNXT_RE_PACING_ALERT] = pacing_s->alerts;
- stats->value[BNXT_RE_DB_FIFO_REG] =
- readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
+ stats->value[BNXT_RE_REQ_CQE_ERROR] =
+ err_s->bad_resp_err +
+ err_s->local_qp_op_err +
+ err_s->local_protection_err +
+ err_s->mem_mgmt_op_err +
+ err_s->remote_invalid_req_err +
+ err_s->remote_access_err +
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_RESP_CQE_ERROR] =
+ err_s->res_cmp_err +
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RESP_REMOTE_ACCESS_ERRS] =
+ err_s->res_rx_no_perm +
+ err_s->res_tx_no_perm;
}
int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
@@ -382,7 +364,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
u32 port, int index)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct bnxt_re_res_cntrs *res_s = &rdev->stats.res;
struct bnxt_qplib_roce_stats *err_s = NULL;
struct ctx_hw_stats *hw_stats = NULL;
int rc = 0;
@@ -391,26 +372,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
if (!port || !stats)
return -EINVAL;
- stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count);
- stats->value[BNXT_RE_ACTIVE_RC_QP] = atomic_read(&res_s->rc_qp_count);
- stats->value[BNXT_RE_ACTIVE_UD_QP] = atomic_read(&res_s->ud_qp_count);
- stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count);
- stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count);
- stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count);
- stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count);
- stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count);
- stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count);
- stats->value[BNXT_RE_WATERMARK_QP] = res_s->qp_watermark;
- stats->value[BNXT_RE_WATERMARK_RC_QP] = res_s->rc_qp_watermark;
- stats->value[BNXT_RE_WATERMARK_UD_QP] = res_s->ud_qp_watermark;
- stats->value[BNXT_RE_WATERMARK_SRQ] = res_s->srq_watermark;
- stats->value[BNXT_RE_WATERMARK_CQ] = res_s->cq_watermark;
- stats->value[BNXT_RE_WATERMARK_MR] = res_s->mr_watermark;
- stats->value[BNXT_RE_WATERMARK_MW] = res_s->mw_watermark;
- stats->value[BNXT_RE_WATERMARK_PD] = res_s->pd_watermark;
- stats->value[BNXT_RE_WATERMARK_AH] = res_s->ah_watermark;
- stats->value[BNXT_RE_RESIZE_CQ_CNT] = atomic_read(&res_s->resize_count);
-
if (hw_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(hw_stats->tx_bcast_pkts);
@@ -449,8 +410,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
goto done;
}
}
- if (rdev->pacing.dbr_pacing && bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
- bnxt_re_copy_db_pacing_stats(rdev, stats);
}
done:
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index e541b6f8ca9f..09d371d442aa 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -41,25 +41,6 @@
#define __BNXT_RE_HW_STATS_H__
enum bnxt_re_hw_stats {
- BNXT_RE_ACTIVE_PD,
- BNXT_RE_ACTIVE_AH,
- BNXT_RE_ACTIVE_QP,
- BNXT_RE_ACTIVE_RC_QP,
- BNXT_RE_ACTIVE_UD_QP,
- BNXT_RE_ACTIVE_SRQ,
- BNXT_RE_ACTIVE_CQ,
- BNXT_RE_ACTIVE_MR,
- BNXT_RE_ACTIVE_MW,
- BNXT_RE_WATERMARK_PD,
- BNXT_RE_WATERMARK_AH,
- BNXT_RE_WATERMARK_QP,
- BNXT_RE_WATERMARK_RC_QP,
- BNXT_RE_WATERMARK_UD_QP,
- BNXT_RE_WATERMARK_SRQ,
- BNXT_RE_WATERMARK_CQ,
- BNXT_RE_WATERMARK_MR,
- BNXT_RE_WATERMARK_MW,
- BNXT_RE_RESIZE_CQ_CNT,
BNXT_RE_RX_PKTS,
BNXT_RE_RX_BYTES,
BNXT_RE_TX_PKTS,
@@ -129,10 +110,9 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_CNP,
BNXT_RE_RX_CNP,
BNXT_RE_RX_ECN,
- BNXT_RE_PACING_RESCHED,
- BNXT_RE_PACING_CMPL,
- BNXT_RE_PACING_ALERT,
- BNXT_RE_DB_FIFO_REG,
+ BNXT_RE_REQ_CQE_ERROR,
+ BNXT_RE_RESP_CQE_ERROR,
+ BNXT_RE_RESP_REMOTE_ACCESS_ERRS,
BNXT_RE_NUM_EXT_COUNTERS
};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 260dc67b8b87..4dab5ca7362b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -288,7 +288,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
}
port_attr->max_mtu = IB_MTU_4096;
port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
- port_attr->gid_tbl_len = dev_attr->max_sgid;
+ /* One GID is reserved for RawEth QP. Report one less */
+ port_attr->gid_tbl_len = (rdev->rcfw.roce_mirror ? (dev_attr->max_sgid - 1) :
+ dev_attr->max_sgid);
port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP;
@@ -375,7 +377,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
if (!ctx)
return -EINVAL;
- if (sgid_tbl && sgid_tbl->active) {
+ if (sgid_tbl->active) {
if (ctx->idx >= sgid_tbl->max)
return -EINVAL;
gid_to_del = &sgid_tbl->tbl[ctx->idx].gid;
@@ -429,7 +431,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
rdev->qplib_res.netdev->dev_addr,
- vlan_id, true, &tbl_idx);
+ vlan_id, true, &tbl_idx, false, 0);
if (rc == -EALREADY) {
ctx_tbl = sgid_tbl->ctx;
ctx_tbl[tbl_idx]->refcnt++;
@@ -955,6 +957,20 @@ fail:
return rc;
}
+/*
+ * Remove the device's unique GID (rdev->ugid) from the SGID table.
+ * Nothing is done when rcfw.roce_mirror is not set. A failing delete is
+ * only logged; no error is returned to the caller.
+ */
+static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev)
+{
+	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+	struct bnxt_qplib_gid *gid = (struct bnxt_qplib_gid *)&rdev->ugid;
+	int err;
+
+	if (rdev->rcfw.roce_mirror) {
+		err = bnxt_qplib_del_sgid(sgid_tbl, gid, 0xFFFF, true);
+		if (err)
+			dev_err(rdev_to_dev(rdev),
+				"Failed to delete unique GID, rc: %d\n", err);
+	}
+}
+
/* Queue Pairs */
int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
@@ -994,6 +1010,9 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
atomic_dec(&rdev->stats.res.ud_qp_count);
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ bnxt_re_del_unique_gid(rdev);
+
ib_umem_release(qp->rumem);
ib_umem_release(qp->sumem);
@@ -1018,6 +1037,8 @@ static u8 __from_ib_qp_type(enum ib_qp_type type)
return CMDQ_CREATE_QP_TYPE_RC;
case IB_QPT_UD:
return CMDQ_CREATE_QP_TYPE_UD;
+ case IB_QPT_RAW_PACKET:
+ return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE;
default:
return IB_QPT_MAX;
}
@@ -1595,6 +1616,29 @@ static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
return rc;
}
+/*
+ * Build the device's unique GID in rdev->ugid and add it to the SGID
+ * table.
+ *
+ * The GID is the fixed prefix 0xfe8000000000abcd followed by an interface
+ * id that addrconf_ifid_eui48() derives from rdev->netdev. The table index
+ * chosen by bnxt_qplib_add_sgid() is stored in rdev->ugid_index, and the
+ * fw_id of the stats3 context is passed along with the request.
+ *
+ * Returns 0 when rcfw.roce_mirror is not set or the add succeeded,
+ * otherwise the error from bnxt_qplib_add_sgid() (which is also logged).
+ */
+static int bnxt_re_add_unique_gid(struct bnxt_re_dev *rdev)
+{
+	struct bnxt_qplib_gid *gid = (struct bnxt_qplib_gid *)&rdev->ugid;
+	int err;
+
+	if (!rdev->rcfw.roce_mirror)
+		return 0;
+
+	rdev->ugid.global.subnet_prefix = cpu_to_be64(0xfe8000000000abcdLL);
+	addrconf_ifid_eui48(&rdev->ugid.raw[8], rdev->netdev);
+
+	err = bnxt_qplib_add_sgid(&rdev->qplib_res.sgid_tbl, gid,
+				  rdev->qplib_res.netdev->dev_addr,
+				  0xFFFF, true, &rdev->ugid_index, true,
+				  rdev->qplib_ctx.stats3.fw_id);
+	if (err)
+		dev_err(rdev_to_dev(rdev),
+			"Failed to add unique GID. rc = %d\n", err);
+
+	return err;
+}
+
int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata)
{
@@ -1656,6 +1700,17 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
}
}
+ /* Support for RawEth QP is added to capture TCP pkt dump.
+ * So unique SGID is used to avoid incorrect statistics on per
+ * function stats_ctx
+ */
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) {
+ rc = bnxt_re_add_unique_gid(rdev);
+ if (rc)
+ goto qp_destroy;
+ qp->qplib_qp.ugid_index = rdev->ugid_index;
+ }
+
qp->ib_qp.qp_num = qp->qplib_qp.id;
if (qp_init_attr->qp_type == IB_QPT_GSI)
rdev->gsi_ctx.gsi_qp = qp;
@@ -2301,7 +2356,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->pkey_index = qplib_qp->pkey_index;
qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label,
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->udp_sport,
qplib_qp->ah.host_sgid_index,
qplib_qp->ah.hop_limit,
qplib_qp->ah.traffic_class);
@@ -3248,9 +3303,9 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->resize_umem)) {
rc = PTR_ERR(cq->resize_umem);
+ ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %pe\n",
+ __func__, cq->resize_umem);
cq->resize_umem = NULL;
- ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %d\n",
- __func__, rc);
goto fail;
}
cq->resize_cqe = entries;
@@ -4392,6 +4447,93 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
}
}
+/*
+ * Allocate the vnic and configure it with the id of @qp.
+ *
+ * If the configure step fails, the vnic allocated here is freed again
+ * before returning. Returns 0 on success or the error of the failing
+ * HWRM step.
+ */
+static int bnxt_re_setup_vnic(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+	int err = bnxt_re_hwrm_alloc_vnic(rdev);
+
+	if (err)
+		return err;
+
+	err = bnxt_re_hwrm_cfg_vnic(rdev, qp->qplib_qp.id);
+	if (err)
+		bnxt_re_hwrm_free_vnic(rdev);
+
+	return err;
+}
+
+/*
+ * create_flow verb: set up the device's single sniffer flow on @ib_qp.
+ *
+ * Only IB_FLOW_ATTR_SNIFFER is accepted, and only when rcfw.roce_mirror
+ * is set; anything else yields -EOPNOTSUPP. Under rdev->qp_lock the
+ * function refuses a second flow (-EBUSY), allocates the flow object,
+ * sets up the vnic for the QP and asks qplib to create the flow. On
+ * success rdev->sniffer_flow_created is set and the embedded ib_flow is
+ * returned; on failure everything done so far is undone and an ERR_PTR
+ * is returned. @udata is not used.
+ */
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+				    struct ib_flow_attr *attr,
+				    struct ib_udata *udata)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_re_dev *rdev = qp->rdev;
+	struct bnxt_re_flow *flow;
+	int rc;
+
+	if (!(attr->type == IB_FLOW_ATTR_SNIFFER && rdev->rcfw.roce_mirror))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&rdev->qp_lock);
+
+	/* Only one sniffer flow may exist per device. */
+	if (rdev->sniffer_flow_created) {
+		ibdev_err(&rdev->ibdev, "RoCE Mirroring is already Configured\n");
+		rc = -EBUSY;
+		goto out_unlock;
+	}
+
+	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+	if (!flow) {
+		rc = -ENOMEM;
+		goto out_unlock;
+	}
+	flow->rdev = rdev;
+
+	rc = bnxt_re_setup_vnic(rdev, qp);
+	if (rc)
+		goto out_free_flow;
+
+	rc = bnxt_qplib_create_flow(&rdev->qplib_res);
+	if (rc) {
+		/* The vnic was set up above; release it again. */
+		bnxt_re_hwrm_free_vnic(rdev);
+		goto out_free_flow;
+	}
+
+	rdev->sniffer_flow_created = 1;
+	mutex_unlock(&rdev->qp_lock);
+
+	return &flow->ib_flow;
+
+out_free_flow:
+	mutex_unlock(&rdev->qp_lock);
+	kfree(flow);
+	return ERR_PTR(rc);
+
+out_unlock:
+	mutex_unlock(&rdev->qp_lock);
+	return ERR_PTR(rc);
+}
+
+/*
+ * destroy_flow verb: tear down the sniffer flow behind @flow_id.
+ *
+ * Under rdev->qp_lock the qplib flow is destroyed, the
+ * sniffer_flow_created marker is cleared and the vnic is freed. These
+ * steps and the final kfree() happen even when bnxt_qplib_destroy_flow()
+ * fails; that failure is logged at debug level and its code is returned.
+ */
+int bnxt_re_destroy_flow(struct ib_flow *flow_id)
+{
+	struct bnxt_re_flow *sniffer;
+	struct bnxt_re_dev *rdev;
+	int err;
+
+	sniffer = container_of(flow_id, struct bnxt_re_flow, ib_flow);
+	rdev = sniffer->rdev;
+
+	mutex_lock(&rdev->qp_lock);
+
+	err = bnxt_qplib_destroy_flow(&rdev->qplib_res);
+	if (err)
+		ibdev_dbg(&rdev->ibdev, "failed to destroy_flow rc = %d\n", err);
+
+	rdev->sniffer_flow_created = 0;
+	bnxt_re_hwrm_free_vnic(rdev);
+
+	mutex_unlock(&rdev->qp_lock);
+
+	kfree(sniffer);
+	return err;
+}
+
static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id)
{
struct bnxt_re_cq *cq = NULL, *tmp_cq;
@@ -4604,7 +4746,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *
return err;
err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI,
- &dpi, sizeof(length));
+ &dpi, sizeof(dpi));
if (err)
return err;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index fe00ab691a51..76ba9ab04d5c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -164,6 +164,11 @@ struct bnxt_re_user_mmap_entry {
u8 mmap_flag;
};
+struct bnxt_re_flow { /* driver flow object allocated by bnxt_re_create_flow() */
+ struct ib_flow ib_flow; /* embedded core flow; bnxt_re_destroy_flow() uses container_of() on it */
+ struct bnxt_re_dev *rdev; /* owning device, set in bnxt_re_create_flow() */
+};
+
static inline u16 bnxt_re_get_swqe_size(int nsge)
{
return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
@@ -267,6 +272,11 @@ struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
struct uverbs_attr_bundle *attrs);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+ struct ib_flow_attr *attr,
+ struct ib_udata *udata);
+int bnxt_re_destroy_flow(struct ib_flow *flow_id);
+
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index df7cf8d68e27..b13810572c2e 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -80,6 +80,7 @@ MODULE_LICENSE("Dual BSD/GPL");
static DEFINE_MUTEX(bnxt_re_mutex);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev);
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset);
@@ -188,6 +189,10 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
rdev->qplib_res.en_dev = en_dev;
+ rc = bnxt_re_query_hwrm_intf_version(rdev);
+ if (rc)
+ goto free_dev_attr;
+
bnxt_re_set_drv_mode(rdev);
bnxt_re_set_db_offset(rdev);
@@ -540,6 +545,72 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
fw_msg->timeout = timeout;
}
+/*
+ * Send HWRM_VNIC_FREE for rdev->mirror_vnic_id.
+ *
+ * No response buffer is supplied. A failing command is only logged at
+ * debug level; nothing is returned to the caller.
+ */
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev)
+{
+	struct hwrm_vnic_free_input free_req = {};
+	struct bnxt_fw_msg msg = {};
+	int err;
+
+	bnxt_re_init_hwrm_hdr((void *)&free_req, HWRM_VNIC_FREE);
+	free_req.vnic_id = cpu_to_le32(rdev->mirror_vnic_id);
+
+	bnxt_re_fill_fw_msg(&msg, (void *)&free_req, sizeof(free_req),
+			    NULL, 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+
+	err = bnxt_send_msg(rdev->en_dev, &msg);
+	if (err)
+		ibdev_dbg(&rdev->ibdev, "Failed to free vnic, rc = %d\n", err);
+}
+
+/*
+ * Send HWRM_VNIC_ALLOC asking for the specific id rdev->mirror_vnic_id
+ * (the VNIC_ID_VALID flag is set in the request).
+ *
+ * Returns the result of bnxt_send_msg(); a failure is additionally
+ * logged at debug level.
+ */
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev)
+{
+	struct hwrm_vnic_alloc_output alloc_resp = {};
+	struct hwrm_vnic_alloc_input alloc_req = {};
+	struct bnxt_fw_msg msg = {};
+	int err;
+
+	bnxt_re_init_hwrm_hdr((void *)&alloc_req, HWRM_VNIC_ALLOC);
+	alloc_req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+	alloc_req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID);
+
+	bnxt_re_fill_fw_msg(&msg, (void *)&alloc_req, sizeof(alloc_req),
+			    (void *)&alloc_resp, sizeof(alloc_resp),
+			    BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+
+	err = bnxt_send_msg(rdev->en_dev, &msg);
+	if (err)
+		ibdev_dbg(&rdev->ibdev, "Failed to alloc vnic, rc = %d\n", err);
+
+	return err;
+}
+
+/*
+ * Send HWRM_VNIC_CFG for rdev->mirror_vnic_id.
+ *
+ * The request sets the ROCE_ONLY_VNIC_MODE flag and enables two fields:
+ * the raw QP id (@qp_id) and the MRU, which is the netdev MTU plus
+ * VLAN_ETH_HLEN. No response buffer is supplied.
+ *
+ * Returns the result of bnxt_send_msg(); a failure is additionally
+ * logged at debug level.
+ */
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id)
+{
+	struct hwrm_vnic_cfg_input cfg_req = {};
+	struct bnxt_fw_msg msg = {};
+	int err;
+
+	bnxt_re_init_hwrm_hdr((void *)&cfg_req, HWRM_VNIC_CFG);
+
+	cfg_req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+	cfg_req.flags = cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE);
+	cfg_req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_RAW_QP_ID |
+				      VNIC_CFG_REQ_ENABLES_MRU);
+	cfg_req.raw_qp_id = cpu_to_le32(qp_id);
+	cfg_req.mru = cpu_to_le16(rdev->netdev->mtu + VLAN_ETH_HLEN);
+
+	bnxt_re_fill_fw_msg(&msg, (void *)&cfg_req, sizeof(cfg_req),
+			    NULL, 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+
+	err = bnxt_send_msg(rdev->en_dev, &msg);
+	if (err)
+		ibdev_dbg(&rdev->ibdev, "Failed to cfg vnic, rc = %d\n", err);
+
+	return err;
+}
+
/* Query device config using common hwrm */
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset)
@@ -553,11 +624,12 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
req.fid = cpu_to_le16(0xffff);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (!rc) {
*db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
*offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
+ rdev->mirror_vnic_id = le16_to_cpu(resp.mirror_vnic_id);
}
return rc;
}
@@ -577,7 +649,7 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
req.fid = cpu_to_le16(0xffff);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
@@ -587,6 +659,8 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
flags_ext2 = le32_to_cpu(resp.flags_ext2);
cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ||
flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED;
+ cctx->modes.roce_mirror = !!(le32_to_cpu(resp.flags_ext3) &
+ FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED);
return 0;
}
@@ -603,7 +677,7 @@ static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
cctx = rdev->chip_ctx;
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
return rc;
@@ -842,20 +916,12 @@ static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
- struct bnxt_en_dev *en_dev;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ring_free_input req = {};
struct hwrm_ring_free_output resp;
struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!rdev)
- return rc;
-
- en_dev = rdev->en_dev;
-
- if (!en_dev)
- return rc;
-
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
return 0;
@@ -863,7 +929,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
@@ -881,9 +947,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!en_dev)
- return rc;
-
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
req.enables = 0;
req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
@@ -899,7 +962,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
req.ring_type = ring_attr->type;
req.int_mode = ring_attr->mode;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (!rc)
*fw_ring_id = le16_to_cpu(resp.ring_id);
@@ -916,16 +979,13 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!en_dev)
- return rc;
-
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
return 0;
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
@@ -935,8 +995,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
}
static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
- dma_addr_t dma_map,
- u32 *fw_stats_ctx_id)
+ struct bnxt_qplib_stats *stats)
{
struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
struct hwrm_stat_ctx_alloc_output resp = {};
@@ -945,21 +1004,18 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
-
- if (!en_dev)
- return rc;
+ stats->fw_id = INVALID_STATS_CTX_ID;
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
req.update_period_ms = cpu_to_le32(1000);
- req.stats_dma_addr = cpu_to_le64(dma_map);
+ req.stats_dma_addr = cpu_to_le64(stats->dma_map);
req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
rc = bnxt_send_msg(en_dev, &fw_msg);
if (!rc)
- *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+ stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
return rc;
}
@@ -975,7 +1031,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
- return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->revision);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -985,13 +1041,31 @@ static ssize_t hca_type_show(struct device *device,
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
- return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
+/*
+ * sysfs "board_id" show handler.
+ *
+ * For a virtual function the value is the PCI device id formatted as
+ * "0x<id>-VF"; otherwise it is the stored board part number. At most
+ * BNXT_VPD_FLD_LEN - 1 bytes of the part number are copied into a
+ * zero-initialised buffer, so the result is always NUL-terminated.
+ */
+static ssize_t board_id_show(struct device *device, struct device_attribute *attr,
+			     char *buf)
+{
+	struct bnxt_re_dev *rdev =
+		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+	char id[BNXT_VPD_FLD_LEN] = {};
+
+	if (rdev->is_virtfn)
+		scnprintf(id, sizeof(id), "0x%x-VF",
+			  rdev->en_dev->pdev->device);
+	else
+		memcpy(id, rdev->board_partno, sizeof(id) - 1);
+
+	return sysfs_emit(buf, "%s\n", id);
+}
+
static struct attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
NULL
};
@@ -1207,6 +1281,8 @@ static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq
goto err;
if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "srq_limit", srq->qplib_srq.threshold))
+ goto err;
nla_nest_end(msg, table_attr);
return 0;
@@ -1297,6 +1373,8 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
.req_notify_cq = bnxt_re_req_notify_cq,
.resize_cq = bnxt_re_resize_cq,
+ .create_flow = bnxt_re_create_flow,
+ .destroy_flow = bnxt_re_destroy_flow,
INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
@@ -1323,8 +1401,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */
ibdev->node_type = RDMA_NODE_IB_CA;
- strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
- strlen(BNXT_RE_DESC) + 5);
+ strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA");
ibdev->phys_port_cnt = 1;
addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
@@ -1850,81 +1927,6 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
mutex_unlock(&rdev->qp_lock);
}
-static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
-{
- struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
- struct bnxt_qplib_gid gid;
- u16 gid_idx, index;
- int rc = 0;
-
- if (!ib_device_try_get(&rdev->ibdev))
- return 0;
-
- for (index = 0; index < sgid_tbl->active; index++) {
- gid_idx = sgid_tbl->hw_id[index];
-
- if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
- sizeof(bnxt_qplib_gid_zero)))
- continue;
- /* need to modify the VLAN enable setting of non VLAN GID only
- * as setting is done for VLAN GID while adding GID
- */
- if (sgid_tbl->vlan[index])
- continue;
-
- memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
-
- rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
- rdev->qplib_res.netdev->dev_addr);
- }
-
- ib_device_put(&rdev->ibdev);
- return rc;
-}
-
-static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
-{
- u32 prio_map = 0, tmp_map = 0;
- struct net_device *netdev;
- struct dcb_app app = {};
-
- netdev = rdev->netdev;
-
- app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
- app.protocol = ETH_P_IBOE;
- tmp_map = dcb_ieee_getapp_mask(netdev, &app);
- prio_map = tmp_map;
-
- app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
- app.protocol = ROCE_V2_UDP_DPORT;
- tmp_map = dcb_ieee_getapp_mask(netdev, &app);
- prio_map |= tmp_map;
-
- return prio_map;
-}
-
-static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
-{
- u8 prio_map = 0;
-
- /* Get priority for roce */
- prio_map = bnxt_re_get_priority_mask(rdev);
-
- if (prio_map == rdev->cur_prio_map)
- return 0;
- rdev->cur_prio_map = prio_map;
- /* Actual priorities are not programmed as they are already
- * done by L2 driver; just enable or disable priority vlan tagging
- */
- if ((prio_map == 0 && rdev->qplib_res.prio) ||
- (prio_map != 0 && !rdev->qplib_res.prio)) {
- rdev->qplib_res.prio = prio_map;
- bnxt_re_update_gid(rdev);
- }
-
- return 0;
-}
-
static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
{
if (rdev->is_virtfn)
@@ -1945,7 +1947,31 @@ static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
}
-static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+/*
+ * Read the PCI VPD of the underlying device and copy the read-only
+ * part-number keyword into rdev->board_partno.
+ *
+ * At most BNXT_VPD_FLD_LEN - 1 bytes are copied. If the VPD cannot be
+ * read a warning is logged; if the keyword is missing nothing is copied.
+ * The VPD buffer is freed before returning in every case where it was
+ * obtained.
+ */
+static void bnxt_re_read_vpd_info(struct bnxt_re_dev *rdev)
+{
+	struct pci_dev *pdev = rdev->en_dev->pdev;
+	unsigned int vpd_len, partno_len;
+	int offset, copy_len;
+	u8 *vpd;
+
+	vpd = pci_vpd_alloc(pdev, &vpd_len);
+	if (IS_ERR(vpd)) {
+		pci_warn(pdev, "Unable to read VPD, err=%pe\n", vpd);
+		return;
+	}
+
+	offset = pci_vpd_find_ro_info_keyword(vpd, vpd_len,
+					      PCI_VPD_RO_KEYWORD_PARTNO,
+					      &partno_len);
+	if (offset >= 0) {
+		copy_len = min_t(int, partno_len, BNXT_VPD_FLD_LEN - 1);
+		memcpy(rdev->board_partno, vpd + offset, copy_len);
+	}
+
+	kfree(vpd);
+}
+
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ver_get_output resp = {};
@@ -1964,7 +1990,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
rc);
- return;
+ return rc;
}
cctx = rdev->chip_ctx;
@@ -1978,6 +2004,8 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
if (!cctx->hwrm_cmd_max_timeout)
cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
+
+ return 0;
}
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
@@ -2039,6 +2067,72 @@ static void bnxt_re_free_gid_ctx(struct bnxt_re_dev *rdev)
}
}
+static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats);
+ if (rc)
+ goto free_stat_mem;
+
+ return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
+
+ return rc;
+}
+
+static int bnxt_re_get_stats3_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return 0;
+
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats3);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats3);
+ if (rc)
+ goto free_stat_mem;
+
+ return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
+
+ return rc;
+}
+
+static void bnxt_re_put_stats3_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+
+ if (!rdev->rcfw.roce_mirror)
+ return;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats3.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
+}
+
+static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
+}
+
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
u8 type;
@@ -2049,8 +2143,7 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_net_unregister_async_event(rdev);
bnxt_re_uninit_dcb_wq(rdev);
- if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
- cancel_delayed_work_sync(&rdev->worker);
+ bnxt_re_put_stats3_ctx(rdev);
bnxt_re_free_gid_ctx(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
@@ -2064,8 +2157,8 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
if (rc)
ibdev_warn(&rdev->ibdev,
"Failed to deinitialize RCFW: %#x", rc);
- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
- bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ bnxt_re_put_stats_ctx(rdev);
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
@@ -2085,16 +2178,6 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
}
}
-/* worker thread for polling periodic events. Now used for QoS programming*/
-static void bnxt_re_worker(struct work_struct *work)
-{
- struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
- worker.work);
-
- bnxt_re_setup_qos(rdev);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
-}
-
static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
{
struct bnxt_re_ring_attr rattr = {};
@@ -2109,8 +2192,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
rc = bnxt_re_register_netdev(rdev);
if (rc) {
ibdev_err(&rdev->ibdev,
- "Failed to register with netedev: %#x\n", rc);
- return -EINVAL;
+ "Failed to register with Ethernet driver, rc %d\n",
+ rc);
+ return rc;
}
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
@@ -2148,8 +2232,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
/* Check whether VF or PF */
bnxt_re_get_sriov_func_type(rdev);
- bnxt_re_query_hwrm_intf_version(rdev);
-
/* Establish RCFW Communication Channel to initialize the context
* memory for the function and all child VFs
*/
@@ -2199,18 +2281,20 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
if (rc)
goto disable_rcfw;
+ bnxt_qplib_query_version(&rdev->rcfw);
bnxt_re_set_resource_limits(rdev);
- rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
- bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx));
- if (rc) {
- ibdev_err(&rdev->ibdev,
- "Failed to allocate QPLIB context: %#x\n", rc);
- goto disable_rcfw;
+ if (!rdev->is_virtfn &&
+ !bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate hw context: %#x\n", rc);
+ goto disable_rcfw;
+ }
}
- rc = bnxt_re_net_stats_ctx_alloc(rdev,
- rdev->qplib_ctx.stats.dma_map,
- &rdev->qplib_ctx.stats.fw_id);
+
+ rc = bnxt_re_get_stats_ctx(rdev);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate stats context: %#x\n", rc);
@@ -2249,15 +2333,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
if (rc)
ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n");
- rc = bnxt_re_setup_qos(rdev);
- if (rc)
- ibdev_info(&rdev->ibdev,
- "RoCE priority not yet configured\n");
-
- INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
- set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
-
if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
bnxt_re_vf_res_config(rdev);
}
@@ -2270,11 +2345,18 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_init_dcb_wq(rdev);
bnxt_re_net_register_async_event(rdev);
+ if (!rdev->is_virtfn)
+ bnxt_re_read_vpd_info(rdev);
+
+ rc = bnxt_re_get_stats3_ctx(rdev);
+ if (rc)
+ goto fail;
+
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
free_ctx:
- bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
disable_rcfw:
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
free_ring:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index ee36b3d82cc0..ce90d3d834d4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1307,6 +1307,7 @@ static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &res->sgid_tbl;
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_modify_qp_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
@@ -1358,9 +1359,14 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
req.flow_label = cpu_to_le32(qp->ah.flow_label);
- if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)
- req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id
- [qp->ah.sgid_index]);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) {
+ if (qp->type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ugid_index]);
+ else
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ah.sgid_index]);
+ }
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
req.hop_limit = qp->ah.hop_limit;
@@ -1464,6 +1470,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->access = sb->access;
qp->pkey_index = le16_to_cpu(sb->pkey);
qp->qkey = le32_to_cpu(sb->qkey);
+ qp->udp_sport = le16_to_cpu(sb->udp_src_port);
temp32[0] = le32_to_cpu(sb->dgid[0]);
temp32[1] = le32_to_cpu(sb->dgid[1]);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 4921a214c34c..b990d0c0ce1a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -299,6 +299,7 @@ struct bnxt_qplib_qp {
u8 smac[6];
u16 vlan_id;
u16 port_id;
+ u16 udp_sport;
u8 nw_type;
struct bnxt_qplib_ah ah;
@@ -344,6 +345,7 @@ struct bnxt_qplib_qp {
u32 msn_tbl_sz;
bool is_host_msn_tbl;
u8 tos_dscp;
+ u32 ugid_index;
};
#define BNXT_RE_MAX_MSG_SIZE 0x80000000
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 804bc773b4ef..295a9610f3e6 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -186,7 +186,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
* wait for command completion. Maximum holding interval is 8 second.
*
* Returns:
- * -ETIMEOUT if command is not completed in specific time interval.
+ * -ETIMEDOUT if command is not completed in specific time interval.
* 0 if command is completed by firmware.
*/
static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
@@ -366,6 +366,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
wmb();
writel(cmdq_prod, cmdq->cmdq_mbox.prod);
writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+ print_hex_dump_bytes("req: ", DUMP_PREFIX_OFFSET, msg->req, msg->req_sz);
spin_unlock_bh(&hwq->lock);
/* Return the CREQ response pointer */
return 0;
@@ -381,7 +382,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
* This function can not be called from non-sleepable context.
*
* Returns:
- * -ETIMEOUT if command is not completed in specific time interval.
+ * -ETIMEDOUT if command is not completed in specific time interval.
* 0 if command is completed by firmware.
*/
static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
@@ -631,6 +632,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
int rc = 0;
pdev = rcfw->pdev;
+ print_hex_dump_bytes("event: ", DUMP_PREFIX_OFFSET, qp_event, sizeof(*qp_event));
switch (qp_event->event) {
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
err_event = (struct creq_qp_error_notification *)qp_event;
@@ -903,6 +905,10 @@ skip_ctx_setup:
flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)
flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT;
+ if (bnxt_qplib_roce_mirror_supported(rcfw->res->cctx)) {
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED;
+ rcfw->roce_mirror = true;
+ }
req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index ff873c5f1b25..988c89b4232e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -236,6 +236,7 @@ struct bnxt_qplib_rcfw {
atomic_t timeout_send;
/* cached from chip cctx for quick reference in slow path */
u16 max_timeout;
+ bool roce_mirror;
};
struct bnxt_qplib_cmdqmsg {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index cc5c82d96839..875d7b52c06a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -53,12 +53,6 @@
#include "qplib_sp.h"
#include "qplib_rcfw.h"
-static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats);
-static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_chip_ctx *cctx,
- struct bnxt_qplib_stats *stats);
-
/* PBL */
static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
bool is_umem)
@@ -352,8 +346,8 @@ fail:
}
/* Context Tables */
-void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
- struct bnxt_qplib_ctx *ctx)
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
{
int i;
@@ -367,7 +361,6 @@ void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
/* restore original pde level before destroy */
ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level;
bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde);
- bnxt_qplib_free_stats_ctx(res->pdev, &ctx->stats);
}
static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res,
@@ -466,7 +459,7 @@ fail:
}
/*
- * Routine: bnxt_qplib_alloc_ctx
+ * Routine: bnxt_qplib_alloc_hwctx
* Description:
* Context tables are memories which are used by the chip fw.
* The 6 tables defined are:
@@ -486,17 +479,13 @@ fail:
* Returns:
* 0 if success, else -ERRORS
*/
-int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
- struct bnxt_qplib_ctx *ctx,
- bool virt_fn, bool is_p5)
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
int rc;
- if (virt_fn || is_p5)
- goto stats_alloc;
-
/* QPC Tables */
sginfo.pgsize = PAGE_SIZE;
sginfo.pgshft = PAGE_SHIFT;
@@ -542,16 +531,11 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr);
if (rc)
goto fail;
-stats_alloc:
- /* Stats */
- rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats);
- if (rc)
- goto fail;
return 0;
fail:
- bnxt_qplib_free_ctx(res, ctx);
+ bnxt_qplib_free_hwctx(res, ctx);
return rc;
}
@@ -832,8 +816,8 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
}
/* Stats */
-static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats)
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
{
if (stats->dma) {
dma_free_coherent(&pdev->dev, stats->size,
@@ -843,9 +827,9 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
stats->fw_id = -1;
}
-static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_chip_ctx *cctx,
- struct bnxt_qplib_stats *stats)
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 6a13927674b4..2ea3b7f232a3 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -65,6 +65,7 @@ struct bnxt_qplib_drv_modes {
bool db_push;
bool dbr_pacing;
u32 toggle_bits;
+ u8 roce_mirror;
};
enum bnxt_re_toggle_modes {
@@ -303,6 +304,7 @@ struct bnxt_qplib_ctx {
struct bnxt_qplib_hwq tim_tbl;
struct bnxt_qplib_tqm_ctx tqm_ctx;
struct bnxt_qplib_stats stats;
+ struct bnxt_qplib_stats stats3;
struct bnxt_qplib_vf_res vf_res;
};
@@ -432,15 +434,19 @@ void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev);
-void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
- struct bnxt_qplib_ctx *ctx);
-int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
- struct bnxt_qplib_ctx *ctx,
- bool virt_fn, bool is_p5);
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res);
void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
int bnxt_qplib_determine_atomics(struct pci_dev *dev);
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats);
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
struct bnxt_qplib_hwq *hwq, u32 cnt)
@@ -582,6 +588,11 @@ static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx)
return cctx->modes.dbr_pacing;
}
+static inline u8 bnxt_qplib_roce_mirror_supported(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.roce_mirror;
+}
+
static inline bool _is_alloc_mr_unified(u16 dev_cap_flags)
{
return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 68981399598d..9ef581ed785c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -66,14 +66,15 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
}
-static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
- char *fw_ver)
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw)
{
struct creq_query_version_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_query_version req = {};
+ struct bnxt_qplib_dev_attr *attr;
int rc;
+ attr = rcfw->res->dattr;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_VERSION,
sizeof(req));
@@ -82,10 +83,10 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return;
- fw_ver[0] = resp.fw_maj;
- fw_ver[1] = resp.fw_minor;
- fw_ver[2] = resp.fw_bld;
- fw_ver[3] = resp.fw_rsvd;
+ attr->fw_ver[0] = resp.fw_maj;
+ attr->fw_ver[1] = resp.fw_minor;
+ attr->fw_ver[2] = resp.fw_bld;
+ attr->fw_ver[3] = resp.fw_rsvd;
}
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
@@ -179,8 +180,6 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
attr->max_srq += le16_to_cpu(sb->max_srq_ext);
- bnxt_qplib_query_version(rcfw, attr->fw_ver);
-
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
tqm_alloc = (u8 *)&temp;
@@ -309,7 +308,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, const u8 *smac,
- u16 vlan_id, bool update, u32 *index)
+ u16 vlan_id, bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id)
{
struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
struct bnxt_qplib_res,
@@ -374,6 +374,9 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
+ req.stats_ctx = cpu_to_le16(CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID |
+ (u16)stats_ctx_id);
+
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
@@ -397,46 +400,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
return 0;
}
-int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, u16 gid_idx,
- const u8 *smac)
-{
- struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
- struct bnxt_qplib_res,
- sgid_tbl);
- struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct creq_modify_gid_resp resp = {};
- struct bnxt_qplib_cmdqmsg msg = {};
- struct cmdq_modify_gid req = {};
- int rc;
-
- bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
- CMDQ_BASE_OPCODE_MODIFY_GID,
- sizeof(req));
-
- req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]);
- req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]);
- req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]);
- req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]);
- if (res->prio) {
- req.vlan |= cpu_to_le16
- (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
- CMDQ_ADD_GID_VLAN_VLAN_EN);
- }
-
- /* MAC in network format */
- req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]);
- req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
- req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
-
- req.gid_index = cpu_to_le16(gid_idx);
-
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
- sizeof(resp), 0);
- rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
- return rc;
-}
-
/* AH */
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bool block)
@@ -1143,3 +1106,40 @@ out:
dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
return rc;
}
+
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags = (u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
+
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags &= ~((u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 09faf4a1e849..147b5d9c0313 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -323,7 +323,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u16 vlan_id, bool update);
int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id,
- bool update, u32 *index);
+ bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id);
int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u16 gid_idx,
const u8 *smac);
@@ -358,6 +359,9 @@ int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
u32 resp_size, void *resp_va);
int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res);
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res);
#define BNXT_VAR_MAX_WQE 4352
#define BNXT_VAR_MAX_SLOT_ALIGN 256
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 024845f945ff..99ecd72e72e2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -144,7 +144,8 @@ struct cmdq_base {
#define CMDQ_BASE_OPCODE_MODIFY_CQ 0x90UL
#define CMDQ_BASE_OPCODE_QUERY_QP_EXTEND 0x91UL
#define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
- #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT
+ #define CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG
u8 cmd_size;
__le16 flags;
__le16 cookie;
@@ -218,6 +219,7 @@ struct cmdq_initialize_fw {
#define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
#define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
#define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED 0x80UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -788,7 +790,8 @@ struct creq_query_qp_resp_sb {
#define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
__le16 pkey;
__le32 qkey;
- __le32 reserved32;
+ __le16 udp_src_port;
+ __le16 reserved16;
__le32 dgid[4];
__le32 flow_label;
__le16 sgid_index;
@@ -2108,6 +2111,43 @@ struct creq_query_roce_stats_ext_resp_sb {
__le64 dup_req;
};
+/* cmdq_roce_mirror_cfg (size:192b/24B) */
+struct cmdq_roce_mirror_cfg {
+ u8 opcode;
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_LAST \
+ CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mirror_flags;
+ #define CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE 0x1UL
+ u8 rsvd[7];
+};
+
+/* creq_roce_mirror_cfg_resp (size:128b/16B) */
+struct creq_roce_mirror_cfg_resp {
+ u8 type;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_SFT 0
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG 0x99UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG
+ u8 reserved48[6];
+};
+
/* cmdq_query_func (size:128b/16B) */
struct cmdq_query_func {
u8 opcode;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index b67747ae6a68..d892f55febe2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1228,9 +1228,8 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
if (!ctx->dev) {
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- pr_err("%s: initialization failed: %ld\n",
- pci_name(ctx->lldi.pdev),
- PTR_ERR(ctx->dev));
+ pr_err("%s: initialization failed: %pe\n",
+ pci_name(ctx->lldi.pdev), ctx->dev);
ctx->dev = NULL;
break;
}
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index bafd210dd43e..0e979ca10d24 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -30,6 +30,7 @@ struct efa_comp_ctx {
struct efa_admin_acq_entry *user_cqe;
u32 comp_size;
enum efa_cmd_status status;
+ u16 cmd_id;
u8 cmd_opcode;
u8 occupied;
};
@@ -333,6 +334,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
comp_ctx->comp_size = comp_size_in_bytes;
comp_ctx->user_cqe = comp;
comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+ comp_ctx->cmd_id = cmd_id;
reinit_completion(&comp_ctx->wait_event);
@@ -557,17 +559,19 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
if (comp_ctx->status == EFA_CMD_COMPLETED)
ibdev_err_ratelimited(
aq->efa_dev,
- "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
efa_com_cmd_str(comp_ctx->cmd_opcode),
comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
else
ibdev_err_ratelimited(
aq->efa_dev,
- "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ "The device didn't send any completion for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
efa_com_cmd_str(comp_ctx->cmd_opcode),
comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
err = -ETIME;
@@ -631,9 +635,9 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
if (IS_ERR(comp_ctx)) {
ibdev_err_ratelimited(
aq->efa_dev,
- "Failed to submit command %s (opcode %u) err %ld\n",
+ "Failed to submit command %s (opcode %u) err %pe\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+ cmd->aq_common_descriptor.opcode, comp_ctx);
up(&aq->avail_cmds);
atomic64_inc(&aq->stats.cmd_err);
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 886923d5fe50..d9a12681f843 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1788,7 +1788,8 @@ struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
access_flags);
if (IS_ERR(umem_dmabuf)) {
err = PTR_ERR(umem_dmabuf);
- ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+ ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
goto err_free;
}
@@ -1832,7 +1833,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
ibdev_dbg(&dev->ibdev,
- "Failed to pin and map user space memory[%d]\n", err);
+ "Failed to pin and map user space memory[%pe]\n",
+ mr->umem);
goto err_free;
}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index fdeec33c71da..109a3f3de911 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -149,7 +149,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
req.phy_addr[0] = mr->mem.mtt->buf_dma;
mtt_level = ERDMA_MR_MTT_1LEVEL;
} else {
- req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
+ req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
mtt_level = mr->mem.mtt->level;
}
} else if (mr->type != ERDMA_MR_TYPE_DMA) {
@@ -626,18 +626,27 @@ err_free_mtt:
return ERR_PTR(-ENOMEM);
}
-static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
- struct erdma_mtt *mtt)
+static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
+ u32 npages)
{
- dma_unmap_sg(&dev->pdev->dev, mtt->sglist,
- DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE);
- vfree(mtt->sglist);
+ u32 i;
+
+ for (i = 0; i < npages; i++)
+ dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
+ DMA_TO_DEVICE);
+}
+
+static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
+ vfree(mtt->dma_addrs);
}
static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
struct erdma_mtt *mtt)
{
- erdma_destroy_mtt_buf_sg(dev, mtt);
+ erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
vfree(mtt->buf);
kfree(mtt);
}
@@ -645,50 +654,69 @@ static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
struct erdma_mtt *low_mtt)
{
- struct scatterlist *sg;
- u32 idx = 0, i;
+ dma_addr_t *pg_addr = mtt->buf;
+ u32 i;
- for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
- mtt->buf[idx++] = sg_dma_address(sg);
+ for (i = 0; i < low_mtt->npages; i++)
+ pg_addr[i] = low_mtt->dma_addrs[i];
}
-static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
+static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
+ void *buf, u64 len)
{
- struct scatterlist *sglist;
- void *buf = mtt->buf;
- u32 npages, i, nsg;
+ dma_addr_t *pg_dma;
struct page *pg;
+ u32 npages, i;
+ void *addr;
- /* Failed if buf is not page aligned */
- if ((uintptr_t)buf & ~PAGE_MASK)
- return -EINVAL;
-
- npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
- sglist = vzalloc(npages * sizeof(*sglist));
- if (!sglist)
- return -ENOMEM;
+ npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
+ PAGE_SHIFT;
+ pg_dma = vcalloc(npages, sizeof(*pg_dma));
+ if (!pg_dma)
+ return 0;
- sg_init_table(sglist, npages);
+ addr = buf;
for (i = 0; i < npages; i++) {
- pg = vmalloc_to_page(buf);
+ pg = vmalloc_to_page(addr);
if (!pg)
goto err;
- sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
- buf += PAGE_SIZE;
+
+ pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
+ goto err;
+
+ addr += PAGE_SIZE;
}
- nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
- if (!nsg)
- goto err;
+ *dma_addrs = pg_dma;
- mtt->sglist = sglist;
- mtt->nsg = nsg;
+ return npages;
+err:
+ erdma_unmap_page_list(dev, pg_dma, i);
+ vfree(pg_dma);
return 0;
-err:
- vfree(sglist);
+}
- return -ENOMEM;
+static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ dma_addr_t *addrs;
+ u32 npages;
+
+ /* Failed if buf is not page aligned */
+ if ((uintptr_t)mtt->buf & ~PAGE_MASK)
+ return -EINVAL;
+
+ npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
+ if (!npages)
+ return -ENOMEM;
+
+ mtt->dma_addrs = addrs;
+ mtt->npages = npages;
+
+ return 0;
}
static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
@@ -707,12 +735,12 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
if (!mtt->buf)
goto err_free_mtt;
- ret = erdma_create_mtt_buf_sg(dev, mtt);
+ ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
if (ret)
goto err_free_mtt_buf;
- ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
- mtt->size, mtt->nsg);
+ ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
+ mtt->size, mtt->npages);
return mtt;
@@ -746,8 +774,8 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
level = 1;
/* convergence the mtt table. */
- while (mtt->nsg != 1 && level <= 3) {
- tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
+ while (mtt->npages != 1 && level <= 3) {
+ tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
if (IS_ERR(tmp_mtt)) {
ret = PTR_ERR(tmp_mtt);
goto err_free_mtt;
@@ -765,7 +793,7 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
mtt->level = level;
ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
- mtt->level, mtt->sglist[0].dma_address);
+ mtt->level, mtt->dma_addrs[0]);
return mtt;
err_free_mtt:
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index ef411b81fbd7..7d8d3fe501d5 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -99,8 +99,8 @@ struct erdma_mtt {
union {
dma_addr_t buf_dma;
struct {
- struct scatterlist *sglist;
- u32 nsg;
+ dma_addr_t *dma_addrs;
+ u32 npages;
u32 level;
};
};
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index 4250d077b06f..a98a4175e53b 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -64,9 +64,9 @@ int hfi1_cdev_init(int minor, const char *name,
if (IS_ERR(device)) {
ret = PTR_ERR(device);
+ pr_err("Could not create device for minor %d, %s (err %pe)\n",
+ minor, name, device);
device = NULL;
- pr_err("Could not create device for minor %d, %s (err %d)\n",
- minor, name, -ret);
cdev_del(cdev);
}
done:
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 719b7c34e238..5cfa4f8fbf3d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -990,7 +990,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
/* Clean up old mappings */
- for_each_cpu(cpu, cpu_online_mask) {
+ for_each_online_cpu(cpu) {
struct sdma_rht_node *rht_node;
/* Don't cleanup sdes that are set in the new mask */
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index b72625283fcf..9b1aece1b080 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -498,8 +498,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ntids, sizeof(*req->tids));
if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
- SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
- ntids, ret);
+ SDMA_DBG(req, "Failed to copy %d TIDs (%pe)", ntids,
+ tmp);
goto free_req;
}
req->tids = tmp;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 0f037e545520..31cb8699e198 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -594,8 +594,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
if (IS_ERR(mtr->umem)) {
- ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
- PTR_ERR(mtr->umem));
+ ibdev_err(ibdev, "failed to get umem, ret = %pe.\n",
+ mtr->umem);
return -ENOMEM;
}
} else {
@@ -605,8 +605,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
!mtr_has_mtt(buf_attr) ?
HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
- ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
- PTR_ERR(mtr->kmem));
+ ibdev_err(ibdev, "failed to alloc kmem, ret = %pe.\n",
+ mtr->kmem);
return PTR_ERR(mtr->kmem);
}
}
diff --git a/drivers/infiniband/hw/ionic/Kconfig b/drivers/infiniband/hw/ionic/Kconfig
new file mode 100644
index 000000000000..de6f10e9b6e9
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2018-2025, Advanced Micro Devices, Inc.
+
+config INFINIBAND_IONIC
+ tristate "AMD Pensando DSC RDMA/RoCE Support"
+ depends on NETDEVICES && ETHERNET && PCI && INET && IONIC
+ help
+ This enables RDMA/RoCE support for the AMD Pensando family of
+ Distributed Services Cards (DSCs).
+
+ To learn more, visit our website at
+ <https://www.amd.com/en/products/accelerators/pensando.html>.
+
+ To compile this driver as a module, choose M here. The module
+ will be called ionic_rdma.
diff --git a/drivers/infiniband/hw/ionic/Makefile b/drivers/infiniband/hw/ionic/Makefile
new file mode 100644
index 000000000000..957973742820
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/pensando/ionic
+
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic_rdma.o
+
+ionic_rdma-y := \
+ ionic_ibdev.o ionic_lif_cfg.o ionic_queue.o ionic_pgtbl.o ionic_admin.o \
+ ionic_controlpath.o ionic_datapath.o ionic_hw_stats.o
diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c
new file mode 100644
index 000000000000..2537aa55d12d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_admin.c
@@ -0,0 +1,1229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define IONIC_EQ_COUNT_MIN 4
+#define IONIC_AQ_COUNT_MIN 1
+
+/* not a valid queue position or negative error status */
+#define IONIC_ADMIN_POSTED 0x10000
+
+/* cpu can be held with irq disabled for COUNT * MS (for create/destroy_ah) */
+#define IONIC_ADMIN_BUSY_RETRY_COUNT 2000
+#define IONIC_ADMIN_BUSY_RETRY_MS 1
+
+/* admin queue will be considered failed if a command takes longer */
+#define IONIC_ADMIN_TIMEOUT (HZ * 2)
+#define IONIC_ADMIN_WARN (HZ / 8)
+
+/* delay before polling the admin cq, to tolerate and report missed events */
+#define IONIC_ADMIN_DELAY (HZ / 8)
+
+/* work queue for polling the event queue and admin cq */
+struct workqueue_struct *ionic_evt_workq;
+
+/*
+ * Report an admin command timeout on @aq.
+ *
+ * With aq->lock held: if the queue still holds entries, queue the device
+ * reset work (unless the aq is already killed), log the time elapsed since
+ * aq->stamp, and hex-dump the most recently posted entry when it is not
+ * the entry at the consumer index.
+ */
+static void ionic_admin_timedout(struct ionic_aq *aq)
+{
+	struct ionic_ibdev *dev = aq->dev;
+	unsigned long flags;
+	u16 last;
+
+	spin_lock_irqsave(&aq->lock, flags);
+
+	if (!ionic_queue_empty(&aq->q)) {
+		/* Reset ALL adminq if any one times out */
+		if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+			queue_work(ionic_evt_workq, &dev->reset_work);
+
+		ibdev_err(&dev->ibdev,
+			  "admin command timed out, aq %d after: %ums\n",
+			  aq->aqid, (u32)jiffies_to_msecs(jiffies - aq->stamp));
+
+		/* Index of the entry just behind the producer index */
+		last = (aq->q.prod - 1) & aq->q.mask;
+		if (last != aq->q.cons) {
+			ibdev_warn(&dev->ibdev,
+				   "admin pos %u (last posted)\n", last);
+			print_hex_dump(KERN_WARNING, "cmd ",
+				       DUMP_PREFIX_OFFSET, 16, 1,
+				       ionic_queue_at(&aq->q, last),
+				       BIT(aq->q.stride_log2), true);
+		}
+	}
+
+	spin_unlock_irqrestore(&aq->lock, flags);
+}
+
+/*
+ * Queue the admin delayed work on ionic_evt_workq with a delay of
+ * IONIC_ADMIN_DELAY, unless the device admin state is IONIC_ADMIN_KILLED.
+ */
+static void ionic_admin_reset_dwork(struct ionic_ibdev *dev)
+{
+	if (atomic_read(&dev->admin_state) != IONIC_ADMIN_KILLED)
+		queue_delayed_work(ionic_evt_workq, &dev->admin_dwork,
+				   IONIC_ADMIN_DELAY);
+}
+
+/*
+ * Restart the watchdog for @aq: record the current jiffies in aq->stamp
+ * and queue the device's admin delayed work. Does nothing once the aq is
+ * in IONIC_ADMIN_KILLED state.
+ */
+static void ionic_admin_reset_wdog(struct ionic_aq *aq)
+{
+	if (atomic_read(&aq->admin_state) != IONIC_ADMIN_KILLED) {
+		aq->stamp = jiffies;
+		ionic_admin_reset_dwork(aq->dev);
+	}
+}
+
+/*
+ * Look at the CQE at the producer index of @cq.
+ *
+ * Returns true and stores the entry in @cqe when its color matches
+ * cq->color; returns false (leaving @cqe untouched) otherwise.
+ */
+static bool ionic_admin_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+				 struct ionic_v1_cqe **cqe)
+{
+	struct ionic_v1_cqe *entry;
+
+	entry = ionic_queue_at_prod(&cq->q);
+	if (unlikely(cq->color != ionic_v1_cqe_color(entry)))
+		return false;
+
+	/* Prevent out-of-order reads of the CQE */
+	dma_rmb();
+
+	*cqe = entry;
+	return true;
+}
+
+/*
+ * Process admin completions and post pending admin requests for @aq.
+ * The caller must hold aq->lock (checked with lockdep_assert_held()).
+ *
+ * If the aq is IONIC_ADMIN_KILLED, every request on wr_prod and wr_post is
+ * completed locally with the killed state as its status and nothing else
+ * is done. Otherwise CQEs are consumed, the CQ doorbell is updated or the
+ * CQ is armed, and, only while the aq is IONIC_ADMIN_ACTIVE, requests on
+ * wr_post are copied into the queue as space allows and the aq doorbell
+ * is rung.
+ */
+static void ionic_admin_poll_locked(struct ionic_aq *aq)
+{
+	struct ionic_cq *cq = &aq->vcq->cq[0];
+	struct ionic_admin_wr *wr, *wr_next;
+	struct ionic_ibdev *dev = aq->dev;
+	u32 wr_strides, avlbl_strides;
+	struct ionic_v1_cqe *cqe;
+	u32 qtf, qid;
+	u16 old_prod;
+	u8 type;
+
+	lockdep_assert_held(&aq->lock);
+
+	if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED) {
+		/*
+		 * Requests already placed in the queue: wr->status holds the
+		 * queue position, so clear that q_wr slot before overwriting
+		 * the status with the admin state.
+		 */
+		list_for_each_entry_safe(wr, wr_next, &aq->wr_prod, aq_ent) {
+			INIT_LIST_HEAD(&wr->aq_ent);
+			aq->q_wr[wr->status].wr = NULL;
+			wr->status = atomic_read(&aq->admin_state);
+			complete_all(&wr->work);
+		}
+		INIT_LIST_HEAD(&aq->wr_prod);
+
+		/* Requests still waiting to be placed in the queue */
+		list_for_each_entry_safe(wr, wr_next, &aq->wr_post, aq_ent) {
+			INIT_LIST_HEAD(&wr->aq_ent);
+			wr->status = atomic_read(&aq->admin_state);
+			complete_all(&wr->work);
+		}
+		INIT_LIST_HEAD(&aq->wr_post);
+
+		return;
+	}
+
+	old_prod = cq->q.prod;
+
+	while (ionic_admin_next_cqe(dev, cq, &cqe)) {
+		qtf = ionic_v1_cqe_qtf(cqe);
+		qid = ionic_v1_cqe_qtf_qid(qtf);
+		type = ionic_v1_cqe_qtf_type(qtf);
+
+		/* Unexpected CQEs are logged and skipped, but still consumed */
+		if (unlikely(type != IONIC_V1_CQE_TYPE_ADMIN)) {
+			ibdev_warn_ratelimited(&dev->ibdev,
+					       "bad cqe type %u\n", type);
+			goto cq_next;
+		}
+
+		if (unlikely(qid != aq->aqid)) {
+			ibdev_warn_ratelimited(&dev->ibdev,
+					       "bad cqe qid %u\n", qid);
+			goto cq_next;
+		}
+
+		if (unlikely(be16_to_cpu(cqe->admin.cmd_idx) != aq->q.cons)) {
+			ibdev_warn_ratelimited(&dev->ibdev,
+					       "bad idx %u cons %u qid %u\n",
+					       be16_to_cpu(cqe->admin.cmd_idx),
+					       aq->q.cons, qid);
+			goto cq_next;
+		}
+
+		if (unlikely(ionic_queue_empty(&aq->q))) {
+			ibdev_warn_ratelimited(&dev->ibdev,
+					       "bad cqe for empty adminq\n");
+			goto cq_next;
+		}
+
+		/* The slot's wr may be NULL, e.g. after ionic_admin_cancel() */
+		wr = aq->q_wr[aq->q.cons].wr;
+		if (wr) {
+			aq->q_wr[aq->q.cons].wr = NULL;
+			list_del_init(&wr->aq_ent);
+
+			wr->cqe = *cqe;
+			wr->status = atomic_read(&aq->admin_state);
+			complete_all(&wr->work);
+		}
+
+		/* Release all strides recorded for this command */
+		ionic_queue_consume_entries(&aq->q,
+					    aq->q_wr[aq->q.cons].wqe_strides);
+
+cq_next:
+		ionic_queue_produce(&cq->q);
+		cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+	}
+
+	if (old_prod != cq->q.prod) {
+		/* CQEs were consumed: restart watchdog, update CQ doorbell */
+		ionic_admin_reset_wdog(aq);
+		cq->q.cons = cq->q.prod;
+		ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+				 ionic_queue_dbell_val(&cq->q));
+		queue_work(ionic_evt_workq, &aq->work);
+	} else if (!aq->armed) {
+		/* Nothing consumed and CQ not armed: arm it */
+		aq->armed = true;
+		cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+		ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+				 cq->q.dbell | IONIC_CQ_RING_ARM |
+				 cq->arm_any_prod);
+		queue_work(ionic_evt_workq, &aq->work);
+	}
+
+	/* New commands are only posted while the aq is active */
+	if (atomic_read(&aq->admin_state) != IONIC_ADMIN_ACTIVE)
+		return;
+
+	old_prod = aq->q.prod;
+
+	if (ionic_queue_empty(&aq->q) && !list_empty(&aq->wr_post))
+		ionic_admin_reset_wdog(aq);
+
+	if (list_empty(&aq->wr_post))
+		return;
+
+	do {
+		u8 *src;
+		int i, src_len;
+		size_t stride_len;
+
+		wr = list_first_entry(&aq->wr_post, struct ionic_admin_wr,
+				      aq_ent);
+		/* Strides needed for header plus command, rounded up */
+		wr_strides = (le16_to_cpu(wr->wqe.len) + ADMIN_WQE_HDR_LEN +
+			      (ADMIN_WQE_STRIDE - 1)) >> aq->q.stride_log2;
+		avlbl_strides = ionic_queue_length_remaining(&aq->q);
+
+		/* Not enough room: leave this and later requests on wr_post */
+		if (wr_strides > avlbl_strides)
+			break;
+
+		list_move(&wr->aq_ent, &aq->wr_prod);
+		/* While on wr_prod, wr->status records the queue position */
+		wr->status = aq->q.prod;
+		aq->q_wr[aq->q.prod].wr = wr;
+		aq->q_wr[aq->q.prod].wqe_strides = wr_strides;
+
+		src_len = le16_to_cpu(wr->wqe.len);
+		src = (uint8_t *)&wr->wqe.cmd;
+
+		/* First stride */
+		memcpy(ionic_queue_at_prod(&aq->q), &wr->wqe,
+		       ADMIN_WQE_HDR_LEN);
+		stride_len = ADMIN_WQE_STRIDE - ADMIN_WQE_HDR_LEN;
+		if (stride_len > src_len)
+			stride_len = src_len;
+		memcpy(ionic_queue_at_prod(&aq->q) + ADMIN_WQE_HDR_LEN,
+		       src, stride_len);
+		ibdev_dbg(&dev->ibdev, "post admin prod %u (%u strides)\n",
+			  aq->q.prod, wr_strides);
+		print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+				     ionic_queue_at_prod(&aq->q),
+				     BIT(aq->q.stride_log2), true);
+		ionic_queue_produce(&aq->q);
+
+		/* Remaining strides */
+		for (i = stride_len; i < src_len; i += stride_len) {
+			stride_len = ADMIN_WQE_STRIDE;
+
+			/* Final stride may be partial */
+			if (i + stride_len > src_len)
+				stride_len = src_len - i;
+
+			memcpy(ionic_queue_at_prod(&aq->q), src + i,
+			       stride_len);
+			print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+					     ionic_queue_at_prod(&aq->q),
+					     BIT(aq->q.stride_log2), true);
+			ionic_queue_produce(&aq->q);
+		}
+	} while (!list_empty(&aq->wr_post));
+
+	/* Ring the aq doorbell only if something was actually posted */
+	if (old_prod != aq->q.prod)
+		ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.aq_qtype,
+				 ionic_queue_dbell_val(&aq->q));
+}
+
+/*
+ * Delayed-work handler that checks every admin queue for stalls.
+ *
+ * For each aq with outstanding entries: once IONIC_ADMIN_WARN has elapsed
+ * since aq->stamp the queue is polled directly; if that makes no progress
+ * and IONIC_ADMIN_TIMEOUT has also elapsed, the aq is treated as timed
+ * out. A timeout is reported through ionic_admin_timedout(); otherwise the
+ * work is re-queued while any aq still has outstanding entries.
+ */
+static void ionic_admin_dwork(struct work_struct *ws)
+{
+	struct ionic_ibdev *dev =
+		container_of(ws, struct ionic_ibdev, admin_dwork.work);
+	struct ionic_aq *aq, *bad_aq = NULL;
+	bool do_reschedule = false;
+	unsigned long irqflags;
+	bool do_reset = false;
+	u16 pos;
+	int i;
+
+	for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+		aq = dev->aq_vec[i];
+
+		spin_lock_irqsave(&aq->lock, irqflags);
+
+		if (ionic_queue_empty(&aq->q))
+			goto next_aq;
+
+		/* Reschedule if any queue has outstanding work */
+		do_reschedule = true;
+
+		if (time_is_after_eq_jiffies(aq->stamp + IONIC_ADMIN_WARN))
+			/* Warning threshold not met, nothing to do */
+			goto next_aq;
+
+		/* See if polling now makes some progress */
+		pos = aq->q.cons;
+		ionic_admin_poll_locked(aq);
+		if (pos != aq->q.cons) {
+			ibdev_dbg(&dev->ibdev,
+				  "missed event for acq %d\n", aq->cqid);
+			goto next_aq;
+		}
+
+		if (time_is_after_eq_jiffies(aq->stamp +
+					     IONIC_ADMIN_TIMEOUT)) {
+			/* Timeout threshold not met */
+			ibdev_dbg(&dev->ibdev, "no progress after %ums\n",
+				  (u32)jiffies_to_msecs(jiffies - aq->stamp));
+			goto next_aq;
+		}
+
+		/* Queue timed out */
+		/* If several queues time out, the last one seen is kept */
+		bad_aq = aq;
+		do_reset = true;
+next_aq:
+		spin_unlock_irqrestore(&aq->lock, irqflags);
+	}
+
+	if (do_reset)
+		/* Reset RDMA lif on a timeout */
+		ionic_admin_timedout(bad_aq);
+	else if (do_reschedule)
+		/* Try to poll again later */
+		ionic_admin_reset_dwork(dev);
+}
+
+/* Work handler: poll the admin queue that embeds @ws, holding aq->lock. */
+static void ionic_admin_work(struct work_struct *ws)
+{
+	struct ionic_aq *aq;
+	unsigned long flags;
+
+	aq = container_of(ws, struct ionic_aq, work);
+
+	spin_lock_irqsave(&aq->lock, flags);
+	ionic_admin_poll_locked(aq);
+	spin_unlock_irqrestore(&aq->lock, flags);
+}
+
+/*
+ * Add @wr to the pending list of @aq.
+ *
+ * The request is marked IONIC_ADMIN_POSTED and bound to @aq. If the
+ * pending list was empty beforehand, the queue is polled right away so
+ * the request can be placed in the queue without waiting for other work.
+ *
+ * NOTE(review): list_add() inserts at the head of wr_post and the poller
+ * takes list_first_entry(), so pending requests appear to be issued
+ * newest-first - confirm this ordering is intended.
+ */
+static void ionic_admin_post_aq(struct ionic_aq *aq, struct ionic_admin_wr *wr)
+{
+	unsigned long flags;
+	bool was_idle;
+
+	wr->aq = aq;
+	wr->status = IONIC_ADMIN_POSTED;
+
+	spin_lock_irqsave(&aq->lock, flags);
+
+	was_idle = list_empty(&aq->wr_post);
+	list_add(&wr->aq_ent, &aq->wr_post);
+	if (was_idle)
+		ionic_admin_poll_locked(aq);
+
+	spin_unlock_irqrestore(&aq->lock, flags);
+}
+
+/*
+ * Post @wr on one of the device's admin queues. The queue is selected by
+ * the current cpu id modulo the number of admin queues.
+ */
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr)
+{
+	int idx;
+
+	/* Use cpu id for the adminq selection */
+	idx = raw_smp_processor_id() % dev->lif_cfg.aq_count;
+
+	ionic_admin_post_aq(dev->aq_vec[idx], wr);
+}
+
+/*
+ * Detach @wr from its admin queue.
+ *
+ * If the request is still linked on one of the aq lists it is unlinked,
+ * and when it has already been placed in the queue (status is a queue
+ * position rather than IONIC_ADMIN_POSTED) its q_wr slot is cleared.
+ */
+static void ionic_admin_cancel(struct ionic_admin_wr *wr)
+{
+	struct ionic_aq *aq = wr->aq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&aq->lock, flags);
+
+	if (list_empty(&wr->aq_ent))
+		goto unlock;
+
+	list_del(&wr->aq_ent);
+	if (wr->status != IONIC_ADMIN_POSTED)
+		aq->q_wr[wr->status].wr = NULL;
+
+unlock:
+	spin_unlock_irqrestore(&aq->lock, flags);
+}
+
+/*
+ * Spin until @wr completes, polling its admin queue between delays.
+ *
+ * Makes up to IONIC_ADMIN_BUSY_RETRY_COUNT attempts, each one checking
+ * for completion, delaying IONIC_ADMIN_BUSY_RETRY_MS and polling the aq.
+ *
+ * Returns 0 once the completion is done, or -ETIMEDOUT after reporting
+ * the timeout through ionic_admin_timedout().
+ */
+static int ionic_admin_busy_wait(struct ionic_admin_wr *wr)
+{
+	int tries_left = IONIC_ADMIN_BUSY_RETRY_COUNT;
+	struct ionic_aq *aq = wr->aq;
+	unsigned long flags;
+
+	while (tries_left-- > 0) {
+		if (completion_done(&wr->work))
+			return 0;
+
+		mdelay(IONIC_ADMIN_BUSY_RETRY_MS);
+
+		spin_lock_irqsave(&aq->lock, flags);
+		ionic_admin_poll_locked(aq);
+		spin_unlock_irqrestore(&aq->lock, flags);
+	}
+
+	/*
+	 * we timed out. Initiate RDMA LIF reset and indicate
+	 * error to caller.
+	 */
+	ionic_admin_timedout(aq);
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Wait for a posted admin request to finish and translate its outcome.
+ *
+ * @flags selects the wait mode: IONIC_ADMIN_F_BUSYWAIT spins,
+ * IONIC_ADMIN_F_INTERRUPT sleeps interruptibly for up to HZ jiffies, and
+ * otherwise the wait is an uninterruptible sleep.
+ *
+ * Returns 0 on success; the wait error (after cancelling the request) if
+ * the wait itself failed; -ENODEV if the request was completed in the
+ * killed state (0 instead when IONIC_ADMIN_F_TEARDOWN is set); -EINVAL if
+ * the completion reports an error.
+ */
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+		     enum ionic_admin_flags flags)
+{
+	int remaining, rc;
+
+	if (flags & IONIC_ADMIN_F_BUSYWAIT) {
+		/* Spin */
+		rc = ionic_admin_busy_wait(wr);
+	} else if (!(flags & IONIC_ADMIN_F_INTERRUPT)) {
+		/*
+		 * Uninterruptible sleep
+		 * This is used for commands which are NOT safe for the
+		 * caller to clean up. Cleanup must be handled by the
+		 * adminq kill and reset process so that host memory is
+		 * not corrupted by the device.
+		 */
+		wait_for_completion(&wr->work);
+		rc = 0;
+	} else {
+		/*
+		 * Interruptible sleep, 1s timeout
+		 * This is used for commands which are safe for the caller
+		 * to clean up without killing and resetting the adminq.
+		 */
+		remaining = wait_for_completion_interruptible_timeout(&wr->work,
+								      HZ);
+		if (remaining < 0)
+			rc = remaining;
+		else
+			rc = remaining ? 0 : -ETIMEDOUT;
+	}
+
+	if (rc) {
+		ibdev_warn(&dev->ibdev, "wait status %d\n", rc);
+		ionic_admin_cancel(wr);
+		return rc;
+	}
+
+	if (wr->status == IONIC_ADMIN_KILLED) {
+		ibdev_dbg(&dev->ibdev, "admin killed\n");
+
+		/* No error if admin already killed during teardown */
+		return (flags & IONIC_ADMIN_F_TEARDOWN) ? 0 : -ENODEV;
+	}
+
+	if (ionic_v1_cqe_error(&wr->cqe)) {
+		ibdev_warn(&dev->ibdev, "opcode %u error %u\n",
+			   wr->wqe.op,
+			   be32_to_cpu(wr->cqe.status_length));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Post @admin through ionic_adminq_post_wait() on the device's lif.
+ *
+ * Returns the posting error if there is one, otherwise the completion
+ * status converted by ionic_error_to_errno().
+ */
+static int ionic_rdma_devcmd(struct ionic_ibdev *dev,
+			     struct ionic_admin_ctx *admin)
+{
+	int err;
+
+	err = ionic_adminq_post_wait(dev->lif_cfg.lif, admin);
+	if (!err)
+		return ionic_error_to_errno(admin->comp.comp.status);
+
+	return err;
+}
+
+/*
+ * Issue the IONIC_CMD_RDMA_RESET_LIF command for this device's lif index.
+ *
+ * Returns the result of ionic_rdma_devcmd().
+ */
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev)
+{
+	/* Command context lives on the stack for the duration of the call */
+	struct ionic_admin_ctx admin = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+		.cmd.rdma_reset = {
+			.opcode = IONIC_CMD_RDMA_RESET_LIF,
+			.lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+		},
+	};
+
+	return ionic_rdma_devcmd(dev, &admin);
+}
+
+/*
+ * Issue an rdma_queue command with the given @opcode for queue @q.
+ *
+ * The command carries the lif index, @qid, @cid, the doorbell id, and the
+ * depth, stride and DMA address taken from @q. The callers in this file
+ * use it to create event queues, admin CQs and admin queues.
+ *
+ * Returns the result of ionic_rdma_devcmd().
+ */
+static int ionic_rdma_queue_devcmd(struct ionic_ibdev *dev,
+				   struct ionic_queue *q,
+				   u32 qid, u32 cid, u16 opcode)
+{
+	struct ionic_admin_ctx admin = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+		.cmd.rdma_queue = {
+			.opcode = opcode,
+			.lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+			.qid_ver = cpu_to_le32(qid),
+			.cid = cpu_to_le32(cid),
+			.dbid = cpu_to_le16(dev->lif_cfg.dbid),
+			.depth_log2 = q->depth_log2,
+			.stride_log2 = q->stride_log2,
+			.dma_addr = cpu_to_le64(q->dma),
+		},
+	};
+
+	return ionic_rdma_devcmd(dev, &admin);
+}
+
+/*
+ * Completion handler installed on the admin CQ. @cq_context is the owning
+ * admin queue: mark it as no longer armed and, unless it is killed, queue
+ * its polling work.
+ */
+static void ionic_rdma_admincq_comp(struct ib_cq *ibcq, void *cq_context)
+{
+	struct ionic_aq *aq = cq_context;
+	unsigned long flags;
+
+	spin_lock_irqsave(&aq->lock, flags);
+
+	aq->armed = false;
+	if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+		queue_work(ionic_evt_workq, &aq->work);
+
+	spin_unlock_irqrestore(&aq->lock, flags);
+}
+
+/* Event handler installed on the admin CQ: log the event number. */
+static void ionic_rdma_admincq_event(struct ib_event *event, void *cq_context)
+{
+	struct ionic_aq *aq = cq_context;
+	struct ionic_ibdev *dev = aq->dev;
+
+	ibdev_err(&dev->ibdev, "admincq event %d\n", event->event);
+}
+
+/*
+ * Allocate and create the completion queue backing one admin queue.
+ *
+ * The CQ is sized IONIC_AQ_DEPTH, bound to @comp_vector, and registered
+ * with the device through IONIC_CMD_RDMA_CREATE_CQ.
+ *
+ * Returns the new vcq, or an ERR_PTR() on failure with everything
+ * allocated here released.
+ */
+static struct ionic_vcq *ionic_create_rdma_admincq(struct ionic_ibdev *dev,
+						   int comp_vector)
+{
+	struct ib_cq_init_attr attr = {
+		.cqe = IONIC_AQ_DEPTH,
+		.comp_vector = comp_vector,
+	};
+	struct ionic_tbl_buf buf = {};
+	struct ionic_vcq *vcq;
+	struct ionic_cq *acq;
+	int rc;
+
+	vcq = kzalloc(sizeof(*vcq), GFP_KERNEL);
+	if (!vcq)
+		return ERR_PTR(-ENOMEM);
+
+	acq = &vcq->cq[0];
+	vcq->udma_mask = 1;
+
+	vcq->ibcq.device = &dev->ibdev;
+	vcq->ibcq.comp_handler = ionic_rdma_admincq_comp;
+	vcq->ibcq.event_handler = ionic_rdma_admincq_event;
+	atomic_set(&vcq->ibcq.usecnt, 0);
+
+	rc = ionic_create_cq_common(vcq, &buf, &attr, NULL, NULL,
+				    NULL, NULL, 0);
+	if (rc)
+		goto free_vcq;
+
+	rc = ionic_rdma_queue_devcmd(dev, &acq->q, acq->cqid, acq->eqid,
+				     IONIC_CMD_RDMA_CREATE_CQ);
+	if (rc)
+		goto destroy_cq;
+
+	return vcq;
+
+destroy_cq:
+	ionic_destroy_cq_common(dev, acq);
+free_vcq:
+	kfree(vcq);
+
+	return ERR_PTR(rc);
+}
+
+/*
+ * Allocate and initialize the host-side state of an admin queue, without
+ * issuing any device command.
+ *
+ * The aq starts in IONIC_ADMIN_KILLED state with empty wr_prod/wr_post
+ * lists and a q_wr array holding one slot per queue entry.
+ *
+ * Returns the new aq, or an ERR_PTR() on failure.
+ */
+static struct ionic_aq *__ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+						   u32 aqid, u32 cqid)
+{
+	struct ionic_aq *aq;
+	int rc;
+
+	aq = kzalloc(sizeof(*aq), GFP_KERNEL);
+	if (!aq)
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+	aq->dev = dev;
+	aq->aqid = aqid;
+	aq->cqid = cqid;
+	spin_lock_init(&aq->lock);
+
+	/*
+	 * NOTE(review): the admin queue depth is IONIC_EQ_DEPTH here, while
+	 * the admin CQ is created with IONIC_AQ_DEPTH - confirm intended.
+	 */
+	rc = ionic_queue_init(&aq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+			      ADMIN_WQE_STRIDE);
+	if (rc)
+		goto err_q;
+
+	ionic_queue_dbell_init(&aq->q, aq->aqid);
+
+	/* One tracking slot per queue position (mask + 1 entries) */
+	aq->q_wr = kcalloc((u32)aq->q.mask + 1, sizeof(*aq->q_wr), GFP_KERNEL);
+	if (!aq->q_wr) {
+		rc = -ENOMEM;
+		goto err_wr;
+	}
+
+	INIT_LIST_HEAD(&aq->wr_prod);
+	INIT_LIST_HEAD(&aq->wr_post);
+
+	INIT_WORK(&aq->work, ionic_admin_work);
+	aq->armed = false;
+
+	return aq;
+
+err_wr:
+	ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev);
+err_q:
+	kfree(aq);
+
+	return ERR_PTR(rc);
+}
+
+/*
+ * Release the host-side state of an admin queue: the q_wr tracking array,
+ * the queue itself, and finally the aq structure.
+ */
+static void __ionic_destroy_rdma_adminq(struct ionic_ibdev *dev,
+					struct ionic_aq *aq)
+{
+	struct ionic_queue *q = &aq->q;
+
+	kfree(aq->q_wr);
+	ionic_queue_destroy(q, dev->lif_cfg.hwdev);
+	kfree(aq);
+}
+
+/*
+ * Create an admin queue: set up the host-side state, then register the
+ * queue with the device through IONIC_CMD_RDMA_CREATE_ADMINQ.
+ *
+ * Returns the new aq, or an ERR_PTR() on failure; if the device command
+ * fails the host-side state is released again.
+ */
+static struct ionic_aq *ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+						 u32 aqid, u32 cqid)
+{
+	struct ionic_aq *aq = __ionic_create_rdma_adminq(dev, aqid, cqid);
+	int rc;
+
+	if (IS_ERR(aq))
+		return aq;
+
+	rc = ionic_rdma_queue_devcmd(dev, &aq->q, aq->aqid, aq->cqid,
+				     IONIC_CMD_RDMA_CREATE_ADMINQ);
+	if (!rc)
+		return aq;
+
+	__ionic_destroy_rdma_adminq(dev, aq);
+
+	return ERR_PTR(rc);
+}
+
+/*
+ * Flush every QP and notify every CQ known to the device.
+ *
+ * Expects to run with interrupts disabled (warns otherwise). Objects are
+ * first collected on a local list with a reference held, under the
+ * xarray lock, and then processed after the lock is dropped.
+ */
+static void ionic_flush_qs(struct ionic_ibdev *dev)
+{
+	struct ionic_qp *qp, *qp_tmp;
+	struct ionic_cq *cq, *cq_tmp;
+	LIST_HEAD(flush_list);
+	unsigned long index;
+
+	WARN_ON(!irqs_disabled());
+
+	/* Flush qp send and recv */
+	xa_lock(&dev->qp_tbl);
+	xa_for_each(&dev->qp_tbl, index, qp) {
+		kref_get(&qp->qp_kref);
+		list_add_tail(&qp->ibkill_flush_ent, &flush_list);
+	}
+	xa_unlock(&dev->qp_tbl);
+
+	/* The list is empty again after this loop, so it is reused for CQs */
+	list_for_each_entry_safe(qp, qp_tmp, &flush_list, ibkill_flush_ent) {
+		ionic_flush_qp(dev, qp);
+		kref_put(&qp->qp_kref, ionic_qp_complete);
+		list_del(&qp->ibkill_flush_ent);
+	}
+
+	/* Notify completions */
+	xa_lock(&dev->cq_tbl);
+	xa_for_each(&dev->cq_tbl, index, cq) {
+		kref_get(&cq->cq_kref);
+		list_add_tail(&cq->ibkill_flush_ent, &flush_list);
+	}
+	xa_unlock(&dev->cq_tbl);
+
+	list_for_each_entry_safe(cq, cq_tmp, &flush_list, ibkill_flush_ent) {
+		ionic_notify_flush_cq(cq);
+		kref_put(&cq->cq_kref, ionic_cq_complete);
+		list_del(&cq->ibkill_flush_ent);
+	}
+}
+
+/*
+ * Move every admin queue to IONIC_ADMIN_KILLED and flush the device.
+ *
+ * Each aq that was not already killed is marked killed and polled, which
+ * completes its outstanding requests locally. If any aq changed state,
+ * all QPs and CQs are flushed. When @fatal_path is set an
+ * IB_EVENT_DEVICE_FATAL event is dispatched. Finally the device-level
+ * admin state is set to IONIC_ADMIN_KILLED.
+ */
+static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path)
+{
+	unsigned long irqflags;
+	bool do_flush = false;
+	int i;
+
+	/* Mark AQs for drain and flush the QPs while irq is disabled */
+	local_irq_save(irqflags);
+
+	/* Mark the admin queue, flushing at most once */
+	for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+		struct ionic_aq *aq = dev->aq_vec[i];
+
+		spin_lock(&aq->lock);
+		if (atomic_read(&aq->admin_state) != IONIC_ADMIN_KILLED) {
+			atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+			/* Flush incomplete admin commands */
+			ionic_admin_poll_locked(aq);
+			do_flush = true;
+		}
+		spin_unlock(&aq->lock);
+	}
+
+	if (do_flush)
+		ionic_flush_qs(dev);
+
+	local_irq_restore(irqflags);
+
+	/* Post a fatal event if requested */
+	if (fatal_path) {
+		struct ib_event ev;
+
+		ev.device = &dev->ibdev;
+		ev.element.port_num = 1;
+		ev.event = IB_EVENT_DEVICE_FATAL;
+
+		ib_dispatch_event(&ev);
+	}
+
+	atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+}
+
+/*
+ * Pause the admin queues, reset the RDMA lif, then kill the admin queues.
+ *
+ * Only the caller that moves the device admin state from
+ * IONIC_ADMIN_ACTIVE to IONIC_ADMIN_PAUSED proceeds; any other caller
+ * returns immediately, as does a call made before aq_vec exists.
+ * @fatal_path is passed on to ionic_kill_ibdev().
+ */
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path)
+{
+	enum ionic_admin_state old_state;
+	unsigned long irqflags = 0;
+	int i, rc;
+
+	if (!dev->aq_vec)
+		return;
+
+	/*
+	 * Admin queues are transitioned from active to paused to killed state.
+	 * When in paused state, no new commands are issued to the device,
+	 * nor are any completed locally. After resetting the lif, it will be
+	 * safe to resume the rdma admin queues in the killed state. Commands
+	 * will not be issued to the device, but will complete locally with status
+	 * IONIC_ADMIN_KILLED. Handling completion will ensure that creating or
+	 * modifying resources fails, but destroying resources succeeds.
+	 * If there was a failure resetting the lif using this strategy,
+	 * then the state of the device is unknown.
+	 */
+	old_state = atomic_cmpxchg(&dev->admin_state, IONIC_ADMIN_ACTIVE,
+				   IONIC_ADMIN_PAUSED);
+	if (old_state != IONIC_ADMIN_ACTIVE)
+		return;
+
+	/* Pause all the AQs */
+	local_irq_save(irqflags);
+	for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+		struct ionic_aq *aq = dev->aq_vec[i];
+
+		spin_lock(&aq->lock);
+		/* pause rdma admin queues to reset lif */
+		if (atomic_read(&aq->admin_state) == IONIC_ADMIN_ACTIVE)
+			atomic_set(&aq->admin_state, IONIC_ADMIN_PAUSED);
+		spin_unlock(&aq->lock);
+	}
+	local_irq_restore(irqflags);
+
+	rc = ionic_rdma_reset_devcmd(dev);
+	if (unlikely(rc)) {
+		/* Reset command failed: log it and request an rdma reset */
+		ibdev_err(&dev->ibdev, "failed to reset rdma %d\n", rc);
+		ionic_request_rdma_reset(dev->lif_cfg.lif);
+	}
+
+	ionic_kill_ibdev(dev, fatal_path);
+}
+
+/* Work handler for dev->reset_work: kill the rdma admin on the fatal path. */
+static void ionic_reset_work(struct work_struct *ws)
+{
+	struct ionic_ibdev *dev;
+
+	dev = container_of(ws, struct ionic_ibdev, reset_work);
+	ionic_kill_rdma_admin(dev, true);
+}
+
+/*
+ * Fetch the event queue entry at the producer index of @eq.
+ *
+ * Returns true and copies the entry into @eqe when its color matches the
+ * expected color (kept in eq->q.cons); returns false otherwise.
+ */
+static bool ionic_next_eqe(struct ionic_eq *eq, struct ionic_v1_eqe *eqe)
+{
+	struct ionic_v1_eqe *cur = ionic_queue_at_prod(&eq->q);
+	bool color = ionic_v1_eqe_color(cur);
+
+	/* cons is color for eq */
+	if (eq->q.cons == color) {
+		/* Prevent out-of-order reads of the EQE */
+		dma_rmb();
+
+		ibdev_dbg(&eq->dev->ibdev, "poll eq prod %u\n", eq->q.prod);
+		print_hex_dump_debug("eqe ", DUMP_PREFIX_OFFSET, 16, 1,
+				     cur, BIT(eq->q.stride_log2), true);
+		*eqe = *cur;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Deliver an event queue notification with event @code to the CQ
+ * identified by @cqid.
+ *
+ * The CQ is looked up in dev->cq_tbl and a reference is held while its
+ * handler runs. IONIC_V1_EQE_CQ_NOTIFY calls the completion handler and
+ * IONIC_V1_EQE_CQ_ERR calls the event handler with IB_EVENT_CQ_ERR; a
+ * missing CQ or an unknown code is only logged at debug level.
+ */
+static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code)
+{
+	unsigned long irqflags;
+	struct ib_event ibev;
+	struct ionic_cq *cq;
+
+	/* Take the reference under the table lock */
+	xa_lock_irqsave(&dev->cq_tbl, irqflags);
+	cq = xa_load(&dev->cq_tbl, cqid);
+	if (cq)
+		kref_get(&cq->cq_kref);
+	xa_unlock_irqrestore(&dev->cq_tbl, irqflags);
+
+	if (!cq) {
+		ibdev_dbg(&dev->ibdev,
+			  "missing cqid %#x code %u\n", cqid, code);
+		return;
+	}
+
+	switch (code) {
+	case IONIC_V1_EQE_CQ_NOTIFY:
+		if (cq->vcq->ibcq.comp_handler)
+			cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq,
+						   cq->vcq->ibcq.cq_context);
+		break;
+
+	case IONIC_V1_EQE_CQ_ERR:
+		if (cq->vcq->ibcq.event_handler) {
+			ibev.event = IB_EVENT_CQ_ERR;
+			ibev.device = &dev->ibdev;
+			ibev.element.cq = &cq->vcq->ibcq;
+
+			cq->vcq->ibcq.event_handler(&ibev,
+						    cq->vcq->ibcq.cq_context);
+		}
+		break;
+
+	default:
+		ibdev_dbg(&dev->ibdev,
+			  "unrecognized cqid %#x code %u\n", cqid, code);
+		break;
+	}
+
+	/* Drop the reference taken above */
+	kref_put(&cq->cq_kref, ionic_cq_complete);
+}
+
+/*
+ * Deliver an event queue notification with event @code to the QP
+ * identified by @qpid.
+ *
+ * The QP is looked up in dev->qp_tbl and a reference is held while its
+ * event handler runs. The device event code is mapped to the matching
+ * IB_EVENT_* value; a missing QP or an unknown code is only logged at
+ * debug level.
+ */
+static void ionic_qp_event(struct ionic_ibdev *dev, u32 qpid, u8 code)
+{
+	unsigned long irqflags;
+	struct ib_event ibev;
+	struct ionic_qp *qp;
+
+	/* Take the reference under the table lock */
+	xa_lock_irqsave(&dev->qp_tbl, irqflags);
+	qp = xa_load(&dev->qp_tbl, qpid);
+	if (qp)
+		kref_get(&qp->qp_kref);
+	xa_unlock_irqrestore(&dev->qp_tbl, irqflags);
+
+	if (!qp) {
+		ibdev_dbg(&dev->ibdev,
+			  "missing qpid %#x code %u\n", qpid, code);
+		return;
+	}
+
+	ibev.device = &dev->ibdev;
+	ibev.element.qp = &qp->ibqp;
+
+	switch (code) {
+	case IONIC_V1_EQE_SQ_DRAIN:
+		ibev.event = IB_EVENT_SQ_DRAINED;
+		break;
+
+	case IONIC_V1_EQE_QP_COMM_EST:
+		ibev.event = IB_EVENT_COMM_EST;
+		break;
+
+	case IONIC_V1_EQE_QP_LAST_WQE:
+		ibev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		break;
+
+	case IONIC_V1_EQE_QP_ERR:
+		ibev.event = IB_EVENT_QP_FATAL;
+		break;
+
+	case IONIC_V1_EQE_QP_ERR_REQUEST:
+		ibev.event = IB_EVENT_QP_REQ_ERR;
+		break;
+
+	case IONIC_V1_EQE_QP_ERR_ACCESS:
+		ibev.event = IB_EVENT_QP_ACCESS_ERR;
+		break;
+
+	default:
+		ibdev_dbg(&dev->ibdev,
+			  "unrecognized qpid %#x code %u\n", qpid, code);
+		/* Skip the handler but still drop the reference */
+		goto out;
+	}
+
+	if (qp->ibqp.event_handler)
+		qp->ibqp.event_handler(&ibev, qp->ibqp.qp_context);
+
+out:
+	kref_put(&qp->qp_kref, ionic_qp_complete);
+}
+
+/*
+ * Consume up to @budget entries from @eq and dispatch each one to the CQ
+ * or QP event handler according to its type.
+ *
+ * Returns the number of entries consumed.
+ */
+static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget)
+{
+	struct ionic_ibdev *dev = eq->dev;
+	struct ionic_v1_eqe eqe;
+	u16 npolled = 0;
+	u8 type, code;
+	u32 evt, qid;
+
+	while (npolled < budget) {
+		if (!ionic_next_eqe(eq, &eqe))
+			break;
+
+		/* eqe now holds a copy of the entry; advance past it */
+		ionic_queue_produce(&eq->q);
+
+		/* cons is color for eq */
+		eq->q.cons = ionic_color_wrap(eq->q.prod, eq->q.cons);
+
+		++npolled;
+
+		evt = ionic_v1_eqe_evt(&eqe);
+		type = ionic_v1_eqe_evt_type(evt);
+		code = ionic_v1_eqe_evt_code(evt);
+		qid = ionic_v1_eqe_evt_qid(evt);
+
+		switch (type) {
+		case IONIC_V1_EQE_TYPE_CQ:
+			ionic_cq_event(dev, qid, code);
+			break;
+
+		case IONIC_V1_EQE_TYPE_QP:
+			ionic_qp_event(dev, qid, code);
+			break;
+
+		default:
+			ibdev_dbg(&dev->ibdev,
+				  "unknown event %#x type %u\n", evt, type);
+		}
+	}
+
+	return npolled;
+}
+
+/*
+ * Work handler that continues polling an event queue outside the ISR.
+ *
+ * Does nothing if the eq is disabled; warns and returns if the eq is
+ * unexpectedly armed. If the full IONIC_EQ_WORK_BUDGET was consumed the
+ * credits are returned and the work is queued again; otherwise the eq is
+ * marked armed and the interrupt is unmasked.
+ */
+static void ionic_poll_eq_work(struct work_struct *work)
+{
+	struct ionic_eq *eq = container_of(work, struct ionic_eq, work);
+	u32 npolled;
+
+	if (unlikely(!eq->enable) || WARN_ON(eq->armed))
+		return;
+
+	npolled = ionic_poll_eq(eq, IONIC_EQ_WORK_BUDGET);
+	if (npolled == IONIC_EQ_WORK_BUDGET) {
+		/* Budget exhausted: there may be more entries to process */
+		ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+				   npolled, 0);
+		queue_work(ionic_evt_workq, &eq->work);
+	} else {
+		/* Mark armed before unmasking the interrupt */
+		xchg(&eq->armed, 1);
+		ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+				   0, IONIC_INTR_CRED_UNMASK);
+	}
+}
+
+/*
+ * Interrupt handler for an event queue; @eqptr is the struct ionic_eq.
+ *
+ * Clears the armed flag and, if the eq was armed and is enabled, polls up
+ * to IONIC_EQ_ISR_BUDGET entries. If the whole budget was consumed the
+ * remaining work is handed to the eq work item; otherwise the eq is
+ * marked armed again and the interrupt is unmasked.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr)
+{
+	struct ionic_eq *eq = eqptr;
+	int was_armed;
+	u32 npolled;
+
+	/* Read and clear the armed flag in one step */
+	was_armed = xchg(&eq->armed, 0);
+
+	if (unlikely(!eq->enable) || !was_armed)
+		return IRQ_HANDLED;
+
+	npolled = ionic_poll_eq(eq, IONIC_EQ_ISR_BUDGET);
+	if (npolled == IONIC_EQ_ISR_BUDGET) {
+		/* Budget exhausted: continue from the work item */
+		ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+				   npolled, 0);
+		queue_work(ionic_evt_workq, &eq->work);
+	} else {
+		/* Mark armed before unmasking the interrupt */
+		xchg(&eq->armed, 1);
+		ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+				   0, IONIC_INTR_CRED_UNMASK);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Create one event queue with id @eqid.
+ *
+ * Allocates the queue and an interrupt, masks and initializes the
+ * interrupt, installs ionic_poll_eq_isr() as its handler, registers the
+ * queue with the device through IONIC_CMD_RDMA_CREATE_EQ, and finally
+ * clears the interrupt mask.
+ *
+ * Returns the new eq, or an ERR_PTR() on failure with everything acquired
+ * here released.
+ */
+static struct ionic_eq *ionic_create_eq(struct ionic_ibdev *dev, int eqid)
+{
+	struct ionic_intr_info intr_obj = { };
+	struct ionic_eq *eq;
+	int rc;
+
+	eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+	if (!eq)
+		return ERR_PTR(-ENOMEM);
+
+	eq->dev = dev;
+
+	rc = ionic_queue_init(&eq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+			      sizeof(struct ionic_v1_eqe));
+	if (rc)
+		goto err_q;
+
+	eq->eqid = eqid;
+
+	/* Start armed but disabled, so early interrupts are ignored */
+	eq->armed = true;
+	eq->enable = false;
+	INIT_WORK(&eq->work, ionic_poll_eq_work);
+
+	rc = ionic_intr_alloc(dev->lif_cfg.lif, &intr_obj);
+	if (rc < 0)
+		goto err_intr;
+
+	eq->irq = intr_obj.vector;
+	eq->intr = intr_obj.index;
+
+	ionic_queue_dbell_init(&eq->q, eq->eqid);
+
+	/* cons is color for eq */
+	eq->q.cons = true;
+
+	snprintf(eq->name, sizeof(eq->name), "%s-%d-%d-eq",
+		 "ionr", dev->lif_cfg.lif_index, eq->eqid);
+
+	/* Keep the interrupt masked until the device knows about the eq */
+	ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+	ionic_intr_mask_assert(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+	ionic_intr_coal_init(dev->lif_cfg.intr_ctrl, eq->intr, 0);
+	ionic_intr_clean(dev->lif_cfg.intr_ctrl, eq->intr);
+
+	eq->enable = true;
+
+	rc = request_irq(eq->irq, ionic_poll_eq_isr, 0, eq->name, eq);
+	if (rc)
+		goto err_irq;
+
+	rc = ionic_rdma_queue_devcmd(dev, &eq->q, eq->eqid, eq->intr,
+				     IONIC_CMD_RDMA_CREATE_EQ);
+	if (rc)
+		goto err_cmd;
+
+	ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_CLEAR);
+
+	return eq;
+
+err_cmd:
+	/* Same teardown order as ionic_destroy_eq() */
+	eq->enable = false;
+	free_irq(eq->irq, eq);
+	flush_work(&eq->work);
+err_irq:
+	ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+err_intr:
+	ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+err_q:
+	kfree(eq);
+
+	return ERR_PTR(rc);
+}
+
+/*
+ * Tear down an event queue: disable it, free its irq, flush its work
+ * item, then release the interrupt, the queue and the eq structure.
+ */
+static void ionic_destroy_eq(struct ionic_eq *eq)
+{
+	struct ionic_ibdev *dev = eq->dev;
+
+	/* Disable first so the ISR and work handler return early */
+	eq->enable = false;
+	free_irq(eq->irq, eq);
+	flush_work(&eq->work);
+
+	ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+	ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+	kfree(eq);
+}
+
+/*
+ * Create the event queues and admin queues (each with its own CQ) for
+ * @dev.
+ *
+ * The requested counts are clamped to IONIC_EQ_COUNT / IONIC_AQ_COUNT.
+ * At least IONIC_EQ_COUNT_MIN event queues and one admin queue must be
+ * created; failing to create more than that is tolerated and the device
+ * runs with fewer queues. On return dev->lif_cfg.eq_count and aq_count
+ * hold the number of queues actually created, including on failure, so
+ * that ionic_destroy_rdma_admin() releases exactly those.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ionic_create_rdma_admin(struct ionic_ibdev *dev)
+{
+	int eq_i = 0, aq_i = 0, rc = 0;
+	struct ionic_vcq *vcq;
+	struct ionic_aq *aq;
+	struct ionic_eq *eq;
+
+	dev->eq_vec = NULL;
+	dev->aq_vec = NULL;
+
+	INIT_WORK(&dev->reset_work, ionic_reset_work);
+	INIT_DELAYED_WORK(&dev->admin_dwork, ionic_admin_dwork);
+	atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+
+	if (dev->lif_cfg.aq_count > IONIC_AQ_COUNT) {
+		ibdev_dbg(&dev->ibdev, "limiting adminq count to %d\n",
+			  IONIC_AQ_COUNT);
+		dev->lif_cfg.aq_count = IONIC_AQ_COUNT;
+	}
+
+	if (dev->lif_cfg.eq_count > IONIC_EQ_COUNT) {
+		dev_dbg(&dev->ibdev.dev, "limiting eventq count to %d\n",
+			IONIC_EQ_COUNT);
+		dev->lif_cfg.eq_count = IONIC_EQ_COUNT;
+	}
+
+	/* need at least IONIC_EQ_COUNT_MIN eq and IONIC_AQ_COUNT_MIN aq */
+	if (dev->lif_cfg.eq_count < IONIC_EQ_COUNT_MIN ||
+	    dev->lif_cfg.aq_count < IONIC_AQ_COUNT_MIN) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	dev->eq_vec = kmalloc_array(dev->lif_cfg.eq_count, sizeof(*dev->eq_vec),
+				    GFP_KERNEL);
+	if (!dev->eq_vec) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (eq_i = 0; eq_i < dev->lif_cfg.eq_count; ++eq_i) {
+		eq = ionic_create_eq(dev, eq_i + dev->lif_cfg.eq_base);
+		if (IS_ERR(eq)) {
+			rc = PTR_ERR(eq);
+
+			if (eq_i < IONIC_EQ_COUNT_MIN) {
+				ibdev_err(&dev->ibdev,
+					  "fail create eq %pe\n", eq);
+				goto out;
+			}
+
+			/* ok, just fewer eq than device supports */
+			ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %pe\n",
+				  eq_i, dev->lif_cfg.eq_count, eq);
+
+			rc = 0;
+			break;
+		}
+
+		dev->eq_vec[eq_i] = eq;
+	}
+
+	dev->lif_cfg.eq_count = eq_i;
+
+	dev->aq_vec = kmalloc_array(dev->lif_cfg.aq_count, sizeof(*dev->aq_vec),
+				    GFP_KERNEL);
+	if (!dev->aq_vec) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Create one CQ per AQ */
+	for (aq_i = 0; aq_i < dev->lif_cfg.aq_count; ++aq_i) {
+		vcq = ionic_create_rdma_admincq(dev, aq_i % eq_i);
+		if (IS_ERR(vcq)) {
+			rc = PTR_ERR(vcq);
+
+			if (!aq_i) {
+				ibdev_err(&dev->ibdev,
+					  "failed to create acq %pe\n", vcq);
+				goto out;
+			}
+
+			/* ok, just fewer adminq than device supports */
+			ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %pe\n",
+				  aq_i, dev->lif_cfg.aq_count, vcq);
+
+			/* Tolerated, so do not report it as a failure */
+			rc = 0;
+			break;
+		}
+
+		aq = ionic_create_rdma_adminq(dev, aq_i + dev->lif_cfg.aq_base,
+					      vcq->cq[0].cqid);
+		if (IS_ERR(aq)) {
+			/* Clean up the dangling CQ */
+			ionic_destroy_cq_common(dev, &vcq->cq[0]);
+			kfree(vcq);
+
+			rc = PTR_ERR(aq);
+
+			if (!aq_i) {
+				ibdev_err(&dev->ibdev,
+					  "failed to create aq %pe\n", aq);
+				goto out;
+			}
+
+			/* ok, just fewer adminq than device supports */
+			ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %pe\n",
+				  aq_i, dev->lif_cfg.aq_count, aq);
+
+			/* Tolerated, so do not report it as a failure */
+			rc = 0;
+			break;
+		}
+
+		vcq->ibcq.cq_context = aq;
+		aq->vcq = vcq;
+
+		atomic_set(&aq->admin_state, IONIC_ADMIN_ACTIVE);
+		dev->aq_vec[aq_i] = aq;
+	}
+
+	atomic_set(&dev->admin_state, IONIC_ADMIN_ACTIVE);
+out:
+	/* Record how many queues actually exist, for the destroy path */
+	dev->lif_cfg.eq_count = eq_i;
+	dev->lif_cfg.aq_count = aq_i;
+
+	return rc;
+}
+
+/*
+ * Release everything set up by ionic_create_rdma_admin(): pending work,
+ * the admin queues with their CQs, and the event queues.
+ *
+ * dev->lif_cfg.aq_count and eq_count are counted down to zero as the
+ * queues are destroyed. This function issues no device commands itself.
+ */
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev)
+{
+	struct ionic_vcq *vcq;
+	struct ionic_aq *aq;
+	struct ionic_eq *eq;
+
+	/*
+	 * Killing the admin before destroy makes sure all admin and
+	 * completions are flushed. admin_state = IONIC_ADMIN_KILLED
+	 * stops queueing up further works.
+	 */
+	cancel_delayed_work_sync(&dev->admin_dwork);
+	cancel_work_sync(&dev->reset_work);
+
+	if (dev->aq_vec) {
+		/* Destroy in reverse order of creation */
+		while (dev->lif_cfg.aq_count > 0) {
+			aq = dev->aq_vec[--dev->lif_cfg.aq_count];
+			/* Read vcq before the aq is freed below */
+			vcq = aq->vcq;
+
+			cancel_work_sync(&aq->work);
+
+			__ionic_destroy_rdma_adminq(dev, aq);
+			if (vcq) {
+				ionic_destroy_cq_common(dev, &vcq->cq[0]);
+				kfree(vcq);
+			}
+		}
+
+		kfree(dev->aq_vec);
+	}
+
+	if (dev->eq_vec) {
+		while (dev->lif_cfg.eq_count > 0) {
+			eq = dev->eq_vec[--dev->lif_cfg.eq_count];
+			ionic_destroy_eq(eq);
+		}
+
+		kfree(dev->eq_vec);
+	}
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
new file mode 100644
index 000000000000..ea12d9b8e125
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -0,0 +1,2679 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
+#include <ionic_api.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+/* Force the two low-order bits of a TOS/traffic-class value to binary 10 */
+#define ionic_set_ecn(tos) (((tos) | 2u) & ~1u)
+/* Clear the two low-order bits of a TOS/traffic-class value */
+#define ionic_clear_ecn(tos) ((tos) & ~3u)
+
+/**
+ * ionic_validate_qdesc() - sanity-check a queue descriptor
+ * @q: queue descriptor to check
+ *
+ * Every field must be non-zero, the address must be page aligned, the mask
+ * must equal 2^depth_log2 - 1 and the size must cover at least
+ * 2^(depth_log2 + stride_log2) bytes.
+ *
+ * Return: 0 if the descriptor is consistent, -EINVAL otherwise.
+ */
+static int ionic_validate_qdesc(struct ionic_qdesc *q)
+{
+	if (!q->addr || !q->size || !q->mask ||
+	    !q->depth_log2 || !q->stride_log2)
+		return -EINVAL;
+
+	/*
+	 * The log2 fields are used as shift counts below. A shift by the
+	 * width of the type or more is undefined and, on hardware that
+	 * masks the count, would let an oversized value pass the checks.
+	 */
+	if (q->depth_log2 >= BITS_PER_LONG ||
+	    q->depth_log2 + q->stride_log2 >= BITS_PER_LONG_LONG)
+		return -EINVAL;
+
+	if (q->addr & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	if (q->mask != BIT(q->depth_log2) - 1)
+		return -EINVAL;
+
+	if (q->size < BIT_ULL(q->depth_log2 + q->stride_log2))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Pick the event queue id for a completion vector on a given udma.
+ *
+ * There is one EQ per vector per udma, and the first group of EQs is
+ * reserved for async events, so completion vectors are spread over the
+ * remaining groups.
+ */
+static u32 ionic_get_eqid(struct ionic_ibdev *dev, u32 comp_vector, u8 udma_idx)
+{
+	u32 comp_vec_count = dev->lif_cfg.eq_count / dev->lif_cfg.udma_count - 1;
+	/* +1 skips the group reserved for async events */
+	u32 eq_group = comp_vector % comp_vec_count + 1;
+
+	return eq_group * dev->lif_cfg.udma_count + udma_idx;
+}
+
+/*
+ * Allocate a CQ id from the slice of the id space that belongs to udma_idx.
+ * Returns 0 and stores the id in *cqid, or the negative error from the
+ * allocator.
+ */
+static int ionic_get_cqid(struct ionic_ibdev *dev, u32 *cqid, u8 udma_idx)
+{
+	unsigned int per_udma, first, limit;
+	int bitid;
+
+	per_udma = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+	first = per_udma * udma_idx;
+	limit = first + per_udma;
+
+	bitid = ionic_resid_get_shared(&dev->inuse_cqid, first, limit);
+	if (bitid < 0)
+		return bitid;
+
+	/* cq_base is zero or a multiple of two queue groups */
+	*cqid = dev->lif_cfg.cq_base +
+		ionic_bitid_to_qid(bitid, dev->lif_cfg.udma_qgrp_shift,
+				   dev->half_cqid_udma_shift);
+
+	return 0;
+}
+
+/* Return a CQ id obtained from ionic_get_cqid() to the allocator. */
+static void ionic_put_cqid(struct ionic_ibdev *dev, u32 cqid)
+{
+	u32 bitid;
+
+	/* undo the cq_base offset, then map the queue id back to its bit */
+	bitid = ionic_qid_to_bitid(cqid - dev->lif_cfg.cq_base,
+				   dev->lif_cfg.udma_qgrp_shift,
+				   dev->half_cqid_udma_shift);
+
+	ionic_resid_put(&dev->inuse_cqid, bitid);
+}
+
+/**
+ * ionic_create_cq_common() - set up the host-side state of one CQ in a vcq
+ * @vcq: verbs CQ that owns the queue
+ * @buf: page table buffer, initialized here for the caller's create command
+ * @attr: CQ attributes (cqe count and completion vector are used)
+ * @ctx: user context (not referenced here)
+ * @udata: non-NULL for a user CQ, NULL for a kernel CQ
+ * @req_cq: user queue descriptor; only read when @udata is set
+ * @resp_cqid: receives the allocated cqid; only written when @udata is set
+ * @udma_idx: index of the queue within @vcq and of the udma group to use
+ *
+ * On success the CQ is published in dev->cq_tbl and @buf is left
+ * initialized for the caller.
+ *
+ * Return: 0 on success, negative errno on failure with every step undone
+ * and cq->vcq reset to NULL.
+ */
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+			   struct ionic_tbl_buf *buf,
+			   const struct ib_cq_init_attr *attr,
+			   struct ionic_ctx *ctx,
+			   struct ib_udata *udata,
+			   struct ionic_qdesc *req_cq,
+			   __u32 *resp_cqid,
+			   int udma_idx)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(vcq->ibcq.device);
+	struct ionic_cq *cq = &vcq->cq[udma_idx];
+	int rc;
+
+	cq->vcq = vcq;
+
+	if (attr->cqe < 1 || attr->cqe + IONIC_CQ_GRACE > 0xffff) {
+		rc = -EINVAL;
+		goto err_args;
+	}
+
+	rc = ionic_get_cqid(dev, &cq->cqid, udma_idx);
+	if (rc)
+		goto err_args;
+
+	cq->eqid = ionic_get_eqid(dev, attr->comp_vector, udma_idx);
+
+	spin_lock_init(&cq->lock);
+	INIT_LIST_HEAD(&cq->poll_sq);
+	INIT_LIST_HEAD(&cq->flush_sq);
+	INIT_LIST_HEAD(&cq->flush_rq);
+
+	if (udata) {
+		/* user CQ: the ring lives in user memory described by req_cq */
+		rc = ionic_validate_qdesc(req_cq);
+		if (rc)
+			goto err_qdesc;
+
+		cq->umem = ib_umem_get(&dev->ibdev, req_cq->addr, req_cq->size,
+				       IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(cq->umem)) {
+			rc = PTR_ERR(cq->umem);
+			goto err_qdesc;
+		}
+
+		cq->q.ptr = NULL;
+		cq->q.size = req_cq->size;
+		cq->q.mask = req_cq->mask;
+		cq->q.depth_log2 = req_cq->depth_log2;
+		cq->q.stride_log2 = req_cq->stride_log2;
+
+		*resp_cqid = cq->cqid;
+	} else {
+		/* kernel CQ: the driver allocates the ring itself */
+		rc = ionic_queue_init(&cq->q, dev->lif_cfg.hwdev,
+				      attr->cqe + IONIC_CQ_GRACE,
+				      sizeof(struct ionic_v1_cqe));
+		if (rc)
+			goto err_q_init;
+
+		ionic_queue_dbell_init(&cq->q, cq->cqid);
+		cq->color = true;
+		cq->credit = cq->q.mask;
+	}
+
+	rc = ionic_pgtbl_init(dev, buf, cq->umem, cq->q.dma, 1, PAGE_SIZE);
+	if (rc)
+		goto err_pgtbl_init;
+
+	init_completion(&cq->cq_rel_comp);
+	kref_init(&cq->cq_kref);
+
+	/*
+	 * Insert rather than store: a store over an occupied slot succeeds
+	 * and replaces the old entry, so treating that as a failure would
+	 * leave cq_tbl pointing at a CQ that the error path below tears down.
+	 */
+	rc = xa_insert_irq(&dev->cq_tbl, cq->cqid, cq, GFP_KERNEL);
+	if (rc) {
+		/* an occupied slot has always been reported as -EINVAL */
+		if (rc == -EBUSY)
+			rc = -EINVAL;
+
+		goto err_xa;
+	}
+
+	return 0;
+
+err_xa:
+	ionic_pgtbl_unbuf(dev, buf);
+err_pgtbl_init:
+	if (!udata)
+		ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+err_q_init:
+	if (cq->umem)
+		ib_umem_release(cq->umem);
+err_qdesc:
+	ionic_put_cqid(dev, cq->cqid);
+err_args:
+	cq->vcq = NULL;
+
+	return rc;
+}
+
+/**
+ * ionic_destroy_cq_common() - release the host-side state of one CQ
+ * @dev: ionic RDMA device
+ * @cq: completion queue to release
+ *
+ * Does nothing when cq->vcq is NULL. Otherwise removes the CQ from cq_tbl,
+ * drops a reference and waits on cq_rel_comp, releases the queue memory and
+ * returns the cqid to the allocator.
+ */
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+ /* vcq is cleared on setup failure and at the end of this function */
+ if (!cq->vcq)
+ return;
+
+ xa_erase_irq(&dev->cq_tbl, cq->cqid);
+
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+ wait_for_completion(&cq->cq_rel_comp);
+
+ /* a umem-backed ring is released; otherwise the driver-allocated ring is destroyed */
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+ else
+ ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+
+ ionic_put_cqid(dev, cq->cqid);
+
+ cq->vcq = NULL;
+}
+
+/* Check that a queue descriptor is entirely unused: every field must be zero. */
+static int ionic_validate_qdesc_zero(struct ionic_qdesc *q)
+{
+	bool all_zero = !q->addr && !q->size && !q->mask &&
+			!q->depth_log2 && !q->stride_log2;
+
+	return all_zero ? 0 : -EINVAL;
+}
+
+static int ionic_get_pdid(struct ionic_ibdev *dev, u32 *pdid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_pdid);
+ if (rc < 0)
+ return rc;
+
+ *pdid = rc;
+ return 0;
+}
+
+static int ionic_get_ahid(struct ionic_ibdev *dev, u32 *ahid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_ahid);
+ if (rc < 0)
+ return rc;
+
+ *ahid = rc;
+ return 0;
+}
+
+/*
+ * Allocate a memory region id. The id combines the allocated index with
+ * dev->next_mrkey, which is advanced on every successful allocation.
+ */
+static int ionic_get_mrid(struct ionic_ibdev *dev, u32 *mrid)
+{
+	int index;
+
+	/* wrap to 1, skip reserved lkey */
+	index = ionic_resid_get_shared(&dev->inuse_mrid, 1,
+				       dev->inuse_mrid.inuse_size);
+	if (index < 0)
+		return index;
+
+	*mrid = ionic_mrid(index, dev->next_mrkey++);
+
+	return 0;
+}
+
+/* Claim the fixed QP id used for the GSI QP (IB_QPT_GSI). */
+static int ionic_get_gsi_qpid(struct ionic_ibdev *dev, u32 *qpid)
+{
+	int bitid;
+
+	/* the search range holds exactly one id: IB_QPT_GSI */
+	bitid = ionic_resid_get_shared(&dev->inuse_qpid, IB_QPT_GSI,
+				       IB_QPT_GSI + 1);
+	if (bitid < 0)
+		return bitid;
+
+	*qpid = IB_QPT_GSI;
+
+	return 0;
+}
+
+/**
+ * ionic_get_qpid() - allocate a QP id from one of the permitted udma groups
+ * @dev: ionic RDMA device
+ * @qpid: out: allocated QP id
+ * @udma_idx: out: udma group the id was taken from
+ * @udma_mask: bitmask of udma groups the caller accepts
+ *
+ * The id space is split into lif_cfg.udma_count equal ranges. Groups are
+ * tried in the order udma_i ^ next_qpid_udma_idx, and next_qpid_udma_idx is
+ * XORed with (udma_count - 1) on every call, which changes the group that
+ * the next call tries first.
+ *
+ * Return: 0 on success; otherwise the error from the last allocation
+ * attempt, or -EINVAL if no group in @udma_mask was tried.
+ */
+static int ionic_get_qpid(struct ionic_ibdev *dev, u32 *qpid,
+ u8 *udma_idx, u8 udma_mask)
+{
+ unsigned int size, base, bound;
+ int udma_i, udma_x, udma_ix;
+ int rc = -EINVAL;
+
+ udma_x = dev->next_qpid_udma_idx;
+
+ dev->next_qpid_udma_idx ^= dev->lif_cfg.udma_count - 1;
+
+ for (udma_i = 0; udma_i < dev->lif_cfg.udma_count; ++udma_i) {
+ udma_ix = udma_i ^ udma_x;
+
+ if (!(udma_mask & BIT(udma_ix)))
+ continue;
+
+ size = dev->lif_cfg.qp_count / dev->lif_cfg.udma_count;
+ base = size * udma_ix;
+ bound = base + size;
+
+ /* skip reserved SMI and GSI qpids in group zero */
+ if (!base)
+ base = 2;
+
+ rc = ionic_resid_get_shared(&dev->inuse_qpid, base, bound);
+ if (rc >= 0) {
+ *qpid = ionic_bitid_to_qid(rc,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_qpid_udma_shift);
+ *udma_idx = udma_ix;
+
+ rc = 0;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate a doorbell id and compute the physical address of its doorbell
+ * page. Returns 0 and fills *dbid and *addr, or a negative error.
+ */
+static int ionic_get_dbid(struct ionic_ibdev *dev, u32 *dbid, phys_addr_t *addr)
+{
+	int id, dbpage_num;
+
+	/* wrap to 1, skip kernel reserved */
+	id = ionic_resid_get_shared(&dev->inuse_dbid, 1,
+				    dev->inuse_dbid.inuse_size);
+	if (id < 0)
+		return id;
+
+	*dbid = id;
+
+	/* doorbell pages are laid out per lif, dbid_count pages each */
+	dbpage_num = (dev->lif_cfg.lif_hw_index * dev->lif_cfg.dbid_count) + id;
+	*addr = dev->lif_cfg.db_phys + ((phys_addr_t)dbpage_num << PAGE_SHIFT);
+
+	return 0;
+}
+
+/* Return a protection domain id to the pdid allocator. */
+static void ionic_put_pdid(struct ionic_ibdev *dev, u32 pdid)
+{
+ ionic_resid_put(&dev->inuse_pdid, pdid);
+}
+
+/* Return an address handle id to the ahid allocator. */
+static void ionic_put_ahid(struct ionic_ibdev *dev, u32 ahid)
+{
+ ionic_resid_put(&dev->inuse_ahid, ahid);
+}
+
+/* Return a memory region id to the allocator; only its index part is tracked. */
+static void ionic_put_mrid(struct ionic_ibdev *dev, u32 mrid)
+{
+ ionic_resid_put(&dev->inuse_mrid, ionic_mrid_index(mrid));
+}
+
+/* Return a QP id to the allocator after mapping it back to its bit index. */
+static void ionic_put_qpid(struct ionic_ibdev *dev, u32 qpid)
+{
+	u32 bitid;
+
+	bitid = ionic_qid_to_bitid(qpid, dev->lif_cfg.udma_qgrp_shift,
+				   dev->half_qpid_udma_shift);
+
+	ionic_resid_put(&dev->inuse_qpid, bitid);
+}
+
+/* Return a doorbell id to the dbid allocator. */
+static void ionic_put_dbid(struct ionic_ibdev *dev, u32 dbid)
+{
+ ionic_resid_put(&dev->inuse_dbid, dbid);
+}
+
+/*
+ * Allocate an ionic mmap entry describing @size bytes at @pfn and register
+ * it with the RDMA core for @ctx. When @offset is non-NULL it receives the
+ * mmap offset assigned to the entry.
+ *
+ * Returns the embedded rdma_user_mmap_entry, or NULL on any failure.
+ */
+static struct rdma_user_mmap_entry*
+ionic_mmap_entry_insert(struct ionic_ctx *ctx, unsigned long size,
+			unsigned long pfn, u8 mmap_flags, u64 *offset)
+{
+	struct ionic_mmap_entry *new_entry;
+
+	new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+	if (!new_entry)
+		return NULL;
+
+	new_entry->size = size;
+	new_entry->pfn = pfn;
+	new_entry->mmap_flags = mmap_flags;
+
+	if (rdma_user_mmap_entry_insert(&ctx->ibctx, &new_entry->rdma_entry,
+					new_entry->size)) {
+		/* never registered, so it is freed directly here */
+		kfree(new_entry);
+		return NULL;
+	}
+
+	if (offset)
+		*offset = rdma_user_mmap_get_offset(&new_entry->rdma_entry);
+
+	return &new_entry->rdma_entry;
+}
+
+/**
+ * ionic_alloc_ucontext() - set up a user context
+ * @ibctx: user context being initialized
+ * @udata: user request/response buffers
+ *
+ * Allocates a doorbell id for the context, registers an mmap entry for its
+ * doorbell page and reports device parameters to user space in
+ * struct ionic_ctx_resp.
+ *
+ * Return: 0 on success, negative errno on failure, in which case the dbid
+ * and the mmap entry have been released again.
+ */
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct ionic_ctx_resp resp = {};
+ struct ionic_ctx_req req;
+ phys_addr_t db_phys = 0;
+ int rc;
+
+ /* the request is copied in, but none of its fields are read here */
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+
+ /* try to allocate dbid for user ctx */
+ rc = ionic_get_dbid(dev, &ctx->dbid, &db_phys);
+ if (rc < 0)
+ return rc;
+
+ ibdev_dbg(&dev->ibdev, "user space dbid %u\n", ctx->dbid);
+
+ ctx->mmap_dbell = ionic_mmap_entry_insert(ctx, PAGE_SIZE,
+ PHYS_PFN(db_phys), 0, NULL);
+ if (!ctx->mmap_dbell) {
+ rc = -ENOMEM;
+ goto err_mmap_dbell;
+ }
+
+ resp.page_shift = PAGE_SHIFT;
+
+ /* offset of the doorbell within its page */
+ resp.dbell_offset = db_phys & ~PAGE_MASK;
+
+ resp.version = dev->lif_cfg.rdma_version;
+ resp.qp_opcodes = dev->lif_cfg.qp_opcodes;
+ resp.admin_opcodes = dev->lif_cfg.admin_opcodes;
+
+ resp.sq_qtype = dev->lif_cfg.sq_qtype;
+ resp.rq_qtype = dev->lif_cfg.rq_qtype;
+ resp.cq_qtype = dev->lif_cfg.cq_qtype;
+ resp.admin_qtype = dev->lif_cfg.aq_qtype;
+ resp.max_stride = dev->lif_cfg.max_stride;
+ resp.max_spec = IONIC_SPEC_HIGH;
+
+ resp.udma_count = dev->lif_cfg.udma_count;
+ resp.expdb_mask = dev->lif_cfg.expdb_mask;
+
+ if (dev->lif_cfg.sq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_SQ;
+ if (dev->lif_cfg.rq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_RQ;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+
+ return 0;
+
+err_resp:
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+err_mmap_dbell:
+ ionic_put_dbid(dev, ctx->dbid);
+
+ return rc;
+}
+
+/**
+ * ionic_dealloc_ucontext() - release what ionic_alloc_ucontext() set up
+ * @ibctx: user context being torn down
+ *
+ * Removes the doorbell mmap entry and returns the context's doorbell id.
+ */
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+ ionic_put_dbid(dev, ctx->dbid);
+}
+
+/**
+ * ionic_mmap() - map a registered mmap entry into a user vma
+ * @ibctx: user context that owns the entry
+ * @vma: user mapping request
+ *
+ * Looks up the mmap entry matching @vma, selects write-combining or
+ * non-cached page protection from the entry's mmap_flags and remaps the
+ * entry's pfn range into @vma.
+ *
+ * Return: 0 on success, -EINVAL if no entry matches, or the error from
+ * rdma_user_mmap_io().
+ */
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct ionic_mmap_entry *ionic_entry;
+ int rc = 0;
+
+ rdma_entry = rdma_user_mmap_entry_get(&ctx->ibctx, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev, "not found %#lx\n",
+ vma->vm_pgoff << PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry,
+ rdma_entry);
+
+ ibdev_dbg(&dev->ibdev, "writecombine? %d\n",
+ ionic_entry->mmap_flags & IONIC_MMAP_WC);
+ if (ionic_entry->mmap_flags & IONIC_MMAP_WC)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ ibdev_dbg(&dev->ibdev, "remap st %#lx pf %#lx sz %#lx\n",
+ vma->vm_start, ionic_entry->pfn, ionic_entry->size);
+ rc = rdma_user_mmap_io(&ctx->ibctx, vma, ionic_entry->pfn,
+ ionic_entry->size, vma->vm_page_prot,
+ rdma_entry);
+ if (rc)
+ ibdev_dbg(&dev->ibdev, "remap failed %d\n", rc);
+
+ /* pairs with rdma_user_mmap_entry_get() above, on success and failure alike */
+ rdma_user_mmap_entry_put(rdma_entry);
+ return rc;
+}
+
+/* Free the ionic_mmap_entry that embeds @rdma_entry. */
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+	kfree(container_of(rdma_entry, struct ionic_mmap_entry, rdma_entry));
+}
+
+/**
+ * ionic_alloc_pd() - allocate a protection domain id
+ * @ibpd: protection domain being created
+ * @udata: user data (unused)
+ *
+ * Return: 0 on success or the negative error from the pdid allocator.
+ */
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ return ionic_get_pdid(dev, &pd->pdid);
+}
+
+/**
+ * ionic_dealloc_pd() - release a protection domain id
+ * @ibpd: protection domain being destroyed
+ * @udata: user data (unused)
+ *
+ * Return: always 0.
+ */
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ ionic_put_pdid(dev, pd->pdid);
+
+ return 0;
+}
+
+/**
+ * ionic_build_hdr() - build an Ethernet/IP/UDP header template for RoCE
+ * @dev: ionic RDMA device (not referenced here)
+ * @hdr: header to fill in
+ * @attr: address attributes; must be RoCE type with ah_flags == IB_AH_GRH
+ * @sport: UDP source port
+ * @want_ecn: set the ECN bits of the TOS/traffic class instead of clearing them
+ *
+ * Return: 0 on success, -EINVAL for unsupported attributes, or the error
+ * from rdma_read_gid_l2_fields() or ib_ud_header_init().
+ */
+static int ionic_build_hdr(struct ionic_ibdev *dev,
+ struct ib_ud_header *hdr,
+ const struct rdma_ah_attr *attr,
+ u16 sport, bool want_ecn)
+{
+ const struct ib_global_route *grh;
+ enum rdma_network_type net;
+ u16 vlan;
+ int rc;
+
+ if (attr->ah_flags != IB_AH_GRH)
+ return -EINVAL;
+ if (attr->type != RDMA_AH_ATTR_TYPE_ROCE)
+ return -EINVAL;
+
+ grh = rdma_ah_read_grh(attr);
+
+ rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]);
+ if (rc)
+ return rc;
+
+ net = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ /* a vlan of 0xffff is treated as "no vlan tag" throughout */
+ rc = ib_ud_header_init(0, /* no payload */
+ 0, /* no lrh */
+ 1, /* yes eth */
+ vlan != 0xffff,
+ 0, /* no grh */
+ net == RDMA_NETWORK_IPV4 ? 4 : 6,
+ 1, /* yes udp */
+ 0, /* no imm */
+ hdr);
+ if (rc)
+ return rc;
+
+ ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac);
+
+ if (net == RDMA_NETWORK_IPV4) {
+ hdr->eth.type = cpu_to_be16(ETH_P_IP);
+ hdr->ip4.frag_off = cpu_to_be16(0x4000); /* don't fragment */
+ hdr->ip4.ttl = grh->hop_limit;
+ hdr->ip4.tot_len = cpu_to_be16(0xffff);
+ /* the IPv4 addresses are taken from bytes 12..15 of the GIDs */
+ hdr->ip4.saddr =
+ *(const __be32 *)(grh->sgid_attr->gid.raw + 12);
+ hdr->ip4.daddr = *(const __be32 *)(grh->dgid.raw + 12);
+
+ if (want_ecn)
+ hdr->ip4.tos = ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->ip4.tos = ionic_clear_ecn(grh->traffic_class);
+ } else {
+ hdr->eth.type = cpu_to_be16(ETH_P_IPV6);
+ hdr->grh.flow_label = cpu_to_be32(grh->flow_label);
+ hdr->grh.hop_limit = grh->hop_limit;
+ hdr->grh.source_gid = grh->sgid_attr->gid;
+ hdr->grh.destination_gid = grh->dgid;
+
+ if (want_ecn)
+ hdr->grh.traffic_class =
+ ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->grh.traffic_class =
+ ionic_clear_ecn(grh->traffic_class);
+ }
+
+ if (vlan != 0xffff) {
+ /* the service level goes into the priority bits of the tag */
+ vlan |= rdma_ah_get_sl(attr) << VLAN_PRIO_SHIFT;
+ hdr->vlan.tag = cpu_to_be16(vlan);
+ /* move the L3 ethertype behind the 802.1Q tag */
+ hdr->vlan.type = hdr->eth.type;
+ hdr->eth.type = cpu_to_be16(ETH_P_8021Q);
+ }
+
+ hdr->udp.sport = cpu_to_be16(sport);
+ hdr->udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT);
+
+ return 0;
+}
+
+/**
+ * ionic_set_ah_attr() - rebuild address handle attributes from a header
+ * @dev: ionic RDMA device (not referenced here)
+ * @ah_attr: attributes to fill in; zeroed first
+ * @hdr: header template to read from
+ * @sgid_index: source GID index to report
+ *
+ * For an IPv4 header, bytes 10..15 of hdr->grh.destination_gid are
+ * overwritten with 0xffff followed by the IPv4 destination address, so
+ * @hdr itself is modified. The port number is always reported as 1.
+ */
+static void ionic_set_ah_attr(struct ionic_ibdev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_ud_header *hdr,
+ int sgid_index)
+{
+ u32 flow_label;
+ u16 vlan = 0;
+ u8 tos, ttl;
+
+ if (hdr->vlan_present)
+ vlan = be16_to_cpu(hdr->vlan.tag);
+
+ if (hdr->ipv4_present) {
+ flow_label = 0;
+ ttl = hdr->ip4.ttl;
+ tos = hdr->ip4.tos;
+ *(__be16 *)(hdr->grh.destination_gid.raw + 10) = cpu_to_be16(0xffff);
+ *(__be32 *)(hdr->grh.destination_gid.raw + 12) = hdr->ip4.daddr;
+ } else {
+ flow_label = be32_to_cpu(hdr->grh.flow_label);
+ ttl = hdr->grh.hop_limit;
+ tos = hdr->grh.traffic_class;
+ }
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+ if (hdr->eth_present)
+ ether_addr_copy(ah_attr->roce.dmac, hdr->eth.dmac_h);
+ /* inverse of ionic_build_hdr(): the service level sits in the tag's priority bits */
+ rdma_ah_set_sl(ah_attr, vlan >> VLAN_PRIO_SHIFT);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_grh(ah_attr, NULL, flow_label, sgid_index, ttl, tos);
+ rdma_ah_set_dgid_raw(ah_attr, &hdr->grh.destination_gid);
+}
+
+/**
+ * ionic_create_ah_cmd() - post the admin command that creates an AH
+ * @dev: ionic RDMA device
+ * @ah: address handle; its hdr and sgid_index are filled in here
+ * @pd: protection domain of the address handle
+ * @attr: address attributes to build the header template from
+ * @flags: create flags; RDMA_CREATE_AH_SLEEPABLE selects GFP_KERNEL and a
+ * sleeping wait, otherwise GFP_ATOMIC and IONIC_ADMIN_F_BUSYWAIT are used
+ *
+ * Builds the header template, packs it into a temporary buffer, DMA-maps
+ * it for the device and waits for the CREATE_AH admin command to finish.
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * CREATE_AH, -ENOMEM if the buffer cannot be allocated, or the error from
+ * header building, DMA mapping or the admin wait.
+ */
+static int ionic_create_ah_cmd(struct ionic_ibdev *dev,
+ struct ionic_ah *ah,
+ struct ionic_pd *pd,
+ struct rdma_ah_attr *attr,
+ u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_AH_IN_V1_LEN),
+ .cmd.create_ah = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .dbid_flags = cpu_to_le16(dev->lif_cfg.dbid),
+ .id_ver = cpu_to_le32(ah->ahid),
+ }
+ }
+ };
+ enum ionic_admin_flags admin_flags = 0;
+ dma_addr_t hdr_dma = 0;
+ void *hdr_buf;
+ gfp_t gfp = GFP_ATOMIC;
+ int rc, hdr_len = 0;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_AH)
+ return -EBADRQC;
+
+ if (flags & RDMA_CREATE_AH_SLEEPABLE)
+ gfp = GFP_KERNEL;
+ else
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ rc = ionic_build_hdr(dev, &ah->hdr, attr, IONIC_ROCE_UDP_SPORT, false);
+ if (rc)
+ return rc;
+
+ /* with a vlan tag the L3 ethertype is found in vlan.type instead */
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_8021Q)) {
+ if (ah->hdr.vlan.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP;
+ } else {
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ ah->sgid_index = rdma_ah_read_grh(attr)->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, gfp);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ /* the BTH and DETH lengths are subtracted from the packed length */
+ hdr_len = ib_ud_header_pack(&ah->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ wr.wqe.cmd.create_ah.dma_addr = cpu_to_le64(hdr_dma);
+ wr.wqe.cmd.create_ah.length = cpu_to_le32(hdr_len);
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, admin_flags);
+
+ /* the buffer is unmapped and freed once the wait returns */
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+/**
+ * ionic_destroy_ah_cmd() - post the admin command that destroys an AH
+ * @dev: ionic RDMA device
+ * @ahid: id of the address handle to destroy
+ * @flags: caller's create or destroy flags; without the sleepable bit the
+ * wait uses IONIC_ADMIN_F_BUSYWAIT
+ *
+ * The result of the admin wait is deliberately ignored.
+ *
+ * Return: -EBADRQC if admin_opcodes does not cover DESTROY_AH, otherwise 0.
+ */
+static int ionic_destroy_ah_cmd(struct ionic_ibdev *dev, u32 ahid, u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_AH_IN_V1_LEN),
+ .cmd.destroy_ah = {
+ .ah_id = cpu_to_le32(ahid),
+ },
+ }
+ };
+ enum ionic_admin_flags admin_flags = IONIC_ADMIN_F_TEARDOWN;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_AH)
+ return -EBADRQC;
+
+ /*
+ * NOTE(review): this tests the CREATE flag although ionic_destroy_ah()
+ * passes destroy flags - presumably RDMA_DESTROY_AH_SLEEPABLE has the
+ * same value; confirm.
+ */
+ if (!(flags & RDMA_CREATE_AH_SLEEPABLE))
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ ionic_admin_post(dev, &wr);
+ ionic_admin_wait(dev, &wr, admin_flags);
+
+ /* No host-memory resource is associated with ah, so it is ok
+ * to "succeed" and complete this destroy ah on the host.
+ */
+ return 0;
+}
+
+/**
+ * ionic_create_ah() - create an address handle
+ * @ibah: address handle being created
+ * @init_attr: address attributes and create flags
+ * @udata: user response buffer, or NULL for a kernel caller
+ *
+ * Allocates an ahid, creates the address handle on the device and, for a
+ * user caller, reports the ahid back in struct ionic_ah_resp.
+ *
+ * Return: 0 on success, negative errno on failure with the device object
+ * and the ahid released again.
+ */
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct rdma_ah_attr *attr = init_attr->ah_attr;
+ struct ionic_pd *pd = to_ionic_pd(ibah->pd);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+ struct ionic_ah_resp resp = {};
+ u32 flags = init_attr->flags;
+ int rc;
+
+ rc = ionic_get_ahid(dev, &ah->ahid);
+ if (rc)
+ return rc;
+
+ rc = ionic_create_ah_cmd(dev, ah, pd, attr, flags);
+ if (rc)
+ goto err_cmd;
+
+ if (udata) {
+ resp.ahid = ah->ahid;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ return 0;
+
+err_resp:
+ /* the create flags are reused for the destroy command's wait mode */
+ ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+err_cmd:
+ ionic_put_ahid(dev, ah->ahid);
+ return rc;
+}
+
+/**
+ * ionic_query_ah() - report the attributes of an address handle
+ * @ibah: address handle to query
+ * @ah_attr: attributes to fill in
+ *
+ * The attributes are rebuilt from the header template and sgid index
+ * stored in the address handle.
+ *
+ * Return: always 0.
+ */
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+
+ ionic_set_ah_attr(dev, ah_attr, &ah->hdr, ah->sgid_index);
+
+ return 0;
+}
+
+/**
+ * ionic_destroy_ah() - destroy an address handle
+ * @ibah: address handle to destroy
+ * @flags: destroy flags, forwarded to the destroy command
+ *
+ * The ahid is only returned to the allocator when the destroy command
+ * reports success.
+ *
+ * Return: 0 on success or the error from ionic_destroy_ah_cmd().
+ */
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+	struct ionic_ah *ah = to_ionic_ah(ibah);
+	int rc = ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+
+	if (!rc)
+		ionic_put_ahid(dev, ah->ahid);
+
+	return rc;
+}
+
+/**
+ * ionic_create_mr_cmd() - post the admin command that creates an MR or MW
+ * @dev: ionic RDMA device
+ * @pd: protection domain of the region
+ * @mr: region; its buf, flags and mrid describe the object to create
+ * @addr: virtual address of the region
+ * @length: length of the region in bytes
+ *
+ * Sets mr->created when the command succeeds.
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * CREATE_MR, or the error from the admin wait.
+ */
+static int ionic_create_mr_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_mr *mr,
+ u64 addr,
+ u64 length)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_MR_IN_V1_LEN),
+ .cmd.create_mr = {
+ .va = cpu_to_le64(addr),
+ .length = cpu_to_le64(length),
+ .pd_id = cpu_to_le32(pd->pdid),
+ .page_size_log2 = mr->buf.page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(mr->buf.tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(&mr->buf, addr),
+ .dbid_flags = cpu_to_le16(mr->flags),
+ .id_ver = cpu_to_le32(mr->mrid),
+ }
+ }
+ };
+ int rc;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, 0);
+ /* ionic_dereg_mr() only posts a destroy for regions marked created */
+ if (!rc)
+ mr->created = true;
+
+ return rc;
+}
+
+/**
+ * ionic_destroy_mr_cmd() - post the admin command that destroys an MR or MW
+ * @dev: ionic RDMA device
+ * @mrid: id of the region to destroy
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * DESTROY_MR, or the error from the admin wait.
+ */
+static int ionic_destroy_mr_cmd(struct ionic_ibdev *dev, u32 mrid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_MR_IN_V1_LEN),
+ .cmd.destroy_mr = {
+ .mr_id = cpu_to_le32(mrid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+/**
+ * ionic_get_dma_mr() - allocate the DMA memory region for a PD
+ * @ibpd: protection domain
+ * @access: access flags (not referenced here)
+ *
+ * The region uses the fixed keys IONIC_DMA_LKEY and IONIC_DMA_RKEY and no
+ * admin command is posted. The PD is marked IONIC_QPF_PRIVILEGED.
+ *
+ * Return: the new region, or ERR_PTR(-ENOMEM).
+ */
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access)
+{
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->ibmr.lkey = IONIC_DMA_LKEY;
+ mr->ibmr.rkey = IONIC_DMA_RKEY;
+
+ if (pd)
+ pd->flags |= IONIC_QPF_PRIVILEGED;
+
+ return &mr->ibmr;
+}
+
+/**
+ * ionic_reg_user_mr() - register a user memory region
+ * @ibpd: protection domain
+ * @start: start of the user memory to pin
+ * @length: length of the region in bytes
+ * @addr: I/O virtual address of the region
+ * @access: access flags
+ * @dmah: DMA handle; not supported, must be NULL
+ * @udata: user data (not referenced here)
+ *
+ * Return: the new region, or an ERR_PTR: -EOPNOTSUPP if @dmah is set,
+ * -ENOMEM, -EINVAL if no supported page size fits, or the error from id
+ * allocation, pinning, page table setup or the create command.
+ */
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ unsigned long pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ /* the same id serves as both lkey and rkey */
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ mr->umem = ib_umem_get(&dev->ibdev, start, length, access);
+ if (IS_ERR(mr->umem)) {
+ rc = PTR_ERR(mr->umem);
+ goto err_umem;
+ }
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
+ /* the page table buffer is released on success as well as on failure */
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+/**
+ * ionic_reg_user_mr_dmabuf() - register a memory region backed by a dma-buf
+ * @ibpd: protection domain
+ * @offset: offset of the region within the dma-buf
+ * @length: length of the region in bytes
+ * @addr: I/O virtual address of the region
+ * @fd: dma-buf file descriptor
+ * @access: access flags
+ * @dmah: DMA handle; not supported, must be NULL
+ * @attrs: uverbs attributes (not referenced here)
+ *
+ * Follows the same steps as ionic_reg_user_mr(), except that the memory is
+ * obtained with ib_umem_dmabuf_get_pinned().
+ *
+ * Return: the new region, or an ERR_PTR: -EOPNOTSUPP if @dmah is set,
+ * -ENOMEM, -EINVAL if no supported page size fits, or the error from id
+ * allocation, pinning, page table setup or the create command.
+ */
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ionic_mr *mr;
+ u64 pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ /* the same id serves as both lkey and rkey */
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&dev->ibdev, offset, length,
+ fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ rc = PTR_ERR(umem_dmabuf);
+ goto err_umem;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
+ /* the page table buffer is released on success as well as on failure */
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+/**
+ * ionic_dereg_mr() - deregister and free a memory region
+ * @ibmr: memory region to release
+ * @udata: user data (not referenced here)
+ *
+ * A region with a zero lkey is simply freed. Otherwise the device object
+ * is destroyed if it was created, and the page table, pinned memory and
+ * mrid are released before the region is freed.
+ *
+ * Return: 0 on success, or the error from the destroy command, in which
+ * case nothing has been released.
+ */
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ /* NOTE(review): presumably the DMA MR from ionic_get_dma_mr() - holds if IONIC_DMA_LKEY is 0; confirm */
+ if (!mr->ibmr.lkey)
+ goto out;
+
+ if (mr->created) {
+ rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+ if (rc)
+ return rc;
+ }
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ ionic_put_mrid(dev, mr->mrid);
+
+out:
+ kfree(mr);
+
+ return 0;
+}
+
+/**
+ * ionic_alloc_mr() - allocate a memory region for later mapping
+ * @ibpd: protection domain
+ * @type: region type; only IB_MR_TYPE_MEM_REG is accepted
+ * @max_sg: number of page table entries to provide for
+ *
+ * The page table is set up for @max_sg entries and the region is created
+ * on the device with zero mapped pages, address and length.
+ *
+ * Return: the new region, or an ERR_PTR: -EINVAL for another @type,
+ * -ENOMEM, or the error from id allocation, page table setup or the create
+ * command.
+ */
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+
+ mr->flags = IONIC_MRF_PHYS_MR;
+
+ /* mr->umem is still NULL from kzalloc: no user memory backs this region */
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, max_sg, PAGE_SIZE);
+ if (rc)
+ goto err_pgtbl;
+
+ /* nothing is mapped yet, so the create command reports zero pages */
+ mr->buf.tbl_pages = 0;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+ if (rc)
+ goto err_cmd;
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Per-page callback for ib_sg_to_pages(): append one DMA address to the
+ * region's page table. Returns the result of ionic_pgtbl_page().
+ */
+static int ionic_map_mr_page(struct ib_mr *ibmr, u64 dma)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+	struct ionic_mr *mr = to_ionic_mr(ibmr);
+
+	/*
+	 * Print the address as an integer: %p would hash it, and casting a
+	 * u64 to a pointer truncates it where pointers are 32 bits wide.
+	 */
+	ibdev_dbg(&dev->ibdev, "dma %#llx\n", dma);
+	return ionic_pgtbl_page(&mr->buf, dma);
+}
+
+/**
+ * ionic_map_mr_sg() - load a scatterlist into a region's page table
+ * @ibmr: memory region, allocated with ib_alloc_mr()
+ * @sg: scatterlist to map
+ * @sg_nents: number of entries in @sg
+ * @sg_offset: in/out offset into the first entry, passed to ib_sg_to_pages()
+ *
+ * Resets the page count, then fills the page table through
+ * ib_sg_to_pages() with ionic_map_mr_page() as the per-page callback.
+ *
+ * Return: the result of ib_sg_to_pages(), or -EINVAL if the region has no
+ * page table capacity.
+ */
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ /* mr must be allocated using ib_alloc_mr() */
+ if (unlikely(!mr->buf.tbl_limit))
+ return -EINVAL;
+
+ mr->buf.tbl_pages = 0;
+
+ /* sync the table to the CPU for the duration of the update */
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_cpu(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ ibdev_dbg(&dev->ibdev, "sg %p nent %d\n", sg, sg_nents);
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ionic_map_mr_page);
+
+ mr->buf.page_size_log2 = order_base_2(ibmr->page_size);
+
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_device(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ return rc;
+}
+
+/**
+ * ionic_alloc_mw() - allocate a memory window
+ * @ibmw: memory window being created
+ * @udata: user data (not referenced here)
+ *
+ * Allocates an mrid to use as the window's rkey and creates the window on
+ * the device as type 1 or type 2, depending on ibmw.type.
+ *
+ * Return: 0 on success, or the error from id allocation or the create
+ * command (the mrid is returned in the latter case).
+ */
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+	struct ionic_pd *pd = to_ionic_pd(ibmw->pd);
+	struct ionic_mr *mr = to_ionic_mw(ibmw);
+	int rc;
+
+	rc = ionic_get_mrid(dev, &mr->mrid);
+	if (rc)
+		return rc;
+
+	mr->ibmw.rkey = mr->mrid;
+	mr->flags = (mr->ibmw.type == IB_MW_TYPE_1) ?
+		    IONIC_MRF_MW_1 : IONIC_MRF_MW_2;
+
+	rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+	if (rc)
+		ionic_put_mrid(dev, mr->mrid);
+
+	return rc;
+}
+
+/**
+ * ionic_dealloc_mw() - destroy a memory window
+ * @ibmw: memory window to destroy
+ *
+ * The mrid is only returned to the allocator when the destroy command
+ * succeeds.
+ *
+ * Return: 0 on success or the error from ionic_destroy_mr_cmd().
+ */
+int ionic_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+	struct ionic_mr *mr = to_ionic_mw(ibmw);
+	int rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+
+	if (!rc)
+		ionic_put_mrid(dev, mr->mrid);
+
+	return rc;
+}
+
+/**
+ * ionic_create_cq_cmd() - post the admin command that creates a CQ
+ * @dev: ionic RDMA device
+ * @ctx: user context, used to select the doorbell id
+ * @cq: completion queue, already set up on the host side
+ * @buf: page table buffer describing the queue memory
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * CREATE_CQ, or the error from the admin wait.
+ */
+static int ionic_create_cq_cmd(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_cq *cq,
+ struct ionic_tbl_buf *buf)
+{
+ const u16 dbid = ionic_ctx_dbid(dev, ctx);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_CQ_IN_V1_LEN),
+ .cmd.create_cq = {
+ .eq_id = cpu_to_le32(cq->eqid),
+ .depth_log2 = cq->q.depth_log2,
+ .stride_log2 = cq->q.stride_log2,
+ .page_size_log2 = buf->page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(buf->tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(buf, 0),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(cq->cqid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+/**
+ * ionic_destroy_cq_cmd() - post the admin command that destroys a CQ
+ * @dev: ionic RDMA device
+ * @cqid: id of the completion queue to destroy
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * DESTROY_CQ, or the error from the admin wait.
+ */
+static int ionic_destroy_cq_cmd(struct ionic_ibdev *dev, u32 cqid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN),
+ .cmd.destroy_cq = {
+ .cq_id = cpu_to_le32(cqid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+/**
+ * ionic_create_cq() - create a verbs completion queue
+ * @ibcq: completion queue being created
+ * @attr: CQ attributes
+ * @attrs: uverbs attribute bundle carrying the driver udata
+ *
+ * Creates one device CQ for every udma group selected by the udma mask:
+ * all groups of the device, narrowed by the user's request when udata is
+ * present. For a user CQ the allocated ids and the effective mask are
+ * returned in struct ionic_cq_resp.
+ *
+ * Return: 0 on success, negative errno on failure with every CQ created so
+ * far destroyed again.
+ */
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		    struct uverbs_attr_bundle *attrs)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+	struct ib_udata *udata = &attrs->driver_udata;
+	struct ionic_ctx *ctx =
+		rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+	struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+	struct ionic_tbl_buf buf = {};
+	/*
+	 * Zero the response: cqid[] is only filled for the udma groups in
+	 * the mask, and the whole struct is copied to user space.
+	 */
+	struct ionic_cq_resp resp = {};
+	struct ionic_cq_req req;
+	int udma_idx = 0, rc;
+
+	if (udata) {
+		rc = ib_copy_from_udata(&req, udata, sizeof(req));
+		if (rc)
+			return rc;
+	}
+
+	vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+	if (udata)
+		vcq->udma_mask &= req.udma_mask;
+
+	if (!vcq->udma_mask) {
+		rc = -EINVAL;
+		goto err_init;
+	}
+
+	for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) {
+		if (!(vcq->udma_mask & BIT(udma_idx)))
+			continue;
+
+		rc = ionic_create_cq_common(vcq, &buf, attr, ctx, udata,
+					    &req.cq[udma_idx],
+					    &resp.cqid[udma_idx],
+					    udma_idx);
+		if (rc)
+			goto err_init;
+
+		rc = ionic_create_cq_cmd(dev, ctx, &vcq->cq[udma_idx], &buf);
+		if (rc)
+			goto err_cmd;
+
+		ionic_pgtbl_unbuf(dev, &buf);
+	}
+
+	vcq->ibcq.cqe = attr->cqe;
+
+	if (udata) {
+		resp.udma_mask = vcq->udma_mask;
+
+		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+		if (rc)
+			goto err_resp;
+	}
+
+	return 0;
+
+	/*
+	 * Unwind loop. The error labels jump into its body so that the CQ
+	 * that failed part-way is cleaned up from the right step, after
+	 * which the loop destroys every earlier CQ in reverse order.
+	 */
+err_resp:
+	while (udma_idx) {
+		--udma_idx;
+		if (!(vcq->udma_mask & BIT(udma_idx)))
+			continue;
+		ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+err_cmd:
+		ionic_pgtbl_unbuf(dev, &buf);
+		ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+err_init:
+		;
+	}
+
+	return rc;
+}
+
+/**
+ * ionic_destroy_cq() - destroy a verbs completion queue
+ * @ibcq: completion queue to destroy
+ * @udata: user data (not referenced here)
+ *
+ * Destroys the device CQ of every udma group in the vcq's mask, highest
+ * index first. A CQ whose destroy command fails keeps its host-side state.
+ *
+ * Return: 0 if every destroy command succeeded, otherwise the first error
+ * encountered.
+ */
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int udma_idx, rc_tmp, rc = 0;
+
+ for (udma_idx = dev->lif_cfg.udma_count; udma_idx; ) {
+ --udma_idx;
+
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+
+ rc_tmp = ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+ if (rc_tmp) {
+ /* remember only the first failure, but keep going */
+ if (!rc)
+ rc = rc_tmp;
+
+ continue;
+ }
+
+ ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+ }
+
+ return rc;
+}
+
+/* Report whether the PD was created with IB_PD_UNSAFE_GLOBAL_RKEY set. */
+static bool pd_remote_privileged(struct ib_pd *pd)
+{
+	return (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) != 0;
+}
+
+/**
+ * ionic_create_qp_cmd() - post the admin command that creates a QP
+ * @dev: ionic RDMA device
+ * @pd: protection domain of the QP
+ * @send_cq: CQ for send completions; only read when the QP has a send queue
+ * @recv_cq: CQ for receive completions; only read when the QP has a receive queue
+ * @qp: queue pair, already set up on the host side
+ * @sq_buf: page table buffer of the send queue
+ * @rq_buf: page table buffer of the receive queue
+ * @attr: QP init attributes (the QP type is used)
+ *
+ * Return: 0 on success, -EBADRQC if admin_opcodes does not cover
+ * CREATE_QP, or the error from the admin wait.
+ */
+static int ionic_create_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_cq *send_cq,
+ struct ionic_cq *recv_cq,
+ struct ionic_qp *qp,
+ struct ionic_tbl_buf *sq_buf,
+ struct ionic_tbl_buf *rq_buf,
+ struct ib_qp_init_attr *attr)
+{
+ const u16 dbid = ionic_obj_dbid(dev, pd->ibpd.uobject);
+ const u32 flags = to_ionic_qp_flags(0, 0,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(&pd->ibpd));
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_QP_IN_V1_LEN),
+ .cmd.create_qp = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .priv_flags = cpu_to_be32(flags),
+ .type_state = to_ionic_qp_type(attr->qp_type),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_QP)
+ return -EBADRQC;
+
+ /* queue fields stay zero for a queue the QP does not have */
+ if (qp->has_sq) {
+ wr.wqe.cmd.create_qp.sq_cq_id = cpu_to_le32(send_cq->cqid);
+ wr.wqe.cmd.create_qp.sq_depth_log2 = qp->sq.depth_log2;
+ wr.wqe.cmd.create_qp.sq_stride_log2 = qp->sq.stride_log2;
+ wr.wqe.cmd.create_qp.sq_page_size_log2 = sq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.sq_tbl_index_xrcd_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.sq_map_count =
+ cpu_to_le32(sq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.sq_dma_addr = ionic_pgtbl_dma(sq_buf, 0);
+ }
+
+ if (qp->has_rq) {
+ wr.wqe.cmd.create_qp.rq_cq_id = cpu_to_le32(recv_cq->cqid);
+ wr.wqe.cmd.create_qp.rq_depth_log2 = qp->rq.depth_log2;
+ wr.wqe.cmd.create_qp.rq_stride_log2 = qp->rq.stride_log2;
+ wr.wqe.cmd.create_qp.rq_page_size_log2 = rq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.rq_tbl_index_srq_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.rq_map_count =
+ cpu_to_le32(rq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.rq_dma_addr = ionic_pgtbl_dma(rq_buf, 0);
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+/*
+ * Build and post the MODIFY_QP admin command, then wait for it to complete.
+ *
+ * When IB_QP_AV is set in @mask, a packet header template is built into
+ * qp->hdr, packed into a temporary buffer and DMA-mapped for the device for
+ * the duration of the command.
+ *
+ * Returns -EBADRQC when the opcode is not supported by the device,
+ * -ENOMEM when IB_QP_AV is requested but qp->hdr is NULL or the temporary
+ * buffer cannot be allocated, an error from ionic_build_hdr() or
+ * dma_mapping_error(), otherwise the result of ionic_admin_wait().
+ */
+static int ionic_modify_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_qp *qp,
+ struct ib_qp_attr *attr,
+ int mask)
+{
+ const u32 flags = to_ionic_qp_flags(attr->qp_access_flags,
+ attr->en_sqd_async_notify,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(qp->ibqp.pd));
+ const u8 state = to_ionic_qp_modify_state(attr->qp_state,
+ attr->cur_qp_state);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_MODIFY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_MODIFY_QP_IN_V1_LEN),
+ .cmd.mod_qp = {
+ .attr_mask = cpu_to_be32(mask),
+ .access_flags = cpu_to_be16(flags),
+ .rq_psn = cpu_to_le32(attr->rq_psn),
+ .sq_psn = cpu_to_le32(attr->sq_psn),
+ .rate_limit_kbps =
+ cpu_to_le32(attr->rate_limit),
+ .pmtu = (attr->path_mtu + 7),
+ /* retry count in the low nibble, rnr retry in the high nibble */
+ .retry = (attr->retry_cnt |
+ (attr->rnr_retry << 4)),
+ .rnr_timer = attr->min_rnr_timer,
+ .retry_timeout = attr->timeout,
+ .type_state = state,
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ void *hdr_buf = NULL;
+ dma_addr_t hdr_dma = 0;
+ int rc, hdr_len = 0;
+ u16 sport;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_MODIFY_QP)
+ return -EBADRQC;
+
+ if ((mask & IB_QP_MAX_DEST_RD_ATOMIC) && attr->max_dest_rd_atomic) {
+ /* Note, round up/down was already done for allocating
+ * resources on the device. The allocation order is in cache
+ * line size. We can't use the order of the resource
+ * allocation to determine the order wqes here, because for
+ * queue length <= one cache line it is not distinct.
+ *
+ * Therefore, order wqes is computed again here.
+ *
+ * Account for hole and round up to the next order.
+ */
+ wr.wqe.cmd.mod_qp.rsq_depth =
+ order_base_2(attr->max_dest_rd_atomic + 1);
+ wr.wqe.cmd.mod_qp.rsq_index = cpu_to_le32(~0);
+ }
+
+ if ((mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ /* Account for hole and round down to the next order */
+ wr.wqe.cmd.mod_qp.rrq_depth =
+ order_base_2(attr->max_rd_atomic + 2) - 1;
+ wr.wqe.cmd.mod_qp.rrq_index = cpu_to_le32(~0);
+ }
+
+ /* The same command field carries the dest QPN for RC/UC and the qkey otherwise */
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn =
+ cpu_to_le32(attr->dest_qp_num);
+ else
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn = cpu_to_le32(attr->qkey);
+
+ if (mask & IB_QP_AV) {
+ /* qp->hdr is the header template storage; it must already exist */
+ if (!qp->hdr)
+ return -ENOMEM;
+
+ sport = rdma_get_udp_sport(grh->flow_label,
+ qp->qpid,
+ attr->dest_qp_num);
+
+ rc = ionic_build_hdr(dev, qp->hdr, &attr->ah_attr, sport, true);
+ if (rc)
+ return rc;
+
+ qp->sgid_index = grh->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ /* The BTH and DETH bytes are excluded from the template length */
+ hdr_len = ib_ud_header_pack(qp->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ /* Checksum profile depends on IPv4 vs IPv6 and on VLAN tag presence */
+ if (qp->hdr->ipv4_present) {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ } else {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ /* Header length is packed into the top byte next to the AH id */
+ wr.wqe.cmd.mod_qp.ah_id_len =
+ cpu_to_le32(qp->ahid | (hdr_len << 24));
+ wr.wqe.cmd.mod_qp.dma_addr = cpu_to_le64(hdr_dma);
+
+ wr.wqe.cmd.mod_qp.en_pcp = attr->ah_attr.sl;
+ wr.wqe.cmd.mod_qp.ip_dscp = grh->traffic_class >> 2;
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ rc = ionic_admin_wait(dev, &wr, 0);
+
+ /* The header buffer is only mapped/allocated on the IB_QP_AV path */
+ if (mask & IB_QP_AV)
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ if (mask & IB_QP_AV)
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+/*
+ * Query the device for the current attributes of a QP.
+ *
+ * Capability fields of @attr are derived from the host-side queue geometry.
+ * The remaining fields are decoded from two page-sized buffers the device
+ * fills in through the QUERY_QP admin command.  When IB_QP_AV is set in
+ * @mask an additional page-sized header buffer is handed to the device and
+ * attr->ah_attr is filled from qp->hdr.
+ *
+ * Returns -EBADRQC when the opcode is not supported by the device, -ENOMEM
+ * on allocation failure, an error from dma_mapping_error(), or the result
+ * of ionic_admin_wait().
+ */
+static int ionic_query_qp_cmd(struct ionic_ibdev *dev,
+			      struct ionic_qp *qp,
+			      struct ib_qp_attr *attr,
+			      int mask)
+{
+	struct ionic_admin_wr wr = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+		.wqe = {
+			.op = IONIC_V1_ADMIN_QUERY_QP,
+			.len = cpu_to_le16(IONIC_ADMIN_QUERY_QP_IN_V1_LEN),
+			.cmd.query_qp = {
+				.id_ver = cpu_to_le32(qp->qpid),
+			},
+		}
+	};
+	struct ionic_v1_admin_query_qp_sq *query_sqbuf;
+	struct ionic_v1_admin_query_qp_rq *query_rqbuf;
+	dma_addr_t query_sqdma;
+	dma_addr_t query_rqdma;
+	dma_addr_t hdr_dma = 0;
+	void *hdr_buf = NULL;
+	int flags, rc;
+
+	if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_QUERY_QP)
+		return -EBADRQC;
+
+	if (qp->has_sq) {
+		bool expdb = !!(qp->sq_cmb & IONIC_CMB_EXPDB);
+
+		attr->cap.max_send_sge =
+			ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+						  qp->sq_spec,
+						  expdb);
+		attr->cap.max_inline_data =
+			ionic_v1_send_wqe_max_data(qp->sq.stride_log2, expdb);
+	}
+
+	if (qp->has_rq) {
+		attr->cap.max_recv_sge =
+			ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+						  qp->rq_spec,
+						  qp->rq_cmb & IONIC_CMB_EXPDB);
+	}
+
+	query_sqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!query_sqbuf)
+		return -ENOMEM;
+
+	query_rqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!query_rqbuf) {
+		rc = -ENOMEM;
+		goto err_rqbuf;
+	}
+
+	query_sqdma = dma_map_single(dev->lif_cfg.hwdev, query_sqbuf, PAGE_SIZE,
+				     DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, query_sqdma);
+	if (rc)
+		goto err_sqdma;
+
+	query_rqdma = dma_map_single(dev->lif_cfg.hwdev, query_rqbuf, PAGE_SIZE,
+				     DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, query_rqdma);
+	if (rc)
+		goto err_rqdma;
+
+	if (mask & IB_QP_AV) {
+		hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!hdr_buf) {
+			rc = -ENOMEM;
+			goto err_hdrbuf;
+		}
+
+		hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf,
+					 PAGE_SIZE, DMA_FROM_DEVICE);
+		rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+		if (rc)
+			goto err_hdrdma;
+	}
+
+	wr.wqe.cmd.query_qp.sq_dma_addr = cpu_to_le64(query_sqdma);
+	wr.wqe.cmd.query_qp.rq_dma_addr = cpu_to_le64(query_rqdma);
+	wr.wqe.cmd.query_qp.hdr_dma_addr = cpu_to_le64(hdr_dma);
+	wr.wqe.cmd.query_qp.ah_id = cpu_to_le32(qp->ahid);
+
+	ionic_admin_post(dev, &wr);
+
+	rc = ionic_admin_wait(dev, &wr, 0);
+
+	/*
+	 * Hand the buffers back to the CPU before looking at them: data in a
+	 * DMA_FROM_DEVICE streaming mapping may only be read after it has
+	 * been unmapped (or synced).  Every unmap uses the size that was
+	 * passed to dma_map_single().
+	 */
+	if (mask & IB_QP_AV)
+		dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma,
+				 PAGE_SIZE, DMA_FROM_DEVICE);
+	dma_unmap_single(dev->lif_cfg.hwdev, query_rqdma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+	dma_unmap_single(dev->lif_cfg.hwdev, query_sqdma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+
+	if (rc)
+		goto out_free;
+
+	flags = be16_to_cpu(query_sqbuf->access_perms_flags |
+			    query_rqbuf->access_perms_flags);
+
+	print_hex_dump_debug("sqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+			     query_sqbuf, sizeof(*query_sqbuf), true);
+	print_hex_dump_debug("rqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+			     query_rqbuf, sizeof(*query_rqbuf), true);
+	ibdev_dbg(&dev->ibdev, "query qp %u state_pmtu %#x flags %#x\n",
+		  qp->qpid, query_rqbuf->state_pmtu, flags);
+
+	/* state_pmtu: QP state in the high nibble, path MTU code in the low */
+	attr->qp_state = from_ionic_qp_state(query_rqbuf->state_pmtu >> 4);
+	attr->cur_qp_state = attr->qp_state;
+	attr->path_mtu = (query_rqbuf->state_pmtu & 0xf) - 7;
+	attr->path_mig_state = IB_MIG_MIGRATED;
+	attr->qkey = be32_to_cpu(query_sqbuf->qkey_dest_qpn);
+	attr->rq_psn = be32_to_cpu(query_sqbuf->rq_psn);
+	attr->sq_psn = be32_to_cpu(query_rqbuf->sq_psn);
+	/* qkey and dest QPN are reported from the same device field */
+	attr->dest_qp_num = attr->qkey;
+	attr->qp_access_flags = from_ionic_qp_flags(flags);
+	attr->pkey_index = 0;
+	attr->alt_pkey_index = 0;
+	attr->en_sqd_async_notify = !!(flags & IONIC_QPF_SQD_NOTIFY);
+	attr->sq_draining = !!(flags & IONIC_QPF_SQ_DRAINING);
+	attr->max_rd_atomic = BIT(query_rqbuf->rrq_depth) - 1;
+	attr->max_dest_rd_atomic = BIT(query_rqbuf->rsq_depth) - 1;
+	attr->min_rnr_timer = query_sqbuf->rnr_timer;
+	attr->port_num = 0;
+	attr->timeout = query_sqbuf->retry_timeout;
+	/* retry_rnrtry: retry count in the low nibble, rnr retry in the high */
+	attr->retry_cnt = query_rqbuf->retry_rnrtry & 0xf;
+	attr->rnr_retry = query_rqbuf->retry_rnrtry >> 4;
+	attr->alt_port_num = 0;
+	attr->alt_timeout = 0;
+	attr->rate_limit = be32_to_cpu(query_sqbuf->rate_limit_kbps);
+
+	if (mask & IB_QP_AV)
+		ionic_set_ah_attr(dev, &attr->ah_attr,
+				  qp->hdr, qp->sgid_index);
+
+out_free:
+	/* All mappings are already gone here; hdr_buf may be NULL */
+	kfree(hdr_buf);
+	kfree(query_rqbuf);
+	kfree(query_sqbuf);
+
+	return rc;
+
+	/* Setup-failure unwind: a mapping that failed is never unmapped */
+err_hdrdma:
+	kfree(hdr_buf);
+err_hdrbuf:
+	dma_unmap_single(dev->lif_cfg.hwdev, query_rqdma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+err_rqdma:
+	dma_unmap_single(dev->lif_cfg.hwdev, query_sqdma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+err_sqdma:
+	kfree(query_rqbuf);
+err_rqbuf:
+	kfree(query_sqbuf);
+
+	return rc;
+}
+
+/*
+ * Post the DESTROY_QP admin command for @qpid and wait for it with the
+ * IONIC_ADMIN_F_TEARDOWN flag.
+ *
+ * Returns -EBADRQC when the device does not advertise the opcode, otherwise
+ * the result of ionic_admin_wait().
+ */
+static int ionic_destroy_qp_cmd(struct ionic_ibdev *dev, u32 qpid)
+{
+	struct ionic_admin_wr wr = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+		.wqe = {
+			.op = IONIC_V1_ADMIN_DESTROY_QP,
+			.len = cpu_to_le16(IONIC_ADMIN_DESTROY_QP_IN_V1_LEN),
+			.cmd.destroy_qp = {
+				.qp_id = cpu_to_le32(qpid),
+			},
+		}
+	};
+	int rc = -EBADRQC;
+
+	if (dev->lif_cfg.admin_opcodes > IONIC_V1_ADMIN_DESTROY_QP) {
+		ionic_admin_post(dev, &wr);
+		rc = ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+	}
+
+	return rc;
+}
+
+/*
+ * Report whether the device's expdb_mask has the bit for @wqe_size set.
+ * Only the sizes 64, 128, 256 and 512 have a bit; anything else is false.
+ */
+static bool ionic_expdb_wqe_size_supported(struct ionic_ibdev *dev,
+					   uint32_t wqe_size)
+{
+	if (wqe_size == 64)
+		return !!(dev->lif_cfg.expdb_mask & IONIC_EXPDB_64);
+
+	if (wqe_size == 128)
+		return !!(dev->lif_cfg.expdb_mask & IONIC_EXPDB_128);
+
+	if (wqe_size == 256)
+		return !!(dev->lif_cfg.expdb_mask & IONIC_EXPDB_256);
+
+	if (wqe_size == 512)
+		return !!(dev->lif_cfg.expdb_mask & IONIC_EXPDB_512);
+
+	return false;
+}
+
+/*
+ * Try to place the send queue in the controller memory buffer (CMB).
+ *
+ * On success qp->sq_cmb keeps the (possibly reduced) set of CMB flags and
+ * sq_cmb_order/sq_cmb_pgid/sq_cmb_addr describe the reservation.  On any
+ * failure the function does not return an error: it clears qp->sq_cmb and
+ * resets the other sq_cmb_* fields so the caller sees the queue as not in
+ * CMB.  @udata and @max_data are not used by this function.
+ */
+static void ionic_qp_sq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata,
+ int max_data)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
+ if (!(qp->sq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ /* Unsupported flag bits: fail if REQUIRE is set, otherwise drop them */
+ if (qp->sq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ /* EXPDB requested but lif_cfg.sq_expdb is not set */
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.sq_expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->sq_cmb_order = order_base_2(qp->sq.size / PAGE_SIZE);
+
+ /* Queue too large for a CMB reservation */
+ if (qp->sq_cmb_order >= IONIC_SQCMB_ORDER)
+ goto not_in_cmb;
+
+ /* A zero stride is passed when EXPDB is not being requested */
+ if (qp->sq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->sq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->sq_cmb_pgid,
+ &qp->sq_cmb_addr, qp->sq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ /* Reservation succeeded but without EXPDB */
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order);
+not_in_cmb:
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place sq in cmb as required\n");
+
+ qp->sq_cmb = 0;
+ qp->sq_cmb_order = IONIC_RES_INVALID;
+ qp->sq_cmb_pgid = 0;
+ qp->sq_cmb_addr = 0;
+}
+
+/*
+ * Undo the send queue's CMB placement, if it has one: remove the user mmap
+ * entry when a user context is present, then give back the CMB reservation.
+ */
+static void ionic_qp_sq_destroy_cmb(struct ionic_ibdev *dev,
+				    struct ionic_ctx *ctx,
+				    struct ionic_qp *qp)
+{
+	if (qp->sq_cmb & IONIC_CMB_ENABLE) {
+		if (ctx)
+			rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+
+		ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid,
+			      qp->sq_cmb_order);
+	}
+}
+
+/*
+ * Set up the send queue of a QP and the page table buffer describing it.
+ *
+ * With @udata the queue memory comes from the user (described by @sq and
+ * pinned with ib_umem_get()); without it the queue and its sq_meta and
+ * sq_msn_idx arrays are allocated here.  CMB placement is attempted in both
+ * cases.  If the QP has no send queue only @buf is cleared and, for user
+ * QPs, @sq is validated to be zero.
+ *
+ * Returns 0 on success, -EINVAL for out-of-range @max_wr, @max_sge or
+ * @max_data, -ENOMEM on allocation failure, or an error from a helper.
+ * Everything acquired here is released again on failure.
+ */
+static int ionic_qp_sq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *sq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int max_data, int sq_spec, struct ib_udata *udata)
+{
+ u32 wqe_size;
+ int rc = 0;
+
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+
+ if (!qp->has_sq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(sq);
+
+ return rc;
+ }
+
+ /* All of the range checks below fail with -EINVAL */
+ rc = -EINVAL;
+
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0,
+ qp->sq_cmb &
+ IONIC_CMB_EXPDB),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (max_data < 0)
+ return rc;
+
+ if (max_data > ionic_v1_send_wqe_max_data(dev->lif_cfg.max_stride,
+ qp->sq_cmb & IONIC_CMB_EXPDB))
+ return rc;
+
+ if (udata) {
+ /* User QP: take the geometry from the user-provided descriptor */
+ rc = ionic_validate_qdesc(sq);
+ if (rc)
+ return rc;
+
+ qp->sq_spec = sq_spec;
+
+ qp->sq.ptr = NULL;
+ qp->sq.size = sq->size;
+ qp->sq.mask = sq->mask;
+ qp->sq.depth_log2 = sq->depth_log2;
+ qp->sq.stride_log2 = sq->stride_log2;
+
+ qp->sq_meta = NULL;
+ qp->sq_msn_idx = NULL;
+
+ qp->sq_umem = ib_umem_get(&dev->ibdev, sq->addr, sq->size, 0);
+ if (IS_ERR(qp->sq_umem))
+ return PTR_ERR(qp->sq_umem);
+ } else {
+ /* Kernel QP: allocate the queue and its bookkeeping arrays */
+ qp->sq_umem = NULL;
+
+ qp->sq_spec = ionic_v1_use_spec_sge(max_sge, sq_spec);
+ if (sq_spec && !qp->sq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init sq: max_sge %u disables spec\n",
+ max_sge);
+
+ /* Drop EXPDB if the resulting wqe size is not supported for it */
+ if (qp->sq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->sq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->sq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->sq, qp->qpid);
+
+ /* One entry per queue slot (mask + 1 entries) */
+ qp->sq_meta = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_meta),
+ GFP_KERNEL);
+ if (!qp->sq_meta) {
+ rc = -ENOMEM;
+ goto err_sq_meta;
+ }
+
+ qp->sq_msn_idx = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_msn_idx),
+ GFP_KERNEL);
+ if (!qp->sq_msn_idx) {
+ rc = -ENOMEM;
+ goto err_sq_msn;
+ }
+ }
+
+ /* Never fails; on failure it simply clears qp->sq_cmb */
+ ionic_qp_sq_init_cmb(dev, qp, udata, max_data);
+
+ /* Page table points at the CMB page when in CMB, else at umem/queue DMA */
+ if (qp->sq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->sq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->sq_umem, qp->sq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_sq_tbl;
+
+ return 0;
+
+err_sq_tbl:
+ ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->sq_msn_idx);
+err_sq_msn:
+ kfree(qp->sq_meta);
+err_sq_meta:
+ if (qp->sq_umem)
+ ib_umem_release(qp->sq_umem);
+ else
+ ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+/*
+ * Release everything the send queue holds: its CMB placement, the
+ * bookkeeping arrays, and either the pinned user memory or the
+ * driver-allocated queue.  Does nothing for a QP without a send queue.
+ */
+static void ionic_qp_sq_destroy(struct ionic_ibdev *dev,
+				struct ionic_ctx *ctx,
+				struct ionic_qp *qp)
+{
+	if (qp->has_sq) {
+		ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+
+		kfree(qp->sq_msn_idx);
+		kfree(qp->sq_meta);
+
+		if (!qp->sq_umem)
+			ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+		else
+			ib_umem_release(qp->sq_umem);
+	}
+}
+
+/*
+ * Try to place the receive queue in the controller memory buffer (CMB).
+ *
+ * On success qp->rq_cmb keeps the (possibly reduced) set of CMB flags and
+ * rq_cmb_order/rq_cmb_pgid/rq_cmb_addr describe the reservation.  On any
+ * failure no error is returned: qp->rq_cmb is cleared and the other
+ * rq_cmb_* fields are reset.  @udata is not used by this function.
+ */
+static void ionic_qp_rq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
+ if (!(qp->rq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ /* Unsupported flag bits: fail if REQUIRE is set, otherwise drop them */
+ if (qp->rq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ /* EXPDB requested but lif_cfg.rq_expdb is not set */
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.rq_expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->rq_cmb_order = order_base_2(qp->rq.size / PAGE_SIZE);
+
+ /* Queue too large for a CMB reservation */
+ if (qp->rq_cmb_order >= IONIC_RQCMB_ORDER)
+ goto not_in_cmb;
+
+ /* A zero stride is passed when EXPDB is not being requested */
+ if (qp->rq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->rq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->rq_cmb_pgid,
+ &qp->rq_cmb_addr, qp->rq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ /* Reservation succeeded but without EXPDB */
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order);
+not_in_cmb:
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place rq in cmb as required\n");
+
+ qp->rq_cmb = 0;
+ qp->rq_cmb_order = IONIC_RES_INVALID;
+ qp->rq_cmb_pgid = 0;
+ qp->rq_cmb_addr = 0;
+}
+
+/*
+ * Undo the receive queue's CMB placement, if it has one: remove the user
+ * mmap entry when a user context is present, then give back the CMB
+ * reservation.
+ */
+static void ionic_qp_rq_destroy_cmb(struct ionic_ibdev *dev,
+				    struct ionic_ctx *ctx,
+				    struct ionic_qp *qp)
+{
+	if (qp->rq_cmb & IONIC_CMB_ENABLE) {
+		if (ctx)
+			rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+
+		ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid,
+			      qp->rq_cmb_order);
+	}
+}
+
+/*
+ * Set up the receive queue of a QP and the page table buffer describing it.
+ *
+ * With @udata the queue memory comes from the user (described by @rq and
+ * pinned with ib_umem_get()); without it the queue and its rq_meta array are
+ * allocated here and the meta entries are chained into a free list.  CMB
+ * placement is attempted in both cases.  If the QP has no receive queue only
+ * @buf is cleared and, for user QPs, @rq is validated to be zero.
+ *
+ * Returns 0 on success, -EINVAL for out-of-range @max_wr or @max_sge,
+ * -ENOMEM on allocation failure, or an error from a helper.  Everything
+ * acquired here is released again on failure.
+ */
+static int ionic_qp_rq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *rq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int rq_spec, struct ib_udata *udata)
+{
+ int rc = 0, i;
+ u32 wqe_size;
+
+ if (!qp->has_rq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(rq);
+
+ return rc;
+ }
+
+ /* All of the range checks below fail with -EINVAL */
+ rc = -EINVAL;
+
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (udata) {
+ /* User QP: take the geometry from the user-provided descriptor */
+ rc = ionic_validate_qdesc(rq);
+ if (rc)
+ return rc;
+
+ qp->rq_spec = rq_spec;
+
+ qp->rq.ptr = NULL;
+ qp->rq.size = rq->size;
+ qp->rq.mask = rq->mask;
+ qp->rq.depth_log2 = rq->depth_log2;
+ qp->rq.stride_log2 = rq->stride_log2;
+
+ qp->rq_meta = NULL;
+
+ qp->rq_umem = ib_umem_get(&dev->ibdev, rq->addr, rq->size, 0);
+ if (IS_ERR(qp->rq_umem))
+ return PTR_ERR(qp->rq_umem);
+ } else {
+ /* Kernel QP: allocate the queue and its bookkeeping array */
+ qp->rq_umem = NULL;
+
+ qp->rq_spec = ionic_v1_use_spec_sge(max_sge, rq_spec);
+ if (rq_spec && !qp->rq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init rq: max_sge %u disables spec\n",
+ max_sge);
+
+ /* Drop EXPDB if the resulting wqe size is not supported for it */
+ if (qp->rq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->rq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->rq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->rq, qp->qpid);
+
+ /* One entry per queue slot (mask + 1 entries) */
+ qp->rq_meta = kmalloc_array((u32)qp->rq.mask + 1,
+ sizeof(*qp->rq_meta),
+ GFP_KERNEL);
+ if (!qp->rq_meta) {
+ rc = -ENOMEM;
+ goto err_rq_meta;
+ }
+
+ /* Chain all entries into a free list; the final one (index mask) ends it */
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ qp->rq_meta_head = &qp->rq_meta[0];
+ }
+
+ /* Never fails; on failure it simply clears qp->rq_cmb */
+ ionic_qp_rq_init_cmb(dev, qp, udata);
+
+ /* Page table points at the CMB page when in CMB, else at umem/queue DMA */
+ if (qp->rq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->rq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->rq_umem, qp->rq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_rq_tbl;
+
+ return 0;
+
+err_rq_tbl:
+ ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->rq_meta);
+err_rq_meta:
+ if (qp->rq_umem)
+ ib_umem_release(qp->rq_umem);
+ else
+ ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+/*
+ * Release everything the receive queue holds: its CMB placement, the meta
+ * array, and either the pinned user memory or the driver-allocated queue.
+ * Does nothing for a QP without a receive queue.
+ */
+static void ionic_qp_rq_destroy(struct ionic_ibdev *dev,
+				struct ionic_ctx *ctx,
+				struct ionic_qp *qp)
+{
+	if (qp->has_rq) {
+		ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+
+		kfree(qp->rq_meta);
+
+		if (!qp->rq_umem)
+			ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+		else
+			ib_umem_release(qp->rq_umem);
+	}
+}
+
+/*
+ * Create a queue pair.
+ *
+ * Allocates a QP id (the GSI id for IB_QPT_GSI, otherwise one on a udma
+ * pipeline common to both CQs and the optional user request), sets up the
+ * send and receive queues, issues the CREATE_QP admin command, exposes any
+ * CMB-resident queues to user space through mmap entries, and finally
+ * publishes the QP in dev->qp_tbl.  On success the capabilities actually
+ * provided are written back to @attr->cap.
+ *
+ * IB_QPT_SMI and any type above IB_QPT_UD are rejected with -EOPNOTSUPP.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * here is released.
+ */
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+		    struct ib_udata *udata)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+	struct ionic_tbl_buf sq_buf = {}, rq_buf = {};
+	struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+	struct ionic_qp *qp = to_ionic_qp(ibqp);
+	struct ionic_ctx *ctx =
+		rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+	struct ionic_qp_resp resp = {};
+	struct ionic_qp_req req = {};
+	struct ionic_cq *cq;
+	u8 udma_mask;
+	void *entry;
+	int rc;
+
+	if (udata) {
+		rc = ib_copy_from_udata(&req, udata, sizeof(req));
+		if (rc)
+			return rc;
+	} else {
+		req.sq_spec = IONIC_SPEC_HIGH;
+		req.rq_spec = IONIC_SPEC_HIGH;
+	}
+
+	if (attr->qp_type == IB_QPT_SMI || attr->qp_type > IB_QPT_UD)
+		return -EOPNOTSUPP;
+
+	qp->state = IB_QPS_RESET;
+
+	INIT_LIST_HEAD(&qp->cq_poll_sq);
+	INIT_LIST_HEAD(&qp->cq_flush_sq);
+	INIT_LIST_HEAD(&qp->cq_flush_rq);
+
+	spin_lock_init(&qp->sq_lock);
+	spin_lock_init(&qp->rq_lock);
+
+	qp->has_sq = 1;
+	qp->has_rq = 1;
+
+	if (attr->qp_type == IB_QPT_GSI) {
+		rc = ionic_get_gsi_qpid(dev, &qp->qpid);
+	} else {
+		/* Start from all udma pipelines and narrow to the common set */
+		udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+		if (qp->has_sq)
+			udma_mask &= to_ionic_vcq(attr->send_cq)->udma_mask;
+
+		if (qp->has_rq)
+			udma_mask &= to_ionic_vcq(attr->recv_cq)->udma_mask;
+
+		if (udata && req.udma_mask)
+			udma_mask &= req.udma_mask;
+
+		if (!udma_mask)
+			return -EINVAL;
+
+		rc = ionic_get_qpid(dev, &qp->qpid, &qp->udma_idx, udma_mask);
+	}
+	if (rc)
+		return rc;
+
+	qp->sig_all = attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+	qp->has_ah = attr->qp_type == IB_QPT_RC;
+
+	if (qp->has_ah) {
+		qp->hdr = kzalloc(sizeof(*qp->hdr), GFP_KERNEL);
+		if (!qp->hdr) {
+			rc = -ENOMEM;
+			goto err_ah_alloc;
+		}
+
+		rc = ionic_get_ahid(dev, &qp->ahid);
+		if (rc)
+			goto err_ahid;
+	}
+
+	if (udata) {
+		if (req.rq_cmb & IONIC_CMB_ENABLE)
+			qp->rq_cmb = req.rq_cmb;
+
+		if (req.sq_cmb & IONIC_CMB_ENABLE)
+			qp->sq_cmb = req.sq_cmb;
+	}
+
+	rc = ionic_qp_sq_init(dev, ctx, qp, &req.sq, &sq_buf,
+			      attr->cap.max_send_wr, attr->cap.max_send_sge,
+			      attr->cap.max_inline_data, req.sq_spec, udata);
+	if (rc)
+		goto err_sq;
+
+	rc = ionic_qp_rq_init(dev, ctx, qp, &req.rq, &rq_buf,
+			      attr->cap.max_recv_wr, attr->cap.max_recv_sge,
+			      req.rq_spec, udata);
+	if (rc)
+		goto err_rq;
+
+	rc = ionic_create_qp_cmd(dev, pd,
+				 to_ionic_vcq_cq(attr->send_cq, qp->udma_idx),
+				 to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx),
+				 qp, &sq_buf, &rq_buf, attr);
+	if (rc)
+		goto err_cmd;
+
+	if (udata) {
+		resp.qpid = qp->qpid;
+		resp.udma_idx = qp->udma_idx;
+
+		if (qp->sq_cmb & IONIC_CMB_ENABLE) {
+			bool wc;
+
+			if ((qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+				(IONIC_CMB_WC | IONIC_CMB_UC)) {
+				ibdev_dbg(&dev->ibdev,
+					  "Both sq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+				qp->sq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+			}
+
+			/* Write-combining unless UC alone was requested */
+			wc = (qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+				!= IONIC_CMB_UC;
+
+			/* let userspace know the mapping */
+			if (wc)
+				qp->sq_cmb |= IONIC_CMB_WC;
+			else
+				qp->sq_cmb |= IONIC_CMB_UC;
+
+			qp->mmap_sq_cmb =
+			    ionic_mmap_entry_insert(ctx,
+						    qp->sq.size,
+						    PHYS_PFN(qp->sq_cmb_addr),
+						    wc ? IONIC_MMAP_WC : 0,
+						    &resp.sq_cmb_offset);
+			if (!qp->mmap_sq_cmb) {
+				rc = -ENOMEM;
+				goto err_mmap_sq;
+			}
+
+			resp.sq_cmb = qp->sq_cmb;
+		}
+
+		if (qp->rq_cmb & IONIC_CMB_ENABLE) {
+			bool wc;
+
+			if ((qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+				(IONIC_CMB_WC | IONIC_CMB_UC)) {
+				ibdev_dbg(&dev->ibdev,
+					  "Both rq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+				qp->rq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+			}
+
+			/*
+			 * With EXPDB, write-combining only when WC alone was
+			 * requested; otherwise unless UC alone was requested.
+			 */
+			if (qp->rq_cmb & IONIC_CMB_EXPDB)
+				wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+					== IONIC_CMB_WC;
+			else
+				wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+					!= IONIC_CMB_UC;
+
+			/* let userspace know the mapping */
+			if (wc)
+				qp->rq_cmb |= IONIC_CMB_WC;
+			else
+				qp->rq_cmb |= IONIC_CMB_UC;
+
+			qp->mmap_rq_cmb =
+			    ionic_mmap_entry_insert(ctx,
+						    qp->rq.size,
+						    PHYS_PFN(qp->rq_cmb_addr),
+						    wc ? IONIC_MMAP_WC : 0,
+						    &resp.rq_cmb_offset);
+			if (!qp->mmap_rq_cmb) {
+				rc = -ENOMEM;
+				goto err_mmap_rq;
+			}
+
+			resp.rq_cmb = qp->rq_cmb;
+		}
+
+		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+		if (rc)
+			goto err_resp;
+	}
+
+	ionic_pgtbl_unbuf(dev, &rq_buf);
+	ionic_pgtbl_unbuf(dev, &sq_buf);
+
+	qp->ibqp.qp_num = qp->qpid;
+
+	init_completion(&qp->qp_rel_comp);
+	kref_init(&qp->qp_kref);
+
+	entry = xa_store_irq(&dev->qp_tbl, qp->qpid, qp, GFP_KERNEL);
+	if (entry) {
+		/* A non-error entry means the slot was already occupied */
+		if (!xa_is_err(entry))
+			rc = -EINVAL;
+		else
+			rc = xa_err(entry);
+
+		goto err_resp;
+	}
+
+	if (qp->has_sq) {
+		cq = to_ionic_vcq_cq(attr->send_cq, qp->udma_idx);
+
+		attr->cap.max_send_wr = qp->sq.mask;
+		attr->cap.max_send_sge =
+			ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+						  qp->sq_spec,
+						  qp->sq_cmb & IONIC_CMB_EXPDB);
+		attr->cap.max_inline_data =
+			ionic_v1_send_wqe_max_data(qp->sq.stride_log2,
+						   qp->sq_cmb &
+						   IONIC_CMB_EXPDB);
+		qp->sq_cqid = cq->cqid;
+	}
+
+	if (qp->has_rq) {
+		cq = to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx);
+
+		attr->cap.max_recv_wr = qp->rq.mask;
+		attr->cap.max_recv_sge =
+			ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+						  qp->rq_spec,
+						  qp->rq_cmb & IONIC_CMB_EXPDB);
+		qp->rq_cqid = cq->cqid;
+	}
+
+	return 0;
+
+	/*
+	 * ionic_qp_rq_destroy() and ionic_qp_sq_destroy() below also remove
+	 * the CMB mmap entries when a user context is present.  Clear the
+	 * pointers after removing them here so each entry is dropped exactly
+	 * once; removing a NULL entry is a no-op.
+	 */
+err_resp:
+	if (udata && (qp->rq_cmb & IONIC_CMB_ENABLE)) {
+		rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+		qp->mmap_rq_cmb = NULL;
+	}
+err_mmap_rq:
+	if (udata && (qp->sq_cmb & IONIC_CMB_ENABLE)) {
+		rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+		qp->mmap_sq_cmb = NULL;
+	}
+err_mmap_sq:
+	ionic_destroy_qp_cmd(dev, qp->qpid);
+err_cmd:
+	ionic_pgtbl_unbuf(dev, &rq_buf);
+	ionic_qp_rq_destroy(dev, ctx, qp);
+err_rq:
+	ionic_pgtbl_unbuf(dev, &sq_buf);
+	ionic_qp_sq_destroy(dev, ctx, qp);
+err_sq:
+	if (qp->has_ah)
+		ionic_put_ahid(dev, qp->ahid);
+err_ahid:
+	kfree(qp->hdr);
+err_ah_alloc:
+	ionic_put_qpid(dev, qp->qpid);
+	return rc;
+}
+
+/*
+ * Invoke the completion handler of the verbs CQ that owns @cq, but only when
+ * the CQ is marked for flush and a handler is installed.
+ */
+void ionic_notify_flush_cq(struct ionic_cq *cq)
+{
+	struct ib_cq *ibcq;
+
+	if (!cq->flush)
+		return;
+
+	ibcq = &cq->vcq->ibcq;
+	if (ibcq->comp_handler)
+		ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+/*
+ * Notify the QP's send CQ and, when it is a different CQ, its receive CQ
+ * about pending flush work.
+ */
+static void ionic_notify_qp_cqs(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+	struct ib_cq *send_cq = qp->ibqp.send_cq;
+	struct ib_cq *recv_cq;
+
+	if (send_cq)
+		ionic_notify_flush_cq(to_ionic_vcq_cq(send_cq, qp->udma_idx));
+
+	recv_cq = qp->ibqp.recv_cq;
+	if (!recv_cq || recv_cq == qp->ibqp.send_cq)
+		return;
+
+	ionic_notify_flush_cq(to_ionic_vcq_cq(recv_cq, qp->udma_idx));
+}
+
+/*
+ * Put both queues of a QP into flush mode.
+ *
+ * For each direction that has a CQ, the queue's flush flag is set and, if
+ * the queue is not empty, the CQ is marked for flush and the QP is moved to
+ * the tail of the CQ's flush list.  The CQ lock is taken first (with
+ * interrupts disabled) and the queue lock is nested inside it.
+ */
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP sq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = true;
+ /* Only queue the QP on the CQ when there is something to flush */
+ if (!ionic_queue_empty(&qp->sq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ spin_unlock(&qp->sq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP rq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = true;
+ /* Only queue the QP on the CQ when there is something to flush */
+ if (!ionic_queue_empty(&qp->rq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+ }
+ spin_unlock(&qp->rq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+}
+
+/*
+ * Scrub completions belonging to @qpid from a CQ.
+ *
+ * Starting at the producer index, every entry whose color matches the
+ * expected one is inspected; non-admin entries for @qpid are cleaned with
+ * ionic_v1_cqe_clean().  The CQ's own indices and color are not modified.
+ * Nothing is done when the CQ has no host-visible ring (q.ptr is NULL).
+ */
+static void ionic_clean_cq(struct ionic_cq *cq, u32 qpid)
+{
+	struct ionic_v1_cqe *entry;
+	int pos, qtf, qid, type;
+	bool expect;
+
+	if (!cq->q.ptr)
+		return;
+
+	expect = cq->color;
+	pos = cq->q.prod;
+
+	for (entry = ionic_queue_at(&cq->q, pos);
+	     ionic_v1_cqe_color(entry) == expect;
+	     entry = ionic_queue_at(&cq->q, pos)) {
+		qtf = ionic_v1_cqe_qtf(entry);
+		qid = ionic_v1_cqe_qtf_qid(qtf);
+		type = ionic_v1_cqe_qtf_type(qtf);
+
+		if (type != IONIC_V1_CQE_TYPE_ADMIN && qid == qpid)
+			ionic_v1_cqe_clean(entry);
+
+		/* Advance, tracking the color across a ring wrap */
+		pos = ionic_queue_next(&cq->q, pos);
+		expect = ionic_color_wrap(pos, expect);
+	}
+}
+
+/*
+ * Return the host-side state of a QP to its initial condition.
+ *
+ * Completions of this QP are scrubbed from its CQs, flush flags are cleared,
+ * queue indices are zeroed and, when the driver owns the rq_meta array, the
+ * meta free list is rebuilt.  Runs with local interrupts disabled; each CQ
+ * and queue lock is taken on its own, never nested.
+ */
+static void ionic_reset_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int i;
+
+ local_irq_save(irqflags);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->has_sq) {
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = false;
+ qp->sq_flush_rcvd = false;
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+ qp->sq.prod = 0;
+ qp->sq.cons = 0;
+ spin_unlock(&qp->sq_lock);
+ }
+
+ if (qp->has_rq) {
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = false;
+ qp->rq.prod = 0;
+ qp->rq.cons = 0;
+ /* rq_meta is NULL when the queue was set up from user memory */
+ if (qp->rq_meta) {
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ }
+ qp->rq_meta_head = &qp->rq_meta[0];
+ spin_unlock(&qp->rq_lock);
+ }
+
+ local_irq_restore(irqflags);
+}
+
+/*
+ * Decide whether a caller-supplied "current state" is acceptable given the
+ * state the driver has recorded for the QP.
+ *
+ * Accepted: an exact match, any claim of IB_QPS_ERR, and a claim of
+ * IB_QPS_SQE while the recorded state is IB_QPS_RTS or IB_QPS_SQD.
+ */
+static bool ionic_qp_cur_state_is_ok(enum ib_qp_state q_state,
+				     enum ib_qp_state attr_state)
+{
+	if (q_state == attr_state || attr_state == IB_QPS_ERR)
+		return true;
+
+	return attr_state == IB_QPS_SQE &&
+	       (q_state == IB_QPS_RTS || q_state == IB_QPS_SQD);
+}
+
+/*
+ * Validate a modify-QP request before it is sent to the device.
+ *
+ * Returns -EINVAL when the supplied current state is not acceptable or the
+ * transition is rejected by ib_modify_qp_is_ok(), -EPERM when a QP with a
+ * user object asks for a qkey with the top bit set, and 0 otherwise.
+ */
+static int ionic_check_modify_qp(struct ionic_qp *qp, struct ib_qp_attr *attr,
+				 int mask)
+{
+	enum ib_qp_state from = qp->state;
+	enum ib_qp_state to;
+
+	if (mask & IB_QP_CUR_STATE) {
+		from = attr->cur_qp_state;
+
+		if (!ionic_qp_cur_state_is_ok(qp->state, attr->cur_qp_state))
+			return -EINVAL;
+	}
+
+	/* Without IB_QP_STATE the QP stays in its current state */
+	to = (mask & IB_QP_STATE) ? attr->qp_state : from;
+
+	if (!ib_modify_qp_is_ok(from, to, qp->ibqp.qp_type, mask))
+		return -EINVAL;
+
+	/* unprivileged qp not allowed privileged qkey */
+	if (qp->ibqp.uobject && (mask & IB_QP_QKEY) &&
+	    (attr->qkey & 0x80000000))
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Modify a QP: validate the request, send it to the device and then bring
+ * the host-side state in line.
+ *
+ * IB_QP_CAP is rejected with -EINVAL.  After a successful state change to
+ * IB_QPS_ERR the queues are put into flush mode and the CQs notified; after
+ * a change to IB_QPS_RESET the host-side queue state is reset.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+		    struct ib_udata *udata)
+{
+	struct ionic_qp *qp = to_ionic_qp(ibqp);
+	struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+	int rc;
+
+	rc = ionic_check_modify_qp(qp, attr, mask);
+	if (rc)
+		return rc;
+
+	if (mask & IB_QP_CAP)
+		return -EINVAL;
+
+	rc = ionic_modify_qp_cmd(dev, pd, qp, attr, mask);
+	if (rc)
+		return rc;
+
+	if (!(mask & IB_QP_STATE))
+		return 0;
+
+	qp->state = attr->qp_state;
+
+	switch (attr->qp_state) {
+	case IB_QPS_ERR:
+		ionic_flush_qp(dev, qp);
+		ionic_notify_qp_cqs(dev, qp);
+		break;
+	case IB_QPS_RESET:
+		ionic_reset_qp(dev, qp);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Query a QP.
+ *
+ * Both output structures are zeroed first.  @attr is filled by the device
+ * query plus the host-side work-request limits; @init_attr is rebuilt from
+ * the verbs QP object and the capabilities just stored in @attr.
+ *
+ * Returns 0 on success or the error from the device query.
+ */
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		   int mask, struct ib_qp_init_attr *init_attr)
+{
+	struct ionic_qp *qp = to_ionic_qp(ibqp);
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+	int rc;
+
+	memset(attr, 0, sizeof(*attr));
+	memset(init_attr, 0, sizeof(*init_attr));
+
+	rc = ionic_query_qp_cmd(dev, qp, attr, mask);
+	if (rc)
+		return rc;
+
+	if (qp->has_sq)
+		attr->cap.max_send_wr = qp->sq.mask;
+
+	if (qp->has_rq)
+		attr->cap.max_recv_wr = qp->rq.mask;
+
+	/* Capabilities are copied only once attr->cap is complete */
+	init_attr->cap = attr->cap;
+
+	init_attr->qp_type = ibqp->qp_type;
+	init_attr->qp_context = ibqp->qp_context;
+	init_attr->event_handler = ibqp->event_handler;
+	init_attr->send_cq = ibqp->send_cq;
+	init_attr->recv_cq = ibqp->recv_cq;
+	init_attr->srq = ibqp->srq;
+	init_attr->xrcd = ibqp->xrcd;
+	init_attr->rwq_ind_tbl = ibqp->rwq_ind_tbl;
+
+	if (qp->sig_all)
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+
+	init_attr->create_flags = 0;
+	init_attr->port_num = 0;
+	init_attr->source_qpn = 0;
+
+	return 0;
+}
+
+/*
+ * Destroy a QP.
+ *
+ * The device is told to destroy the QP first; if that fails nothing else is
+ * touched and the error is returned.  Otherwise the QP is removed from
+ * dev->qp_tbl, the function waits until the last reference is dropped, the
+ * QP's completions are scrubbed from its CQs and it is unlinked from their
+ * lists, and finally the queues, AH resources and QP id are released.
+ *
+ * Returns 0 on success or the error from the destroy admin command.
+ */
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int rc;
+
+ rc = ionic_destroy_qp_cmd(dev, qp->qpid);
+ if (rc)
+ return rc;
+
+ xa_erase_irq(&dev->qp_tbl, qp->qpid);
+
+ /* Drop our reference and wait for qp_rel_comp before tearing down */
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+ wait_for_completion(&qp->qp_rel_comp);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_poll_sq);
+ list_del(&qp->cq_flush_sq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_flush_rq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ ionic_qp_rq_destroy(dev, ctx, qp);
+ ionic_qp_sq_destroy(dev, ctx, qp);
+ /* The AH id and header template exist only when has_ah is set */
+ if (qp->has_ah) {
+ ionic_put_ahid(dev, qp->ahid);
+ kfree(qp->hdr);
+ }
+ ionic_put_qpid(dev, qp->qpid);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_datapath.c b/drivers/infiniband/hw/ionic/ionic_datapath.c
new file mode 100644
index 000000000000..aa2944887f23
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_datapath.c
@@ -0,0 +1,1399 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+/*
+ * Pick the opcode constant for an operation name: the IONIC_V1_OP_ variant
+ * when version is below 2, the IONIC_V2_OP_ variant otherwise.
+ */
+#define IONIC_OP(version, opname) \
+ ((version) < 2 ? IONIC_V1_OP_##opname : IONIC_V2_OP_##opname)
+
+/*
+ * Look at the cqe at the producer index of the cq.
+ *
+ * Returns true and stores the entry in *cqe when its color matches the color
+ * the cq currently expects.  Returns false and leaves *cqe untouched
+ * otherwise.
+ */
+static bool ionic_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+			   struct ionic_v1_cqe **cqe)
+{
+	struct ionic_v1_cqe *candidate = ionic_queue_at_prod(&cq->q);
+	bool ready = cq->color == ionic_v1_cqe_color(candidate);
+
+	if (likely(ready)) {
+		/* Prevent out-of-order reads of the CQE */
+		dma_rmb();
+
+		*cqe = candidate;
+	}
+
+	return ready;
+}
+
+/*
+ * Flush the recv at the consumer index of a qp whose recv queue is flushing.
+ *
+ * Returns 1 after writing an IB_WC_WR_FLUSH_ERR completion to *wc, 0 when the
+ * qp is not flushing or its recv queue is empty, or -EIO when the wqe at the
+ * consumer index does not refer to an outstanding request.
+ */
+static int ionic_flush_recv(struct ionic_qp *qp, struct ib_wc *wc)
+{
+	struct ionic_v1_wqe *head;
+	struct ionic_rq_meta *slot;
+
+	/* only flush when asked to, and only while recvs remain */
+	if (!qp->rq_flush || ionic_queue_empty(&qp->rq))
+		return 0;
+
+	head = ionic_queue_at_cons(&qp->rq);
+
+	/* wqe_id must be a valid queue index */
+	if (unlikely(head->base.wqe_id >> qp->rq.depth_log2)) {
+		ibdev_warn(qp->ibqp.device,
+			   "flush qp %u recv index %llu invalid\n",
+			   qp->qpid, (unsigned long long)head->base.wqe_id);
+		return -EIO;
+	}
+
+	/* wqe_id must indicate a request that is outstanding */
+	slot = &qp->rq_meta[head->base.wqe_id];
+	if (unlikely(slot->next != IONIC_META_POSTED)) {
+		ibdev_warn(qp->ibqp.device,
+			   "flush qp %u recv index %llu not posted\n",
+			   qp->qpid, (unsigned long long)head->base.wqe_id);
+		return -EIO;
+	}
+
+	ionic_queue_consume(&qp->rq);
+
+	/* report the request as flushed */
+	memset(wc, 0, sizeof(*wc));
+	wc->qp = &qp->ibqp;
+	wc->wr_id = slot->wrid;
+	wc->status = IB_WC_WR_FLUSH_ERR;
+
+	/* push the meta back onto the rq_meta_head list */
+	slot->next = qp->rq_meta_head;
+	qp->rq_meta_head = slot;
+
+	return 1;
+}
+
+/*
+ * Flush up to nwc recvs of a qp into the wc array.
+ *
+ * Returns the number of completions written, or, when none were written, the
+ * last result of ionic_flush_recv() (zero or a negative error).
+ */
+static int ionic_flush_recv_many(struct ionic_qp *qp,
+				 struct ib_wc *wc, int nwc)
+{
+	int count, rc = 0;
+
+	for (count = 0; count < nwc; count += rc) {
+		rc = ionic_flush_recv(qp, &wc[count]);
+		if (rc <= 0)
+			break;
+	}
+
+	return count ? count : rc;
+}
+
+/*
+ * Flush the send at the consumer index of a qp whose send queue is flushing.
+ *
+ * Returns 1 after writing an IB_WC_WR_FLUSH_ERR completion to *wc, or 0 when
+ * the qp is not flushing or its send queue is empty.
+ */
+static int ionic_flush_send(struct ionic_qp *qp, struct ib_wc *wc)
+{
+	struct ionic_sq_meta *slot;
+
+	if (!qp->sq_flush || ionic_queue_empty(&qp->sq))
+		return 0;
+
+	/* take the meta at the consumer index, then advance past it */
+	slot = &qp->sq_meta[qp->sq.cons];
+	ionic_queue_consume(&qp->sq);
+
+	memset(wc, 0, sizeof(*wc));
+	wc->qp = &qp->ibqp;
+	wc->wr_id = slot->wrid;
+	wc->status = IB_WC_WR_FLUSH_ERR;
+
+	return 1;
+}
+
+/*
+ * Flush up to nwc sends of a qp into the wc array.
+ *
+ * Returns the number of completions written, or, when none were written, the
+ * last result of ionic_flush_send().
+ */
+static int ionic_flush_send_many(struct ionic_qp *qp,
+				 struct ib_wc *wc, int nwc)
+{
+	int flushed = 0, rc = 0;
+
+	for (;;) {
+		if (flushed >= nwc)
+			break;
+
+		rc = ionic_flush_send(qp, &wc[flushed]);
+		if (rc <= 0)
+			break;
+
+		flushed += rc;
+	}
+
+	if (flushed)
+		return flushed;
+
+	return rc;
+}
+
+/*
+ * Handle one recv cqe for cqe_qp.
+ *
+ * Returns 1 when a work completion was written to *wc and the recv queue
+ * entry was consumed, 0 when nothing was produced (the recv queue is already
+ * flushing, or the cqe is a flush indication), or -EIO when the cqe does not
+ * match the state of the recv queue.
+ *
+ * A cqe with an error status puts the qp on the cq's recv flush list.
+ */
+static int ionic_poll_recv(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *cqe_qp, struct ionic_v1_cqe *cqe,
+ struct ib_wc *wc)
+{
+ struct ionic_qp *qp = NULL;
+ struct ionic_rq_meta *meta;
+ u32 src_qpn, st_len;
+ u16 vlan_tag;
+ u8 op;
+
+ /* once flushing, remaining recvs are completed by ionic_flush_recv */
+ if (cqe_qp->rq_flush)
+ return 0;
+
+ qp = cqe_qp;
+
+ /* holds the status for an error cqe, the byte length otherwise */
+ st_len = be32_to_cpu(cqe->status_length);
+
+ /* ignore wqe_id in case of flush error */
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ /* posted recvs (if any) flushed by ionic_flush_recv */
+ return 0;
+ }
+
+ /* there had better be something in the recv queue to complete */
+ if (ionic_queue_empty(&qp->rq)) {
+ ibdev_warn(&dev->ibdev, "qp %u is empty\n", qp->qpid);
+ return -EIO;
+ }
+
+ /* wqe_id must be a valid queue index */
+ if (unlikely(cqe->recv.wqe_id >> qp->rq.depth_log2)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu invalid\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ /* wqe_id must indicate a request that is outstanding */
+ meta = &qp->rq_meta[cqe->recv.wqe_id];
+ if (unlikely(meta->next != IONIC_META_POSTED)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu not posted\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ /* push the meta back onto the rq_meta_head list */
+ meta->next = qp->rq_meta_head;
+ qp->rq_meta_head = meta;
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = meta->wrid;
+
+ wc->qp = &cqe_qp->ibqp;
+
+ /* error completion: report the status and begin flushing the rq */
+ if (ionic_v1_cqe_error(cqe)) {
+ wc->vendor_err = st_len;
+ wc->status = ionic_to_ib_status(st_len);
+
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ ibdev_warn(&dev->ibdev,
+ "qp %d recv cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, BIT(cq->q.stride_log2), true);
+ goto out;
+ }
+
+ wc->vendor_err = 0;
+ wc->status = IB_WC_SUCCESS;
+
+ /* src_qpn_op packs the source qpn together with the recv op */
+ src_qpn = be32_to_cpu(cqe->recv.src_qpn_op);
+ op = src_qpn >> IONIC_V1_CQE_RECV_OP_SHIFT;
+
+ src_qpn &= IONIC_V1_CQE_RECV_QPN_MASK;
+ op &= IONIC_V1_CQE_RECV_OP_MASK;
+
+ /* plain recv unless the op says otherwise */
+ wc->opcode = IB_WC_RECV;
+ switch (op) {
+ case IONIC_V1_CQE_RECV_OP_RDMA_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_INV:
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->recv.imm_data_rkey);
+ break;
+ }
+
+ wc->byte_len = st_len;
+ wc->src_qp = src_qpn;
+
+ /* UD and GSI completions also carry source address information */
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI) {
+ wc->wc_flags |= IB_WC_GRH | IB_WC_WITH_SMAC;
+ ether_addr_copy(wc->smac, cqe->recv.src_mac);
+
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ if (ionic_v1_cqe_recv_is_ipv4(cqe))
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+
+ if (ionic_v1_cqe_recv_is_vlan(cqe))
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+
+ /* vlan_tag in cqe will be valid from dpath even if no vlan */
+ vlan_tag = be16_to_cpu(cqe->recv.vlan_tag);
+ wc->vlan_id = vlan_tag & 0xfff; /* 802.1q VID */
+ wc->sl = vlan_tag >> VLAN_PRIO_SHIFT; /* 802.1q PCP */
+ }
+
+ wc->pkey_index = 0;
+ wc->port_num = 1;
+
+out:
+ /* both success and error completions consume one recv queue entry */
+ ionic_queue_consume(&qp->rq);
+
+ return 1;
+}
+
+/*
+ * Report whether the send at the consumer index of the qp could be polled
+ * right now.  Always false while the send queue is flushing or empty.
+ */
+static bool ionic_peek_send(struct ionic_qp *qp)
+{
+	struct ionic_sq_meta *head;
+
+	/* flushing, or completed all send queue requests */
+	if (qp->sq_flush || ionic_queue_empty(&qp->sq))
+		return false;
+
+	head = &qp->sq_meta[qp->sq.cons];
+
+	/* a remote request is ready once the msn consumer moved off its seq */
+	if (head->remote)
+		return head->seq != qp->sq_msn_cons;
+
+	/* a local request is ready once its local completion was seen */
+	return head->local_comp;
+}
+
+/*
+ * Produce at most one send work completion for the qp.
+ *
+ * Consumes completed send queue entries from the consumer index, skipping
+ * successful entries that were not signaled, and fills *wc for the first
+ * entry that is signaled or has an error status.
+ *
+ * Returns 1 when *wc was filled, 0 when the queue is flushing or nothing more
+ * can be polled.  On an error status, or when polling runs dry after a flush
+ * cqe was received (sq_flush_rcvd), the qp is put on the cq's send flush
+ * list.
+ */
+static int ionic_poll_send(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_sq_meta *meta;
+
+ /* once flushing, remaining sends are completed by the flush path */
+ if (qp->sq_flush)
+ return 0;
+
+ do {
+ /* completed all send queue requests */
+ if (ionic_queue_empty(&qp->sq))
+ goto out_empty;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ /* waiting for remote completion */
+ if (meta->remote && meta->seq == qp->sq_msn_cons)
+ goto out_empty;
+
+ /* waiting for local completion */
+ if (!meta->remote && !meta->local_comp)
+ goto out_empty;
+
+ ionic_queue_consume(&qp->sq);
+
+ /* produce wc only if signaled or error status */
+ } while (!meta->signal && meta->ibsts == IB_WC_SUCCESS);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = meta->ibsts;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ if (meta->ibsts == IB_WC_SUCCESS) {
+ wc->byte_len = meta->len;
+ wc->opcode = meta->ibop;
+ } else {
+ /* for an error, meta->len holds the status reported in the cqe */
+ wc->vendor_err = meta->len;
+
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+
+ return 1;
+
+out_empty:
+ /* nothing more to poll normally: act on a pending flush indication */
+ if (qp->sq_flush_rcvd) {
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ return 0;
+}
+
+/*
+ * Poll up to nwc send completions of a qp into the wc array.
+ *
+ * Returns the number of completions written, or, when none were written, the
+ * last result of ionic_poll_send().
+ */
+static int ionic_poll_send_many(struct ionic_ibdev *dev, struct ionic_cq *cq,
+				struct ionic_qp *qp, struct ib_wc *wc, int nwc)
+{
+	int done = 0, rc = 0;
+
+	while (done < nwc) {
+		rc = ionic_poll_send(dev, cq, qp, &wc[done]);
+		if (rc > 0)
+			done += rc;
+		else
+			break;
+	}
+
+	return done ? done : rc;
+}
+
+/*
+ * Check that comp lies in the range from cons up to, but not including,
+ * prod, with all distances taken modulo the queue mask.
+ *
+ * Returns 0 when it does, -EIO otherwise.
+ */
+static int ionic_validate_cons(u16 prod, u16 cons,
+			       u16 comp, u16 mask)
+{
+	int outstanding = (prod - cons) & mask;
+	int comp_offset = (comp - cons) & mask;
+
+	return outstanding <= comp_offset ? -EIO : 0;
+}
+
+/*
+ * Process a send cqe that carries a message sequence number.
+ *
+ * The msn, masked to the queue size, is validated against the outstanding
+ * msn range and then becomes the new sq_msn_cons.  For an error cqe the
+ * status is recorded in the meta of the request that the msn maps to.
+ *
+ * Returns 0 on success or when the send queue is flushing, or the error from
+ * ionic_validate_cons() for a bad msn.
+ */
+static int ionic_comp_msn(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_seq, cqe_idx;
+ int rc;
+
+ if (qp->sq_flush)
+ return 0;
+
+ cqe_seq = be32_to_cpu(cqe->send.msg_msn) & qp->sq.mask;
+
+ /* the validated position is cqe_seq - 1, one before the new consumer */
+ rc = ionic_validate_cons(qp->sq_msn_prod,
+ qp->sq_msn_cons,
+ cqe_seq - 1,
+ qp->sq.mask);
+ if (rc) {
+ ibdev_warn(qp->ibqp.device,
+ "qp %u bad msn %#x seq %u for prod %u cons %u\n",
+ qp->qpid, be32_to_cpu(cqe->send.msg_msn),
+ cqe_seq, qp->sq_msn_prod, qp->sq_msn_cons);
+ return rc;
+ }
+
+ qp->sq_msn_cons = cqe_seq;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ /* map the sequence number back to its send queue index */
+ cqe_idx = qp->sq_msn_idx[(cqe_seq - 1) & qp->sq.mask];
+
+ /* meta->len carries the raw status for a failed request */
+ meta = &qp->sq_meta[cqe_idx];
+ meta->len = be32_to_cpu(cqe->status_length);
+ meta->ibsts = ionic_to_ib_status(meta->len);
+
+ ibdev_warn(qp->ibqp.device,
+ "qp %d msn cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+/*
+ * Process a send cqe that identifies a request by its wqe id.
+ *
+ * Marks the meta of that request as locally completed and, for an error cqe,
+ * records the status in it.  A flush error cqe only sets sq_flush_rcvd.
+ *
+ * Always returns 0.
+ */
+static int ionic_comp_npg(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_idx;
+ u32 st_len;
+
+ if (qp->sq_flush)
+ return 0;
+
+ st_len = be32_to_cpu(cqe->status_length);
+
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ /*
+ * Flush cqe does not consume a wqe on the device, and maybe
+ * no such work request is posted.
+ *
+ * The driver should begin flushing after the last indicated
+ * normal or error completion. Here, only set a hint that the
+ * flush request was indicated. In poll_send, if nothing more
+ * can be polled normally, then begin flushing.
+ */
+ qp->sq_flush_rcvd = true;
+ return 0;
+ }
+
+ /* the wqe id, masked to the queue size, indexes the meta array */
+ cqe_idx = cqe->send.npg_wqe_id & qp->sq.mask;
+ meta = &qp->sq_meta[cqe_idx];
+ meta->local_comp = true;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ meta->len = st_len;
+ meta->ibsts = ionic_to_ib_status(st_len);
+ /* clear remote so polling relies on local_comp for this request */
+ meta->remote = false;
+ ibdev_warn(qp->ibqp.device,
+ "qp %d npg cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+/*
+ * If the cq holds entries, add their count to the credit, move the consumer
+ * index up to the producer index and ring the cq doorbell.
+ */
+static void ionic_reserve_sync_cq(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+	if (ionic_queue_empty(&cq->q))
+		return;
+
+	cq->credit += ionic_queue_length(&cq->q);
+	cq->q.cons = cq->q.prod;
+
+	ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+			 ionic_queue_dbell_val(&cq->q));
+}
+
+/*
+ * Spend cq credit for newly posted work, and sync the cq with the device
+ * once the credit is used up.
+ */
+static void ionic_reserve_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+			     int spend)
+{
+	cq->credit -= spend;
+	if (cq->credit > 0)
+		return;
+
+	ionic_reserve_sync_cq(dev, cq);
+}
+
+/*
+ * Poll one cq for up to nwc work completions.
+ *
+ * Works in three stages under cq->lock: first the send queues left on the
+ * poll_sq list are polled, then new cqes are processed, and lastly, if the cq
+ * is marked for flush, the queues on the flush lists are flushed.
+ *
+ * Returns the number of completions written to wc, or, when none were
+ * written, the last result code (zero or a negative error).
+ */
+static int ionic_poll_vcq_cq(struct ionic_ibdev *dev,
+ struct ionic_cq *cq,
+ int nwc, struct ib_wc *wc)
+{
+ struct ionic_qp *qp, *qp_next;
+ struct ionic_v1_cqe *cqe;
+ int rc = 0, npolled = 0;
+ unsigned long irqflags;
+ u32 qtf, qid;
+ bool peek;
+ u8 type;
+
+ if (nwc < 1)
+ return 0;
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+
+ /* poll already indicated work completions for send queue */
+ list_for_each_entry_safe(qp, qp_next, &cq->poll_sq, cq_poll_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_poll_send_many(dev, cq, qp, wc + npolled,
+ nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ /* room left in wc means this qp had nothing more to poll */
+ if (npolled < nwc)
+ list_del_init(&qp->cq_poll_sq);
+ }
+
+ /* poll for more work completions */
+ while (likely(ionic_next_cqe(dev, cq, &cqe))) {
+ if (npolled == nwc)
+ goto out;
+
+ qtf = ionic_v1_cqe_qtf(cqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ /*
+ * Safe to access QP without additional reference here as,
+ * 1. We hold cq->lock throughout
+ * 2. ionic_destroy_qp() acquires the same cq->lock before cleanup
+ * 3. QP is removed from qp_tbl before any cleanup begins
+ * This ensures no concurrent access between polling and destruction.
+ */
+ qp = xa_load(&dev->qp_tbl, qid);
+ if (unlikely(!qp)) {
+ /* skip the cqe, but still advance past it below */
+ ibdev_dbg(&dev->ibdev, "missing qp for qid %u\n", qid);
+ goto cq_next;
+ }
+
+ switch (type) {
+ case IONIC_V1_CQE_TYPE_RECV:
+ spin_lock(&qp->rq_lock);
+ rc = ionic_poll_recv(dev, cq, qp, cqe, wc + npolled);
+ spin_unlock(&qp->rq_lock);
+
+ /* on error the cqe is left in place: the cq is not advanced */
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_MSN:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_msn(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ /* more is ready than fit in wc: remember the qp for next poll */
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_NPG:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_npg(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ /* more is ready than fit in wc: remember the qp for next poll */
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ default:
+ ibdev_warn(&dev->ibdev,
+ "unexpected cqe type %u\n", type);
+ rc = -EIO;
+ goto out;
+ }
+
+cq_next:
+ /* advance past the cqe, updating the expected color on wrap */
+ ionic_queue_produce(&cq->q);
+ cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+ }
+
+ /* lastly, flush send and recv queues */
+ if (likely(!cq->flush))
+ goto out;
+
+ /* cleared here, and set again below if a flush could not finish */
+ cq->flush = false;
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_sq, cq_flush_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_flush_send_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_sq);
+ else
+ cq->flush = true;
+ }
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_rq, cq_flush_rq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->rq_lock);
+ rc = ionic_flush_recv_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->rq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_rq);
+ else
+ cq->flush = true;
+ }
+
+out:
+ /* in case credit was depleted (more work posted than cq depth) */
+ if (cq->credit <= 0)
+ ionic_reserve_sync_cq(dev, cq);
+
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ return npolled ?: rc;
+}
+
+/*
+ * Poll a verbs cq for up to nwc work completions.
+ *
+ * Each underlying cq selected by the udma mask is polled in turn, starting
+ * from the remembered poll index, until wc is full.
+ *
+ * Returns the number of completions written to wc, or, when none were
+ * written, the first error seen (zero when there was none).
+ */
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+	struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+	int first_err = 0, total = 0;
+	int start, step, idx, got;
+
+	/* use the remembered start index, and flip it for the next call */
+	start = vcq->poll_idx;
+	vcq->poll_idx ^= dev->lif_cfg.udma_count - 1;
+
+	for (step = 0; step < dev->lif_cfg.udma_count; ++step) {
+		if (total >= nwc)
+			break;
+
+		idx = step ^ start;
+		if (!(vcq->udma_mask & BIT(idx)))
+			continue;
+
+		got = ionic_poll_vcq_cq(dev, &vcq->cq[idx],
+					nwc - total,
+					wc + total);
+		if (got < 0) {
+			/* remember only the first error */
+			if (!first_err)
+				first_err = got;
+			continue;
+		}
+
+		total += got;
+	}
+
+	return total ? total : first_err;
+}
+
+/*
+ * Arm one cq for a completion event.
+ *
+ * With IB_CQ_SOLICITED the solicited arm index is advanced and used,
+ * otherwise the any-completion arm index.  The cq is synced with the device
+ * before the arm doorbell is rung.
+ *
+ * Returns nonzero only when IB_CQ_REPORT_MISSED_EVENTS was requested and the
+ * next cqe already has the expected color, zero otherwise.
+ */
+static int ionic_req_notify_vcq_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ u64 dbell_val = cq->q.dbell;
+
+ if (flags & IB_CQ_SOLICITED) {
+ cq->arm_sol_prod = ionic_queue_next(&cq->q, cq->arm_sol_prod);
+ dbell_val |= cq->arm_sol_prod | IONIC_CQ_RING_SOL;
+ } else {
+ cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+ dbell_val |= cq->arm_any_prod | IONIC_CQ_RING_ARM;
+ }
+
+ ionic_reserve_sync_cq(dev, cq);
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, dbell_val);
+
+ /*
+ * IB_CQ_REPORT_MISSED_EVENTS:
+ *
+ * The queue index in ring zero guarantees no missed events.
+ *
+ * Here, we check if the color bit in the next cqe is flipped. If it
+ * is flipped, then progress can be made by immediately polling the cq.
+ * Still, the cq will be armed, and an event will be generated. The cq
+ * may be empty when polled after the event, because the next poll
+ * after arming the cq can empty it.
+ */
+ return (flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->color == ionic_v1_cqe_color(ionic_queue_at_prod(&cq->q));
+}
+
+/*
+ * Arm every underlying cq selected by the udma mask of a verbs cq.
+ *
+ * Returns 1 if any of them reported that it can be polled immediately,
+ * 0 otherwise.
+ */
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+	struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+	int pollable = 0;
+	int idx;
+
+	for (idx = 0; idx < dev->lif_cfg.udma_count; ++idx) {
+		if ((vcq->udma_mask & BIT(idx)) &&
+		    ionic_req_notify_vcq_cq(dev, &vcq->cq[idx], flags))
+			pollable = 1;
+	}
+
+	return pollable;
+}
+
+/*
+ * Copy the data described by an ib sge list into the inline area of a wqe.
+ *
+ * @data: destination inline data area
+ * @max_data: capacity of the inline data area in bytes
+ * @ib_sgl: caller's sges; each addr is used as a kernel virtual address
+ * @num_sge: number of entries in ib_sgl
+ *
+ * Returns the total number of bytes copied, or -EINVAL when the data does not
+ * fit in max_data bytes.
+ */
+static s64 ionic_prep_inline(void *data, u32 max_data,
+			     const struct ib_sge *ib_sgl, int num_sge)
+{
+	static const s64 bit_31 = 1u << 31;
+	s64 len = 0, sg_len;
+	int sg_i;
+
+	for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+		sg_len = ib_sgl[sg_i].length;
+
+		/* sge length zero means 2GB */
+		if (unlikely(sg_len == 0))
+			sg_len = bit_31;
+
+		/* greater than max inline data is invalid */
+		if (unlikely(len + sg_len > max_data))
+			return -EINVAL;
+
+		/*
+		 * addr is a 64-bit integer: convert through unsigned long so
+		 * the cast to a pointer is well-formed when pointers are
+		 * narrower than 64 bits.
+		 */
+		memcpy(data + len,
+		       (void *)(unsigned long)ib_sgl[sg_i].addr, sg_len);
+
+		len += sg_len;
+	}
+
+	return len;
+}
+
+/*
+ * Fill the scatter gather list of a wqe payload from an ib sge list.
+ *
+ * @wqe: wqe being built; SPEC16 or SPEC32 is set in its base flags when the
+ *       length hints are written
+ * @pld: payload area of the wqe, holding the sgl and the spec arrays
+ * @spec: nonzero to write length hints when num_sge exceeds
+ *        IONIC_V1_SPEC_FIRST_SGE
+ * @max_sge: largest number of sges accepted
+ * @ib_sgl: caller's sges
+ * @num_sge: number of entries in ib_sgl
+ *
+ * Returns the total payload length in bytes, or -EINVAL when num_sge is out
+ * of range, the total exceeds 2GB, or a length does not fit a 16-bit hint.
+ */
+static s64 ionic_prep_pld(struct ionic_v1_wqe *wqe,
+			  union ionic_v1_pld *pld,
+			  int spec, u32 max_sge,
+			  const struct ib_sge *ib_sgl,
+			  int num_sge)
+{
+	/* 1u, not 1l: shifting a 32-bit signed long by 31 overflows */
+	static const s64 bit_31 = 1u << 31;
+	struct ionic_sge *sgl;
+	__be32 *spec32 = NULL;
+	__be16 *spec16 = NULL;
+	s64 len = 0, sg_len;
+	int sg_i = 0;
+
+	if (unlikely(num_sge < 0 || (u32)num_sge > max_sge))
+		return -EINVAL;
+
+	/* with hints in use, the sgl starts at IONIC_V1_SPEC_FIRST_SGE */
+	if (spec && num_sge > IONIC_V1_SPEC_FIRST_SGE) {
+		sg_i = IONIC_V1_SPEC_FIRST_SGE;
+
+		if (num_sge > 8) {
+			wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC16);
+			spec16 = pld->spec16;
+		} else {
+			wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC32);
+			spec32 = pld->spec32;
+		}
+	}
+
+	sgl = &pld->sgl[sg_i];
+
+	for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+		sg_len = ib_sgl[sg_i].length;
+
+		/* sge length zero means 2GB */
+		if (unlikely(sg_len == 0))
+			sg_len = bit_31;
+
+		/* greater than 2GB data is invalid */
+		if (unlikely(len + sg_len > bit_31))
+			return -EINVAL;
+
+		sgl[sg_i].va = cpu_to_be64(ib_sgl[sg_i].addr);
+		sgl[sg_i].len = cpu_to_be32(sg_len);
+		sgl[sg_i].lkey = cpu_to_be32(ib_sgl[sg_i].lkey);
+
+		if (spec32) {
+			spec32[sg_i] = sgl[sg_i].len;
+		} else if (spec16) {
+			/* a 16-bit hint cannot describe a longer sge */
+			if (unlikely(sg_len > U16_MAX))
+				return -EINVAL;
+			spec16[sg_i] = cpu_to_be16(sg_len);
+		}
+
+		len += sg_len;
+	}
+
+	return len;
+}
+
+/*
+ * Finish a send wqe and its meta, then produce the send queue entry.
+ *
+ * Records the work request id and completion bookkeeping in meta, translates
+ * the fence, solicited and signaled send flags into wqe flags, assigns the
+ * message sequence number for requests that complete remotely, and advances
+ * the send queue producer index.
+ */
+static void ionic_prep_base(struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ struct ionic_sq_meta *meta,
+ struct ionic_v1_wqe *wqe)
+{
+ meta->wrid = wr->wr_id;
+ meta->ibsts = IB_WC_SUCCESS;
+ meta->signal = false;
+ meta->local_comp = false;
+
+ /* the wqe id is the send queue index of this entry */
+ wqe->base.wqe_id = qp->sq.prod;
+
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_FENCE);
+
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SOL);
+
+ if (qp->sig_all || wr->send_flags & IB_SEND_SIGNALED) {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SIG);
+ meta->signal = true;
+ }
+
+ /* UD and GSI qps, and local operations, are never marked remote */
+ meta->seq = qp->sq_msn_prod;
+ meta->remote =
+ qp->ibqp.qp_type != IB_QPT_UD &&
+ qp->ibqp.qp_type != IB_QPT_GSI &&
+ !ionic_ibop_is_local(wr->opcode);
+
+ /* only remote requests take a sequence number */
+ if (meta->remote) {
+ qp->sq_msn_idx[meta->seq] = qp->sq.prod;
+ qp->sq_msn_prod = ionic_queue_next(&qp->sq, qp->sq_msn_prod);
+ }
+
+ ionic_queue_produce(&qp->sq);
+}
+
+/*
+ * Fill the payload of a send wqe, either inline or as a scatter gather list
+ * depending on IB_SEND_INLINE, record its length, and finish the wqe.
+ *
+ * Returns 0 on success or the negative error from preparing the payload.
+ */
+static int ionic_prep_common(struct ionic_qp *qp,
+			     const struct ib_send_wr *wr,
+			     struct ionic_sq_meta *meta,
+			     struct ionic_v1_wqe *wqe)
+{
+	s64 total;
+	u32 limit;
+
+	if (!(wr->send_flags & IB_SEND_INLINE)) {
+		/* payload is described by a scatter gather list */
+		wqe->base.num_sge_key = wr->num_sge;
+		limit = ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+						  qp->sq_spec,
+						  false);
+		total = ionic_prep_pld(wqe, &wqe->common.pld,
+				       qp->sq_spec, limit,
+				       wr->sg_list, wr->num_sge);
+	} else {
+		/* payload is copied into the wqe itself */
+		wqe->base.num_sge_key = 0;
+		wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_INL);
+		limit = ionic_v1_send_wqe_max_data(qp->sq.stride_log2, false);
+		total = ionic_prep_inline(wqe->common.pld.data, limit,
+					  wr->sg_list, wr->num_sge);
+	}
+
+	if (unlikely(total < 0))
+		return total;
+
+	wqe->common.length = cpu_to_be32(total);
+	meta->len = total;
+
+	ionic_prep_base(qp, wr, meta, wqe);
+
+	return 0;
+}
+
+/* Zero one send queue entry; its size is two to the power of the sq stride */
+static void ionic_prep_sq_wqe(struct ionic_qp *qp, void *wqe)
+{
+	const unsigned int wqe_size = 1u << qp->sq.stride_log2;
+
+	memset(wqe, 0, wqe_size);
+}
+
+/* Zero one recv queue entry; its size is two to the power of the rq stride */
+static void ionic_prep_rq_wqe(struct ionic_qp *qp, void *wqe)
+{
+	const unsigned int wqe_size = 1u << qp->rq.stride_log2;
+
+	memset(wqe, 0, wqe_size);
+}
+
+/*
+ * Build a send wqe (send, send with immediate, or send with invalidate) at
+ * the producer index of the send queue.
+ *
+ * Returns 0 on success, -EINVAL for any other opcode, or the error from
+ * ionic_prep_common().
+ */
+static int ionic_prep_send(struct ionic_qp *qp,
+			   const struct ib_send_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+	struct ionic_sq_meta *meta = &qp->sq_meta[qp->sq.prod];
+	struct ionic_v1_wqe *wqe = ionic_queue_at_prod(&qp->sq);
+
+	ionic_prep_sq_wqe(qp, wqe);
+
+	meta->ibop = IB_WC_SEND;
+
+	if (wr->opcode == IB_WR_SEND) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+	} else if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+		/* immediate data is passed through without conversion */
+		wqe->base.imm_data_key = wr->ex.imm_data;
+	} else if (wr->opcode == IB_WR_SEND_WITH_INV) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_INV);
+		wqe->base.imm_data_key =
+			cpu_to_be32(wr->ex.invalidate_rkey);
+	} else {
+		return -EINVAL;
+	}
+
+	return ionic_prep_common(qp, wr, meta, wqe);
+}
+
+/*
+ * Build a UD send wqe (send or send with immediate) at the producer index of
+ * the send queue, addressed through the address handle of the request.
+ *
+ * Returns 0 on success, -EINVAL when the request has no address handle or
+ * another opcode, or the error from ionic_prep_common().
+ */
+static int ionic_prep_send_ud(struct ionic_qp *qp,
+			      const struct ib_ud_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+	struct ionic_v1_wqe *wqe;
+	struct ionic_sq_meta *meta;
+	struct ionic_ah *dest_ah;
+
+	if (unlikely(!wr->ah))
+		return -EINVAL;
+
+	dest_ah = to_ionic_ah(wr->ah);
+
+	meta = &qp->sq_meta[qp->sq.prod];
+	wqe = ionic_queue_at_prod(&qp->sq);
+
+	ionic_prep_sq_wqe(qp, wqe);
+
+	/* destination addressing */
+	wqe->common.send.ah_id = cpu_to_be32(dest_ah->ahid);
+	wqe->common.send.dest_qpn = cpu_to_be32(wr->remote_qpn);
+	wqe->common.send.dest_qkey = cpu_to_be32(wr->remote_qkey);
+
+	meta->ibop = IB_WC_SEND;
+
+	if (wr->wr.opcode == IB_WR_SEND) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+	} else if (wr->wr.opcode == IB_WR_SEND_WITH_IMM) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+		wqe->base.imm_data_key = wr->wr.ex.imm_data;
+	} else {
+		return -EINVAL;
+	}
+
+	return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+/*
+ * Build an rdma read, write, or write with immediate wqe at the producer
+ * index of the send queue.
+ *
+ * Returns 0 on success, -EINVAL for another opcode or for send flags the
+ * operation does not accept, or the error from ionic_prep_common().
+ */
+static int ionic_prep_rdma(struct ionic_qp *qp,
+			   const struct ib_rdma_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+	struct ionic_sq_meta *meta = &qp->sq_meta[qp->sq.prod];
+	struct ionic_v1_wqe *wqe = ionic_queue_at_prod(&qp->sq);
+
+	ionic_prep_sq_wqe(qp, wqe);
+
+	/* completion opcode for both write flavors; read overrides it */
+	meta->ibop = IB_WC_RDMA_WRITE;
+
+	if (wr->wr.opcode == IB_WR_RDMA_READ) {
+		/* a read can be neither solicited nor inline */
+		if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+			return -EINVAL;
+		meta->ibop = IB_WC_RDMA_READ;
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_READ);
+	} else if (wr->wr.opcode == IB_WR_RDMA_WRITE) {
+		/* a plain write cannot be solicited */
+		if (wr->wr.send_flags & IB_SEND_SOLICITED)
+			return -EINVAL;
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE);
+	} else if (wr->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE_IMM);
+		wqe->base.imm_data_key = wr->wr.ex.imm_data;
+	} else {
+		return -EINVAL;
+	}
+
+	/* remote address is split into two 32-bit halves */
+	wqe->common.rdma.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+	wqe->common.rdma.remote_va_low = cpu_to_be32(wr->remote_addr);
+	wqe->common.rdma.remote_rkey = cpu_to_be32(wr->rkey);
+
+	return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+/*
+ * Build an atomic compare-and-swap or fetch-and-add wqe at the producer
+ * index of the send queue.
+ *
+ * The request must have exactly one sge of length 8 and must be neither
+ * solicited nor inline.
+ *
+ * Returns 0 on success, -EINVAL when those requirements are not met or for
+ * another opcode, or the error from ionic_prep_common().
+ */
+static int ionic_prep_atomic(struct ionic_qp *qp,
+ const struct ib_atomic_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (wr->wr.num_sge != 1 || wr->wr.sg_list[0].length != 8)
+ return -EINVAL;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ /* placeholder; both valid opcodes below overwrite it */
+ meta->ibop = IB_WC_RDMA_WRITE;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ meta->ibop = IB_WC_COMP_SWAP;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_CS);
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->swap >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->swap);
+ wqe->atomic.compare_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.compare_low = cpu_to_be32(wr->compare_add);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ meta->ibop = IB_WC_FETCH_ADD;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_FA);
+ /* for fetch-and-add, compare_add carries the value to add */
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->compare_add);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wqe->atomic.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+ wqe->atomic.remote_va_low = cpu_to_be32(wr->remote_addr);
+ wqe->atomic.remote_rkey = cpu_to_be32(wr->rkey);
+
+ /* the single local sge, with its length fixed at 8 bytes */
+ wqe->base.num_sge_key = 1;
+ wqe->atomic.sge.va = cpu_to_be64(wr->wr.sg_list[0].addr);
+ wqe->atomic.sge.len = cpu_to_be32(8);
+ wqe->atomic.sge.lkey = cpu_to_be32(wr->wr.sg_list[0].lkey);
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+/*
+ * Build a local invalidate wqe at the producer index of the send queue.
+ *
+ * Returns 0 on success, or -EINVAL when the request is solicited or inline.
+ */
+static int ionic_prep_inv(struct ionic_qp *qp,
+			  const struct ib_send_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+	struct ionic_v1_wqe *wqe;
+	struct ionic_sq_meta *meta;
+
+	if (wr->send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+		return -EINVAL;
+
+	wqe = ionic_queue_at_prod(&qp->sq);
+	meta = &qp->sq_meta[qp->sq.prod];
+
+	ionic_prep_sq_wqe(qp, wqe);
+
+	/* the key to invalidate goes in the base imm_data_key field */
+	wqe->base.imm_data_key = cpu_to_be32(wr->ex.invalidate_rkey);
+	wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, LOCAL_INV);
+
+	meta->ibop = IB_WC_LOCAL_INV;
+	meta->len = 0;
+
+	ionic_prep_base(qp, wr, meta, wqe);
+
+	return 0;
+}
+
+/*
+ * Build a memory registration wqe at the producer index of the send queue
+ * from the mr of the request.
+ *
+ * Returns 0 on success, or -EINVAL when the request is solicited or inline,
+ * or when the mr has no table pages.
+ */
+static int ionic_prep_reg(struct ionic_qp *qp,
+ const struct ib_reg_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_mr *mr = to_ionic_mr(wr->mr);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ __le64 dma_addr;
+ int flags;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ /* must call ib_map_mr_sg before posting reg wr */
+ if (!mr->buf.tbl_pages)
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ flags = to_ionic_mr_flags(wr->access);
+
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, REG_MR);
+ /* for this operation num_sge_key carries the key of the request */
+ wqe->base.num_sge_key = wr->key;
+ wqe->base.imm_data_key = cpu_to_be32(mr->ibmr.lkey);
+ wqe->reg_mr.va = cpu_to_be64(mr->ibmr.iova);
+ wqe->reg_mr.length = cpu_to_be64(mr->ibmr.length);
+ wqe->reg_mr.offset = ionic_pgtbl_off(&mr->buf, mr->ibmr.iova);
+ /* the helper returns a little-endian value; the wqe wants big-endian */
+ dma_addr = ionic_pgtbl_dma(&mr->buf, mr->ibmr.iova);
+ wqe->reg_mr.dma_addr = cpu_to_be64(le64_to_cpu(dma_addr));
+
+ wqe->reg_mr.map_count = cpu_to_be32(mr->buf.tbl_pages);
+ wqe->reg_mr.flags = cpu_to_be16(flags);
+ wqe->reg_mr.dir_size_log2 = 0;
+ wqe->reg_mr.page_size_log2 = order_base_2(mr->ibmr.page_size);
+
+ meta->len = 0;
+ meta->ibop = IB_WC_REG_MR;
+
+ ionic_prep_base(qp, &wr->wr, meta, wqe);
+
+ return 0;
+}
+
+/*
+ * Build one send queue wqe for a non-UD qp, dispatching on the opcode of the
+ * work request.
+ *
+ * Returns the result of the matching prep function, or -EINVAL for an opcode
+ * that is not handled.
+ */
+static int ionic_prep_one_rc(struct ionic_qp *qp,
+			     const struct ib_send_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+
+	switch (wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_SEND_WITH_INV:
+		return ionic_prep_send(qp, wr);
+	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		return ionic_prep_rdma(qp, rdma_wr(wr));
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		return ionic_prep_atomic(qp, atomic_wr(wr));
+	case IB_WR_LOCAL_INV:
+		return ionic_prep_inv(qp, wr);
+	case IB_WR_REG_MR:
+		return ionic_prep_reg(qp, reg_wr(wr));
+	default:
+		break;
+	}
+
+	ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+
+	return -EINVAL;
+}
+
+/*
+ * Build one send queue wqe for a UD or GSI qp.  Only send and send with
+ * immediate are handled; any other opcode yields -EINVAL.
+ */
+static int ionic_prep_one_ud(struct ionic_qp *qp,
+			     const struct ib_send_wr *wr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+
+	if (wr->opcode == IB_WR_SEND ||
+	    wr->opcode == IB_WR_SEND_WITH_IMM)
+		return ionic_prep_send_ud(qp, ud_wr(wr));
+
+	ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+
+	return -EINVAL;
+}
+
+/*
+ * Build a recv wqe at the producer index of the recv queue.
+ *
+ * A meta is taken from the rq_meta_head list to remember the work request
+ * id, and its index in rq_meta becomes the wqe id.
+ *
+ * Returns 0 on success, -EAGAIN while the wqe is still owned by the device,
+ * -EIO when no meta is available, or the error from ionic_prep_pld().
+ */
+static int ionic_prep_recv(struct ionic_qp *qp,
+ const struct ib_recv_wr *wr)
+{
+ struct ionic_rq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ s64 signed_len;
+ u32 mval;
+
+ wqe = ionic_queue_at_prod(&qp->rq);
+
+ /* if wqe is owned by device, caller can try posting again soon */
+ if (wqe->base.flags & cpu_to_be16(IONIC_V1_FLAG_FENCE))
+ return -EAGAIN;
+
+ /* the head must be a usable meta, not one of the marker values */
+ meta = qp->rq_meta_head;
+ if (unlikely(meta == IONIC_META_LAST) ||
+ unlikely(meta == IONIC_META_POSTED))
+ return -EIO;
+
+ ionic_prep_rq_wqe(qp, wqe);
+
+ mval = ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, qp->rq_spec,
+ false);
+ signed_len = ionic_prep_pld(wqe, &wqe->recv.pld,
+ qp->rq_spec, mval,
+ wr->sg_list, wr->num_sge);
+ if (signed_len < 0)
+ return signed_len;
+
+ meta->wrid = wr->wr_id;
+
+ /* the wqe id is the index of the meta within rq_meta */
+ wqe->base.wqe_id = meta - qp->rq_meta;
+ wqe->base.num_sge_key = wr->num_sge;
+
+ /* total length for recv goes in base imm_data_key */
+ wqe->base.imm_data_key = cpu_to_be32(signed_len);
+
+ ionic_queue_produce(&qp->rq);
+
+ /* unlink the meta from the list and mark it as posted */
+ qp->rq_meta_head = meta->next;
+ meta->next = IONIC_META_POSTED;
+
+ return 0;
+}
+
+/*
+ * Post a chain of send work requests to a qp and ring the doorbells.
+ *
+ * Requests are prepared one by one under the sq lock until the chain ends, the
+ * send queue is full, or a request is invalid.  Whatever was prepared is then
+ * made visible to the device under the cq lock and the sq lock, spending cq
+ * credit for the new entries.  If the send queue is flushing, the qp is put
+ * on the cq's send flush list and the completion handler is called.
+ *
+ * @bad receives the first request that was not posted (NULL when the whole
+ * chain was posted).
+ *
+ * Returns 0 on success, -EINVAL when bad is NULL, the qp has no send queue,
+ * the qp state is below RTS, or a request is invalid, and -ENOMEM when the
+ * send queue is full.
+ */
+static int ionic_post_send_common(struct ionic_ibdev *dev,
+				  struct ionic_vcq *vcq,
+				  struct ionic_cq *cq,
+				  struct ionic_qp *qp,
+				  const struct ib_send_wr *wr,
+				  const struct ib_send_wr **bad)
+{
+	unsigned long irqflags;
+	bool notify = false;
+	int spend, rc = 0;
+
+	if (!bad)
+		return -EINVAL;
+
+	if (!qp->has_sq) {
+		*bad = wr;
+		return -EINVAL;
+	}
+
+	if (qp->state < IB_QPS_RTS) {
+		*bad = wr;
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&qp->sq_lock, irqflags);
+
+	while (wr) {
+		if (ionic_queue_full(&qp->sq)) {
+			/* newline-terminated like every other message here */
+			ibdev_dbg(&dev->ibdev, "queue full\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		if (qp->ibqp.qp_type == IB_QPT_UD ||
+		    qp->ibqp.qp_type == IB_QPT_GSI)
+			rc = ionic_prep_one_ud(qp, wr);
+		else
+			rc = ionic_prep_one_rc(qp, wr);
+		if (rc)
+			goto out;
+
+		wr = wr->next;
+	}
+
+out:
+	spin_unlock_irqrestore(&qp->sq_lock, irqflags);
+
+	/* the cq lock is taken before the sq lock for the doorbell stage */
+	spin_lock_irqsave(&cq->lock, irqflags);
+	spin_lock(&qp->sq_lock);
+
+	/* requests prepared before a failure are still handed to the device */
+	if (likely(qp->sq.prod != qp->sq_old_prod)) {
+		/* ring cq doorbell just in time */
+		spend = (qp->sq.prod - qp->sq_old_prod) & qp->sq.mask;
+		ionic_reserve_cq(dev, cq, spend);
+
+		qp->sq_old_prod = qp->sq.prod;
+
+		ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.sq_qtype,
+				 ionic_queue_dbell_val(&qp->sq));
+	}
+
+	if (qp->sq_flush) {
+		notify = true;
+		cq->flush = true;
+		list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+	}
+
+	spin_unlock(&qp->sq_lock);
+	spin_unlock_irqrestore(&cq->lock, irqflags);
+
+	/* the handler is called only after both locks are dropped */
+	if (notify && vcq->ibcq.comp_handler)
+		vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+	*bad = wr;
+	return rc;
+}
+
+/*
+ * Post a chain of recv work requests to a qp and ring the doorbells.
+ *
+ * Requests are prepared one by one under the rq lock until the chain ends, the
+ * recv queue is full, or a request fails.  When a cq is given, whatever was
+ * prepared is then made visible to the device under the cq lock and the rq
+ * lock, spending cq credit for the new entries; if the recv queue is
+ * flushing, the qp is put on the cq's recv flush list and the completion
+ * handler is called.  Without a cq, no doorbell is rung.
+ *
+ * @bad receives the first request that was not posted (NULL when the whole
+ * chain was posted).
+ *
+ * Returns 0 on success, -EINVAL when bad is NULL, the qp has no recv queue, or
+ * the qp state is below INIT, -ENOMEM when the recv queue is full, or the
+ * error from ionic_prep_recv().
+ */
+static int ionic_post_recv_common(struct ionic_ibdev *dev,
+				  struct ionic_vcq *vcq,
+				  struct ionic_cq *cq,
+				  struct ionic_qp *qp,
+				  const struct ib_recv_wr *wr,
+				  const struct ib_recv_wr **bad)
+{
+	unsigned long irqflags;
+	bool notify = false;
+	int spend, rc = 0;
+
+	if (!bad)
+		return -EINVAL;
+
+	if (!qp->has_rq) {
+		*bad = wr;
+		return -EINVAL;
+	}
+
+	if (qp->state < IB_QPS_INIT) {
+		*bad = wr;
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&qp->rq_lock, irqflags);
+
+	while (wr) {
+		if (ionic_queue_full(&qp->rq)) {
+			/* newline-terminated like every other message here */
+			ibdev_dbg(&dev->ibdev, "queue full\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		rc = ionic_prep_recv(qp, wr);
+		if (rc)
+			goto out;
+
+		wr = wr->next;
+	}
+
+out:
+	spin_unlock_irqrestore(&qp->rq_lock, irqflags);
+
+	/* without a cq there is nothing to reserve or to notify */
+	if (!cq)
+		goto out_unlocked;
+
+	/* the cq lock is taken before the rq lock for the doorbell stage */
+	spin_lock_irqsave(&cq->lock, irqflags);
+	spin_lock(&qp->rq_lock);
+
+	/* requests prepared before a failure are still handed to the device */
+	if (likely(qp->rq.prod != qp->rq_old_prod)) {
+		/* ring cq doorbell just in time */
+		spend = (qp->rq.prod - qp->rq_old_prod) & qp->rq.mask;
+		ionic_reserve_cq(dev, cq, spend);
+
+		qp->rq_old_prod = qp->rq.prod;
+
+		ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.rq_qtype,
+				 ionic_queue_dbell_val(&qp->rq));
+	}
+
+	if (qp->rq_flush) {
+		notify = true;
+		cq->flush = true;
+		list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+	}
+
+	spin_unlock(&qp->rq_lock);
+	spin_unlock_irqrestore(&cq->lock, irqflags);
+
+	/* the handler is called only after both locks are dropped */
+	if (notify && vcq->ibcq.comp_handler)
+		vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+out_unlocked:
+	*bad = wr;
+	return rc;
+}
+
+/*
+ * Verbs entry point for posting send work requests: resolves the device, the
+ * send cq and the qp, then hands over to ionic_post_send_common().
+ */
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		    const struct ib_send_wr **bad)
+{
+	struct ionic_ibdev *dev;
+	struct ionic_vcq *send_vcq;
+	struct ionic_cq *send_cq;
+	struct ionic_qp *qp;
+
+	dev = to_ionic_ibdev(ibqp->device);
+	send_vcq = to_ionic_vcq(ibqp->send_cq);
+	qp = to_ionic_qp(ibqp);
+
+	/* the udma index of the qp selects the underlying cq */
+	send_cq = to_ionic_vcq_cq(ibqp->send_cq, qp->udma_idx);
+
+	return ionic_post_send_common(dev, send_vcq, send_cq, qp, wr, bad);
+}
+
+/*
+ * Verbs entry point for posting recv work requests: resolves the device, the
+ * recv cq and the qp, then hands over to ionic_post_recv_common().
+ */
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		    const struct ib_recv_wr **bad)
+{
+	struct ionic_ibdev *dev;
+	struct ionic_vcq *recv_vcq;
+	struct ionic_cq *recv_cq;
+	struct ionic_qp *qp;
+
+	dev = to_ionic_ibdev(ibqp->device);
+	recv_vcq = to_ionic_vcq(ibqp->recv_cq);
+	qp = to_ionic_qp(ibqp);
+
+	/* the udma index of the qp selects the underlying cq */
+	recv_cq = to_ionic_vcq_cq(ibqp->recv_cq, qp->udma_idx);
+
+	return ionic_post_recv_common(dev, recv_vcq, recv_cq, qp, wr, bad);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h
new file mode 100644
index 000000000000..adfbb89d856c
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_fw.h
@@ -0,0 +1,1029 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_FW_H_
+#define _IONIC_FW_H_
+
+#include <linux/kernel.h>
+#include <rdma/ib_verbs.h>
+
+/* common for ib spec */
+
+/*
+ * Bytes of wqe data space accounted for when the expdb option is requested
+ * in the wqe sizing helpers of this header (expdb presumably stands for
+ * "express doorbell" -- TODO confirm).
+ */
+#define IONIC_EXP_DBELL_SZ 8
+
+/* Bit position of the table index inside an lkey/rkey built by ionic_mrid() */
+enum ionic_mrid_bits {
+ IONIC_MRID_INDEX_SHIFT = 8,
+};
+
+/* Build an lkey/rkey: @index above IONIC_MRID_INDEX_SHIFT, @key in the low byte */
+static inline u32 ionic_mrid(u32 index, u8 key)
+{
+	u32 mrid = index << IONIC_MRID_INDEX_SHIFT;
+
+	mrid |= key;
+
+	return mrid;
+}
+
+/* Recover the table index from an lkey/rkey by dropping the low key byte */
+static inline u32 ionic_mrid_index(u32 lrkey)
+{
+	u32 index = lrkey;
+
+	index >>= IONIC_MRID_INDEX_SHIFT;
+
+	return index;
+}
+
+/* common to all versions */
+
+/*
+ * wqe scatter gather element
+ *
+ * All three fields are big-endian: a 64-bit address, a 32-bit length and a
+ * 32-bit lkey.
+ */
+struct ionic_sge {
+ __be64 va;
+ __be32 len;
+ __be32 lkey;
+};
+
+/*
+ * admin queue mr type
+ *
+ * The low bits (covered by IONIC_MRF_ACCESS_MASK, bits 0..11) describe
+ * access rights; bits 13..15 select the kind of object.  The trailing
+ * IONIC_MRF_*_MR / IONIC_MRF_MW_* values are ready-made combinations of the
+ * type bits.
+ */
+enum ionic_mr_flags {
+ /* bits that determine mr access */
+ IONIC_MRF_LOCAL_WRITE = BIT(0),
+ IONIC_MRF_REMOTE_WRITE = BIT(1),
+ IONIC_MRF_REMOTE_READ = BIT(2),
+ IONIC_MRF_REMOTE_ATOMIC = BIT(3),
+ IONIC_MRF_MW_BIND = BIT(4),
+ IONIC_MRF_ZERO_BASED = BIT(5),
+ IONIC_MRF_ON_DEMAND = BIT(6),
+ IONIC_MRF_PB = BIT(7),
+ IONIC_MRF_ACCESS_MASK = BIT(12) - 1,
+
+ /* bits that determine mr type */
+ IONIC_MRF_UKEY_EN = BIT(13),
+ IONIC_MRF_IS_MW = BIT(14),
+ IONIC_MRF_INV_EN = BIT(15),
+
+ /* base flags combinations for mr types */
+ IONIC_MRF_USER_MR = 0,
+ IONIC_MRF_PHYS_MR = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_INV_EN),
+ IONIC_MRF_MW_1 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW),
+ IONIC_MRF_MW_2 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW |
+ IONIC_MRF_INV_EN),
+};
+
+/*
+ * Translate the IB_ACCESS_* and IB_ZERO_BASED bits of @access into the
+ * corresponding IONIC_MRF_* access bits.  Bits with no mapping here are
+ * ignored.
+ */
+static inline int to_ionic_mr_flags(int access)
+{
+	int mrf = 0;
+
+	mrf |= (access & IB_ACCESS_LOCAL_WRITE) ? IONIC_MRF_LOCAL_WRITE : 0;
+	mrf |= (access & IB_ACCESS_REMOTE_READ) ? IONIC_MRF_REMOTE_READ : 0;
+	mrf |= (access & IB_ACCESS_REMOTE_WRITE) ? IONIC_MRF_REMOTE_WRITE : 0;
+	mrf |= (access & IB_ACCESS_REMOTE_ATOMIC) ? IONIC_MRF_REMOTE_ATOMIC : 0;
+	mrf |= (access & IB_ACCESS_MW_BIND) ? IONIC_MRF_MW_BIND : 0;
+	mrf |= (access & IB_ZERO_BASED) ? IONIC_MRF_ZERO_BASED : 0;
+
+	return mrf;
+}
+
+/*
+ * QP flag bits.  Bits 0..2 carry remote access rights (translated by
+ * to_ionic_qp_flags()/from_ionic_qp_flags()); the remaining bits select
+ * other per-queue behaviour.
+ */
+enum ionic_qp_flags {
+ /* bits that determine qp access */
+ IONIC_QPF_REMOTE_WRITE = BIT(0),
+ IONIC_QPF_REMOTE_READ = BIT(1),
+ IONIC_QPF_REMOTE_ATOMIC = BIT(2),
+
+ /* bits that determine other qp behavior */
+ IONIC_QPF_SQ_PB = BIT(6),
+ IONIC_QPF_RQ_PB = BIT(7),
+ IONIC_QPF_SQ_SPEC = BIT(8),
+ IONIC_QPF_RQ_SPEC = BIT(9),
+ IONIC_QPF_REMOTE_PRIVILEGED = BIT(10),
+ IONIC_QPF_SQ_DRAINING = BIT(11),
+ IONIC_QPF_SQD_NOTIFY = BIT(12),
+ IONIC_QPF_SQ_CMB = BIT(13),
+ IONIC_QPF_RQ_CMB = BIT(14),
+ IONIC_QPF_PRIVILEGED = BIT(15),
+};
+
+/*
+ * Convert the remote-access bits of an IONIC_QPF_* mask into IB_ACCESS_*
+ * flags.  All other IONIC_QPF_* bits are dropped.
+ */
+static inline int from_ionic_qp_flags(int flags)
+{
+	int access = 0;
+
+	access |= (flags & IONIC_QPF_REMOTE_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0;
+	access |= (flags & IONIC_QPF_REMOTE_READ) ? IB_ACCESS_REMOTE_READ : 0;
+	access |= (flags & IONIC_QPF_REMOTE_ATOMIC) ? IB_ACCESS_REMOTE_ATOMIC : 0;
+
+	return access;
+}
+
+/*
+ * Assemble an IONIC_QPF_* mask.
+ *
+ * The remote write/read/atomic bits come from the IB_ACCESS_* bits in
+ * @access; each boolean argument switches on the IONIC_QPF_* bit of the
+ * same name.
+ */
+static inline int to_ionic_qp_flags(int access, bool sqd_notify,
+				    bool sq_is_cmb, bool rq_is_cmb,
+				    bool sq_spec, bool rq_spec,
+				    bool privileged, bool remote_privileged)
+{
+	int qpf = 0;
+
+	/* access rights */
+	qpf |= (access & IB_ACCESS_REMOTE_WRITE) ? IONIC_QPF_REMOTE_WRITE : 0;
+	qpf |= (access & IB_ACCESS_REMOTE_READ) ? IONIC_QPF_REMOTE_READ : 0;
+	qpf |= (access & IB_ACCESS_REMOTE_ATOMIC) ? IONIC_QPF_REMOTE_ATOMIC : 0;
+
+	/* behaviour switches */
+	qpf |= sqd_notify ? IONIC_QPF_SQD_NOTIFY : 0;
+	qpf |= sq_is_cmb ? IONIC_QPF_SQ_CMB : 0;
+	qpf |= rq_is_cmb ? IONIC_QPF_RQ_CMB : 0;
+	qpf |= sq_spec ? IONIC_QPF_SQ_SPEC : 0;
+	qpf |= rq_spec ? IONIC_QPF_RQ_SPEC : 0;
+	qpf |= privileged ? IONIC_QPF_PRIVILEGED : 0;
+	qpf |= remote_privileged ? IONIC_QPF_REMOTE_PRIVILEGED : 0;
+
+	return qpf;
+}
+
+/*
+ * cqe non-admin status indicated in status_length field when err bit is set.
+ * Values are consecutive starting at 0 (IONIC_STS_OK) and are translated to
+ * IB_WC_* codes by ionic_to_ib_status().
+ */
+enum ionic_status {
+ IONIC_STS_OK,
+ IONIC_STS_LOCAL_LEN_ERR,
+ IONIC_STS_LOCAL_QP_OPER_ERR,
+ IONIC_STS_LOCAL_PROT_ERR,
+ IONIC_STS_WQE_FLUSHED_ERR,
+ IONIC_STS_MEM_MGMT_OPER_ERR,
+ IONIC_STS_BAD_RESP_ERR,
+ IONIC_STS_LOCAL_ACC_ERR,
+ IONIC_STS_REMOTE_INV_REQ_ERR,
+ IONIC_STS_REMOTE_ACC_ERR,
+ IONIC_STS_REMOTE_OPER_ERR,
+ IONIC_STS_RETRY_EXCEEDED,
+ IONIC_STS_RNR_RETRY_EXCEEDED,
+ IONIC_STS_XRC_VIO_ERR,
+ IONIC_STS_LOCAL_SGL_INV_ERR,
+};
+
+/*
+ * Translate an enum ionic_status value into the matching IB_WC_* status.
+ * IONIC_STS_XRC_VIO_ERR and any value outside the enum map to
+ * IB_WC_GENERAL_ERR.
+ */
+static inline int ionic_to_ib_status(int sts)
+{
+	/* every enum ionic_status value has an explicit entry */
+	static const int ib_status[] = {
+		[IONIC_STS_OK]			= IB_WC_SUCCESS,
+		[IONIC_STS_LOCAL_LEN_ERR]	= IB_WC_LOC_LEN_ERR,
+		[IONIC_STS_LOCAL_QP_OPER_ERR]	= IB_WC_LOC_QP_OP_ERR,
+		[IONIC_STS_LOCAL_PROT_ERR]	= IB_WC_LOC_PROT_ERR,
+		[IONIC_STS_WQE_FLUSHED_ERR]	= IB_WC_WR_FLUSH_ERR,
+		[IONIC_STS_MEM_MGMT_OPER_ERR]	= IB_WC_MW_BIND_ERR,
+		[IONIC_STS_BAD_RESP_ERR]	= IB_WC_BAD_RESP_ERR,
+		[IONIC_STS_LOCAL_ACC_ERR]	= IB_WC_LOC_ACCESS_ERR,
+		[IONIC_STS_REMOTE_INV_REQ_ERR]	= IB_WC_REM_INV_REQ_ERR,
+		[IONIC_STS_REMOTE_ACC_ERR]	= IB_WC_REM_ACCESS_ERR,
+		[IONIC_STS_REMOTE_OPER_ERR]	= IB_WC_REM_OP_ERR,
+		[IONIC_STS_RETRY_EXCEEDED]	= IB_WC_RETRY_EXC_ERR,
+		[IONIC_STS_RNR_RETRY_EXCEEDED]	= IB_WC_RNR_RETRY_EXC_ERR,
+		[IONIC_STS_XRC_VIO_ERR]		= IB_WC_GENERAL_ERR,
+		[IONIC_STS_LOCAL_SGL_INV_ERR]	= IB_WC_LOC_QP_OP_ERR,
+	};
+
+	if (sts < 0 || sts >= (int)ARRAY_SIZE(ib_status))
+		return IB_WC_GENERAL_ERR;
+
+	return ib_status[sts];
+}
+
+/*
+ * admin queue qp type
+ *
+ * Consecutive values starting at 0; to_ionic_qp_type() produces a subset of
+ * them from enum ib_qp_type.
+ */
+enum ionic_qp_type {
+ IONIC_QPT_RC,
+ IONIC_QPT_UC,
+ IONIC_QPT_RD,
+ IONIC_QPT_UD,
+ IONIC_QPT_SRQ,
+ IONIC_QPT_XRC_INI,
+ IONIC_QPT_XRC_TGT,
+ IONIC_QPT_XRC_SRQ,
+};
+
+/*
+ * Map an IB QP type to the IONIC_QPT_* value.  GSI QPs are treated as UD.
+ * Returns -EINVAL for any type without a mapping.
+ */
+static inline int to_ionic_qp_type(enum ib_qp_type type)
+{
+	int qpt = -EINVAL;
+
+	switch (type) {
+	case IB_QPT_RC:
+		qpt = IONIC_QPT_RC;
+		break;
+	case IB_QPT_UC:
+		qpt = IONIC_QPT_UC;
+		break;
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+		qpt = IONIC_QPT_UD;
+		break;
+	case IB_QPT_XRC_INI:
+		qpt = IONIC_QPT_XRC_INI;
+		break;
+	case IB_QPT_XRC_TGT:
+		qpt = IONIC_QPT_XRC_TGT;
+		break;
+	default:
+		break;
+	}
+
+	return qpt;
+}
+
+/*
+ * admin queue qp state
+ *
+ * Consecutive values starting at 0; converted to and from enum ib_qp_state
+ * by from_ionic_qp_state() and to_ionic_qp_state().
+ */
+enum ionic_qp_state {
+ IONIC_QPS_RESET,
+ IONIC_QPS_INIT,
+ IONIC_QPS_RTR,
+ IONIC_QPS_RTS,
+ IONIC_QPS_SQD,
+ IONIC_QPS_SQE,
+ IONIC_QPS_ERR,
+};
+
+/*
+ * Map an IONIC_QPS_* state to the IB_QPS_* state of the same name.
+ * Returns -EINVAL for a value outside enum ionic_qp_state.
+ */
+static inline int from_ionic_qp_state(enum ionic_qp_state state)
+{
+	int ib_state = -EINVAL;
+
+	switch (state) {
+	case IONIC_QPS_RESET:
+		ib_state = IB_QPS_RESET;
+		break;
+	case IONIC_QPS_INIT:
+		ib_state = IB_QPS_INIT;
+		break;
+	case IONIC_QPS_RTR:
+		ib_state = IB_QPS_RTR;
+		break;
+	case IONIC_QPS_RTS:
+		ib_state = IB_QPS_RTS;
+		break;
+	case IONIC_QPS_SQD:
+		ib_state = IB_QPS_SQD;
+		break;
+	case IONIC_QPS_SQE:
+		ib_state = IB_QPS_SQE;
+		break;
+	case IONIC_QPS_ERR:
+		ib_state = IB_QPS_ERR;
+		break;
+	default:
+		break;
+	}
+
+	return ib_state;
+}
+
+/*
+ * Map an IB_QPS_* state to the IONIC_QPS_* state of the same name.
+ * Unknown states yield 0, which is also the value of IONIC_QPS_RESET; the
+ * result is combined with shifts and ORs by to_ionic_qp_modify_state().
+ */
+static inline int to_ionic_qp_state(enum ib_qp_state state)
+{
+	int ionic_state = 0;
+
+	switch (state) {
+	case IB_QPS_RESET:
+		ionic_state = IONIC_QPS_RESET;
+		break;
+	case IB_QPS_INIT:
+		ionic_state = IONIC_QPS_INIT;
+		break;
+	case IB_QPS_RTR:
+		ionic_state = IONIC_QPS_RTR;
+		break;
+	case IB_QPS_RTS:
+		ionic_state = IONIC_QPS_RTS;
+		break;
+	case IB_QPS_SQD:
+		ionic_state = IONIC_QPS_SQD;
+		break;
+	case IB_QPS_SQE:
+		ionic_state = IONIC_QPS_SQE;
+		break;
+	case IB_QPS_ERR:
+		ionic_state = IONIC_QPS_ERR;
+		break;
+	default:
+		break;
+	}
+
+	return ionic_state;
+}
+
+/*
+ * Pack a state transition into one value: the target state in the low four
+ * bits and the current state shifted up by four.
+ */
+static inline int to_ionic_qp_modify_state(enum ib_qp_state to_state,
+					   enum ib_qp_state from_state)
+{
+	int to = to_ionic_qp_state(to_state);
+	int from = to_ionic_qp_state(from_state);
+
+	return (from << 4) | to;
+}
+
+/* fw abi v1 */
+
+/*
+ * data payload part of v1 wqe
+ *
+ * Four overlapping views of the same 32 bytes: two full sges, eight 32-bit
+ * or sixteen 16-bit big-endian words (the "spec" formats), or raw bytes.
+ * The wqe sizing helpers index sgl[] and data[] beyond these declared
+ * bounds to describe larger strides.
+ */
+union ionic_v1_pld {
+ struct ionic_sge sgl[2];
+ __be32 spec32[8];
+ __be16 spec16[16];
+ __u8 data[32];
+};
+
+/*
+ * completion queue v1 cqe
+ *
+ * An anonymous union holds the admin, recv or send specific part, followed
+ * by two big-endian words common to all three: status_length and
+ * qid_type_flags (decoded with the IONIC_V1_CQE_* bits and the
+ * ionic_v1_cqe_*() helpers).
+ */
+struct ionic_v1_cqe {
+ union {
+ struct {
+ __be16 cmd_idx;
+ __u8 cmd_op;
+ __u8 rsvd[17];
+ __le16 old_sq_cindex;
+ __le16 old_rq_cq_cindex;
+ } admin;
+ struct {
+ __u64 wqe_id;
+ __be32 src_qpn_op;
+ __u8 src_mac[6];
+ __be16 vlan_tag;
+ __be32 imm_data_rkey;
+ } recv;
+ struct {
+ __u8 rsvd[4];
+ __be32 msg_msn;
+ __u8 rsvd2[8];
+ __u64 npg_wqe_id;
+ } send;
+ };
+ __be32 status_length;
+ __be32 qid_type_flags;
+};
+
+/*
+ * bits for cqe recv (the recv.src_qpn_op word)
+ *
+ * The low 24 bits are the source qpn, the op code starts at bit 24, and the
+ * IS_IPV4 / IS_VLAN flags sit at bits 31 and 30.
+ */
+enum ionic_v1_cqe_src_qpn_bits {
+ IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
+ IONIC_V1_CQE_RECV_OP_SHIFT = 24,
+
+ /* MASK could be 0x3, but need 0x1f for makeshift values:
+ * OP_TYPE_RDMA_OPER_WITH_IMM, OP_TYPE_SEND_RCVD
+ */
+ IONIC_V1_CQE_RECV_OP_MASK = 0x1f,
+ IONIC_V1_CQE_RECV_OP_SEND = 0,
+ IONIC_V1_CQE_RECV_OP_SEND_INV = 1,
+ IONIC_V1_CQE_RECV_OP_SEND_IMM = 2,
+ IONIC_V1_CQE_RECV_OP_RDMA_IMM = 3,
+
+ IONIC_V1_CQE_RECV_IS_IPV4 = BIT(7 + IONIC_V1_CQE_RECV_OP_SHIFT),
+ IONIC_V1_CQE_RECV_IS_VLAN = BIT(6 + IONIC_V1_CQE_RECV_OP_SHIFT),
+};
+
+/*
+ * bits for cqe qid_type_flags
+ *
+ * Bit 0 is the color bit, bit 1 the error bit, a 3-bit type field starts at
+ * bit 5 and the queue id occupies everything from bit 8 up.  The
+ * IONIC_V1_CQE_TYPE_* values are the possible contents of the type field.
+ */
+enum ionic_v1_cqe_qtf_bits {
+ IONIC_V1_CQE_COLOR = BIT(0),
+ IONIC_V1_CQE_ERROR = BIT(1),
+ IONIC_V1_CQE_TYPE_SHIFT = 5,
+ IONIC_V1_CQE_TYPE_MASK = 0x7,
+ IONIC_V1_CQE_QID_SHIFT = 8,
+
+ IONIC_V1_CQE_TYPE_ADMIN = 0,
+ IONIC_V1_CQE_TYPE_RECV = 1,
+ IONIC_V1_CQE_TYPE_SEND_MSN = 2,
+ IONIC_V1_CQE_TYPE_SEND_NPG = 3,
+};
+
+/* True when the color bit of @cqe's qid_type_flags word is set */
+static inline bool ionic_v1_cqe_color(struct ionic_v1_cqe *cqe)
+{
+	u32 qtf = be32_to_cpu(cqe->qid_type_flags);
+
+	return (qtf & IONIC_V1_CQE_COLOR) != 0;
+}
+
+static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe)
+{
+ return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_ERROR);
+}
+
+/* True when the IS_IPV4 flag is set in a recv cqe's src_qpn_op word */
+static inline bool ionic_v1_cqe_recv_is_ipv4(struct ionic_v1_cqe *cqe)
+{
+	u32 src_qpn_op = be32_to_cpu(cqe->recv.src_qpn_op);
+
+	return (src_qpn_op & IONIC_V1_CQE_RECV_IS_IPV4) != 0;
+}
+
+/* True when the IS_VLAN flag is set in a recv cqe's src_qpn_op word */
+static inline bool ionic_v1_cqe_recv_is_vlan(struct ionic_v1_cqe *cqe)
+{
+	u32 src_qpn_op = be32_to_cpu(cqe->recv.src_qpn_op);
+
+	return (src_qpn_op & IONIC_V1_CQE_RECV_IS_VLAN) != 0;
+}
+
+/*
+ * Set every bit of the queue id field in @cqe's qid_type_flags word,
+ * leaving the low IONIC_V1_CQE_QID_SHIFT bits untouched.
+ */
+static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe)
+{
+	const __be32 qid_bits = cpu_to_be32(~0u << IONIC_V1_CQE_QID_SHIFT);
+
+	cqe->qid_type_flags = cqe->qid_type_flags | qid_bits;
+}
+
+/* Fetch @cqe's qid_type_flags word in cpu byte order */
+static inline u32 ionic_v1_cqe_qtf(struct ionic_v1_cqe *cqe)
+{
+	u32 qtf = be32_to_cpu(cqe->qid_type_flags);
+
+	return qtf;
+}
+
+/* Extract the cqe type field from a cpu-order qid_type_flags word */
+static inline u8 ionic_v1_cqe_qtf_type(u32 qtf)
+{
+	u32 type = qtf >> IONIC_V1_CQE_TYPE_SHIFT;
+
+	return type & IONIC_V1_CQE_TYPE_MASK;
+}
+
+/* Extract the queue id from a cpu-order qid_type_flags word */
+static inline u32 ionic_v1_cqe_qtf_qid(u32 qtf)
+{
+	u32 qid = qtf;
+
+	qid >>= IONIC_V1_CQE_QID_SHIFT;
+
+	return qid;
+}
+
+/*
+ * v1 base wqe header
+ *
+ * Leading 16 bytes of every struct ionic_v1_wqe.  wqe_id is declared
+ * without an endianness annotation; flags and imm_data_key are big-endian.
+ */
+struct ionic_v1_base_hdr {
+ __u64 wqe_id;
+ __u8 op;
+ __u8 num_sge_key;
+ __be16 flags;
+ __be32 imm_data_key;
+};
+
+/* v1 receive wqe body: 16 reserved bytes, then the payload union */
+struct ionic_v1_recv_bdy {
+ __u8 rsvd[16];
+ union ionic_v1_pld pld;
+};
+
+/*
+ * v1 send/rdma wqe body (common, has sgl)
+ *
+ * The first 12 bytes are either the send addressing triple or the rdma
+ * remote address/rkey triple, followed by a big-endian length and the
+ * payload union.
+ */
+struct ionic_v1_common_bdy {
+ union {
+ struct {
+ __be32 ah_id;
+ __be32 dest_qpn;
+ __be32 dest_qkey;
+ } send;
+ struct {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ } rdma;
+ };
+ __be32 length;
+ union ionic_v1_pld pld;
+};
+
+/*
+ * v1 atomic wqe body
+ *
+ * The 64-bit remote address, swap/add operand and compare operand are each
+ * split into big-endian high and low 32-bit halves; a single sge follows.
+ */
+struct ionic_v1_atomic_bdy {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ __be32 swap_add_high;
+ __be32 swap_add_low;
+ __be32 compare_high;
+ __be32 compare_low;
+ __u8 rsvd[4];
+ struct ionic_sge sge;
+};
+
+/* v1 reg mr wqe body; all multi-byte fields are big-endian */
+struct ionic_v1_reg_mr_bdy {
+ __be64 va;
+ __be64 length;
+ __be64 offset;
+ __be64 dma_addr;
+ __be32 map_count;
+ __be16 flags;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+ __u8 rsvd[8];
+};
+
+/* v1 bind mw wqe body; all multi-byte fields are big-endian */
+struct ionic_v1_bind_mw_bdy {
+ __be64 va;
+ __be64 length;
+ __be32 lkey;
+ __be16 flags;
+ __u8 rsvd[26];
+};
+
+/*
+ * v1 send/recv wqe
+ *
+ * The base header followed by one of the body layouts.  sizeof() of this
+ * struct is the floor used by the *_wqe_min_size() helpers.
+ */
+struct ionic_v1_wqe {
+ struct ionic_v1_base_hdr base;
+ union {
+ struct ionic_v1_recv_bdy recv;
+ struct ionic_v1_common_bdy common;
+ struct ionic_v1_atomic_bdy atomic;
+ struct ionic_v1_reg_mr_bdy reg_mr;
+ struct ionic_v1_bind_mw_bdy bind_mw;
+ };
+};
+
+/*
+ * queue pair v1 send opcodes
+ *
+ * Note this enum mixes three unrelated groups: the consecutive opcode
+ * values, the wqe flag bits, and IONIC_V1_SPEC_FIRST_SGE, a count used by
+ * the wqe sizing helpers.
+ */
+enum ionic_v1_op {
+ IONIC_V1_OP_SEND,
+ IONIC_V1_OP_SEND_INV,
+ IONIC_V1_OP_SEND_IMM,
+ IONIC_V1_OP_RDMA_READ,
+ IONIC_V1_OP_RDMA_WRITE,
+ IONIC_V1_OP_RDMA_WRITE_IMM,
+ IONIC_V1_OP_ATOMIC_CS,
+ IONIC_V1_OP_ATOMIC_FA,
+ IONIC_V1_OP_REG_MR,
+ IONIC_V1_OP_LOCAL_INV,
+ IONIC_V1_OP_BIND_MW,
+
+ /* flags */
+ IONIC_V1_FLAG_FENCE = BIT(0),
+ IONIC_V1_FLAG_SOL = BIT(1),
+ IONIC_V1_FLAG_INL = BIT(2),
+ IONIC_V1_FLAG_SIG = BIT(3),
+
+ /* flags last four bits for sgl spec format */
+ IONIC_V1_FLAG_SPEC32 = (1u << 12),
+ IONIC_V1_FLAG_SPEC16 = (2u << 12),
+ IONIC_V1_SPEC_FIRST_SGE = 2,
+};
+
+/*
+ * queue pair v2 send opcodes
+ *
+ * The IONIC_V2_OPSL_* values are modifier bits ORed into a small base
+ * opcode: send and rdma write always carry OPSL_OUT, and their _IMM/_INV
+ * variants add the corresponding modifier.
+ */
+enum ionic_v2_op {
+ IONIC_V2_OPSL_OUT = 0x20,
+ IONIC_V2_OPSL_IMM = 0x40,
+ IONIC_V2_OPSL_INV = 0x80,
+
+ IONIC_V2_OP_SEND = 0x0 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_SEND_IMM = IONIC_V2_OP_SEND | IONIC_V2_OPSL_IMM,
+ IONIC_V2_OP_SEND_INV = IONIC_V2_OP_SEND | IONIC_V2_OPSL_INV,
+
+ IONIC_V2_OP_RDMA_WRITE = 0x1 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_RDMA_WRITE_IMM = IONIC_V2_OP_RDMA_WRITE | IONIC_V2_OPSL_IMM,
+
+ IONIC_V2_OP_RDMA_READ = 0x2,
+
+ IONIC_V2_OP_ATOMIC_CS = 0x4,
+ IONIC_V2_OP_ATOMIC_FA = 0x5,
+ IONIC_V2_OP_REG_MR = 0x6,
+ IONIC_V2_OP_LOCAL_INV = 0x7,
+ IONIC_V2_OP_BIND_MW = 0x8,
+};
+
+/*
+ * Smallest send wqe size, in bytes, that holds @min_sge sges and @min_data
+ * bytes of inline data.
+ *
+ * When @spec exceeds IONIC_V1_SPEC_FIRST_SGE that many extra sge slots are
+ * added; @expdb adds one more sge slot and IONIC_EXP_DBELL_SZ data bytes.
+ * The result is never smaller than sizeof(struct ionic_v1_wqe).
+ */
+static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data,
+						 int spec, bool expdb)
+{
+	size_t sz = sizeof(struct ionic_v1_wqe);
+	size_t need;
+
+	if (spec > IONIC_V1_SPEC_FIRST_SGE)
+		min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+	if (expdb) {
+		min_sge += 1;
+		min_data += IONIC_EXP_DBELL_SZ;
+	}
+
+	/* room needed for the sgl */
+	need = offsetof(struct ionic_v1_wqe, common.pld.sgl[min_sge]);
+	if (need > sz)
+		sz = need;
+
+	/* room needed for inline data */
+	need = offsetof(struct ionic_v1_wqe, common.pld.data[min_data]);
+	if (need > sz)
+		sz = need;
+
+	return sz;
+}
+
+/*
+ * Number of sges that fit in a send wqe of 2^@stride_log2 bytes.
+ *
+ * When @spec exceeds IONIC_V1_SPEC_FIRST_SGE the first
+ * IONIC_V1_SPEC_FIRST_SGE sgl slots are not counted; @expdb holds back one
+ * sge worth of space at the end of the wqe.  A non-zero @spec also caps the
+ * result.
+ *
+ * Computed with offsetof() on integers, like the *_min_size() helpers,
+ * rather than by subtracting pointers derived from NULL.
+ */
+static inline int ionic_v1_send_wqe_max_sge(u8 stride_log2, int spec,
+					     bool expdb)
+{
+	ptrdiff_t room = (ptrdiff_t)(1ull << stride_log2);
+	int first_sge = 0;
+	int num_sge;
+
+	if (expdb)
+		room -= (ptrdiff_t)sizeof(struct ionic_sge);
+
+	if (spec > IONIC_V1_SPEC_FIRST_SGE)
+		first_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+	room -= (ptrdiff_t)offsetof(struct ionic_v1_wqe,
+				    common.pld.sgl[first_sge]);
+
+	/* may be negative for a stride smaller than the wqe header */
+	num_sge = room / (ptrdiff_t)sizeof(struct ionic_sge);
+
+	if (spec && num_sge > spec)
+		num_sge = spec;
+
+	return num_sge;
+}
+
+/*
+ * Number of inline data bytes that fit in a send wqe of 2^@stride_log2
+ * bytes; @expdb holds back IONIC_EXP_DBELL_SZ bytes at the end.
+ *
+ * Computed with offsetof() on integers rather than by subtracting pointers
+ * derived from NULL.
+ */
+static inline int ionic_v1_send_wqe_max_data(u8 stride_log2, bool expdb)
+{
+	ptrdiff_t room = (ptrdiff_t)(1ull << stride_log2);
+
+	if (expdb)
+		room -= IONIC_EXP_DBELL_SZ;
+
+	room -= (ptrdiff_t)offsetof(struct ionic_v1_wqe, common.pld.data);
+
+	return room;
+}
+
+/*
+ * Smallest recv wqe size, in bytes, that holds @min_sge sges.
+ *
+ * When @spec exceeds IONIC_V1_SPEC_FIRST_SGE that many extra sge slots are
+ * added; @expdb adds one more.  The result is never smaller than
+ * sizeof(struct ionic_v1_wqe).
+ */
+static inline size_t ionic_v1_recv_wqe_min_size(int min_sge, int spec,
+						 bool expdb)
+{
+	size_t sz = sizeof(struct ionic_v1_wqe);
+	size_t need;
+
+	if (spec > IONIC_V1_SPEC_FIRST_SGE)
+		min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+	if (expdb)
+		min_sge += 1;
+
+	need = offsetof(struct ionic_v1_wqe, recv.pld.sgl[min_sge]);
+	if (need > sz)
+		sz = need;
+
+	return sz;
+}
+
+/*
+ * Number of sges that fit in a recv wqe of 2^@stride_log2 bytes.
+ *
+ * When @spec exceeds IONIC_V1_SPEC_FIRST_SGE the first
+ * IONIC_V1_SPEC_FIRST_SGE sgl slots are not counted; @expdb holds back one
+ * sge worth of space at the end of the wqe.  A non-zero @spec also caps the
+ * result.
+ *
+ * Computed with offsetof() on integers, like the *_min_size() helpers,
+ * rather than by subtracting pointers derived from NULL.
+ */
+static inline int ionic_v1_recv_wqe_max_sge(u8 stride_log2, int spec,
+					     bool expdb)
+{
+	ptrdiff_t room = (ptrdiff_t)(1ull << stride_log2);
+	int first_sge = 0;
+	int num_sge;
+
+	if (expdb)
+		room -= (ptrdiff_t)sizeof(struct ionic_sge);
+
+	if (spec > IONIC_V1_SPEC_FIRST_SGE)
+		first_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+	room -= (ptrdiff_t)offsetof(struct ionic_v1_wqe,
+				    recv.pld.sgl[first_sge]);
+
+	/* may be negative for a stride smaller than the wqe header */
+	num_sge = room / (ptrdiff_t)sizeof(struct ionic_sge);
+
+	if (spec && num_sge > spec)
+		num_sge = spec;
+
+	return num_sge;
+}
+
+/*
+ * Choose the spec value for a queue needing @min_sge sges: 0 when @spec is
+ * zero or too small for @min_sge, IONIC_V1_SPEC_FIRST_SGE when @min_sge
+ * fits within that count, and @spec otherwise.
+ */
+static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
+{
+	if (spec == 0 || spec < min_sge)
+		return 0;
+
+	return (min_sge > IONIC_V1_SPEC_FIRST_SGE) ?
+		spec : IONIC_V1_SPEC_FIRST_SGE;
+}
+
+/*
+ * Payload of the cmd.stats arm of struct ionic_v1_admin_wqe: a
+ * little-endian buffer address, buffer length and id_ver word, plus a
+ * type_state byte.  Packed; the static_assert pins its size to 17 bytes.
+ */
+struct ionic_admin_stats_hdr {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 id_ver;
+ __u8 type_state;
+} __packed;
+
+#define IONIC_ADMIN_STATS_HDRS_IN_V1_LEN 17
+static_assert(sizeof(struct ionic_admin_stats_hdr) ==
+ IONIC_ADMIN_STATS_HDRS_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.create_ah arm of struct ionic_v1_admin_wqe.  Packed;
+ * the static_assert pins its size to 24 bytes.
+ */
+struct ionic_admin_create_ah {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 csum_profile;
+ __u8 crypto;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_AH_IN_V1_LEN 24
+static_assert(sizeof(struct ionic_admin_create_ah) ==
+ IONIC_ADMIN_CREATE_AH_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.destroy_ah arm of struct ionic_v1_admin_wqe: just the
+ * little-endian ah id.  Packed; size pinned to 4 bytes.
+ */
+struct ionic_admin_destroy_ah {
+ __le32 ah_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_AH_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_ah) ==
+ IONIC_ADMIN_DESTROY_AH_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.query_ah arm of struct ionic_v1_admin_wqe: a single
+ * little-endian dma address.  Packed; size pinned to 8 bytes.
+ */
+struct ionic_admin_query_ah {
+ __le64 dma_addr;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_AH_IN_V1_LEN 8
+static_assert(sizeof(struct ionic_admin_query_ah) ==
+ IONIC_ADMIN_QUERY_AH_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.create_mr arm of struct ionic_v1_admin_wqe.  All
+ * multi-byte fields are little-endian.  Packed; the static_assert pins its
+ * size to 45 bytes.
+ */
+struct ionic_admin_create_mr {
+ __le64 va;
+ __le64 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+ __u8 pt_type;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_MR_IN_V1_LEN 45
+static_assert(sizeof(struct ionic_admin_create_mr) ==
+ IONIC_ADMIN_CREATE_MR_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.destroy_mr arm of struct ionic_v1_admin_wqe: just the
+ * little-endian mr id.  Packed; size pinned to 4 bytes.
+ */
+struct ionic_admin_destroy_mr {
+ __le32 mr_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_MR_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_mr) ==
+ IONIC_ADMIN_DESTROY_MR_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.create_cq arm of struct ionic_v1_admin_wqe.  All
+ * multi-byte fields are little-endian.  Packed; the static_assert pins its
+ * size to 34 bytes.
+ */
+struct ionic_admin_create_cq {
+ __le32 eq_id;
+ __u8 depth_log2;
+ __u8 stride_log2;
+ __u8 dir_size_log2_rsvd;
+ __u8 page_size_log2;
+ __le32 cq_flags;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_CQ_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_create_cq) ==
+ IONIC_ADMIN_CREATE_CQ_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.destroy_cq arm of struct ionic_v1_admin_wqe: just the
+ * little-endian cq id.  Packed; size pinned to 4 bytes.
+ */
+struct ionic_admin_destroy_cq {
+ __le32 cq_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_cq) ==
+ IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.create_qp arm of struct ionic_v1_admin_wqe.  The sq_*
+ * and rq_* groups have the same shape.  Note priv_flags is big-endian while
+ * the other multi-byte fields are little-endian.  Packed; the static_assert
+ * pins its size to 64 bytes.
+ */
+struct ionic_admin_create_qp {
+ __le32 pd_id;
+ __be32 priv_flags;
+ __le32 sq_cq_id;
+ __u8 sq_depth_log2;
+ __u8 sq_stride_log2;
+ __u8 sq_dir_size_log2_rsvd;
+ __u8 sq_page_size_log2;
+ __le32 sq_tbl_index_xrcd_id;
+ __le32 sq_map_count;
+ __le64 sq_dma_addr;
+ __le32 rq_cq_id;
+ __u8 rq_depth_log2;
+ __u8 rq_stride_log2;
+ __u8 rq_dir_size_log2_rsvd;
+ __u8 rq_page_size_log2;
+ __le32 rq_tbl_index_srq_id;
+ __le32 rq_map_count;
+ __le64 rq_dma_addr;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 type_state;
+ __u8 rsvd;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_QP_IN_V1_LEN 64
+static_assert(sizeof(struct ionic_admin_create_qp) ==
+ IONIC_ADMIN_CREATE_QP_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.destroy_qp arm of struct ionic_v1_admin_wqe: just the
+ * little-endian qp id.  Packed; size pinned to 4 bytes.
+ */
+struct ionic_admin_destroy_qp {
+ __le32 qp_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_QP_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_qp) ==
+ IONIC_ADMIN_DESTROY_QP_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.mod_qp arm of struct ionic_v1_admin_wqe.  attr_mask
+ * and access_flags are big-endian; the other multi-byte fields are
+ * little-endian.  rrq_index shares storage with a reserved 16-bit field.
+ * Packed; the static_assert pins its size to 60 bytes.
+ */
+struct ionic_admin_mod_qp {
+ __be32 attr_mask;
+ __u8 dcqcn_profile;
+ __u8 tfp_csum_profile;
+ __be16 access_flags;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ __le32 qkey_dest_qpn;
+ __le32 rate_limit_kbps;
+ __u8 pmtu;
+ __u8 retry;
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __u8 rsq_depth;
+ __u8 rrq_depth;
+ __le16 pkey_id;
+ __le32 ah_id_len;
+ __u8 en_pcp;
+ __u8 ip_dscp;
+ __u8 rsvd2;
+ __u8 type_state;
+ union {
+ struct {
+ __le16 rsvd1;
+ };
+ __le32 rrq_index;
+ };
+ __le32 rsq_index;
+ __le64 dma_addr;
+ __le32 id_ver;
+} __packed;
+
+#define IONIC_ADMIN_MODIFY_QP_IN_V1_LEN 60
+static_assert(sizeof(struct ionic_admin_mod_qp) ==
+ IONIC_ADMIN_MODIFY_QP_IN_V1_LEN);
+
+/*
+ * Payload of the cmd.query_qp arm of struct ionic_v1_admin_wqe: three
+ * little-endian dma addresses (hdr, sq, rq) plus ids and flags.  The sq/rq
+ * buffers presumably receive struct ionic_v1_admin_query_qp_sq/_rq -- TODO
+ * confirm against the caller.  Packed; size pinned to 34 bytes.
+ */
+struct ionic_admin_query_qp {
+ __le64 hdr_dma_addr;
+ __le64 sq_dma_addr;
+ __le64 rq_dma_addr;
+ __le32 ah_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_QP_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_query_qp) ==
+ IONIC_ADMIN_QUERY_QP_IN_V1_LEN);
+
+/*
+ * Admin wqe geometry.  ADMIN_WQE_HDR_LEN equals the combined size of the
+ * op, rsvd and len fields that precede the cmd union below.
+ */
+#define ADMIN_WQE_STRIDE 64
+#define ADMIN_WQE_HDR_LEN 4
+
+/*
+ * admin queue v1 wqe
+ *
+ * A 4-byte header (opcode, reserved byte, little-endian length) followed by
+ * the opcode-specific payload.
+ */
+struct ionic_v1_admin_wqe {
+ __u8 op;
+ __u8 rsvd;
+ __le16 len;
+
+ union {
+ struct ionic_admin_stats_hdr stats;
+ struct ionic_admin_create_ah create_ah;
+ struct ionic_admin_destroy_ah destroy_ah;
+ struct ionic_admin_query_ah query_ah;
+ struct ionic_admin_create_mr create_mr;
+ struct ionic_admin_destroy_mr destroy_mr;
+ struct ionic_admin_create_cq create_cq;
+ struct ionic_admin_destroy_cq destroy_cq;
+ struct ionic_admin_create_qp create_qp;
+ struct ionic_admin_destroy_qp destroy_qp;
+ struct ionic_admin_mod_qp mod_qp;
+ struct ionic_admin_query_qp query_qp;
+ } cmd;
+};
+
+/* side data for query qp (sq part); multi-byte fields are big-endian */
+struct ionic_v1_admin_query_qp_sq {
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+ __be16 pkey_id;
+ __be32 qkey_dest_qpn;
+ __be32 rate_limit_kbps;
+ __be32 rq_psn;
+};
+
+/* side data for query qp (rq part); multi-byte fields are big-endian */
+struct ionic_v1_admin_query_qp_rq {
+ __u8 state_pmtu;
+ __u8 retry_rnrtry;
+ __u8 rrq_depth;
+ __u8 rsq_depth;
+ __be32 sq_psn;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+};
+
+/*
+ * admin queue v1 opcodes
+ *
+ * Consecutive values starting at 0; IONIC_V1_ADMIN_OPCODES_MAX is one past
+ * the last opcode.  Value 7 is kept as a reserved placeholder.
+ */
+enum ionic_v1_admin_op {
+ IONIC_V1_ADMIN_NOOP,
+ IONIC_V1_ADMIN_CREATE_CQ,
+ IONIC_V1_ADMIN_CREATE_QP,
+ IONIC_V1_ADMIN_CREATE_MR,
+ IONIC_V1_ADMIN_STATS_HDRS,
+ IONIC_V1_ADMIN_STATS_VALS,
+ IONIC_V1_ADMIN_DESTROY_MR,
+ IONIC_V1_ADMIN_RSVD_7, /* RESIZE_CQ */
+ IONIC_V1_ADMIN_DESTROY_CQ,
+ IONIC_V1_ADMIN_MODIFY_QP,
+ IONIC_V1_ADMIN_QUERY_QP,
+ IONIC_V1_ADMIN_DESTROY_QP,
+ IONIC_V1_ADMIN_DEBUG,
+ IONIC_V1_ADMIN_CREATE_AH,
+ IONIC_V1_ADMIN_QUERY_AH,
+ IONIC_V1_ADMIN_MODIFY_DCQCN,
+ IONIC_V1_ADMIN_DESTROY_AH,
+ IONIC_V1_ADMIN_QP_STATS_HDRS,
+ IONIC_V1_ADMIN_QP_STATS_VALS,
+ IONIC_V1_ADMIN_OPCODES_MAX,
+};
+
+/* admin queue v1 cqe status; consecutive values, 0 (ASTS_OK) is success */
+enum ionic_v1_admin_status {
+ IONIC_V1_ASTS_OK,
+ IONIC_V1_ASTS_BAD_CMD,
+ IONIC_V1_ASTS_BAD_INDEX,
+ IONIC_V1_ASTS_BAD_STATE,
+ IONIC_V1_ASTS_BAD_TYPE,
+ IONIC_V1_ASTS_BAD_ATTR,
+ IONIC_V1_ASTS_MSG_TOO_BIG,
+};
+
+/*
+ * event queue v1 eqe: one big-endian word, decoded with the
+ * IONIC_V1_EQE_* bits and the ionic_v1_eqe_*() helpers
+ */
+struct ionic_v1_eqe {
+ __be32 evt;
+};
+
+/*
+ * bits for eqe evt
+ *
+ * Bit 0 is the color bit, a 3-bit type field starts at bit 1, a 4-bit code
+ * field at bit 4, and the queue id occupies everything from bit 8 up.  The
+ * code values are scoped by type, so the cq and qp groups reuse numbers.
+ */
+enum ionic_v1_eqe_evt_bits {
+ IONIC_V1_EQE_COLOR = BIT(0),
+ IONIC_V1_EQE_TYPE_SHIFT = 1,
+ IONIC_V1_EQE_TYPE_MASK = 0x7,
+ IONIC_V1_EQE_CODE_SHIFT = 4,
+ IONIC_V1_EQE_CODE_MASK = 0xf,
+ IONIC_V1_EQE_QID_SHIFT = 8,
+
+ /* cq events */
+ IONIC_V1_EQE_TYPE_CQ = 0,
+ /* cq normal events */
+ IONIC_V1_EQE_CQ_NOTIFY = 0,
+ /* cq error events */
+ IONIC_V1_EQE_CQ_ERR = 8,
+
+ /* qp and srq events */
+ IONIC_V1_EQE_TYPE_QP = 1,
+ /* qp normal events */
+ IONIC_V1_EQE_SRQ_LEVEL = 0,
+ IONIC_V1_EQE_SQ_DRAIN = 1,
+ IONIC_V1_EQE_QP_COMM_EST = 2,
+ IONIC_V1_EQE_QP_LAST_WQE = 3,
+ /* qp error events */
+ IONIC_V1_EQE_QP_ERR = 8,
+ IONIC_V1_EQE_QP_ERR_REQUEST = 9,
+ IONIC_V1_EQE_QP_ERR_ACCESS = 10,
+};
+
+/*
+ * Checksum profile numbers.  Each name spells out a header stack; these are
+ * presumably the values carried in the csum_profile / tfp_csum_profile
+ * fields of the admin commands -- TODO confirm against the callers.
+ */
+enum ionic_tfp_csum_profiles {
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP = 0,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP = 1,
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP = 2,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP = 3,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 4,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 5,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 6,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 7,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_IPV4_UDP = 8,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_ESP_UDP = 9,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_UDP = 10,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_ESP_UDP = 11,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_CSUM = 12,
+};
+
+/* True when the color bit of @eqe's evt word is set */
+static inline bool ionic_v1_eqe_color(struct ionic_v1_eqe *eqe)
+{
+	u32 evt = be32_to_cpu(eqe->evt);
+
+	return (evt & IONIC_V1_EQE_COLOR) != 0;
+}
+
+/* Fetch @eqe's evt word in cpu byte order */
+static inline u32 ionic_v1_eqe_evt(struct ionic_v1_eqe *eqe)
+{
+	u32 evt = be32_to_cpu(eqe->evt);
+
+	return evt;
+}
+
+/* Extract the event type field from a cpu-order evt word */
+static inline u8 ionic_v1_eqe_evt_type(u32 evt)
+{
+	u32 type = evt >> IONIC_V1_EQE_TYPE_SHIFT;
+
+	return type & IONIC_V1_EQE_TYPE_MASK;
+}
+
+/* Extract the event code field from a cpu-order evt word */
+static inline u8 ionic_v1_eqe_evt_code(u32 evt)
+{
+	u32 code = evt >> IONIC_V1_EQE_CODE_SHIFT;
+
+	return code & IONIC_V1_EQE_CODE_MASK;
+}
+
+/* Extract the queue id from a cpu-order evt word */
+static inline u32 ionic_v1_eqe_evt_qid(u32 evt)
+{
+	u32 qid = evt;
+
+	qid >>= IONIC_V1_EQE_QID_SHIFT;
+
+	return qid;
+}
+
+/*
+ * Layout of the type_off word of struct ionic_v1_stat: the value type sits
+ * in the bits from IONIC_V1_STAT_TYPE_SHIFT (28) up, the offset in the 28
+ * bits below.  The TYPE_* values name the width and byte order of a value.
+ */
+enum ionic_v1_stat_bits {
+ IONIC_V1_STAT_TYPE_SHIFT = 28,
+ IONIC_V1_STAT_TYPE_NONE = 0,
+ IONIC_V1_STAT_TYPE_8 = 1,
+ IONIC_V1_STAT_TYPE_LE16 = 2,
+ IONIC_V1_STAT_TYPE_LE32 = 3,
+ IONIC_V1_STAT_TYPE_LE64 = 4,
+ IONIC_V1_STAT_TYPE_BE16 = 5,
+ IONIC_V1_STAT_TYPE_BE32 = 6,
+ IONIC_V1_STAT_TYPE_BE64 = 7,
+ IONIC_V1_STAT_OFF_MASK = BIT(IONIC_V1_STAT_TYPE_SHIFT) - 1,
+};
+
+/*
+ * One statistic descriptor: a type/offset word and a 28-byte name.
+ *
+ * The word is viewed either as big-endian (be_type_off) or, once it has
+ * been converted in place with be32_to_cpu(), as a cpu-order u32
+ * (type_off).  ionic_v1_stat_type()/ionic_v1_stat_off() read the cpu-order
+ * view.
+ */
+struct ionic_v1_stat {
+ union {
+ __be32 be_type_off;
+ u32 type_off;
+ };
+ char name[28];
+};
+
+/* Value type (IONIC_V1_STAT_TYPE_*) from the cpu-order type_off word of @hdr */
+static inline int ionic_v1_stat_type(struct ionic_v1_stat *hdr)
+{
+	u32 type_off = hdr->type_off;
+
+	return type_off >> IONIC_V1_STAT_TYPE_SHIFT;
+}
+
+/* Value offset from the cpu-order type_off word of @hdr */
+static inline unsigned int ionic_v1_stat_off(struct ionic_v1_stat *hdr)
+{
+	u32 type_off = hdr->type_off;
+
+	return type_off & IONIC_V1_STAT_OFF_MASK;
+}
+
+#endif /* _IONIC_FW_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_hw_stats.c b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
new file mode 100644
index 000000000000..244a80dde08f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+/*
+ * Convert up to @hw_stats_count descriptors to cpu byte order in place and
+ * force-terminate their names.
+ *
+ * Stops at the first descriptor of type IONIC_V1_STAT_TYPE_NONE (which is
+ * still converted) and returns the number of descriptors before it, or
+ * @hw_stats_count if none is found.
+ */
+static int ionic_v1_stat_normalize(struct ionic_v1_stat *hw_stats,
+				   int hw_stats_count)
+{
+	int count = 0;
+
+	while (count < hw_stats_count) {
+		struct ionic_v1_stat *stat = hw_stats + count;
+
+		stat->type_off = be32_to_cpu(stat->be_type_off);
+		stat->name[sizeof(stat->name) - 1] = 0;
+
+		if (ionic_v1_stat_type(stat) == IONIC_V1_STAT_TYPE_NONE)
+			break;
+
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Point the name of each of the first @hw_stats_count entries of
+ * @hw_stats_hdrs at the name stored in the matching @hw_stats descriptor.
+ * Names are not copied, so @hw_stats must outlive @hw_stats_hdrs.
+ */
+static void ionic_fill_stats_desc(struct rdma_stat_desc *hw_stats_hdrs,
+				  struct ionic_v1_stat *hw_stats,
+				  int hw_stats_count)
+{
+	struct rdma_stat_desc *desc = hw_stats_hdrs;
+	struct ionic_v1_stat *stat = hw_stats;
+	int remaining;
+
+	for (remaining = hw_stats_count; remaining > 0; --remaining) {
+		desc->name = stat->name;
+		++desc;
+		++stat;
+	}
+}
+
+/* True when a @size byte value at @off is in bounds and naturally aligned */
+static bool ionic_v1_stat_fits(unsigned int off, size_t size, size_t vals_len)
+{
+	if (off + size > vals_len)
+		return false;
+
+	return IS_ALIGNED(off, size);
+}
+
+/*
+ * Read the value described by @stat out of @vals_buf (@vals_len bytes).
+ *
+ * The descriptor's type selects the width and byte order; the value is
+ * returned widened to u64.  Returns ~0ull when the type is unknown or the
+ * offset is out of bounds or misaligned for the type.
+ */
+static u64 ionic_v1_stat_val(struct ionic_v1_stat *stat,
+			     void *vals_buf, size_t vals_len)
+{
+	unsigned int off = ionic_v1_stat_off(stat);
+	void *val = vals_buf + off;
+
+	switch (ionic_v1_stat_type(stat)) {
+	case IONIC_V1_STAT_TYPE_8:
+		if (ionic_v1_stat_fits(off, sizeof(u8), vals_len))
+			return *(u8 *)val;
+		break;
+	case IONIC_V1_STAT_TYPE_LE16:
+		if (ionic_v1_stat_fits(off, sizeof(__le16), vals_len))
+			return le16_to_cpu(*(__le16 *)val);
+		break;
+	case IONIC_V1_STAT_TYPE_LE32:
+		if (ionic_v1_stat_fits(off, sizeof(__le32), vals_len))
+			return le32_to_cpu(*(__le32 *)val);
+		break;
+	case IONIC_V1_STAT_TYPE_LE64:
+		if (ionic_v1_stat_fits(off, sizeof(__le64), vals_len))
+			return le64_to_cpu(*(__le64 *)val);
+		break;
+	case IONIC_V1_STAT_TYPE_BE16:
+		if (ionic_v1_stat_fits(off, sizeof(__be16), vals_len))
+			return be16_to_cpu(*(__be16 *)val);
+		break;
+	case IONIC_V1_STAT_TYPE_BE32:
+		if (ionic_v1_stat_fits(off, sizeof(__be32), vals_len))
+			return be32_to_cpu(*(__be32 *)val);
+		break;
+	case IONIC_V1_STAT_TYPE_BE64:
+		if (ionic_v1_stat_fits(off, sizeof(__be64), vals_len))
+			return be64_to_cpu(*(__be64 *)val);
+		break;
+	}
+
+	return ~0ull;
+}
+
+/*
+ * Post a stats admin command and wait for it to complete.
+ *
+ * @dma/@len describe the buffer the command refers to, @qid goes into the
+ * id_ver field and @op is the admin opcode.  Returns -EBADRQC without
+ * posting when @op is not below dev->lif_cfg.admin_opcodes; otherwise
+ * returns the result of ionic_admin_wait().
+ */
+static int ionic_hw_stats_cmd(struct ionic_ibdev *dev,
+			      dma_addr_t dma, size_t len, int qid, int op)
+{
+	struct ionic_admin_wr wr = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+		.wqe = {
+			.op = op,
+			.len = cpu_to_le16(IONIC_ADMIN_STATS_HDRS_IN_V1_LEN),
+			.cmd.stats = {
+				.dma_addr = cpu_to_le64(dma),
+				.length = cpu_to_le32(len),
+				.id_ver = cpu_to_le32(qid),
+			},
+		}
+	};
+	int rc;
+
+	if (op >= dev->lif_cfg.admin_opcodes)
+		return -EBADRQC;
+
+	ionic_admin_post(dev, &wr);
+	rc = ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_INTERRUPT);
+
+	return rc;
+}
+
+/*
+ * Set up the global hw stats state of @dev.
+ *
+ * Allocates one page for stat values and one for stat descriptors, asks
+ * the device to fill in the descriptors, normalizes and counts them, and
+ * builds the rdma_stat_desc name array.  Does nothing and returns 0 when
+ * dev->hw_stats_hdrs is already set.
+ *
+ * Returns 0 on success.  On failure everything allocated here is freed,
+ * the dev->hw_stats* fields are reset and a negative errno is returned
+ * (-ENODATA when the device reports no stats).
+ */
+static int ionic_init_hw_stats(struct ionic_ibdev *dev)
+{
+	dma_addr_t hdrs_dma;
+	int nr_stats;
+	int rc;
+
+	if (dev->hw_stats_hdrs)
+		return 0;
+
+	dev->hw_stats_count = 0;
+
+	/* buffer for current values from the device */
+	dev->hw_stats_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dev->hw_stats_buf) {
+		rc = -ENOMEM;
+		goto out_reset;
+	}
+
+	/* buffer for names, sizes, offsets of values */
+	dev->hw_stats = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dev->hw_stats) {
+		rc = -ENOMEM;
+		goto out_free_buf;
+	}
+
+	/* request the names, sizes, offsets */
+	hdrs_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats,
+				  PAGE_SIZE, DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, hdrs_dma);
+	if (rc)
+		goto out_free_stats;
+
+	rc = ionic_hw_stats_cmd(dev, hdrs_dma, PAGE_SIZE, 0,
+				IONIC_V1_ADMIN_STATS_HDRS);
+	if (rc)
+		goto out_unmap;
+
+	dma_unmap_single(dev->lif_cfg.hwdev, hdrs_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+
+	/* normalize and count the number of hw_stats */
+	nr_stats = ionic_v1_stat_normalize(dev->hw_stats,
+					   PAGE_SIZE / sizeof(*dev->hw_stats));
+	if (!nr_stats) {
+		rc = -ENODATA;
+		goto out_free_stats;
+	}
+
+	dev->hw_stats_count = nr_stats;
+
+	/* alloc and init array of names, for alloc_hw_stats */
+	dev->hw_stats_hdrs = kcalloc(nr_stats, sizeof(*dev->hw_stats_hdrs),
+				     GFP_KERNEL);
+	if (!dev->hw_stats_hdrs) {
+		rc = -ENOMEM;
+		goto out_free_stats;
+	}
+
+	ionic_fill_stats_desc(dev->hw_stats_hdrs, dev->hw_stats, nr_stats);
+
+	return 0;
+
+out_unmap:
+	dma_unmap_single(dev->lif_cfg.hwdev, hdrs_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+out_free_stats:
+	kfree(dev->hw_stats);
+out_free_buf:
+	kfree(dev->hw_stats_buf);
+out_reset:
+	dev->hw_stats_count = 0;
+	dev->hw_stats = NULL;
+	dev->hw_stats_buf = NULL;
+	dev->hw_stats_hdrs = NULL;
+	return rc;
+}
+
+/*
+ * alloc_hw_port_stats op: build the rdma_hw_stats container for port 1 from
+ * the descriptors gathered by ionic_init_hw_stats().  Any other port yields
+ * NULL.
+ */
+static struct rdma_hw_stats *ionic_alloc_hw_stats(struct ib_device *ibdev,
+						  u32 port)
+{
+	struct ionic_ibdev *dev;
+
+	if (port != 1)
+		return NULL;
+
+	dev = to_ionic_ibdev(ibdev);
+
+	return rdma_alloc_hw_stats_struct(dev->hw_stats_hdrs,
+					  dev->hw_stats_count,
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+/*
+ * get_hw_stats op: fetch the current global stat values from the device
+ * into dev->hw_stats_buf and decode each one into @hw_stats->value[].
+ *
+ * Only port 1 is accepted (-EINVAL otherwise); @index is not used.
+ * Returns the number of values written on success or a negative errno.
+ */
+static int ionic_get_hw_stats(struct ib_device *ibdev,
+			      struct rdma_hw_stats *hw_stats,
+			      u32 port, int index)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+	dma_addr_t vals_dma;
+	int rc, i;
+
+	if (port != 1)
+		return -EINVAL;
+
+	vals_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats_buf,
+				  PAGE_SIZE, DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, vals_dma);
+	if (rc)
+		return rc;
+
+	rc = ionic_hw_stats_cmd(dev, vals_dma, PAGE_SIZE,
+				0, IONIC_V1_ADMIN_STATS_VALS);
+
+	/* the mapping is released whether or not the command succeeded */
+	dma_unmap_single(dev->lif_cfg.hwdev, vals_dma,
+			 PAGE_SIZE, DMA_FROM_DEVICE);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < dev->hw_stats_count; ++i)
+		hw_stats->value[i] = ionic_v1_stat_val(&dev->hw_stats[i],
+						       dev->hw_stats_buf,
+						       PAGE_SIZE);
+
+	return i;
+}
+
+/*
+ * counter_alloc_stats op: create the driver state behind @counter.
+ *
+ * Allocates an ionic_counter with a one page value buffer, stores it in the
+ * device's counter xarray (the allocated index becomes @counter->id) and
+ * returns a new rdma_hw_stats sized for the per-queue stats.
+ *
+ * The qp list is initialised before the counter becomes visible through
+ * the xarray, and the xarray entry is removed again if the rdma_hw_stats
+ * allocation fails, so a failure leaves nothing behind.
+ *
+ * Returns NULL on any failure.
+ */
+static struct rdma_hw_stats *
+ionic_counter_alloc_stats(struct rdma_counter *counter)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+	struct ionic_counter_stats *cs = dev->counter_stats;
+	struct rdma_hw_stats *stats;
+	struct ionic_counter *cntr;
+	int err;
+
+	cntr = kzalloc(sizeof(*cntr), GFP_KERNEL);
+	if (!cntr)
+		return NULL;
+
+	/* buffer for current values from the device */
+	cntr->vals = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!cntr->vals)
+		goto err_vals;
+
+	/* must be usable as soon as xa_load() can find the counter */
+	INIT_LIST_HEAD(&cntr->qp_list);
+
+	err = xa_alloc(&cs->xa_counters, &counter->id,
+		       cntr,
+		       XA_LIMIT(0, IONIC_MAX_QPID),
+		       GFP_KERNEL);
+	if (err)
+		goto err_xa;
+
+	stats = rdma_alloc_hw_stats_struct(cs->stats_hdrs,
+					   cs->queue_stats_count,
+					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	if (!stats)
+		goto err_stats;
+
+	return stats;
+
+err_stats:
+	xa_erase(&cs->xa_counters, counter->id);
+err_xa:
+	kfree(cntr->vals);
+err_vals:
+	kfree(cntr);
+
+	return NULL;
+}
+
+/*
+ * counter_dealloc op: remove the driver state of @counter from the counter
+ * xarray and free it.  Returns -EINVAL when no entry exists for
+ * @counter->id, 0 otherwise.
+ */
+static int ionic_counter_dealloc(struct rdma_counter *counter)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+	struct ionic_counter *cntr =
+		xa_erase(&dev->counter_stats->xa_counters, counter->id);
+
+	if (cntr) {
+		kfree(cntr->vals);
+		kfree(cntr);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * counter_bind_qp op: append @ibqp to the qp list of the driver counter
+ * behind @counter and record @counter in the qp.  @port is not used.
+ * Returns -EINVAL when no driver counter exists for @counter->id.
+ */
+static int ionic_counter_bind_qp(struct rdma_counter *counter,
+				 struct ib_qp *ibqp,
+				 u32 port)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+	struct ionic_counter *cntr =
+		xa_load(&dev->counter_stats->xa_counters, counter->id);
+
+	if (!cntr)
+		return -EINVAL;
+
+	list_add_tail(&to_ionic_qp(ibqp)->qp_list_counter, &cntr->qp_list);
+	ibqp->counter = counter;
+
+	return 0;
+}
+
+/*
+ * counter_unbind_qp op: if @ibqp is bound to a counter, take it off that
+ * counter's qp list and clear the binding.  @port is not used.  Always
+ * returns 0.
+ */
+static int ionic_counter_unbind_qp(struct ib_qp *ibqp, u32 port)
+{
+	if (!ibqp->counter)
+		return 0;
+
+	list_del(&to_ionic_qp(ibqp)->qp_list_counter);
+	ibqp->counter = NULL;
+
+	return 0;
+}
+
+/*
+ * Sum the per-queue stats of every qp bound to counter @counter_id into
+ * @hw_stats->value[].
+ *
+ * The counter's value buffer is mapped once and reused for one
+ * IONIC_V1_ADMIN_QP_STATS_VALS command per qp.  Because the buffer stays
+ * mapped across commands, ownership is passed to the CPU with
+ * dma_sync_single_for_cpu() before the values are read and handed back to
+ * the device with dma_sync_single_for_device() before the next command, as
+ * the streaming DMA API requires.
+ *
+ * Returns -EINVAL for an unknown counter, a negative errno if mapping or a
+ * command fails, and otherwise the number of stats accumulated (0 when no
+ * qp is bound).
+ */
+static int ionic_get_qp_stats(struct ib_device *ibdev,
+			      struct rdma_hw_stats *hw_stats,
+			      u32 counter_id)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+	struct ionic_counter_stats *cs;
+	struct ionic_counter *cntr;
+	dma_addr_t hw_stats_dma;
+	struct ionic_qp *qp;
+	int rc, stat_i = 0;
+
+	cs = dev->counter_stats;
+	cntr = xa_load(&cs->xa_counters, counter_id);
+	if (!cntr)
+		return -EINVAL;
+
+	hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, cntr->vals,
+				      PAGE_SIZE, DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+	if (rc)
+		return rc;
+
+	memset(hw_stats->value, 0, sizeof(u64) * hw_stats->num_counters);
+
+	list_for_each_entry(qp, &cntr->qp_list, qp_list_counter) {
+		rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE,
+					qp->qpid,
+					IONIC_V1_ADMIN_QP_STATS_VALS);
+		if (rc)
+			goto err_cmd;
+
+		/* make the device's writes visible before reading them */
+		dma_sync_single_for_cpu(dev->lif_cfg.hwdev, hw_stats_dma,
+					PAGE_SIZE, DMA_FROM_DEVICE);
+
+		for (stat_i = 0; stat_i < cs->queue_stats_count; ++stat_i)
+			hw_stats->value[stat_i] +=
+				ionic_v1_stat_val(&cs->hdr[stat_i],
+						  cntr->vals,
+						  PAGE_SIZE);
+
+		/* give the buffer back for the next qp's command */
+		dma_sync_single_for_device(dev->lif_cfg.hwdev, hw_stats_dma,
+					   PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+	return stat_i;
+
+err_cmd:
+	dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+
+	return rc;
+}
+
+/*
+ * counter_update_stats op: refresh @counter->stats from the device.
+ *
+ * ionic_get_qp_stats() reports the number of stats it filled in as a
+ * positive value.  That is folded to 0 here so that only real errors are
+ * reported as non-zero.
+ *
+ * NOTE(review): this assumes the RDMA core treats any non-zero return from
+ * this op as a failure (rdma_counter_query_stats() callers in counters.c)
+ * -- confirm against the core before merging.
+ */
+static int ionic_counter_update_stats(struct rdma_counter *counter)
+{
+	int rc;
+
+	rc = ionic_get_qp_stats(counter->device, counter->stats, counter->id);
+
+	return rc < 0 ? rc : 0;
+}
+
+/*
+ * Fetch and prepare the per-queue stat descriptors for dev->counter_stats.
+ *
+ * Allocates a page for the descriptors, has the device fill it in with
+ * IONIC_V1_ADMIN_QP_STATS_HDRS, normalizes and counts them, and builds the
+ * rdma_stat_desc name array.
+ *
+ * Returns 0 on success.  On failure the descriptor page is freed and a
+ * negative errno is returned (-ENODATA when the device reports no stats).
+ */
+static int ionic_alloc_counters(struct ionic_ibdev *dev)
+{
+	struct ionic_counter_stats *cs = dev->counter_stats;
+	dma_addr_t hdr_dma;
+	int nr_stats;
+	int rc;
+
+	/* buffer for names, sizes, offsets of values */
+	cs->hdr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!cs->hdr)
+		return -ENOMEM;
+
+	hdr_dma = dma_map_single(dev->lif_cfg.hwdev, cs->hdr,
+				 PAGE_SIZE, DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+	if (rc)
+		goto out_free_hdr;
+
+	rc = ionic_hw_stats_cmd(dev, hdr_dma, PAGE_SIZE, 0,
+				IONIC_V1_ADMIN_QP_STATS_HDRS);
+	if (rc)
+		goto out_unmap;
+
+	dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+
+	/* normalize and count the number of hw_stats */
+	nr_stats = ionic_v1_stat_normalize(cs->hdr,
+					   PAGE_SIZE / sizeof(*cs->hdr));
+	if (!nr_stats) {
+		rc = -ENODATA;
+		goto out_free_hdr;
+	}
+
+	cs->queue_stats_count = nr_stats;
+
+	/* alloc and init array of names */
+	cs->stats_hdrs = kcalloc(nr_stats, sizeof(*cs->stats_hdrs),
+				 GFP_KERNEL);
+	if (!cs->stats_hdrs) {
+		rc = -ENOMEM;
+		goto out_free_hdr;
+	}
+
+	ionic_fill_stats_desc(cs->stats_hdrs, cs->hdr, nr_stats);
+
+	return 0;
+
+out_unmap:
+	dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+out_free_hdr:
+	kfree(cs->hdr);
+
+	return rc;
+}
+
+/*
+ * Device ops for the global (port) hw stats; installed by
+ * ionic_stats_init() only when ionic_init_hw_stats() succeeds.
+ */
+static const struct ib_device_ops ionic_hw_stats_ops = {
+ .driver_id = RDMA_DRIVER_IONIC,
+ .alloc_hw_port_stats = ionic_alloc_hw_stats,
+ .get_hw_stats = ionic_get_hw_stats,
+};
+
+/*
+ * Device ops for per-qp counters; installed by ionic_stats_init() only
+ * when ionic_alloc_counters() succeeds.
+ */
+static const struct ib_device_ops ionic_counter_stats_ops = {
+ .counter_alloc_stats = ionic_counter_alloc_stats,
+ .counter_dealloc = ionic_counter_dealloc,
+ .counter_bind_qp = ionic_counter_bind_qp,
+ .counter_unbind_qp = ionic_counter_unbind_qp,
+ .counter_update_stats = ionic_counter_update_stats,
+};
+
+/*
+ * Enable whichever stats flavours dev->lif_cfg.stats_type advertises.
+ *
+ * Global stats and per-qp counter stats are set up independently; each one
+ * installs its device ops only if its setup succeeds.  Failures are logged
+ * at debug level and are not reported to the caller.
+ */
+void ionic_stats_init(struct ionic_ibdev *dev)
+{
+	u16 stats_type = dev->lif_cfg.stats_type;
+
+	if (stats_type & IONIC_LIF_RDMA_STAT_GLOBAL) {
+		if (ionic_init_hw_stats(dev))
+			ibdev_dbg(&dev->ibdev, "Failed to init hw stats\n");
+		else
+			ib_set_device_ops(&dev->ibdev, &ionic_hw_stats_ops);
+	}
+
+	if (!(stats_type & IONIC_LIF_RDMA_STAT_QP))
+		return;
+
+	dev->counter_stats = kzalloc(sizeof(*dev->counter_stats), GFP_KERNEL);
+	if (!dev->counter_stats)
+		return;
+
+	if (ionic_alloc_counters(dev)) {
+		ibdev_dbg(&dev->ibdev, "Failed to init counter stats\n");
+		kfree(dev->counter_stats);
+		dev->counter_stats = NULL;
+		return;
+	}
+
+	xa_init_flags(&dev->counter_stats->xa_counters, XA_FLAGS_ALLOC);
+
+	ib_set_device_ops(&dev->ibdev, &ionic_counter_stats_ops);
+}
+
+/*
+ * Release everything ionic_stats_init() set up.
+ *
+ * The hw_stats pointers are cleared after being freed: ionic_init_hw_stats()
+ * treats a non-NULL dev->hw_stats_hdrs as "already initialised", so stale
+ * pointers would make a later init skip setup, and a second cleanup would
+ * free them twice.
+ */
+void ionic_stats_cleanup(struct ionic_ibdev *dev)
+{
+	if (dev->counter_stats) {
+		xa_destroy(&dev->counter_stats->xa_counters);
+		kfree(dev->counter_stats->hdr);
+		kfree(dev->counter_stats->stats_hdrs);
+		kfree(dev->counter_stats);
+		dev->counter_stats = NULL;
+	}
+
+	kfree(dev->hw_stats);
+	kfree(dev->hw_stats_buf);
+	kfree(dev->hw_stats_hdrs);
+
+	dev->hw_stats = NULL;
+	dev->hw_stats_buf = NULL;
+	dev->hw_stats_hdrs = NULL;
+	dev->hw_stats_count = 0;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c
new file mode 100644
index 000000000000..164046d00e5d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+
+#include "ionic_ibdev.h"
+
+#define DRIVER_DESCRIPTION "AMD Pensando RoCE HCA driver"
+#define DEVICE_DESCRIPTION "AMD Pensando RoCE HCA"
+
+MODULE_AUTHOR("Allen Hubbe <allen.hubbe@amd.com>");
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("NET_IONIC");
+
+/*
+ * Fill @attr with the device limits derived from the LIF configuration.
+ *
+ * Returns 0 on success, or -ENODEV when no net_device is currently bound to
+ * port 1 (the system image GUID is derived from that netdev's address).
+ */
+static int ionic_query_device(struct ib_device *ibdev,
+			      struct ib_device_attr *attr,
+			      struct ib_udata *udata)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+	struct net_device *ndev;
+
+	/* Returns a held reference, or NULL if the netdev is gone. */
+	ndev = ib_device_get_netdev(ibdev, 1);
+	if (!ndev)
+		return -ENODEV;
+
+	addrconf_ifid_eui48((u8 *)&attr->sys_image_guid, ndev);
+	dev_put(ndev);
+	attr->max_mr_size = dev->lif_cfg.npts_per_lif * PAGE_SIZE / 2;
+	attr->page_size_cap = dev->lif_cfg.page_size_supported;
+
+	attr->vendor_id = to_pci_dev(dev->lif_cfg.hwdev)->vendor;
+	attr->vendor_part_id = to_pci_dev(dev->lif_cfg.hwdev)->device;
+
+	attr->hw_ver = ionic_lif_asic_rev(dev->lif_cfg.lif);
+	attr->fw_ver = 0;
+	attr->max_qp = dev->lif_cfg.qp_count;
+	attr->max_qp_wr = IONIC_MAX_DEPTH;
+	attr->device_cap_flags =
+		IB_DEVICE_MEM_WINDOW |
+		IB_DEVICE_MEM_MGT_EXTENSIONS |
+		IB_DEVICE_MEM_WINDOW_TYPE_2B |
+		0;
+	attr->max_send_sge =
+		min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+		    IONIC_SPEC_HIGH);
+	attr->max_recv_sge =
+		min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+		    IONIC_SPEC_HIGH);
+	attr->max_sge_rd = attr->max_send_sge;
+	/* the CQ id space is divided evenly between the udma pipelines */
+	attr->max_cq = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+	attr->max_cqe = IONIC_MAX_CQ_DEPTH - IONIC_CQ_GRACE;
+	attr->max_mr = dev->lif_cfg.nmrs_per_lif;
+	attr->max_pd = IONIC_MAX_PD;
+	attr->max_qp_rd_atom = IONIC_MAX_RD_ATOM;
+	attr->max_ee_rd_atom = 0;
+	attr->max_res_rd_atom = IONIC_MAX_RD_ATOM;
+	attr->max_qp_init_rd_atom = IONIC_MAX_RD_ATOM;
+	attr->max_ee_init_rd_atom = 0;
+	attr->atomic_cap = IB_ATOMIC_GLOB;
+	attr->masked_atomic_cap = IB_ATOMIC_GLOB;
+	/* memory windows share the MR id space (see struct ionic_mr) */
+	attr->max_mw = dev->lif_cfg.nmrs_per_lif;
+	attr->max_mcast_grp = 0;
+	attr->max_mcast_qp_attach = 0;
+	attr->max_ah = dev->lif_cfg.nahs_per_lif;
+	attr->max_fast_reg_page_list_len = dev->lif_cfg.npts_per_lif / 2;
+	attr->max_pkeys = IONIC_PKEY_TBL_LEN;
+
+	return 0;
+}
+
+/*
+ * Report the state and limits of the single port, derived from the bound
+ * net_device.
+ *
+ * Returns -EINVAL for any port other than 1, -ENODEV when no net_device is
+ * bound to the port, otherwise the result of ib_get_eth_speed().
+ */
+static int ionic_query_port(struct ib_device *ibdev, u32 port,
+			    struct ib_port_attr *attr)
+{
+	struct net_device *ndev;
+
+	if (port != 1)
+		return -EINVAL;
+
+	/* Returns a held reference, or NULL if the netdev is gone. */
+	ndev = ib_device_get_netdev(ibdev, port);
+	if (!ndev)
+		return -ENODEV;
+
+	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+		attr->state = IB_PORT_ACTIVE;
+		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+	} else if (netif_running(ndev)) {
+		/* interface is up but has no carrier */
+		attr->state = IB_PORT_DOWN;
+		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
+	} else {
+		attr->state = IB_PORT_DOWN;
+		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+	}
+
+	attr->max_mtu = iboe_get_mtu(ndev->max_mtu);
+	attr->active_mtu = min(attr->max_mtu, iboe_get_mtu(ndev->mtu));
+	attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+	attr->ip_gids = true;
+	attr->port_cap_flags = 0;
+	attr->max_msg_sz = 0x80000000;
+	attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+	attr->max_vl_num = 1;
+	attr->subnet_prefix = 0xfe80000000000000ull;
+
+	dev_put(ndev);
+
+	return ib_get_eth_speed(ibdev, port,
+				&attr->active_speed,
+				&attr->active_width);
+}
+
+/* The link layer is always reported as Ethernet, whatever @port is. */
+static enum rdma_link_layer
+ionic_get_link_layer(struct ib_device *ibdev, u32 port)
+{
+	return IB_LINK_LAYER_ETHERNET;
+}
+
+/*
+ * Only port 1, index 0 is valid; it always holds the default full-membership
+ * P_Key. Anything else yields -EINVAL.
+ */
+static int ionic_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+			    u16 *pkey)
+{
+	if (port != 1 || index != 0)
+		return -EINVAL;
+
+	*pkey = IB_DEFAULT_PKEY_FULL;
+
+	return 0;
+}
+
+/*
+ * Only the node description may be modified; any other bit in @mask is
+ * rejected with -EOPNOTSUPP.
+ */
+static int ionic_modify_device(struct ib_device *ibdev, int mask,
+			       struct ib_device_modify *attr)
+{
+	struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+		return -EOPNOTSUPP;
+
+	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
+		return 0;
+
+	memcpy(dev->ibdev.node_desc, attr->node_desc,
+	       IB_DEVICE_NODE_DESC_MAX);
+
+	return 0;
+}
+
+/* Report the fixed properties of port 1; other ports yield -EINVAL. */
+static int ionic_get_port_immutable(struct ib_device *ibdev, u32 port,
+				    struct ib_port_immutable *attr)
+{
+	if (port != 1)
+		return -EINVAL;
+
+	attr->max_mad_size = IB_MGMT_MAD_SIZE;
+	attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+	attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+	attr->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	return 0;
+}
+
+/* Copy the firmware version string of the underlying LIF into @str. */
+static void ionic_get_dev_fw_str(struct ib_device *ibdev, char *str)
+{
+	ionic_lif_fw_version(to_ionic_ibdev(ibdev)->lif_cfg.lif, str,
+			     IB_FW_VERSION_NAME_MAX);
+}
+
+/* sysfs "hw_rev": ASIC revision of the underlying LIF, printed in hex. */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	struct ionic_ibdev *dev;
+
+	dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+	return sysfs_emit(buf, "0x%x\n", ionic_lif_asic_rev(dev->lif_cfg.lif));
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+/* sysfs "hca_type": the ib_device node description string. */
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	struct ionic_ibdev *dev;
+
+	dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+	return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+/* NULL-terminated list of the sysfs attributes defined above. */
+static struct attribute *ionic_rdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+/* Attribute group handed to the RDMA core through ionic_dev_ops.device_group. */
+static const struct attribute_group ionic_rdma_attr_group = {
+ .attrs = ionic_rdma_attributes,
+};
+
+static void ionic_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+	/*
+	 * Intentionally empty. The callback is defined only so that device
+	 * removal does not wait for user contexts before cleaning up hw
+	 * resources.
+	 */
+}
+
+/* Core verbs operations registered for every ionic RDMA device. */
+static const struct ib_device_ops ionic_dev_ops = {
+	.owner = THIS_MODULE,
+	.driver_id = RDMA_DRIVER_IONIC,
+	.uverbs_abi_ver = IONIC_ABI_VERSION,
+
+	/* user contexts and mmap */
+	.alloc_ucontext = ionic_alloc_ucontext,
+	.dealloc_ucontext = ionic_dealloc_ucontext,
+	.disassociate_ucontext = ionic_disassociate_ucontext,
+	.mmap = ionic_mmap,
+	.mmap_free = ionic_mmap_free,
+
+	/* protection domains and address handles */
+	.alloc_pd = ionic_alloc_pd,
+	.dealloc_pd = ionic_dealloc_pd,
+	.create_ah = ionic_create_ah,
+	.create_user_ah = ionic_create_ah,
+	.query_ah = ionic_query_ah,
+	.destroy_ah = ionic_destroy_ah,
+
+	/* memory regions and windows */
+	.get_dma_mr = ionic_get_dma_mr,
+	.reg_user_mr = ionic_reg_user_mr,
+	.reg_user_mr_dmabuf = ionic_reg_user_mr_dmabuf,
+	.dereg_mr = ionic_dereg_mr,
+	.alloc_mr = ionic_alloc_mr,
+	.map_mr_sg = ionic_map_mr_sg,
+	.alloc_mw = ionic_alloc_mw,
+	.dealloc_mw = ionic_dealloc_mw,
+
+	/* completion queues and queue pairs */
+	.create_cq = ionic_create_cq,
+	.destroy_cq = ionic_destroy_cq,
+	.create_qp = ionic_create_qp,
+	.modify_qp = ionic_modify_qp,
+	.query_qp = ionic_query_qp,
+	.destroy_qp = ionic_destroy_qp,
+
+	/* data path */
+	.post_send = ionic_post_send,
+	.post_recv = ionic_post_recv,
+	.poll_cq = ionic_poll_cq,
+	.req_notify_cq = ionic_req_notify_cq,
+
+	/* device and port queries */
+	.query_device = ionic_query_device,
+	.query_port = ionic_query_port,
+	.get_link_layer = ionic_get_link_layer,
+	.query_pkey = ionic_query_pkey,
+	.modify_device = ionic_modify_device,
+	.get_port_immutable = ionic_get_port_immutable,
+	.get_dev_fw_str = ionic_get_dev_fw_str,
+	.device_group = &ionic_rdma_attr_group,
+
+	/* driver object sizes for core-allocated verbs objects */
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx),
+	INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah),
+	INIT_RDMA_OBJ_SIZE(ib_cq, ionic_vcq, ibcq),
+	INIT_RDMA_OBJ_SIZE(ib_qp, ionic_qp, ibqp),
+	INIT_RDMA_OBJ_SIZE(ib_mw, ionic_mr, ibmw),
+};
+
+/*
+ * Initialise the resource-id allocators from the LIF configuration and
+ * precompute the per-udma id shifts for CQs and QPs.
+ */
+static void ionic_init_resids(struct ionic_ibdev *dev)
+{
+	struct ionic_lif_cfg *cfg = &dev->lif_cfg;
+
+	ionic_resid_init(&dev->inuse_cqid, cfg->cq_count);
+	dev->half_cqid_udma_shift =
+		order_base_2(cfg->cq_count / cfg->udma_count);
+	ionic_resid_init(&dev->inuse_pdid, IONIC_MAX_PD);
+	ionic_resid_init(&dev->inuse_ahid, cfg->nahs_per_lif);
+	ionic_resid_init(&dev->inuse_mrid, cfg->nmrs_per_lif);
+	/* skip reserved lkey */
+	dev->next_mrkey = 1;
+	ionic_resid_init(&dev->inuse_qpid, cfg->qp_count);
+	/*
+	 * skip reserved SMI and GSI qpids
+	 * NOTE(review): nothing in this function reserves those ids - confirm
+	 * where (or whether) the reservation is meant to happen.
+	 */
+	dev->half_qpid_udma_shift =
+		order_base_2(cfg->qp_count / cfg->udma_count);
+	ionic_resid_init(&dev->inuse_dbid, cfg->dbid_count);
+}
+
+/* Tear down every allocator set up by ionic_init_resids(). */
+static void ionic_destroy_resids(struct ionic_ibdev *dev)
+{
+	ionic_resid_destroy(&dev->inuse_cqid);
+	ionic_resid_destroy(&dev->inuse_pdid);
+	ionic_resid_destroy(&dev->inuse_ahid);
+	ionic_resid_destroy(&dev->inuse_mrid);
+	ionic_resid_destroy(&dev->inuse_qpid);
+	ionic_resid_destroy(&dev->inuse_dbid);
+}
+
+/*
+ * Undo ionic_create_ibdev(): kill the admin queues, unregister from the RDMA
+ * core, then release stats, admin queues, id allocators and lookup tables
+ * before freeing the device. Both tables are expected to be empty by then.
+ */
+static void ionic_destroy_ibdev(struct ionic_ibdev *dev)
+{
+	struct ib_device *ibdev = &dev->ibdev;
+
+	ionic_kill_rdma_admin(dev, false);
+	ib_unregister_device(ibdev);
+
+	ionic_stats_cleanup(dev);
+	ionic_destroy_rdma_admin(dev);
+	ionic_destroy_resids(dev);
+
+	WARN_ON(!xa_empty(&dev->qp_tbl));
+	xa_destroy(&dev->qp_tbl);
+
+	WARN_ON(!xa_empty(&dev->cq_tbl));
+	xa_destroy(&dev->cq_tbl);
+
+	ib_dealloc_device(ibdev);
+}
+
+/*
+ * Allocate, configure and register the ib_device for an ionic auxiliary
+ * device.
+ *
+ * Returns the new device, or an ERR_PTR(): -ENOMEM when the ib_device cannot
+ * be allocated, otherwise the error from the failing setup step.
+ */
+static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev)
+{
+	struct ib_device *ibdev;
+	struct ionic_ibdev *dev;
+	struct net_device *ndev;
+	int rc;
+
+	dev = ib_alloc_device(ionic_ibdev, ibdev);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg);
+
+	/*
+	 * NOTE(review): xa_init_flags() expects XA_FLAGS_* values; GFP_ATOMIC
+	 * is an allocation mask - confirm which xarray flags are intended.
+	 */
+	xa_init_flags(&dev->qp_tbl, GFP_ATOMIC);
+	xa_init_flags(&dev->cq_tbl, GFP_ATOMIC);
+
+	ionic_init_resids(dev);
+
+	rc = ionic_rdma_reset_devcmd(dev);
+	if (rc)
+		goto err_reset;
+
+	rc = ionic_create_rdma_admin(dev);
+	if (rc)
+		goto err_admin;
+
+	ibdev = &dev->ibdev;
+	ibdev->dev.parent = dev->lif_cfg.hwdev;
+
+	strscpy(ibdev->name, "ionic_%d", IB_DEVICE_NAME_MAX);
+	strscpy(ibdev->node_desc, DEVICE_DESCRIPTION, IB_DEVICE_NODE_DESC_MAX);
+
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	ibdev->phys_port_cnt = 1;
+
+	/* the first two eq are reserved for async events */
+	ibdev->num_comp_vectors = dev->lif_cfg.eq_count - 2;
+
+	ndev = ionic_lif_netdev(ionic_adev->lif);
+	addrconf_ifid_eui48((u8 *)&ibdev->node_guid, ndev);
+	rc = ib_device_set_netdev(ibdev, ndev, 1);
+	/* ionic_lif_netdev() returns ndev with refcount held */
+	dev_put(ndev);
+	if (rc)
+		goto err_admin;
+
+	ib_set_device_ops(&dev->ibdev, &ionic_dev_ops);
+
+	/* no return value to check: failures only leave stats ops unset */
+	ionic_stats_init(dev);
+
+	rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent);
+	if (rc)
+		goto err_register;
+
+	return dev;
+
+err_register:
+	ionic_stats_cleanup(dev);
+err_admin:
+	ionic_kill_rdma_admin(dev, false);
+	ionic_destroy_rdma_admin(dev);
+err_reset:
+	ionic_destroy_resids(dev);
+	xa_destroy(&dev->qp_tbl);
+	xa_destroy(&dev->cq_tbl);
+	ib_dealloc_device(&dev->ibdev);
+
+	return ERR_PTR(rc);
+}
+
+/*
+ * Auxiliary bus probe: create the ib_device for @adev and store it as the
+ * driver data. Returns 0, or the error from ionic_create_ibdev().
+ */
+static int ionic_aux_probe(struct auxiliary_device *adev,
+			   const struct auxiliary_device_id *id)
+{
+	struct ionic_aux_dev *ionic_adev =
+		container_of(adev, struct ionic_aux_dev, adev);
+	struct ionic_ibdev *dev = ionic_create_ibdev(ionic_adev);
+
+	if (IS_ERR(dev))
+		return dev_err_probe(&adev->dev, PTR_ERR(dev),
+				     "Failed to register ibdev\n");
+
+	auxiliary_set_drvdata(adev, dev);
+	ibdev_dbg(&dev->ibdev, "registered\n");
+
+	return 0;
+}
+
+/* Auxiliary bus remove: destroy the ib_device stored by ionic_aux_probe(). */
+static void ionic_aux_remove(struct auxiliary_device *adev)
+{
+	dev_dbg(&adev->dev, "unregister ibdev\n");
+	ionic_destroy_ibdev(auxiliary_get_drvdata(adev));
+	dev_dbg(&adev->dev, "unregistered\n");
+}
+
+/* Auxiliary device names this driver binds to; the empty entry ends the table. */
+static const struct auxiliary_device_id ionic_aux_id_table[] = {
+ { .name = "ionic.rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ionic_aux_id_table);
+
+/* Auxiliary driver registered by ionic_mod_init(). */
+static struct auxiliary_driver ionic_aux_r_driver = {
+	.name = "rdma",
+	.id_table = ionic_aux_id_table,
+	.probe = ionic_aux_probe,
+	.remove = ionic_aux_remove,
+};
+
+/*
+ * Module init: create the event workqueue, then register the auxiliary
+ * driver. The workqueue is destroyed again if registration fails.
+ */
+static int __init ionic_mod_init(void)
+{
+	int rc;
+
+	ionic_evt_workq = create_workqueue(KBUILD_MODNAME "-evt");
+	if (!ionic_evt_workq)
+		return -ENOMEM;
+
+	rc = auxiliary_driver_register(&ionic_aux_r_driver);
+	if (rc)
+		destroy_workqueue(ionic_evt_workq);
+
+	return rc;
+}
+
+/* Module exit: reverse of ionic_mod_init(). */
+static void __exit ionic_mod_exit(void)
+{
+	auxiliary_driver_unregister(&ionic_aux_r_driver);
+	destroy_workqueue(ionic_evt_workq);
+}
+
+module_init(ionic_mod_init);
+module_exit(ionic_mod_exit);
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h
new file mode 100644
index 000000000000..82fda1e3cdb6
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_IBDEV_H_
+#define _IONIC_IBDEV_H_
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <rdma/ionic-abi.h>
+#include <ionic_api.h>
+#include <ionic_regs.h>
+
+#include "ionic_fw.h"
+#include "ionic_queue.h"
+#include "ionic_res.h"
+
+#include "ionic_lif_cfg.h"
+
+/* Config knobs */
+#define IONIC_EQ_DEPTH 511
+#define IONIC_EQ_COUNT 32
+#define IONIC_AQ_DEPTH 63
+#define IONIC_AQ_COUNT 4
+#define IONIC_EQ_ISR_BUDGET 10
+#define IONIC_EQ_WORK_BUDGET 1000
+#define IONIC_MAX_RD_ATOM 16
+#define IONIC_PKEY_TBL_LEN 1
+#define IONIC_GID_TBL_LEN 256
+
+#define IONIC_MAX_QPID 0xffffff
+#define IONIC_SPEC_HIGH 8
+#define IONIC_MAX_PD 1024
+#define IONIC_SQCMB_ORDER 5
+#define IONIC_RQCMB_ORDER 0
+
+/* sentinel pointer values, never dereferenced */
+#define IONIC_META_LAST ((void *)1ul)
+#define IONIC_META_POSTED ((void *)2ul)
+
+#define IONIC_CQ_GRACE 100
+
+#define IONIC_ROCE_UDP_SPORT 28272
+#define IONIC_DMA_LKEY 0
+#define IONIC_DMA_RKEY IONIC_DMA_LKEY
+
+#define IONIC_CMB_SUPPORTED \
+ (IONIC_CMB_ENABLE | IONIC_CMB_REQUIRE | IONIC_CMB_EXPDB | \
+ IONIC_CMB_WC | IONIC_CMB_UC)
+
+/* resource is not reserved on the device, indicated in tbl_order */
+#define IONIC_RES_INVALID (-1)
+
+struct ionic_aq;
+struct ionic_cq;
+struct ionic_eq;
+struct ionic_vcq;
+
+/* Admin queue submission state (stored in the admin_state atomics below). */
+enum ionic_admin_state {
+ IONIC_ADMIN_ACTIVE, /* submitting admin commands to queue */
+ IONIC_ADMIN_PAUSED, /* not submitting, but may complete normally */
+ IONIC_ADMIN_KILLED, /* not submitting, locally completed */
+};
+
+/* Bit flags accepted by ionic_admin_wait(). */
+enum ionic_admin_flags {
+ IONIC_ADMIN_F_BUSYWAIT = BIT(0), /* Don't sleep */
+ IONIC_ADMIN_F_TEARDOWN = BIT(1), /* In destroy path */
+ IONIC_ADMIN_F_INTERRUPT = BIT(2), /* Interruptible w/timeout */
+};
+
+/* Bit flags for a user mapping; presumably stored in ionic_mmap_entry.mmap_flags. */
+enum ionic_mmap_flag {
+ IONIC_MMAP_WC = BIT(0),
+};
+
+/* Driver mmap entry: an rdma_user_mmap_entry plus the size, pfn and flags of the mapping. */
+struct ionic_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ unsigned long size;
+ unsigned long pfn;
+ u8 mmap_flags;
+};
+
+/*
+ * Per-device driver state. Embeds the ib_device (see to_ionic_ibdev()) and
+ * holds the LIF configuration, QP/CQ lookup tables, resource-id allocators,
+ * admin/event queue vectors and statistics buffers.
+ */
+struct ionic_ibdev {
+ struct ib_device ibdev;
+
+ struct ionic_lif_cfg lif_cfg;
+
+ struct xarray qp_tbl;
+ struct xarray cq_tbl;
+
+ struct ionic_resid_bits inuse_dbid;
+ struct ionic_resid_bits inuse_pdid;
+ struct ionic_resid_bits inuse_ahid;
+ struct ionic_resid_bits inuse_mrid;
+ struct ionic_resid_bits inuse_qpid;
+ struct ionic_resid_bits inuse_cqid;
+
+ u8 half_cqid_udma_shift;
+ u8 half_qpid_udma_shift;
+ u8 next_qpid_udma_idx;
+ u8 next_mrkey;
+
+ struct work_struct reset_work;
+ bool reset_posted;
+ u32 reset_cnt;
+
+ struct delayed_work admin_dwork;
+ struct ionic_aq **aq_vec;
+ atomic_t admin_state;
+
+ struct ionic_eq **eq_vec;
+
+ struct ionic_v1_stat *hw_stats;
+ void *hw_stats_buf;
+ struct rdma_stat_desc *hw_stats_hdrs;
+ struct ionic_counter_stats *counter_stats;
+ int hw_stats_count;
+};
+
+/* Per event-queue (EQ) state: ids, the queue itself, its work item and irq. */
+struct ionic_eq {
+ struct ionic_ibdev *dev;
+
+ u32 eqid;
+ u32 intr;
+
+ struct ionic_queue q;
+
+ int armed;
+ bool enable;
+
+ struct work_struct work;
+
+ int irq;
+ char name[32];
+};
+
+/* One admin work request: the wqe to post, its cqe, a completion and a status. */
+struct ionic_admin_wr {
+ struct completion work;
+ struct list_head aq_ent;
+ struct ionic_v1_admin_wqe wqe;
+ struct ionic_v1_cqe cqe;
+ struct ionic_aq *aq;
+ int status;
+};
+
+/* Entry of ionic_aq.q_wr: an admin work request and its wqe stride count. */
+struct ionic_admin_wr_q {
+ struct ionic_admin_wr *wr;
+ int wqe_strides;
+};
+
+/* Per admin-queue (AQ) state: ids, state, the queue and its work-request lists. */
+struct ionic_aq {
+ struct ionic_ibdev *dev;
+ struct ionic_vcq *vcq;
+
+ struct work_struct work;
+
+ atomic_t admin_state;
+ unsigned long stamp;
+ bool armed;
+
+ u32 aqid;
+ u32 cqid;
+
+ spinlock_t lock; /* for posting */
+ struct ionic_queue q;
+ struct ionic_admin_wr_q *q_wr;
+ struct list_head wr_prod;
+ struct list_head wr_post;
+};
+
+/* Driver user context: embeds ib_ucontext (see to_ionic_ctx()) plus the doorbell id and its mmap entry. */
+struct ionic_ctx {
+ struct ib_ucontext ibctx;
+ u32 dbid;
+ struct rdma_user_mmap_entry *mmap_dbell;
+};
+
+/*
+ * Page table buffer used by the ionic_pgtbl_*() helpers: up to tbl_limit page
+ * addresses, tbl_pages of them filled in. tbl_buf may be NULL, in which case
+ * a single page address is kept directly in tbl_dma.
+ */
+struct ionic_tbl_buf {
+ u32 tbl_limit;
+ u32 tbl_pages;
+ size_t tbl_size;
+ __le64 *tbl_buf;
+ dma_addr_t tbl_dma;
+ u8 page_size_log2;
+};
+
+/* Driver protection domain: embeds ib_pd (see to_ionic_pd()) plus its id and flags. */
+struct ionic_pd {
+ struct ib_pd ibpd;
+
+ u32 pdid;
+ u32 flags;
+};
+
+/*
+ * One hardware completion queue. Instances live inside ionic_vcq.cq[]; the
+ * kref/completion pair is signalled by ionic_cq_complete().
+ */
+struct ionic_cq {
+ struct ionic_vcq *vcq;
+
+ u32 cqid;
+ u32 eqid;
+
+ spinlock_t lock; /* for polling */
+ struct list_head poll_sq;
+ bool flush;
+ struct list_head flush_sq;
+ struct list_head flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ struct ionic_queue q;
+ bool color;
+ int credit;
+ u16 arm_any_prod;
+ u16 arm_sol_prod;
+
+ struct kref cq_kref;
+ struct completion cq_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ struct ib_umem *umem;
+};
+
+/* Verbs CQ: embeds ib_cq (see to_ionic_vcq()) and two ionic_cq, indexed by udma index. */
+struct ionic_vcq {
+ struct ib_cq ibcq;
+ struct ionic_cq cq[2];
+ u8 udma_mask;
+ u8 poll_idx;
+};
+
+/* Per send-queue-entry bookkeeping (element type of ionic_qp.sq_meta). */
+struct ionic_sq_meta {
+ u64 wrid;
+ u32 len;
+ u16 seq;
+ u8 ibop;
+ u8 ibsts;
+ u8 remote:1;
+ u8 signal:1;
+ u8 local_comp:1;
+};
+
+/* Per receive-queue-entry bookkeeping, linkable through next (element type of ionic_qp.rq_meta). */
+struct ionic_rq_meta {
+ struct ionic_rq_meta *next;
+ u64 wrid;
+};
+
+/*
+ * Driver queue pair: embeds ib_qp (see to_ionic_qp()) and holds the send and
+ * receive queue state. The kref/completion pair is signalled by
+ * ionic_qp_complete().
+ */
+struct ionic_qp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+
+ u32 qpid;
+ u32 ahid;
+ u32 sq_cqid;
+ u32 rq_cqid;
+ u8 udma_idx;
+ u8 has_ah:1;
+ u8 has_sq:1;
+ u8 has_rq:1;
+ u8 sig_all:1;
+
+ struct list_head qp_list_counter;
+
+ struct list_head cq_poll_sq;
+ struct list_head cq_flush_sq;
+ struct list_head cq_flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ spinlock_t sq_lock; /* for posting and polling */
+ struct ionic_queue sq;
+ struct ionic_sq_meta *sq_meta;
+ u16 *sq_msn_idx;
+ int sq_spec;
+ u16 sq_old_prod;
+ u16 sq_msn_prod;
+ u16 sq_msn_cons;
+ u8 sq_cmb;
+ bool sq_flush;
+ bool sq_flush_rcvd;
+
+ spinlock_t rq_lock; /* for posting and polling */
+ struct ionic_queue rq;
+ struct ionic_rq_meta *rq_meta;
+ struct ionic_rq_meta *rq_meta_head;
+ int rq_spec;
+ u16 rq_old_prod;
+ u8 rq_cmb;
+ bool rq_flush;
+
+ struct kref qp_kref;
+ struct completion qp_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ int sgid_index;
+ int sq_cmb_order;
+ u32 sq_cmb_pgid;
+ phys_addr_t sq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_sq_cmb;
+
+ struct ib_umem *sq_umem;
+
+ int rq_cmb_order;
+ u32 rq_cmb_pgid;
+ phys_addr_t rq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_rq_cmb;
+
+ struct ib_umem *rq_umem;
+
+ int dcqcn_profile;
+
+ struct ib_ud_header *hdr;
+};
+
+/* Driver address handle: embeds ib_ah (see to_ionic_ah()) plus its id, sgid index and UD header. */
+struct ionic_ah {
+ struct ib_ah ibah;
+ u32 ahid;
+ int sgid_index;
+ struct ib_ud_header hdr;
+};
+
+/*
+ * Driver memory region or memory window. The same object backs both, so the
+ * ib_mr and ib_mw share storage (see to_ionic_mr() and to_ionic_mw()).
+ */
+struct ionic_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+
+ u32 mrid;
+ int flags;
+
+ struct ib_umem *umem;
+ struct ionic_tbl_buf buf;
+ bool created;
+};
+
+/* Per-counter statistics state; allocated by ionic_stats_init() and freed by ionic_stats_cleanup(). */
+struct ionic_counter_stats {
+ int queue_stats_count;
+ struct ionic_v1_stat *hdr;
+ struct rdma_stat_desc *stats_hdrs;
+ struct xarray xa_counters;
+};
+
+/* One counter: a values buffer and a list of QPs (presumably linked via ionic_qp.qp_list_counter). */
+struct ionic_counter {
+ void *vals;
+ struct list_head qp_list;
+};
+
+/* Map an ib_device to the ionic_ibdev that embeds it. */
+static inline struct ionic_ibdev *
+to_ionic_ibdev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct ionic_ibdev, ibdev);
+}
+
+/* Map an ib_ucontext to the ionic_ctx that embeds it. */
+static inline struct ionic_ctx *
+to_ionic_ctx(struct ib_ucontext *ibctx)
+{
+	return container_of(ibctx, struct ionic_ctx, ibctx);
+}
+
+/* Driver context of a uobject, or NULL when @uobj or its context is NULL. */
+static inline struct ionic_ctx *to_ionic_ctx_uobj(struct ib_uobject *uobj)
+{
+	if (!uobj || !uobj->context)
+		return NULL;
+
+	return to_ionic_ctx(uobj->context);
+}
+
+/* Map an ib_pd to the ionic_pd that embeds it. */
+static inline struct ionic_pd *
+to_ionic_pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct ionic_pd, ibpd);
+}
+
+/* Map an ib_mr to the ionic_mr that embeds it. */
+static inline struct ionic_mr *
+to_ionic_mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct ionic_mr, ibmr);
+}
+
+/* Map an ib_mw to the ionic_mr that embeds it (MRs and MWs share one type). */
+static inline struct ionic_mr *
+to_ionic_mw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct ionic_mr, ibmw);
+}
+
+/* Map an ib_cq to the ionic_vcq that embeds it. */
+static inline struct ionic_vcq *
+to_ionic_vcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct ionic_vcq, ibcq);
+}
+
+/* Hardware CQ for @udma_idx inside the verbs CQ; the index is not range-checked. */
+static inline struct ionic_cq *to_ionic_vcq_cq(struct ib_cq *ibcq,
+					       uint8_t udma_idx)
+{
+	struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+
+	return &vcq->cq[udma_idx];
+}
+
+/* Map an ib_qp to the ionic_qp that embeds it. */
+static inline struct ionic_qp *
+to_ionic_qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct ionic_qp, ibqp);
+}
+
+/* Map an ib_ah to the ionic_ah that embeds it. */
+static inline struct ionic_ah *
+to_ionic_ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct ionic_ah, ibah);
+}
+
+/* Doorbell id of @ctx, falling back to the LIF's own dbid when @ctx is NULL. */
+static inline u32 ionic_ctx_dbid(struct ionic_ibdev *dev,
+				 struct ionic_ctx *ctx)
+{
+	return ctx ? ctx->dbid : dev->lif_cfg.dbid;
+}
+
+/* Doorbell id for the context owning @uobj; the LIF's dbid when there is none. */
+static inline u32 ionic_obj_dbid(struct ionic_ibdev *dev,
+				 struct ib_uobject *uobj)
+{
+	struct ionic_ctx *ctx = to_ionic_ctx_uobj(uobj);
+
+	return ionic_ctx_dbid(dev, ctx);
+}
+
+/* True for IB_WR_LOCAL_INV and IB_WR_REG_MR, false for every other opcode. */
+static inline bool ionic_ibop_is_local(enum ib_wr_opcode op)
+{
+	switch (op) {
+	case IB_WR_LOCAL_INV:
+	case IB_WR_REG_MR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* kref release callback for ionic_qp.qp_kref: signals qp_rel_comp. */
+static inline void ionic_qp_complete(struct kref *kref)
+{
+	complete(&container_of(kref, struct ionic_qp, qp_kref)->qp_rel_comp);
+}
+
+/* kref release callback for ionic_cq.cq_kref: signals cq_rel_comp. */
+static inline void ionic_cq_complete(struct kref *kref)
+{
+	complete(&container_of(kref, struct ionic_cq, cq_kref)->cq_rel_comp);
+}
+
+/* ionic_admin.c */
+extern struct workqueue_struct *ionic_evt_workq;
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr);
+/* @flags: bitwise OR of enum ionic_admin_flags values */
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+		     enum ionic_admin_flags flags);
+
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev);
+
+int ionic_create_rdma_admin(struct ionic_ibdev *dev);
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev);
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path);
+
+/* ionic_controlpath.c */
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+ struct ionic_tbl_buf *buf,
+ const struct ib_cq_init_attr *attr,
+ struct ionic_ctx *ctx,
+ struct ib_udata *udata,
+ struct ionic_qdesc *req_cq,
+ __u32 *resp_cqid,
+ int udma_idx);
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq);
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp);
+void ionic_notify_flush_cq(struct ionic_cq *cq);
+
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata);
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx);
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma);
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags);
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access);
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg);
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
+int ionic_dealloc_mw(struct ib_mw *ibmw);
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata);
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata);
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+/* ionic_datapath.c */
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad);
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad);
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc);
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+/* ionic_hw_stats.c */
+void ionic_stats_init(struct ionic_ibdev *dev);
+void ionic_stats_cleanup(struct ionic_ibdev *dev);
+
+/* ionic_pgtbl.c */
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va);
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va);
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma);
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf,
+ struct ib_umem *umem,
+ dma_addr_t dma,
+ int limit,
+ u64 page_size);
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf);
+#endif /* _IONIC_IBDEV_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
new file mode 100644
index 000000000000..f3cd281c3a2f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/kernel.h>
+
+#include <ionic.h>
+#include <ionic_lif.h>
+
+#include "ionic_lif_cfg.h"
+
+#define IONIC_MIN_RDMA_VERSION 0
+#define IONIC_MAX_RDMA_VERSION 2
+
+/*
+ * Build the IONIC_EXPDB_*_WQE mask: one bit per WQE size whose
+ * phy_cmb_expdb*_pages field in the device info is non-zero.
+ */
+static u8 ionic_get_expdb(struct ionic_lif *lif)
+{
+	u8 mask = 0;
+
+	mask |= lif->ionic->idev.phy_cmb_expdb64_pages ?
+		IONIC_EXPDB_64B_WQE : 0;
+	mask |= lif->ionic->idev.phy_cmb_expdb128_pages ?
+		IONIC_EXPDB_128B_WQE : 0;
+	mask |= lif->ionic->idev.phy_cmb_expdb256_pages ?
+		IONIC_EXPDB_256B_WQE : 0;
+	mask |= lif->ionic->idev.phy_cmb_expdb512_pages ?
+		IONIC_EXPDB_512B_WQE : 0;
+
+	return mask;
+}
+
+/*
+ * Populate @cfg with the RDMA-relevant configuration of @lif, converting the
+ * little-endian identity fields to CPU byte order.
+ */
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg)
+{
+	union ionic_lif_identity *ident = &lif->ionic->ident.lif;
+
+	cfg->lif = lif;
+	cfg->hwdev = &lif->ionic->pdev->dev;
+	cfg->lif_index = lif->index;
+	cfg->lif_hw_index = lif->hw_index;
+
+	cfg->dbid = lif->kern_pid;
+	cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+	cfg->dbpage = lif->kern_dbpage;
+	cfg->intr_ctrl = lif->ionic->idev.intr_ctrl;
+
+	cfg->db_phys = lif->ionic->bars[IONIC_PCI_BAR_DBELL].bus_addr;
+
+	/* page_size_cap is only read for RDMA version 2.1 and later */
+	if (IONIC_VERSION(ident->rdma.version, ident->rdma.minor_version) >=
+	    IONIC_VERSION(2, 1))
+		cfg->page_size_supported =
+			le64_to_cpu(ident->rdma.page_size_cap);
+	else
+		cfg->page_size_supported = IONIC_PAGE_SIZE_SUPPORTED;
+
+	cfg->rdma_version = ident->rdma.version;
+	cfg->qp_opcodes = ident->rdma.qp_opcodes;
+	cfg->admin_opcodes = ident->rdma.admin_opcodes;
+
+	cfg->stats_type = le16_to_cpu(ident->rdma.stats_type);
+	cfg->npts_per_lif = le32_to_cpu(ident->rdma.npts_per_lif);
+	cfg->nmrs_per_lif = le32_to_cpu(ident->rdma.nmrs_per_lif);
+	cfg->nahs_per_lif = le32_to_cpu(ident->rdma.nahs_per_lif);
+
+	cfg->aq_base = le32_to_cpu(ident->rdma.aq_qtype.qid_base);
+	cfg->cq_base = le32_to_cpu(ident->rdma.cq_qtype.qid_base);
+	cfg->eq_base = le32_to_cpu(ident->rdma.eq_qtype.qid_base);
+
+	/*
+	 * ionic_create_rdma_admin() may reduce aq_count or eq_count if
+	 * it is unable to allocate all that were requested.
+	 * aq_count is tunable; see ionic_aq_count
+	 * eq_count is tunable; see ionic_eq_count
+	 */
+	cfg->aq_count = le32_to_cpu(ident->rdma.aq_qtype.qid_count);
+	cfg->eq_count = le32_to_cpu(ident->rdma.eq_qtype.qid_count);
+	cfg->cq_count = le32_to_cpu(ident->rdma.cq_qtype.qid_count);
+	cfg->qp_count = le32_to_cpu(ident->rdma.sq_qtype.qid_count);
+
+	cfg->aq_qtype = ident->rdma.aq_qtype.qtype;
+	cfg->sq_qtype = ident->rdma.sq_qtype.qtype;
+	cfg->rq_qtype = ident->rdma.rq_qtype.qtype;
+	cfg->cq_qtype = ident->rdma.cq_qtype.qtype;
+	cfg->eq_qtype = ident->rdma.eq_qtype.qtype;
+	cfg->udma_qgrp_shift = ident->rdma.udma_shift;
+	cfg->udma_count = 2;
+
+	cfg->max_stride = ident->rdma.max_stride;
+	cfg->expdb_mask = ionic_get_expdb(lif);
+
+	cfg->sq_expdb =
+		!!(lif->qtype_info[IONIC_QTYPE_TXQ].features & IONIC_QIDENT_F_EXPDB);
+	cfg->rq_expdb =
+		!!(lif->qtype_info[IONIC_QTYPE_RXQ].features & IONIC_QIDENT_F_EXPDB);
+}
+
+/*
+ * Return the LIF's net_device with a reference taken; the caller must drop
+ * it with dev_put().
+ */
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif)
+{
+	struct net_device *ndev = lif->netdev;
+
+	dev_hold(ndev);
+
+	return ndev;
+}
+
+/* Copy the device's firmware version string into @str, bounded by @len. */
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len)
+{
+	const char *fw_version = lif->ionic->idev.dev_info.fw_version;
+
+	strscpy(str, fw_version, len);
+}
+
+/* ASIC revision taken from the device info of @lif's parent device. */
+u8 ionic_lif_asic_rev(struct ionic_lif *lif)
+{
+	u8 rev = lif->ionic->idev.dev_info.asic_rev;
+
+	return rev;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
new file mode 100644
index 000000000000..20853429f623
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_LIF_CFG_H_
+#define _IONIC_LIF_CFG_H_
+
+/* Combine a major (a) and minor (b) version into one comparable value. */
+#define IONIC_VERSION(a, b) (((a) << 16) + ((b) << 8))
+#define IONIC_PAGE_SIZE_SUPPORTED 0x40201000 /* 4kb, 2Mb, 1Gb */
+
+/* Bits of ionic_lif_cfg.expdb_mask, one per supported WQE size. */
+#define IONIC_EXPDB_64B_WQE BIT(0)
+#define IONIC_EXPDB_128B_WQE BIT(1)
+#define IONIC_EXPDB_256B_WQE BIT(2)
+#define IONIC_EXPDB_512B_WQE BIT(3)
+
+/*
+ * RDMA view of a LIF's configuration, filled in by ionic_fill_lif_cfg().
+ * Values are in CPU byte order.
+ */
+struct ionic_lif_cfg {
+ struct device *hwdev;
+ struct ionic_lif *lif;
+
+ int lif_index;
+ int lif_hw_index;
+
+ u32 dbid;
+ int dbid_count;
+ u64 __iomem *dbpage;
+ struct ionic_intr __iomem *intr_ctrl;
+ phys_addr_t db_phys;
+
+ u64 page_size_supported;
+ u32 npts_per_lif;
+ u32 nmrs_per_lif;
+ u32 nahs_per_lif;
+
+ u32 aq_base;
+ u32 cq_base;
+ u32 eq_base;
+
+ int aq_count;
+ int eq_count;
+ int cq_count;
+ int qp_count;
+
+ u16 stats_type;
+ u8 aq_qtype;
+ u8 sq_qtype;
+ u8 rq_qtype;
+ u8 cq_qtype;
+ u8 eq_qtype;
+
+ u8 udma_count;
+ u8 udma_qgrp_shift;
+
+ u8 rdma_version;
+ u8 qp_opcodes;
+ u8 admin_opcodes;
+
+ u8 max_stride;
+ bool sq_expdb;
+ bool rq_expdb;
+ u8 expdb_mask;
+};
+
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg);
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif);
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len);
+u8 ionic_lif_asic_rev(struct ionic_lif *lif);
+
+#endif /* _IONIC_LIF_CFG_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
new file mode 100644
index 000000000000..e74db73c9246
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/mman.h>
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va)
+{
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+ u64 dma;
+
+ if (!buf->tbl_pages)
+ return cpu_to_le64(0);
+
+ if (buf->tbl_pages > 1)
+ return cpu_to_le64(buf->tbl_dma);
+
+ if (buf->tbl_buf)
+ dma = le64_to_cpu(buf->tbl_buf[0]);
+ else
+ dma = buf->tbl_dma;
+
+ return cpu_to_le64(dma + (va & pg_mask));
+}
+
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va)
+{
+ if (buf->tbl_pages > 1) {
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+
+ return cpu_to_be64(va & pg_mask);
+ }
+
+ return 0;
+}
+
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma)
+{
+ if (unlikely(buf->tbl_pages == buf->tbl_limit))
+ return -ENOMEM;
+
+ if (buf->tbl_buf)
+ buf->tbl_buf[buf->tbl_pages] = cpu_to_le64(dma);
+ else
+ buf->tbl_dma = dma;
+
+ ++buf->tbl_pages;
+
+ return 0;
+}
+
+/*
+ * Allocate the page table array for @buf, sized for buf->tbl_limit entries,
+ * and map it with DMA_TO_DEVICE on the lif's hwdev.
+ *
+ * Return: zero on success, -ENOMEM if the allocation fails, or the
+ * dma_mapping_error() result if the mapping fails.  On either failure
+ * buf->tbl_buf is NULL, so a later ionic_pgtbl_unbuf() on the same buf
+ * neither unmaps nor frees the table a second time.
+ */
+static int ionic_tbl_buf_alloc(struct ionic_ibdev *dev,
+			       struct ionic_tbl_buf *buf)
+{
+	int rc;
+
+	buf->tbl_size = buf->tbl_limit * sizeof(*buf->tbl_buf);
+	buf->tbl_buf = kmalloc(buf->tbl_size, GFP_KERNEL);
+	if (!buf->tbl_buf)
+		return -ENOMEM;
+
+	buf->tbl_dma = dma_map_single(dev->lif_cfg.hwdev, buf->tbl_buf,
+				      buf->tbl_size, DMA_TO_DEVICE);
+	rc = dma_mapping_error(dev->lif_cfg.hwdev, buf->tbl_dma);
+	if (rc) {
+		kfree(buf->tbl_buf);
+		/* do not leave a dangling pointer in the caller's buf */
+		buf->tbl_buf = NULL;
+		return rc;
+	}
+
+	return 0;
+}
+
+static int ionic_pgtbl_umem(struct ionic_tbl_buf *buf, struct ib_umem *umem)
+{
+ struct ib_block_iter biter;
+ u64 page_dma;
+ int rc;
+
+ rdma_umem_for_each_dma_block(umem, &biter, BIT_ULL(buf->page_size_log2)) {
+ page_dma = rdma_block_iter_dma_address(&biter);
+ rc = ionic_pgtbl_page(buf, page_dma);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf)
+{
+ if (buf->tbl_buf)
+ dma_unmap_single(dev->lif_cfg.hwdev, buf->tbl_dma,
+ buf->tbl_size, DMA_TO_DEVICE);
+
+ kfree(buf->tbl_buf);
+ memset(buf, 0, sizeof(*buf));
+}
+
+/*
+ * Initialize @buf with the page translation for either a umem or a single
+ * dma address.
+ *
+ * When @umem is given, @limit is replaced by the number of @page_size dma
+ * blocks in the umem and every block address is recorded.  Otherwise the
+ * caller's @limit is used and @dma is recorded as the first page.
+ *
+ * A table is only allocated and mapped when the limit is greater than one;
+ * with a limit of one the single address is kept directly in buf->tbl_dma.
+ *
+ * Return: zero on success, -EINVAL if the limit is less than one, or the
+ * error from table allocation or from recording a page.  If recording a page
+ * fails, @buf is released with ionic_pgtbl_unbuf() before returning.
+ */
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+		     struct ionic_tbl_buf *buf,
+		     struct ib_umem *umem,
+		     dma_addr_t dma,
+		     int limit,
+		     u64 page_size)
+{
+	int rc;
+
+	memset(buf, 0, sizeof(*buf));
+
+	if (umem) {
+		limit = ib_umem_num_dma_blocks(umem, page_size);
+		buf->page_size_log2 = order_base_2(page_size);
+	}
+
+	if (limit < 1)
+		return -EINVAL;
+
+	buf->tbl_limit = limit;
+
+	/* skip pgtbl if contiguous / direct translation */
+	if (limit > 1) {
+		rc = ionic_tbl_buf_alloc(dev, buf);
+		if (rc)
+			return rc;
+	}
+
+	if (umem)
+		rc = ionic_pgtbl_umem(buf, umem);
+	else
+		rc = ionic_pgtbl_page(buf, dma);
+
+	if (rc)
+		goto err_unbuf;
+
+	/*
+	 * The table entries were written by the CPU after dma_map_single(),
+	 * so sync the mapping for the device before the device reads it.
+	 */
+	if (buf->tbl_buf)
+		dma_sync_single_for_device(dev->lif_cfg.hwdev, buf->tbl_dma,
+					   buf->tbl_size, DMA_TO_DEVICE);
+
+	return 0;
+
+err_unbuf:
+	ionic_pgtbl_unbuf(dev, buf);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.c b/drivers/infiniband/hw/ionic/ionic_queue.c
new file mode 100644
index 000000000000..aa897ed2a412
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_queue.h"
+
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride)
+{
+ if (depth < 0 || depth > 0xffff)
+ return -EINVAL;
+
+ if (stride == 0 || stride > 0x10000)
+ return -EINVAL;
+
+ if (depth == 0)
+ depth = 1;
+
+ q->depth_log2 = order_base_2(depth + 1);
+ q->stride_log2 = order_base_2(stride);
+
+ if (q->depth_log2 + q->stride_log2 < PAGE_SHIFT)
+ q->depth_log2 = PAGE_SHIFT - q->stride_log2;
+
+ if (q->depth_log2 > 16 || q->stride_log2 > 16)
+ return -EINVAL;
+
+ q->size = BIT_ULL(q->depth_log2 + q->stride_log2);
+ q->mask = BIT(q->depth_log2) - 1;
+
+ q->ptr = dma_alloc_coherent(dma_dev, q->size, &q->dma, GFP_KERNEL);
+ if (!q->ptr)
+ return -ENOMEM;
+
+ /* it will always be page aligned, but just to be sure... */
+ if (!PAGE_ALIGNED(q->ptr)) {
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+ return -ENOMEM;
+ }
+
+ q->prod = 0;
+ q->cons = 0;
+ q->dbell = 0;
+
+ return 0;
+}
+
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev)
+{
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.h b/drivers/infiniband/hw/ionic/ionic_queue.h
new file mode 100644
index 000000000000..d18020d4cad5
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_QUEUE_H_
+#define _IONIC_QUEUE_H_
+
+#include <linux/io.h>
+#include <ionic_regs.h>
+
+#define IONIC_MAX_DEPTH 0xffff
+#define IONIC_MAX_CQ_DEPTH 0xffff
+#define IONIC_CQ_RING_ARM IONIC_DBELL_RING_1
+#define IONIC_CQ_RING_SOL IONIC_DBELL_RING_2
+
+/**
+ * struct ionic_queue - Ring buffer used between device and driver
+ * @size: Size of the buffer, in bytes
+ * @dma: Dma address of the buffer
+ * @ptr: Buffer virtual address
+ * @prod: Driver position in the queue
+ * @cons: Device position in the queue
+ * @mask: Capacity of the queue, subtracting the hole
+ * This value is equal to ((1 << depth_log2) - 1)
+ * @depth_log2: Log base two size depth of the queue
+ * @stride_log2: Log base two size of an element in the queue
+ * @dbell: Doorbell identifying bits
+ */
+struct ionic_queue {
+ size_t size;
+ dma_addr_t dma;
+ void *ptr;
+ u16 prod;
+ u16 cons;
+ u16 mask;
+ u8 depth_log2;
+ u8 stride_log2;
+ u64 dbell;
+};
+
+/**
+ * ionic_queue_init() - Initialize user space queue
+ * @q: Uninitialized queue structure
+ * @dma_dev: DMA device for mapping
+ * @depth: Depth of the queue
+ * @stride: Size of each element of the queue
+ *
+ * Return: status code
+ */
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride);
+
+/**
+ * ionic_queue_destroy() - Destroy user space queue
+ * @q: Queue structure
+ * @dma_dev: DMA device for mapping
+ *
+ * Frees the DMA-coherent buffer allocated by ionic_queue_init(); returns nothing.
+ */
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev);
+
+/**
+ * ionic_queue_empty() - Test if queue is empty
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is empty
+ */
+static inline bool ionic_queue_empty(struct ionic_queue *q)
+{
+ return q->prod == q->cons;
+}
+
+/**
+ * ionic_queue_length() - Get the current length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length
+ */
+static inline u16 ionic_queue_length(struct ionic_queue *q)
+{
+ return (q->prod - q->cons) & q->mask;
+}
+
+/**
+ * ionic_queue_length_remaining() - Get the remaining length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length remaining
+ */
+static inline u16 ionic_queue_length_remaining(struct ionic_queue *q)
+{
+ return q->mask - ionic_queue_length(q);
+}
+
+/**
+ * ionic_queue_full() - Test if queue is full
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is full
+ */
+static inline bool ionic_queue_full(struct ionic_queue *q)
+{
+ return q->mask == ionic_queue_length(q);
+}
+
+/**
+ * ionic_color_wrap() - Flip the color if prod is wrapped
+ * @prod: Queue index just after advancing
+ * @color: Queue color just prior to advancing the index
+ *
+ * Return: color after advancing the index
+ */
+static inline bool ionic_color_wrap(u16 prod, bool color)
+{
+ /* logical xor color with (prod == 0) */
+ return color != (prod == 0);
+}
+
+/**
+ * ionic_queue_at() - Get the element at the given index
+ * @q: Queue structure
+ * @idx: Index in the queue
+ *
+ * The index must be within the bounds of the queue. It is not checked here.
+ *
+ * Return: pointer to element at index
+ */
+static inline void *ionic_queue_at(struct ionic_queue *q, u16 idx)
+{
+ return q->ptr + ((unsigned long)idx << q->stride_log2);
+}
+
+/**
+ * ionic_queue_at_prod() - Get the element at the producer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at producer index
+ */
+static inline void *ionic_queue_at_prod(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->prod);
+}
+
+/**
+ * ionic_queue_at_cons() - Get the element at the consumer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at consumer index
+ */
+static inline void *ionic_queue_at_cons(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->cons);
+}
+
+/**
+ * ionic_queue_next() - Compute the next index
+ * @q: Queue structure
+ * @idx: Index
+ *
+ * Return: next index after idx
+ */
+static inline u16 ionic_queue_next(struct ionic_queue *q, u16 idx)
+{
+ return (idx + 1) & q->mask;
+}
+
+/**
+ * ionic_queue_produce() - Increase the producer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not full. It is not checked here.
+ */
+static inline void ionic_queue_produce(struct ionic_queue *q)
+{
+ q->prod = ionic_queue_next(q, q->prod);
+}
+
+/**
+ * ionic_queue_consume() - Increase the consumer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume(struct ionic_queue *q)
+{
+ q->cons = ionic_queue_next(q, q->cons);
+}
+
+/**
+ * ionic_queue_consume_entries() - Increase the consumer index by entries
+ * @q: Queue structure
+ * @entries: Number of entries to increment
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume_entries(struct ionic_queue *q,
+ u16 entries)
+{
+ q->cons = (q->cons + entries) & q->mask;
+}
+
+/**
+ * ionic_queue_dbell_init() - Initialize doorbell bits for queue id
+ * @q: Queue structure
+ * @qid: Queue identifying number
+ */
+static inline void ionic_queue_dbell_init(struct ionic_queue *q, u32 qid)
+{
+ q->dbell = IONIC_DBELL_QID(qid);
+}
+
+/**
+ * ionic_queue_dbell_val() - Get current doorbell update value
+ * @q: Queue structure
+ *
+ * Return: current doorbell update value
+ */
+static inline u64 ionic_queue_dbell_val(struct ionic_queue *q)
+{
+ return q->dbell | q->prod;
+}
+
+#endif /* _IONIC_QUEUE_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_res.h b/drivers/infiniband/hw/ionic/ionic_res.h
new file mode 100644
index 000000000000..46c8c584bd9a
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_res.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_RES_H_
+#define _IONIC_RES_H_
+
+#include <linux/kernel.h>
+#include <linux/idr.h>
+
+/**
+ * struct ionic_resid_bits - Number allocator based on IDA
+ *
+ * @inuse: IDA handle
+ * @inuse_size: Highest ID limit for IDA
+ */
+struct ionic_resid_bits {
+ struct ida inuse;
+ unsigned int inuse_size;
+};
+
+/**
+ * ionic_resid_init() - Initialize a resid allocator
+ * @resid: Uninitialized resid allocator
+ * @size: Capacity of the allocator
+ *
+ * Records @size as the allocator capacity and initializes the IDA; returns nothing.
+ */
+static inline void ionic_resid_init(struct ionic_resid_bits *resid,
+ unsigned int size)
+{
+ resid->inuse_size = size;
+ ida_init(&resid->inuse);
+}
+
+/**
+ * ionic_resid_destroy() - Destroy a resid allocator
+ * @resid: Resid allocator
+ */
+static inline void ionic_resid_destroy(struct ionic_resid_bits *resid)
+{
+ ida_destroy(&resid->inuse);
+}
+
+/**
+ * ionic_resid_get_shared() - Allocate an available shared resource id
+ * @resid: Resid allocator
+ * @min: Smallest valid resource id
+ * @size: One after largest valid resource id
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get_shared(struct ionic_resid_bits *resid,
+ unsigned int min,
+ unsigned int size)
+{
+ return ida_alloc_range(&resid->inuse, min, size - 1, GFP_KERNEL);
+}
+
+/**
+ * ionic_resid_get() - Allocate an available resource id
+ * @resid: Resid allocator
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get(struct ionic_resid_bits *resid)
+{
+ return ionic_resid_get_shared(resid, 0, resid->inuse_size);
+}
+
+/**
+ * ionic_resid_put() - Free a resource id
+ * @resid: Resid allocator
+ * @id: Resource id
+ */
+static inline void ionic_resid_put(struct ionic_resid_bits *resid, int id)
+{
+ ida_free(&resid->inuse, id);
+}
+
+/**
+ * ionic_bitid_to_qid() - Transform a resource bit index into a queue id
+ * @bitid: Bit index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Queue id
+ *
+ * Udma-constrained queues (QPs and CQs) are associated with their udma by
+ * queue group. Even queue groups are associated with udma0, and odd queue
+ * groups with udma1.
+ *
+ * For allocating queue ids, we want to arrange the bits into two halves,
+ * with the even queue groups of udma0 in the lower half of the bitset,
+ * and the odd queue groups of udma1 in the upper half of the bitset.
+ * Then, one or two calls of find_next_zero_bit can examine all the bits
+ * for queues of an entire udma.
+ *
+ * For example, assuming eight queue groups with qgrp qids per group:
+ *
+ * bitid 0*qgrp..1*qgrp-1 : qid 0*qgrp..1*qgrp-1
+ * bitid 1*qgrp..2*qgrp-1 : qid 2*qgrp..3*qgrp-1
+ * bitid 2*qgrp..3*qgrp-1 : qid 4*qgrp..5*qgrp-1
+ * bitid 3*qgrp..4*qgrp-1 : qid 6*qgrp..7*qgrp-1
+ * bitid 4*qgrp..5*qgrp-1 : qid 1*qgrp..2*qgrp-1
+ * bitid 5*qgrp..6*qgrp-1 : qid 3*qgrp..4*qgrp-1
+ * bitid 6*qgrp..7*qgrp-1 : qid 5*qgrp..6*qgrp-1
+ * bitid 7*qgrp..8*qgrp-1 : qid 7*qgrp..8*qgrp-1
+ *
+ * There are three important ranges of bits in the qid. There is the udma
+ * bit "U" at qgrp_shift, which is the least significant bit of the group
+ * index, and determines which udma a queue is associated with.
+ * The bits of lesser significance we can call the idx bits "I", which are
+ * the index of the queue within the group. The bits of greater significance
+ * we can call the grp bits "G", which are other bits of the group index that
+ * do not determine the udma. Those bits are just rearranged in the bit index
+ * in the bitset. A bitid has the udma bit in the most significant place,
+ * then the grp bits, then the idx bits.
+ *
+ * bitid: 00000000000000 U GGG IIIIII
+ * qid: 00000000000000 GGG U IIIIII
+ *
+ * Transforming from bit index to qid, or from qid to bit index, can be
+ * accomplished by rearranging the bits by masking and shifting.
+ */
+static inline u32 ionic_bitid_to_qid(u32 bitid, u8 qgrp_shift,
+ u8 half_qid_shift)
+{
+ u32 udma_bit =
+ (bitid & BIT(half_qid_shift)) >> (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (bitid & GENMASK(half_qid_shift - 1, qgrp_shift)) << 1;
+ u32 idx_bits = bitid & (BIT(qgrp_shift) - 1);
+
+ return grp_bits | udma_bit | idx_bits;
+}
+
+/**
+ * ionic_qid_to_bitid() - Transform a queue id into a resource bit index
+ * @qid: queue index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Resource bit index
+ *
+ * This is the inverse of ionic_bitid_to_qid().
+ */
+static inline u32 ionic_qid_to_bitid(u32 qid, u8 qgrp_shift, u8 half_qid_shift)
+{
+ u32 udma_bit = (qid & BIT(qgrp_shift)) << (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (qid & GENMASK(half_qid_shift, qgrp_shift + 1)) >> 1;
+ u32 idx_bits = qid & (BIT(qgrp_shift) - 1);
+
+ return udma_bit | grp_bits | idx_bits;
+}
+#endif /* _IONIC_RES_H_ */
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index 5f49a58590ed..0bd7e3fca1fb 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -4,10 +4,11 @@ config INFINIBAND_IRDMA
depends on INET
depends on IPV6 || !IPV6
depends on PCI
- depends on ICE && I40E
+ depends on IDPF && ICE && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
select CRC32
help
- This is an Intel(R) Ethernet Protocol Driver for RDMA driver
- that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
+ This is an Intel(R) Ethernet Protocol Driver for RDMA that
+ supports IPU E2000 (RoCEv2), E810 (iWARP/RoCEv2) and X722 (iWARP)
+ network devices.
diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile
index 48c3854235a0..03ceb9e5475f 100644
--- a/drivers/infiniband/hw/irdma/Makefile
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -13,7 +13,10 @@ irdma-objs := cm.o \
hw.o \
i40iw_hw.o \
i40iw_if.o \
+ ig3rdma_if.o\
+ icrdma_if.o \
icrdma_hw.o \
+ ig3rdma_hw.o\
main.o \
pble.o \
puda.o \
@@ -22,6 +25,7 @@ irdma-objs := cm.o \
uk.o \
utils.o \
verbs.o \
+ virtchnl.o \
ws.o \
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 99a7f1a6c0b5..4ef1c29032f7 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -74,6 +74,14 @@ static void irdma_set_qos_info(struct irdma_sc_vsi *vsi,
{
u8 i;
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle;
+ vsi->qos[i].valid = true;
+ }
+
+ return;
+ }
vsi->qos_rel_bw = l2p->vsi_rel_bw;
vsi->qos_prio_type = l2p->vsi_prio_type;
vsi->dscp_mode = l2p->dscp_mode;
@@ -404,7 +412,8 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info)
pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) ||
- (info->virtual_map && info->rq_pa >= pble_obj_cnt))
+ (!info->qp_uk_init_info.srq_uk &&
+ info->virtual_map && info->rq_pa >= pble_obj_cnt))
return -EINVAL;
qp->llp_stream_handle = (void *)(-1);
@@ -439,6 +448,208 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info)
}
/**
+ * irdma_sc_srq_init - init sc_srq structure
+ * @srq: srq sc struct
+ * @info: parameters for srq init
+ */
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info)
+{
+ u32 srq_size_quanta;
+ int ret_code;
+
+ ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info);
+ if (ret_code)
+ return ret_code;
+
+ srq->dev = info->pd->dev;
+ srq->pd = info->pd;
+ srq->vsi = info->vsi;
+ srq->srq_pa = info->srq_pa;
+ srq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ srq->pasid = info->pasid;
+ srq->pasid_valid = info->pasid_valid;
+ srq->srq_limit = info->srq_limit;
+ srq->leaf_pbl_size = info->leaf_pbl_size;
+ srq->virtual_map = info->virtual_map;
+ srq->tph_en = info->tph_en;
+ srq->arm_limit_event = info->arm_limit_event;
+ srq->tph_val = info->tph_value;
+ srq->shadow_area_pa = info->shadow_area_pa;
+
+ /* Smallest SRQ size is 256B i.e. 8 quanta */
+ srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA,
+ srq->srq_uk.srq_size *
+ srq->srq_uk.wqe_size_multiplier);
+ srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta,
+ IRDMA_QUEUE_TYPE_SRQ);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_create - send srq create CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->pd->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ srq->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_modify - send modify_srq CQP WQE
+ * @srq: srq sc struct
+ * @info: parameters for srq modification
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_modify(struct irdma_sc_srq *srq,
+ struct irdma_modify_srq_info *info, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, info->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQCTX, srq->srq_uk.srq_id));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ info->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_destroy - send srq_destroy CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_destroy(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
* irdma_sc_qp_create - create qp
* @qp: sc qp
* @info: qp create info
@@ -629,13 +840,14 @@ static u8 irdma_sc_get_encoded_ird_size(u16 ird_size)
}
/**
- * irdma_sc_qp_setctx_roce - set qp's context
+ * irdma_sc_qp_setctx_roce_gen_2 - set qp's context
* @qp: sc qp
* @qp_ctx: context ptr
* @info: ctx info
*/
-void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
- struct irdma_qp_host_ctx_info *info)
+static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
{
struct irdma_roce_offload_info *roce_info;
struct irdma_udp_offload_info *udp;
@@ -753,6 +965,189 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
}
+/**
+ * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size)
+{
+ switch (ird_size ?
+ roundup_pow_of_two(2 * ird_size) : 4) {
+ case 4096:
+ return IRDMA_IRD_HW_SIZE_4096_GEN3;
+ case 2048:
+ return IRDMA_IRD_HW_SIZE_2048_GEN3;
+ case 1024:
+ return IRDMA_IRD_HW_SIZE_1024_GEN3;
+ case 512:
+ return IRDMA_IRD_HW_SIZE_512_GEN3;
+ case 256:
+ return IRDMA_IRD_HW_SIZE_256_GEN3;
+ case 128:
+ return IRDMA_IRD_HW_SIZE_128_GEN3;
+ case 64:
+ return IRDMA_IRD_HW_SIZE_64_GEN3;
+ case 32:
+ return IRDMA_IRD_HW_SIZE_32_GEN3;
+ case 16:
+ return IRDMA_IRD_HW_SIZE_16_GEN3;
+ case 8:
+ return IRDMA_IRD_HW_SIZE_8_GEN3;
+ case 4:
+ default:
+ break;
+ }
+
+ return IRDMA_IRD_HW_SIZE_4_GEN3;
+}
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_3 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_roce_offload_info *roce_info = info->roce_info;
+ struct irdma_udp_offload_info *udp = info->udp_info;
+ u64 qw0, qw3, qw7 = 0, qw8 = 0;
+ u8 push_mode_en;
+ u32 push_idx;
+
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+
+ qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_USE_SRQ, !qp->qp_uk.srq_uk ? 0 : 1) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag);
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) |
+ FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx));
+ qw7 = FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) |
+ FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) |
+ FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label);
+ set_64bit_val(qp_ctx, 56, qw7);
+ qw8 = FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) |
+ FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp);
+ set_64bit_val(qp_ctx, 64, qw8);
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) |
+ FIELD_PREP(IRDMAQPC_LSN, udp->lsn));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_EPSN, udp->epsn));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) |
+ FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd));
+ set_64bit_val(qp_ctx, 128,
+ FIELD_PREP(IRDMAQPC_MINRNR_TIMER, udp->min_rnr_timer) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_TMR, udp->rnr_nak_tmr) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin));
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 152,
+ FIELD_PREP(IRDMAQPC_MACADDRESS,
+ ether_addr_to_u64(roce_info->mac_addr)) |
+ FIELD_PREP(IRDMAQPC_LOCALACKTIMEOUT,
+ roce_info->local_ack_timeout));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE_GEN3, roce_info->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE_GEN3,
+ irdma_sc_get_encoded_ird_size_gen_3(roce_info->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE,
+ info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) |
+ FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) |
+ FIELD_PREP(IRDMAQPC_FW_CC_ENABLE,
+ roce_info->fw_cc_enable) |
+ FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE,
+ roce_info->udprivcq_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) |
+ FIELD_PREP(IRDMAQPC_SRQ_ID,
+ !qp->qp_uk.srq_uk ?
+ 0 : qp->qp_uk.srq_uk->srq_id) |
+ FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low));
+ set_64bit_val(qp_ctx, 208, roce_info->pd_id |
+ FIELD_PREP(IRDMAQPC_STAT_INDEX_GEN3, info->stats_idx) |
+ FIELD_PREP(IRDMAQPC_PKT_LIMIT, qp->pkt_limit));
+
+ print_hex_dump_debug("WQE: QP_HOST ROCE CTX WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_sc_qp_setctx_roce - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ *
+ * Dispatches on the device hw_rev: IRDMA_GEN_2 fills the context with the
+ * gen_2 layout, any other revision uses the gen_3 layout.
+ */
+void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
+ irdma_sc_qp_setctx_roce_gen_2(qp, qp_ctx, info);
+ else
+ irdma_sc_qp_setctx_roce_gen_3(qp, qp_ctx, info);
+}
+
/* irdma_sc_alloc_local_mac_entry - allocate a mac entry
* @cqp: struct for cqp hw
* @scratch: u64 saved to be used during cqp completion
@@ -1080,7 +1475,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) |
FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len));
set_64bit_val(wqe, 16,
- FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
set_64bit_val(wqe, 40,
FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index));
@@ -1096,6 +1492,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) |
FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -1165,6 +1563,7 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
set_64bit_val(wqe, 16,
FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) |
FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
if (!info->chunk_size) {
set_64bit_val(wqe, 32, info->reg_addr_pa);
@@ -1187,6 +1586,8 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) |
FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -1223,7 +1624,8 @@ static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
set_64bit_val(wqe, 8,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
set_64bit_val(wqe, 16,
- FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) |
FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) |
@@ -1263,7 +1665,8 @@ static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev,
set_64bit_val(wqe, 8,
FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
set_64bit_val(wqe, 16,
- FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index));
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) |
FIELD_PREP(IRDMA_CQPSQ_STAG_MWTYPE, info->mw_wide) |
@@ -1343,6 +1746,7 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) |
FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -1873,7 +2277,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
mutex_init(&vsi->qos[i].qos_mutex);
INIT_LIST_HEAD(&vsi->qos[i].qplist);
}
- if (vsi->register_qset) {
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) {
vsi->dev->ws_add = irdma_ws_add;
vsi->dev->ws_remove = irdma_ws_remove;
vsi->dev->ws_reset = irdma_ws_reset;
@@ -1888,7 +2292,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
* irdma_get_stats_idx - Return stats index
* @vsi: pointer to the vsi
*/
-static u8 irdma_get_stats_idx(struct irdma_sc_vsi *vsi)
+static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi)
{
struct irdma_stats_inst_info stats_info = {};
struct irdma_sc_dev *dev = vsi->dev;
@@ -1964,12 +2368,13 @@ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
(void *)((uintptr_t)stats_buff_mem->va +
IRDMA_GATHER_STATS_BUF_SIZE);
- irdma_hw_stats_start_timer(vsi);
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_start_timer(vsi);
/* when stat allocation is not required default to fcn_id. */
vsi->stats_idx = info->fcn_id;
if (info->alloc_stats_inst) {
- u8 stats_idx = irdma_get_stats_idx(vsi);
+ u16 stats_idx = irdma_get_stats_idx(vsi);
if (stats_idx != IRDMA_INVALID_STATS_IDX) {
vsi->stats_inst_alloc = true;
@@ -1993,7 +2398,7 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi)
{
struct irdma_stats_inst_info stats_info = {};
struct irdma_sc_dev *dev = vsi->dev;
- u8 stats_idx = vsi->stats_idx;
+ u16 stats_idx = vsi->stats_idx;
if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
if (vsi->stats_inst_alloc) {
@@ -2009,7 +2414,9 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi)
if (!vsi->pestat)
return;
- irdma_hw_stats_stop_timer(vsi);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_stop_timer(vsi);
dma_free_coherent(vsi->pestat->hw->device,
vsi->pestat->gather_info.stats_buff_mem.size,
vsi->pestat->gather_info.stats_buff_mem.va,
@@ -2026,6 +2433,14 @@ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type)
{
u8 encoded_size = 0;
+ if (queue_type == IRDMA_QUEUE_TYPE_SRQ) {
+ /* Smallest SRQ size is 256B (8 quanta) that gets
+ * encoded to 0.
+ */
+ encoded_size = ilog2(wqsize) - 3;
+
+ return encoded_size;
+ }
/* cqp sq's hw coded value starts from 1 for size of 4
* while it starts from 0 for qp' wq's.
*/
@@ -2259,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
info->ae_src) : 0;
set_64bit_val(wqe, 8, temp);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx));
+ }
hdr = qp->qp_uk.qp_id |
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
@@ -2267,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid);
dma_wmb(); /* make sure WQE is written before valid bit is set */
set_64bit_val(wqe, 24, hdr);
@@ -2562,6 +2986,9 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) |
FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH,
+ (cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) |
FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
@@ -2706,6 +3133,41 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
}
/**
+ * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3
+ * @ird_enc: IRD encoding, compared against the IRDMA_IRD_HW_SIZE_*_GEN3 values
+ * Return: decoded IRD size (4 to 4096); 4 if the encoding is not recognized.
+ */
+static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc)
+{
+ switch (ird_enc) {
+ case IRDMA_IRD_HW_SIZE_4096_GEN3:
+ return 4096;
+ case IRDMA_IRD_HW_SIZE_2048_GEN3:
+ return 2048;
+ case IRDMA_IRD_HW_SIZE_1024_GEN3:
+ return 1024;
+ case IRDMA_IRD_HW_SIZE_512_GEN3:
+ return 512;
+ case IRDMA_IRD_HW_SIZE_256_GEN3:
+ return 256;
+ case IRDMA_IRD_HW_SIZE_128_GEN3:
+ return 128;
+ case IRDMA_IRD_HW_SIZE_64_GEN3:
+ return 64;
+ case IRDMA_IRD_HW_SIZE_32_GEN3:
+ return 32;
+ case IRDMA_IRD_HW_SIZE_16_GEN3:
+ return 16;
+ case IRDMA_IRD_HW_SIZE_8_GEN3:
+ return 8;
+ case IRDMA_IRD_HW_SIZE_4_GEN3:
+ return 4;
+ default: /* unrecognized encoding: fall back to the smallest size */
+ return 4;
+ }
+}
+
+/**
* irdma_check_cqp_progress - check cqp processing progress
* @timeout: timeout info struct
* @dev: sc device struct
@@ -2738,6 +3200,89 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val,
}
/**
+ * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list
+ * @dev: sc device struct
+ * @info: AE entry info
+ * @first: true if this is the first call to this handler for given AEQE
+ * @scratch: (out) scratch value of the matched request, 0 if none matched
+ * @sw_def_info: (in/out) SW ticket value for this AE
+ *
+ * In case of AE_DEF_CMPL event, this function should be called in a loop
+ * until it stores 0 (a NULL scratch pointer) in *scratch.
+ * For each call, it looks for a matching deferred CQP request on the pending
+ * list, moves it to the available list, increments completed_ops and hands
+ * back the associated scratch entry.
+ * If this is the first call to this function for given AEQE, sw_def_info
+ * value is not used to find matching requests. Instead, it is populated
+ * with the value from the first matching cqp_request on the list.
+ * For subsequent calls, ooo_op->sw_def_info needs to match the value passed
+ * by the caller.
+ *
+ * The function itself returns nothing; the scratch entry of the cqp_request
+ * to be released is reported through @scratch (0 if no match was found).
+ */
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ *scratch = 0;
+
+ spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags);
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+ if (ooo_op->deferred &&
+ ((first && ooo_op->def_info == info->def_info) ||
+ (!first && ooo_op->sw_def_info == *sw_def_info))) {
+ *sw_def_info = ooo_op->sw_def_info;
+ *scratch = ooo_op->scratch;
+
+ list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break; /* at most one request is handed back per call */
+ }
+ }
+ spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags);
+
+ if (first && !*scratch) /* a miss is only reported for the first lookup of an AEQE */
+ ibdev_dbg(to_ibdev(dev),
+ "AEQ: deferred completion with unknown ticket: def_info 0x%x\n",
+ info->def_info);
+}
+
+/**
+ * irdma_sc_cqp_cleanup_handler - remove requests from pending list
+ * @dev: sc device struct
+ *
+ * This function should be called in a loop from irdma_cleanup_pending_cqp_op.
+ * Each call takes the first CQP request on the pending list, moves it to
+ * the available list, increments completed_ops and returns its scratch entry.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or 0 (NULL)
+ * if pending list is empty.
+ */
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u64 scratch = 0;
+
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { /* NOTE(review): walked without ooo_list_lock - confirm callers run exclusively */
+ scratch = ooo_op->scratch;
+
+ list_del(&ooo_op->list_entry);
+ list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break; /* only the first entry is consumed per call */
+ }
+
+ return scratch;
+}
+
+/**
* irdma_cqp_poll_registers - poll cqp registers
* @cqp: struct for cqp hw
* @tail: wqtail register value
@@ -2794,7 +3339,10 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
obj_info[rsrc_idx].cnt = (u32)FLD_RS_64(dev, temp, IRDMA_COMMIT_FPM_CQCNT);
break;
case IRDMA_HMC_IW_APBVT_ENTRY:
- obj_info[rsrc_idx].cnt = 1;
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ obj_info[rsrc_idx].cnt = 1;
+ else
+ obj_info[rsrc_idx].cnt = 0;
break;
default:
obj_info[rsrc_idx].cnt = (u32)temp;
@@ -2829,7 +3377,8 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
IRDMA_HMC_IW_QP);
irdma_sc_decode_fpm_commit(dev, buf, 8, info,
IRDMA_HMC_IW_CQ);
- /* skiping RSRVD */
+ irdma_sc_decode_fpm_commit(dev, buf, 16, info,
+ IRDMA_HMC_IW_SRQ);
irdma_sc_decode_fpm_commit(dev, buf, 24, info,
IRDMA_HMC_IW_HTE);
irdma_sc_decode_fpm_commit(dev, buf, 32, info,
@@ -2864,15 +3413,17 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
IRDMA_HMC_IW_HDR);
irdma_sc_decode_fpm_commit(dev, buf, 152, info,
IRDMA_HMC_IW_MD);
- irdma_sc_decode_fpm_commit(dev, buf, 160, info,
- IRDMA_HMC_IW_OOISC);
- irdma_sc_decode_fpm_commit(dev, buf, 168, info,
- IRDMA_HMC_IW_OOISCFFL);
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_commit(dev, buf, 160, info,
+ IRDMA_HMC_IW_OOISC);
+ irdma_sc_decode_fpm_commit(dev, buf, 168, info,
+ IRDMA_HMC_IW_OOISCFFL);
+ }
}
/* searching for the last object in HMC to find the size of the HMC area. */
for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) {
- if (info[i].base > max_base) {
+ if (info[i].base > max_base && info[i].cnt) {
max_base = info[i].base;
last_hmc_obj = i;
}
@@ -2927,6 +3478,7 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
struct irdma_hmc_fpm_misc *hmc_fpm_misc)
{
struct irdma_hmc_obj_info *obj_info;
+ u8 ird_encoding;
u64 temp;
u32 size;
u16 max_pe_sds;
@@ -2935,7 +3487,19 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
get_64bit_val(buf, 0, &temp);
hmc_info->first_sd_index = (u16)FIELD_GET(IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX, temp);
- max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3, temp);
+ else
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp);
+
+ /* Reduce SD count for unprivileged functions by 1 to account for PBLE
+ * backing page rounding
+ */
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2 &&
+ (hmc_info->hmc_fn_id >= dev->hw_attrs.first_hw_vf_fpm_id ||
+ !dev->privileged))
+ max_pe_sds--;
hmc_fpm_misc->max_sds = max_pe_sds;
hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
@@ -2949,11 +3513,17 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
size = (u32)(temp >> 32);
obj_info[IRDMA_HMC_IW_CQ].size = BIT_ULL(size);
+ irdma_sc_decode_fpm_query(buf, 24, obj_info, IRDMA_HMC_IW_SRQ);
irdma_sc_decode_fpm_query(buf, 32, obj_info, IRDMA_HMC_IW_HTE);
irdma_sc_decode_fpm_query(buf, 40, obj_info, IRDMA_HMC_IW_ARP);
- obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192;
- obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 0;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 0;
+ } else {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+ }
irdma_sc_decode_fpm_query(buf, 48, obj_info, IRDMA_HMC_IW_MR);
irdma_sc_decode_fpm_query(buf, 56, obj_info, IRDMA_HMC_IW_XF);
@@ -2962,7 +3532,7 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
obj_info[IRDMA_HMC_IW_XFFL].max_cnt = (u32)temp;
obj_info[IRDMA_HMC_IW_XFFL].size = 4;
hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp);
- if (!hmc_fpm_misc->xf_block_size)
+ if (obj_info[IRDMA_HMC_IW_XF].max_cnt && !hmc_fpm_misc->xf_block_size)
return -EINVAL;
irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1);
@@ -2984,6 +3554,14 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp);
hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp);
hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp);
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2,
+ dev->feature_info[IRDMA_FTN_FLAGS])) {
+ ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp);
+ hmc_fpm_misc->ird =
+ irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2;
+ dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird;
+ dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird;
+ }
if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
return 0;
irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC);
@@ -3000,15 +3578,25 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR);
irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD);
- irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC);
-
- get_64bit_val(buf, 168, &temp);
- obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp;
- obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4;
- hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp);
- if (!hmc_fpm_misc->ooiscf_block_size &&
- obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt)
- return -EINVAL;
+
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC);
+
+ get_64bit_val(buf, 168, &temp);
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4;
+ hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp);
+ if (!hmc_fpm_misc->ooiscf_block_size &&
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt)
+ return -EINVAL;
+ }
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ get_64bit_val(buf, 176, &temp);
+ hmc_fpm_misc->loc_mem_pages = (u32)FIELD_GET(IRDMA_QUERY_FPM_LOC_MEM_PAGES, temp);
+ if (!hmc_fpm_misc->loc_mem_pages)
+ return -EINVAL;
+ }
return 0;
}
@@ -3088,6 +3676,8 @@ exit:
int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
struct irdma_cqp_init_info *info)
{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u32 num_ooo_ops;
u8 hw_sq_size;
if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
@@ -3118,17 +3708,43 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
cqp->rocev2_rto_policy = info->rocev2_rto_policy;
cqp->protocol_used = info->protocol_used;
memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params));
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ cqp->ooisc_blksize = info->ooisc_blksize;
+ cqp->rrsp_blksize = info->rrsp_blksize;
+ cqp->q1_blksize = info->q1_blksize;
+ cqp->xmit_blksize = info->xmit_blksize;
+ cqp->blksizes_valid = info->blksizes_valid;
+ cqp->ts_shift = info->ts_shift;
+ cqp->ts_override = info->ts_override;
+ cqp->en_fine_grained_timers = info->en_fine_grained_timers;
+ cqp->pe_en_vf_cnt = info->pe_en_vf_cnt;
+ cqp->ooo_op_array = info->ooo_op_array;
+ /* initialize the OOO lists */
+ INIT_LIST_HEAD(&cqp->ooo_avail);
+ INIT_LIST_HEAD(&cqp->ooo_pnd);
+ if (cqp->ooo_op_array) {
+ /* Populate avail list entries */
+ for (num_ooo_ops = 0, ooo_op = info->ooo_op_array;
+ num_ooo_ops < cqp->sq_size;
+ num_ooo_ops++, ooo_op++)
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ }
+ }
info->dev->cqp = cqp;
IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ cqp->last_def_cmpl_ticket = 0;
+ cqp->sw_def_cmpl_ticket = 0;
cqp->requested_ops = 0;
atomic64_set(&cqp->completed_ops, 0);
/* for the cqp commands backlog. */
INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]);
- writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
- writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) {
+ writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ }
ibdev_dbg(to_ibdev(cqp->dev),
"WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
@@ -3160,6 +3776,7 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
return -ENOMEM;
spin_lock_init(&cqp->dev->cqp_lock);
+ spin_lock_init(&cqp->ooo_list_lock);
temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) |
FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) |
@@ -3171,12 +3788,29 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED,
cqp->protocol_used);
}
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS,
+ cqp->en_fine_grained_timers);
set_64bit_val(cqp->host_ctx, 0, temp);
set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) |
FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile);
+
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE,
+ cqp->ooisc_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE,
+ cqp->rrsp_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE,
+ cqp->xmit_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID,
+ cqp->blksizes_valid) |
+ FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE,
+ cqp->ts_override) |
+ FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift);
set_64bit_val(cqp->host_ctx, 16, temp);
set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) |
@@ -3338,6 +3972,87 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
}
/**
+ * irdma_sc_process_def_cmpl - process deferred or pending completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info; its scratch is recorded and its pending flag is set
+ * @wqe_idx: CQP WQE descriptor index
+ * @def_info: deferred op ticket value or out-of-order completion id
+ * @def_cmpl: true for deferred completion, false for pending (RCA)
+ */
+static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 wqe_idx, u32 def_info, bool def_cmpl)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ /* Deferred and out-of-order completions share the same list of pending
+ * completions. Since the list can also be accessed from the AE handler,
+ * it must be protected by a lock.
+ */
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+
+ /* For deferred completions bump up SW completion ticket value. */
+ if (def_cmpl) {
+ cqp->last_def_cmpl_ticket = def_info;
+ cqp->sw_def_cmpl_ticket++;
+ }
+ if (!list_empty(&cqp->ooo_avail)) { /* NOTE(review): nothing is queued when no free entry is left - confirm this cannot happen */
+ ooo_op = (struct irdma_ooo_cqp_op *)
+ list_entry(cqp->ooo_avail.next,
+ struct irdma_ooo_cqp_op, list_entry);
+
+ list_del(&ooo_op->list_entry);
+ ooo_op->scratch = info->scratch;
+ ooo_op->def_info = def_info;
+ ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket;
+ ooo_op->deferred = def_cmpl;
+ ooo_op->wqe_idx = wqe_idx;
+ /* Pending completions must be chronologically ordered,
+ * so adding at the end of list.
+ */
+ list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd);
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ info->pending = true; /* set whether or not an entry was queued above */
+}
+
+/**
+ * irdma_sc_process_ooo_cmpl - process out-of-order (final) completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info; scratch is set from the matched request, 0 if none
+ * @def_info: out-of-order completion id
+ */
+static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op_tmp;
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ info->scratch = 0; /* stays 0 unless a pending entry matches below */
+
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+ list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd,
+ list_entry) {
+ if (!ooo_op->deferred && ooo_op->def_info == def_info) { /* only non-deferred entries are considered */
+ list_del(&ooo_op->list_entry);
+ info->scratch = ooo_op->scratch;
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ if (!info->scratch)
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n",
+ def_info);
+}
+
+/**
* irdma_sc_ccq_get_cqe_info - get ccq's cq entry
* @ccq: ccq sc struct
* @info: completion q entry to return
@@ -3345,6 +4060,10 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
struct irdma_ccq_cqe_info *info)
{
+ u32 def_info;
+ bool def_cmpl = false;
+ bool pend_cmpl = false;
+ bool ooo_final_cmpl = false;
u64 qp_ctx, temp, temp1;
__le64 *cqe;
struct irdma_sc_cqp *cqp;
@@ -3352,6 +4071,7 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
u32 error;
u8 polarity;
int ret_code = 0;
+ unsigned long flags;
if (ccq->cq_uk.avoid_mem_cflct)
cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk);
@@ -3383,6 +4103,25 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
get_64bit_val(cqe, 16, &temp1);
info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL;
+ def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1);
+
+ pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL;
+
+ ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp);
+
+ if (def_cmpl || pend_cmpl || ooo_final_cmpl) {
+ if (ooo_final_cmpl)
+ irdma_sc_process_ooo_cmpl(cqp, info, def_info);
+ else
+ irdma_sc_process_def_cmpl(cqp, info, wqe_idx,
+ def_info, def_cmpl);
+ }
+ }
+
get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1);
info->cqp = cqp;
@@ -3399,7 +4138,16 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
dma_wmb(); /* make sure shadow area is updated before moving tail */
- IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ spin_lock_irqsave(&cqp->dev->cqp_lock, flags);
+ if (!ooo_final_cmpl)
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags);
+
+ /* Do not increment completed_ops counter on pending or deferred
+ * completions.
+ */
+ if (pend_cmpl || def_cmpl)
+ return ret_code;
atomic64_inc(&cqp->completed_ops);
return ret_code;
@@ -3647,7 +4395,7 @@ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
ceq->tph_en = info->tph_en;
ceq->tph_val = info->tph_val;
- ceq->vsi = info->vsi;
+ ceq->vsi_idx = info->vsi_idx;
ceq->polarity = 1;
IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
ceq->dev->ceq[info->ceq_id] = ceq;
@@ -3680,13 +4428,16 @@ static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch,
(ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
set_64bit_val(wqe, 56,
FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) |
- FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi->vsi_idx));
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx));
hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) |
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) |
FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) |
FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -3741,7 +4492,7 @@ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
int ret_code;
struct irdma_sc_dev *dev = ceq->dev;
- dev->ccq->vsi = ceq->vsi;
+ dev->ccq->vsi_idx = ceq->vsi_idx;
if (ceq->reg_cq) {
ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq);
if (ret_code)
@@ -3774,11 +4525,14 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq)
set_64bit_val(wqe, 16, ceq->elem_cnt);
set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid));
hdr = ceq->ceq_id |
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) |
FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -3942,10 +4696,13 @@ static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch,
(aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
set_64bit_val(wqe, 48,
(aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_AEQ) |
FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -3974,7 +4731,8 @@ static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
u64 hdr;
dev = aeq->dev;
- writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
+ if (dev->privileged)
+ writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
@@ -3982,9 +4740,12 @@ static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
return -ENOMEM;
set_64bit_val(wqe, 16, aeq->elem_cnt);
set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) |
FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
@@ -4025,18 +4786,39 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
aeqe, 16, false);
- ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp);
- info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
- info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) |
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC_GEN_3, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX_GEN_3,
+ temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_GEN_3, temp);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE_GEN_3, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE_GEN_3, compl_ctx);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE_GEN_3, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA_GEN_3, compl_ctx);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW_GEN_3, temp);
+ info->compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx);
+ compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx) << IRDMA_AEQE_CMPL_CTXT_S;
+ } else {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) |
((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18);
- info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp);
- info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp);
- info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp);
- info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp);
- info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW, temp);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW,
+ temp);
+ }
info->ae_src = ae_src;
switch (info->ae_id) {
+ case IRDMA_AE_SRQ_LIMIT:
+ info->srq = true;
+ /* [63:6] from CMPL_CTXT, [5:0] from WQDESCIDX. */
+ info->compl_ctx = compl_ctx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
case IRDMA_AE_PRIV_OPERATION_DENIED:
case IRDMA_AE_AMP_INVALIDATE_TYPE1_MW:
case IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW:
@@ -4069,6 +4851,10 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
case IRDMA_AE_LLP_DOUBT_REACHABILITY:
case IRDMA_AE_LLP_CONNECTION_ESTABLISHED:
case IRDMA_AE_RESET_SENT:
@@ -4085,6 +4871,10 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
info->compl_ctx = compl_ctx << 1;
ae_src = IRDMA_AE_SOURCE_RSVD;
break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ info->def_info = info->wqe_idx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
case IRDMA_AE_ROCE_EMPTY_MCG:
case IRDMA_AE_ROCE_BAD_MC_IP_ADDR:
case IRDMA_AE_ROCE_BAD_MC_QPID:
@@ -4110,6 +4900,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
info->qp = true;
info->rq = true;
info->compl_ctx = compl_ctx;
+ info->err_rq_idx_valid = true;
break;
case IRDMA_AE_SOURCE_CQ:
case IRDMA_AE_SOURCE_CQ_0110:
@@ -4125,8 +4916,18 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
info->compl_ctx = compl_ctx;
break;
case IRDMA_AE_SOURCE_IN_RR_WR:
+ info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ info->err_rq_idx_valid = true;
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
case IRDMA_AE_SOURCE_IN_RR_WR_1011:
info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info->sq = true;
+ info->err_rq_idx_valid = true;
+ }
info->compl_ctx = compl_ctx;
info->in_rdrsp_wr = true;
break;
@@ -4336,6 +5137,26 @@ int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id)
}
/**
+ * irdma_set_loc_mem() - flag non-zero buffer entries as local memory
+ * @buf: ptr to a buffer where local memory gets enabled
+ *
+ * Steps through @buf in 64-bit units up to IRDMA_COMMIT_FPM_BUF_SIZE and
+ * ORs BIT_ULL(ENABLE_LOC_MEM) into every non-zero value. The entry at
+ * IRDMA_PBLE_COMMIT_OFFSET is skipped and zero entries are left untouched.
+ */
+static void irdma_set_loc_mem(__le64 *buf)
+{
+	const u64 loc_mem_flag = BIT_ULL(ENABLE_LOC_MEM);
+	u32 off = 0;
+	u64 qword;
+
+	while (off < IRDMA_COMMIT_FPM_BUF_SIZE) {
+		/* The PBLE commit entry never gets the local memory bit. */
+		if (off != IRDMA_PBLE_COMMIT_OFFSET) {
+			get_64bit_val(buf, off, &qword);
+			if (qword)
+				set_64bit_val(buf, off, qword | loc_mem_flag);
+		}
+		off += sizeof(__le64);
+	}
+}
+
+/**
* irdma_sc_cfg_iw_fpm() - commits hmc obj cnt values using cqp
* command and populates fpm base address in hmc_info
* @dev : ptr to irdma_dev struct
@@ -4356,7 +5177,7 @@ static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id)
set_64bit_val(buf, 0, (u64)obj_info[IRDMA_HMC_IW_QP].cnt);
set_64bit_val(buf, 8, (u64)obj_info[IRDMA_HMC_IW_CQ].cnt);
- set_64bit_val(buf, 16, (u64)0); /* RSRVD */
+ set_64bit_val(buf, 16, (u64)obj_info[IRDMA_HMC_IW_SRQ].cnt);
set_64bit_val(buf, 24, (u64)obj_info[IRDMA_HMC_IW_HTE].cnt);
set_64bit_val(buf, 32, (u64)obj_info[IRDMA_HMC_IW_ARP].cnt);
set_64bit_val(buf, 40, (u64)0); /* RSVD */
@@ -4383,7 +5204,9 @@ static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id)
(u64)obj_info[IRDMA_HMC_IW_OOISC].cnt);
set_64bit_val(buf, 168,
(u64)obj_info[IRDMA_HMC_IW_OOISCFFL].cnt);
-
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_fpm_misc.loc_mem_pages)
+ irdma_set_loc_mem(buf);
commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
commit_fpm_mem.va = dev->fpm_commit_buf;
@@ -4592,6 +5415,7 @@ static bool irdma_cqp_ring_full(struct irdma_sc_cqp *cqp)
static u32 irdma_est_sd(struct irdma_sc_dev *dev,
struct irdma_hmc_info *hmc_info)
{
+ struct irdma_hmc_obj_info *pble_info;
int i;
u64 size = 0;
u64 sd;
@@ -4600,12 +5424,22 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev,
if (i != IRDMA_HMC_IW_PBLE)
size += round_up(hmc_info->hmc_obj[i].cnt *
hmc_info->hmc_obj[i].size, 512);
- size += round_up(hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt *
- hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].size, 512);
+
+ pble_info = &hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE];
+ if (dev->privileged)
+ size += round_up(pble_info->cnt * pble_info->size, 512);
if (size & 0x1FFFFF)
sd = (size >> 21) + 1; /* add 1 for remainder */
else
sd = size >> 21;
+ if (!dev->privileged && !dev->hmc_fpm_misc.loc_mem_pages) {
+ /* 2MB alignment for VF PBLE HMC */
+ size = pble_info->cnt * pble_info->size;
+ if (size & 0x1FFFFF)
+ sd += (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd += size >> 21;
+ }
if (sd > 0xFFFFFFFF) {
ibdev_dbg(to_ibdev(dev), "HMC: sd overflow[%lld]\n", sd);
sd = 0xFFFFFFFF - 1;
@@ -4615,17 +5449,6 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev,
}
/**
- * irdma_sc_query_rdma_features_done - poll cqp for query features done
- * @cqp: struct for cqp hw
- */
-static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp)
-{
- return irdma_sc_poll_for_cqp_op_done(cqp,
- IRDMA_CQP_OP_QUERY_RDMA_FEATURES,
- NULL);
-}
-
-/**
* irdma_sc_query_rdma_features - query RDMA features and FW ver
* @cqp: struct for cqp hw
* @buf: buffer to hold query info
@@ -4634,7 +5457,9 @@ static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp)
static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
struct irdma_dma_mem *buf, u64 scratch)
{
+ u32 tail, val, error;
__le64 *wqe;
+ int status;
u64 temp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
@@ -4654,9 +5479,15 @@ static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET,
16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
irdma_sc_cqp_post_sq(cqp);
+ status = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ if (error || status)
+ status = -EINVAL;
- return 0;
+ return status;
}
/**
@@ -4678,8 +5509,6 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev)
return -ENOMEM;
ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
- if (!ret_code)
- ret_code = irdma_sc_query_rdma_features_done(dev->cqp);
if (ret_code)
goto exit;
@@ -4703,8 +5532,6 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev)
return -ENOMEM;
ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
- if (!ret_code)
- ret_code = irdma_sc_query_rdma_features_done(dev->cqp);
if (ret_code)
goto exit;
@@ -4731,6 +5558,10 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev)
}
dev->feature_info[feat_type] = temp;
}
+
+ if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT)
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS;
+
exit:
dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
feat_buf.pa);
@@ -4786,22 +5617,354 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev,
}
/**
+ * irdma_get_rsrc_mem_config - pick local or host memory for each HMC object
+ * @dev: sc device struct
+ * @is_mrte_loc_mem: if true, MR's are kept in local memory
+ *
+ * Every HMC object is first marked as local memory. MR's then follow the
+ * IRDMA_MR_MEM_LOC field of feature_info[IRDMA_OBJ_1] when that feature
+ * word is non-zero and @is_mrte_loc_mem is false; otherwise they stay in
+ * local memory. PBLE's are always placed in host memory.
+ */
+static void irdma_get_rsrc_mem_config(struct irdma_sc_dev *dev, bool is_mrte_loc_mem)
+{
+	struct irdma_hmc_obj_info *objs = dev->hmc_info->hmc_obj;
+	enum irdma_hmc_obj_mem mr_loc = IRDMA_LOC_MEM;
+	u64 obj1_feat = dev->feature_info[IRDMA_OBJ_1];
+	int idx;
+
+	for (idx = IRDMA_HMC_IW_QP; idx < IRDMA_HMC_IW_MAX; idx++)
+		objs[idx].mem_loc = IRDMA_LOC_MEM;
+
+	if (obj1_feat && !is_mrte_loc_mem) {
+		u8 mem_type = (u8)FIELD_GET(IRDMA_MR_MEM_LOC, obj1_feat);
+
+		/* Only a cleared local memory bit moves MR's to the host. */
+		if (!(mem_type & IRDMA_OBJ_LOC_MEM_BIT))
+			mr_loc = IRDMA_HOST_MEM;
+	}
+
+	objs[IRDMA_HMC_IW_MR].mem_loc = mr_loc;
+	objs[IRDMA_HMC_IW_PBLE].mem_loc = IRDMA_HOST_MEM;
+
+	ibdev_dbg(to_ibdev(dev), "HMC: INFO: mrte_mem_loc = %d pble = %d\n",
+		  objs[IRDMA_HMC_IW_MR].mem_loc,
+		  objs[IRDMA_HMC_IW_PBLE].mem_loc);
+}
+
+/**
+ * irdma_cfg_sd_mem - allocate sd memory
+ * @dev: sc device struct (not used by the allocation itself)
+ * @hmc_info: ptr to irdma_hmc_info struct that owns the sd table
+ *
+ * Allocates one zeroed irdma_hmc_sd_entry per sd counted in
+ * hmc_info->sd_table.sd_cnt and stores the array in
+ * hmc_info->sd_table.sd_entry. On failure sd_entry is left unchanged.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int irdma_cfg_sd_mem(struct irdma_sc_dev *dev,
+			    struct irdma_hmc_info *hmc_info)
+{
+	void *sd_mem;
+
+	/*
+	 * kcalloc() checks the count * size product for overflow, so a large
+	 * sd_cnt fails cleanly instead of being truncated into a 32-bit size
+	 * and producing an undersized table.
+	 */
+	sd_mem = kcalloc(hmc_info->sd_table.sd_cnt,
+			 sizeof(struct irdma_hmc_sd_entry), GFP_KERNEL);
+	if (!sd_mem)
+		return -ENOMEM;
+	hmc_info->sd_table.sd_entry = sd_mem;
+
+	return 0;
+}
+
+/**
+ * irdma_get_objs_pages - get number of HMC pages needed for one memory type
+ * @dev: sc device struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @mem_loc: count only objects placed in this memory (local or host)
+ *
+ * Sums cnt * size, rounded up to 512 bytes, over every HMC object whose
+ * mem_loc matches @mem_loc.
+ *
+ * Return: the sum expressed in IRDMA_HMC_PAGE_SIZE units, rounded up.
+ */
+static u32 irdma_get_objs_pages(struct irdma_sc_dev *dev,
+				struct irdma_hmc_info *hmc_info,
+				enum irdma_hmc_obj_mem mem_loc)
+{
+	u64 total = 0;
+	int idx;
+
+	for (idx = IRDMA_HMC_IW_QP; idx < IRDMA_HMC_IW_MAX; idx++) {
+		/* Objects living in the other memory type do not count. */
+		if (hmc_info->hmc_obj[idx].mem_loc != mem_loc)
+			continue;
+
+		total += round_up(hmc_info->hmc_obj[idx].cnt *
+				  hmc_info->hmc_obj[idx].size, 512);
+	}
+
+	return DIV_ROUND_UP(total, IRDMA_HMC_PAGE_SIZE);
+}
+
+/**
+ * irdma_set_host_hmc_rsrc_gen_3 - calculate host hmc resources for gen 3
+ * @dev: sc device struct
+ *
+ * Splits the SDs left in hmc_fpm_misc->max_sds between MR's and PBLE's.
+ * When MR's live in host memory and more than IRDMA_MIN_PBLE_PAGES SDs are
+ * available, the MR count is capped to what fits in the SDs beyond that
+ * minimum and the SDs it uses are deducted. The PBLE count is then capped
+ * to what the remaining SDs can hold.
+ *
+ * The SD-to-object capacities are computed in 64 bits: the 32-bit product
+ * avail_sds * MAX_PBLE_PER_SD wraps once avail_sds reaches 0x4000, which
+ * would wrongly shrink the cap.
+ */
+static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev)
+{
+	struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+	struct irdma_hmc_info *hmc_info;
+	enum irdma_hmc_obj_mem mrte_loc;
+	u32 mrwanted, pblewanted;
+	u32 avail_sds, mr_sds;
+	u64 pble_cap;
+
+	hmc_info = dev->hmc_info;
+	hmc_fpm_misc = &dev->hmc_fpm_misc;
+	avail_sds = hmc_fpm_misc->max_sds;
+	mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+	mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
+	pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
+
+	if (mrte_loc == IRDMA_HOST_MEM && avail_sds > IRDMA_MIN_PBLE_PAGES) {
+		mr_sds = avail_sds - IRDMA_MIN_PBLE_PAGES;
+		/* The result never exceeds mrwanted, so it fits in a u32. */
+		mrwanted = min_t(u64, mrwanted, (u64)mr_sds * MAX_MR_PER_SD);
+		hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
+		avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD);
+	}
+
+	pble_cap = (u64)avail_sds * MAX_PBLE_PER_SD;
+
+	if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]) &&
+	    pblewanted > pble_cap)
+		ibdev_dbg(to_ibdev(dev),
+			  "HMC: Warn: Resource version 2: pble wanted = 0x%x available = 0x%llx\n",
+			  pblewanted, pble_cap);
+
+	/* The result never exceeds pblewanted, so it fits in a u32. */
+	pblewanted = min_t(u64, pblewanted, pble_cap);
+	hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+}
+
+/**
+ * irdma_verify_commit_fpm_gen_3 - check that local memory objects fit
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ *
+ * Compares the pages needed by all objects placed in local memory against
+ * @max_pages. When they fit, the pages are deducted from both max_sds and
+ * loc_mem_pages in dev->hmc_fpm_misc. The rrf/xf/timer/q1 counts computed
+ * here are only reported in the failure debug output; no object count is
+ * modified.
+ *
+ * Return: 0 if the objects fit, -EINVAL otherwise.
+ */
+static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 rrffl_cnt = 0;
+ u32 xffl_cnt = 0;
+ u32 q1fl_cnt;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ /* Counts derived from qpwanted, used for the debug output below. */
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ rrffl_cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+
+ if (xf_cnt)
+ xffl_cnt = xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+
+ q1fl_cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ /* The actual check: pages for everything placed in local memory. */
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed > max_pages) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts rrf_cnt = %u rrffl_cnt = %u timer_cnt = %u",
+ rrf_cnt, rrffl_cnt, timer_cnt);
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts xf_cnt = %u xffl_cnt = %u q1fl_cnt = %u",
+ xf_cnt, xffl_cnt, q1fl_cnt);
+
+ return -EINVAL;
+ }
+
+ /* Pages used by local objects are deducted from both totals. */
+ hmc_fpm_misc->max_sds -= pages_needed;
+ hmc_fpm_misc->loc_mem_pages -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * irdma_set_loc_hmc_rsrc_gen_3 - calculate hmc resources for gen 3
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ *
+ * When IRDMA_MANAGE_RSRC_VER2 is set in the feature flags, the work is
+ * handed to irdma_verify_commit_fpm_gen_3() and no counts are set here.
+ * Otherwise the object counts are derived from @qpwanted, and IRD/ORD are
+ * halved until the objects placed in local memory fit in @max_pages. On
+ * success the resulting IRD/ORD are stored in dev->hw_attrs and the pages
+ * used are deducted from hmc_fpm_misc->max_sds.
+ *
+ * Return: 0 on success, -EINVAL if the objects do not fit.
+ */
+static int irdma_set_loc_hmc_rsrc_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 ird, ord;
+
+ /* Resource version 2: counts are only verified, not recomputed. */
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return irdma_verify_commit_fpm_gen_3(dev, max_pages, qpwanted);
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ ird = dev->hw_attrs.max_hw_ird;
+ ord = dev->hw_attrs.max_hw_ord;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, qpwanted * 2);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
+ min(qpwanted * 8, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt, rrf_cnt);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_XF].max_cnt, xf_cnt);
+ /* NOTE(review): XFFL is sized from the uncapped xf_cnt, not XF.cnt - confirm intended. */
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt =
+ xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt =
+ min(timer_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt);
+
+ /*
+ * Size Q1 and Q1FL from the current IRD, then halve IRD and ORD and
+ * retry for as long as the local memory objects exceed max_pages.
+ */
+ do {
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = roundup_pow_of_two(ird * 2 * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed <= max_pages)
+ break;
+
+ ird /= 2;
+ ord /= 2;
+ } while (ird >= IRDMA_MIN_IRD);
+
+ /* IRD fell below the minimum without the objects fitting. */
+ if (ird < IRDMA_MIN_IRD) {
+ ibdev_dbg(to_ibdev(dev), "HMC: FAIL: IRD=%u Q1 CNT = %u\n",
+ ird, hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt);
+ return -EINVAL;
+ }
+
+ dev->hw_attrs.max_hw_ird = ird;
+ dev->hw_attrs.max_hw_ord = ord;
+ /* Pages used by local objects are deducted from max_sds. */
+ hmc_fpm_misc->max_sds -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * cfg_fpm_value_gen_3 - configure fpm for gen 3
+ * @dev: sc device struct
+ * @hmc_info: ptr to irdma_hmc_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * Chooses the memory location of each HMC object, starts every count at
+ * its maximum, then sizes the local memory objects for the wanted QP
+ * count, halving that count until they fit. Afterwards the host memory
+ * MR/PBLE counts are set, the result is committed with
+ * irdma_sc_cfg_iw_fpm() and the sd entry table is allocated.
+ *
+ * Return: 0 on success, -EINVAL if the resources cannot be made to fit,
+ * otherwise the error from the commit or from the sd table allocation.
+ */
+static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+{
+ enum irdma_hmc_obj_mem mrte_loc;
+ u32 mrwanted, qpwanted;
+ int i, ret_code = 0;
+ u32 loc_mem_pages;
+ bool is_mrte_loc_mem;
+
+ loc_mem_pages = hmc_fpm_misc->loc_mem_pages;
+ /* MR's are forced local when every sd is a local memory page. */
+ is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds ?
+ true : false;
+
+ irdma_get_rsrc_mem_config(dev, is_mrte_loc_mem);
+ mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+
+ /* NOTE(review): presumably holds back pages for PBLE's - confirm. */
+ if (is_mrte_loc_mem)
+ loc_mem_pages -= IRDMA_MIN_PBLE_PAGES;
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: mrte_loc %d loc_mem %u fpm max sds %u host_obj %d\n",
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc,
+ hmc_fpm_misc->loc_mem_pages, hmc_fpm_misc->max_sds,
+ is_mrte_loc_mem);
+
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt;
+ qpwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+
+ /* These objects get a zero maximum, so their counts below become 0. */
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt = 0;
+
+ if (!FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt,
+ (u32)IRDMA_FSIAV_CNT_MAX);
+
+ /* Start every object at its maximum count. */
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
+ /*
+ * Halve the QP count until the local memory objects fit. With
+ * resource version 2 there is no retry: the first failure is final.
+ */
+ while (qpwanted >= IRDMA_MIN_QP_CNT) {
+ if (!irdma_set_loc_hmc_rsrc_gen_3(dev, loc_mem_pages, qpwanted))
+ break;
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return -EINVAL;
+
+ qpwanted /= 2;
+ if (mrte_loc == IRDMA_LOC_MEM) {
+ mrwanted = qpwanted * IRDMA_MIN_MR_PER_QP;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, mrwanted);
+ }
+ }
+
+ if (qpwanted < IRDMA_MIN_QP_CNT) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: ERROR: could not allocate fpm resources\n");
+ return -EINVAL;
+ }
+
+ irdma_set_host_hmc_rsrc_gen_3(dev);
+ ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_iw_fpm returned error_code[x%08X]\n",
+ readl(dev->hw_regs[IRDMA_CQPERRCODES]));
+
+ return ret_code;
+ }
+
+ return irdma_cfg_sd_mem(dev, hmc_info);
+}
+
+/**
* irdma_cfg_fpm_val - configure HMC objects
* @dev: sc device struct
* @qp_count: desired qp count
*/
int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
{
- struct irdma_virt_mem virt_mem;
- u32 i, mem_size;
u32 qpwanted, mrwanted, pblewanted;
- u32 powerof2, hte;
+ u32 powerof2, hte, i;
u32 sd_needed;
u32 sd_diff;
u32 loop_count = 0;
struct irdma_hmc_info *hmc_info;
struct irdma_hmc_fpm_misc *hmc_fpm_misc;
int ret_code = 0;
+ u32 max_sds;
hmc_info = dev->hmc_info;
hmc_fpm_misc = &dev->hmc_fpm_misc;
@@ -4814,14 +5977,16 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
return ret_code;
}
+ max_sds = hmc_fpm_misc->max_sds;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ return cfg_fpm_value_gen_3(dev, hmc_info, hmc_fpm_misc);
+
for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
sd_needed = irdma_est_sd(dev, hmc_info);
- ibdev_dbg(to_ibdev(dev),
- "HMC: FW max resources sd_needed[%08d] first_sd_index[%04d]\n",
- sd_needed, hmc_info->first_sd_index);
- ibdev_dbg(to_ibdev(dev), "HMC: sd count %d where max sd is %d\n",
- hmc_info->sd_table.sd_cnt, hmc_fpm_misc->max_sds);
+ ibdev_dbg(to_ibdev(dev), "HMC: sd count %u where max sd is %u\n",
+ hmc_info->sd_table.sd_cnt, max_sds);
qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt);
@@ -4835,21 +6000,21 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
ibdev_dbg(to_ibdev(dev),
- "HMC: req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d, mc=%d, av=%d\n",
- qp_count, hmc_fpm_misc->max_sds,
+ "HMC: req_qp=%d max_sd=%u, max_qp = %u, max_cq=%u, max_mr=%u, max_pble=%u, mc=%d, av=%u\n",
+ qp_count, max_sds,
hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt,
hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt,
hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt,
hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt,
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt,
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt =
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt;
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt;
hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt =
hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt;
-
hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1;
while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt)
@@ -4860,7 +6025,7 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
min(2 * qpwanted, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt);
- hmc_info->hmc_obj[IRDMA_HMC_IW_RESERVED].cnt = 0; /* Reserved */
+ hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt = 0; /* Reserved */
hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
hte = round_up(qpwanted + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, 512);
@@ -4898,11 +6063,12 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
if (!(loop_count % 2) && qpwanted > 128) {
qpwanted /= 2;
} else {
- mrwanted /= 2;
pblewanted /= 2;
+ mrwanted /= 2;
}
continue;
}
+
if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) {
pblewanted -= 256 * FPM_MULTIPLIER * sd_diff;
@@ -4928,14 +6094,13 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
if (sd_needed > hmc_fpm_misc->max_sds) {
ibdev_dbg(to_ibdev(dev),
- "HMC: cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n",
+ "HMC: cfg_fpm failed loop_cnt=%u, sd_needed=%u, max sd count %u\n",
loop_count, sd_needed, hmc_info->sd_table.sd_cnt);
return -EINVAL;
}
- if (loop_count > 1 && sd_needed < hmc_fpm_misc->max_sds) {
- pblewanted += (hmc_fpm_misc->max_sds - sd_needed) * 256 *
- FPM_MULTIPLIER;
+ if (loop_count > 1 && sd_needed < max_sds) {
+ pblewanted += (max_sds - sd_needed) * 256 * FPM_MULTIPLIER;
hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
sd_needed = irdma_est_sd(dev, hmc_info);
}
@@ -4959,18 +6124,7 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
return ret_code;
}
- mem_size = sizeof(struct irdma_hmc_sd_entry) *
- (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
- virt_mem.size = mem_size;
- virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL);
- if (!virt_mem.va) {
- ibdev_dbg(to_ibdev(dev),
- "HMC: failed to allocate memory for sd_entry buffer\n");
- return -ENOMEM;
- }
- hmc_info->sd_table.sd_entry = virt_mem.va;
-
- return ret_code;
+ return irdma_cfg_sd_mem(dev, hmc_info);
}
/**
@@ -5242,6 +6396,22 @@ static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
&pcmdinfo->in.u.mc_modify.info,
pcmdinfo->in.u.mc_modify.scratch);
break;
+ case IRDMA_OP_SRQ_CREATE:
+ status = irdma_sc_srq_create(pcmdinfo->in.u.srq_create.srq,
+ pcmdinfo->in.u.srq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_MODIFY:
+ status = irdma_sc_srq_modify(pcmdinfo->in.u.srq_modify.srq,
+ &pcmdinfo->in.u.srq_modify.info,
+ pcmdinfo->in.u.srq_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_DESTROY:
+ status = irdma_sc_srq_destroy(pcmdinfo->in.u.srq_destroy.srq,
+ pcmdinfo->in.u.srq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
default:
status = -EOPNOTSUPP;
break;
@@ -5314,14 +6484,26 @@ void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable)
*/
void sc_vsi_update_stats(struct irdma_sc_vsi *vsi)
{
- struct irdma_gather_stats *gather_stats;
- struct irdma_gather_stats *last_gather_stats;
+ struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats;
+ struct irdma_gather_stats *gather_stats =
+ vsi->pestat->gather_info.gather_stats_va;
+ struct irdma_gather_stats *last_gather_stats =
+ vsi->pestat->gather_info.last_gather_stats_va;
+ const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map;
+ u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx;
+ u16 i;
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < max_stat_idx; i++) {
+ u16 idx = map[i].byteoff / sizeof(u64);
+
+ hw_stats->stats_val[i] = gather_stats->val[idx];
+ }
+ return;
+ }
- gather_stats = vsi->pestat->gather_info.gather_stats_va;
- last_gather_stats = vsi->pestat->gather_info.last_gather_stats_va;
- irdma_update_stats(&vsi->pestat->hw_stats, gather_stats,
- last_gather_stats, vsi->dev->hw_stats_map,
- vsi->dev->hw_attrs.max_stat_idx);
+ irdma_update_stats(hw_stats, gather_stats, last_gather_stats,
+ map, max_stat_idx);
}
/**
@@ -5356,6 +6538,9 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev)
case IRDMA_GEN_2:
icrdma_init_hw(dev);
break;
+ case IRDMA_GEN_3:
+ ig3rdma_init_hw(dev);
+ break;
}
}
@@ -5381,10 +6566,15 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
dev->fpm_commit_buf = info->fpm_commit_buf;
dev->hw = info->hw;
dev->hw->hw_addr = info->bar0;
+ dev->protocol_used = info->protocol_used;
/* Setup the hardware limits, hmc may limit further */
dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID;
+ dev->hw_attrs.min_hw_srq_id = IRDMA_MIN_IW_SRQ_ID;
dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES;
- dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES_GEN_3;
+ else
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES;
dev->hw_attrs.min_hw_ceq_size = IRDMA_MIN_CEQ_ENTRIES;
dev->hw_attrs.max_hw_ceq_size = IRDMA_MAX_CEQ_ENTRIES;
dev->hw_attrs.uk_attrs.min_hw_cq_size = IRDMA_MIN_CQ_SIZE;
@@ -5409,21 +6599,39 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
dev->hw_attrs.max_sleep_count = IRDMA_SLEEP_COUNT;
dev->hw_attrs.max_cqp_compl_wait_time_ms = CQP_COMPL_WAIT_TIME_MS;
- dev->hw_attrs.uk_attrs.hw_rev = ver;
+ if (!dev->privileged) {
+ ret_code = irdma_vchnl_req_get_hmc_fcn(dev);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get HMC function ret = %d\n",
+ ret_code);
+
+ return ret_code;
+ }
+ }
+
irdma_sc_init_hw(dev);
- if (irdma_wait_pe_ready(dev))
- return -ETIMEDOUT;
+ if (dev->privileged) {
+ if (irdma_wait_pe_ready(dev))
+ return -ETIMEDOUT;
- val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
- db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
- if (db_size != IRDMA_PE_DB_SIZE_4M && db_size != IRDMA_PE_DB_SIZE_8M) {
- ibdev_dbg(to_ibdev(dev),
- "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n",
- val, db_size);
- return -ENODEV;
+ val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
+ db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
+ if (db_size != IRDMA_PE_DB_SIZE_4M &&
+ db_size != IRDMA_PE_DB_SIZE_8M) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n",
+ val, db_size);
+ return -ENODEV;
+ }
+ } else {
+ ret_code = irdma_vchnl_req_get_reg_layout(dev);
+ if (ret_code)
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Register layout failed ret = %d\n",
+ ret_code);
}
- dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET];
return ret_code;
}
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
index 2cb4b96db721..983b22d7ae23 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -14,6 +14,18 @@
#define IRDMA_PE_DB_SIZE_4M 1
#define IRDMA_PE_DB_SIZE_8M 2
+#define IRDMA_IRD_HW_SIZE_4_GEN3 0
+#define IRDMA_IRD_HW_SIZE_8_GEN3 1
+#define IRDMA_IRD_HW_SIZE_16_GEN3 2
+#define IRDMA_IRD_HW_SIZE_32_GEN3 3
+#define IRDMA_IRD_HW_SIZE_64_GEN3 4
+#define IRDMA_IRD_HW_SIZE_128_GEN3 5
+#define IRDMA_IRD_HW_SIZE_256_GEN3 6
+#define IRDMA_IRD_HW_SIZE_512_GEN3 7
+#define IRDMA_IRD_HW_SIZE_1024_GEN3 8
+#define IRDMA_IRD_HW_SIZE_2048_GEN3 9
+#define IRDMA_IRD_HW_SIZE_4096_GEN3 10
+
#define IRDMA_IRD_HW_SIZE_4 0
#define IRDMA_IRD_HW_SIZE_16 1
#define IRDMA_IRD_HW_SIZE_64 2
@@ -114,6 +126,13 @@ enum irdma_protocol_used {
#define IRDMA_UPDATE_SD_BUFF_SIZE 128
#define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES)
+#define ENABLE_LOC_MEM 63
+#define IRDMA_ATOMICS_ALLOWED_BIT 1
+#define MAX_PBLE_PER_SD 0x40000
+#define MAX_PBLE_SD_PER_FCN 0x400
+#define MAX_MR_PER_SD 0x8000
+#define MAX_MR_SD_PER_FCN 0x80
+#define IRDMA_PBLE_COMMIT_OFFSET 112
#define IRDMA_MAX_QUANTA_PER_WR 8
#define IRDMA_QP_SW_MAX_WQ_QUANTA 32768
@@ -121,6 +140,10 @@ enum irdma_protocol_used {
#define IRDMA_QP_SW_MAX_RQ_QUANTA 32768
#define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \
((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr))
+#define IRDMA_SRQ_MIN_QUANTA 8
+#define IRDMA_SRQ_MAX_QUANTA 262144
+#define IRDMA_MAX_SRQ_WRS \
+ ((IRDMA_SRQ_MAX_QUANTA - IRDMA_RQ_RSVD) / IRDMA_MAX_QUANTA_PER_WR)
#define IRDMAQP_TERM_SEND_TERM_AND_FIN 0
#define IRDMAQP_TERM_SEND_TERM_ONLY 1
@@ -147,8 +170,13 @@ enum irdma_protocol_used {
#define IRDMA_SQ_RSVD 258
#define IRDMA_RQ_RSVD 1
-#define IRDMA_FEATURE_RTS_AE 1ULL
-#define IRDMA_FEATURE_CQ_RESIZE 2ULL
+#define IRDMA_FEATURE_RTS_AE BIT_ULL(0)
+#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1)
+#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5)
+#define IRDMA_FEATURE_ATOMIC_OPS BIT_ULL(6)
+#define IRDMA_FEATURE_SRQ BIT_ULL(7)
+#define IRDMA_FEATURE_CQE_TIMESTAMPING BIT_ULL(8)
+
#define IRDMAQP_OP_RDMA_WRITE 0x00
#define IRDMAQP_OP_RDMA_READ 0x01
#define IRDMAQP_OP_RDMA_SEND 0x03
@@ -161,6 +189,8 @@ enum irdma_protocol_used {
#define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b
#define IRDMAQP_OP_NOP 0x0c
#define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d
+#define IRDMAQP_OP_ATOMIC_FETCH_ADD 0x0f
+#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11
#define IRDMAQP_OP_GEN_RTS_AE 0x30
enum irdma_cqp_op_type {
@@ -212,9 +242,12 @@ enum irdma_cqp_op_type {
IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46,
IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47,
IRDMA_OP_CQ_MODIFY = 48,
+ IRDMA_OP_SRQ_CREATE = 49,
+ IRDMA_OP_SRQ_MODIFY = 50,
+ IRDMA_OP_SRQ_DESTROY = 51,
/* Must be last entry*/
- IRDMA_MAX_CQP_OPS = 49,
+ IRDMA_MAX_CQP_OPS = 52,
};
/* CQP SQ WQES */
@@ -224,6 +257,9 @@ enum irdma_cqp_op_type {
#define IRDMA_CQP_OP_CREATE_CQ 0x03
#define IRDMA_CQP_OP_MODIFY_CQ 0x04
#define IRDMA_CQP_OP_DESTROY_CQ 0x05
+#define IRDMA_CQP_OP_CREATE_SRQ 0x06
+#define IRDMA_CQP_OP_MODIFY_SRQ 0x07
+#define IRDMA_CQP_OP_DESTROY_SRQ 0x08
#define IRDMA_CQP_OP_ALLOC_STAG 0x09
#define IRDMA_CQP_OP_REG_MR 0x0a
#define IRDMA_CQP_OP_QUERY_STAG 0x0b
@@ -265,97 +301,6 @@ enum irdma_cqp_op_type {
#define IRDMA_CQP_OP_GATHER_STATS 0x2e
#define IRDMA_CQP_OP_UP_MAP 0x2f
-/* Async Events codes */
-#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102
-#define IRDMA_AE_AMP_INVALID_STAG 0x0103
-#define IRDMA_AE_AMP_BAD_QP 0x0104
-#define IRDMA_AE_AMP_BAD_PD 0x0105
-#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106
-#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107
-#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108
-#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109
-#define IRDMA_AE_AMP_TO_WRAP 0x010a
-#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c
-#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d
-#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
-#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
-#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
-#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
-#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
-#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
-#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
-#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
-#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
-#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
-#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
-#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
-#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b
-#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c
-#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d
-#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e
-#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f
-#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120
-#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121
-#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
-#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133
-#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
-#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135
-#define IRDMA_AE_BAD_CLOSE 0x0201
-#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
-#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203
-#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
-#define IRDMA_AE_STAG_ZERO_INVALID 0x0206
-#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207
-#define IRDMA_AE_IB_INVALID_REQUEST 0x0208
-#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a
-#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b
-#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c
-#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
-#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
-#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
-#define IRDMA_AE_INVALID_REQUEST 0x0223
-#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
-#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
-#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
-#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305
-#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
-#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307
-#define IRDMA_AE_DDP_NO_L_BIT 0x0308
-#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
-#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
-#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
-#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
-#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316
-#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380
-#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381
-#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382
-#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383
-#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401
-#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402
-#define IRDMA_AE_STALE_ARP_ENTRY 0x0403
-#define IRDMA_AE_INVALID_AH_ENTRY 0x0406
-#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501
-#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502
-#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503
-#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
-#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
-#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507
-#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508
-#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509
-#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a
-#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
-#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
-#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
-#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
-#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
-#define IRDMA_AE_RESET_SENT 0x0601
-#define IRDMA_AE_TERMINATE_SENT 0x0602
-#define IRDMA_AE_RESET_NOT_SENT 0x0603
-#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700
-#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
-#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
-#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
-
#define FLD_LS_64(dev, val, field) \
(((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
#define FLD_RS_64(dev, val, field) \
@@ -393,9 +338,13 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61)
#define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32)
#define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0)
-#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0)
#define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63)
-#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(53, 52)
+#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52)
+#define IRDMA_SD_MAX GENMASK_ULL(15, 0)
+#define IRDMA_MEM_MAX GENMASK_ULL(15, 0)
+#define IRDMA_QP_MEM_LOC GENMASK_ULL(47, 44)
+#define IRDMA_MR_MEM_LOC GENMASK_ULL(27, 24)
#define IRDMA_CQPSQ_WS_ENABLENODE BIT_ULL(62)
#define IRDMA_CQPSQ_WS_NODETYPE BIT_ULL(61)
@@ -404,16 +353,16 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_WS_VMVFTYPE GENMASK_ULL(55, 54)
#define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42)
#define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32)
-#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(25, 16)
-#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(9, 0)
-#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(57, 48)
+#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(29, 16)
+#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(13, 0)
+#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(63, 48)
#define IRDMA_CQPSQ_WS_WEIGHT GENMASK_ULL(38, 32)
#define IRDMA_CQPSQ_UP_WQEVALID BIT_ULL(63)
#define IRDMA_CQPSQ_UP_USEVLAN BIT_ULL(62)
#define IRDMA_CQPSQ_UP_USEOVERRIDE BIT_ULL(61)
#define IRDMA_CQPSQ_UP_OP GENMASK_ULL(37, 32)
-#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(15, 0)
#define IRDMA_CQPSQ_UP_CNPOVERRIDE GENMASK_ULL(37, 32)
#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID BIT_ULL(63)
#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN GENMASK_ULL(31, 0)
@@ -448,6 +397,16 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24)
#define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9)
+#define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5)
+#define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8)
+#define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0)
+
+#define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60)
+#define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56)
+#define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52)
+#define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48)
+#define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4)
+
#define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0)
#define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
#define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0)
@@ -461,6 +420,8 @@ enum irdma_cqp_op_type {
#define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0)
+#define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32)
+
#define IRDMA_CQ_MINERR GENMASK_ULL(15, 0)
#define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16)
#define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32)
@@ -469,6 +430,7 @@ enum irdma_cqp_op_type {
#define IRDMA_CQ_ERROR BIT_ULL(55)
#define IRDMA_CQ_SQ BIT_ULL(62)
+#define IRDMA_CQ_SRQ BIT_ULL(52)
#define IRDMA_CQ_VALID BIT_ULL(63)
#define IRDMA_CQ_IMMVALID BIT_ULL(62)
#define IRDMA_CQ_UDSMACVALID BIT_ULL(61)
@@ -476,8 +438,6 @@ enum irdma_cqp_op_type {
#define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0)
#define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48)
-#define IRDMA_CQ_IMMDATA_S 0
-#define IRDMA_CQ_IMMDATA_M (0xffffffffffffffffULL << IRDMA_CQ_IMMVALID_S)
#define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
#define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
#define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0)
@@ -508,6 +468,17 @@ enum irdma_cqp_op_type {
#define IRDMA_AEQE_Q2DATA GENMASK_ULL(62, 61)
#define IRDMA_AEQE_VALID BIT_ULL(63)
+#define IRDMA_AEQE_Q2DATA_GEN_3 GENMASK_ULL(5, 4)
+#define IRDMA_AEQE_TCPSTATE_GEN_3 GENMASK_ULL(3, 0)
+#define IRDMA_AEQE_QPCQID_GEN_3 GENMASK_ULL(24, 0)
+#define IRDMA_AEQE_AECODE_GEN_3 GENMASK_ULL(61, 50)
+#define IRDMA_AEQE_OVERFLOW_GEN_3 BIT_ULL(62)
+#define IRDMA_AEQE_WQDESCIDX_GEN_3 GENMASK_ULL(49, 32)
+#define IRDMA_AEQE_IWSTATE_GEN_3 GENMASK_ULL(31, 29)
+#define IRDMA_AEQE_AESRC_GEN_3 GENMASK_ULL(28, 25)
+#define IRDMA_AEQE_CMPL_CTXT_S 6
+#define IRDMA_AEQE_CMPL_CTXT GENMASK_ULL(63, 6)
+
#define IRDMA_UDA_QPSQ_NEXT_HDR GENMASK_ULL(23, 16)
#define IRDMA_UDA_QPSQ_OPCODE GENMASK_ULL(37, 32)
#define IRDMA_UDA_QPSQ_L4LEN GENMASK_ULL(45, 42)
@@ -530,11 +501,14 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_WQEVALID BIT_ULL(63)
#define IRDMA_CQPSQ_TPHVAL GENMASK_ULL(7, 0)
-#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(17, 8)
+#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(23, 8)
#define IRDMA_CQPSQ_TPHEN BIT_ULL(60)
#define IRDMA_CQPSQ_PBUFADDR IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_PASID GENMASK_ULL(51, 32)
+#define IRDMA_CQPSQ_PASID_VALID BIT_ULL(62)
+
/* Create/Modify/Destroy QP */
#define IRDMA_CQPSQ_QP_NEWMSS GENMASK_ULL(45, 32)
@@ -566,10 +540,30 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_QP_DBSHADOWADDR IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_SRQ_RQSIZE GENMASK_ULL(3, 0)
+#define IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE GENMASK_ULL(5, 4)
+#define IRDMA_CQPSQ_SRQ_SRQ_LIMIT GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_SRQ_SRQCTX GENMASK_ULL(63, 6)
+#define IRDMA_CQPSQ_SRQ_PD_ID GENMASK_ULL(39, 16)
+#define IRDMA_CQPSQ_SRQ_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_SRQ_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE GENMASK_ULL(45, 44)
+#define IRDMA_CQPSQ_SRQ_VIRTMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_SRQ_TPH_EN BIT_ULL(60)
+#define IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT BIT_ULL(61)
+#define IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_SRQ_TPH_VALUE GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S 8
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR GENMASK_ULL(63, 8)
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S 6
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR GENMASK_ULL(63, 6)
+
#define IRDMA_CQPSQ_CQ_CQSIZE GENMASK_ULL(20, 0)
#define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0)
#define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0)
+#define IRDMA_CQPSQ_CQ_CQID_HIGH GENMASK_ULL(52, 50)
+#define IRDMA_CQPSQ_CQ_CEQID_HIGH GENMASK_ULL(59, 54)
#define IRDMA_CQPSQ_CQ_OP GENMASK_ULL(37, 32)
#define IRDMA_CQPSQ_CQ_CQRESIZE BIT_ULL(43)
#define IRDMA_CQPSQ_CQ_LPBLSIZE GENMASK_ULL(45, 44)
@@ -590,6 +584,7 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_STAG_MR BIT_ULL(43)
#define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42)
#define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58)
+#define IRDMA_CQPSQ_STAG_PDID_HI GENMASK_ULL(59, 54)
#define IRDMA_CQPSQ_STAG_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
#define IRDMA_CQPSQ_STAG_HPAGESIZE GENMASK_ULL(47, 46)
@@ -600,7 +595,8 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61)
#define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX
-#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61)
#define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
#define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX
@@ -628,11 +624,8 @@ enum irdma_cqp_op_type {
/* Manage Push Page - MPP */
#define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
#define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff
-
-#define IRDMA_CQPSQ_MPP_QS_HANDLE GENMASK_ULL(9, 0)
-#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(31, 0)
#define IRDMA_CQPSQ_MPP_PPTYPE GENMASK_ULL(61, 60)
-
#define IRDMA_CQPSQ_MPP_FREE_PAGE BIT_ULL(62)
/* Upload Context - UCTX */
@@ -651,6 +644,8 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_CEQ_CEQSIZE GENMASK_ULL(21, 0)
#define IRDMA_CQPSQ_CEQ_CEQID GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_CEQ_CEQID_HIGH GENMASK_ULL(15, 10)
+
#define IRDMA_CQPSQ_CEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
#define IRDMA_CQPSQ_CEQ_VMAP BIT_ULL(47)
#define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE BIT_ULL(46)
@@ -660,10 +655,10 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_AEQ_VMAP BIT_ULL(47)
#define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
-#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(18, 0)
-
+#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(20, 0)
#define IRDMA_COMMIT_FPM_BASE_S 32
-#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(15, 0)
+
#define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0)
#define IRDMA_CQPSQ_FWQE_AESOURCE GENMASK_ULL(19, 16)
#define IRDMA_CQPSQ_FWQE_RQMNERR GENMASK_ULL(15, 0)
@@ -675,6 +670,10 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60)
#define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61)
#define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32)
#define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0)
#define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62)
#define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0)
@@ -693,9 +692,12 @@ enum irdma_cqp_op_type {
#define IRDMA_CQPSQ_SUSPENDQP_QPID GENMASK_ULL(23, 0)
#define IRDMA_CQPSQ_RESUMEQP_QSHANDLE GENMASK_ULL(31, 0)
#define IRDMA_CQPSQ_RESUMEQP_QPID GENMASK(23, 0)
+#define IRDMA_MANAGE_RSRC_VER2 BIT_ULL(2)
#define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001
#define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005
+#define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006
+#define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007
#define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000
#define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000
@@ -712,6 +714,11 @@ enum irdma_cqp_op_type {
#define IRDMAQPC_INSERTL2TAG2 BIT_ULL(11)
#define IRDMAQPC_LIMIT GENMASK_ULL(13, 12)
+#define IRDMAQPC_USE_SRQ BIT_ULL(10)
+#define IRDMAQPC_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMAQPC_PASID GENMASK_ULL(19, 0)
+#define IRDMAQPC_PASID_VALID BIT_ULL(11)
+
#define IRDMAQPC_ECN_EN BIT_ULL(14)
#define IRDMAQPC_DROPOOOSEG BIT_ULL(15)
#define IRDMAQPC_DUPACK_THRESH GENMASK_ULL(18, 16)
@@ -782,21 +789,31 @@ enum irdma_cqp_op_type {
#define IRDMAQPC_CWNDROCE GENMASK_ULL(55, 32)
#define IRDMAQPC_SNDWL1 GENMASK_ULL(31, 0)
#define IRDMAQPC_SNDWL2 GENMASK_ULL(63, 32)
-#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(45, 32)
+#define IRDMAQPC_MINRNR_TIMER GENMASK_ULL(4, 0)
+#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(46, 32)
#define IRDMAQPC_RTOMIN GENMASK_ULL(63, 57)
#define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0)
#define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48)
#define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54)
-#define IRDMAQPC_TXCQNUM GENMASK_ULL(18, 0)
-#define IRDMAQPC_RXCQNUM GENMASK_ULL(50, 32)
+#define IRDMAQPC_TXCQNUM GENMASK_ULL(24, 0)
+#define IRDMAQPC_RXCQNUM GENMASK_ULL(56, 32)
#define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0)
#define IRDMAQPC_Q2ADDR GENMASK_ULL(63, 8)
#define IRDMAQPC_LASTBYTESENT GENMASK_ULL(7, 0)
#define IRDMAQPC_MACADDRESS GENMASK_ULL(63, 16)
#define IRDMAQPC_ORDSIZE GENMASK_ULL(7, 0)
+#define IRDMAQPC_LOCALACKTIMEOUT GENMASK_ULL(12, 8)
+#define IRDMAQPC_RNRNAK_TMR GENMASK_ULL(4, 0)
+#define IRDMAQPC_ORDSIZE_GEN3 GENMASK_ULL(10, 0)
+#define IRDMAQPC_REMOTE_ATOMIC_EN BIT_ULL(18)
+#define IRDMAQPC_STAT_INDEX_GEN3 GENMASK_ULL(47, 32)
+#define IRDMAQPC_PKT_LIMIT GENMASK_ULL(55, 48)
+
#define IRDMAQPC_IRDSIZE GENMASK_ULL(18, 16)
+#define IRDMAQPC_IRDSIZE_GEN3 GENMASK_ULL(17, 14)
+
#define IRDMAQPC_UDPRIVCQENABLE BIT_ULL(19)
#define IRDMAQPC_WRRDRSPOK BIT_ULL(20)
#define IRDMAQPC_RDOK BIT_ULL(21)
@@ -833,6 +850,7 @@ enum irdma_cqp_op_type {
#define IRDMA_FEATURE_INFO GENMASK_ULL(47, 0)
#define IRDMA_FEATURE_CNT GENMASK_ULL(47, 32)
#define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48)
+#define IRDMA_FEATURE_RSRC_MAX GENMASK_ULL(31, 0)
#define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32)
#define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43)
@@ -856,7 +874,7 @@ enum irdma_cqp_op_type {
#define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
#define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0)
#define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32)
-#define IRDMAQPSQ_AHID GENMASK_ULL(16, 0)
+#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0)
#define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57)
#define IRDMA_INLINE_VALID_S 7
@@ -869,6 +887,9 @@ enum irdma_cqp_op_type {
#define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0)
+
#define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48)
#define IRDMAQPSQ_VABASEDTO BIT_ULL(53)
#define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54)
@@ -879,6 +900,8 @@ enum irdma_cqp_op_type {
#define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_REMOTE_ATOMICS_EN BIT_ULL(55)
+
#define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0)
#define IRDMAQPSQ_STAGKEY GENMASK_ULL(7, 0)
@@ -903,11 +926,14 @@ enum irdma_cqp_op_type {
#define IRDMAPFINT_OICR_PE_PUSH_M BIT(27)
#define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28)
-#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(18, 0)
-#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(19, 0)
+#define IRDMA_QUERY_FPM_LOC_MEM_PAGES GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(31, 0)
+#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(31, 0)
#define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0)
-#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(45, 32)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3 GENMASK_ULL(47, 32)
#define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0)
+#define IRDMA_QUERY_FPM_MAX_IRD GENMASK_ULL(53, 50)
#define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32)
#define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32)
#define IRDMA_QUERY_FPM_HTMULTIPLIER GENMASK_ULL(19, 16)
@@ -1103,7 +1129,7 @@ enum irdma_alignment {
IRDMA_CEQ_ALIGNMENT = 0x100,
IRDMA_CQ0_ALIGNMENT = 0x100,
IRDMA_SD_BUF_ALIGNMENT = 0x80,
- IRDMA_FEATURE_BUF_ALIGNMENT = 0x8,
+ IRDMA_FEATURE_BUF_ALIGNMENT = 0x10,
};
enum icrdma_protocol_used {
diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c
index ac58088a8e41..da18add141da 100644
--- a/drivers/infiniband/hw/irdma/hmc.c
+++ b/drivers/infiniband/hw/irdma/hmc.c
@@ -5,6 +5,7 @@
#include "defs.h"
#include "type.h"
#include "protos.h"
+#include "virtchnl.h"
/**
* irdma_find_sd_index_limit - finds segment descriptor index limit
@@ -228,6 +229,10 @@ int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
bool pd_error = false;
int ret_code = 0;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
return -EINVAL;
@@ -330,7 +335,7 @@ static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
u32 i, sd_idx;
struct irdma_dma_mem *mem;
- if (!reset)
+ if (dev->privileged && !reset)
ret_code = irdma_hmc_sd_grp(dev, info->hmc_info,
info->hmc_info->sd_indexes[0],
info->del_sd_cnt, false);
@@ -376,6 +381,9 @@ int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
u32 i, j;
int ret_code = 0;
+ if (dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
ibdev_dbg(to_ibdev(dev),
"HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
@@ -589,7 +597,10 @@ int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
pd_entry->sd_index = sd_idx;
pd_entry->valid = true;
pd_table->use_cnt++;
- irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx);
+
+ if (hmc_info->hmc_fn_id < dev->hw_attrs.first_hw_vf_fpm_id &&
+ dev->privileged)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx);
}
pd_entry->bp.use_cnt++;
@@ -640,7 +651,8 @@ int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
pd_addr = pd_table->pd_page_addr.va;
pd_addr += rel_pd_idx;
memset(pd_addr, 0, sizeof(u64));
- irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx);
+ if (dev->privileged && dev->hmc_fn_id == hmc_info->hmc_fn_id)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx);
if (!pd_entry->rsrc_pg) {
mem = &pd_entry->bp.addr;
diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h
index 415f9e23bbf6..257a5d22aa96 100644
--- a/drivers/infiniband/hw/irdma/hmc.h
+++ b/drivers/infiniband/hw/irdma/hmc.h
@@ -16,11 +16,21 @@
#define IRDMA_HMC_PD_BP_BUF_ALIGNMENT 4096
#define IRDMA_FIRST_VF_FPM_ID 8
#define FPM_MULTIPLIER 1024
+#define IRDMA_OBJ_LOC_MEM_BIT 0x4
+#define IRDMA_XF_MULTIPLIER 16
+#define IRDMA_RRF_MULTIPLIER 8
+#define IRDMA_MIN_PBLE_PAGES 3
+#define IRDMA_HMC_PAGE_SIZE 2097152
+#define IRDMA_MIN_MR_PER_QP 4
+#define IRDMA_MIN_QP_CNT 64
+#define IRDMA_FSIAV_CNT_MAX 1048576
+#define IRDMA_MIN_IRD 8
+#define IRDMA_HMC_MIN_RRF 16
enum irdma_hmc_rsrc_type {
IRDMA_HMC_IW_QP = 0,
IRDMA_HMC_IW_CQ = 1,
- IRDMA_HMC_IW_RESERVED = 2,
+ IRDMA_HMC_IW_SRQ = 2,
IRDMA_HMC_IW_HTE = 3,
IRDMA_HMC_IW_ARP = 4,
IRDMA_HMC_IW_APBVT_ENTRY = 5,
@@ -48,11 +58,17 @@ enum irdma_sd_entry_type {
IRDMA_SD_TYPE_DIRECT = 2,
};
+enum irdma_hmc_obj_mem {
+ IRDMA_HOST_MEM = 0,
+ IRDMA_LOC_MEM = 1,
+};
+
struct irdma_hmc_obj_info {
u64 base;
u32 max_cnt;
u32 cnt;
u64 size;
+ enum irdma_hmc_obj_mem mem_loc;
};
struct irdma_hmc_bp {
@@ -117,6 +133,7 @@ struct irdma_update_sds_info {
struct irdma_ccq_cqe_info;
struct irdma_hmc_fcn_info {
u32 vf_id;
+ u8 protocol_used;
u8 free_fcn;
};
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 69ce1862eabe..7bad0e38786a 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -33,6 +33,7 @@ static struct irdma_rsrc_limits rsrc_limits_table[] = {
static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = {
IRDMA_HMC_IW_QP,
IRDMA_HMC_IW_CQ,
+ IRDMA_HMC_IW_SRQ,
IRDMA_HMC_IW_HTE,
IRDMA_HMC_IW_ARP,
IRDMA_HMC_IW_APBVT_ENTRY,
@@ -134,75 +135,68 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
struct irdma_aeqe_info *info)
{
+ struct qp_err_code qp_err;
+
qp->sq_flush_code = info->sq;
qp->rq_flush_code = info->rq;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-
- switch (info->ae_id) {
- case IRDMA_AE_AMP_BOUNDS_VIOLATION:
- case IRDMA_AE_AMP_INVALID_STAG:
- case IRDMA_AE_AMP_RIGHTS_VIOLATION:
- case IRDMA_AE_AMP_UNALLOCATED_STAG:
- case IRDMA_AE_AMP_BAD_PD:
- case IRDMA_AE_AMP_BAD_QP:
- case IRDMA_AE_AMP_BAD_STAG_KEY:
- case IRDMA_AE_AMP_BAD_STAG_INDEX:
- case IRDMA_AE_AMP_TO_WRAP:
- case IRDMA_AE_PRIV_OPERATION_DENIED:
- qp->flush_code = FLUSH_PROT_ERR;
- qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
- break;
- case IRDMA_AE_UDA_XMIT_BAD_PD:
- case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
- qp->flush_code = FLUSH_LOC_QP_OP_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
- case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
- case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
- case IRDMA_AE_UDA_L4LEN_INVALID:
- case IRDMA_AE_DDP_UBE_INVALID_MO:
- case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- qp->flush_code = FLUSH_LOC_LEN_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
- case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
- qp->flush_code = FLUSH_REM_ACCESS_ERR;
- qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
- break;
- case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
- case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
- case IRDMA_AE_IB_REMOTE_OP_ERROR:
- qp->flush_code = FLUSH_REM_OP_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
- case IRDMA_AE_LCE_QP_CATASTROPHIC:
- qp->flush_code = FLUSH_FATAL_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
- case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
- qp->flush_code = FLUSH_GENERAL_ERR;
- break;
- case IRDMA_AE_LLP_TOO_MANY_RETRIES:
- qp->flush_code = FLUSH_RETRY_EXC_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
- case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
- case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
- case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
- case IRDMA_AE_AMP_MWBIND_VALID_STAG:
- qp->flush_code = FLUSH_MW_BIND_ERR;
- qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
- break;
- case IRDMA_AE_IB_INVALID_REQUEST:
- qp->flush_code = FLUSH_REM_INV_REQ_ERR;
- qp->event_type = IRDMA_QP_EVENT_REQ_ERR;
- break;
- default:
- qp->flush_code = FLUSH_GENERAL_ERR;
- qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
- break;
+ if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) {
+ if (info->sq) {
+ qp->err_sq_idx_valid = true;
+ qp->err_sq_idx = info->wqe_idx;
+ }
+ if (info->rq) {
+ qp->err_rq_idx_valid = true;
+ qp->err_rq_idx = info->wqe_idx;
+ }
+ }
+
+ qp_err = irdma_ae_to_qp_err_code(info->ae_id);
+ qp->flush_code = qp_err.flush_code;
+ qp->event_type = qp_err.event_type;
+}
+
+/**
+ * irdma_complete_cqp_request - perform post-completion cleanup
+ * @cqp: device CQP
+ * @cqp_request: CQP request
+ *
+ * For a waiting request, mark it done and wake up the waiter; otherwise
+ * invoke the callback function, if set. Then release the CQP request.
+ */
+static void irdma_complete_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ } else if (cqp_request->callback_fcn) {
+ cqp_request->callback_fcn(cqp_request);
+ }
+ irdma_put_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_process_ae_def_cmpl - handle IRDMA_AE_CQP_DEFERRED_COMPLETE event
+ * @rf: RDMA PCI function
+ * @info: AEQ entry info
+ */
+static void irdma_process_ae_def_cmpl(struct irdma_pci_f *rf,
+ struct irdma_aeqe_info *info)
+{
+ u32 sw_def_info;
+ u64 scratch;
+
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, true,
+ &scratch, &sw_def_info);
+ while (scratch) {
+ struct irdma_cqp_request *cqp_request =
+ (struct irdma_cqp_request *)(uintptr_t)scratch;
+
+ irdma_complete_cqp_request(&rf->cqp, cqp_request);
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, false,
+ &scratch, &sw_def_info);
}
}
@@ -223,6 +217,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
struct irdma_sc_qp *qp = NULL;
struct irdma_qp_host_ctx_info *ctx_info = NULL;
struct irdma_device *iwdev = rf->iwdev;
+ struct irdma_sc_srq *srq;
unsigned long flags;
u32 aeqcnt = 0;
@@ -236,6 +231,13 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
if (ret)
break;
+ if (info->aeqe_overflow) {
+ ibdev_err(&iwdev->ibdev, "AEQ has overflowed\n");
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ return;
+ }
+
aeqcnt++;
ibdev_dbg(&iwdev->ibdev,
"AEQ: ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n",
@@ -266,9 +268,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE)
iwqp->last_aeq = info->ae_id;
spin_unlock_irqrestore(&iwqp->lock, flags);
- ctx_info = &iwqp->ctx_info;
+ } else if (info->srq) {
+ if (info->ae_id != IRDMA_AE_SRQ_LIMIT)
+ continue;
} else {
- if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
+ if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR &&
+ info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE)
continue;
}
@@ -363,6 +368,18 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
}
irdma_cq_rem_ref(&iwcq->ibcq);
break;
+ case IRDMA_AE_SRQ_LIMIT:
+ srq = (struct irdma_sc_srq *)(uintptr_t)info->compl_ctx;
+ irdma_srq_event(srq);
+ break;
+ case IRDMA_AE_SRQ_CATASTROPHIC_ERROR:
+ break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ /* Remove completed CQP requests from the pending list
+ * and signal completion of those CQP ops.
+ */
+ irdma_process_ae_def_cmpl(rf, info);
+ break;
case IRDMA_AE_RESET_NOT_SENT:
case IRDMA_AE_LLP_DOUBT_REACHABILITY:
case IRDMA_AE_RESOURCE_EXHAUSTION:
@@ -389,13 +406,18 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
case IRDMA_AE_LLP_TOO_MANY_RNRS:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
default:
ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
- if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
- ctx_info->roce_info->err_rq_idx_valid = info->rq;
- if (info->rq) {
+ ctx_info = &iwqp->ctx_info;
+ if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) {
+ ctx_info->roce_info->err_rq_idx_valid =
+ ctx_info->srq_valid ? false : info->err_rq_idx_valid;
+ if (ctx_info->roce_info->err_rq_idx_valid) {
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
ctx_info);
@@ -599,6 +621,8 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf)
dma_free_coherent(dev->hw->device, cqp->sq.size, cqp->sq.va,
cqp->sq.pa);
cqp->sq.va = NULL;
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
kfree(cqp->scratch_array);
cqp->scratch_array = NULL;
kfree(cqp->cqp_requests);
@@ -631,7 +655,9 @@ static void irdma_destroy_aeq(struct irdma_pci_f *rf)
int status = -EBUSY;
if (!rf->msix_shared) {
- rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false);
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev,
+ rf->iw_msixtbl->idx, false);
irdma_destroy_irq(rf, rf->iw_msixtbl, rf);
}
if (rf->reset)
@@ -697,9 +723,10 @@ static void irdma_del_ceq_0(struct irdma_pci_f *rf)
if (rf->msix_shared) {
msix_vec = &rf->iw_msixtbl[0];
- rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
- msix_vec->ceq_id,
- msix_vec->idx, false);
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
irdma_destroy_irq(rf, msix_vec, rf);
} else {
msix_vec = &rf->iw_msixtbl[1];
@@ -730,8 +757,10 @@ static void irdma_del_ceqs(struct irdma_pci_f *rf)
msix_vec = &rf->iw_msixtbl[2];
for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) {
- rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id,
- msix_vec->idx, false);
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
irdma_destroy_irq(rf, msix_vec, iwceq);
irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
IRDMA_OP_CEQ_DESTROY);
@@ -942,6 +971,13 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
goto err_scratch;
}
+ cqp->oop_op_array = kcalloc(sqsize, sizeof(*cqp->oop_op_array),
+ GFP_KERNEL);
+ if (!cqp->oop_op_array) {
+ status = -ENOMEM;
+ goto err_oop;
+ }
+ cqp_init_info.ooo_op_array = cqp->oop_op_array;
dev->cqp = &cqp->sc_cqp;
dev->cqp->dev = dev;
cqp->sq.size = ALIGN(sizeof(struct irdma_cqp_sq_wqe) * sqsize,
@@ -978,6 +1014,10 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
case IRDMA_GEN_2:
cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2;
break;
+ case IRDMA_GEN_3:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_3;
+ cqp_init_info.ts_override = 1;
+ break;
}
status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info);
if (status) {
@@ -1012,6 +1052,9 @@ err_ctx:
cqp->sq.va, cqp->sq.pa);
cqp->sq.va = NULL;
err_sq:
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
+err_oop:
kfree(cqp->scratch_array);
cqp->scratch_array = NULL;
err_scratch:
@@ -1033,13 +1076,15 @@ static int irdma_create_ccq(struct irdma_pci_f *rf)
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_ccq_init_info info = {};
struct irdma_ccq *ccq = &rf->ccq;
+ int ccq_size;
int status;
dev->ccq = &ccq->sc_cq;
dev->ccq->dev = dev;
info.dev = dev;
+ ccq_size = (rf->rdma_ver >= IRDMA_GEN_3) ? IW_GEN_3_CCQ_SIZE : IW_CCQ_SIZE;
ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area);
- ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * IW_CCQ_SIZE,
+ ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * ccq_size,
IRDMA_CQ0_ALIGNMENT);
ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size,
&ccq->mem_cq.pa, GFP_KERNEL);
@@ -1056,7 +1101,7 @@ static int irdma_create_ccq(struct irdma_pci_f *rf)
/* populate the ccq init info */
info.cq_base = ccq->mem_cq.va;
info.cq_pa = ccq->mem_cq.pa;
- info.num_elem = IW_CCQ_SIZE;
+ info.num_elem = ccq_size;
info.shadow_area = ccq->shadow_area.va;
info.shadow_area_pa = ccq->shadow_area.pa;
info.ceqe_mask = false;
@@ -1140,9 +1185,13 @@ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
}
msix_vec->ceq_id = ceq_id;
- rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id, msix_vec->idx, true);
-
- return 0;
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id,
+ msix_vec->idx, true);
+ else
+ status = irdma_vchnl_req_ceq_vec_map(&rf->sc_dev, ceq_id,
+ msix_vec->idx);
+ return status;
}
/**
@@ -1155,7 +1204,7 @@ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
{
struct irdma_msix_vector *msix_vec = rf->iw_msixtbl;
- u32 ret = 0;
+ int ret = 0;
if (!rf->msix_shared) {
snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
@@ -1166,12 +1215,16 @@ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
}
if (ret) {
ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n");
- return -EINVAL;
+ return ret;
}
- rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true);
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx,
+ true);
+ else
+ ret = irdma_vchnl_req_aeq_vec_map(&rf->sc_dev, msix_vec->idx);
- return 0;
+ return ret;
}
/**
@@ -1179,13 +1232,13 @@ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
* @rf: RDMA PCI function
* @iwceq: pointer to the ceq resources to be created
* @ceq_id: the id number of the iwceq
- * @vsi: SC vsi struct
+ * @vsi_idx: vsi idx
*
* Return 0, if the ceq and the resources associated with it
* are successfully created, otherwise return error
*/
static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
- u32 ceq_id, struct irdma_sc_vsi *vsi)
+ u32 ceq_id, u16 vsi_idx)
{
int status;
struct irdma_ceq_init_info info = {};
@@ -1209,7 +1262,7 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
info.elem_cnt = ceq_size;
iwceq->sc_ceq.ceq_id = ceq_id;
info.dev = dev;
- info.vsi = vsi;
+ info.vsi_idx = vsi_idx;
status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
if (!status) {
if (dev->ceq_valid)
@@ -1252,7 +1305,7 @@ static int irdma_setup_ceq_0(struct irdma_pci_f *rf)
}
iwceq = &rf->ceqlist[0];
- status = irdma_create_ceq(rf, iwceq, 0, &rf->default_vsi);
+ status = irdma_create_ceq(rf, iwceq, 0, rf->default_vsi.vsi_idx);
if (status) {
ibdev_dbg(&rf->iwdev->ibdev, "ERR: create ceq status = %d\n",
status);
@@ -1287,13 +1340,13 @@ exit:
/**
* irdma_setup_ceqs - manage the device ceq's and their interrupt resources
* @rf: RDMA PCI function
- * @vsi: VSI structure for this CEQ
+ * @vsi_idx: vsi_idx for this CEQ
*
* Allocate a list for all device completion event queues
* Create the ceq's and configure their msix interrupt vectors
* Return 0, if ceqs are successfully set up, otherwise return error
*/
-static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi)
+static int irdma_setup_ceqs(struct irdma_pci_f *rf, u16 vsi_idx)
{
u32 i;
u32 ceq_id;
@@ -1306,7 +1359,7 @@ static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi)
i = (rf->msix_shared) ? 1 : 2;
for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) {
iwceq = &rf->ceqlist[ceq_id];
- status = irdma_create_ceq(rf, iwceq, ceq_id, vsi);
+ status = irdma_create_ceq(rf, iwceq, ceq_id, vsi_idx);
if (status) {
ibdev_dbg(&rf->iwdev->ibdev,
"ERR: create ceq status = %d\n", status);
@@ -1387,7 +1440,10 @@ static int irdma_create_aeq(struct irdma_pci_f *rf)
aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt +
hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size);
-
+ /* GEN_3 does not support a virtual AEQ; cap at the max kernel alloc size */
+ if (rf->rdma_ver == IRDMA_GEN_3)
+ aeq_size = min(aeq_size, (u32)((PAGE_SIZE << MAX_PAGE_ORDER) /
+ sizeof(struct irdma_sc_aeqe)));
aeq->mem.size = ALIGN(sizeof(struct irdma_sc_aeqe) * aeq_size,
IRDMA_AEQ_ALIGNMENT);
aeq->mem.va = dma_alloc_coherent(dev->hw->device, aeq->mem.size,
@@ -1395,6 +1451,8 @@ static int irdma_create_aeq(struct irdma_pci_f *rf)
GFP_KERNEL | __GFP_NOWARN);
if (aeq->mem.va)
goto skip_virt_aeq;
+ else if (rf->rdma_ver == IRDMA_GEN_3)
+ return -ENOMEM;
/* physically mapped aeq failed. setup virtual aeq */
status = irdma_create_virt_aeq(rf, aeq_size);
@@ -1569,6 +1627,8 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
+ if (!rf->sc_dev.privileged)
+ irdma_vchnl_req_put_hmc_fcn(&rf->sc_dev);
kfree(dev->hmc_info->sd_table.sd_entry);
dev->hmc_info->sd_table.sd_entry = NULL;
vfree(rf->mem_rsrc);
@@ -1635,6 +1695,7 @@ static int irdma_initialize_dev(struct irdma_pci_f *rf)
info.bar0 = rf->hw.hw_addr;
info.hmc_fn_id = rf->pf_id;
+ info.protocol_used = rf->protocol_used;
info.hw = &rf->hw;
status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info);
if (status)
@@ -1665,9 +1726,6 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev)
irdma_del_local_mac_entry(iwdev->rf,
(u8)iwdev->mac_ip_table_idx);
fallthrough;
- case AEQ_CREATED:
- case PBLE_CHUNK_MEM:
- case CEQS_CREATED:
case IEQ_CREATED:
if (!iwdev->roce_mode)
irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ,
@@ -1740,7 +1798,9 @@ static void irdma_get_used_rsrc(struct irdma_device *iwdev)
iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
iwdev->rf->max_qp);
iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
- iwdev->rf->max_cq);
+ iwdev->rf->max_cq);
+ iwdev->rf->used_srqs = find_first_zero_bit(iwdev->rf->allocated_srqs,
+ iwdev->rf->max_srq);
iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
iwdev->rf->max_mr);
}
@@ -1750,13 +1810,17 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
enum init_completion_state state = rf->init_state;
rf->init_state = INVALID_STATE;
- if (rf->rsrc_created) {
+
+ switch (state) {
+ case AEQ_CREATED:
irdma_destroy_aeq(rf);
+ fallthrough;
+ case PBLE_CHUNK_MEM:
irdma_destroy_pble_prm(rf->pble_rsrc);
+ fallthrough;
+ case CEQS_CREATED:
irdma_del_ceqs(rf);
- rf->rsrc_created = false;
- }
- switch (state) {
+ fallthrough;
case CEQ0_CREATED:
irdma_del_ceq_0(rf);
fallthrough;
@@ -1835,32 +1899,6 @@ int irdma_rt_init_hw(struct irdma_device *iwdev,
break;
iwdev->init_state = IEQ_CREATED;
}
- if (!rf->rsrc_created) {
- status = irdma_setup_ceqs(rf, &iwdev->vsi);
- if (status)
- break;
-
- iwdev->init_state = CEQS_CREATED;
-
- status = irdma_hmc_init_pble(&rf->sc_dev,
- rf->pble_rsrc);
- if (status) {
- irdma_del_ceqs(rf);
- break;
- }
-
- iwdev->init_state = PBLE_CHUNK_MEM;
-
- status = irdma_setup_aeq(rf);
- if (status) {
- irdma_destroy_pble_prm(rf->pble_rsrc);
- irdma_del_ceqs(rf);
- break;
- }
- iwdev->init_state = AEQ_CREATED;
- rf->rsrc_created = true;
- }
-
if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
irdma_alloc_set_mac(iwdev);
irdma_add_ip(iwdev);
@@ -1907,6 +1945,13 @@ int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
break;
rf->init_state = CQP_CREATED;
+ dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT;
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ status = irdma_get_rdma_features(dev);
+ if (status)
+ break;
+ }
+
status = irdma_hmc_setup(rf);
if (status)
break;
@@ -1922,13 +1967,6 @@ int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
break;
rf->init_state = CCQ_CREATED;
- dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT;
- if (rf->rdma_ver != IRDMA_GEN_1) {
- status = irdma_get_rdma_features(dev);
- if (status)
- break;
- }
-
status = irdma_setup_ceq_0(rf);
if (status)
break;
@@ -1942,6 +1980,25 @@ int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
}
INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker);
irdma_sc_ccq_arm(dev->ccq);
+
+ status = irdma_setup_ceqs(rf, rf->iwdev ? rf->iwdev->vsi_num : 0);
+ if (status)
+ break;
+
+ rf->init_state = CEQS_CREATED;
+
+ status = irdma_hmc_init_pble(&rf->sc_dev,
+ rf->pble_rsrc);
+ if (status)
+ break;
+
+ rf->init_state = PBLE_CHUNK_MEM;
+
+ status = irdma_setup_aeq(rf);
+ if (status)
+ break;
+ rf->init_state = AEQ_CREATED;
+
return 0;
} while (0);
@@ -1960,7 +2017,8 @@ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
rf->allocated_qps = (void *)(rf->mem_rsrc +
(sizeof(struct irdma_arp_entry) * rf->arp_table_size));
rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)];
- rf->allocated_mrs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)];
+ rf->allocated_srqs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)];
+ rf->allocated_mrs = &rf->allocated_srqs[BITS_TO_LONGS(rf->max_srq)];
rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)];
rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)];
rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)];
@@ -1988,12 +2046,14 @@ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf)
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_srq);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg);
rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp;
rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq;
+ rsrc_size += sizeof(struct irdma_srq **) * rf->max_srq;
return rsrc_size;
}
@@ -2021,6 +2081,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt;
rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
+ rf->max_srq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt;
rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds;
rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt;
rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt;
@@ -2040,6 +2101,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
set_bit(0, rf->allocated_mrs);
set_bit(0, rf->allocated_qps);
set_bit(0, rf->allocated_cqs);
+ set_bit(0, rf->allocated_srqs);
set_bit(0, rf->allocated_pds);
set_bit(0, rf->allocated_arps);
set_bit(0, rf->allocated_ahs);
@@ -2100,15 +2162,16 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
cqp_request->compl_info.op_ret_val = info.op_ret_val;
cqp_request->compl_info.error = info.error;
- if (cqp_request->waiting) {
- WRITE_ONCE(cqp_request->request_done, true);
- wake_up(&cqp_request->waitq);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
- } else {
- if (cqp_request->callback_fcn)
- cqp_request->callback_fcn(cqp_request);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
- }
+ /*
+ * If this is deferred or pending completion, then mark
+ * CQP request as pending to not block the CQ, but don't
+ * release CQP request, as it is still on the OOO list.
+ */
+ if (info.pending)
+ cqp_request->pending = true;
+ else
+ irdma_complete_cqp_request(&rf->cqp,
+ cqp_request);
}
cqe_count++;
@@ -2718,7 +2781,9 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
struct irdma_pci_f *rf = iwqp->iwdev->rf;
u8 flush_code = iwqp->sc_qp.flush_code;
- if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ))
+ if ((!(flush_mask & IRDMA_FLUSH_SQ) &&
+ !(flush_mask & IRDMA_FLUSH_RQ)) ||
+ ((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3))
return;
/* Set flush info fields*/
@@ -2731,6 +2796,10 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.rq_minor_code = FLUSH_GENERAL_ERR;
info.userflushcode = true;
+ info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid;
+ info.err_sq_idx = iwqp->sc_qp.err_sq_idx;
+ info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid;
+ info.err_rq_idx = iwqp->sc_qp.err_rq_idx;
if (flush_mask & IRDMA_REFLUSH) {
if (info.sq)
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
index ce61a27cb1f6..60c1f2b1811d 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -85,6 +85,7 @@ static u64 i40iw_masks[IRDMA_MAX_MASKS] = {
I40E_CQPSQ_CQ_CEQID,
I40E_CQPSQ_CQ_CQID,
I40E_COMMIT_FPM_CQCNT,
+ I40E_CQPSQ_UPESD_HMCFNID,
};
static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = {
@@ -94,6 +95,7 @@ static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = {
I40E_CQPSQ_CQ_CEQID_S,
I40E_CQPSQ_CQ_CQID_S,
I40E_COMMIT_FPM_CQCNT_S,
+ I40E_CQPSQ_UPESD_HMCFNID_S,
};
/**
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h
index e1db84d8a62c..0095b327afcc 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.h
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.h
@@ -123,6 +123,8 @@
#define I40E_CQPSQ_CQ_CQID GENMASK_ULL(15, 0)
#define I40E_COMMIT_FPM_CQCNT_S 0
#define I40E_COMMIT_FPM_CQCNT GENMASK_ULL(17, 0)
+#define I40E_CQPSQ_UPESD_HMCFNID_S 0
+#define I40E_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
#define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4))
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
index cc50a7070371..15e036ddaffb 100644
--- a/drivers/infiniband/hw/irdma/i40iw_if.c
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -75,6 +75,9 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info
struct irdma_pci_f *rf = iwdev->rf;
rf->rdma_ver = IRDMA_GEN_1;
+ rf->sc_dev.hw = &rf->hw;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1;
+ rf->sc_dev.privileged = true;
rf->gen_ops.request_reset = i40iw_request_reset;
rf->pcidev = cdev_info->pcidev;
rf->pf_id = cdev_info->fid;
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
index 941d3edffadb..32f26284a788 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -38,6 +38,7 @@ static u64 icrdma_masks[IRDMA_MAX_MASKS] = {
ICRDMA_CQPSQ_CQ_CEQID,
ICRDMA_CQPSQ_CQ_CQID,
ICRDMA_COMMIT_FPM_CQCNT,
+ ICRDMA_CQPSQ_UPESD_HMCFNID,
};
static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = {
@@ -47,6 +48,7 @@ static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = {
ICRDMA_CQPSQ_CQ_CEQID_S,
ICRDMA_CQPSQ_CQ_CQID_S,
ICRDMA_COMMIT_FPM_CQCNT_S,
+ ICRDMA_CQPSQ_UPESD_HMCFNID_S,
};
/**
@@ -194,6 +196,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2;
+ dev->hw_attrs.max_hw_device_pages = ICRDMA_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE;
dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h
index 697b9572b5c6..d97944ab45da 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.h
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.h
@@ -58,14 +58,15 @@
#define ICRDMA_CQPSQ_CQ_CQID GENMASK_ULL(18, 0)
#define ICRDMA_COMMIT_FPM_CQCNT_S 0
#define ICRDMA_COMMIT_FPM_CQCNT GENMASK_ULL(19, 0)
-
+#define ICRDMA_CQPSQ_UPESD_HMCFNID_S 0
+#define ICRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
enum icrdma_device_caps_const {
ICRDMA_MAX_STATS_COUNT = 128,
ICRDMA_MAX_IRD_SIZE = 127,
ICRDMA_MAX_ORD_SIZE = 255,
ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */,
-
+ ICRDMA_MAX_PUSH_PAGE_COUNT = 256,
};
void icrdma_init_hw(struct irdma_sc_dev *dev);
diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c
new file mode 100644
index 000000000000..27b191f61caf
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_if.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_ice.h>
+
+static void icrdma_prep_tc_change(struct irdma_device *iwdev)
+{
+ iwdev->vsi.tc_change_pending = true; /* mark a TC change as in progress on this VSI */
+ irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
+
+ /*
+ * Wait for all qp's to suspend: sleep until qp_suspend_reqs reads zero
+ * or IRDMA_EVENT_TIMEOUT_MS elapses. The result of the wait is not
+ * checked, so irdma_ws_reset() below runs in either case.
+ */
+ wait_event_timeout(iwdev->suspend_wq,
+ !atomic_read(&iwdev->vsi.qp_suspend_reqs),
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
+ irdma_ws_reset(&iwdev->vsi);
+}
+
+static void icrdma_fill_qos_info(struct irdma_l2params *l2params,
+ struct iidc_rdma_qos_params *qos_info)
+{ /* Copy the QoS settings from the IIDC qos_info into the irdma l2params. */
+ int i;
+
+ l2params->num_tc = qos_info->num_tc; /* also bounds the per-TC copy loop below */
+ l2params->vsi_prio_type = qos_info->vport_priority_type;
+ l2params->vsi_rel_bw = qos_info->vport_relative_bw;
+ for (i = 0; i < l2params->num_tc; i++) {
+ l2params->tc_info[i].egress_virt_up =
+ qos_info->tc_info[i].egress_virt_up;
+ l2params->tc_info[i].ingress_virt_up =
+ qos_info->tc_info[i].ingress_virt_up;
+ l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
+ l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
+ l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
+ }
+ for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) /* up2tc[] is copied in full, independent of num_tc */
+ l2params->up2tc[i] = qos_info->up2tc[i];
+ if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) { /* dscp_mode/dscp_map are only written in DSCP PFC mode */
+ l2params->dscp_mode = true;
+ memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
+ }
+}
+
+static void icrdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{ /* Dispatch one IIDC event; the type bits are tested in order and only the first matching branch runs. */
+ struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
+ struct irdma_l2params l2params = {};
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+ ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
+ if (iwdev->vsi.mtu != iwdev->netdev->mtu) { /* only push l2params when the MTU really differs */
+ l2params.mtu = iwdev->netdev->mtu;
+ l2params.mtu_changed = true;
+ irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ }
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
+ if (iwdev->vsi.tc_change_pending) /* preparation already done; ignore the repeat */
+ return;
+
+ icrdma_prep_tc_change(iwdev);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+
+ if (!iwdev->vsi.tc_change_pending) /* no TC change was flagged as pending; nothing to apply */
+ return;
+
+ l2params.tc_changed = true;
+ ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
+
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode =
+ l2params.num_tc > 1 && !l2params.dscp_mode;
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) {
+ ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
+ event->reg);
+ if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
+ u32 pe_criterr;
+
+ pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
+#define IRDMA_Q1_RESOURCE_ERR 0x0001024d /* the one GLPE_CRITERR value that is only warned about below */
+ if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) {
+ ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n",
+ pe_criterr);
+ iwdev->rf->reset = true;
+ } else {
+ ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n");
+ }
+ }
+ if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) {
+ ibdev_err(&iwdev->ibdev, "HMC Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) {
+ ibdev_err(&iwdev->ibdev, "PE Push Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (iwdev->rf->reset) /* true if any check above set it, or if it was already set on entry */
+ iwdev->rf->gen_ops.request_reset(iwdev->rf);
+ }
+}
+
+/**
+ * icrdma_lan_register_qset - Register qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ *
+ * Builds a qset request from @tc_node (qs_handle, traffic_class) and the
+ * vsi_idx of @vsi and passes it to ice_add_rdma_qset(). On success the teid
+ * filled in by that call is stored in @tc_node->l2_sched_node_id and in the
+ * vsi qos entry indexed by @tc_node->user_pri.
+ *
+ * Return: 0 on success, otherwise the error returned by ice_add_rdma_qset().
+ */
+static int icrdma_lan_register_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+ int ret;
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ ret = ice_add_rdma_qset(cdev_info, &qset);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
+ return ret;
+ }
+
+ tc_node->l2_sched_node_id = qset.teid;
+ vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid;
+
+ return 0;
+}
+
+/**
+ * icrdma_lan_unregister_qset - Unregister qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ *
+ * Rebuilds the qset description from @tc_node (including the teid held in
+ * @tc_node->l2_sched_node_id) and passes it to ice_del_rdma_qset(). A failure
+ * of that call is only logged at debug level; nothing is returned.
+ */
+static void icrdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ qset.teid = tc_node->l2_sched_node_id;
+
+ if (ice_del_rdma_qset(cdev_info, &qset))
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
+}
+
+/**
+ * icrdma_request_reset - Request a reset
+ * @rf: RDMA PCI function
+ *
+ * Logs a warning on the ib device of @rf and calls ice_rdma_request_reset()
+ * with reset type IIDC_FUNC_RESET. The result of that call is not checked.
+ */
+static void icrdma_request_reset(struct irdma_pci_f *rf)
+{
+ ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
+ ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+static int icrdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{ /* Allocate rf->msix_entries and obtain vectors from ice; returns 0 or -ENOMEM. */
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; /* requested count; replaced below by the number granted */
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i]))
+ break; /* stop at the first failure; i == number of vectors obtained */
+
+ if (i < IRDMA_MIN_MSIX) { /* fewer than the minimum: release what was obtained and fail */
+ while (--i >= 0)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries); /* NOTE(review): rf->msix_entries/msix_count keep stale values on this path */
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i; /* a partial allocation of at least IRDMA_MIN_MSIX is accepted */
+
+ return 0;
+}
+
+static void icrdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{ /* Return the first rf->msix_count vectors to ice, then free the entries array. */
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
+static void icrdma_fill_device_info(struct irdma_device *iwdev,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{ /* Populate iwdev and iwdev->rf from the IIDC core/private device info with GEN_2 PF settings. */
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ rf->sc_dev.hw = &rf->hw;
+ rf->iwdev = iwdev; /* back-pointer from the PCI function to its device */
+ rf->cdev = cdev_info;
+ rf->hw.hw_addr = idc_priv->hw_addr;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->pf_id = idc_priv->pf_id;
+ rf->rdma_ver = IRDMA_GEN_2;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
+ rf->sc_dev.is_pf = true;
+ rf->sc_dev.privileged = true;
+
+ rf->gen_ops.register_qset = icrdma_lan_register_qset;
+ rf->gen_ops.unregister_qset = icrdma_lan_unregister_qset;
+
+ rf->default_vsi.vsi_idx = idc_priv->vport_id;
+ rf->protocol_used = /* exactly one protocol is selected, from cdev_info->rdma_protocol */
+ cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ?
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
+ rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+ rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+ rf->gen_ops.request_reset = icrdma_request_reset;
+ rf->limits_sel = 7;
+ mutex_init(&rf->ah_tbl_lock);
+
+ iwdev->netdev = idc_priv->netdev;
+ iwdev->vsi_num = idc_priv->vport_id; /* same vport_id as rf->default_vsi.vsi_idx above */
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+ iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) /* i.e. the RoCE choice made above */
+ iwdev->roce_mode = true;
+}
+
+static int icrdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
+{ /* Allocate iwdev/rf, bring up interrupts and HW in stages, register the ib device; 0 or a negative errno. */
+ struct iidc_rdma_core_auxiliary_dev *iidc_adev;
+ struct iidc_rdma_core_dev_info *cdev_info;
+ struct iidc_rdma_priv_dev_info *idc_priv;
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ int err;
+
+ iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ cdev_info = iidc_adev->cdev_info;
+ idc_priv = cdev_info->iidc_priv;
+
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+ iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!iwdev->rf) {
+ ib_dealloc_device(&iwdev->ibdev);
+ return -ENOMEM;
+ }
+
+ icrdma_fill_device_info(iwdev, cdev_info); /* also sets rf->iwdev, protocol_used and roce_mode */
+ rf = iwdev->rf;
+
+ err = icrdma_init_interrupts(rf, cdev_info);
+ if (err)
+ goto err_init_interrupts;
+
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
+ goto err_ctrl_init;
+
+ l2params.mtu = iwdev->netdev->mtu;
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
+ goto err_rt_init;
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true);
+
+ ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
+ auxiliary_set_drvdata(aux_dev, iwdev); /* read back by icrdma_remove() */
+
+ return 0;
+
+err_ibreg: /* the labels below undo the setup steps in reverse order */
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ icrdma_deinit_interrupts(rf, cdev_info);
+err_init_interrupts:
+ kfree(iwdev->rf);
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+static void icrdma_remove(struct auxiliary_device *aux_dev)
+{ /* Disable the VSI filter, unregister the ib device and release the interrupt vectors. */
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+ u8 rdma_ver = iwdev->rf->rdma_ver; /* cached up front; only used by the final debug message */
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
+ irdma_ib_unregister_device(iwdev);
+ icrdma_deinit_interrupts(iwdev->rf, cdev_info); /* NOTE(review): iwdev->rf is read after unregister while rdma_ver was cached before it - confirm iwdev and rf are still valid here */
+
+ pr_debug("INIT: Gen[%d] func[%d] device remove success\n",
+ rdma_ver, PCI_FUNC(cdev_info->pdev->devfn));
+}
+
+static const struct auxiliary_device_id icrdma_auxiliary_id_table[] = { /* auxiliary device names this driver matches */
+ {.name = "ice.iwarp", },
+ {.name = "ice.roce", },
+ {}, /* empty terminating entry */
+};
+
+MODULE_DEVICE_TABLE(auxiliary, icrdma_auxiliary_id_table);
+
+struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv = { /* non-static: presumably registered from another file - confirm */
+ .adrv = {
+ .name = "gen_2",
+ .id_table = icrdma_auxiliary_id_table,
+ .probe = icrdma_probe,
+ .remove = icrdma_remove,
+ },
+ .event_handler = icrdma_iidc_event_handler, /* IIDC event callback (MTU/TC change, critical error) */
+};
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
new file mode 100644
index 000000000000..2e8bb475e22a
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2018 - 2024 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "protos.h"
+#include "ig3rdma_hw.h"
+
+/**
+ * ig3rdma_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ *
+ * Writes a value with the INTENA and CLEARPBA fields set to the DYN_CTL
+ * register selected by @idx. When @dev is a PF the register is located at
+ * @idx * 0x400 from hw_regs[IRDMA_GLINT_DYN_CTL]; otherwise @idx is first
+ * decremented and a stride of 1 is used.
+ */
+static void ig3rdma_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1);
+
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+/**
+ * ig3rdma_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ *
+ * Writes 0 to the DYN_CTL register selected by @idx, using the same
+ * PF (stride 0x400) / non-PF (@idx - 1, stride 1) addressing as
+ * ig3rdma_ena_irq().
+ */
+static void ig3rdma_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+static const struct irdma_irq_ops ig3rdma_irq_ops = { /* installed as dev->irq_ops by ig3rdma_init_hw() */
+ .irdma_dis_irq = ig3rdma_disable_irq,
+ .irdma_en_irq = ig3rdma_ena_irq,
+};
+
+static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = { /* indexed by IRDMA_HW_STAT_INDEX_*; first member is presumably the counter's byte offset - confirm against struct irdma_hw_stat_map */
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 48, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 56, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 64, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 72, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 80, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 88, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 96, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 104, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 112, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 120, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 128, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 136, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 144, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 152, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 160, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 168, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 176, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 184, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 192, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 200, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 208, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 224, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 232, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 240, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 248, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 256, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 264, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 272, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 280, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 288, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 296, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 304, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 312, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 320, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 328, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 336, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 344, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 352, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 360, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_SENT] = { 368, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD] = { 376, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT] = { 384, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT] = { 392, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXATS] = { 408, 0, 0 }, /* NOTE(review): 400 is skipped in the otherwise 8-spaced sequence - confirm against the HW stats layout */
+ [IRDMA_HW_STAT_INDEX_RDMATXATS] = { 416, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR] = { 424, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED] = { 432, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RTO] = { 440, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS] = { 448, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_ICRCERR] = { 456, 0, 0 },
+};
+
+void ig3rdma_init_hw(struct irdma_sc_dev *dev)
+{ /* Install the GEN_3 irq ops and stats map and fill dev->hw_attrs with GEN_3 limits and feature flags. */
+ dev->irq_ops = &ig3rdma_irq_ops;
+ dev->hw_stats_map = ig3rdma_hw_stat_map;
+
+ dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3;
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = IG3RDMA_MAX_SGE_RD;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.first_hw_vf_fpm_id = 0;
+ dev->hw_attrs.max_hw_vf_fpm_id = IG3_MAX_APFS + IG3_MAX_AVFS;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_64_BYTE_CQE; /* flags are OR-ed in, preserving any already set */
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_CQE_TIMESTAMPING;
+
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_SRQ;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
+ IRDMA_FEATURE_CQ_RESIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_attrs.max_hw_ird = IG3RDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = IG3RDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = IG3RDMA_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_3;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = IG3RDMA_MIN_WQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_srq_quanta = IRDMA_SRQ_MAX_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_inline = IG3RDMA_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.max_hw_device_pages = /* push page limit depends on dev->is_pf, which must be set before this call */
+ dev->is_pf ? IG3RDMA_MAX_PF_PUSH_PAGE_COUNT : IG3RDMA_MAX_VF_PUSH_PAGE_COUNT;
+}
+
+static void __iomem *__ig3rdma_get_reg_addr(struct irdma_mmio_region *region, u64 reg_offset)
+{ /* Return the address within @region for @reg_offset, or NULL if the offset lies outside the region. */
+ if (reg_offset >= region->offset &&
+ reg_offset < (region->offset + region->len)) { /* half-open range [offset, offset + len) */
+ reg_offset -= region->offset; /* make the offset relative to the region start */
+
+ return region->addr + reg_offset;
+ }
+
+ return NULL;
+}
+
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset)
+{ /* Resolve @reg_offset to an address: hw->rdma_reg is tried first, then each hw->io_regs[] entry in order. */
+ u8 __iomem *reg_addr;
+ int i;
+
+ reg_addr = __ig3rdma_get_reg_addr(&hw->rdma_reg, reg_offset);
+ if (reg_addr)
+ return reg_addr;
+
+ for (i = 0; i < hw->num_io_regions; i++) {
+ reg_addr = __ig3rdma_get_reg_addr(&hw->io_regs[i], reg_offset);
+ if (reg_addr)
+ return reg_addr;
+ }
+
+ WARN_ON_ONCE(1); /* offset is not covered by any known region */
+
+ return NULL; /* callers must handle NULL */
+}
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.h b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
new file mode 100644
index 000000000000..03d5f1188789
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2021 - 2024 Intel Corporation */
+#ifndef IG3RDMA_HW_H
+#define IG3RDMA_HW_H
+
+#define IG3_MAX_APFS 1
+#define IG3_MAX_AVFS 0
+
+#define IG3_PF_RDMA_REGION_OFFSET 0xBC00000
+#define IG3_PF_RDMA_REGION_LEN 0x401000
+#define IG3_VF_RDMA_REGION_OFFSET 0x8C00
+#define IG3_VF_RDMA_REGION_LEN 0x8400
+
+enum ig3rdma_device_caps_const { /* fixed GEN_3 limits; ig3rdma_init_hw() copies them into dev->hw_attrs */
+ IG3RDMA_MAX_WQ_FRAGMENT_COUNT = 14,
+ IG3RDMA_MAX_SGE_RD = 14,
+
+ IG3RDMA_MAX_STATS_COUNT = 128,
+
+ IG3RDMA_MAX_IRD_SIZE = 64,
+ IG3RDMA_MAX_ORD_SIZE = 64,
+ IG3RDMA_MIN_WQ_SIZE = 16 /* WQEs */,
+ IG3RDMA_MAX_INLINE_DATA_SIZE = 216,
+ IG3RDMA_MAX_PF_PUSH_PAGE_COUNT = 8192, /* used when dev->is_pf is set */
+ IG3RDMA_MAX_VF_PUSH_PAGE_COUNT = 16, /* used otherwise */
+};
+
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+ u8 *recv_msg, u16 *recv_len);
+
+#endif /* IG3RDMA_HW_H */
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c b/drivers/infiniband/hw/irdma/ig3rdma_if.c
new file mode 100644
index 000000000000..1bb42eb298ba
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
+#include "ig3rdma_hw.h"
+
+/*
+ * Event callback of the core auxiliary driver.
+ *
+ * When the event type has the IIDC_RDMA_EVENT_WARN_RESET bit set, the RDMA
+ * PCI function stored as driver data of the core auxiliary device is flagged
+ * as being in reset and its virtual channel is marked down. All other event
+ * types are ignored.
+ */
+static void ig3rdma_idc_core_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+					   struct iidc_rdma_event *event)
+{
+	struct irdma_pci_f *rf;
+
+	if (!(*event->type & BIT(IIDC_RDMA_EVENT_WARN_RESET)))
+		return;
+
+	rf = auxiliary_get_drvdata(cdev_info->adev);
+	rf->reset = true;
+	rf->sc_dev.vchnl_up = false;
+}
+
+/*
+ * ig3rdma_vchnl_send_sync - send a virtual channel message and get the reply
+ * @dev: RDMA device the channel belongs to
+ * @msg: message buffer to send
+ * @len: length of @msg
+ * @recv_msg: buffer for the response
+ * @recv_len: length of the response
+ *
+ * Forwards the request to idpf_idc_rdma_vc_send_sync() and returns its
+ * status unchanged. On -ETIMEDOUT an error is logged and the virtual channel
+ * is marked down before returning.
+ */
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+			    u8 *recv_msg, u16 *recv_len)
+{
+	struct irdma_pci_f *rf = dev_to_rf(dev);
+	int status;
+
+	status = idpf_idc_rdma_vc_send_sync(rf->cdev, msg, len, recv_msg,
+					    recv_len);
+	if (status != -ETIMEDOUT)
+		return status;
+
+	ibdev_err(&rf->iwdev->ibdev,
+		  "Virtual channel Req <-> Resp completion timeout\n");
+	dev->vchnl_up = false;
+
+	return status;
+}
+
+/*
+ * ig3rdma_vchnl_init - set up the virtual channel of an RDMA PCI function
+ * @rf: RDMA PCI function
+ * @cdev_info: core device info supplying the function type
+ * @rdma_ver: filled with the hardware revision found in the device
+ *            attributes after irdma_sc_vchnl_init() succeeds
+ *
+ * Allocates the ordered virtual channel workqueue, initializes the channel
+ * mutex and calls irdma_sc_vchnl_init(). The workqueue is destroyed again if
+ * that call fails.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated, or the
+ * error returned by irdma_sc_vchnl_init().
+ */
+static int ig3rdma_vchnl_init(struct irdma_pci_f *rf,
+			      struct iidc_rdma_core_dev_info *cdev_info,
+			      u8 *rdma_ver)
+{
+	struct iidc_rdma_priv_dev_info *priv = cdev_info->iidc_priv;
+	struct irdma_vchnl_init_info info;
+	u8 cur_gen = rf->rdma_ver;
+	int err;
+
+	rf->vchnl_wq = alloc_ordered_workqueue("irdma-virtchnl-wq", 0);
+	if (!rf->vchnl_wq)
+		return -ENOMEM;
+
+	mutex_init(&rf->sc_dev.vchnl_mutex);
+
+	info.vchnl_wq = rf->vchnl_wq;
+	info.hw_rev = cur_gen;
+	info.privileged = cur_gen == IRDMA_GEN_2;
+	/* A function type of zero is treated as PF */
+	info.is_pf = !priv->ftype;
+
+	err = irdma_sc_vchnl_init(&rf->sc_dev, &info);
+	if (err)
+		goto err_destroy_wq;
+
+	*rdma_ver = rf->sc_dev.hw_attrs.uk_attrs.hw_rev;
+
+	return 0;
+
+err_destroy_wq:
+	destroy_workqueue(rf->vchnl_wq);
+
+	return err;
+}
+
+/**
+ * ig3rdma_request_reset - Request a function reset
+ * @rf: RDMA PCI function
+ *
+ * Logs a warning on the IB device of @rf and asks for an IIDC_FUNC_RESET
+ * through idpf_idc_request_reset().
+ */
+static void ig3rdma_request_reset(struct irdma_pci_f *rf)
+{
+	struct ib_device *ibdev = &rf->iwdev->ibdev;
+
+	ibdev_warn(ibdev, "Requesting a reset\n");
+	idpf_idc_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+/*
+ * ig3rdma_cfg_regions - describe and map the MMIO regions of the function
+ * @hw: hardware structure to fill
+ * @cdev_info: core device info supplying function type, PCI device and the
+ *             list of already mapped memory regions
+ *
+ * Picks the PF or VF RDMA register window, ioremaps it relative to the
+ * start of PCI resource 0, then allocates hw->io_regs and copies address,
+ * size and start offset of every mapped memory region reported in the
+ * private device info.
+ *
+ * Return: 0 on success, -ENODEV for an unknown function type, -ENOMEM if
+ * the mapping or the allocation fails (the mapping is undone in the latter
+ * case).
+ */
+static int ig3rdma_cfg_regions(struct irdma_hw *hw,
+			       struct iidc_rdma_core_dev_info *cdev_info)
+{
+	struct iidc_rdma_priv_dev_info *priv = cdev_info->iidc_priv;
+	struct irdma_mmio_region *rdma = &hw->rdma_reg;
+	struct pci_dev *pdev = cdev_info->pdev;
+	int idx;
+
+	if (priv->ftype == IIDC_FUNCTION_TYPE_PF) {
+		rdma->len = IG3_PF_RDMA_REGION_LEN;
+		rdma->offset = IG3_PF_RDMA_REGION_OFFSET;
+	} else if (priv->ftype == IIDC_FUNCTION_TYPE_VF) {
+		rdma->len = IG3_VF_RDMA_REGION_LEN;
+		rdma->offset = IG3_VF_RDMA_REGION_OFFSET;
+	} else {
+		return -ENODEV;
+	}
+
+	rdma->addr = ioremap(pci_resource_start(pdev, 0) + rdma->offset,
+			     rdma->len);
+	if (!rdma->addr)
+		return -ENOMEM;
+
+	hw->num_io_regions = le16_to_cpu(priv->num_memory_regions);
+	hw->io_regs = kcalloc(hw->num_io_regions,
+			      sizeof(struct irdma_mmio_region), GFP_KERNEL);
+	if (!hw->io_regs)
+		goto err_unmap;
+
+	/* These regions arrive already mapped; only record them */
+	for (idx = 0; idx < hw->num_io_regions; idx++) {
+		struct irdma_mmio_region *dst = &hw->io_regs[idx];
+
+		dst->addr = priv->mapped_mem_regions[idx].region_addr;
+		dst->len = le64_to_cpu(priv->mapped_mem_regions[idx].size);
+		dst->offset =
+			le64_to_cpu(priv->mapped_mem_regions[idx].start_offset);
+	}
+
+	return 0;
+
+err_unmap:
+	iounmap(rdma->addr);
+
+	return -ENOMEM;
+}
+
+/*
+ * Release what ig3rdma_cfg_rf() set up: the virtual channel workqueue, the
+ * I/O region array and the mapping of the RDMA register region.
+ */
+static void ig3rdma_decfg_rf(struct irdma_pci_f *rf)
+{
+	destroy_workqueue(rf->vchnl_wq);
+	kfree(rf->hw.io_regs);
+	iounmap(rf->hw.rdma_reg.addr);
+}
+
+/*
+ * ig3rdma_cfg_rf - fill in an RDMA PCI function from the core device info
+ * @rf: RDMA PCI function to configure
+ * @cdev_info: core device info
+ *
+ * Records the PCI device and MSI-X information, initializes the virtual
+ * channel, configures the MMIO regions and sets the fixed defaults
+ * (RoCE-only protocol, default HMC profile, reset timeout, reset callback).
+ *
+ * Return: 0 on success or the error of the failing step. The virtual
+ * channel workqueue is destroyed if region configuration fails.
+ */
+static int ig3rdma_cfg_rf(struct irdma_pci_f *rf,
+			  struct iidc_rdma_core_dev_info *cdev_info)
+{
+	struct iidc_rdma_priv_dev_info *priv = cdev_info->iidc_priv;
+	int ret;
+
+	rf->cdev = cdev_info;
+	rf->pcidev = cdev_info->pdev;
+	rf->hw.device = &rf->pcidev->dev;
+	rf->sc_dev.hw = &rf->hw;
+	rf->msix_entries = priv->msix_entries;
+	rf->msix_count = priv->msix_count;
+
+	ret = ig3rdma_vchnl_init(rf, cdev_info, &rf->rdma_ver);
+	if (ret)
+		return ret;
+
+	ret = ig3rdma_cfg_regions(&rf->hw, cdev_info);
+	if (ret)
+		goto err_destroy_wq;
+
+	rf->gen_ops.request_reset = ig3rdma_request_reset;
+	rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
+	rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+	rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+	rf->limits_sel = 7;
+	mutex_init(&rf->ah_tbl_lock);
+
+	return 0;
+
+err_destroy_wq:
+	destroy_workqueue(rf->vchnl_wq);
+
+	return ret;
+}
+
+/*
+ * ig3rdma_core_probe - probe callback of the core auxiliary driver
+ * @aux_dev: core auxiliary device
+ * @id: matched id table entry (unused)
+ *
+ * Allocates the RDMA PCI function, configures it, initializes the control
+ * hardware, publishes the function as driver data of @aux_dev and finally
+ * calls idpf_idc_vport_dev_ctrl(..., true). Each step is undone in reverse
+ * order when a later step fails.
+ *
+ * Return: 0 on success, -ENOMEM or the error of the failing step otherwise.
+ */
+static int ig3rdma_core_probe(struct auxiliary_device *aux_dev,
+			      const struct auxiliary_device_id *id)
+{
+	struct iidc_rdma_core_auxiliary_dev *core_adev =
+		container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+	struct iidc_rdma_core_dev_info *cdev_info = core_adev->cdev_info;
+	struct irdma_pci_f *rf;
+	int ret;
+
+	rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+	if (!rf)
+		return -ENOMEM;
+
+	ret = ig3rdma_cfg_rf(rf, cdev_info);
+	if (ret)
+		goto free_rf;
+
+	ret = irdma_ctrl_init_hw(rf);
+	if (ret)
+		goto undo_cfg;
+
+	/*
+	 * ig3rdma_vport_probe() reads this driver data from the core
+	 * auxiliary device, so it is set before the vport control call.
+	 */
+	auxiliary_set_drvdata(aux_dev, rf);
+
+	ret = idpf_idc_vport_dev_ctrl(cdev_info, true);
+	if (!ret)
+		return 0;
+
+	irdma_ctrl_deinit_hw(rf);
+undo_cfg:
+	ig3rdma_decfg_rf(rf);
+free_rf:
+	kfree(rf);
+
+	return ret;
+}
+
+/*
+ * ig3rdma_core_remove - remove callback of the core auxiliary driver
+ * @aux_dev: core auxiliary device
+ *
+ * Mirrors ig3rdma_core_probe() in reverse: calls
+ * idpf_idc_vport_dev_ctrl(..., false), deinitializes the control hardware,
+ * releases the function configuration and frees the RDMA PCI function.
+ */
+static void ig3rdma_core_remove(struct auxiliary_device *aux_dev)
+{
+	struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_dev);
+	struct iidc_rdma_core_auxiliary_dev *core_adev;
+
+	core_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev,
+				 adev);
+
+	idpf_idc_vport_dev_ctrl(core_adev->cdev_info, false);
+	irdma_ctrl_deinit_hw(rf);
+	ig3rdma_decfg_rf(rf);
+	kfree(rf);
+}
+
+/* Auxiliary device names the core driver binds to; ends with an empty entry */
+static const struct auxiliary_device_id ig3rdma_core_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.core", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_core_auxiliary_id_table);
+
+/*
+ * Core auxiliary driver: probe/remove manage the RDMA PCI function,
+ * event_handler receives core device events. Not static because it is
+ * declared extern in main.h and registered from irdma_init_module().
+ */
+struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv = {
+ .adrv = {
+ .name = "core",
+ .id_table = ig3rdma_core_auxiliary_id_table,
+ .probe = ig3rdma_core_probe,
+ .remove = ig3rdma_core_remove,
+ },
+ .event_handler = ig3rdma_idc_core_event_handler,
+};
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
index 20d2e7393e3d..ff938a01d70c 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -32,7 +32,16 @@
#define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW GENMASK(31, 12)
#define IRDMA_PFHMC_SDCMD_PMSDWR BIT(31)
-#define IRDMA_INVALID_CQ_IDX 0xffffffff
+#define IRDMA_INVALID_CQ_IDX 0xffffffff
+#define IRDMA_Q_INVALID_IDX 0xffff
+
+/*
+ * NOTE(review): presumably interrupt throttling (ITR) index selectors, with
+ * IRDMA_IDX_NOITR meaning "no ITR" - confirm against the users of this enum.
+ */
+enum irdma_dyn_idx_t {
+ IRDMA_IDX_ITR0 = 0,
+ IRDMA_IDX_ITR1 = 1,
+ IRDMA_IDX_ITR2 = 2,
+ IRDMA_IDX_NOITR = 3,
+};
+
enum irdma_registers {
IRDMA_CQPTAIL,
IRDMA_CQPDB,
@@ -67,6 +76,7 @@ enum irdma_shifts {
IRDMA_CQPSQ_CQ_CEQID_S,
IRDMA_CQPSQ_CQ_CQID_S,
IRDMA_COMMIT_FPM_CQCNT_S,
+ IRDMA_CQPSQ_UPESD_HMCFNID_S,
IRDMA_MAX_SHIFTS,
};
@@ -77,6 +87,7 @@ enum irdma_masks {
IRDMA_CQPSQ_CQ_CEQID_M,
IRDMA_CQPSQ_CQ_CQID_M,
IRDMA_COMMIT_FPM_CQCNT_M,
+ IRDMA_CQPSQ_UPESD_HMCFNID_M,
IRDMA_MAX_MASKS, /* Must be last entry */
};
@@ -92,7 +103,7 @@ struct irdma_mcast_grp_ctx_entry_info {
struct irdma_mcast_grp_info {
u8 dest_mac_addr[ETH_ALEN];
u16 vlan_id;
- u8 hmc_fcn_id;
+ u16 hmc_fcn_id;
bool ipv4_valid:1;
bool vlan_valid:1;
u16 mg_id;
@@ -107,6 +118,9 @@ enum irdma_vers {
IRDMA_GEN_RSVD,
IRDMA_GEN_1,
IRDMA_GEN_2,
+ IRDMA_GEN_3,
+ IRDMA_GEN_NEXT,
+ IRDMA_GEN_MAX = IRDMA_GEN_NEXT-1
};
struct irdma_uk_attrs {
@@ -118,6 +132,7 @@ struct irdma_uk_attrs {
u32 max_hw_wq_quanta;
u32 min_hw_cq_size;
u32 max_hw_cq_size;
+ u32 max_hw_srq_quanta;
u16 max_hw_sq_chunk;
u16 min_hw_wq_size;
u8 hw_rev;
@@ -147,10 +162,13 @@ struct irdma_hw_attrs {
u32 max_done_count;
u32 max_sleep_count;
u32 max_cqp_compl_wait_time_ms;
+ u32 min_hw_srq_id;
u16 max_stat_inst;
u16 max_stat_idx;
};
void i40iw_init_hw(struct irdma_sc_dev *dev);
void icrdma_init_hw(struct irdma_sc_dev *dev);
+void ig3rdma_init_hw(struct irdma_sc_dev *dev);
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
#endif /* IRDMA_H*/
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 1e840bbd619d..95957d52883d 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
MODULE_ALIAS("i40iw");
MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA");
@@ -38,19 +39,7 @@ static void irdma_unregister_notifiers(void)
unregister_netdevice_notifier(&irdma_netdevice_notifier);
}
-static void irdma_prep_tc_change(struct irdma_device *iwdev)
-{
- iwdev->vsi.tc_change_pending = true;
- irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
-
- /* Wait for all qp's to suspend */
- wait_event_timeout(iwdev->suspend_wq,
- !atomic_read(&iwdev->vsi.qp_suspend_reqs),
- msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
- irdma_ws_reset(&iwdev->vsi);
-}
-
-static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
{
if (mtu < IRDMA_MIN_MTU_IPV4)
ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu);
@@ -58,35 +47,10 @@ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\\n", mtu);
}
-static void irdma_fill_qos_info(struct irdma_l2params *l2params,
- struct iidc_rdma_qos_params *qos_info)
+static void ig3rdma_idc_vport_event_handler(struct iidc_rdma_vport_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
{
- int i;
-
- l2params->num_tc = qos_info->num_tc;
- l2params->vsi_prio_type = qos_info->vport_priority_type;
- l2params->vsi_rel_bw = qos_info->vport_relative_bw;
- for (i = 0; i < l2params->num_tc; i++) {
- l2params->tc_info[i].egress_virt_up =
- qos_info->tc_info[i].egress_virt_up;
- l2params->tc_info[i].ingress_virt_up =
- qos_info->tc_info[i].ingress_virt_up;
- l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
- l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
- l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
- }
- for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
- l2params->up2tc[i] = qos_info->up2tc[i];
- if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) {
- l2params->dscp_mode = true;
- memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
- }
-}
-
-static void irdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
- struct iidc_rdma_event *event)
-{
- struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
+ struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev);
struct irdma_l2params l2params = {};
if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
@@ -97,248 +61,39 @@ static void irdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
irdma_change_l2params(&iwdev->vsi, &l2params);
}
- } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
- if (iwdev->vsi.tc_change_pending)
- return;
-
- irdma_prep_tc_change(iwdev);
- } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
- struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv;
-
- if (!iwdev->vsi.tc_change_pending)
- return;
-
- l2params.tc_changed = true;
- ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
-
- irdma_fill_qos_info(&l2params, &iidc_priv->qos_info);
- if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
- iwdev->dcb_vlan_mode =
- l2params.num_tc > 1 && !l2params.dscp_mode;
- irdma_change_l2params(&iwdev->vsi, &l2params);
- } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) {
- ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
- event->reg);
- if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
- u32 pe_criterr;
-
- pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
-#define IRDMA_Q1_RESOURCE_ERR 0x0001024d
- if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) {
- ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n",
- pe_criterr);
- iwdev->rf->reset = true;
- } else {
- ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n");
- }
- }
- if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) {
- ibdev_err(&iwdev->ibdev, "HMC Error\n");
- iwdev->rf->reset = true;
- }
- if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) {
- ibdev_err(&iwdev->ibdev, "PE Push Error\n");
- iwdev->rf->reset = true;
- }
- if (iwdev->rf->reset)
- iwdev->rf->gen_ops.request_reset(iwdev->rf);
}
}
-/**
- * irdma_request_reset - Request a reset
- * @rf: RDMA PCI function
- */
-static void irdma_request_reset(struct irdma_pci_f *rf)
-{
- ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
- ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET);
-}
-
-/**
- * irdma_lan_register_qset - Register qset with LAN driver
- * @vsi: vsi structure
- * @tc_node: Traffic class node
- */
-static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node)
-{
- struct irdma_device *iwdev = vsi->back_vsi;
- struct iidc_rdma_core_dev_info *cdev_info;
- struct iidc_rdma_qset_params qset = {};
- int ret;
-
- cdev_info = iwdev->rf->cdev;
- qset.qs_handle = tc_node->qs_handle;
- qset.tc = tc_node->traffic_class;
- qset.vport_id = vsi->vsi_idx;
- ret = ice_add_rdma_qset(cdev_info, &qset);
- if (ret) {
- ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
- return ret;
- }
-
- tc_node->l2_sched_node_id = qset.teid;
- vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid;
-
- return 0;
-}
-
-/**
- * irdma_lan_unregister_qset - Unregister qset with LAN driver
- * @vsi: vsi structure
- * @tc_node: Traffic class node
- */
-static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
- struct irdma_ws_node *tc_node)
+static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *id)
{
- struct irdma_device *iwdev = vsi->back_vsi;
- struct iidc_rdma_core_dev_info *cdev_info;
- struct iidc_rdma_qset_params qset = {};
-
- cdev_info = iwdev->rf->cdev;
- qset.qs_handle = tc_node->qs_handle;
- qset.tc = tc_node->traffic_class;
- qset.vport_id = vsi->vsi_idx;
- qset.teid = tc_node->l2_sched_node_id;
-
- if (ice_del_rdma_qset(cdev_info, &qset))
- ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
-}
-
-static int irdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
-{
- int i;
-
- rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
- rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
- GFP_KERNEL);
- if (!rf->msix_entries)
- return -ENOMEM;
-
- for (i = 0; i < rf->msix_count; i++)
- if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i]))
- break;
-
- if (i < IRDMA_MIN_MSIX) {
- while (--i >= 0)
- ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+ struct iidc_rdma_vport_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev);
+ struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev;
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev);
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ int err;
- kfree(rf->msix_entries);
+ if (!rf) {
+ WARN_ON_ONCE(1);
return -ENOMEM;
}
-
- rf->msix_count = i;
-
- return 0;
-}
-
-static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
-{
- int i;
-
- for (i = 0; i < rf->msix_count; i++)
- ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
-
- kfree(rf->msix_entries);
-}
-
-static void irdma_remove(struct auxiliary_device *aux_dev)
-{
- struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
- struct iidc_rdma_core_auxiliary_dev *iidc_adev;
- struct iidc_rdma_core_dev_info *cdev_info;
-
- iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
- cdev_info = iidc_adev->cdev_info;
-
- ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
- irdma_ib_unregister_device(iwdev);
- irdma_deinit_interrupts(iwdev->rf, cdev_info);
-
- kfree(iwdev->rf);
-
- pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(cdev_info->pdev->devfn));
-}
-
-static void irdma_fill_device_info(struct irdma_device *iwdev,
- struct iidc_rdma_core_dev_info *cdev_info)
-{
- struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv;
- struct irdma_pci_f *rf = iwdev->rf;
-
- rf->sc_dev.hw = &rf->hw;
- rf->iwdev = iwdev;
- rf->cdev = cdev_info;
- rf->hw.hw_addr = iidc_priv->hw_addr;
- rf->pcidev = cdev_info->pdev;
- rf->hw.device = &rf->pcidev->dev;
- rf->pf_id = iidc_priv->pf_id;
- rf->gen_ops.register_qset = irdma_lan_register_qset;
- rf->gen_ops.unregister_qset = irdma_lan_unregister_qset;
-
- rf->default_vsi.vsi_idx = iidc_priv->vport_id;
- rf->protocol_used =
- cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ?
- IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
- rf->rdma_ver = IRDMA_GEN_2;
- rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
- rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
- rf->gen_ops.request_reset = irdma_request_reset;
- rf->limits_sel = 7;
- rf->iwdev = iwdev;
-
- mutex_init(&iwdev->ah_tbl_lock);
-
- iwdev->netdev = iidc_priv->netdev;
- iwdev->vsi_num = iidc_priv->vport_id;
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ /* Fill iwdev info */
+ iwdev->is_vport = true;
+ iwdev->rf = rf;
+ iwdev->vport_id = idc_adev->vdev_info->vport_id;
+ iwdev->netdev = idc_adev->vdev_info->netdev;
iwdev->init_state = INITIAL_STATE;
iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
- if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY)
- iwdev->roce_mode = true;
-}
-
-static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
-{
- struct iidc_rdma_core_auxiliary_dev *iidc_adev;
- struct iidc_rdma_core_dev_info *cdev_info;
- struct iidc_rdma_priv_dev_info *iidc_priv;
- struct irdma_l2params l2params = {};
- struct irdma_device *iwdev;
- struct irdma_pci_f *rf;
- int err;
-
- iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
- cdev_info = iidc_adev->cdev_info;
- iidc_priv = cdev_info->iidc_priv;
-
- iwdev = ib_alloc_device(irdma_device, ibdev);
- if (!iwdev)
- return -ENOMEM;
- iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
- if (!iwdev->rf) {
- ib_dealloc_device(&iwdev->ibdev);
- return -ENOMEM;
- }
-
- irdma_fill_device_info(iwdev, cdev_info);
- rf = iwdev->rf;
-
- err = irdma_init_interrupts(rf, cdev_info);
- if (err)
- goto err_init_interrupts;
-
- err = irdma_ctrl_init_hw(rf);
- if (err)
- goto err_ctrl_init;
+ iwdev->roce_mode = true;
+ iwdev->push_mode = false;
l2params.mtu = iwdev->netdev->mtu;
- irdma_fill_qos_info(&l2params, &iidc_priv->qos_info);
- if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
- iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
err = irdma_rt_init_hw(iwdev, &l2params);
if (err)
@@ -348,43 +103,57 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
if (err)
goto err_ibreg;
- ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true);
-
- ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
auxiliary_set_drvdata(aux_dev, iwdev);
- return 0;
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] vport[%d] probe success. dev_name = %s, core_dev_name = %s, netdev=%s\n",
+ rf->rdma_ver, idc_adev->vdev_info->vport_id,
+ dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+ return 0;
err_ibreg:
irdma_rt_deinit_hw(iwdev);
err_rt_init:
- irdma_ctrl_deinit_hw(rf);
-err_ctrl_init:
- irdma_deinit_interrupts(rf, cdev_info);
-err_init_interrupts:
- kfree(iwdev->rf);
ib_dealloc_device(&iwdev->ibdev);
return err;
}
-static const struct auxiliary_device_id irdma_auxiliary_id_table[] = {
- {.name = "ice.iwarp", },
- {.name = "ice.roce", },
+/*
+ * ig3rdma_vport_remove - remove callback of the vport auxiliary driver
+ * @aux_dev: vport auxiliary device
+ *
+ * Emits a debug message naming the vport device, its core device and the
+ * netdev, then unregisters the IB device stored as driver data.
+ */
+static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev)
+{
+	struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+	struct iidc_rdma_vport_auxiliary_dev *vport_adev;
+
+	vport_adev = container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev,
+				  adev);
+
+	ibdev_dbg(&iwdev->ibdev,
+		  "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, netdev=%s\n",
+		  iwdev->rf->rdma_ver, dev_name(&aux_dev->dev),
+		  dev_name(&vport_adev->vdev_info->core_adev->dev),
+		  netdev_name(vport_adev->vdev_info->netdev));
+
+	irdma_ib_unregister_device(iwdev);
+}
+
+static const struct auxiliary_device_id ig3rdma_vport_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.vdev", },
{},
};
-MODULE_DEVICE_TABLE(auxiliary, irdma_auxiliary_id_table);
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_vport_auxiliary_id_table);
-static struct iidc_rdma_core_auxiliary_drv irdma_auxiliary_drv = {
+static struct iidc_rdma_vport_auxiliary_drv ig3rdma_vport_auxiliary_drv = {
.adrv = {
- .id_table = irdma_auxiliary_id_table,
- .probe = irdma_probe,
- .remove = irdma_remove,
+ .name = "vdev",
+ .id_table = ig3rdma_vport_auxiliary_id_table,
+ .probe = ig3rdma_vport_probe,
+ .remove = ig3rdma_vport_remove,
},
- .event_handler = irdma_iidc_event_handler,
+ .event_handler = ig3rdma_idc_vport_event_handler,
};
+
static int __init irdma_init_module(void)
{
int ret;
@@ -396,14 +165,34 @@ static int __init irdma_init_module(void)
return ret;
}
- ret = auxiliary_driver_register(&irdma_auxiliary_drv.adrv);
+ ret = auxiliary_driver_register(&icrdma_core_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed icrdma(gen_2) auxiliary_driver_register() ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&ig3rdma_core_auxiliary_drv.adrv);
if (ret) {
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
auxiliary_driver_unregister(&i40iw_auxiliary_drv);
- pr_err("Failed irdma auxiliary_driver_register() ret=%d\n",
+ pr_err("Failed ig3rdma(gen_3) core auxiliary_driver_register() ret=%d\n",
ret);
+
return ret;
}
+ ret = auxiliary_driver_register(&ig3rdma_vport_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed ig3rdma vport auxiliary_driver_register() ret=%d\n",
+ ret);
+
+ return ret;
+ }
irdma_register_notifiers();
return 0;
@@ -412,8 +201,10 @@ static int __init irdma_init_module(void)
static void __exit irdma_exit_module(void)
{
irdma_unregister_notifiers();
- auxiliary_driver_unregister(&irdma_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&ig3rdma_vport_auxiliary_drv.adrv);
}
module_init(irdma_init_module);
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 674acc952168..886b30da188a 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -30,7 +30,6 @@
#endif
#include <linux/auxiliary_bus.h>
#include <linux/net/intel/iidc_rdma.h>
-#include <linux/net/intel/iidc_rdma_ice.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
@@ -54,6 +53,8 @@
#include "puda.h"
extern struct auxiliary_driver i40iw_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv;
#define IRDMA_FW_VER_DEFAULT 2
#define IRDMA_HW_VER 2
@@ -65,7 +66,8 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_MACIP_ADD 1
#define IRDMA_MACIP_DELETE 2
-#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 1)
+#define IW_GEN_3_CCQ_SIZE (2 * IRDMA_CQP_SW_SQSIZE_2048 + 2)
+#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 2)
#define IW_CEQ_SIZE 2048
#define IW_AEQ_SIZE 2048
@@ -127,12 +129,12 @@ enum init_completion_state {
HMC_OBJS_CREATED,
HW_RSRC_INITIALIZED,
CCQ_CREATED,
- CEQ0_CREATED, /* Last state of probe */
- ILQ_CREATED,
- IEQ_CREATED,
+ CEQ0_CREATED,
CEQS_CREATED,
PBLE_CHUNK_MEM,
AEQ_CREATED,
+ ILQ_CREATED,
+ IEQ_CREATED, /* Last state of probe */
IP_ADDR_REGISTERED, /* Last state of open */
};
@@ -167,6 +169,7 @@ struct irdma_cqp_request {
bool request_done; /* READ/WRITE_ONCE macros operate on it */
bool waiting:1;
bool dynamic:1;
+ bool pending:1;
};
struct irdma_cqp {
@@ -179,6 +182,7 @@ struct irdma_cqp {
struct irdma_dma_mem host_ctx;
u64 *scratch_array;
struct irdma_cqp_request *cqp_requests;
+ struct irdma_ooo_cqp_op *oop_op_array;
struct list_head cqp_avail_reqs;
struct list_head cqp_pending_reqs;
};
@@ -257,6 +261,7 @@ struct irdma_pci_f {
bool reset:1;
bool rsrc_created:1;
bool msix_shared:1;
+ bool hwqp1_rsvd:1;
u8 rsrc_profile;
u8 *hmc_info_mem;
u8 *mem_rsrc;
@@ -269,6 +274,8 @@ struct irdma_pci_f {
u32 max_mr;
u32 max_qp;
u32 max_cq;
+ u32 max_srq;
+ u32 next_srq;
u32 max_ah;
u32 next_ah;
u32 max_mcg;
@@ -282,6 +289,7 @@ struct irdma_pci_f {
u32 mr_stagmask;
u32 used_pds;
u32 used_cqs;
+ u32 used_srqs;
u32 used_mrs;
u32 used_qps;
u32 arp_table_size;
@@ -293,6 +301,7 @@ struct irdma_pci_f {
unsigned long *allocated_ws_nodes;
unsigned long *allocated_qps;
unsigned long *allocated_cqs;
+ unsigned long *allocated_srqs;
unsigned long *allocated_mrs;
unsigned long *allocated_pds;
unsigned long *allocated_mcgs;
@@ -327,10 +336,13 @@ struct irdma_pci_f {
wait_queue_head_t vchnl_waitq;
struct workqueue_struct *cqp_cmpl_wq;
struct work_struct cqp_cmpl_work;
+ struct workqueue_struct *vchnl_wq;
struct irdma_sc_vsi default_vsi;
void *back_fcn;
struct irdma_gen_ops gen_ops;
struct irdma_device *iwdev;
+ DECLARE_HASHTABLE(ah_hash_tbl, 8);
+ struct mutex ah_tbl_lock; /* protect AH hash table access */
};
struct irdma_device {
@@ -340,8 +352,6 @@ struct irdma_device {
struct workqueue_struct *cleanup_wq;
struct irdma_sc_vsi vsi;
struct irdma_cm_core cm_core;
- DECLARE_HASHTABLE(ah_hash_tbl, 8);
- struct mutex ah_tbl_lock; /* protect AH hash table access */
u32 roce_cwnd;
u32 roce_ackcreds;
u32 vendor_id;
@@ -350,12 +360,14 @@ struct irdma_device {
u32 rcv_wnd;
u16 mac_ip_table_idx;
u16 vsi_num;
+ u16 vport_id;
u8 rcv_wscale;
u8 iw_status;
bool roce_mode:1;
bool roce_dcqcn_en:1;
bool dcb_vlan_mode:1;
bool iw_ooo:1;
+ bool is_vport:1;
enum init_completion_state init_state;
wait_queue_head_t suspend_wq;
@@ -413,6 +425,11 @@ static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev)
return container_of(dev, struct irdma_pci_f, sc_dev);
}
+/* Get the irdma_srq that embeds the given ib_srq as its ibsrq member */
+static inline struct irdma_srq *to_iwsrq(struct ib_srq *ibsrq)
+{
+	struct irdma_srq *iwsrq = container_of(ibsrq, struct irdma_srq, ibsrq);
+
+	return iwsrq;
+}
+
/**
* irdma_alloc_resource - allocate a resource
* @iwdev: device pointer
@@ -508,7 +525,8 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void irdma_cq_add_ref(struct ib_cq *ibcq);
void irdma_cq_rem_ref(struct ib_cq *ibcq);
void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
-
+void irdma_srq_event(struct irdma_sc_srq *srq);
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq);
void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
struct irdma_modify_qp_info *info, bool wait);
@@ -557,4 +575,5 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
void *ptr);
void irdma_add_ip(struct irdma_device *iwdev);
void cqp_compl_worker(struct work_struct *work);
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev);
#endif /* IRDMA_MAIN_H */
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
index 37ce35cb10e7..3091f9345f12 100644
--- a/drivers/infiniband/hw/irdma/pble.c
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -193,8 +193,15 @@ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev,
{
enum irdma_sd_entry_type sd_entry_type;
- sd_entry_type = !idx->rel_pd_idx && pages == IRDMA_HMC_PD_CNT_IN_SD ?
- IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ else
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD &&
+ dev->privileged) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
return sd_entry_type;
}
@@ -279,10 +286,11 @@ static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
sd_entry->u.pd_table.pd_page_addr.pa :
sd_entry->u.bp.addr.pa;
-
- if (!sd_entry->valid) {
- ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id, sd_reg_val,
- idx->sd_idx, sd_entry->entry_type, true);
+ if ((dev->privileged && !sd_entry->valid) ||
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+ sd_reg_val, idx->sd_idx,
+ sd_entry->entry_type, true);
if (ret_code)
goto error;
}
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index c0c9441885d3..324cfbf21764 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -10,6 +10,7 @@
#define ALL_TC2PFC 0xff
#define CQP_COMPL_WAIT_TIME_MS 10
#define CQP_TIMEOUT_THRESHOLD 500
+#define CQP_DEF_CMPL_TIMEOUT_THRESHOLD 2500
/* init operations */
int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index 2fc638f2b143..d65041bee667 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -91,7 +91,7 @@ struct irdma_puda_rsrc_info {
u32 rq_size;
u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
u16 buf_size;
- u8 stats_idx;
+ u16 stats_idx;
bool stats_idx_valid:1;
int abi_ver;
};
@@ -140,7 +140,7 @@ struct irdma_puda_rsrc {
u64 crc_err;
u64 pmode_count;
u64 partials_handled;
- u8 stats_idx;
+ u16 stats_idx;
bool check_crc:1;
bool stats_idx_valid:1;
};
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index 527c6da2c1ac..4ae77cdde9dc 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -8,6 +8,8 @@
#include "hmc.h"
#include "uda.h"
#include "ws.h"
+#include "virtchnl.h"
+
#define IRDMA_DEBUG_ERR "ERR"
#define IRDMA_DEBUG_INIT "INIT"
#define IRDMA_DEBUG_DEV "DEV"
@@ -95,12 +97,6 @@ enum irdma_term_mpa_errors {
MPA_REQ_RSP = 0x04,
};
-enum irdma_qp_event_type {
- IRDMA_QP_EVENT_CATASTROPHIC,
- IRDMA_QP_EVENT_ACCESS_ERR,
- IRDMA_QP_EVENT_REQ_ERR,
-};
-
enum irdma_hw_stats_index {
/* gen1 - 32-bit */
IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0,
@@ -154,12 +150,46 @@ enum irdma_hw_stats_index {
IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED = 44,
IRDMA_HW_STAT_INDEX_TXNPCNPSENT = 45,
IRDMA_HW_STAT_INDEX_MAX_GEN_2 = 46,
+
+ /* gen3 */
+ IRDMA_HW_STAT_INDEX_RNR_SENT = 46,
+ IRDMA_HW_STAT_INDEX_RNR_RCVD = 47,
+ IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT = 48,
+ IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT = 49,
+ IRDMA_HW_STAT_INDEX_RDMARXATS = 50,
+ IRDMA_HW_STAT_INDEX_RDMATXATS = 51,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR = 52,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED = 53,
+ IRDMA_HW_STAT_INDEX_RTO = 54,
+ IRDMA_HW_STAT_INDEX_RXOOOPKTS = 55,
+ IRDMA_HW_STAT_INDEX_ICRCERR = 56,
+
+ IRDMA_HW_STAT_INDEX_MAX_GEN_3 = 57,
};
enum irdma_feature_type {
IRDMA_FEATURE_FW_INFO = 0,
IRDMA_HW_VERSION_INFO = 1,
+ IRDMA_QP_MAX_INCR = 2,
+ IRDMA_CQ_MAX_INCR = 3,
+ IRDMA_CEQ_MAX_INCR = 4,
+ IRDMA_SD_MAX_INCR = 5,
+ IRDMA_MR_MAX_INCR = 6,
+ IRDMA_Q1_MAX_INCR = 7,
+ IRDMA_AH_MAX_INCR = 8,
+ IRDMA_SRQ_MAX_INCR = 9,
+ IRDMA_TIMER_MAX_INCR = 10,
+ IRDMA_XF_MAX_INCR = 11,
+ IRDMA_RRF_MAX_INCR = 12,
+ IRDMA_PBLE_MAX_INCR = 13,
+ IRDMA_OBJ_1 = 22,
+ IRDMA_OBJ_2 = 23,
+ IRDMA_ENDPT_TRK = 24,
+ IRDMA_FTN_INLINE_MAX = 25,
IRDMA_QSETS_MAX = 26,
+ IRDMA_ASO = 27,
+ IRDMA_FTN_FLAGS = 32,
+ IRDMA_FTN_NOP = 33,
IRDMA_MAX_FEATURES, /* Must be last entry */
};
@@ -206,6 +236,7 @@ enum irdma_syn_rst_handling {
enum irdma_queue_type {
IRDMA_QUEUE_TYPE_SQ_RQ = 0,
IRDMA_QUEUE_TYPE_CQP,
+ IRDMA_QUEUE_TYPE_SRQ,
};
struct irdma_sc_dev;
@@ -233,12 +264,22 @@ struct irdma_cqp_init_info {
__le64 *host_ctx;
u64 *scratch_array;
u32 sq_size;
+ struct irdma_ooo_cqp_op *ooo_op_array;
+ u32 pe_en_vf_cnt;
u16 hw_maj_ver;
u16 hw_min_ver;
u8 struct_ver;
u8 hmc_profile;
u8 ena_vf_count;
u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
bool en_datacenter_tcp:1;
bool disable_packed:1;
bool rocev2_rto_policy:1;
@@ -310,9 +351,21 @@ struct irdma_vsi_pestat {
spinlock_t lock; /* rdma stats lock */
};
+struct irdma_mmio_region { /* describes one MMIO window: an __iomem pointer plus two resource_size_t values */
+ u8 __iomem *addr; /* __iomem pointer for this region */
+ resource_size_t len; /* NOTE(review): presumably the region length in bytes - confirm */
+ resource_size_t offset; /* NOTE(review): presumably the region's offset inside its BAR - confirm */
+};
+
struct irdma_hw {
- u8 __iomem *hw_addr;
- u8 __iomem *priv_hw_addr;
+ union { /* hw_addr shares storage with the anonymous struct below, so it aliases rdma_reg.addr (first member of struct irdma_mmio_region) */
+ u8 __iomem *hw_addr;
+ struct {
+ struct irdma_mmio_region rdma_reg; /* RDMA region */
+ struct irdma_mmio_region *io_regs; /* Non-RDMA MMIO regions */
+ u16 num_io_regions; /* Number of Non-RDMA MMIO regions */
+ };
+ };
struct device *device;
struct irdma_hmc_info hmc;
};
@@ -351,7 +404,21 @@ struct irdma_cqp_quanta {
__le64 elem[IRDMA_CQP_WQE_SIZE];
};
+struct irdma_ooo_cqp_op { /* per-operation tracking entry; irdma_sc_cqp and irdma_cqp_init_info carry an array of these in ooo_op_array */
+ struct list_head list_entry; /* NOTE(review): presumably links the entry into irdma_sc_cqp ooo_avail / ooo_pnd - confirm */
+ u64 scratch; /* NOTE(review): presumably the scratch value of the tracked CQP op - confirm */
+ u32 def_info; /* NOTE(review): looks related to irdma_aeqe_info.def_info (valid for DEF_CMPL) - confirm */
+ u32 sw_def_info;
+ u32 wqe_idx;
+ bool deferred:1;
+};
+
struct irdma_sc_cqp {
+ spinlock_t ooo_list_lock; /* protects list of pending completions */
+ struct list_head ooo_avail;
+ struct list_head ooo_pnd;
+ u32 last_def_cmpl_ticket;
+ u32 sw_def_cmpl_ticket;
u32 size;
u64 sq_pa;
u64 host_ctx_pa;
@@ -367,8 +434,10 @@ struct irdma_sc_cqp {
u64 *scratch_array;
u64 requested_ops;
atomic64_t completed_ops;
+ struct irdma_ooo_cqp_op *ooo_op_array;
u32 cqp_id;
u32 sq_size;
+ u32 pe_en_vf_cnt;
u32 hw_sq_size;
u16 hw_maj_ver;
u16 hw_min_ver;
@@ -378,6 +447,14 @@ struct irdma_sc_cqp {
u8 ena_vf_count;
u8 timeout_count;
u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
bool en_datacenter_tcp:1;
bool disable_packed:1;
bool rocev2_rto_policy:1;
@@ -397,6 +474,8 @@ struct irdma_sc_aeq {
u32 msix_idx;
u8 polarity;
bool virtual_map:1;
+ bool pasid_valid:1;
+ u32 pasid;
};
struct irdma_sc_ceq {
@@ -412,13 +491,15 @@ struct irdma_sc_ceq {
u8 tph_val;
u32 first_pm_pbl_idx;
u8 polarity;
- struct irdma_sc_vsi *vsi;
+ u16 vsi_idx;
struct irdma_sc_cq **reg_cq;
u32 reg_cq_size;
spinlock_t req_cq_lock; /* protect access to reg_cq array */
bool virtual_map:1;
bool tph_en:1;
bool itr_no_expire:1;
+ bool pasid_valid:1;
+ u32 pasid;
};
struct irdma_sc_cq {
@@ -426,6 +507,7 @@ struct irdma_sc_cq {
u64 cq_pa;
u64 shadow_area_pa;
struct irdma_sc_dev *dev;
+ u16 vsi_idx;
struct irdma_sc_vsi *vsi;
void *pbl_list;
void *back_cq;
@@ -477,8 +559,13 @@ struct irdma_sc_qp {
bool virtual_map:1;
bool flush_sq:1;
bool flush_rq:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
+ u32 err_sq_idx;
+ u32 err_rq_idx;
bool sq_flush_code:1;
bool rq_flush_code:1;
+ u32 pkt_limit;
enum irdma_flush_opcode flush_code;
enum irdma_qp_event_type event_type;
u8 term_flags;
@@ -489,13 +576,13 @@ struct irdma_sc_qp {
struct irdma_stats_inst_info {
bool use_hmc_fcn_index;
u8 hmc_fn_id;
- u8 stats_idx;
+ u16 stats_idx;
};
struct irdma_up_info {
u8 map[8];
u8 cnp_up_override;
- u8 hmc_fcn_idx;
+ u16 hmc_fcn_idx;
bool use_vlan:1;
bool use_cnp_up_override:1;
};
@@ -518,6 +605,8 @@ struct irdma_ws_node_info {
struct irdma_hmc_fpm_misc {
u32 max_ceqs;
u32 max_sds;
+ u32 loc_mem_pages;
+ u8 ird;
u32 xf_block_size;
u32 q1_block_size;
u32 ht_multiplier;
@@ -526,6 +615,7 @@ struct irdma_hmc_fpm_misc {
u32 ooiscf_block_size;
};
+#define IRDMA_VCHNL_MAX_MSG_SIZE 512
#define IRDMA_LEAF_DEFAULT_REL_BW 64
#define IRDMA_PARENT_DEFAULT_REL_BW 1
@@ -601,19 +691,28 @@ struct irdma_sc_dev {
u64 cqp_cmd_stats[IRDMA_MAX_CQP_OPS];
struct irdma_hw_attrs hw_attrs;
struct irdma_hmc_info *hmc_info;
+ struct irdma_vchnl_rdma_caps vc_caps;
+ u8 vc_recv_buf[IRDMA_VCHNL_MAX_MSG_SIZE];
+ u16 vc_recv_len;
struct irdma_sc_cqp *cqp;
struct irdma_sc_aeq *aeq;
struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT];
struct irdma_sc_cq *ccq;
const struct irdma_irq_ops *irq_ops;
+ struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
struct irdma_hmc_fpm_misc hmc_fpm_misc;
struct irdma_ws_node *ws_tree_root;
struct mutex ws_mutex; /* ws tree mutex */
+ u32 vchnl_ver;
u16 num_vfs;
- u8 hmc_fn_id;
+ u16 hmc_fn_id;
u8 vf_id;
+ bool privileged:1;
bool vchnl_up:1;
bool ceq_valid:1;
+ bool is_pf:1;
+ u8 protocol_used;
+ struct mutex vchnl_mutex; /* mutex to synchronize RDMA virtual channel messages */
u8 pci_rev;
int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri);
void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri);
@@ -632,6 +731,51 @@ struct irdma_modify_cq_info {
bool cq_resize:1;
};
+struct irdma_srq_init_info { /* parameter block taken by irdma_sc_srq_init() */
+ struct irdma_sc_pd *pd;
+ struct irdma_sc_vsi *vsi;
+ u64 srq_pa; /* NOTE(review): presumably the physical address of the SRQ buffer - confirm */
+ u64 shadow_area_pa; /* NOTE(review): presumably the physical address of the shadow area - confirm */
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 wqe_size;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_value;
+ u8 pbl_chunk_size;
+ struct irdma_srq_uk_init_info srq_uk_init_info; /* same type that irdma_uk_srq_init() takes as its info argument */
+};
+
+struct irdma_sc_srq { /* SRQ object set up by irdma_sc_srq_init(); cqp_info's srq_create/srq_modify/srq_destroy entries point at it */
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_sc_pd *pd;
+ struct irdma_srq_uk srq_uk; /* embedded user/kernel-shared SRQ state */
+ void *back_srq; /* NOTE(review): presumably an opaque pointer back to the owning upper-layer SRQ - confirm */
+ u64 srq_pa;
+ u64 shadow_area_pa;
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 hw_srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_val;
+};
+
+struct irdma_modify_srq_info { /* payload of cqp_info.u.srq_modify.info */
+ u16 srq_limit; /* same name and type as irdma_sc_srq.srq_limit */
+ u8 arm_limit_event; /* same name and type as irdma_sc_srq.arm_limit_event */
+};
+
struct irdma_create_qp_info {
bool ord_valid:1;
bool tcp_ctx_valid:1;
@@ -671,7 +815,8 @@ struct irdma_ccq_cqe_info {
u16 maj_err_code;
u16 min_err_code;
u8 op_code;
- bool error;
+ bool error:1;
+ bool pending:1;
};
struct irdma_dcb_app_info {
@@ -720,7 +865,7 @@ struct irdma_vsi_init_info {
struct irdma_vsi_stats_info {
struct irdma_vsi_pestat *pestat;
- u8 fcn_id;
+ u16 fcn_id;
bool alloc_stats_inst;
};
@@ -731,7 +876,8 @@ struct irdma_device_init_info {
__le64 *fpm_commit_buf;
struct irdma_hw *hw;
void __iomem *bar0;
- u8 hmc_fn_id;
+ enum irdma_protocol_used protocol_used;
+ u16 hmc_fn_id;
};
struct irdma_ceq_init_info {
@@ -746,8 +892,8 @@ struct irdma_ceq_init_info {
bool itr_no_expire:1;
u8 pbl_chunk_size;
u8 tph_val;
+ u16 vsi_idx;
u32 first_pm_pbl_idx;
- struct irdma_sc_vsi *vsi;
struct irdma_sc_cq **reg_cq;
u32 reg_cq_idx;
};
@@ -807,6 +953,8 @@ struct irdma_udp_offload_info {
u32 cwnd;
u8 rexmit_thresh;
u8 rnr_nak_thresh;
+ u8 rnr_nak_tmr;
+ u8 min_rnr_timer;
};
struct irdma_roce_offload_info {
@@ -833,6 +981,7 @@ struct irdma_roce_offload_info {
bool dctcp_en:1;
bool fw_cc_enable:1;
bool use_stats_inst:1;
+ u8 local_ack_timeout;
u16 t_high;
u16 t_low;
u8 last_byte_sent;
@@ -933,8 +1082,10 @@ struct irdma_qp_host_ctx_info {
};
u32 send_cq_num;
u32 rcv_cq_num;
+ u32 srq_id;
u32 rem_endpoint_idx;
- u8 stats_idx;
+ u16 stats_idx;
+ bool remote_atomics_en:1;
bool srq_valid:1;
bool tcp_info_valid:1;
bool iwarp_info_valid:1;
@@ -945,6 +1096,7 @@ struct irdma_qp_host_ctx_info {
struct irdma_aeqe_info {
u64 compl_ctx;
u32 qp_cq_id;
+ u32 def_info; /* only valid for DEF_CMPL */
u16 ae_id;
u16 wqe_idx;
u8 tcp_state;
@@ -953,9 +1105,11 @@ struct irdma_aeqe_info {
bool cq:1;
bool sq:1;
bool rq:1;
+ bool srq:1;
bool in_rdrsp_wr:1;
bool out_rdrsp:1;
bool aeqe_overflow:1;
+ bool err_rq_idx_valid:1;
u8 q2_data_written;
u8 ae_src;
};
@@ -972,7 +1126,8 @@ struct irdma_allocate_stag_info {
bool use_hmc_fcn_index:1;
bool use_pf_rid:1;
bool all_memory:1;
- u8 hmc_fcn_index;
+ bool remote_atomics_en:1;
+ u16 hmc_fcn_index;
};
struct irdma_mw_alloc_info {
@@ -1000,6 +1155,7 @@ struct irdma_reg_ns_stag_info {
u8 hmc_fcn_index;
bool use_pf_rid:1;
bool all_memory:1;
+ bool remote_atomics_en:1;
};
struct irdma_fast_reg_stag_info {
@@ -1023,6 +1179,7 @@ struct irdma_fast_reg_stag_info {
u8 hmc_fcn_index;
bool use_pf_rid:1;
bool defer_flag:1;
+ bool remote_atomics_en:1;
};
struct irdma_dealloc_stag_info {
@@ -1130,6 +1287,8 @@ struct irdma_cqp_manage_push_page_info {
};
struct irdma_qp_flush_info {
+ u32 err_sq_idx;
+ u32 err_rq_idx;
u16 sq_minor_code;
u16 sq_major_code;
u16 rq_minor_code;
@@ -1140,6 +1299,8 @@ struct irdma_qp_flush_info {
bool rq:1;
bool userflushcode:1;
bool generate_ae:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
};
struct irdma_gen_ae_info {
@@ -1189,6 +1350,11 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i
void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable);
void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout,
struct irdma_sc_dev *dev);
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info);
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev);
int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err);
int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp);
int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
@@ -1224,6 +1390,8 @@ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *inf
int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
u8 hmc_fn_id, bool post_sq,
bool poll_registers);
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info);
void sc_vsi_update_stats(struct irdma_sc_vsi *vsi);
struct cqp_info {
@@ -1467,6 +1635,23 @@ struct cqp_info {
struct irdma_dma_mem query_buff_mem;
u64 scratch;
} query_rdma;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_create;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ struct irdma_modify_srq_info info;
+ u64 scratch;
+ } srq_modify;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_destroy;
+
} u;
};
diff --git a/drivers/infiniband/hw/irdma/uda_d.h b/drivers/infiniband/hw/irdma/uda_d.h
index 5a9e6eabf032..4fb4daa20722 100644
--- a/drivers/infiniband/hw/irdma/uda_d.h
+++ b/drivers/infiniband/hw/irdma/uda_d.h
@@ -78,8 +78,7 @@
#define IRDMA_UDAQPC_IPID GENMASK_ULL(47, 32)
#define IRDMA_UDAQPC_SNDMSS GENMASK_ULL(29, 16)
#define IRDMA_UDAQPC_VLANTAG GENMASK_ULL(15, 0)
-
-#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(21, 20)
+#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(27, 20)
#define IRDMA_UDA_CQPSQ_MAV_PDINDEXLO GENMASK_ULL(63, 48)
#define IRDMA_UDA_CQPSQ_MAV_SRCMACADDRINDEX GENMASK_ULL(29, 24)
#define IRDMA_UDA_CQPSQ_MAV_ARPINDEX GENMASK_ULL(63, 48)
@@ -94,7 +93,7 @@
#define IRDMA_UDA_CQPSQ_MAV_OPCODE GENMASK_ULL(37, 32)
#define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK BIT_ULL(62)
#define IRDMA_UDA_CQPSQ_MAV_IPV4VALID BIT_ULL(59)
-#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(16, 0)
+#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(23, 0)
#define IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG BIT_ULL(60)
#define IRDMA_UDA_MGCTX_VFFLAG BIT_ULL(29)
#define IRDMA_UDA_MGCTX_DESTPORT GENMASK_ULL(47, 32)
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 38c54e59cc2e..ce1ae10c30fc 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -198,6 +198,26 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
return wqe;
}
+/**
+ * irdma_srq_get_next_recv_wqe - get next srq's rcv wqe
+ * @srq: hw srq ptr
+ * @wqe_idx: return wqe index
+ *
+ * Return: pointer to the claimed WQE, or NULL when the SRQ ring is full or
+ * the ring head could not be advanced.
+ */
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx)
+{
+	int err;
+
+	if (IRDMA_RING_FULL_ERR(srq->srq_ring))
+		return NULL;
+
+	IRDMA_ATOMIC_RING_MOVE_HEAD(srq->srq_ring, *wqe_idx, err);
+	if (err)
+		return NULL;
+
+	/* index 0 is handed out again: flip the polarity used for valid bits */
+	if (*wqe_idx == 0)
+		srq->srwqe_polarity = !srq->srwqe_polarity;
+
+	/* wqe_size_multiplier is no of 32 byte quanta in one srq wqe */
+	return srq->srq_base[*wqe_idx * srq->wqe_size_multiplier].elem;
+}
+
/**
* irdma_qp_get_next_recv_wqe - get next qp's rcv wqe
* @qp: hw qp ptr
@@ -318,6 +338,160 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
}
/**
+ * irdma_uk_atomic_fetch_add - atomic fetch and add operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * Fills a send WQE from info->op.atomic_fetch_add. All data words are
+ * stored first; the header word (offset 24), which carries the valid bit,
+ * is stored only after dma_wmb().
+ *
+ * Return: 0 on success, -ENOMEM when no send WQE could be obtained.
+ */
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_fetch_add *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2; /* two quanta are requested; offsets 0-56 are filled below */
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_fetch_add;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_FETCH_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->fetch_add_data_bytes); /* the add operand */
+ set_64bit_val(wqe, 40, 0);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); /* last word carries only the valid bit */
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr); /* header is stored last, after the barrier */
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_atomic_compare_swap - atomic compare and swap operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * Fills a send WQE from info->op.atomic_compare_swap. All data words are
+ * stored first; the header word (offset 24), which carries the valid bit,
+ * is stored only after dma_wmb().
+ *
+ * Return: 0 on success, -ENOMEM when no send WQE could be obtained.
+ */
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_compare_swap *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2; /* two quanta are requested; offsets 0-56 are filled below */
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_compare_swap;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->swap_data_bytes); /* swap operand at offset 32 */
+ set_64bit_val(wqe, 40, op_info->compare_data_bytes); /* compare operand at offset 40 */
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); /* last word carries only the valid bit */
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr); /* header is stored last, after the barrier */
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_srq_post_receive - post a receive wqe to a shared rq
+ * @srq: shared rq ptr
+ * @info: post rq information
+ *
+ * Writes the scatter list of @info into the next free SRQ WQE, stores
+ * info->wr_id at WQE offset 16, stores the header (offset 24) after
+ * dma_wmb(), and finally writes the index following this WQE, modulo the
+ * ring size, to offset 0 of the shadow area.
+ *
+ * Return: 0 on success, -EINVAL when info->num_sges exceeds
+ * srq->max_srq_frag_cnt, -ENOMEM when no SRQ WQE is available.
+ */
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info)
+{
+ u32 wqe_idx, i, byte_off;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u64 hdr;
+
+ if (srq->max_srq_frag_cnt < info->num_sges)
+ return -EINVAL;
+
+ wqe = irdma_srq_get_next_recv_wqe(srq, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ addl_frag_cnt = info->num_sges > 1 ? info->num_sges - 1 : 0;
+ srq->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+ srq->srwqe_polarity); /* first fragment lives at offset 0 */
+
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) { /* further fragments start at offset 32, 16 bytes apart */
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+ srq->srwqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (srq->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) &&
+ info->num_sges) {
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ srq->srwqe_polarity);
+ if (srq->uk_attrs->hw_rev == IRDMA_GEN_2) /* only GEN_2 counts the extra fragment */
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16, (u64)info->wr_id);
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_VALID, srq->srwqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr); /* header is stored last, after the barrier */
+
+ set_64bit_val(srq->shadow_area, 0, (wqe_idx + 1) % srq->srq_ring.size); /* index after this WQE, wrapped to the ring size */
+
+ return 0;
+}
+
+/**
* irdma_uk_rdma_read - rdma read command
* @qp: hw qp ptr
* @info: post sq information
@@ -973,6 +1147,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
u64 comp_ctx, qword0, qword2, qword3;
__le64 *cqe;
struct irdma_qp_uk *qp;
+ struct irdma_srq_uk *srq;
+ struct qp_err_code qp_err;
+ u8 is_srq;
struct irdma_ring *pring = NULL;
u32 wqe_idx;
int ret_code;
@@ -1046,21 +1223,46 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
}
info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ is_srq = (u8)FIELD_GET(IRDMA_CQ_SRQ, qword3);
info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if (is_srq)
+ get_64bit_val(cqe, 40, (u64 *)&qp);
+ else
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
if (info->error) {
info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3);
- if (info->major_err == IRDMA_FLUSH_MAJOR_ERR) {
- info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ switch (info->major_err) {
+ case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR:
+ qp_err = irdma_ae_to_qp_err_code(info->minor_err);
+ info->minor_err = qp_err.flush_code;
+ fallthrough;
+ case IRDMA_FLUSH_MAJOR_ERR:
/* Set the min error to standard flush error code for remaining cqes */
if (info->minor_err != FLUSH_GENERAL_ERR) {
qword3 &= ~IRDMA_CQ_MINERR;
qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR);
set_64bit_val(cqe, 24, qword3);
}
- } else {
- info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+ info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ break;
+ default:
+#define IRDMA_CIE_SIGNATURE 0xE
+#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12)
+ if (info->q_type == IRDMA_CQE_QTYPE_SQ &&
+ qp->qp_type == IRDMA_QP_TYPE_ROCE_UD &&
+ FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err)
+ == IRDMA_CIE_SIGNATURE) {
+ info->error = 0;
+ info->major_err = 0;
+ info->minor_err = 0;
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+ }
+ break;
}
} else {
info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
@@ -1069,7 +1271,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
get_64bit_val(cqe, 0, &qword0);
get_64bit_val(cqe, 16, &qword2);
- info->tcp_seq_num_rtt = (u32)FIELD_GET(IRDMACQ_TCPSEQNUMRTT, qword0);
info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2);
@@ -1085,7 +1286,22 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
- if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) {
+ srq = qp->srq_uk;
+
+ get_64bit_val(cqe, 8, &info->wr_id);
+ info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+
+ if (qword3 & IRDMACQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG,
+ qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ IRDMA_RING_MOVE_TAIL(srq->srq_ring);
+ pring = &srq->srq_ring;
+ } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) {
u32 array_idx;
array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
@@ -1180,9 +1396,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
ret_code = 0;
exit:
- if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED)
+ if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
if (pring && IRDMA_RING_MORE_WORK(*pring))
- move_cq_head = false;
+ /* Park CQ head during a flush to generate additional CQEs
+ * from SW for all unprocessed WQEs. For GEN3 and beyond
+ * FW will generate/flush these CQEs so move to the next CQE
+ */
+ move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
+ false : true;
+ }
if (move_cq_head) {
IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
@@ -1210,10 +1432,10 @@ exit:
}
/**
- * irdma_qp_round_up - return round up qp wq depth
+ * irdma_round_up_wq - return round up qp wq depth
* @wqdepth: wq depth in quanta to round up
*/
-static int irdma_qp_round_up(u32 wqdepth)
+static int irdma_round_up_wq(u32 wqdepth)
{
int scount = 1;
@@ -1268,7 +1490,7 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
{
u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
- *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD);
+ *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD);
if (*sqdepth < min_size)
*sqdepth = min_size;
@@ -1290,7 +1512,7 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
{
u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
- *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD);
+ *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD);
if (*rqdepth < min_size)
*rqdepth = min_size;
@@ -1300,6 +1522,26 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
return 0;
}
+/**
+ * irdma_get_srqdepth - get SRQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @srq_size: SRQ size
+ * @shift: shift which determines size of WQE
+ * @srqdepth: depth of SRQ
+ *
+ * The rounded-up depth is always stored in @srqdepth; it is raised to the
+ * minimum HW size when it falls below it.
+ *
+ * Return: 0 on success, -EINVAL when the rounded-up depth exceeds
+ * uk_attrs->max_hw_srq_quanta.
+ */
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+		       u32 *srqdepth)
+{
+	u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+	u32 depth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD);
+
+	*srqdepth = depth;
+	if (depth < min_size) {
+		*srqdepth = min_size;
+		return 0;
+	}
+
+	return depth > uk_attrs->max_hw_srq_quanta ? -EINVAL : 0;
+}
+
static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = {
.iw_copy_inline_data = irdma_copy_inline_data,
.iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta,
@@ -1336,6 +1578,42 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
}
/**
+ * irdma_uk_srq_init - initialize shared receive queue
+ * @srq: hw srq (user and kernel)
+ * @info: srq initialization info
+ *
+ * Sets up the state shared by user and kernel mode. The WQE size follows
+ * from the maximum fragment count, and WQE size times the number of WQEs
+ * should equal the memory allocated for the srq.
+ *
+ * Return: 0 on success, -EINVAL when info->max_srq_frag_cnt exceeds the
+ * HW fragment limit.
+ */
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+		      struct irdma_srq_uk_init_info *info)
+{
+	struct irdma_uk_attrs *attrs = info->uk_attrs;
+	u8 shift;
+
+	srq->uk_attrs = attrs;
+	if (attrs->max_hw_wq_frags < info->max_srq_frag_cnt)
+		return -EINVAL;
+
+	irdma_get_wqe_shift(attrs, info->max_srq_frag_cnt, 0, &shift);
+
+	/* values copied straight from the caller's init info */
+	srq->srq_base = info->srq;
+	srq->shadow_area = info->shadow_area;
+	srq->srq_id = info->srq_id;
+	srq->srq_caps = info->srq_caps;
+	srq->srq_size = info->srq_size;
+
+	/* geometry derived from the WQE shift */
+	srq->wqe_size = shift;
+	srq->wqe_size_multiplier = 1 << shift;
+	srq->max_srq_frag_cnt = min(attrs->max_hw_wq_frags,
+				    ((u32)2 << shift) - 1);
+
+	srq->srwqe_polarity = 0;
+	IRDMA_RING_INIT(srq->srq_ring, srq->srq_size);
+	srq->wqe_ops = iw_wqe_uk_ops;
+
+	return 0;
+}
+
+/**
* irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ
* @ukinfo: qp initialization info
* @sq_shift: Returns shift of SQ
@@ -1461,6 +1739,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
qp->wqe_ops = iw_wqe_uk_ops_gen_1;
else
qp->wqe_ops = iw_wqe_uk_ops;
+ qp->srq_uk = info->srq_uk;
return ret_code;
}
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index 380e4a47aede..ab57f689827a 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -41,10 +41,114 @@
#define IRDMA_OP_TYPE_INV_STAG 0x0a
#define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b
#define IRDMA_OP_TYPE_NOP 0x0c
+#define IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD 0x0f
+#define IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP 0x11
#define IRDMA_OP_TYPE_REC 0x3e
#define IRDMA_OP_TYPE_REC_IMM 0x3f
-#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe
+
+/* Async Events codes */
+#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102
+#define IRDMA_AE_AMP_INVALID_STAG 0x0103
+#define IRDMA_AE_AMP_BAD_QP 0x0104
+#define IRDMA_AE_AMP_BAD_PD 0x0105
+#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106
+#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107
+#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108
+#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109
+#define IRDMA_AE_AMP_TO_WRAP 0x010a
+#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c
+#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d
+#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
+#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
+#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
+#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
+#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
+#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
+#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
+#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
+#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
+#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
+#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
+#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
+#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b
+#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c
+#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d
+#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e
+#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f
+#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120
+#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
+#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
+#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135
+#define IRDMA_AE_BAD_CLOSE 0x0201
+#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
+#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203
+#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
+#define IRDMA_AE_STAG_ZERO_INVALID 0x0206
+#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207
+#define IRDMA_AE_IB_INVALID_REQUEST 0x0208
+#define IRDMA_AE_SRQ_LIMIT 0x0209
+#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a
+#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b
+#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c
+#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
+#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
+#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f
+#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
+#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221
+#define IRDMA_AE_ATOMIC_MASK 0x0222
+#define IRDMA_AE_INVALID_REQUEST 0x0223
+#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224
+#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
+#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
+#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
+#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305
+#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
+#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307
+#define IRDMA_AE_DDP_NO_L_BIT 0x0308
+#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
+#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
+#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
+#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
+#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316
+#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380
+#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381
+#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382
+#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383
+#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401
+#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402
+#define IRDMA_AE_STALE_ARP_ENTRY 0x0403
+#define IRDMA_AE_INVALID_AH_ENTRY 0x0406
+#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501
+#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502
+#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503
+#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
+#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
+#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507
+#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508
+#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509
+#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a
+#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
+#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
+#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
+#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
+#define IRDMA_AE_RESET_SENT 0x0601
+#define IRDMA_AE_TERMINATE_SENT 0x0602
+#define IRDMA_AE_RESET_NOT_SENT 0x0603
+#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700
+#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
+#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
+#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703
+#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704
+#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705
+#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
+#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901
+#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B
enum irdma_device_caps_const {
IRDMA_WQE_SIZE = 4,
@@ -55,11 +159,12 @@ enum irdma_device_caps_const {
IRDMA_CEQE_SIZE = 1,
IRDMA_CQP_CTX_SIZE = 8,
IRDMA_SHADOW_AREA_SIZE = 8,
- IRDMA_QUERY_FPM_BUF_SIZE = 176,
- IRDMA_COMMIT_FPM_BUF_SIZE = 176,
+ IRDMA_QUERY_FPM_BUF_SIZE = 192,
+ IRDMA_COMMIT_FPM_BUF_SIZE = 192,
IRDMA_GATHER_STATS_BUF_SIZE = 1024,
IRDMA_MIN_IW_QP_ID = 0,
IRDMA_MAX_IW_QP_ID = 262143,
+ IRDMA_MIN_IW_SRQ_ID = 0,
IRDMA_MIN_CEQID = 0,
IRDMA_MAX_CEQID = 1023,
IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1,
@@ -67,6 +172,7 @@ enum irdma_device_caps_const {
IRDMA_MAX_CQID = 524287,
IRDMA_MIN_AEQ_ENTRIES = 1,
IRDMA_MAX_AEQ_ENTRIES = 524287,
+ IRDMA_MAX_AEQ_ENTRIES_GEN_3 = 262144,
IRDMA_MIN_CEQ_ENTRIES = 1,
IRDMA_MAX_CEQ_ENTRIES = 262143,
IRDMA_MIN_CQ_SIZE = 1,
@@ -105,6 +211,13 @@ enum irdma_flush_opcode {
FLUSH_RETRY_EXC_ERR,
FLUSH_MW_BIND_ERR,
FLUSH_REM_INV_REQ_ERR,
+ FLUSH_RNR_RETRY_EXC_ERR,
+};
+
+enum irdma_qp_event_type { /* moved here from type.h by this patch; stored in struct qp_err_code.event_type and irdma_sc_qp.event_type */
+ IRDMA_QP_EVENT_CATASTROPHIC,
+ IRDMA_QP_EVENT_ACCESS_ERR,
+ IRDMA_QP_EVENT_REQ_ERR,
};
enum irdma_cmpl_status {
@@ -147,6 +260,8 @@ enum irdma_qp_caps {
IRDMA_PUSH_MODE = 8,
};
+struct irdma_srq_uk;
+struct irdma_srq_uk_init_info;
struct irdma_qp_uk;
struct irdma_cq_uk;
struct irdma_qp_uk_init_info;
@@ -201,6 +316,24 @@ struct irdma_bind_window {
bool ena_writes:1;
irdma_stag mw_stag;
bool mem_window_type_1:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_atomic_fetch_add { /* operands read by irdma_uk_atomic_fetch_add() via info->op.atomic_fetch_add */
+ u64 tagged_offset; /* stored at WQE offset 0 */
+ u64 remote_tagged_offset; /* stored at WQE offset 16 */
+ u64 fetch_add_data_bytes; /* stored at WQE offset 32 */
+ u32 stag; /* IRDMAQPSQ_STAG field, WQE offset 8 */
+ u32 remote_stag; /* IRDMAQPSQ_REMOTE_STAG field of the WQE header */
+};
+
+struct irdma_atomic_compare_swap { /* operands read by irdma_uk_atomic_compare_swap() via info->op.atomic_compare_swap */
+ u64 tagged_offset; /* stored at WQE offset 0 */
+ u64 remote_tagged_offset; /* stored at WQE offset 16 */
+ u64 swap_data_bytes; /* stored at WQE offset 32 */
+ u64 compare_data_bytes; /* stored at WQE offset 40 */
+ u32 stag; /* IRDMAQPSQ_STAG field, WQE offset 8 */
+ u32 remote_stag; /* IRDMAQPSQ_REMOTE_STAG field of the WQE header */
+};
struct irdma_inv_local_stag {
@@ -219,6 +352,7 @@ struct irdma_post_sq_info {
bool report_rtt:1;
bool udp_hdr:1;
bool defer_flag:1;
+ bool remote_atomic_en:1;
u32 imm_data;
u32 stag_to_inv;
union {
@@ -227,6 +361,8 @@ struct irdma_post_sq_info {
struct irdma_rdma_read rdma_read;
struct irdma_bind_window bind_window;
struct irdma_inv_local_stag inv_local_stag;
+ struct irdma_atomic_fetch_add atomic_fetch_add;
+ struct irdma_atomic_compare_swap atomic_compare_swap;
} op;
};
@@ -255,6 +391,15 @@ struct irdma_cq_poll_info {
bool imm_valid:1;
};
+struct qp_err_code { /* result type of irdma_ae_to_qp_err_code() as used in irdma_uk_cq_poll_cmpl() */
+ enum irdma_flush_opcode flush_code; /* copied into info->minor_err for IRDMA_SRQFLUSH_RSVD_MAJOR_ERR completions */
+ enum irdma_qp_event_type event_type;
+};
+
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
struct irdma_post_sq_info *info, bool post_sq);
int irdma_uk_inline_send(struct irdma_qp_uk *qp,
@@ -300,6 +445,39 @@ int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
u32 *sq_depth, u8 *sq_shift);
int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
u32 *rq_depth, u8 *rq_shift);
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+ struct irdma_srq_uk_init_info *info);
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info);
+
+struct irdma_srq_uk { /* SRQ state taken by irdma_uk_srq_init() and irdma_uk_srq_post_receive() */
+ u32 srq_caps;
+ struct irdma_qp_quanta *srq_base; /* presumably the start of the SRQ WQE ring - TODO confirm */
+ struct irdma_uk_attrs *uk_attrs;
+ __le64 *shadow_area;
+ struct irdma_ring srq_ring;
+ struct irdma_ring initial_ring;
+ u32 srq_id; /* id handed back to rf->allocated_srqs by irdma_srq_free_rsrc() */
+ u32 srq_size; /* irdma_modify_srq() rejects limits above this value */
+ u32 max_srq_frag_cnt; /* reported as max_sge by irdma_query_srq() */
+ struct irdma_wqe_uk_ops wqe_ops;
+ u8 srwqe_polarity; /* NOTE(review): looks like the valid-bit polarity of the ring - confirm */
+ u8 wqe_size;
+ u8 wqe_size_multiplier;
+ u8 deferred_flag;
+};
+
+struct irdma_srq_uk_init_info { /* creation parameters passed to irdma_uk_srq_init() */
+ struct irdma_qp_quanta *srq; /* kernel-mode create points this at the start of the coherent buffer */
+ struct irdma_uk_attrs *uk_attrs; /* irdma_create_srq() passes the device uk_attrs */
+ __le64 *shadow_area; /* kernel-mode create places this right after the WQE ring */
+ u64 *srq_wrid_array;
+ u32 srq_id; /* irdma_create_srq() stores the allocated srq_num here */
+ u32 srq_caps;
+ u32 srq_size; /* irdma_create_srq() stores depth >> shift here */
+ u32 max_srq_frag_cnt; /* irdma_create_srq() stores the requested max_sge here */
+};
+
struct irdma_sq_uk_wr_trk_info {
u64 wrid;
u32 wr_len;
@@ -344,6 +522,7 @@ struct irdma_qp_uk {
bool destroy_pending:1; /* Indicates the QP is being destroyed */
void *back_qp;
u8 dbg_rq_flushed;
+ struct irdma_srq_uk *srq_uk;
u8 sq_flush_seen;
u8 rq_flush_seen;
};
@@ -383,6 +562,7 @@ struct irdma_qp_uk_init_info {
u8 rq_shift;
int abi_ver;
bool legacy_mode;
+ struct irdma_srq_uk *srq_uk;
};
struct irdma_cq_uk_init_info {
@@ -398,6 +578,7 @@ struct irdma_cq_uk_init_info {
__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
u16 quanta, u32 total_size,
struct irdma_post_sq_info *info);
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx);
__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx);
void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq);
int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq);
@@ -409,5 +590,85 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
u32 *wqdepth);
int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
u32 *wqdepth);
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+ u32 *srqdepth);
void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
+
+/**
+ * irdma_ae_to_qp_err_code - map an asynchronous event id to QP error codes
+ * @ae_id: asynchronous event id
+ *
+ * Return: the flush opcode and QP event type paired with @ae_id. Ids
+ * without a dedicated case yield FLUSH_GENERAL_ERR with a catastrophic
+ * event type.
+ */
+static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id)
+{
+	/* most ids are catastrophic; only the exceptions override this */
+	enum irdma_qp_event_type event = IRDMA_QP_EVENT_CATASTROPHIC;
+	enum irdma_flush_opcode flush;
+
+	switch (ae_id) {
+	case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+	case IRDMA_AE_AMP_INVALID_STAG:
+	case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+	case IRDMA_AE_AMP_UNALLOCATED_STAG:
+	case IRDMA_AE_AMP_BAD_PD:
+	case IRDMA_AE_AMP_BAD_QP:
+	case IRDMA_AE_AMP_BAD_STAG_KEY:
+	case IRDMA_AE_AMP_BAD_STAG_INDEX:
+	case IRDMA_AE_AMP_TO_WRAP:
+	case IRDMA_AE_PRIV_OPERATION_DENIED:
+		flush = FLUSH_PROT_ERR;
+		event = IRDMA_QP_EVENT_ACCESS_ERR;
+		break;
+	case IRDMA_AE_UDA_XMIT_BAD_PD:
+	case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+		flush = FLUSH_LOC_QP_OP_ERR;
+		break;
+	case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+	case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+	case IRDMA_AE_UDA_L4LEN_INVALID:
+	case IRDMA_AE_DDP_UBE_INVALID_MO:
+	case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+		flush = FLUSH_LOC_LEN_ERR;
+		break;
+	case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+	case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+		flush = FLUSH_REM_ACCESS_ERR;
+		event = IRDMA_QP_EVENT_ACCESS_ERR;
+		break;
+	case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+	case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+	case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+	case IRDMA_AE_AMP_MWBIND_VALID_STAG:
+		flush = FLUSH_MW_BIND_ERR;
+		event = IRDMA_QP_EVENT_ACCESS_ERR;
+		break;
+	case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+		flush = FLUSH_RETRY_EXC_ERR;
+		break;
+	case IRDMA_AE_IB_INVALID_REQUEST:
+		flush = FLUSH_REM_INV_REQ_ERR;
+		event = IRDMA_QP_EVENT_REQ_ERR;
+		break;
+	case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+	case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+	case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+	case IRDMA_AE_IB_REMOTE_OP_ERROR:
+		flush = FLUSH_REM_OP_ERR;
+		break;
+	case IRDMA_AE_LLP_TOO_MANY_RNRS:
+		flush = FLUSH_RNR_RETRY_EXC_ERR;
+		break;
+	case IRDMA_AE_LCE_QP_CATASTROPHIC:
+	case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+	case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+	case IRDMA_AE_RCE_QP_CATASTROPHIC:
+		flush = FLUSH_FATAL_ERR;
+		break;
+	default:
+		flush = FLUSH_GENERAL_ERR;
+		break;
+	}
+
+	return (struct qp_err_code){
+		.flush_code = flush,
+		.event_type = event,
+	};
+}
#endif /* IRDMA_USER_H */
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index b510ef747399..8b94d87b0192 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -481,6 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp,
WRITE_ONCE(cqp_request->request_done, false);
cqp_request->callback_fcn = NULL;
cqp_request->waiting = false;
+ cqp_request->pending = false;
spin_lock_irqsave(&cqp->req_lock, flags);
list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
@@ -521,6 +522,22 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
}
/**
+ * irdma_cleanup_deferred_cqp_ops - free cqp requests that never completed
+ * @dev: sc_dev
+ * @cqp: cqp
+ *
+ * Keeps asking the cleanup handler for the next request with a
+ * deferred/pending completion and frees it, until the handler returns 0.
+ */
+static void irdma_cleanup_deferred_cqp_ops(struct irdma_sc_dev *dev,
+					   struct irdma_cqp *cqp)
+{
+	struct irdma_cqp_request *pending_req;
+	u64 scratch;
+
+	for (;;) {
+		scratch = irdma_sc_cqp_cleanup_handler(dev);
+		if (!scratch)
+			break;
+
+		/* the scratch value is the request pointer stored as an integer */
+		pending_req = (struct irdma_cqp_request *)(uintptr_t)scratch;
+		irdma_free_pending_cqp_request(cqp, pending_req);
+	}
+}
+
+/**
* irdma_cleanup_pending_cqp_op - clean-up cqp with no
* completions
* @rf: RDMA PCI function
@@ -533,6 +550,8 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
struct cqp_cmds_info *pcmdinfo = NULL;
u32 i, pending_work, wqe_idx;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ irdma_cleanup_deferred_cqp_ops(dev, cqp);
pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring);
wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring);
for (i = 0; i < pending_work; i++) {
@@ -552,6 +571,26 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
}
}
+/*
+ * Threshold that irdma_wait_event() compares cqp_timeout.count against.
+ * Uses CQP_TIMEOUT_THRESHOLD unless vc_caps carries a non-zero timeout in
+ * seconds, which is then divided by max_cqp_compl_wait_time_ms.
+ */
+static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev)
+{
+	u16 timeout_s = dev->vc_caps.cqp_timeout_s;
+
+	if (timeout_s)
+		return timeout_s * 1000 /
+		       dev->hw_attrs.max_cqp_compl_wait_time_ms;
+
+	return CQP_TIMEOUT_THRESHOLD;
+}
+
+/*
+ * Threshold irdma_wait_event() switches to once a request is marked
+ * pending. Uses CQP_DEF_CMPL_TIMEOUT_THRESHOLD unless vc_caps carries a
+ * non-zero deferred timeout in seconds, which is then divided by
+ * max_cqp_compl_wait_time_ms.
+ */
+static int irdma_get_def_timeout_threshold(struct irdma_sc_dev *dev)
+{
+	u16 timeout_s = dev->vc_caps.cqp_def_timeout_s;
+
+	if (timeout_s)
+		return timeout_s * 1000 /
+		       dev->hw_attrs.max_cqp_compl_wait_time_ms;
+
+	return CQP_DEF_CMPL_TIMEOUT_THRESHOLD;
+}
+
/**
* irdma_wait_event - wait for completion
* @rf: RDMA PCI function
@@ -561,6 +600,7 @@ static int irdma_wait_event(struct irdma_pci_f *rf,
struct irdma_cqp_request *cqp_request)
{
struct irdma_cqp_timeout cqp_timeout = {};
+ int timeout_threshold = irdma_get_timeout_threshold(&rf->sc_dev);
bool cqp_error = false;
int err_code = 0;
@@ -572,9 +612,17 @@ static int irdma_wait_event(struct irdma_pci_f *rf,
msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
break;
+ if (cqp_request->pending)
+ /* There was a deferred or pending completion
+ * received for this CQP request, so we need
+ * to wait longer than usual.
+ */
+ timeout_threshold =
+ irdma_get_def_timeout_threshold(&rf->sc_dev);
+
irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev);
- if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD)
+ if (cqp_timeout.count < timeout_threshold)
continue;
if (!rf->reset) {
@@ -649,6 +697,9 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
[IRDMA_OP_ADD_LOCAL_MAC_ENTRY] = "Add Local MAC Entry Cmd",
[IRDMA_OP_DELETE_LOCAL_MAC_ENTRY] = "Delete Local MAC Entry Cmd",
[IRDMA_OP_CQ_MODIFY] = "CQ Modify Cmd",
+ [IRDMA_OP_SRQ_CREATE] = "Create SRQ Cmd",
+ [IRDMA_OP_SRQ_MODIFY] = "Modify SRQ Cmd",
+ [IRDMA_OP_SRQ_DESTROY] = "Destroy SRQ Cmd",
};
static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
@@ -1065,6 +1116,26 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
irdma_put_cqp_request(&rf->cqp, cqp_request);
}
+/*
+ * Release what a GSI QP holds on GEN_3 and later: remove its vport
+ * request, then either clear the QP1 reservation (qp_num 1) or return the
+ * id to the QP bitmap (qp_num above 2). Does nothing on earlier hardware.
+ */
+static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num)
+{
+	struct irdma_pci_f *rf = iwqp->iwdev->rf;
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	unsigned long flags;
+
+	if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+		return;
+
+	irdma_vchnl_req_del_vport(dev, iwqp->iwdev->vport_id, qp_num);
+
+	if (qp_num > 2) {
+		irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+		return;
+	}
+
+	if (qp_num != 1)
+		return;
+
+	spin_lock_irqsave(&rf->rsrc_lock, flags);
+	rf->hwqp1_rsvd = false;
+	spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+}
+
/**
* irdma_free_qp_rsrc - free up memory resources for qp
* @iwqp: qp ptr (user or kernel)
@@ -1073,7 +1144,7 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
{
struct irdma_device *iwdev = iwqp->iwdev;
struct irdma_pci_f *rf = iwdev->rf;
- u32 qp_num = iwqp->ibqp.qp_num;
+ u32 qp_num = iwqp->sc_qp.qp_uk.qp_id;
irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
irdma_dealloc_push_page(rf, &iwqp->sc_qp);
@@ -1083,8 +1154,12 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
iwqp->sc_qp.user_pri);
}
- if (qp_num > 2)
- irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ irdma_free_gsi_qp_rsrc(iwqp, qp_num);
+ } else {
+ if (qp_num > 2)
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ }
dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size,
iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa);
iwqp->q2_ctx_mem.va = NULL;
@@ -1096,6 +1171,30 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
}
/**
+ * irdma_srq_wq_destroy - issue the srq destroy cqp command
+ * @rf: RDMA PCI function
+ * @srq: hardware control srq
+ *
+ * Returns silently when no cqp request can be allocated; the status of
+ * the command itself is not reported to the caller.
+ */
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq)
+{
+	struct irdma_cqp_request *req;
+	struct cqp_cmds_info *cmd;
+
+	req = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+	if (!req)
+		return;
+
+	cmd = &req->info;
+	cmd->in.u.srq_destroy.scratch = (uintptr_t)req;
+	cmd->in.u.srq_destroy.srq = srq;
+	cmd->post_sq = 1;
+	cmd->cqp_cmd = IRDMA_OP_SRQ_DESTROY;
+
+	irdma_handle_cqp_op(rf, req);
+	irdma_put_cqp_request(&rf->cqp, req);
+}
+
+/**
* irdma_cq_wq_destroy - send cq destroy cqp
* @rf: RDMA PCI function
* @cq: hardware control cq
@@ -2266,7 +2365,10 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
u8 polarity;
ukcq = &iwcq->sc_cq.cq_uk;
- cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
+ if (ukcq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index da5a41b275d8..76ce6137f2ba 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -41,7 +41,8 @@ static int irdma_query_device(struct ib_device *ibdev,
props->max_cq = rf->max_cq - rf->used_cqs;
props->max_cqe = rf->max_cqe - 1;
props->max_mr = rf->max_mr - rf->used_mrs;
- props->max_mw = props->max_mr;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->max_mw = props->max_mr;
props->max_pd = rf->max_pd - rf->used_pds;
props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
props->max_qp_rd_atom = hw_attrs->max_hw_ird;
@@ -56,9 +57,21 @@ static int irdma_query_device(struct ib_device *ibdev,
props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
-#define HCA_CLOCK_TIMESTAMP_MASK 0x1ffff
- if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2)
- props->timestamp_mask = HCA_CLOCK_TIMESTAMP_MASK;
+ props->max_srq = rf->max_srq - rf->used_srqs;
+ props->max_srq_wr = IRDMA_MAX_SRQ_WRS;
+ props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ if (hw_attrs->uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ props->atomic_cap = IB_ATOMIC_HCA;
+ else
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) {
+#define HCA_CORE_CLOCK_KHZ 1000000UL
+ props->timestamp_mask = GENMASK(31, 0);
+ props->hca_core_clock = HCA_CORE_CLOCK_KHZ;
+ }
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
return 0;
}
@@ -292,6 +305,10 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
ucontext->iwdev = iwdev;
ucontext->abi_ver = req.userspace_ver;
+ if (!(req.comp_mask & IRDMA_SUPPORT_WQE_FORMAT_V2) &&
+ uk_attrs->hw_rev >= IRDMA_GEN_3)
+ return -EOPNOTSUPP;
+
if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR)
ucontext->use_raw_attrs = true;
@@ -332,6 +349,8 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR;
uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size;
uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
+ uresp.max_hw_srq_quanta = uk_attrs->max_hw_srq_quanta;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA;
if (ib_copy_to_udata(udata, &uresp,
min(sizeof(uresp), udata->outlen))) {
rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
@@ -343,6 +362,8 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
spin_lock_init(&ucontext->cq_reg_mem_list_lock);
INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->srq_reg_mem_list);
+ spin_lock_init(&ucontext->srq_reg_mem_list_lock);
return 0;
@@ -521,7 +542,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
iwqp->sc_qp.qp_uk.destroy_pending = true;
- if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
+ if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE)
irdma_modify_qp_to_err(&iwqp->sc_qp);
if (!iwqp->user_mode)
@@ -541,6 +562,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
irdma_remove_push_mmap_entries(iwqp);
+
+ if (iwqp->sc_qp.qp_uk.qp_id == 1)
+ iwdev->rf->hwqp1_rsvd = false;
irdma_free_qp_rsrc(iwqp);
return 0;
@@ -564,7 +588,11 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev,
if (iwpbl->pbl_allocated) {
init_info->virtual_map = true;
init_info->sq_pa = qpmr->sq_pbl.idx;
- init_info->rq_pa = qpmr->rq_pbl.idx;
+ /* Need to use contiguous buffer for RQ of QP
+ * in case it is associated with SRQ.
+ */
+ init_info->rq_pa = init_info->qp_uk_init_info.srq_uk ?
+ qpmr->rq_pa : qpmr->rq_pbl.idx;
} else {
init_info->sq_pa = qpmr->sq_pbl.addr;
init_info->rq_pa = qpmr->rq_pbl.addr;
@@ -719,6 +747,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE);
ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ ukinfo->qp_id = info->qp_uk_init_info.qp_id;
iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
@@ -775,9 +804,12 @@ static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
roce_info = &iwqp->roce_info;
ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr);
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI && iwqp->ibqp.qp_num != 1)
+ roce_info->is_qp1 = true;
roce_info->rd_en = true;
roce_info->wr_rdresp_en = true;
- roce_info->bind_en = true;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ roce_info->bind_en = true;
roce_info->dcqcn_en = false;
roce_info->rtomin = 5;
@@ -808,7 +840,6 @@ static void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
iwarp_info->rd_en = true;
iwarp_info->wr_rdresp_en = true;
- iwarp_info->bind_en = true;
iwarp_info->ecn_en = true;
iwarp_info->rtomin = 5;
@@ -864,6 +895,47 @@ static void irdma_flush_worker(struct work_struct *work)
irdma_generate_flush_completions(iwqp);
}
+/*
+ * Choose the hardware QP id for a GSI QP and register it with the vport.
+ *
+ * Up to GEN_2 the id is always 1. Later generations take QP1 if it is not
+ * yet reserved, otherwise an id from the regular QP bitmap, and then
+ * request a vport for it; whatever was taken is released again if that
+ * request fails.
+ *
+ * Returns 0 with *qp_num set, or the error from the failing step.
+ */
+static int irdma_setup_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 *qp_num)
+{
+	struct irdma_device *iwdev = iwqp->iwdev;
+	struct irdma_pci_f *rf = iwdev->rf;
+	unsigned long flags;
+	bool took_qp1;
+	int err;
+
+	if (rf->rdma_ver <= IRDMA_GEN_2) {
+		*qp_num = 1;
+		return 0;
+	}
+
+	spin_lock_irqsave(&rf->rsrc_lock, flags);
+	took_qp1 = !rf->hwqp1_rsvd;
+	if (took_qp1)
+		rf->hwqp1_rsvd = true;
+	spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+
+	if (took_qp1) {
+		*qp_num = 1;
+	} else {
+		err = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+				       qp_num, &rf->next_qp);
+		if (err)
+			return err;
+	}
+
+	err = irdma_vchnl_req_add_vport(&rf->sc_dev, iwdev->vport_id, *qp_num,
+					iwdev->vsi.qos);
+	if (!err)
+		return 0;
+
+	/* vport request failed: give back whichever id was taken above */
+	if (*qp_num != 1) {
+		irdma_free_rsrc(rf, rf->allocated_qps, *qp_num);
+	} else {
+		spin_lock_irqsave(&rf->rsrc_lock, flags);
+		rf->hwqp1_rsvd = false;
+		spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+	}
+
+	return err;
+}
+
/**
* irdma_create_qp - create qp
* @ibqp: ptr of qp
@@ -889,6 +961,18 @@ static int irdma_create_qp(struct ib_qp *ibqp,
struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
struct irdma_qp_init_info init_info = {};
struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_srq *iwsrq;
+ bool srq_valid = false;
+ u32 srq_id = 0;
+
+ if (init_attr->srq) {
+ iwsrq = to_iwsrq(init_attr->srq);
+ srq_valid = true;
+ srq_id = iwsrq->srq_num;
+ init_attr->cap.max_recv_sge = uk_attrs->max_hw_wq_frags;
+ init_attr->cap.max_recv_wr = 4;
+ init_info.qp_uk_init_info.srq_uk = &iwsrq->sc_srq.srq_uk;
+ }
err_code = irdma_validate_qp_attrs(init_attr, iwdev);
if (err_code)
@@ -925,16 +1009,20 @@ static int irdma_create_qp(struct ib_qp *ibqp,
init_info.host_ctx = (__le64 *)(init_info.q2 + IRDMA_Q2_BUF_SIZE);
init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
- if (init_attr->qp_type == IB_QPT_GSI)
- qp_num = 1;
- else
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ err_code = irdma_setup_gsi_qp_rsrc(iwqp, &qp_num);
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = 1;
+ } else {
err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
&qp_num, &rf->next_qp);
- if (err_code)
- goto error;
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = qp_num;
+ }
iwqp->iwpd = iwpd;
- iwqp->ibqp.qp_num = qp_num;
qp = &iwqp->sc_qp;
iwqp->iwscq = to_iwcq(init_attr->send_cq);
iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
@@ -991,13 +1079,22 @@ static int irdma_create_qp(struct ib_qp *ibqp,
}
ctx_info = &iwqp->ctx_info;
+ ctx_info->srq_valid = srq_valid;
+ ctx_info->srq_id = srq_id;
ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- if (rdma_protocol_roce(&iwdev->ibdev, 1))
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (dev->ws_add(&iwdev->vsi, 0)) {
+ irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp);
+ err_code = -EINVAL;
+ goto error;
+ }
+ irdma_qp_add_qos(&iwqp->sc_qp);
irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info);
- else
+ } else {
irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info);
+ }
err_code = irdma_cqp_create_qp_cmd(iwqp);
if (err_code)
@@ -1009,16 +1106,6 @@ static int irdma_create_qp(struct ib_qp *ibqp,
iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
rf->qp_table[qp_num] = iwqp;
- if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
- if (dev->ws_add(&iwdev->vsi, 0)) {
- irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp);
- err_code = -EINVAL;
- goto error;
- }
-
- irdma_qp_add_qos(&iwqp->sc_qp);
- }
-
if (udata) {
/* GEN_1 legacy support with libi40iw does not have expanded uresp struct */
if (udata->outlen < sizeof(uresp)) {
@@ -1063,6 +1150,8 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
acc_flags |= IB_ACCESS_REMOTE_READ;
if (iwqp->roce_info.bind_en)
acc_flags |= IB_ACCESS_MW_BIND;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
} else {
if (iwqp->iwarp_info.wr_rdresp_en) {
acc_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -1070,8 +1159,8 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
}
if (iwqp->iwarp_info.rd_en)
acc_flags |= IB_ACCESS_REMOTE_READ;
- if (iwqp->iwarp_info.bind_en)
- acc_flags |= IB_ACCESS_MW_BIND;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
}
return acc_flags;
}
@@ -1110,6 +1199,7 @@ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = iwqp->roce_info.p_key;
attr->retry_cnt = iwqp->udp_info.rexmit_thresh;
attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh;
+ attr->min_rnr_timer = iwqp->udp_info.min_rnr_timer;
attr->max_rd_atomic = iwqp->roce_info.ord_size;
attr->max_dest_rd_atomic = iwqp->roce_info.ird_size;
}
@@ -1118,6 +1208,7 @@ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->qp_context = iwqp->ibqp.qp_context;
init_attr->send_cq = iwqp->ibqp.send_cq;
init_attr->recv_cq = iwqp->ibqp.recv_cq;
+ init_attr->srq = iwqp->ibqp.srq;
init_attr->cap = attr->cap;
return 0;
@@ -1242,6 +1333,10 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_RNR_RETRY)
udp_info->rnr_nak_thresh = attr->rnr_retry;
+ if (attr_mask & IB_QP_MIN_RNR_TIMER &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ udp_info->min_rnr_timer = attr->min_rnr_timer;
+
if (attr_mask & IB_QP_RETRY_CNT)
udp_info->rexmit_thresh = attr->retry_cnt;
@@ -1362,6 +1457,9 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
roce_info->wr_rdresp_en = true;
if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
roce_info->rd_en = true;
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ ctx_info->remote_atomics_en = true;
}
wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
@@ -1777,6 +1875,24 @@ exit:
}
/**
+ * irdma_srq_free_rsrc - release the memory and id held by an srq
+ * @rf: RDMA PCI function
+ * @iwsrq: srq ptr
+ *
+ * The coherent ring buffer exists only for kernel-mode SRQs, so it is
+ * freed only when user_mode is not set; the SRQ id is always returned.
+ */
+static void irdma_srq_free_rsrc(struct irdma_pci_f *rf, struct irdma_srq *iwsrq)
+{
+	struct irdma_dma_mem *ring = &iwsrq->kmem;
+
+	if (!iwsrq->user_mode) {
+		dma_free_coherent(rf->sc_dev.hw->device, ring->size,
+				  ring->va, ring->pa);
+		ring->va = NULL;
+	}
+
+	irdma_free_rsrc(rf, rf->allocated_srqs, iwsrq->sc_srq.srq_uk.srq_id);
+}
+
+/**
* irdma_cq_free_rsrc - free up resources for cq
* @rf: RDMA PCI function
* @iwcq: cq ptr
@@ -1840,6 +1956,22 @@ static int irdma_process_resize_list(struct irdma_cq *iwcq,
}
/**
+ * irdma_destroy_srq - destroy srq
+ * @ibsrq: srq pointer
+ * @udata: user data (unused)
+ *
+ * Always returns 0.
+ */
+static int irdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+	struct irdma_pci_f *rf = to_iwdev(ibsrq->device)->rf;
+	struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+
+	/* issue the destroy command first, then release memory and the id */
+	irdma_srq_wq_destroy(rf, &iwsrq->sc_srq);
+	irdma_srq_free_rsrc(rf, iwsrq);
+
+	return 0;
+}
+
+/**
* irdma_destroy_cq - destroy cq
* @ib_cq: cq pointer
* @udata: user data
@@ -1914,8 +2046,13 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
if (!iwcq->user_mode) {
entries++;
- if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+
+ if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
}
info.cq_size = max(entries, 4);
@@ -2022,10 +2159,297 @@ error:
return ret;
}
+/**
+ * irdma_srq_event - event notification for srq limit
+ * @srq: shared srq struct
+ *
+ * Clears the cached srq_limit and, when the consumer registered an event
+ * handler, delivers IB_EVENT_SRQ_LIMIT_REACHED to it.
+ */
+void irdma_srq_event(struct irdma_sc_srq *srq)
+{
+	struct irdma_srq *iwsrq = container_of(srq, struct irdma_srq, sc_srq);
+	struct ib_srq *ibsrq = &iwsrq->ibsrq;
+	struct ib_event event = {};
+
+	srq->srq_limit = 0;
+
+	if (!ibsrq->event_handler)
+		return;
+
+	event.device = ibsrq->device;
+	/* element is a union, so only the srq member is set; a port_num
+	 * store here would just be overwritten by the srq pointer.
+	 */
+	event.element.srq = ibsrq;
+	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+	ibsrq->event_handler(&event, ibsrq->srq_context);
+}
+
+/**
+ * irdma_modify_srq - modify srq request
+ * @ibsrq: srq's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data (unused)
+ *
+ * Only the SRQ limit can be changed. IB_SRQ_MAX_WR is rejected with
+ * -EINVAL, and a mask without IB_SRQ_LIMIT is a successful no-op. The
+ * limit must not exceed the SRQ size and is clamped to 0xFFF before being
+ * sent to hardware.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported mask or oversized
+ * limit, -ENOMEM when no cqp request is available, or the cqp op status.
+ */
+static int irdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+			    enum ib_srq_attr_mask attr_mask,
+			    struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+	struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+	struct irdma_cqp_request *cqp_request;
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_modify_srq_info *info;
+	struct cqp_cmds_info *cqp_info;
+	int status;
+
+	if (attr_mask & IB_SRQ_MAX_WR)
+		return -EINVAL;
+
+	if (!(attr_mask & IB_SRQ_LIMIT))
+		return 0;
+
+	if (attr->srq_limit > iwsrq->sc_srq.srq_uk.srq_size)
+		return -EINVAL;
+
+	/* Execute this cqp op synchronously, so we can update srq_limit
+	 * upon successful completion.
+	 */
+	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.srq_modify.info;
+	info->srq_limit = attr->srq_limit;
+	if (info->srq_limit > 0xFFF)
+		info->srq_limit = 0xFFF;
+	info->arm_limit_event = 1;
+
+	cqp_info->cqp_cmd = IRDMA_OP_SRQ_MODIFY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.srq_modify.srq = &iwsrq->sc_srq;
+	cqp_info->in.u.srq_modify.scratch = (uintptr_t)cqp_request;
+	status = irdma_handle_cqp_op(rf, cqp_request);
+
+	/* info lives inside cqp_request, so read the limit back while our
+	 * reference is still held; after the put the request may be reused.
+	 */
+	if (!status)
+		iwsrq->sc_srq.srq_limit = info->srq_limit;
+
+	irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+	return status;
+}
+
+/*
+ * Fill @info for a user-mode SRQ from the buffer user space registered
+ * earlier.
+ *
+ * Marks the SRQ as user mode, copies the create request from @udata and
+ * looks the SRQ buffer up on the ucontext's srq_reg_mem_list.
+ *
+ * Returns 0 on success, -EINVAL if the request is too short, -EFAULT if it
+ * cannot be copied, or -EPROTO if the buffer was never registered.
+ */
+static int irdma_setup_umode_srq(struct irdma_device *iwdev,
+				 struct irdma_srq *iwsrq,
+				 struct irdma_srq_init_info *info,
+				 struct ib_udata *udata)
+{
+#define IRDMA_CREATE_SRQ_MIN_REQ_LEN \
+	offsetofend(struct irdma_create_srq_req, user_shadow_area)
+	struct irdma_create_srq_req ureq = {};
+	struct irdma_ucontext *uctx;
+	struct irdma_srq_mr *mr;
+	struct irdma_pbl *pbl;
+	unsigned long irq_flags;
+
+	iwsrq->user_mode = true;
+	uctx = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+					 ibucontext);
+
+	if (udata->inlen < IRDMA_CREATE_SRQ_MIN_REQ_LEN)
+		return -EINVAL;
+
+	if (ib_copy_from_udata(&ureq, udata,
+			       min(sizeof(ureq), udata->inlen)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&uctx->srq_reg_mem_list_lock, irq_flags);
+	pbl = irdma_get_pbl((unsigned long)ureq.user_srq_buf,
+			    &uctx->srq_reg_mem_list);
+	spin_unlock_irqrestore(&uctx->srq_reg_mem_list_lock, irq_flags);
+	if (!pbl)
+		return -EPROTO;
+
+	iwsrq->iwpbl = pbl;
+	mr = &pbl->srq_mr;
+
+	info->shadow_area_pa = mr->shadow;
+	if (!pbl->pbl_allocated) {
+		/* no PBL was allocated: hand over the address directly */
+		info->srq_pa = mr->srq_pbl.addr;
+		return 0;
+	}
+
+	info->virtual_map = true;
+	info->pbl_chunk_size = 1;
+	info->first_pm_pbl_idx = mr->srq_pbl.idx;
+	info->leaf_pbl_size = 1;
+
+	return 0;
+}
+
+/*
+ * Allocate and describe the ring of a kernel-mode SRQ.
+ *
+ * One coherent buffer holds the WQE ring (@depth minimum-size WQEs)
+ * followed by the shadow area; its total size is rounded up to 256 bytes.
+ * On success @info and its srq_uk_init_info carry the virtual and DMA
+ * addresses of both regions.
+ *
+ * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
+ */
+static int irdma_setup_kmode_srq(struct irdma_device *iwdev,
+				 struct irdma_srq *iwsrq,
+				 struct irdma_srq_init_info *info, u32 depth,
+				 u8 shift)
+{
+	struct irdma_srq_uk_init_info *ukinfo = &info->srq_uk_init_info;
+	struct irdma_dma_mem *mem = &iwsrq->kmem;
+	u32 size, ring_size;
+
+	ring_size = depth * IRDMA_QP_WQE_MIN_SIZE;
+	size = ring_size + (IRDMA_SHADOW_AREA_SIZE << 3);
+
+	mem->size = ALIGN(size, 256);
+	mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size,
+				     &mem->pa, GFP_KERNEL);
+	if (!mem->va)
+		return -ENOMEM;
+
+	ukinfo->srq = mem->va;
+	ukinfo->srq_size = depth >> shift;
+	ukinfo->shadow_area = mem->va + ring_size;
+
+	/* srq_pa must be set before shadow_area_pa is derived from it */
+	info->srq_pa = mem->pa;
+	info->shadow_area_pa = info->srq_pa + ring_size;
+
+	return 0;
+}
+
+/**
+ * irdma_create_srq - create srq
+ * @ibsrq: ib's srq pointer
+ * @initattrs: attributes for srq
+ * @udata: user data for create srq
+ *
+ * Only IB_SRQT_BASIC is supported. On success attr->max_wr is updated to
+ * the number of WRs that can actually be posted. Every failure after the
+ * SRQ id has been allocated unwinds through the labels at the end of the
+ * function so the id, and the kernel-mode ring if any, are released.
+ *
+ * Return: 0 on success, -EOPNOTSUPP for other SRQ types, -EINVAL when SRQs
+ * are not supported or max_sge is too large, otherwise the error of the
+ * failing step.
+ */
+static int irdma_create_srq(struct ib_srq *ibsrq,
+			    struct ib_srq_init_attr *initattrs,
+			    struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+	struct ib_srq_attr *attr = &initattrs->attr;
+	struct irdma_pd *iwpd = to_iwpd(ibsrq->pd);
+	struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+	struct irdma_srq_uk_init_info *ukinfo;
+	struct irdma_cqp_request *cqp_request;
+	struct irdma_srq_init_info info = {};
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_uk_attrs *uk_attrs;
+	struct cqp_cmds_info *cqp_info;
+	int err_code = 0;
+	u32 depth;
+	u8 shift;
+
+	uk_attrs = &rf->sc_dev.hw_attrs.uk_attrs;
+	ukinfo = &info.srq_uk_init_info;
+
+	if (initattrs->srq_type != IB_SRQT_BASIC)
+		return -EOPNOTSUPP;
+
+	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_SRQ) ||
+	    attr->max_sge > uk_attrs->max_hw_wq_frags)
+		return -EINVAL;
+
+	refcount_set(&iwsrq->refcnt, 1);
+	spin_lock_init(&iwsrq->lock);
+	err_code = irdma_alloc_rsrc(rf, rf->allocated_srqs, rf->max_srq,
+				    &iwsrq->srq_num, &rf->next_srq);
+	if (err_code)
+		return err_code;
+
+	ukinfo->max_srq_frag_cnt = attr->max_sge;
+	ukinfo->uk_attrs = uk_attrs;
+	ukinfo->srq_id = iwsrq->srq_num;
+
+	irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_srq_frag_cnt, 0,
+			    &shift);
+
+	err_code = irdma_get_srqdepth(ukinfo->uk_attrs, attr->max_wr,
+				      shift, &depth);
+	/* the SRQ id is already allocated: unwind instead of returning */
+	if (err_code)
+		goto free_rsrc;
+
+	/* Actual SRQ size in WRs for ring and HW */
+	ukinfo->srq_size = depth >> shift;
+
+	/* Max postable WRs to SRQ */
+	iwsrq->max_wr = (depth - IRDMA_RQ_RSVD) >> shift;
+	attr->max_wr = iwsrq->max_wr;
+
+	if (udata)
+		err_code = irdma_setup_umode_srq(iwdev, iwsrq, &info, udata);
+	else
+		err_code = irdma_setup_kmode_srq(iwdev, iwsrq, &info, depth,
+						 shift);
+
+	if (err_code)
+		goto free_rsrc;
+
+	info.vsi = &iwdev->vsi;
+	info.pd = &iwpd->sc_pd;
+
+	err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info);
+	if (err_code)
+		goto free_dmem;
+
+	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto free_dmem;
+	}
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = IRDMA_OP_SRQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.srq_create.srq = &iwsrq->sc_srq;
+	cqp_info->in.u.srq_create.scratch = (uintptr_t)cqp_request;
+	err_code = irdma_handle_cqp_op(rf, cqp_request);
+	irdma_put_cqp_request(&rf->cqp, cqp_request);
+	if (err_code)
+		goto free_dmem;
+
+	if (udata) {
+		struct irdma_create_srq_resp resp = {};
+
+		resp.srq_id = iwsrq->srq_num;
+		resp.srq_size = ukinfo->srq_size;
+		if (ib_copy_to_udata(udata, &resp,
+				     min(sizeof(resp), udata->outlen))) {
+			err_code = -EPROTO;
+			goto srq_destroy;
+		}
+	}
+
+	return 0;
+
+srq_destroy:
+	irdma_srq_wq_destroy(rf, &iwsrq->sc_srq);
+
+free_dmem:
+	if (!iwsrq->user_mode)
+		dma_free_coherent(rf->hw.device, iwsrq->kmem.size,
+				  iwsrq->kmem.va, iwsrq->kmem.pa);
+free_rsrc:
+	irdma_free_rsrc(rf, rf->allocated_srqs, iwsrq->srq_num);
+	return err_code;
+}
+
+/**
+ * irdma_query_srq - report the current SRQ attributes
+ * @ibsrq: the SRQ to query
+ * @attr: filled with max_wr, max_sge and srq_limit
+ *
+ * Values come from driver state cached on the SRQ. Always returns 0.
+ */
+static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+	struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+	struct irdma_sc_srq *sc_srq = &iwsrq->sc_srq;
+
+	attr->srq_limit = sc_srq->srq_limit;
+	attr->max_sge = sc_srq->srq_uk.max_srq_frag_cnt;
+	attr->max_wr = iwsrq->max_wr;
+
+	return 0;
+}
+
static inline int cq_validate_flags(u32 flags, u8 hw_rev)
{
- /* GEN1 does not support CQ create flags */
- if (hw_rev == IRDMA_GEN_1)
+ /* GEN1/2 does not support CQ create flags */
+ if (hw_rev <= IRDMA_GEN_2)
return flags ? -EOPNOTSUPP : 0;
return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0;
@@ -2058,6 +2482,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
unsigned long flags;
int err_code;
int entries = attr->cqe;
+ bool cqe_64byte_ena;
err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
if (err_code)
@@ -2081,6 +2506,9 @@ static int irdma_create_cq(struct ib_cq *ibcq,
info.dev = dev;
ukinfo->cq_size = max(entries, 4);
ukinfo->cq_id = cq_num;
+ cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
+ true : false;
+ ukinfo->avoid_mem_cflct = cqe_64byte_ena;
iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
if (attr->comp_vector < rf->ceqs_count)
info.ceq_id = attr->comp_vector;
@@ -2116,8 +2544,6 @@ static int irdma_create_cq(struct ib_cq *ibcq,
goto cq_free_rsrc;
}
- iwcq->iwpbl = iwpbl;
- iwcq->cq_mem_size = 0;
cqmr = &iwpbl->cq_mr;
if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
@@ -2132,7 +2558,6 @@ static int irdma_create_cq(struct ib_cq *ibcq,
err_code = -EPROTO;
goto cq_free_rsrc;
}
- iwcq->iwpbl_shadow = iwpbl_shadow;
cqmr_shadow = &iwpbl_shadow->cq_mr;
info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
cqmr->split = true;
@@ -2156,11 +2581,18 @@ static int irdma_create_cq(struct ib_cq *ibcq,
}
entries++;
- if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
+
ukinfo->cq_size = entries;
- rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
+ if (cqe_64byte_ena)
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
+ else
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
iwcq->kmem.size,
@@ -2240,8 +2672,9 @@ cq_free_rsrc:
/**
* irdma_get_mr_access - get hw MR access permissions from IB access flags
* @access: IB access flags
+ * @hw_rev: Hardware version
*/
-static inline u16 irdma_get_mr_access(int access)
+static inline u16 irdma_get_mr_access(int access, u8 hw_rev)
{
u16 hw_access = 0;
@@ -2251,8 +2684,10 @@ static inline u16 irdma_get_mr_access(int access)
IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0;
hw_access |= (access & IB_ACCESS_REMOTE_READ) ?
IRDMA_ACCESS_FLAGS_REMOTEREAD : 0;
- hw_access |= (access & IB_ACCESS_MW_BIND) ?
- IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+ if (hw_rev >= IRDMA_GEN_3) {
+ hw_access |= (access & IB_ACCESS_MW_BIND) ?
+ IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+ }
hw_access |= (access & IB_ZERO_BASED) ?
IRDMA_ACCESS_FLAGS_ZERO_BASED : 0;
hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD;
@@ -2463,6 +2898,7 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
struct irdma_mr *iwmr = iwpbl->iwmr;
struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
struct irdma_cq_mr *cqmr = &iwpbl->cq_mr;
+ struct irdma_srq_mr *srqmr = &iwpbl->srq_mr;
struct irdma_hmc_pble *hmc_p;
u64 *arr = iwmr->pgaddrmem;
u32 pg_size, total;
@@ -2482,7 +2918,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
total = req->sq_pages + req->rq_pages;
hmc_p = &qpmr->sq_pbl;
qpmr->shadow = (dma_addr_t)arr[total];
-
+ /* Need to use physical address for RQ of QP
+ * in case it is associated with SRQ.
+ */
+ qpmr->rq_pa = (dma_addr_t)arr[req->sq_pages];
if (lvl) {
ret = irdma_check_mem_contiguous(arr, req->sq_pages,
pg_size);
@@ -2502,6 +2941,18 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
hmc_p->addr = arr[req->sq_pages];
}
break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ hmc_p = &srqmr->srq_pbl;
+ srqmr->shadow = (dma_addr_t)arr[req->rq_pages];
+ if (lvl)
+ ret = irdma_check_mem_contiguous(arr, req->rq_pages,
+ pg_size);
+
+ if (!ret)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ break;
case IRDMA_MEMREG_TYPE_CQ:
hmc_p = &cqmr->cq_pbl;
@@ -2806,7 +3257,10 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
stag_info->stag_key = (u8)iwmr->stag;
stag_info->total_len = iwmr->len;
- stag_info->access_rights = irdma_get_mr_access(access);
+ stag_info->access_rights = irdma_get_mr_access(access,
+ iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev);
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
stag_info->pd_id = iwpd->sc_pd.pd_id;
stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
@@ -2972,6 +3426,37 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
return 0;
}
+static int irdma_reg_user_mr_type_srq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{ /* Prepare the PBL info of a user SRQ buffer and queue it on the ucontext SRQ list; returns 0, -EINVAL or the irdma_handle_q_mem() error. */
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ total = req.rq_pages + IRDMA_SHADOW_PGCNT; /* ring pages plus the shadow page count */
+ if (total > iwmr->page_cnt)
+ return -EINVAL; /* request needs more pages than iwmr->page_cnt */
+
+ lvl = req.rq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0; /* level 0 only for a single-page ring */
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->srq_reg_mem_list);
+ iwpbl->on_list = true; /* irdma_del_memlist() tests this flag before calling list_del() */
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
struct ib_udata *udata,
struct irdma_mr *iwmr)
@@ -3063,6 +3548,12 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
goto error;
break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ err = irdma_reg_user_mr_type_srq(req, udata, iwmr);
+ if (err)
+ goto error;
+
+ break;
case IRDMA_MEMREG_TYPE_CQ:
err = irdma_reg_user_mr_type_cq(req, udata, iwmr);
if (err)
@@ -3106,9 +3597,9 @@ static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access);
if (IS_ERR(umem_dmabuf)) {
- err = PTR_ERR(umem_dmabuf);
- ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
- return ERR_PTR(err);
+ ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
+ return ERR_CAST(umem_dmabuf);
}
iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM);
@@ -3382,6 +3873,14 @@ static void irdma_del_memlist(struct irdma_mr *iwmr,
}
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+ break;
default:
break;
}
@@ -3461,6 +3960,40 @@ static int irdma_post_send(struct ib_qp *ibqp,
if (ib_wr->send_flags & IB_SEND_FENCE)
info.read_fence = true;
switch (ib_wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP;
+ info.op.atomic_compare_swap.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_compare_swap.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_compare_swap.swap_data_bytes = atomic_wr(ib_wr)->swap;
+ info.op.atomic_compare_swap.compare_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_compare_swap.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_compare_swap.remote_stag = atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_compare_swap(ukqp, &info, false);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD;
+ info.op.atomic_fetch_add.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_fetch_add.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_fetch_add.fetch_add_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_fetch_add.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_fetch_add.remote_stag =
+ atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_fetch_add(ukqp, &info, false);
+ break;
case IB_WR_SEND_WITH_IMM:
if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) {
info.imm_data_valid = true;
@@ -3555,7 +4088,9 @@ static int irdma_post_send(struct ib_qp *ibqp,
stag_info.signaled = info.signaled;
stag_info.read_fence = info.read_fence;
- stag_info.access_rights = irdma_get_mr_access(reg_wr(ib_wr)->access);
+ stag_info.access_rights =
+ irdma_get_mr_access(reg_wr(ib_wr)->access,
+ dev->hw_attrs.uk_attrs.hw_rev);
stag_info.stag_key = reg_wr(ib_wr)->key & 0xff;
stag_info.stag_idx = reg_wr(ib_wr)->key >> 8;
stag_info.page_size = reg_wr(ib_wr)->mr->page_size;
@@ -3594,6 +4129,48 @@ static int irdma_post_send(struct ib_qp *ibqp,
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_post_srq_recv - post receive wr for kernel application
+ * @ibsrq: ib srq pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int irdma_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *ib_wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_srq_uk *uksrq = &iwsrq->sc_srq.srq_uk;
+ struct irdma_post_rq_info post_recv = {};
+ unsigned long flags;
+ int err = 0;
+
+ spin_lock_irqsave(&iwsrq->lock, flags);
+ while (ib_wr) {
+ if (ib_wr->num_sge > uksrq->max_srq_frag_cnt) {
+ err = -EINVAL;
+ goto out;
+ }
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ post_recv.sg_list = ib_wr->sg_list;
+ err = irdma_uk_srq_post_receive(uksrq, &post_recv);
+ if (err)
+ goto out;
+
+ ib_wr = ib_wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&iwsrq->lock, flags);
+
if (err)
*bad_wr = ib_wr;
@@ -3619,6 +4196,11 @@ static int irdma_post_recv(struct ib_qp *ibqp,
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
+ if (ukqp->srq_uk) {
+ *bad_wr = ib_wr;
+ return -EINVAL;
+ }
+
spin_lock_irqsave(&iwqp->lock, flags);
while (ib_wr) {
post_recv.num_sges = ib_wr->num_sge;
@@ -3671,6 +4253,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
return IB_WC_MW_BIND_ERR;
case FLUSH_REM_INV_REQ_ERR:
return IB_WC_REM_INV_REQ_ERR;
+ case FLUSH_RNR_RETRY_EXC_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
case FLUSH_FATAL_ERR:
default:
return IB_WC_FATAL_ERR;
@@ -3727,8 +4311,12 @@ static void irdma_process_cqe(struct ib_wc *entry,
if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
set_ib_wc_op_sq(cq_poll_info, entry);
} else {
- set_ib_wc_op_rq(cq_poll_info, entry,
- qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM);
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ set_ib_wc_op_rq(cq_poll_info, entry,
+ qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
+ true : false);
+ else
+ set_ib_wc_op_rq_gen_3(cq_poll_info, entry);
if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
cq_poll_info->stag_invalid_set) {
entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
@@ -3923,40 +4511,7 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
return ret;
}
-static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
- err = ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
- err = ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static const struct rdma_stat_desc irdma_hw_stat_names[] = {
+static const struct rdma_stat_desc irdma_hw_stat_descs[] = {
/* gen1 - 32-bit */
[IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards",
[IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts",
@@ -3964,9 +4519,6 @@ static const struct rdma_stat_desc irdma_hw_stat_names[] = {
[IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards",
[IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts",
[IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes",
- [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs",
- [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors",
- [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors",
[IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors",
/* gen1 - 64-bit */
[IRDMA_HW_STAT_INDEX_IP4RXOCTS].name = "ip4InOctets",
@@ -3985,16 +4537,14 @@ static const struct rdma_stat_desc irdma_hw_stat_names[] = {
[IRDMA_HW_STAT_INDEX_IP6TXPKTS].name = "ip6OutPkts",
[IRDMA_HW_STAT_INDEX_IP6TXFRAGS].name = "ip6OutSegRqd",
[IRDMA_HW_STAT_INDEX_IP6TXMCPKTS].name = "ip6OutMcastPkts",
- [IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "tcpInSegs",
- [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "tcpOutSegs",
- [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "iwInRdmaReads",
- [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "iwInRdmaSends",
- [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "iwInRdmaWrites",
- [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "iwOutRdmaReads",
- [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "iwOutRdmaSends",
- [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "iwOutRdmaWrites",
- [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "iwRdmaBnd",
- [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "iwRdmaInv",
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "InRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "InRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "InRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "OutRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "OutRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "OutRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "RdmaBnd",
+ [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "RdmaInv",
/* gen2 - 32-bit */
[IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled",
@@ -4008,9 +4558,59 @@ static const struct rdma_stat_desc irdma_hw_stat_names[] = {
[IRDMA_HW_STAT_INDEX_UDPRXPKTS].name = "RxUDP",
[IRDMA_HW_STAT_INDEX_UDPTXPKTS].name = "TxUDP",
[IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS].name = "RxECNMrkd",
-
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "RetransSegs",
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "InOptErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "InProtoErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "InSegs",
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "OutSegs",
+
+ /* gen3 */
+ [IRDMA_HW_STAT_INDEX_RNR_SENT].name = "RNR sent",
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD].name = "RNR received",
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT].name = "ord limit count",
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT].name = "ird limit count",
+ [IRDMA_HW_STAT_INDEX_RDMARXATS].name = "Rx atomics",
+ [IRDMA_HW_STAT_INDEX_RDMATXATS].name = "Tx atomics",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR].name = "Nak Sequence Error",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED].name = "Nak Sequence Error Implied",
+ [IRDMA_HW_STAT_INDEX_RTO].name = "RTO",
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS].name = "Rcvd Out of order packets",
+ [IRDMA_HW_STAT_INDEX_ICRCERR].name = "CRC errors",
};
+static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{ /* Fill the immutable port data for a RoCE (UDP encapsulation) port; returns 0 or the ib_query_port() error. */
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; /* written even if the query below fails */
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->pkey_tbl_len = attr.pkey_tbl_len; /* table lengths are taken from the queried port attributes */
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{ /* Fill the immutable port data for an iWARP port; returns 0 or the ib_query_port() error. */
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; /* written even if the query below fails */
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+ immutable->gid_tbl_len = attr.gid_tbl_len; /* unlike the RoCE variant, max_mad_size and pkey_tbl_len are not set here */
+
+ return 0;
+}
+
static void irdma_get_dev_fw_str(struct ib_device *dev, char *str)
{
struct irdma_device *iwdev = to_iwdev(dev);
@@ -4034,7 +4634,7 @@ static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev,
int num_counters = dev->hw_attrs.max_stat_idx;
unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
- return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters,
+ return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters,
lifespan);
}
@@ -4539,7 +5139,7 @@ static bool irdma_ah_exists(struct irdma_device *iwdev,
new_ah->sc_ah.ah_info.dest_ip_addr[2] ^
new_ah->sc_ah.ah_info.dest_ip_addr[3];
- hash_for_each_possible(iwdev->ah_hash_tbl, ah, list, key) {
+ hash_for_each_possible(iwdev->rf->ah_hash_tbl, ah, list, key) {
/* Set ah_valid and ah_id the same so memcmp can work */
new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx;
new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid;
@@ -4565,14 +5165,14 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
struct irdma_ah *ah = to_iwah(ibah);
if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) {
- mutex_lock(&iwdev->ah_tbl_lock);
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) {
- mutex_unlock(&iwdev->ah_tbl_lock);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
return 0;
}
hash_del(&ah->parent_ah->list);
kfree(ah->parent_ah);
- mutex_unlock(&iwdev->ah_tbl_lock);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
}
irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
@@ -4609,11 +5209,11 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
err = irdma_setup_ah(ibah, attr);
if (err)
return err;
- mutex_lock(&iwdev->ah_tbl_lock);
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
if (!irdma_ah_exists(iwdev, ah)) {
err = irdma_create_hw_ah(iwdev, ah, true);
if (err) {
- mutex_unlock(&iwdev->ah_tbl_lock);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
return err;
}
/* Add new AH to list */
@@ -4625,11 +5225,11 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
parent_ah->sc_ah.ah_info.dest_ip_addr[3];
ah->parent_ah = parent_ah;
- hash_add(iwdev->ah_hash_tbl, &parent_ah->list, key);
+ hash_add(iwdev->rf->ah_hash_tbl, &parent_ah->list, key);
refcount_set(&parent_ah->refcnt, 1);
}
}
- mutex_unlock(&iwdev->ah_tbl_lock);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
@@ -4691,6 +5291,20 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_ETHERNET;
}
+static const struct ib_device_ops irdma_gen1_dev_ops = { /* extra ops installed by irdma_init_rdma_device() only when rf->rdma_ver == IRDMA_GEN_1 */
+ .dealloc_driver = irdma_ib_dealloc_device,
+};
+
+static const struct ib_device_ops irdma_gen3_dev_ops = { /* extra ops installed by irdma_init_rdma_device() when rf->rdma_ver >= IRDMA_GEN_3 */
+ .alloc_mw = irdma_alloc_mw, /* memory-window ops are registered only through this table */
+ .create_srq = irdma_create_srq,
+ .dealloc_mw = irdma_dealloc_mw,
+ .destroy_srq = irdma_destroy_srq,
+ .modify_srq = irdma_modify_srq,
+ .post_srq_recv = irdma_post_srq_recv,
+ .query_srq = irdma_query_srq,
+};
+
static const struct ib_device_ops irdma_roce_dev_ops = {
.attach_mcast = irdma_attach_mcast,
.create_ah = irdma_create_ah,
@@ -4725,7 +5339,6 @@ static const struct ib_device_ops irdma_dev_ops = {
.alloc_hw_port_stats = irdma_alloc_hw_port_stats,
.alloc_mr = irdma_alloc_mr,
- .alloc_mw = irdma_alloc_mw,
.alloc_pd = irdma_alloc_pd,
.alloc_ucontext = irdma_alloc_ucontext,
.create_cq = irdma_create_cq,
@@ -4761,6 +5374,7 @@ static const struct ib_device_ops irdma_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_mw, irdma_mr, ibmw),
INIT_RDMA_OBJ_SIZE(ib_qp, irdma_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, irdma_srq, ibsrq),
};
/**
@@ -4808,6 +5422,10 @@ static void irdma_init_rdma_device(struct irdma_device *iwdev)
iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
iwdev->ibdev.dev.parent = &pcidev->dev;
ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops);
+ if (iwdev->rf->rdma_ver == IRDMA_GEN_1)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen1_dev_ops);
+ if (iwdev->rf->rdma_ver >= IRDMA_GEN_3)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen3_dev_ops);
}
/**
@@ -4879,5 +5497,9 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev)
struct irdma_device *iwdev = to_iwdev(ibdev);
irdma_rt_deinit_hw(iwdev);
- irdma_ctrl_deinit_hw(iwdev->rf);
+ if (!iwdev->is_vport) {
+ irdma_ctrl_deinit_hw(iwdev->rf);
+ if (iwdev->rf->vchnl_wq)
+ destroy_workqueue(iwdev->rf->vchnl_wq);
+ }
}
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index cfa140b36395..ed21c1b56e8e 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -8,6 +8,7 @@
#define IRDMA_PKEY_TBL_SZ 1
#define IRDMA_DEFAULT_PKEY 0xFFFF
+#define IRDMA_SHADOW_PGCNT 1
struct irdma_ucontext {
struct ib_ucontext ibucontext;
@@ -17,6 +18,8 @@ struct irdma_ucontext {
spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */
struct list_head qp_reg_mem_list;
spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */
+ struct list_head srq_reg_mem_list;
+ spinlock_t srq_reg_mem_list_lock; /* protect SRQ memory list */
int abi_ver;
u8 legacy_mode : 1;
u8 use_raw_attrs : 1;
@@ -65,10 +68,16 @@ struct irdma_cq_mr {
bool split;
};
+struct irdma_srq_mr { /* SRQ variant of the per-MR queue info; filled by the IRDMA_MEMREG_TYPE_SRQ case of irdma_handle_q_mem() */
+ struct irdma_hmc_pble srq_pbl; /* either .idx (PBL index) or .addr (first page address) is set */
+ dma_addr_t shadow; /* page-array entry that follows the rq_pages ring pages */
+};
+
struct irdma_qp_mr { /* QP variant of the per-MR queue info; filled by irdma_handle_q_mem() */
struct irdma_hmc_pble sq_pbl;
struct irdma_hmc_pble rq_pbl;
dma_addr_t shadow; /* page-array entry that follows the SQ and RQ pages */
+ dma_addr_t rq_pa; /* address of the first RQ page (entry sq_pages of the page array); needed when the QP is associated with an SRQ */
struct page *sq_page;
};
@@ -85,6 +94,7 @@ struct irdma_pbl {
union {
struct irdma_qp_mr qp_mr;
struct irdma_cq_mr cq_mr;
+ struct irdma_srq_mr srq_mr;
};
bool pbl_allocated:1;
@@ -112,24 +122,33 @@ struct irdma_mr {
struct irdma_pbl iwpbl;
};
+struct irdma_srq { /* driver-private shared receive queue object */
+ struct ib_srq ibsrq; /* embedded core object; allocation size registered with INIT_RDMA_OBJ_SIZE(ib_srq, irdma_srq, ibsrq) */
+ struct irdma_sc_srq sc_srq __aligned(64);
+ struct irdma_dma_mem kmem; /* coherent DMA memory; the create error path frees it only when !user_mode */
+ u64 *srq_wrid_mem;
+ refcount_t refcnt;
+ spinlock_t lock; /* held by irdma_post_srq_recv() while receive WRs are posted */
+ struct irdma_pbl *iwpbl;
+ struct irdma_sge *sg_list;
+ u16 srq_head;
+ u32 srq_num; /* id taken from rf->allocated_srqs; reported to user space as srq_id */
+ u32 max_wr; /* returned as attr->max_wr by irdma_query_srq() */
+ bool user_mode:1; /* presumably set for SRQs created through udata - TODO confirm */
+};
+
struct irdma_cq {
struct ib_cq ibcq;
struct irdma_sc_cq sc_cq;
- u16 cq_head;
- u16 cq_size;
u16 cq_num;
bool user_mode;
atomic_t armed;
enum irdma_cmpl_notify last_notify;
- u32 polled_cmpls;
- u32 cq_mem_size;
struct irdma_dma_mem kmem;
struct irdma_dma_mem kmem_shadow;
struct completion free_cq;
refcount_t refcnt;
spinlock_t lock; /* for poll cq */
- struct irdma_pbl *iwpbl;
- struct irdma_pbl *iwpbl_shadow;
struct list_head resize_list;
struct irdma_cq_poll_info cur_cqe;
struct list_head cmpl_generated;
@@ -259,6 +278,12 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
case IRDMA_OP_TYPE_FAST_REG_NSMR:
entry->opcode = IB_WC_REG_MR;
break;
+ case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP:
+ entry->opcode = IB_WC_COMP_SWAP;
+ break;
+ case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD:
+ entry->opcode = IB_WC_FETCH_ADD;
+ break;
case IRDMA_OP_TYPE_INV_STAG:
entry->opcode = IB_WC_LOCAL_INV;
break;
@@ -267,6 +292,19 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
}
}
+static inline void set_ib_wc_op_rq_gen_3(struct irdma_cq_poll_info *info,
+ struct ib_wc *entry)
+{ /* Translate a receive-queue CQE op type into entry->opcode; irdma_process_cqe() uses this when hw_rev is above IRDMA_GEN_2. */
+ switch (info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; /* a write op type on the receive side is reported as write-with-immediate */
+ break;
+ default:
+ entry->opcode = IB_WC_RECV; /* every other op type is a plain receive */
+ }
+}
+
static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
struct ib_wc *entry, bool send_imm_support)
{
diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c
new file mode 100644
index 000000000000..16ad27247527
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "virtchnl.h"
+#include "ws.h"
+#include "i40iw_hw.h"
+#include "ig3rdma_hw.h"
+
+struct vchnl_reg_map_elem { /* one row of vchnl_reg_map */
+ u16 reg_id; /* register id as carried in the virtchnl response (IRDMA_VCHNL_REG_ID_*) */
+ u16 reg_idx; /* index into dev->hw_regs[] that receives the register address */
+ bool pg_rel; /* NOTE(review): false in every table row; the parser in this file tests the IRDMA_VCHNL_REG_PAGE_REL bit of the response id instead */
+};
+
+struct vchnl_regfld_map_elem { /* one row of vchnl_regfld_map */
+ u16 regfld_id; /* field id as carried in the virtchnl response (IRDMA_VCHNL_REGFLD_ID_*) */
+ u16 regfld_idx; /* index into dev->hw_masks[] / dev->hw_shifts[] for that field */
+};
+
+static struct vchnl_reg_map_elem vchnl_reg_map[] = { /* virtchnl register id -> dev->hw_regs[] index; searched linearly by irdma_vchnl_req_get_reg_layout() */
+ {IRDMA_VCHNL_REG_ID_CQPTAIL, IRDMA_CQPTAIL, false},
+ {IRDMA_VCHNL_REG_ID_CQPDB, IRDMA_CQPDB, false},
+ {IRDMA_VCHNL_REG_ID_CCQPSTATUS, IRDMA_CCQPSTATUS, false},
+ {IRDMA_VCHNL_REG_ID_CCQPHIGH, IRDMA_CCQPHIGH, false},
+ {IRDMA_VCHNL_REG_ID_CCQPLOW, IRDMA_CCQPLOW, false},
+ {IRDMA_VCHNL_REG_ID_CQARM, IRDMA_CQARM, false},
+ {IRDMA_VCHNL_REG_ID_CQACK, IRDMA_CQACK, false},
+ {IRDMA_VCHNL_REG_ID_AEQALLOC, IRDMA_AEQALLOC, false},
+ {IRDMA_VCHNL_REG_ID_CQPERRCODES, IRDMA_CQPERRCODES, false},
+ {IRDMA_VCHNL_REG_ID_WQEALLOC, IRDMA_WQEALLOC, false},
+ {IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET, IRDMA_DB_ADDR_OFFSET, false },
+ {IRDMA_VCHNL_REG_ID_DYN_CTL, IRDMA_GLINT_DYN_CTL, false },
+ {IRDMA_VCHNL_REG_INV_ID, IRDMA_VCHNL_REG_INV_ID, false } /* sentinel: the search loop stops at the invalid id, so this row must stay last */
+};
+
+static struct vchnl_regfld_map_elem vchnl_regfld_map[] = { /* virtchnl field id -> dev->hw_masks[] / dev->hw_shifts[] index */
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR, IRDMA_CCQPSTATUS_CCQP_ERR_M},
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE, IRDMA_CCQPSTATUS_CCQP_DONE_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID, IRDMA_CQPSQ_STAG_PDID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID, IRDMA_CQPSQ_CQ_CEQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID, IRDMA_CQPSQ_CQ_CQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT, IRDMA_COMMIT_FPM_CQCNT_M},
+ {IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID, IRDMA_CQPSQ_UPESD_HMCFNID_M},
+ {IRDMA_VCHNL_REGFLD_INV_ID, IRDMA_VCHNL_REGFLD_INV_ID} /* sentinel: the search loop stops at the invalid id, so this row must stay last */
+};
+
+#define IRDMA_VCHNL_REG_COUNT ARRAY_SIZE(vchnl_reg_map) /* number of table rows, sentinel included */
+#define IRDMA_VCHNL_REGFLD_COUNT ARRAY_SIZE(vchnl_regfld_map) /* number of table rows, sentinel included */
+#define IRDMA_VCHNL_REGFLD_BUF_SIZE \
+ (IRDMA_VCHNL_REG_COUNT * sizeof(struct irdma_vchnl_reg_info) + \
+ IRDMA_VCHNL_REGFLD_COUNT * sizeof(struct irdma_vchnl_reg_field_info)) /* bytes for one entry per row of both tables */
+#define IRDMA_REGMAP_RESP_BUF_SIZE (IRDMA_VCHNL_RESP_MIN_SIZE + IRDMA_VCHNL_REGFLD_BUF_SIZE) /* size of the on-stack response buffer in irdma_vchnl_req_get_reg_layout() */
+
+/**
+ * irdma_sc_vchnl_init - Initialize dev virtchannel and get hw_rev
+ * @dev: dev structure to update
+ * @info: virtchannel info parameters to fill into the dev structure
+ *
+ * Marks the channel as up and copies the privileged, is_pf and hw_rev
+ * settings from @info into @dev. For a non-privileged device it then calls
+ * irdma_vchnl_req_get_ver() and irdma_vchnl_req_get_caps(), and replaces
+ * hw_rev with the value found in dev->vc_caps.
+ *
+ * Return: 0 on success, otherwise the error returned by the version or
+ * capability request. vchnl_up stays set when one of those requests fails.
+ */
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info)
+{
+ dev->vchnl_up = true; /* the irdma_vchnl_req_*() helpers in this file return -EBUSY while this is false */
+ dev->privileged = info->privileged;
+ dev->is_pf = info->is_pf;
+ dev->hw_attrs.uk_attrs.hw_rev = info->hw_rev;
+
+ if (!dev->privileged) {
+ int ret = irdma_vchnl_req_get_ver(dev, IRDMA_VCHNL_CHNL_VER_MAX,
+ &dev->vchnl_ver);
+
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Channel version ret = %d, version is %u\n",
+ ret, dev->vchnl_ver); /* traced before the error check so failures are logged too */
+
+ if (ret)
+ return ret;
+
+ ret = irdma_vchnl_req_get_caps(dev);
+ if (ret)
+ return ret;
+
+ dev->hw_attrs.uk_attrs.hw_rev = dev->vc_caps.hw_rev; /* overrides the value copied from @info above */
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_verify_resp - Verify requested response size
+ * @vchnl_req: vchnl message requested
+ * @resp_len: response length sent from vchnl peer
+ *
+ * The check depends on the op code stored in the request message:
+ * GET_VER, GET_HMC_FCN and PUT_HMC_FCN need @resp_len to equal the
+ * request's parm_len, GET_RDMA_CAPS needs at least
+ * IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE bytes, and the remaining known op
+ * codes accept any length.
+ *
+ * Return: 0 if the length is acceptable, -EBADMSG on a length mismatch,
+ * -EOPNOTSUPP for an op code not listed here.
+ */
+static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req,
+ u16 resp_len)
+{
+ switch (vchnl_req->vchnl_msg->op_code) {
+ case IRDMA_VCHNL_OP_GET_VER:
+ case IRDMA_VCHNL_OP_GET_HMC_FCN:
+ case IRDMA_VCHNL_OP_PUT_HMC_FCN:
+ if (resp_len != vchnl_req->parm_len) /* fixed-size responses: exact match required */
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_RDMA_CAPS:
+ if (resp_len < IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE) /* only a lower bound is enforced */
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_REG_LAYOUT:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP:
+ case IRDMA_VCHNL_OP_ADD_VPORT:
+ case IRDMA_VCHNL_OP_DEL_VPORT:
+ break; /* no length constraint for these ops */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void irdma_free_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req)
+{
+ kfree(vchnl_req->vchnl_msg); /* releases the buffer that irdma_alloc_vchnl_req_msg() obtained with kzalloc() */
+}
+
+static int irdma_alloc_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req,
+ struct irdma_vchnl_req_init_info *info)
+{ /* Build the request message described by @info and record in @vchnl_req where the response goes; returns 0 or -ENOMEM. */
+ struct irdma_vchnl_op_buf *vchnl_msg;
+
+ vchnl_msg = kzalloc(IRDMA_VCHNL_MAX_MSG_SIZE, GFP_KERNEL); /* always the maximum message size, zero-filled */
+
+ if (!vchnl_msg)
+ return -ENOMEM;
+
+ vchnl_msg->op_ctx = (uintptr_t)vchnl_req; /* the request's address is stored in the message */
+ vchnl_msg->buf_len = sizeof(*vchnl_msg) + info->req_parm_len; /* header plus request parameters */
+ if (info->req_parm_len)
+ memcpy(vchnl_msg->buf, info->req_parm, info->req_parm_len); /* NOTE(review): req_parm_len is not checked against the allocation size in this function */
+ vchnl_msg->op_code = info->op_code;
+ vchnl_msg->op_ver = info->op_ver;
+
+ vchnl_req->vchnl_msg = vchnl_msg; /* ownership moves to the request; freed by irdma_free_vchnl_req_msg() */
+ vchnl_req->parm = info->resp_parm; /* caller-supplied buffer for the response parameters */
+ vchnl_req->parm_len = info->resp_parm_len;
+
+ return 0;
+}
+
+static int irdma_vchnl_req_send_sync(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req_init_info *info)
+{ /* Build a request from @info, send it with ig3rdma_vchnl_send_sync() and fetch the response; returns 0 or a negative error. */
+ u16 resp_len = sizeof(dev->vc_recv_buf); /* passed by pointer to the send call; presumably updated to the received length - TODO confirm */
+ struct irdma_vchnl_req vchnl_req = {};
+ u16 msg_len;
+ u8 *msg;
+ int ret;
+
+ ret = irdma_alloc_vchnl_req_msg(&vchnl_req, info);
+ if (ret)
+ return ret;
+
+ msg_len = vchnl_req.vchnl_msg->buf_len;
+ msg = (u8 *)vchnl_req.vchnl_msg;
+
+ mutex_lock(&dev->vchnl_mutex); /* held across the send and the response handling, both of which use dev->vc_recv_buf / vc_recv_len */
+ ret = ig3rdma_vchnl_send_sync(dev, msg, msg_len, dev->vc_recv_buf,
+ &resp_len);
+ dev->vc_recv_len = resp_len; /* stored even when the send failed */
+ if (ret)
+ goto exit;
+
+ ret = irdma_vchnl_req_get_resp(dev, &vchnl_req);
+exit:
+ mutex_unlock(&dev->vchnl_mutex);
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: virtual channel send %s caller: %pS ret=%d op=%u op_ver=%u req_len=%u parm_len=%u resp_len=%u\n",
+ !ret ? "SUCCEEDS" : "FAILS", __builtin_return_address(0),
+ ret, vchnl_req.vchnl_msg->op_code,
+ vchnl_req.vchnl_msg->op_ver, vchnl_req.vchnl_msg->buf_len,
+ vchnl_req.parm_len, vchnl_req.resp_len); /* traced on both the success and the failure path */
+ irdma_free_vchnl_req_msg(&vchnl_req); /* freed last: the trace above still reads vchnl_msg */
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_reg_layout - Get Register Layout
+ * @dev: RDMA device pointer
+ *
+ * Sends IRDMA_VCHNL_OP_GET_REG_LAYOUT and parses the response in two
+ * passes. The first pass walks an array of register entries and fills
+ * dev->hw_regs[]; the second pass walks the array of register-field
+ * entries that follows it and fills dev->hw_masks[] / dev->hw_shifts[].
+ * Each pass stops at an entry carrying the invalid id or after as many
+ * entries as the matching lookup table has rows. Entries whose id is not
+ * present in vchnl_reg_map / vchnl_regfld_map are skipped.
+ *
+ * Return: 0 on success; -EBUSY if the channel is not up; -EINVAL for an
+ * out-of-range register id or a register whose address cannot be
+ * resolved; -ENOMEM if the register entries reach the end of the response
+ * buffer; otherwise the error from irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev)
+{
+ u16 reg_idx, reg_id, tmp_reg_id, regfld_idx, regfld_id, tmp_regfld_id;
+ struct irdma_vchnl_reg_field_info *regfld_array = NULL;
+ u8 resp_buffer[IRDMA_REGMAP_RESP_BUF_SIZE] = {};
+ struct vchnl_regfld_map_elem *regfld_map_array;
+ struct irdma_vchnl_req_init_info info = {};
+ struct vchnl_reg_map_elem *reg_map_array;
+ struct irdma_vchnl_reg_info *reg_array;
+ u8 num_bits, shift_cnt;
+ u16 buf_len = 0;
+ u64 bitmask;
+ u32 rindex;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_REG_LAYOUT;
+ info.op_ver = IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0;
+ info.resp_parm = resp_buffer;
+ info.resp_parm_len = sizeof(resp_buffer);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ /* parse the response buffer and update reg info*/
+ /* Parse registers till invalid */
+ /* Parse register fields till invalid */
+ reg_array = (struct irdma_vchnl_reg_info *)resp_buffer;
+ for (rindex = 0; rindex < IRDMA_VCHNL_REG_COUNT; rindex++) {
+ /* buf_len is the end offset of the entry about to be read */
+ buf_len += sizeof(struct irdma_vchnl_reg_info);
+ if (buf_len >= sizeof(resp_buffer))
+ return -ENOMEM;
+
+ /* the field array starts right after the last register entry seen */
+ regfld_array =
+ (struct irdma_vchnl_reg_field_info *)&reg_array[rindex + 1];
+ reg_id = reg_array[rindex].reg_id;
+ if (reg_id == IRDMA_VCHNL_REG_INV_ID)
+ break;
+
+ reg_id &= ~IRDMA_VCHNL_REG_PAGE_REL;
+ if (reg_id >= IRDMA_VCHNL_REG_COUNT)
+ return -EINVAL;
+
+ /* search regmap for register index in hw_regs.*/
+ reg_map_array = vchnl_reg_map;
+ do {
+ tmp_reg_id = reg_map_array->reg_id;
+ if (tmp_reg_id == reg_id)
+ break;
+
+ reg_map_array++;
+ } while (tmp_reg_id != IRDMA_VCHNL_REG_INV_ID);
+ /* no table row for this id: ignore the entry */
+ if (tmp_reg_id != reg_id)
+ continue;
+
+ reg_idx = reg_map_array->reg_idx;
+
+ /* Page relative, DB Offset do not need bar offset */
+ if (reg_idx == IRDMA_DB_ADDR_OFFSET ||
+ (reg_array[rindex].reg_id & IRDMA_VCHNL_REG_PAGE_REL)) {
+ dev->hw_regs[reg_idx] =
+ (u32 __iomem *)(uintptr_t)reg_array[rindex].reg_offset;
+ continue;
+ }
+
+ /* Update the local HW struct */
+ dev->hw_regs[reg_idx] = ig3rdma_get_reg_addr(dev->hw,
+ reg_array[rindex].reg_offset);
+ if (!dev->hw_regs[reg_idx])
+ return -EINVAL;
+ }
+
+ if (!regfld_array)
+ return -ENOMEM;
+
+ /* set up doorbell variables using mapped DB page */
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+
+ for (rindex = 0; rindex < IRDMA_VCHNL_REGFLD_COUNT; rindex++) {
+ /*
+ * buf_len is now the end offset of field entry rindex, so the
+ * entry lies inside resp_buffer only if buf_len does not exceed
+ * the buffer size.
+ */
+ buf_len += sizeof(struct irdma_vchnl_reg_field_info);
+ if (buf_len > sizeof(resp_buffer))
+ break;
+
+ if (regfld_array[rindex].fld_id == IRDMA_VCHNL_REGFLD_INV_ID)
+ break;
+
+ regfld_id = regfld_array[rindex].fld_id;
+ regfld_map_array = vchnl_regfld_map;
+ do {
+ tmp_regfld_id = regfld_map_array->regfld_id;
+ if (tmp_regfld_id == regfld_id)
+ break;
+
+ regfld_map_array++;
+ } while (tmp_regfld_id != IRDMA_VCHNL_REGFLD_INV_ID);
+
+ /* no table row for this id: ignore the entry */
+ if (tmp_regfld_id != regfld_id)
+ continue;
+
+ regfld_idx = regfld_map_array->regfld_idx;
+
+ num_bits = regfld_array[rindex].fld_bits;
+ shift_cnt = regfld_array[rindex].fld_shift;
+ /* reject empty fields and fields that do not fit in 64 bits */
+ if ((num_bits + shift_cnt > 64) || !num_bits) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: Invalid field mask id %d bits %d shift %d\n",
+ regfld_id, num_bits, shift_cnt);
+
+ continue;
+ }
+
+ bitmask = (1ULL << num_bits) - 1;
+ dev->hw_masks[regfld_idx] = bitmask << shift_cnt;
+ dev->hw_shifts[regfld_idx] = shift_cnt;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_add_vport - Request that a vport be added
+ * @dev: RDMA device pointer
+ * @vport_id: vport id placed in the request
+ * @qp1_id: QP1 id placed in the request
+ * @qos: array with at least IRDMA_MAX_USER_PRIORITY entries to fill in
+ *
+ * Sends IRDMA_VCHNL_OP_ADD_VPORT and, on success, copies the qs_handle of
+ * every user priority from the response into @qos and marks each entry valid.
+ *
+ * Return: 0 on success, -EBUSY if the virtual channel is not up, otherwise
+ * the error returned by irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos)
+{
+ struct irdma_vchnl_resp_vport_info resp_vport = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+ struct irdma_vchnl_req_init_info info = { 0 };
+ int ret, i;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_ADD_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_ADD_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+ info.resp_parm = &resp_vport;
+ info.resp_parm_len = sizeof(resp_vport);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ /* @qos is only written once the request has succeeded */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ qos[i].qs_handle = resp_vport.qs_handle[i];
+ qos[i].valid = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_del_vport - Request that a vport be deleted
+ * @dev: RDMA device pointer
+ * @vport_id: vport id placed in the request
+ * @qp1_id: QP1 id placed in the request
+ *
+ * Sends IRDMA_VCHNL_OP_DEL_VPORT. No response buffer is supplied, so nothing
+ * is copied back to the caller.
+ *
+ * Return: -EBUSY if the virtual channel is not up, otherwise the result of
+ * irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id, u32 qp1_id)
+{
+ struct irdma_vchnl_req_init_info info = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_DEL_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_DEL_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_aeq_vec_map - Map AEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @v_idx: vector index
+ *
+ * Builds a one-entry queue/vector list and sends it with
+ * IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP. The temporary list is freed before
+ * returning, whatever the outcome of the request.
+ *
+ * Return: -EBUSY if the virtual channel is not up, -ENOMEM if the list cannot
+ * be allocated, otherwise the result of irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ /* list header plus num_vectors trailing qv_info entries */
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = 1;
+ qv = qvl->qv_info;
+
+ /*
+ * Only the CEQ index is marked invalid; aeq_idx is not assigned here
+ * and keeps the zero it got from kzalloc().
+ */
+ qv->ceq_idx = IRDMA_Q_INVALID_IDX;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_ceq_vec_map - Map CEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @ceq_id: CEQ index
+ * @v_idx: vector index
+ *
+ * Builds a one-entry queue/vector list and sends it with
+ * IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP. The temporary list is freed before
+ * returning, whatever the outcome of the request.
+ *
+ * Return: -EBUSY if the virtual channel is not up, -ENOMEM if the list cannot
+ * be allocated, otherwise the result of irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ /* list header plus num_vectors trailing qv_info entries */
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = num_vectors;
+ qv = qvl->qv_info;
+
+ /* this entry describes a CEQ, so the AEQ index is marked invalid */
+ qv->aeq_idx = IRDMA_Q_INVALID_IDX;
+ qv->ceq_idx = ceq_id;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_ver - Request Channel version
+ * @dev: RDMA device pointer
+ * @ver_req: Virtual channel version requested
+ * @ver_res: Virtual channel version response
+ *
+ * @ver_req is sent as the op_ver of an IRDMA_VCHNL_OP_GET_VER request and the
+ * response is written directly into @ver_res.
+ *
+ * Return: 0 on success, -EBUSY if the virtual channel is not up, the error
+ * from irdma_vchnl_req_send_sync(), or -EOPNOTSUPP if the version reported in
+ * @ver_res is below IRDMA_VCHNL_CHNL_VER_MIN.
+ */
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, u32 *ver_res)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_VER;
+ info.op_ver = ver_req;
+ info.resp_parm = ver_res;
+ info.resp_parm_len = sizeof(*ver_res);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ /* only a lower bound is enforced on the reported version */
+ if (*ver_res < IRDMA_VCHNL_CHNL_VER_MIN) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: %s unsupported vchnl version 0x%0x\n",
+ __func__, *ver_res);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_hmc_fcn - Request VF HMC Function
+ * @dev: RDMA device pointer
+ *
+ * Sends IRDMA_VCHNL_OP_GET_HMC_FCN. When hw_rev is IRDMA_GEN_3 or newer the
+ * V2 request carries dev->protocol_used, and the response provides
+ * dev->hmc_fn_id and the qs_handle of every dev->qos[] entry, each of which is
+ * marked valid. For older hw_rev no request or response parameters are
+ * attached, op_ver stays at its zero-initialised value and no device state is
+ * updated from the response.
+ *
+ * Return: 0 on success, -EBUSY if the virtual channel is not up, otherwise
+ * the error returned by irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_hmc_info req_hmc = {};
+ struct irdma_vchnl_resp_hmc_info resp_hmc = {};
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_HMC_FCN;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info.op_ver = IRDMA_VCHNL_OP_GET_HMC_FCN_V2;
+ req_hmc.protocol_used = dev->protocol_used;
+ info.req_parm_len = sizeof(req_hmc);
+ info.req_parm = &req_hmc;
+ info.resp_parm = &resp_hmc;
+ info.resp_parm_len = sizeof(resp_hmc);
+ }
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ /* resp_hmc was only handed to the request on the GEN_3+ path */
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ int i;
+
+ dev->hmc_fn_id = resp_hmc.hmc_func;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ dev->qos[i].qs_handle = resp_hmc.qs_handle[i];
+ dev->qos[i].valid = true;
+ }
+ }
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_put_hmc_fcn - Free VF HMC Function
+ * @dev: RDMA device pointer
+ *
+ * Sends IRDMA_VCHNL_OP_PUT_HMC_FCN with no request or response parameters.
+ *
+ * Return: -EBUSY if the virtual channel is not up, otherwise the result of
+ * irdma_vchnl_req_send_sync().
+ */
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_PUT_HMC_FCN;
+ info.op_ver = IRDMA_VCHNL_OP_PUT_HMC_FCN_V0;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_get_caps - Request RDMA capabilities
+ * @dev: RDMA device pointer
+ *
+ * Sends IRDMA_VCHNL_OP_GET_RDMA_CAPS with dev->vc_caps as the response
+ * buffer, then checks that the reported hw_rev lies in the range
+ * IRDMA_GEN_2..IRDMA_GEN_MAX. dev->vc_caps keeps whatever the response wrote
+ * even when that check fails.
+ *
+ * Return: 0 on success, -EBUSY if the virtual channel is not up, the error
+ * from irdma_vchnl_req_send_sync(), or -EOPNOTSUPP for an out-of-range hw_rev.
+ */
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_RDMA_CAPS;
+ info.op_ver = IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0;
+ info.resp_parm = &dev->vc_caps;
+ info.resp_parm_len = sizeof(dev->vc_caps);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ if (dev->vc_caps.hw_rev > IRDMA_GEN_MAX ||
+ dev->vc_caps.hw_rev < IRDMA_GEN_2) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: %s unsupported hw_rev version 0x%0x\n",
+ __func__, dev->vc_caps.hw_rev);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_resp - Receive the inbound vchnl response.
+ * @dev: Dev pointer
+ * @vchnl_req: Vchannel request
+ *
+ * Validates the message held in dev->vc_recv_buf against @vchnl_req and
+ * copies its payload, truncated to vchnl_req->parm_len, into vchnl_req->parm.
+ * vchnl_req->resp_len is set to the number of bytes copied.
+ *
+ * Return: 0 on success, -EBADMSG if the message is shorter than the response
+ * header or its context does not match @vchnl_req, the error from
+ * irdma_vchnl_req_verify_resp(), or the non-zero op_ret carried in the
+ * response.
+ */
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vchnl_req)
+{
+ struct irdma_vchnl_resp_buf *vchnl_msg_resp =
+ (struct irdma_vchnl_resp_buf *)dev->vc_recv_buf;
+ u16 resp_len;
+ int ret;
+
+ /*
+ * A message shorter than the fixed header carries no valid op_ctx or
+ * op_ret, and the payload length computed below would wrap around and
+ * then be clamped to parm_len, passing stale buffer bytes off as data.
+ */
+ if (dev->vc_recv_len < sizeof(*vchnl_msg_resp)) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: error vchnl response shorter than header\n");
+ return -EBADMSG;
+ }
+
+ /* the request pointer is echoed back as the response context */
+ if ((uintptr_t)vchnl_req != (uintptr_t)vchnl_msg_resp->op_ctx) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: error vchnl context value does not match\n");
+ return -EBADMSG;
+ }
+
+ /* payload bytes received, limited to what the caller can accept */
+ resp_len = dev->vc_recv_len - sizeof(*vchnl_msg_resp);
+ resp_len = min(resp_len, vchnl_req->parm_len);
+
+ ret = irdma_vchnl_req_verify_resp(vchnl_req, resp_len);
+ if (ret)
+ return ret;
+
+ ret = (int)vchnl_msg_resp->op_ret;
+ if (ret)
+ return ret;
+
+ vchnl_req->resp_len = 0;
+ if (vchnl_req->parm_len && vchnl_req->parm && resp_len) {
+ memcpy(vchnl_req->parm, vchnl_msg_resp->buf, resp_len);
+ vchnl_req->resp_len = resp_len;
+ ibdev_dbg(to_ibdev(dev), "VIRT: Got response, data size %u\n",
+ resp_len);
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h
new file mode 100644
index 000000000000..aa955a9125bd
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+#ifndef IRDMA_VIRTCHNL_H
+#define IRDMA_VIRTCHNL_H
+
+#include "hmc.h"
+#include "irdma.h"
+
+/* IRDMA_VCHNL_CHNL_VER_V0 is for legacy hw, no longer supported. */
+#define IRDMA_VCHNL_CHNL_VER_V2 2
+#define IRDMA_VCHNL_CHNL_VER_MIN IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_CHNL_VER_MAX IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V1 1
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2
+#define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP_V0 0
+#define IRDMA_VCHNL_OP_ADD_VPORT_V0 0
+#define IRDMA_VCHNL_OP_DEL_VPORT_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1
+
+#define IRDMA_VCHNL_REG_ID_CQPTAIL 0
+#define IRDMA_VCHNL_REG_ID_CQPDB 1
+#define IRDMA_VCHNL_REG_ID_CCQPSTATUS 2
+#define IRDMA_VCHNL_REG_ID_CCQPHIGH 3
+#define IRDMA_VCHNL_REG_ID_CCQPLOW 4
+#define IRDMA_VCHNL_REG_ID_CQARM 5
+#define IRDMA_VCHNL_REG_ID_CQACK 6
+#define IRDMA_VCHNL_REG_ID_AEQALLOC 7
+#define IRDMA_VCHNL_REG_ID_CQPERRCODES 8
+#define IRDMA_VCHNL_REG_ID_WQEALLOC 9
+#define IRDMA_VCHNL_REG_ID_IPCONFIG0 10
+#define IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET 11
+#define IRDMA_VCHNL_REG_ID_DYN_CTL 12
+#define IRDMA_VCHNL_REG_ID_AEQITRMASK 13
+#define IRDMA_VCHNL_REG_ID_CEQITRMASK 14
+#define IRDMA_VCHNL_REG_INV_ID 0xFFFF
+#define IRDMA_VCHNL_REG_PAGE_REL 0x8000
+
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR 2
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE 5
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID 6
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID 7
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID 8
+#define IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT 9
+#define IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID 10
+#define IRDMA_VCHNL_REGFLD_INV_ID 0xFFFF
+
+#define IRDMA_VCHNL_RESP_MIN_SIZE (sizeof(struct irdma_vchnl_resp_buf))
+
+/*
+ * Virtual channel operation codes, stored in the op_code of a request.
+ * The numbering is sparse: values 3-10 and 12 are not defined here.
+ */
+enum irdma_vchnl_ops {
+ IRDMA_VCHNL_OP_GET_VER = 0,
+ IRDMA_VCHNL_OP_GET_HMC_FCN = 1,
+ IRDMA_VCHNL_OP_PUT_HMC_FCN = 2,
+ IRDMA_VCHNL_OP_GET_REG_LAYOUT = 11,
+ IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP = 14,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP = 15,
+ IRDMA_VCHNL_OP_ADD_VPORT = 16,
+ IRDMA_VCHNL_OP_DEL_VPORT = 17,
+};
+
+/*
+ * Request parameters of IRDMA_VCHNL_OP_GET_HMC_FCN (sent with op version V2).
+ * Packed, so the layout has no padding.
+ */
+struct irdma_vchnl_req_hmc_info {
+ u8 protocol_used;
+ u8 disable_qos;
+} __packed;
+
+/*
+ * Response parameters of IRDMA_VCHNL_OP_GET_HMC_FCN: the HMC function id and
+ * one qs_handle per user priority. Packed, so the layout has no padding.
+ */
+struct irdma_vchnl_resp_hmc_info {
+ u16 hmc_func;
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+} __packed;
+
+/*
+ * One queue-to-vector mapping entry of a QUEUE_VECTOR_MAP request.
+ * The CEQ mapping helper sets aeq_idx to IRDMA_Q_INVALID_IDX and the AEQ
+ * mapping helper sets ceq_idx to IRDMA_Q_INVALID_IDX.
+ */
+struct irdma_vchnl_qv_info {
+ u32 v_idx;
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+/*
+ * Request parameters of IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP: num_vectors entries
+ * follow in the flexible array, so size it with struct_size().
+ */
+struct irdma_vchnl_qvlist_info {
+ u32 num_vectors;
+ struct irdma_vchnl_qv_info qv_info[];
+};
+
+/* Request parameters shared by IRDMA_VCHNL_OP_ADD_VPORT and _DEL_VPORT. */
+struct irdma_vchnl_req_vport_info {
+ u16 vport_id;
+ u32 qp1_id;
+};
+
+/*
+ * Response parameters of IRDMA_VCHNL_OP_ADD_VPORT: one qs_handle per user
+ * priority.
+ */
+struct irdma_vchnl_resp_vport_info {
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+};
+
+/*
+ * Packed virtual channel message: a fixed header (op code, op version,
+ * length, context) followed by a variable-length payload in buf[].
+ * NOTE(review): presumably the outbound request format, since struct
+ * irdma_vchnl_req points at one through vchnl_msg - confirm against the code
+ * that builds it.
+ */
+struct irdma_vchnl_op_buf {
+ u16 op_code;
+ u16 op_ver;
+ u16 buf_len;
+ u16 rsvd;
+ u64 op_ctx;
+ u8 buf[];
+} __packed;
+
+/*
+ * Packed virtual channel response: a fixed header followed by the response
+ * payload in buf[]. irdma_vchnl_req_get_resp() matches op_ctx against the
+ * address of the pending request and returns a non-zero op_ret as the error
+ * code of the operation.
+ */
+struct irdma_vchnl_resp_buf {
+ u64 op_ctx;
+ u16 buf_len;
+ s16 op_ret;
+ u16 rsvd[2];
+ u8 buf[];
+} __packed;
+
+/*
+ * Packed RDMA capabilities layout.
+ * NOTE(review): presumably the type of dev->vc_caps, which
+ * irdma_vchnl_req_get_caps() fills from the IRDMA_VCHNL_OP_GET_RDMA_CAPS
+ * response and whose hw_rev it range-checks - confirm in the device struct.
+ */
+struct irdma_vchnl_rdma_caps {
+ u8 hw_rev;
+ u16 cqp_timeout_s;
+ u16 cqp_def_timeout_s;
+ u16 max_hw_push_len;
+} __packed;
+
+/* Initialisation parameters passed to irdma_sc_vchnl_init(). */
+struct irdma_vchnl_init_info {
+ struct workqueue_struct *vchnl_wq;
+ enum irdma_vers hw_rev;
+ bool privileged;
+ bool is_pf;
+};
+
+/*
+ * One register entry of the register-layout response. reg_id is an
+ * IRDMA_VCHNL_REG_ID_* value, optionally combined with
+ * IRDMA_VCHNL_REG_PAGE_REL; IRDMA_VCHNL_REG_INV_ID ends the list.
+ */
+struct irdma_vchnl_reg_info {
+ u32 reg_offset;
+ u16 field_cnt;
+ u16 reg_id; /* High bit of reg_id: bar or page relative */
+};
+
+/*
+ * One register-field entry of the register-layout response. The field mask is
+ * fld_bits one-bits shifted left by fld_shift; fld_id is an
+ * IRDMA_VCHNL_REGFLD_ID_* value and IRDMA_VCHNL_REGFLD_INV_ID ends the list.
+ */
+struct irdma_vchnl_reg_field_info {
+ u8 fld_shift;
+ u8 fld_bits;
+ u16 fld_id;
+};
+
+/*
+ * State of one in-flight virtual channel request. When the response arrives,
+ * irdma_vchnl_req_get_resp() copies at most parm_len payload bytes into parm
+ * and records the number copied in resp_len.
+ */
+struct irdma_vchnl_req {
+ struct irdma_vchnl_op_buf *vchnl_msg;
+ void *parm;
+ u32 vf_id;
+ u16 parm_len;
+ u16 resp_len;
+};
+
+/*
+ * Caller-filled description of a request handed to
+ * irdma_vchnl_req_send_sync(): operation code and version, the request
+ * parameters to send, and the buffer that receives the response parameters.
+ * Unused pointers and lengths are left zero.
+ */
+struct irdma_vchnl_req_init_info {
+ void *req_parm;
+ void *resp_parm;
+ u16 req_parm_len;
+ u16 resp_parm_len;
+ u16 op_code;
+ u16 op_ver;
+} __packed;
+
+struct irdma_qos;
+
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info);
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req,
+ u32 *ver_res);
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vc_req);
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx);
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id,
+ u32 v_idx);
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos);
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id);
+#endif /* IRDMA_VIRTCHNL_H */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 28e154bbb50f..1becc8779123 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -291,6 +291,32 @@ out:
return wc_index;
}
+/*
+ * mana_drain_gsi_sqs - fail every pending send of the GSI QP
+ * @mdev: mana IB device
+ *
+ * Looks up the QP numbered MANA_GSI_QPN and, holding the lock of its send CQ,
+ * marks every shadow send-queue entry still awaiting completion with
+ * IB_WC_GENERAL_ERR and advances past it. The CQ completion handler, if one
+ * is set, is then called after the lock has been dropped. Does nothing when
+ * the QP is not found.
+ */
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false);
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ struct mana_ib_cq *cq;
+ unsigned long flags;
+
+ if (!qp)
+ return;
+
+ cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq))
+ != NULL) {
+ shadow_wqe->header.error_code = IB_WC_GENERAL_ERR;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ /* drop the reference taken by mana_get_qp_ref() above */
+ mana_put_qp_ref(qp);
+}
+
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index fa60872f169f..bdeddb642b87 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -230,6 +230,9 @@ static void mana_ib_remove(struct auxiliary_device *adev)
{
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+ if (mana_ib_is_rnic(dev))
+ mana_drain_gsi_sqs(dev);
+
ib_unregister_device(&dev->ib_dev);
dma_pool_destroy(dev->av_pool);
if (mana_ib_is_rnic(dev)) {
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 6a2471f2e804..fac159f7128d 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -273,9 +273,8 @@ int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
- ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %d\n", err);
- return err;
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
+ return PTR_ERR(umem);
}
err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 5d31034ac7fb..9d36232ed880 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -43,6 +43,8 @@
*/
#define MANA_AV_BUFFER_SIZE 64
+#define MANA_GSI_QPN (1)
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -410,7 +412,7 @@ struct mana_ib_ah_attr {
u8 traffic_class;
u16 src_port;
u16 dest_port;
- u32 reserved;
+ u32 flow_label;
};
struct mana_rnic_set_qp_state_req {
@@ -427,8 +429,15 @@ struct mana_rnic_set_qp_state_req {
u32 retry_cnt;
u32 rnr_retry;
u32 min_rnr_timer;
- u32 reserved;
+ u32 rate_limit;
struct mana_ib_ah_attr ah_attr;
+ u64 reserved1;
+ u32 qkey;
+ u32 qp_access_flags;
+ u8 local_ack_timeout;
+ u8 max_rd_atomic;
+ u16 reserved2;
+ u32 reserved3;
}; /* HW Data */
struct mana_rnic_set_qp_state_resp {
@@ -718,6 +727,7 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev);
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 55701046ffba..3d0245a4c1ed 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -138,7 +138,8 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
ibdev_dbg(ibdev,
- "Failed to get umem for register user-mr, %d\n", err);
+ "Failed to get umem for register user-mr, %pe\n",
+ mr->umem);
goto err_free;
}
@@ -220,7 +221,8 @@ struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 leng
umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
if (IS_ERR(umem_dmabuf)) {
err = PTR_ERR(umem_dmabuf);
- ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %pe\n",
+ umem_dmabuf);
goto err_free;
}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index a6bf4d539e67..48c1f4977f21 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -735,6 +735,8 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+
+ req.hdr.req.msg_version = GDMA_MESSAGE_V3;
req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.qp_handle = qp->qp_handle;
@@ -748,6 +750,12 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
req.retry_cnt = attr->retry_cnt;
req.rnr_retry = attr->rnr_retry;
req.min_rnr_timer = attr->min_rnr_timer;
+ req.rate_limit = attr->rate_limit;
+ req.qkey = attr->qkey;
+ req.local_ack_timeout = attr->timeout;
+ req.qp_access_flags = attr->qp_access_flags;
+ req.max_rd_atomic = attr->max_rd_atomic;
+
if (attr_mask & IB_QP_AV) {
ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
if (!ndev) {
@@ -774,6 +782,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ibqp->qp_num, attr->dest_qp_num);
req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ req.ah_attr.flow_label = attr->ah_attr.grh.flow_label;
}
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e6e132f10625..91c714f72099 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1836,9 +1836,9 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
if (IS_ERR(tun_qp->qp)) {
ret = PTR_ERR(tun_qp->qp);
+ pr_err("Couldn't create %s QP (%pe)\n",
+ create_tun ? "tunnel" : "special", tun_qp->qp);
tun_qp->qp = NULL;
- pr_err("Couldn't create %s QP (%d)\n",
- create_tun ? "tunnel" : "special", ret);
return ret;
}
@@ -2017,14 +2017,14 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
NULL, ctx, &cq_attr);
if (IS_ERR(ctx->cq)) {
ret = PTR_ERR(ctx->cq);
- pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ pr_err("Couldn't create tunnel CQ (%pe)\n", ctx->cq);
goto err_buf;
}
ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
if (IS_ERR(ctx->pd)) {
ret = PTR_ERR(ctx->pd);
- pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ pr_err("Couldn't create tunnel PD (%pe)\n", ctx->pd);
goto err_cq;
}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 50fd407103c7..f2887ae6390e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1652,7 +1652,8 @@ int mlx4_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
if (IS_ERR(sqp->roce_v2_gsi)) {
- pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ pr_err("Failed to create GSI QP for RoCEv2 (%pe)\n",
+ sqp->roce_v2_gsi);
sqp->roce_v2_gsi = NULL;
} else {
to_mqp(sqp->roce_v2_gsi)->flags |=
diff --git a/drivers/infiniband/hw/mlx5/data_direct.c b/drivers/infiniband/hw/mlx5/data_direct.c
index b9ba84afaae2..b81ac5709b56 100644
--- a/drivers/infiniband/hw/mlx5/data_direct.c
+++ b/drivers/infiniband/hw/mlx5/data_direct.c
@@ -35,7 +35,7 @@ static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev)
vpd_data = pci_vpd_alloc(pdev, &vpd_size);
if (IS_ERR(vpd_data)) {
- pci_err(pdev, "Unable to read VPD, err=%ld\n", PTR_ERR(vpd_data));
+ pci_err(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
return PTR_ERR(vpd_data);
}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index b804f2dd5628..d5487834ed25 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -131,8 +131,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
- mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
- PTR_ERR(gsi->cq));
+ mlx5_ib_warn(dev,
+ "unable to create send CQ for GSI QP. error %pe\n",
+ gsi->cq);
ret = PTR_ERR(gsi->cq);
goto err_free_wrs;
}
@@ -147,8 +148,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
if (IS_ERR(gsi->rx_qp)) {
- mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
- PTR_ERR(gsi->rx_qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware GSI QP. error %pe\n",
+ gsi->rx_qp);
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
@@ -294,8 +296,9 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
qp = create_gsi_ud_qp(gsi);
if (IS_ERR(qp)) {
- mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
- PTR_ERR(qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware UD QP for GSI: %pe\n",
+ qp);
return;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d456e4fde3e1..fc1e86f6c409 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -13,6 +13,7 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
+#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
@@ -883,6 +884,51 @@ static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev,
resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
}
+/*
+ * Calculate maximum SQ overhead across all QP types.
+ * Other QP types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT)
+ * have smaller overhead than the types calculated below,
+ * so they are implicitly included.
+ *
+ * Returns the larger of the XRC_INI and the UD-with-LSO overhead, in bytes.
+ */
+static u32 mlx5_ib_calc_max_sq_overhead(void)
+{
+ u32 max_overhead_xrc, overhead_ud_lso, a, b;
+
+ /* XRC_INI */
+ max_overhead_xrc = sizeof(struct mlx5_wqe_xrc_seg);
+ max_overhead_xrc += sizeof(struct mlx5_wqe_ctrl_seg);
+ /* a: atomic + remote address segments */
+ a = sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg);
+ /* b: UMR control + mkey segments + inline UMR data */
+ b = sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg) +
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD;
+ max_overhead_xrc += max(a, b);
+
+ /* UD with LSO */
+ overhead_ud_lso = sizeof(struct mlx5_wqe_ctrl_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_pad);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_datagram_seg);
+
+ return max(max_overhead_xrc, overhead_ud_lso);
+}
+
+/*
+ * Calculate the max_qp_wr value to report for the device.
+ *
+ * The largest send queue holds 2^log_max_qp_sz basic blocks of
+ * MLX5_SEND_WQE_BB bytes each. That byte count is divided by the worst-case
+ * size of a single WQE: the maximum SQ overhead plus one data segment,
+ * rounded up to a power of two and aligned to MLX5_SEND_WQE_BB.
+ */
+static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ u32 max_wqe_bb_units = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ u32 max_wqe_size;
+ /* max QP overhead + 1 SGE, no inline, no special features */
+ max_wqe_size = mlx5_ib_calc_max_sq_overhead() +
+ sizeof(struct mlx5_wqe_data_seg);
+
+ max_wqe_size = roundup_pow_of_two(max_wqe_size);
+
+ max_wqe_size = ALIGN(max_wqe_size, MLX5_SEND_WQE_BB);
+
+ return (max_wqe_bb_units * MLX5_SEND_WQE_BB) / max_wqe_size;
+}
+
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
@@ -1041,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
- props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
@@ -1793,7 +1839,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
}
static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
- struct mlx5_core_dev *slave)
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
{
int err;
@@ -1805,6 +1852,7 @@ static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
if (err)
goto out;
+ lb_state->force_enable = true;
return 0;
out:
@@ -1813,16 +1861,22 @@ out:
}
static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
- struct mlx5_core_dev *slave)
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
{
mlx5_nic_vport_update_local_lb(slave, false);
mlx5_nic_vport_update_local_lb(master, false);
+
+ lb_state->force_enable = false;
}
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
+ if (dev->lb.force_enable)
+ return 0;
+
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td++;
@@ -1844,6 +1898,9 @@ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
+ if (dev->lb.force_enable)
+ return;
+
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td--;
@@ -2994,14 +3051,16 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
- mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret);
+ mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%pe\n",
+ pd);
goto unlock;
}
cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
- mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret);
+ mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%pe\n",
+ cq);
ib_dealloc_pd(pd);
goto unlock;
}
@@ -3045,7 +3104,9 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
s0 = ib_create_srq(devr->p0, &attr);
if (IS_ERR(s0)) {
ret = PTR_ERR(s0);
- mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 0 for res init, err=%pe\n",
+ s0);
goto unlock;
}
@@ -3057,7 +3118,9 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
s1 = ib_create_srq(devr->p0, &attr);
if (IS_ERR(s1)) {
ret = PTR_ERR(s1);
- mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 1 for res init, err=%pe\n",
+ s1);
ib_destroy_srq(s0);
}
@@ -3118,6 +3181,7 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_core_dev *mdev = dev->mdev;
+ bool ro_supp = false;
void *mkc;
u32 mkey;
u32 pdn;
@@ -3146,14 +3210,37 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
MLX5_SET(mkc, mkc, length64, 1);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
- kvfree(in);
if (err)
- goto err;
+ goto err_mkey;
dev->ddr.mkey = mkey;
dev->ddr.pdn = pdn;
+
+ /* create another mkey with RO support */
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+ ro_supp = true;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ ro_supp = true;
+ }
+
+ if (ro_supp) {
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ /* RO is defined as best effort */
+ if (!err) {
+ dev->ddr.mkey_ro = mkey;
+ dev->ddr.mkey_ro_valid = true;
+ }
+ }
+
+ kvfree(in);
return 0;
+err_mkey:
+ kvfree(in);
err:
mlx5_core_dealloc_pd(mdev, pdn);
return err;
@@ -3162,6 +3249,10 @@ err:
static void
mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev)
{
+
+ if (dev->ddr.mkey_ro_valid)
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey_ro);
+
mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey);
mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn);
}
@@ -3523,7 +3614,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
- mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
@@ -3620,7 +3711,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
- err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
if (err)
goto unbind;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7ffc7ee92cf0..09d82d5f95e3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -854,6 +854,8 @@ struct mlx5_ib_port_resources {
struct mlx5_data_direct_resources {
u32 pdn;
u32 mkey;
+ u32 mkey_ro;
+ u8 mkey_ro_valid :1;
};
struct mlx5_ib_resources {
@@ -1109,6 +1111,7 @@ struct mlx5_ib_lb_state {
u32 user_td;
int qps;
bool enabled;
+ bool force_enable;
};
struct mlx5_ib_pf_eq {
@@ -1802,6 +1805,10 @@ mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
+ /* In KSM mode HW requires IOVA and mkey's page size to be aligned */
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM && iova)
+ bitmap &= GENMASK_ULL(__ffs64(iova), 0);
+
return ib_umem_find_best_pgsz(umem, bitmap, iova);
}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1317f2cb38a4..325fa04cbe8a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1652,8 +1652,7 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
fd, access_flags);
if (IS_ERR(umem_dmabuf)) {
- mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
- PTR_ERR(umem_dmabuf));
+ mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
return ERR_CAST(umem_dmabuf);
}
@@ -1717,11 +1716,11 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
goto end;
}
- /* The device's 'data direct mkey' was created without RO flags to
- * simplify things and allow for a single mkey per device.
- * Since RO is not a must, mask it out accordingly.
+ /* If no device's 'data direct mkey' with RO flags exists
+ * mask it out accordingly.
*/
- access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
+ if (!dev->ddr.mkey_ro_valid)
+ access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
offset, length, virt_addr, fd,
access_flags, MLX5_MKC_ACCESS_MODE_KSM,
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 7ef35cddce81..4e562e0dd9e1 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -761,7 +761,11 @@ _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd,
if (dd) {
cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
- cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
+ if (mr->access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ dev->ddr.mkey_ro_valid)
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey_ro);
+ else
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
if (mr->umem->is_dmabuf &&
(flags & MLX5_IB_UPD_XLT_ZAP)) {
cur_ksm->va = 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index e825e2ef7966..134a79eecfcb 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -492,7 +492,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
{
u32 i, offset, max_scan, qpn;
struct rvt_qpn_map *map;
- u32 ret;
+ int ret;
u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ?
RVT_AIP_QPN_MAX : RVT_QPN_MAX;
@@ -510,7 +510,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
else
qpt->flags |= n;
spin_unlock(&qpt->lock);
- goto bail;
+
+ return ret;
}
qpn = qpt->last + qpt->incr;
@@ -530,7 +531,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
if (!test_and_set_bit(offset, map->page)) {
qpt->last = qpn;
ret = qpn;
- goto bail;
+
+ return ret;
}
offset += qpt->incr;
/*
@@ -565,10 +567,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
qpn = mk_qpn(qpt, map, offset);
}
- ret = -ENOMEM;
-
-bail:
- return ret;
+ return -ENOMEM;
}
/**
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 6f8f353e9583..f522820b950c 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -132,8 +132,12 @@ static void do_task(struct rxe_task *task)
* yield the cpu and reschedule the task
*/
if (!ret) {
- task->state = TASK_STATE_IDLE;
- resched = 1;
+ if (task->state != TASK_STATE_DRAINING) {
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
+ } else {
+ cont = 1;
+ }
goto exit;
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 35c3bde0d00a..efa2f097b582 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -769,7 +769,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
struct siw_wqe *wqe = tx_wqe(qp);
unsigned long flags;
- int rv = 0;
+ int rv = 0, imm_err = 0;
if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) {
siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
@@ -955,9 +955,17 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
* Send directly if SQ processing is not in progress.
* Eventual immediate errors (rv < 0) do not affect the involved
* RI resources (Verbs, 8.3.1) and thus do not prevent from SQ
- * processing, if new work is already pending. But rv must be passed
- * to caller.
+ * processing, if new work is already pending. But rv and pointer
+ * to failed work request must be passed to caller.
*/
+ if (unlikely(rv < 0)) {
+ /*
+ * Immediate error
+ */
+ siw_dbg_qp(qp, "Immediate error %d\n", rv);
+ imm_err = rv;
+ *bad_wr = wr;
+ }
if (wqe->wr_status != SIW_WR_IDLE) {
spin_unlock_irqrestore(&qp->sq_lock, flags);
goto skip_direct_sending;
@@ -982,15 +990,10 @@ skip_direct_sending:
up_read(&qp->state_lock);
- if (rv >= 0)
- return 0;
- /*
- * Immediate error
- */
- siw_dbg_qp(qp, "error %d\n", rv);
+ if (unlikely(imm_err))
+ return imm_err;
- *bad_wr = wr;
- return rv;
+ return (rv >= 0) ? 0 : rv;
}
/*
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7acafc5c0e09..5b4d76e97437 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -351,26 +351,27 @@ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
}
/*
- * Find the master net_device on top of the given net_device.
+ * Find the L2 master net_device on top of the given net_device.
* @dev: base IPoIB net_device
*
- * Returns the master net_device with a reference held, or the same net_device
- * if no master exists.
+ * Returns the L2 master net_device with reference held if the L2 master
+ * exists (such as bond netdevice), or returns same netdev with reference
+ * held when master does not exist or when L3 master (such as VRF netdev).
*/
static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
{
struct net_device *master;
rcu_read_lock();
+
master = netdev_master_upper_dev_get_rcu(dev);
+ if (!master || netif_is_l3_master(master))
+ master = dev;
+
dev_hold(master);
rcu_read_unlock();
- if (master)
- return master;
-
- dev_hold(dev);
- return dev;
+ return master;
}
struct ipoib_walk_data {
@@ -522,7 +523,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
if (ret)
return NULL;
- /* See if we can find a unique device matching the L2 parameters */
+ /* See if we can find a unique device matching the pkey and GID */
matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
gid, NULL, &net_dev);
@@ -535,7 +536,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
dev_put(net_dev);
- /* Couldn't find a unique device with L2 parameters only. Use L3
+ /* Couldn't find a unique device with pkey and GID only. Use L3
* address to uniquely match the net device */
matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
gid, addr, &net_dev);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 5dfb4644446b..71269446353d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -667,9 +667,9 @@ static int srpt_refresh_port(struct srpt_port *sport)
srpt_mad_recv_handler,
sport, 0);
if (IS_ERR(mad_agent)) {
- pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n",
+ pr_err("%s-%d: MAD agent registration failed (%pe). Note: this is expected if SR-IOV is enabled.\n",
dev_name(&sport->sdev->device->dev), sport->port,
- PTR_ERR(mad_agent));
+ mad_agent);
sport->mad_agent = NULL;
memset(&port_modify, 0, sizeof(port_modify));
port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
@@ -1865,8 +1865,8 @@ retry:
IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
- pr_err("failed to create CQ cqe= %d ret= %d\n",
- ch->rq_size + sq_size, ret);
+ pr_err("failed to create CQ cqe= %d ret= %pe\n",
+ ch->rq_size + sq_size, ch->cq);
goto out;
}
ch->cq_size = ch->rq_size + sq_size;
@@ -3132,7 +3132,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
WARN_ON_ONCE(sdev->srq);
srq = ib_create_srq(sdev->pd, &srq_attr);
if (IS_ERR(srq)) {
- pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(srq));
+ pr_debug("ib_create_srq() failed: %pe\n", srq);
return PTR_ERR(srq);
}
@@ -3236,8 +3236,7 @@ static int srpt_add_one(struct ib_device *device)
if (rdma_port_get_link_layer(device, 1) == IB_LINK_LAYER_INFINIBAND)
sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
if (IS_ERR(sdev->cm_id)) {
- pr_info("ib_create_cm_id() failed: %ld\n",
- PTR_ERR(sdev->cm_id));
+ pr_info("ib_create_cm_id() failed: %pe\n", sdev->cm_id);
ret = PTR_ERR(sdev->cm_id);
sdev->cm_id = NULL;
if (!rdma_cm_id)
@@ -3687,8 +3686,7 @@ static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr)
rdma_cm_id = rdma_create_id(&init_net, srpt_rdma_cm_handler,
NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(rdma_cm_id)) {
- pr_err("RDMA/CM ID creation failed: %ld\n",
- PTR_ERR(rdma_cm_id));
+ pr_err("RDMA/CM ID creation failed: %pe\n", rdma_cm_id);
goto out;
}
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 95f63c5f6159..a698a2e7ce2a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -792,6 +792,11 @@ struct amd_iommu {
u32 flags;
volatile u64 *cmd_sem;
atomic64_t cmd_sem_val;
+ /*
+ * Track physical address to directly use it in build_completion_wait()
+ * and avoid adding any special checks and handling for kdump.
+ */
+ u64 cmd_sem_paddr;
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index ba9e582a8bbe..f2991c11867c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -406,6 +406,9 @@ static void iommu_set_device_table(struct amd_iommu *iommu)
BUG_ON(iommu->mmio_base == NULL);
+ if (is_kdump_kernel())
+ return;
+
entry = iommu_virt_to_phys(dev_table);
entry |= (dev_table_size >> 12) - 1;
memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
@@ -646,7 +649,10 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->dev_table);
+ if (is_kdump_kernel())
+ memunmap((void *)pci_seg->dev_table);
+ else
+ iommu_free_pages(pci_seg->dev_table);
pci_seg->dev_table = NULL;
}
@@ -710,6 +716,26 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
pci_seg->alias_table = NULL;
}
+static inline void *iommu_memremap(unsigned long paddr, size_t size)
+{
+ phys_addr_t phys;
+
+ if (!paddr)
+ return NULL;
+
+ /*
+ * Obtain true physical address in kdump kernel when SME is enabled.
+ * Currently, previous kernel with SME enabled and kdump kernel
+ * with SME support disabled is not supported.
+ */
+ phys = __sme_clr(paddr);
+
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ return (__force void *)ioremap_encrypted(phys, size);
+ else
+ return memremap(phys, size, MEMREMAP_WB);
+}
+
/*
* Allocates the command buffer. This buffer is per AMD IOMMU. We can
* write commands to that buffer later and the IOMMU will execute them
@@ -795,11 +821,16 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
BUG_ON(iommu->cmd_buf == NULL);
- entry = iommu_virt_to_phys(iommu->cmd_buf);
- entry |= MMIO_CMD_SIZE_512;
-
- memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
- &entry, sizeof(entry));
+ if (!is_kdump_kernel()) {
+ /*
+ * Command buffer is re-used for kdump kernel and setting
+ * of MMIO register is not required.
+ */
+ entry = iommu_virt_to_phys(iommu->cmd_buf);
+ entry |= MMIO_CMD_SIZE_512;
+ memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
+ &entry, sizeof(entry));
+ }
amd_iommu_reset_cmd_buffer(iommu);
}
@@ -850,10 +881,15 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
BUG_ON(iommu->evt_buf == NULL);
- entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
-
- memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
- &entry, sizeof(entry));
+ if (!is_kdump_kernel()) {
+ /*
+ * Event buffer is re-used for kdump kernel and setting
+ * of MMIO register is not required.
+ */
+ entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+ memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
+ &entry, sizeof(entry));
+ }
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
@@ -942,8 +978,91 @@ err_out:
static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
{
iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
+ if (!iommu->cmd_sem)
+ return -ENOMEM;
+ iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
+ return 0;
+}
+
+static int __init remap_event_buffer(struct amd_iommu *iommu)
+{
+ u64 paddr;
+
+ pr_info_once("Re-using event buffer from the previous kernel\n");
+ paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK;
+ iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE);
+
+ return iommu->evt_buf ? 0 : -ENOMEM;
+}
+
+static int __init remap_command_buffer(struct amd_iommu *iommu)
+{
+ u64 paddr;
+
+ pr_info_once("Re-using command buffer from the previous kernel\n");
+ paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK;
+ iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE);
+
+ return iommu->cmd_buf ? 0 : -ENOMEM;
+}
+
+static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu)
+{
+ u64 paddr;
+
+ if (check_feature(FEATURE_SNP)) {
+ /*
+ * When SNP is enabled, the exclusion base register is used for the
+ * completion wait buffer (CWB) address. Read and re-use it.
+ */
+ pr_info_once("Re-using CWB buffers from the previous kernel\n");
+ paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK;
+ iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE);
+ if (!iommu->cmd_sem)
+ return -ENOMEM;
+ iommu->cmd_sem_paddr = paddr;
+ } else {
+ return alloc_cwwb_sem(iommu);
+ }
+
+ return 0;
+}
+
+static int __init alloc_iommu_buffers(struct amd_iommu *iommu)
+{
+ int ret;
+
+ /*
+ * Reuse/Remap the previous kernel's allocated completion wait
+ * command and event buffers for kdump boot.
+ */
+ if (is_kdump_kernel()) {
+ ret = remap_or_alloc_cwwb_sem(iommu);
+ if (ret)
+ return ret;
+
+ ret = remap_command_buffer(iommu);
+ if (ret)
+ return ret;
+
+ ret = remap_event_buffer(iommu);
+ if (ret)
+ return ret;
+ } else {
+ ret = alloc_cwwb_sem(iommu);
+ if (ret)
+ return ret;
- return iommu->cmd_sem ? 0 : -ENOMEM;
+ ret = alloc_command_buffer(iommu);
+ if (ret)
+ return ret;
+
+ ret = alloc_event_buffer(iommu);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static void __init free_cwwb_sem(struct amd_iommu *iommu)
@@ -951,6 +1070,38 @@ static void __init free_cwwb_sem(struct amd_iommu *iommu)
if (iommu->cmd_sem)
iommu_free_pages((void *)iommu->cmd_sem);
}
+static void __init unmap_cwwb_sem(struct amd_iommu *iommu)
+{
+ if (iommu->cmd_sem) {
+ if (check_feature(FEATURE_SNP))
+ memunmap((void *)iommu->cmd_sem);
+ else
+ iommu_free_pages((void *)iommu->cmd_sem);
+ }
+}
+
+static void __init unmap_command_buffer(struct amd_iommu *iommu)
+{
+ memunmap((void *)iommu->cmd_buf);
+}
+
+static void __init unmap_event_buffer(struct amd_iommu *iommu)
+{
+ memunmap(iommu->evt_buf);
+}
+
+static void __init free_iommu_buffers(struct amd_iommu *iommu)
+{
+ if (is_kdump_kernel()) {
+ unmap_cwwb_sem(iommu);
+ unmap_command_buffer(iommu);
+ unmap_event_buffer(iommu);
+ } else {
+ free_cwwb_sem(iommu);
+ free_command_buffer(iommu);
+ free_event_buffer(iommu);
+ }
+}
static void iommu_enable_xt(struct amd_iommu *iommu)
{
@@ -982,15 +1133,12 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
dte->data[i] |= (1UL << _bit);
}
-static bool __copy_device_table(struct amd_iommu *iommu)
+static bool __reuse_device_table(struct amd_iommu *iommu)
{
- u64 int_ctl, int_tab_len, entry = 0;
struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
- struct dev_table_entry *old_devtb = NULL;
- u32 lo, hi, devid, old_devtb_size;
+ u32 lo, hi, old_devtb_size;
phys_addr_t old_devtb_phys;
- u16 dom_id, dte_v, irq_v;
- u64 tmp;
+ u64 entry;
/* Each IOMMU use separate device table with the same size */
lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1015,66 +1163,20 @@ static bool __copy_device_table(struct amd_iommu *iommu)
pr_err("The address of old device table is above 4G, not trustworthy!\n");
return false;
}
- old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
- ? (__force void *)ioremap_encrypted(old_devtb_phys,
- pci_seg->dev_table_size)
- : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
-
- if (!old_devtb)
- return false;
- pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz(
- GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size);
+ /*
+ * Re-use the previous kernel's device table for kdump.
+ */
+ pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size);
if (pci_seg->old_dev_tbl_cpy == NULL) {
- pr_err("Failed to allocate memory for copying old device table!\n");
- memunmap(old_devtb);
+ pr_err("Failed to remap memory for reusing old device table!\n");
return false;
}
- for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
- pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
- dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
- dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-
- if (dte_v && dom_id) {
- pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
- pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
- /* Reserve the Domain IDs used by previous kernel */
- if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
- pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
- memunmap(old_devtb);
- return false;
- }
- /* If gcr3 table existed, mask it out */
- if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
- tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
- pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
- tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
- pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
- }
- }
-
- irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
- int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
- int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
- if (irq_v && (int_ctl || int_tab_len)) {
- if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
- (int_tab_len != DTE_INTTABLEN_512 &&
- int_tab_len != DTE_INTTABLEN_2K)) {
- pr_err("Wrong old irq remapping flag: %#x\n", devid);
- memunmap(old_devtb);
- return false;
- }
-
- pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
- }
- }
- memunmap(old_devtb);
-
return true;
}
-static bool copy_device_table(void)
+static bool reuse_device_table(void)
{
struct amd_iommu *iommu;
struct amd_iommu_pci_seg *pci_seg;
@@ -1082,17 +1184,17 @@ static bool copy_device_table(void)
if (!amd_iommu_pre_enabled)
return false;
- pr_warn("Translation is already enabled - trying to copy translation structures\n");
+ pr_warn("Translation is already enabled - trying to reuse translation structures\n");
/*
* All IOMMUs within PCI segment shares common device table.
- * Hence copy device table only once per PCI segment.
+ * Hence reuse device table only once per PCI segment.
*/
for_each_pci_segment(pci_seg) {
for_each_iommu(iommu) {
if (pci_seg->id != iommu->pci_seg->id)
continue;
- if (!__copy_device_table(iommu))
+ if (!__reuse_device_table(iommu))
return false;
break;
}
@@ -1655,9 +1757,7 @@ static void __init free_sysfs(struct amd_iommu *iommu)
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_sysfs(iommu);
- free_cwwb_sem(iommu);
- free_command_buffer(iommu);
- free_event_buffer(iommu);
+ free_iommu_buffers(iommu);
amd_iommu_free_ppr_log(iommu);
free_ga_log(iommu);
iommu_unmap_mmio_space(iommu);
@@ -1821,14 +1921,9 @@ static int __init init_iommu_one_late(struct amd_iommu *iommu)
{
int ret;
- if (alloc_cwwb_sem(iommu))
- return -ENOMEM;
-
- if (alloc_command_buffer(iommu))
- return -ENOMEM;
-
- if (alloc_event_buffer(iommu))
- return -ENOMEM;
+ ret = alloc_iommu_buffers(iommu);
+ if (ret)
+ return ret;
iommu->int_enabled = false;
@@ -2778,8 +2873,8 @@ static void early_enable_iommu(struct amd_iommu *iommu)
* This function finally enables all IOMMUs found in the system after
* they have been initialized.
*
- * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
- * the old content of device table entries. Not this case or copy failed,
+ * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse
+ * the old content of device table entries. Not this case or reuse failed,
* just continue as normal kernel does.
*/
static void early_enable_iommus(void)
@@ -2787,18 +2882,25 @@ static void early_enable_iommus(void)
struct amd_iommu *iommu;
struct amd_iommu_pci_seg *pci_seg;
- if (!copy_device_table()) {
+ if (!reuse_device_table()) {
/*
- * If come here because of failure in copying device table from old
+ * If come here because of failure in reusing device table from old
* kernel with all IOMMUs enabled, print error message and try to
* free allocated old_dev_tbl_cpy.
*/
- if (amd_iommu_pre_enabled)
- pr_err("Failed to copy DEV table from previous kernel.\n");
+ if (amd_iommu_pre_enabled) {
+ pr_err("Failed to reuse DEV table from previous kernel.\n");
+ /*
+ * Bail out early if unable to remap/reuse DEV table from
+ * previous kernel if SNP enabled as IOMMU commands will
+ * time out without DEV table and cause kdump boot panic.
+ */
+ BUG_ON(check_feature(FEATURE_SNP));
+ }
for_each_pci_segment(pci_seg) {
if (pci_seg->old_dev_tbl_cpy != NULL) {
- iommu_free_pages(pci_seg->old_dev_tbl_cpy);
+ memunmap((void *)pci_seg->old_dev_tbl_cpy);
pci_seg->old_dev_tbl_cpy = NULL;
}
}
@@ -2808,7 +2910,7 @@ static void early_enable_iommus(void)
early_enable_iommu(iommu);
}
} else {
- pr_info("Copied DEV table from previous kernel.\n");
+ pr_info("Reused DEV table from previous kernel.\n");
for_each_pci_segment(pci_seg) {
iommu_free_pages(pci_seg->dev_table);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index eb348c63a8d0..2e1865daa1ce 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -14,6 +14,7 @@
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-map-ops.h>
@@ -265,7 +266,7 @@ static inline int get_acpihid_device_id(struct device *dev,
return -EINVAL;
if (fw_bug)
dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n",
- hid_count, hid_count > 1 ? "s" : "");
+ hid_count, str_plural(hid_count));
if (hid_count > 1)
return -EINVAL;
if (entry)
@@ -1195,7 +1196,7 @@ static void build_completion_wait(struct iommu_cmd *cmd,
struct amd_iommu *iommu,
u64 data)
{
- u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
+ u64 paddr = iommu->cmd_sem_paddr;
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 190f28d76615..95a4e62b8f63 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -122,6 +122,8 @@
#define DART_T8110_ERROR_ADDR_LO 0x170
#define DART_T8110_ERROR_ADDR_HI 0x174
+#define DART_T8110_ERROR_STREAMS 0x1c0
+
#define DART_T8110_PROTECT 0x200
#define DART_T8110_UNPROTECT 0x204
#define DART_T8110_PROTECT_LOCK 0x208
@@ -133,6 +135,7 @@
#define DART_T8110_TCR 0x1000
#define DART_T8110_TCR_REMAP GENMASK(11, 8)
#define DART_T8110_TCR_REMAP_EN BIT(7)
+#define DART_T8110_TCR_FOUR_LEVEL BIT(3)
#define DART_T8110_TCR_BYPASS_DAPF BIT(2)
#define DART_T8110_TCR_BYPASS_DART BIT(1)
#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0)
@@ -166,22 +169,23 @@ struct apple_dart_hw {
int max_sid_count;
- u64 lock;
- u64 lock_bit;
+ u32 lock;
+ u32 lock_bit;
- u64 error;
+ u32 error;
- u64 enable_streams;
+ u32 enable_streams;
- u64 tcr;
- u64 tcr_enabled;
- u64 tcr_disabled;
- u64 tcr_bypass;
+ u32 tcr;
+ u32 tcr_enabled;
+ u32 tcr_disabled;
+ u32 tcr_bypass;
+ u32 tcr_4level;
- u64 ttbr;
- u64 ttbr_valid;
- u64 ttbr_addr_field_shift;
- u64 ttbr_shift;
+ u32 ttbr;
+ u32 ttbr_valid;
+ u32 ttbr_addr_field_shift;
+ u32 ttbr_shift;
int ttbr_count;
};
@@ -217,6 +221,7 @@ struct apple_dart {
u32 pgsize;
u32 num_streams;
u32 supports_bypass : 1;
+ u32 four_level : 1;
struct iommu_group *sid2group[DART_MAX_STREAMS];
struct iommu_device iommu;
@@ -305,13 +310,19 @@ static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom)
}
static void
-apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map)
+apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map, int levels)
{
struct apple_dart *dart = stream_map->dart;
+ u32 tcr = dart->hw->tcr_enabled;
int sid;
+ if (levels == 4)
+ tcr |= dart->hw->tcr_4level;
+
+ WARN_ON(levels != 3 && levels != 4);
+ WARN_ON(levels == 4 && !dart->four_level);
for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
- writel(dart->hw->tcr_enabled, dart->regs + DART_TCR(dart, sid));
+ writel(tcr, dart->regs + DART_TCR(dart, sid));
}
static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map)
@@ -569,7 +580,8 @@ apple_dart_setup_translation(struct apple_dart_domain *domain,
for (; i < stream_map->dart->hw->ttbr_count; ++i)
apple_dart_hw_clear_ttbr(stream_map, i);
- apple_dart_hw_enable_translation(stream_map);
+ apple_dart_hw_enable_translation(stream_map,
+ pgtbl_cfg->apple_dart_cfg.n_levels);
stream_map->dart->hw->invalidate_tlb(stream_map);
}
@@ -614,7 +626,7 @@ static int apple_dart_finalize_domain(struct apple_dart_domain *dart_domain,
dart_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
dart_domain->domain.geometry.aperture_start = 0;
dart_domain->domain.geometry.aperture_end =
- (dma_addr_t)DMA_BIT_MASK(dart->ias);
+ (dma_addr_t)DMA_BIT_MASK(pgtbl_cfg.ias);
dart_domain->domain.geometry.force_aperture = true;
dart_domain->finalized = true;
@@ -807,6 +819,8 @@ static int apple_dart_of_xlate(struct device *dev,
if (cfg_dart) {
if (cfg_dart->pgsize != dart->pgsize)
return -EINVAL;
+ if (cfg_dart->ias != dart->ias)
+ return -EINVAL;
}
cfg->supports_bypass &= dart->supports_bypass;
@@ -1077,6 +1091,9 @@ static irqreturn_t apple_dart_t8110_irq(int irq, void *dev)
error, stream_idx, error_code, fault_name, addr);
writel(error, dart->regs + DART_T8110_ERROR);
+ for (int i = 0; i < BITS_TO_U32(dart->num_streams); i++)
+ writel(U32_MAX, dart->regs + DART_T8110_ERROR_STREAMS + 4 * i);
+
return IRQ_HANDLED;
}
@@ -1137,6 +1154,7 @@ static int apple_dart_probe(struct platform_device *pdev)
dart->ias = FIELD_GET(DART_T8110_PARAMS3_VA_WIDTH, dart_params[2]);
dart->oas = FIELD_GET(DART_T8110_PARAMS3_PA_WIDTH, dart_params[2]);
dart->num_streams = FIELD_GET(DART_T8110_PARAMS4_NUM_SIDS, dart_params[3]);
+ dart->four_level = dart->ias > 36;
break;
}
@@ -1169,9 +1187,9 @@ static int apple_dart_probe(struct platform_device *pdev)
dev_info(
&pdev->dev,
- "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n",
+ "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d, AS %d -> %d] initialized\n",
dart->pgsize, dart->num_streams, dart->supports_bypass,
- dart->pgsize > PAGE_SIZE);
+ dart->pgsize > PAGE_SIZE, dart->ias, dart->oas);
return 0;
err_sysfs_remove:
@@ -1292,6 +1310,7 @@ static const struct apple_dart_hw apple_dart_hw_t8110 = {
.tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE,
.tcr_disabled = 0,
.tcr_bypass = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART,
+ .tcr_4level = DART_T8110_TCR_FOUR_LEVEL,
.ttbr = DART_T8110_TTBR,
.ttbr_valid = DART_T8110_TTBR_VALID,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index ea2ef53bd4fe..7944a3af4545 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -724,7 +724,12 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
unsigned long attrs)
{
- int prot = coherent ? IOMMU_CACHE : 0;
+ int prot;
+
+ if (attrs & DMA_ATTR_MMIO)
+ prot = IOMMU_MMIO;
+ else
+ prot = coherent ? IOMMU_CACHE : 0;
if (attrs & DMA_ATTR_PRIVILEGED)
prot |= IOMMU_PRIV;
@@ -1190,11 +1195,9 @@ static inline size_t iova_unaligned(struct iova_domain *iovad, phys_addr_t phys,
return iova_offset(iovad, phys | size);
}
-dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
{
- phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
int prot = dma_info_to_prot(dir, coherent, attrs);
struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -1208,27 +1211,34 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
*/
if (dev_use_swiotlb(dev, size, dir) &&
iova_unaligned(iovad, phys, size)) {
+ if (attrs & DMA_ATTR_MMIO)
+ return DMA_MAPPING_ERROR;
+
phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
if (phys == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
}
- if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
arch_sync_dma_for_device(phys, size, dir);
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
- if (iova == DMA_MAPPING_ERROR)
+ if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
}
-void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- struct iommu_domain *domain = iommu_get_dma_domain(dev);
phys_addr_t phys;
- phys = iommu_iova_to_phys(domain, dma_handle);
+ if (attrs & DMA_ATTR_MMIO) {
+ __iommu_dma_unmap(dev, dma_handle, size);
+ return;
+ }
+
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
if (WARN_ON(!phys))
return;
@@ -1341,7 +1351,7 @@ static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *s
int i;
for_each_sg(sg, s, nents, i)
- iommu_dma_unmap_page(dev, sg_dma_address(s),
+ iommu_dma_unmap_phys(dev, sg_dma_address(s),
sg_dma_len(s), dir, attrs);
}
@@ -1354,8 +1364,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
sg_dma_mark_swiotlb(sg);
for_each_sg(sg, s, nents, i) {
- sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
- s->offset, s->length, dir, attrs);
+ sg_dma_address(s) = iommu_dma_map_phys(dev, sg_phys(s),
+ s->length, dir, attrs);
if (sg_dma_address(s) == DMA_MAPPING_ERROR)
goto out_unmap;
sg_dma_len(s) = s->length;
@@ -1546,20 +1556,6 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
__iommu_dma_unmap(dev, start, end - start);
}
-dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- return __iommu_dma_map(dev, phys, size,
- dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
- dma_get_mask(dev));
-}
-
-void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- __iommu_dma_unmap(dev, handle, size);
-}
-
static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
{
size_t alloc_size = PAGE_ALIGN(size);
@@ -1838,12 +1834,13 @@ static int __dma_iova_link(struct device *dev, dma_addr_t addr,
unsigned long attrs)
{
bool coherent = dev_is_dma_coherent(dev);
+ int prot = dma_info_to_prot(dir, coherent, attrs);
- if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
arch_sync_dma_for_device(phys, size, dir);
return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
- dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC);
+ prot, GFP_ATOMIC);
}
static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr,
@@ -1949,9 +1946,13 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
return -EIO;
if (dev_use_swiotlb(dev, size, dir) &&
- iova_unaligned(iovad, phys, size))
+ iova_unaligned(iovad, phys, size)) {
+ if (attrs & DMA_ATTR_MMIO)
+ return -EPERM;
+
return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
size, dir, attrs);
+ }
return __dma_iova_link(dev, state->addr + offset - iova_start_pad,
phys - iova_start_pad,
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index affbf4a1558d..617fd81a80f0 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -62,8 +62,6 @@ static const struct iommu_regset iommu_regs_64[] = {
IOMMU_REGSET_ENTRY(CAP),
IOMMU_REGSET_ENTRY(ECAP),
IOMMU_REGSET_ENTRY(RTADDR),
- IOMMU_REGSET_ENTRY(CCMD),
- IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PHMBASE),
IOMMU_REGSET_ENTRY(PHMLIMIT),
IOMMU_REGSET_ENTRY(IQH),
@@ -435,8 +433,21 @@ static int domain_translation_struct_show(struct seq_file *m,
}
pgd &= VTD_PAGE_MASK;
} else { /* legacy mode */
- pgd = context->lo & VTD_PAGE_MASK;
- agaw = context->hi & 7;
+ u8 tt = (u8)(context->lo & GENMASK_ULL(3, 2)) >> 2;
+
+ /*
+ * According to Translation Type(TT),
+ * get the page table pointer(SSPTPTR).
+ */
+ switch (tt) {
+ case CONTEXT_TT_MULTI_LEVEL:
+ case CONTEXT_TT_DEV_IOTLB:
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+ break;
+ default:
+ goto iommu_unlock;
+ }
}
seq_printf(m, "Device %04x:%02x:%02x.%x ",
@@ -648,17 +659,11 @@ DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu,
struct dmar_drhd_unit *drhd)
{
- int ret;
-
seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
iommu->name, drhd->reg_base_addr);
- ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
- if (ret < 0)
- seq_puts(m, "Failed to get latency snapshot");
- else
- seq_puts(m, debug_buf);
- seq_puts(m, "\n");
+ dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
+ seq_printf(m, "%s\n", debug_buf);
}
static int latency_show(struct seq_file *m, void *v)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dff2d895b8ab..e236c7ec221f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3817,7 +3817,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_pri_supported(pdev))
+ ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index d09b92871659..3056583d7f56 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -77,7 +77,6 @@
#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
-#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
@@ -173,8 +172,6 @@
#define cap_pgsel_inv(c) (((c) >> 39) & 1)
#define cap_super_page_val(c) (((c) >> 34) & 0xf)
-#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
- * OFFSET_STRIDE) + 21)
#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
#define cap_max_fault_reg_offset(c) \
@@ -462,7 +459,6 @@ enum {
#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32)
/* Page group response descriptor QW1 */
-#define QI_PGRP_LPIG(x) (((u64)(x)) << 2)
#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3)
@@ -541,7 +537,8 @@ enum {
#define pasid_supported(iommu) (sm_supported(iommu) && \
ecap_pasid((iommu)->ecap))
#define ssads_supported(iommu) (sm_supported(iommu) && \
- ecap_slads((iommu)->ecap))
+ ecap_slads((iommu)->ecap) && \
+ ecap_smpwc((iommu)->ecap))
#define nested_supported(iommu) (sm_supported(iommu) && \
ecap_nest((iommu)->ecap))
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index adc4de6bbd88..dceeadc3ee7c 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -113,7 +113,7 @@ static char *latency_type_names[] = {
" svm_prq"
};
-int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+void dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
{
struct latency_statistic *lstat = iommu->perf_statistic;
unsigned long flags;
@@ -122,7 +122,7 @@ int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
memset(str, 0, size);
for (i = 0; i < COUNTS_NUM; i++)
- bytes += snprintf(str + bytes, size - bytes,
+ bytes += scnprintf(str + bytes, size - bytes,
"%s", latency_counter_names[i]);
spin_lock_irqsave(&latency_lock, flags);
@@ -130,7 +130,7 @@ int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
if (!dmar_latency_enabled(iommu, i))
continue;
- bytes += snprintf(str + bytes, size - bytes,
+ bytes += scnprintf(str + bytes, size - bytes,
"\n%s", latency_type_names[i]);
for (j = 0; j < COUNTS_NUM; j++) {
@@ -156,11 +156,9 @@ int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
break;
}
- bytes += snprintf(str + bytes, size - bytes,
+ bytes += scnprintf(str + bytes, size - bytes,
"%12lld", val);
}
}
spin_unlock_irqrestore(&latency_lock, flags);
-
- return bytes;
}
diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h
index df9a36942d64..1d4baad7e852 100644
--- a/drivers/iommu/intel/perf.h
+++ b/drivers/iommu/intel/perf.h
@@ -40,7 +40,7 @@ void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
u64 latency);
-int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
+void dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
#else
static inline int
dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
@@ -64,9 +64,8 @@ dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 laten
{
}
-static inline int
+static inline void
dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
{
- return 0;
}
#endif /* CONFIG_DMAR_PERF */
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
index 52570e42a14c..ff63c228e6e1 100644
--- a/drivers/iommu/intel/prq.c
+++ b/drivers/iommu/intel/prq.c
@@ -151,8 +151,7 @@ static void handle_bad_prq_event(struct intel_iommu *iommu,
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
+ desc.qw1 = QI_PGRP_IDX(req->prg_index);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -379,19 +378,17 @@ void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_fault_page_request *prm;
struct qi_desc desc;
bool pasid_present;
- bool last_page;
u16 sid;
prm = &evt->fault.prm;
sid = PCI_DEVID(bus, devfn);
pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
QI_PGRP_PASID_P(pasid_present) |
QI_PGRP_RESP_CODE(msg->code) |
QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw1 = QI_PGRP_IDX(prm->grpid);
desc.qw2 = 0;
desc.qw3 = 0;
diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c
index 679bda104797..54d287cc0dd1 100644
--- a/drivers/iommu/io-pgtable-dart.c
+++ b/drivers/iommu/io-pgtable-dart.c
@@ -27,8 +27,9 @@
#define DART1_MAX_ADDR_BITS 36
-#define DART_MAX_TABLES 4
-#define DART_LEVELS 2
+#define DART_MAX_TABLE_BITS 2
+#define DART_MAX_TABLES BIT(DART_MAX_TABLE_BITS)
+#define DART_MAX_LEVELS 4 /* Includes TTBR level */
/* Struct accessors */
#define io_pgtable_to_data(x) \
@@ -68,6 +69,7 @@
struct dart_io_pgtable {
struct io_pgtable iop;
+ int levels;
int tbl_bits;
int bits_per_level;
@@ -156,44 +158,45 @@ static dart_iopte dart_install_table(dart_iopte *table,
return old;
}
-static int dart_get_table(struct dart_io_pgtable *data, unsigned long iova)
+static int dart_get_index(struct dart_io_pgtable *data, unsigned long iova, int level)
{
- return (iova >> (3 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) &
- ((1 << data->tbl_bits) - 1);
+ return (iova >> (level * data->bits_per_level + ilog2(sizeof(dart_iopte)))) &
+ ((1 << data->bits_per_level) - 1);
}
-static int dart_get_l1_index(struct dart_io_pgtable *data, unsigned long iova)
-{
-
- return (iova >> (2 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) &
- ((1 << data->bits_per_level) - 1);
-}
-
-static int dart_get_l2_index(struct dart_io_pgtable *data, unsigned long iova)
+static int dart_get_last_index(struct dart_io_pgtable *data, unsigned long iova)
{
return (iova >> (data->bits_per_level + ilog2(sizeof(dart_iopte)))) &
((1 << data->bits_per_level) - 1);
}
-static dart_iopte *dart_get_l2(struct dart_io_pgtable *data, unsigned long iova)
+static dart_iopte *dart_get_last(struct dart_io_pgtable *data, unsigned long iova)
{
dart_iopte pte, *ptep;
- int tbl = dart_get_table(data, iova);
+ int level = data->levels;
+ int tbl = dart_get_index(data, iova, level);
+
+ if (tbl >= (1 << data->tbl_bits))
+ return NULL;
ptep = data->pgd[tbl];
if (!ptep)
return NULL;
- ptep += dart_get_l1_index(data, iova);
- pte = READ_ONCE(*ptep);
+ while (--level > 1) {
+ ptep += dart_get_index(data, iova, level);
+ pte = READ_ONCE(*ptep);
- /* Valid entry? */
- if (!pte)
- return NULL;
+ /* Valid entry? */
+ if (!pte)
+ return NULL;
- /* Deref to get level 2 table */
- return iopte_deref(pte, data);
+ /* Deref to get next level table */
+ ptep = iopte_deref(pte, data);
+ }
+
+ return ptep;
}
static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data,
@@ -230,6 +233,7 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
int ret = 0, tbl, num_entries, max_entries, map_idx_start;
dart_iopte pte, *cptep, *ptep;
dart_iopte prot;
+ int level = data->levels;
if (WARN_ON(pgsize != cfg->pgsize_bitmap))
return -EINVAL;
@@ -240,31 +244,36 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return -EINVAL;
- tbl = dart_get_table(data, iova);
+ tbl = dart_get_index(data, iova, level);
+
+ if (tbl >= (1 << data->tbl_bits))
+ return -ENOMEM;
ptep = data->pgd[tbl];
- ptep += dart_get_l1_index(data, iova);
- pte = READ_ONCE(*ptep);
+ while (--level > 1) {
+ ptep += dart_get_index(data, iova, level);
+ pte = READ_ONCE(*ptep);
- /* no L2 table present */
- if (!pte) {
- cptep = iommu_alloc_pages_sz(gfp, tblsz);
- if (!cptep)
- return -ENOMEM;
+ /* no table present */
+ if (!pte) {
+ cptep = iommu_alloc_pages_sz(gfp, tblsz);
+ if (!cptep)
+ return -ENOMEM;
- pte = dart_install_table(cptep, ptep, 0, data);
- if (pte)
- iommu_free_pages(cptep);
+ pte = dart_install_table(cptep, ptep, 0, data);
+ if (pte)
+ iommu_free_pages(cptep);
- /* L2 table is present (now) */
- pte = READ_ONCE(*ptep);
- }
+ /* next-level table is present (now) */
+ pte = READ_ONCE(*ptep);
+ }
- ptep = iopte_deref(pte, data);
+ ptep = iopte_deref(pte, data);
+ }
/* install a leaf entries into L2 table */
prot = dart_prot_to_pte(data, iommu_prot);
- map_idx_start = dart_get_l2_index(data, iova);
+ map_idx_start = dart_get_last_index(data, iova);
max_entries = DART_PTES_PER_TABLE(data) - map_idx_start;
num_entries = min_t(int, pgcount, max_entries);
ptep += map_idx_start;
@@ -293,13 +302,13 @@ static size_t dart_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
if (WARN_ON(pgsize != cfg->pgsize_bitmap || !pgcount))
return 0;
- ptep = dart_get_l2(data, iova);
+ ptep = dart_get_last(data, iova);
/* Valid L2 IOPTE pointer? */
if (WARN_ON(!ptep))
return 0;
- unmap_idx_start = dart_get_l2_index(data, iova);
+ unmap_idx_start = dart_get_last_index(data, iova);
ptep += unmap_idx_start;
max_entries = DART_PTES_PER_TABLE(data) - unmap_idx_start;
@@ -330,13 +339,13 @@ static phys_addr_t dart_iova_to_phys(struct io_pgtable_ops *ops,
struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops);
dart_iopte pte, *ptep;
- ptep = dart_get_l2(data, iova);
+ ptep = dart_get_last(data, iova);
/* Valid L2 IOPTE pointer? */
if (!ptep)
return 0;
- ptep += dart_get_l2_index(data, iova);
+ ptep += dart_get_last_index(data, iova);
pte = READ_ONCE(*ptep);
/* Found translation */
@@ -353,21 +362,37 @@ static struct dart_io_pgtable *
dart_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
struct dart_io_pgtable *data;
- int tbl_bits, bits_per_level, va_bits, pg_shift;
+ int levels, max_tbl_bits, tbl_bits, bits_per_level, va_bits, pg_shift;
+
+ /*
+ * Old 4K page DARTs can use up to 4 top-level tables.
+ * Newer ones only ever use a maximum of 1.
+ */
+ if (cfg->pgsize_bitmap == SZ_4K)
+ max_tbl_bits = DART_MAX_TABLE_BITS;
+ else
+ max_tbl_bits = 0;
pg_shift = __ffs(cfg->pgsize_bitmap);
bits_per_level = pg_shift - ilog2(sizeof(dart_iopte));
va_bits = cfg->ias - pg_shift;
- tbl_bits = max_t(int, 0, va_bits - (bits_per_level * DART_LEVELS));
- if ((1 << tbl_bits) > DART_MAX_TABLES)
+ levels = max_t(int, 2, (va_bits - max_tbl_bits + bits_per_level - 1) / bits_per_level);
+
+ if (levels > (DART_MAX_LEVELS - 1))
+ return NULL;
+
+ tbl_bits = max_t(int, 0, va_bits - (bits_per_level * levels));
+
+ if (tbl_bits > max_tbl_bits)
return NULL;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
+ data->levels = levels + 1; /* Table level counts as one level */
data->tbl_bits = tbl_bits;
data->bits_per_level = bits_per_level;
@@ -403,6 +428,7 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
return NULL;
cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits;
+ cfg->apple_dart_cfg.n_levels = data->levels;
for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) {
data->pgd[i] =
@@ -422,24 +448,31 @@ out_free_data:
return NULL;
}
-static void apple_dart_free_pgtable(struct io_pgtable *iop)
+static void apple_dart_free_pgtables(struct dart_io_pgtable *data, dart_iopte *ptep, int level)
{
- struct dart_io_pgtable *data = io_pgtable_to_data(iop);
- dart_iopte *ptep, *end;
- int i;
+ dart_iopte *end;
+ dart_iopte *start = ptep;
- for (i = 0; i < (1 << data->tbl_bits) && data->pgd[i]; ++i) {
- ptep = data->pgd[i];
+ if (level > 1) {
end = (void *)ptep + DART_GRANULE(data);
while (ptep != end) {
dart_iopte pte = *ptep++;
if (pte)
- iommu_free_pages(iopte_deref(pte, data));
+ apple_dart_free_pgtables(data, iopte_deref(pte, data), level - 1);
}
- iommu_free_pages(data->pgd[i]);
}
+ iommu_free_pages(start);
+}
+
+static void apple_dart_free_pgtable(struct io_pgtable *iop)
+{
+ struct dart_io_pgtable *data = io_pgtable_to_data(iop);
+ int i;
+
+ for (i = 0; i < (1 << data->tbl_bits) && data->pgd[i]; ++i)
+ apple_dart_free_pgtables(data, data->pgd[i], data->levels - 1);
kfree(data);
}
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index e236b932e766..c95394cd03a7 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -37,6 +37,8 @@ void iommu_device_unregister_bus(struct iommu_device *iommu,
const struct bus_type *bus,
struct notifier_block *nb);
+int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu);
+
struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group,
ioasid_t pasid,
unsigned int type);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 060ebe330ee1..59244c744eab 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -304,6 +304,7 @@ void iommu_device_unregister_bus(struct iommu_device *iommu,
struct notifier_block *nb)
{
bus_unregister_notifier(bus, nb);
+ fwnode_remove_software_node(iommu->fwnode);
iommu_device_unregister(iommu);
}
EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);
@@ -326,6 +327,12 @@ int iommu_device_register_bus(struct iommu_device *iommu,
if (err)
return err;
+ iommu->fwnode = fwnode_create_software_node(NULL, NULL);
+ if (IS_ERR(iommu->fwnode)) {
+ bus_unregister_notifier(bus, nb);
+ return PTR_ERR(iommu->fwnode);
+ }
+
spin_lock(&iommu_device_lock);
list_add_tail(&iommu->list, &iommu_device_list);
spin_unlock(&iommu_device_lock);
@@ -335,9 +342,28 @@ int iommu_device_register_bus(struct iommu_device *iommu,
iommu_device_unregister_bus(iommu, bus, nb);
return err;
}
+ WRITE_ONCE(iommu->ready, true);
return 0;
}
EXPORT_SYMBOL_GPL(iommu_device_register_bus);
+
+int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu)
+{
+ int rc;
+
+ mutex_lock(&iommu_probe_device_lock);
+ rc = iommu_fwspec_init(dev, iommu->fwnode);
+ mutex_unlock(&iommu_probe_device_lock);
+
+ if (rc)
+ return rc;
+
+ rc = device_add(dev);
+ if (rc)
+ iommu_fwspec_free(dev);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(iommu_mock_device_add);
#endif
static struct dev_iommu *dev_iommu_get(struct device *dev)
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 61686603c769..de178827a078 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -1126,7 +1126,7 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
goto err_put;
}
- rc = device_add(&mdev->dev);
+ rc = iommu_mock_device_add(&mdev->dev, &mock_iommu.iommu_dev);
if (rc)
goto err_put;
return mdev;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 6fb93927bdb9..5c6f5943f44b 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1303,8 +1303,8 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
struct omap_iommu_device *iommu;
struct omap_iommu *oiommu;
struct iotlb_entry e;
+ int ret = -EINVAL;
int omap_pgsz;
- u32 ret = -EINVAL;
int i;
omap_pgsz = bytes_to_iopgsz(bytes);
diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
index 725e919b97ef..83a28c83f991 100644
--- a/drivers/iommu/riscv/iommu-platform.c
+++ b/drivers/iommu/riscv/iommu-platform.c
@@ -10,6 +10,8 @@
* Tomasz Jeznach <tjeznach@rivosinc.com>
*/
+#include <linux/acpi.h>
+#include <linux/irqchip/riscv-imsic.h>
#include <linux/kernel.h>
#include <linux/msi.h>
#include <linux/of_irq.h>
@@ -46,6 +48,7 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
enum riscv_iommu_igs_settings igs;
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
+ struct irq_domain *msi_domain;
struct resource *res = NULL;
int vec, ret;
@@ -76,8 +79,13 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
switch (igs) {
case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
case RISCV_IOMMU_CAPABILITIES_IGS_MSI:
- if (is_of_node(dev->fwnode))
+ if (is_of_node(dev_fwnode(dev))) {
of_msi_configure(dev, to_of_node(dev->fwnode));
+ } else {
+ msi_domain = irq_find_matching_fwnode(imsic_acpi_get_fwnode(dev),
+ DOMAIN_BUS_PLATFORM_MSI);
+ dev_set_msi_domain(dev, msi_domain);
+ }
if (!dev_get_msi_domain(dev)) {
dev_warn(dev, "failed to find an MSI domain\n");
@@ -150,6 +158,12 @@ static const struct of_device_id riscv_iommu_of_match[] = {
{},
};
+static const struct acpi_device_id riscv_iommu_acpi_match[] = {
+ { "RSCV0004", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, riscv_iommu_acpi_match);
+
static struct platform_driver riscv_iommu_platform_driver = {
.probe = riscv_iommu_platform_probe,
.remove = riscv_iommu_platform_remove,
@@ -158,6 +172,7 @@ static struct platform_driver riscv_iommu_platform_driver = {
.name = "riscv,iommu",
.of_match_table = riscv_iommu_of_match,
.suppress_bind_attrs = true,
+ .acpi_match_table = riscv_iommu_acpi_match,
},
};
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 0eae2f4bdc5e..ebb22979075d 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -12,6 +12,8 @@
#define pr_fmt(fmt) "riscv-iommu: " fmt
+#include <linux/acpi.h>
+#include <linux/acpi_rimt.h>
#include <linux/compiler.h>
#include <linux/crash_dump.h>
#include <linux/init.h>
@@ -1650,6 +1652,14 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
goto err_iodir_off;
}
+ if (!acpi_disabled) {
+ rc = rimt_iommu_register(iommu->dev);
+ if (rc) {
+ dev_err_probe(iommu->dev, rc, "cannot register iommu with RIMT\n");
+ goto err_remove_sysfs;
+ }
+ }
+
rc = iommu_device_register(&iommu->iommu, &riscv_iommu_ops, iommu->dev);
if (rc) {
dev_err_probe(iommu->dev, rc, "cannot register iommu interface\n");
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 07c19b2182ca..104aa5355090 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -688,4 +688,6 @@ config DM_AUDIT
source "drivers/md/dm-vdo/Kconfig"
+source "drivers/md/dm-pcache/Kconfig"
+
endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 5a51b3408b70..c338cc6fbe2e 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -73,6 +73,7 @@ obj-$(CONFIG_DM_RAID) += dm-raid.o
obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_VDO) += dm-vdo/
+obj-$(CONFIG_DM_PCACHE) += dm-pcache/
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
obj-$(CONFIG_DM_EBS) += dm-ebs.o
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 8f3a23f4b168..e6d28be11c5c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1337,7 +1337,7 @@ static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
char *ptr;
unsigned int len;
- bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
+ bio = bio_kmalloc(1, GFP_NOWAIT);
if (!bio) {
use_dmio(b, op, sector, n_sectors, offset, ioprio);
return;
@@ -1601,18 +1601,18 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
* dm-bufio is resistant to allocation failures (it just keeps
* one buffer reserved in cases all the allocations fail).
* So set flags to not try too hard:
- * GFP_NOWAIT: don't wait; if we need to sleep we'll release our
- * mutex and wait ourselves.
+ * GFP_NOWAIT: don't wait and don't print a warning in case of
+ * failure; if we need to sleep we'll release our mutex
+ * and wait ourselves.
* __GFP_NORETRY: don't retry and rather return failure
* __GFP_NOMEMALLOC: don't use emergency reserves
- * __GFP_NOWARN: don't print a warning in case of failure
*
* For debugging, if we set the cache size to 1, no new buffers will
* be allocated.
*/
while (1) {
if (dm_bufio_cache_size_latch != 1) {
- b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC);
if (b)
return b;
}
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 2ed894155cab..7e1e8cc0e33a 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -590,7 +590,7 @@ static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned in
nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
ht->hash_bits = __ffs(nr_buckets);
- ht->buckets = vmalloc(array_size(nr_buckets, sizeof(*ht->buckets)));
+ ht->buckets = vmalloc_array(nr_buckets, sizeof(*ht->buckets));
if (!ht->buckets)
return -ENOMEM;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index c889332e533b..a3c9f74fe2dc 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -162,6 +162,7 @@ struct mapped_device {
#define DMF_SUSPENDED_INTERNALLY 7
#define DMF_POST_SUSPENDING 8
#define DMF_EMULATE_ZONE_APPEND 9
+#define DMF_QUEUE_STOPPED 10
static inline sector_t dm_get_size(struct mapped_device *md)
{
@@ -291,6 +292,7 @@ struct dm_io {
struct dm_io *next;
struct dm_stats_aux stats_aux;
blk_status_t status;
+ bool requeue_flush_with_data;
atomic_t io_count;
struct mapped_device *md;
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 8b50c908c6f4..efb3cd4f9cd4 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -45,7 +45,7 @@ static void fix_separator_chars(char **buf)
/*
* Internal function to allocate memory for IMA measurements.
*/
-static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio)
+static void *dm_ima_alloc(size_t len, bool noio)
{
unsigned int noio_flag;
void *ptr;
@@ -53,7 +53,7 @@ static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio)
if (noio)
noio_flag = memalloc_noio_save();
- ptr = kzalloc(len, flags);
+ ptr = kzalloc(len, GFP_KERNEL);
if (noio)
memalloc_noio_restore(noio_flag);
@@ -68,13 +68,13 @@ static int dm_ima_alloc_and_copy_name_uuid(struct mapped_device *md, char **dev_
char **dev_uuid, bool noio)
{
int r;
- *dev_name = dm_ima_alloc(DM_NAME_LEN*2, GFP_KERNEL, noio);
+ *dev_name = dm_ima_alloc(DM_NAME_LEN*2, noio);
if (!(*dev_name)) {
r = -ENOMEM;
goto error;
}
- *dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, GFP_KERNEL, noio);
+ *dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, noio);
if (!(*dev_uuid)) {
r = -ENOMEM;
goto error;
@@ -109,7 +109,7 @@ static int dm_ima_alloc_and_copy_device_data(struct mapped_device *md, char **de
if (r)
return r;
- *device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+ *device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, noio);
if (!(*device_data)) {
r = -ENOMEM;
goto error;
@@ -153,14 +153,12 @@ static int dm_ima_alloc_and_copy_capacity_str(struct mapped_device *md, char **c
capacity = get_capacity(md->disk);
- *capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, GFP_KERNEL, noio);
+ *capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, noio);
if (!(*capacity_str))
return -ENOMEM;
- scnprintf(*capacity_str, DM_IMA_DEVICE_BUF_LEN, "current_device_capacity=%llu;",
- capacity);
-
- return 0;
+ return scnprintf(*capacity_str, DM_IMA_DEVICE_CAPACITY_BUF_LEN, "current_device_capacity=%llu;",
+ capacity);
}
/*
@@ -195,15 +193,15 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
const size_t hash_alg_prefix_len = strlen(DM_IMA_TABLE_HASH_ALG) + 1;
char table_load_event_name[] = "dm_table_load";
- ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, GFP_KERNEL, noio);
+ ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, noio);
if (!ima_buf)
return;
- target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, GFP_KERNEL, noio);
+ target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, noio);
if (!target_metadata_buf)
goto error;
- target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, GFP_KERNEL, noio);
+ target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, noio);
if (!target_data_buf)
goto error;
@@ -218,7 +216,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
shash->tfm = tfm;
digest_size = crypto_shash_digestsize(tfm);
- digest = dm_ima_alloc(digest_size, GFP_KERNEL, noio);
+ digest = dm_ima_alloc(digest_size, noio);
if (!digest)
goto error;
@@ -327,7 +325,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
if (r < 0)
goto error;
- digest_buf = dm_ima_alloc((digest_size*2) + hash_alg_prefix_len + 1, GFP_KERNEL, noio);
+ digest_buf = dm_ima_alloc((digest_size*2) + hash_alg_prefix_len + 1, noio);
if (!digest_buf)
goto error;
@@ -371,18 +369,18 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
{
char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
char active[] = "active_table_hash=";
- unsigned int active_len = strlen(active), capacity_len = 0;
+ unsigned int active_len = strlen(active);
unsigned int l = 0;
bool noio = true;
bool nodata = true;
- int r;
+ int capacity_len;
- device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+ device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, noio);
if (!device_table_data)
return;
- r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
- if (r)
+ capacity_len = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+ if (capacity_len < 0)
goto error;
memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
@@ -445,8 +443,7 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
}
if (nodata) {
- r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio);
- if (r)
+ if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
goto error;
l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
@@ -454,7 +451,6 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
- capacity_len = strlen(capacity_str);
memcpy(device_table_data + l, capacity_str, capacity_len);
l += capacity_len;
@@ -483,18 +479,17 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
unsigned int device_active_len = strlen(device_active_str);
unsigned int device_inactive_len = strlen(device_inactive_str);
unsigned int remove_all_len = strlen(remove_all_str);
- unsigned int capacity_len = 0;
unsigned int l = 0;
bool noio = true;
bool nodata = true;
- int r;
+ int capacity_len;
- device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, GFP_KERNEL, noio);
+ device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, noio);
if (!device_table_data)
goto exit;
- r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
- if (r) {
+ capacity_len = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+ if (capacity_len < 0) {
kfree(device_table_data);
goto exit;
}
@@ -570,7 +565,6 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
memcpy(device_table_data + l, remove_all ? "y;" : "n;", 2);
l += 2;
- capacity_len = strlen(capacity_str);
memcpy(device_table_data + l, capacity_str, capacity_len);
l += capacity_len;
@@ -602,20 +596,20 @@ exit:
*/
void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map)
{
- unsigned int l = 0, capacity_len = 0;
+ unsigned int l = 0;
char *device_table_data = NULL, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
char inactive_str[] = "inactive_table_hash=";
unsigned int inactive_len = strlen(inactive_str);
bool noio = true;
bool nodata = true;
- int r;
+ int capacity_len;
- device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+ device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, noio);
if (!device_table_data)
return;
- r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
- if (r)
+ capacity_len = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+ if (capacity_len < 0)
goto error1;
memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
@@ -650,7 +644,6 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map)
DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
- capacity_len = strlen(capacity_str);
memcpy(device_table_data + l, capacity_str, capacity_len);
l += capacity_len;
@@ -703,7 +696,7 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL;
char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL;
bool noio = true;
- int r, len;
+ int len;
if (dm_ima_alloc_and_copy_device_data(md, &new_device_data,
md->ima.active_table.num_targets, noio))
@@ -712,12 +705,11 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
if (dm_ima_alloc_and_copy_name_uuid(md, &new_dev_name, &new_dev_uuid, noio))
goto error;
- combined_device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, GFP_KERNEL, noio);
+ combined_device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, noio);
if (!combined_device_data)
goto error;
- r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
- if (r)
+ if (dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio) < 0)
goto error;
old_device_data = md->ima.active_table.device_metadata;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index ab96b692e5a3..170bf67a2edd 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -219,10 +219,13 @@ struct dm_integrity_c {
__u8 log2_blocks_per_bitmap_bit;
unsigned char mode;
+ bool internal_hash;
int failed;
- struct crypto_shash *internal_hash;
+ struct crypto_shash *internal_shash;
+ struct crypto_ahash *internal_ahash;
+ unsigned int internal_hash_digestsize;
struct dm_target *ti;
@@ -277,6 +280,9 @@ struct dm_integrity_c {
bool fix_hmac;
bool legacy_recalculate;
+ mempool_t ahash_req_pool;
+ struct ahash_request *journal_ahash_req;
+
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
struct alg_spec journal_mac_alg;
@@ -326,6 +332,8 @@ struct dm_integrity_io {
unsigned payload_len;
bool integrity_payload_from_mempool;
bool integrity_range_locked;
+
+ struct ahash_request *ahash_req;
};
struct journal_completion {
@@ -352,6 +360,7 @@ struct bitmap_block_status {
static struct kmem_cache *journal_io_cache;
#define JOURNAL_IO_MEMPOOL 32
+#define AHASH_MEMPOOL 32
#ifdef DEBUG_PRINT
#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
@@ -1634,15 +1643,15 @@ static void integrity_end_io(struct bio *bio)
dec_in_flight(dio);
}
-static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
- const char *data, char *result)
+static void integrity_sector_checksum_shash(struct dm_integrity_c *ic, sector_t sector,
+ const char *data, unsigned offset, char *result)
{
__le64 sector_le = cpu_to_le64(sector);
- SHASH_DESC_ON_STACK(req, ic->internal_hash);
+ SHASH_DESC_ON_STACK(req, ic->internal_shash);
int r;
unsigned int digest_size;
- req->tfm = ic->internal_hash;
+ req->tfm = ic->internal_shash;
r = crypto_shash_init(req);
if (unlikely(r < 0)) {
@@ -1664,7 +1673,7 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector
goto failed;
}
- r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
+ r = crypto_shash_update(req, data + offset, ic->sectors_per_block << SECTOR_SHIFT);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_update", r);
goto failed;
@@ -1676,7 +1685,7 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector
goto failed;
}
- digest_size = crypto_shash_digestsize(ic->internal_hash);
+ digest_size = ic->internal_hash_digestsize;
if (unlikely(digest_size < ic->tag_size))
memset(result + digest_size, 0, ic->tag_size - digest_size);
@@ -1687,6 +1696,104 @@ failed:
get_random_bytes(result, ic->tag_size);
}
+/*
+ * Compute the tag of one data block through the asynchronous hash API.
+ *
+ * The hashed stream is: the superblock salt (only when SB_FLAG_FIXED_HMAC is
+ * set), the little-endian sector number, then sectors_per_block sectors of
+ * data found at @offset inside @page.
+ *
+ * @ahash_req caches the request between calls: when it holds NULL a request
+ * is taken from ic->ahash_req_pool and stored there. The request is not
+ * returned to the pool here.
+ *
+ * @result is the output buffer handed to the hash request. A digest shorter
+ * than ic->tag_size is zero-padded up to the tag size; if hashing fails the
+ * error is reported and the tag is filled with random bytes instead.
+ */
+static void integrity_sector_checksum_ahash(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
+					    sector_t sector, struct page *page, unsigned offset, char *result)
+{
+	const unsigned int block_bytes = ic->sectors_per_block << SECTOR_SHIFT;
+	__le64 sector_le = cpu_to_le64(sector);
+	struct scatterlist sg[3];
+	struct scatterlist *cur = sg;
+	struct ahash_request *req;
+	unsigned int total = 0;
+	unsigned int digest_size;
+	int err;
+	DECLARE_CRYPTO_WAIT(wait);
+
+	might_sleep();
+
+	/* Take a request from the pool on first use and remember it for the caller. */
+	req = *ahash_req;
+	if (unlikely(!req)) {
+		req = mempool_alloc(&ic->ahash_req_pool, GFP_NOIO);
+		*ahash_req = req;
+	}
+
+	ahash_request_set_tfm(req, ic->internal_ahash);
+	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait);
+
+	/* Optional first entry: the salt. */
+	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
+		sg_init_table(sg, 3);
+		sg_set_buf(cur, (const __u8 *)&ic->sb->salt, SALT_SIZE);
+		total += SALT_SIZE;
+		cur++;
+	} else {
+		sg_init_table(sg, 2);
+	}
+
+	/*
+	 * Sector number. If the local variable sits at a vmalloc address it
+	 * is described by page and in-page offset instead of by buffer.
+	 */
+	if (unlikely(is_vmalloc_addr(&sector_le)))
+		sg_set_page(cur, vmalloc_to_page(&sector_le), sizeof(sector_le),
+			    offset_in_page(&sector_le));
+	else
+		sg_set_buf(cur, &sector_le, sizeof(sector_le));
+	total += sizeof(sector_le);
+	cur++;
+
+	/* Last entry: the data block itself. */
+	sg_set_page(cur, page, block_bytes, offset);
+	total += block_bytes;
+
+	ahash_request_set_crypt(req, sg, result, total);
+
+	err = crypto_wait_req(crypto_ahash_digest(req), &wait);
+	if (unlikely(err)) {
+		dm_integrity_io_error(ic, "crypto_ahash_digest", err);
+		/* this shouldn't happen anyway, the hash functions have no reason to fail */
+		get_random_bytes(result, ic->tag_size);
+		return;
+	}
+
+	digest_size = ic->internal_hash_digestsize;
+	if (unlikely(digest_size < ic->tag_size))
+		memset(result + digest_size, 0, ic->tag_size - digest_size);
+}
+
+/*
+ * Checksum one data block with whichever hash flavour the target uses.
+ *
+ * With a synchronous hash @data is a mapped address; otherwise it is really a
+ * struct page pointer (see integrity_kmap() and integrity_identity()), and
+ * @offset is applied by the selected back end in both cases.
+ */
+static void integrity_sector_checksum(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
+				      sector_t sector, const char *data, unsigned offset, char *result)
+{
+	if (unlikely(ic->internal_shash == NULL)) {
+		integrity_sector_checksum_ahash(ic, ahash_req, sector, (struct page *)data, offset, result);
+		return;
+	}
+
+	integrity_sector_checksum_shash(ic, sector, data, offset, result);
+}
+
+/*
+ * Produce the "data" handle expected by integrity_sector_checksum() for a
+ * page: a local kernel mapping when a synchronous hash is in use, the page
+ * pointer itself otherwise.
+ */
+static void *integrity_kmap(struct dm_integrity_c *ic, struct page *p)
+{
+	if (unlikely(ic->internal_shash == NULL))
+		return p;
+
+	return kmap_local_page(p);
+}
+
+/*
+ * Undo integrity_kmap(): only the synchronous-hash case created a mapping,
+ * so nothing is done when no shash is configured.
+ */
+static void integrity_kunmap(struct dm_integrity_c *ic, const void *ptr)
+{
+	if (unlikely(ic->internal_shash == NULL))
+		return;
+
+	kunmap_local(ptr);
+}
+
+/*
+ * Turn an address into the "data" handle expected by
+ * integrity_sector_checksum(): the address itself for a synchronous hash,
+ * the backing struct page otherwise. With CONFIG_DEBUG_SG the address is
+ * checked to be page aligned and valid for virt_to_page().
+ */
+static void *integrity_identity(struct dm_integrity_c *ic, void *data)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(offset_in_page(data));
+	BUG_ON(!virt_addr_valid(data));
+#endif
+	if (unlikely(ic->internal_shash == NULL))
+		return virt_to_page(data);
+
+	return data;
+}
+
static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
{
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
@@ -1711,6 +1818,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks
sector_t alignment;
char *mem;
char *buffer = page_to_virt(page);
+ unsigned int buffer_offset;
int r;
struct dm_io_request io_req;
struct dm_io_region io_loc;
@@ -1728,7 +1836,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks
alignment &= -alignment;
io_loc.sector = round_down(io_loc.sector, alignment);
io_loc.count += sector - io_loc.sector;
- buffer += (sector - io_loc.sector) << SECTOR_SHIFT;
+ buffer_offset = (sector - io_loc.sector) << SECTOR_SHIFT;
io_loc.count = round_up(io_loc.count, alignment);
r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
@@ -1737,7 +1845,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks
goto free_ret;
}
- integrity_sector_checksum(ic, logical_sector, buffer, checksum);
+ integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, integrity_identity(ic, buffer), buffer_offset, checksum);
r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
&dio->metadata_offset, ic->tag_size, TAG_CMP);
if (r) {
@@ -1754,7 +1862,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks
}
mem = bvec_kmap_local(&bv);
- memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT);
+ memcpy(mem + pos, buffer + buffer_offset, ic->sectors_per_block << SECTOR_SHIFT);
kunmap_local(mem);
pos += ic->sectors_per_block << SECTOR_SHIFT;
@@ -1776,7 +1884,7 @@ static void integrity_metadata(struct work_struct *w)
if (ic->internal_hash) {
struct bvec_iter iter;
struct bio_vec bv;
- unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash);
+ unsigned int digest_size = ic->internal_hash_digestsize;
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
char *checksums;
unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
@@ -1837,17 +1945,17 @@ static void integrity_metadata(struct work_struct *w)
char *mem, *checksums_ptr;
again:
- mem = bvec_kmap_local(&bv_copy);
+ mem = integrity_kmap(ic, bv_copy.bv_page);
pos = 0;
checksums_ptr = checksums;
do {
- integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
+ integrity_sector_checksum(ic, &dio->ahash_req, sector, mem, bv_copy.bv_offset + pos, checksums_ptr);
checksums_ptr += ic->tag_size;
sectors_to_process -= ic->sectors_per_block;
pos += ic->sectors_per_block << SECTOR_SHIFT;
sector += ic->sectors_per_block;
} while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack);
- kunmap_local(mem);
+ integrity_kunmap(ic, mem);
r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
@@ -1949,6 +2057,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
dio->ic = ic;
dio->bi_status = 0;
dio->op = bio_op(bio);
+ dio->ahash_req = NULL;
if (ic->mode == 'I') {
bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
@@ -2071,19 +2180,6 @@ retry_kmap:
js++;
mem_ptr += 1 << SECTOR_SHIFT;
} while (++s < ic->sectors_per_block);
-#ifdef INTERNAL_VERIFY
- if (ic->internal_hash) {
- char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
-
- integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
- if (unlikely(crypto_memneq(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
- DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
- logical_sector);
- dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum",
- bio, logical_sector, 0);
- }
- }
-#endif
}
if (!ic->internal_hash) {
@@ -2124,15 +2220,17 @@ retry_kmap:
} while (++s < ic->sectors_per_block);
if (ic->internal_hash) {
- unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash);
+ unsigned int digest_size = ic->internal_hash_digestsize;
+ void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js));
+ unsigned js_offset = offset_in_page(js);
if (unlikely(digest_size > ic->tag_size)) {
char checksums_onstack[HASH_MAX_DIGESTSIZE];
- integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
+ integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, checksums_onstack);
memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
} else
- integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
+ integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, journal_entry_tag(ic, je));
}
journal_entry_set_sector(je, logical_sector);
@@ -2428,7 +2526,7 @@ retry:
if (!dio->integrity_payload) {
unsigned digest_size, extra_size;
dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
- digest_size = crypto_shash_digestsize(ic->internal_hash);
+ digest_size = ic->internal_hash_digestsize;
extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
dio->payload_len += extra_size;
dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -2505,11 +2603,11 @@ skip_spinlock:
unsigned pos = 0;
while (dio->bio_details.bi_iter.bi_size) {
struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
- const char *mem = bvec_kmap_local(&bv);
+ const char *mem = integrity_kmap(ic, bv.bv_page);
if (ic->tag_size < ic->tuple_size)
memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tuple_size);
- integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, dio->integrity_payload + pos);
- kunmap_local(mem);
+ integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, mem, bv.bv_offset, dio->integrity_payload + pos);
+ integrity_kunmap(ic, mem);
pos += ic->tuple_size;
bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
}
@@ -2588,8 +2686,8 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
}
bio_put(outgoing_bio);
- integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest);
- if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
+ integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, integrity_identity(ic, outgoing_data), 0, digest);
+ if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(ic->internal_hash_digestsize, ic->tag_size)))) {
DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
atomic64_inc(&ic->number_of_mismatches);
@@ -2612,33 +2710,58 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
bio_endio(bio);
}
+/*
+ * Verify every remaining block of a read bio against the tags stored in
+ * dio->integrity_payload, consuming dio->bio_details.bi_iter as it goes.
+ *
+ * Returns true when all blocks match. On the first mismatch the payload is
+ * freed, dm_integrity_inline_recheck() is queued on ic->offload_wq and false
+ * is returned.
+ */
+static inline bool dm_integrity_check(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
+{
+	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+	const unsigned int cmp_len = min(ic->internal_hash_digestsize, ic->tag_size);
+	unsigned int payload_pos;
+
+	for (payload_pos = 0; dio->bio_details.bi_iter.bi_size; payload_pos += ic->tuple_size) {
+		char digest[HASH_MAX_DIGESTSIZE];
+		struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
+		char *mem = integrity_kmap(ic, bv.bv_page);
+		bool mismatch;
+
+		integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector,
+					  mem, bv.bv_offset, digest);
+		mismatch = crypto_memneq(digest, dio->integrity_payload + payload_pos, cmp_len);
+		integrity_kunmap(ic, mem);
+
+		if (unlikely(mismatch)) {
+			/* Hand the bio over to the recheck worker. */
+			dm_integrity_free_payload(dio);
+			INIT_WORK(&dio->work, dm_integrity_inline_recheck);
+			queue_work(ic->offload_wq, &dio->work);
+			return false;
+		}
+
+		bio_advance_iter_single(bio, &dio->bio_details.bi_iter,
+					ic->sectors_per_block << SECTOR_SHIFT);
+	}
+
+	return true;
+}
+
+/*
+ * Work item that runs dm_integrity_check() for a dio and ends the bio when
+ * every block verified. On a mismatch dm_integrity_check() has already
+ * queued the recheck, so nothing more is done here.
+ */
+static void dm_integrity_inline_async_check(struct work_struct *w)
+{
+	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
+
+	if (unlikely(!dm_integrity_check(dio->ic, dio)))
+		return;
+
+	bio_endio(dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)));
+}
+
static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
{
struct dm_integrity_c *ic = ti->private;
+ struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
if (ic->mode == 'I') {
- struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
- if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) {
- unsigned pos = 0;
+ if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK) && likely(dio->bio_details.bi_iter.bi_size != 0)) {
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
unlikely(dio->integrity_range_locked))
- goto skip_check;
- while (dio->bio_details.bi_iter.bi_size) {
- char digest[HASH_MAX_DIGESTSIZE];
- struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
- char *mem = bvec_kmap_local(&bv);
- //memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT);
- integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest);
- if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos,
- min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
- kunmap_local(mem);
- dm_integrity_free_payload(dio);
- INIT_WORK(&dio->work, dm_integrity_inline_recheck);
- queue_work(ic->offload_wq, &dio->work);
+ goto skip_check;
+ if (likely(ic->internal_shash != NULL)) {
+ if (unlikely(!dm_integrity_check(ic, dio)))
return DM_ENDIO_INCOMPLETE;
- }
- kunmap_local(mem);
- pos += ic->tuple_size;
- bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
+ } else {
+ INIT_WORK(&dio->work, dm_integrity_inline_async_check);
+ queue_work(ic->offload_wq, &dio->work);
+ return DM_ENDIO_INCOMPLETE;
}
}
skip_check:
@@ -2646,6 +2769,8 @@ skip_check:
if (unlikely(dio->integrity_range_locked))
remove_range(ic, &dio->range);
}
+ if (unlikely(dio->ahash_req))
+ mempool_free(dio->ahash_req, &ic->ahash_req_pool);
return DM_ENDIO_DONE;
}
@@ -2902,9 +3027,12 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start
#endif
ic->internal_hash) {
char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
+ struct journal_sector *js = access_journal_data(ic, i, l);
+ void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js));
+ unsigned js_offset = offset_in_page(js);
- integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
- (char *)access_journal_data(ic, i, l), test_tag);
+ integrity_sector_checksum(ic, &ic->journal_ahash_req, sec + ((l - j) << ic->sb->log2_sectors_per_block),
+ js_page, js_offset, test_tag);
if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
@@ -2987,6 +3115,7 @@ static void integrity_recalc(struct work_struct *w)
size_t recalc_tags_size;
u8 *recalc_buffer = NULL;
u8 *recalc_tags = NULL;
+ struct ahash_request *ahash_req = NULL;
struct dm_integrity_range range;
struct dm_io_request io_req;
struct dm_io_region io_loc;
@@ -3001,7 +3130,7 @@ static void integrity_recalc(struct work_struct *w)
unsigned recalc_sectors = RECALC_SECTORS;
retry:
- recalc_buffer = __vmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO);
+ recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN);
if (!recalc_buffer) {
oom:
recalc_sectors >>= 1;
@@ -3011,11 +3140,11 @@ oom:
goto free_ret;
}
recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
- if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
- recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
+ if (ic->internal_hash_digestsize > ic->tag_size)
+ recalc_tags_size += ic->internal_hash_digestsize - ic->tag_size;
recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO);
if (!recalc_tags) {
- vfree(recalc_buffer);
+ kfree(recalc_buffer);
recalc_buffer = NULL;
goto oom;
}
@@ -3081,7 +3210,7 @@ next_chunk:
goto err;
io_req.bi_opf = REQ_OP_READ;
- io_req.mem.type = DM_IO_VMA;
+ io_req.mem.type = DM_IO_KMEM;
io_req.mem.ptr.addr = recalc_buffer;
io_req.notify.fn = NULL;
io_req.client = ic->io;
@@ -3097,7 +3226,10 @@ next_chunk:
t = recalc_tags;
for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
- integrity_sector_checksum(ic, logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t);
+ void *ptr = recalc_buffer + (i << SECTOR_SHIFT);
+ void *ptr_page = integrity_identity(ic, (char *)ptr - offset_in_page(ptr));
+ unsigned ptr_offset = offset_in_page(ptr);
+ integrity_sector_checksum(ic, &ahash_req, logical_sector + i, ptr_page, ptr_offset, t);
t += ic->tag_size;
}
@@ -3139,8 +3271,9 @@ unlock_ret:
recalc_write_super(ic);
free_ret:
- vfree(recalc_buffer);
+ kfree(recalc_buffer);
kvfree(recalc_tags);
+ mempool_free(ahash_req, &ic->ahash_req_pool);
}
static void integrity_recalc_inline(struct work_struct *w)
@@ -3149,6 +3282,7 @@ static void integrity_recalc_inline(struct work_struct *w)
size_t recalc_tags_size;
u8 *recalc_buffer = NULL;
u8 *recalc_tags = NULL;
+ struct ahash_request *ahash_req = NULL;
struct dm_integrity_range range;
struct bio *bio;
struct bio_integrity_payload *bip;
@@ -3171,8 +3305,8 @@ oom:
}
recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size;
- if (crypto_shash_digestsize(ic->internal_hash) > ic->tuple_size)
- recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tuple_size;
+ if (ic->internal_hash_digestsize > ic->tuple_size)
+ recalc_tags_size += ic->internal_hash_digestsize - ic->tuple_size;
recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN);
if (!recalc_tags) {
kfree(recalc_buffer);
@@ -3217,8 +3351,11 @@ next_chunk:
t = recalc_tags;
for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
+ void *ptr = recalc_buffer + (i << SECTOR_SHIFT);
+ void *ptr_page = integrity_identity(ic, (char *)ptr - offset_in_page(ptr));
+ unsigned ptr_offset = offset_in_page(ptr);
memset(t, 0, ic->tuple_size);
- integrity_sector_checksum(ic, range.logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t);
+ integrity_sector_checksum(ic, &ahash_req, range.logical_sector + i, ptr_page, ptr_offset, t);
t += ic->tuple_size;
}
@@ -3270,6 +3407,7 @@ unlock_ret:
free_ret:
kfree(recalc_buffer);
kfree(recalc_tags);
+ mempool_free(ahash_req, &ic->ahash_req_pool);
}
static void bitmap_block_work(struct work_struct *w)
@@ -4210,30 +4348,53 @@ nomem:
return -ENOMEM;
}
-static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
- char *error_alg, char *error_key)
+static int get_mac(struct crypto_shash **shash, struct crypto_ahash **ahash,
+ struct alg_spec *a, char **error, char *error_alg, char *error_key)
{
int r;
if (a->alg_string) {
- *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
- if (IS_ERR(*hash)) {
- *error = error_alg;
- r = PTR_ERR(*hash);
- *hash = NULL;
- return r;
- }
-
- if (a->key) {
- r = crypto_shash_setkey(*hash, a->key, a->key_size);
- if (r) {
+ if (shash) {
+ *shash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
+ if (IS_ERR(*shash)) {
+ *shash = NULL;
+ goto try_ahash;
+ }
+ if (a->key) {
+ r = crypto_shash_setkey(*shash, a->key, a->key_size);
+ if (r) {
+ *error = error_key;
+ return r;
+ }
+ } else if (crypto_shash_get_flags(*shash) & CRYPTO_TFM_NEED_KEY) {
*error = error_key;
+ return -ENOKEY;
+ }
+ return 0;
+ }
+try_ahash:
+ if (ahash) {
+ *ahash = crypto_alloc_ahash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
+ if (IS_ERR(*ahash)) {
+ *error = error_alg;
+ r = PTR_ERR(*ahash);
+ *ahash = NULL;
return r;
}
- } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
- *error = error_key;
- return -ENOKEY;
+ if (a->key) {
+ r = crypto_ahash_setkey(*ahash, a->key, a->key_size);
+ if (r) {
+ *error = error_key;
+ return r;
+ }
+ } else if (crypto_ahash_get_flags(*ahash) & CRYPTO_TFM_NEED_KEY) {
+ *error = error_key;
+ return -ENOKEY;
+ }
+ return 0;
}
+ *error = error_alg;
+ return -ENOENT;
}
return 0;
@@ -4690,12 +4851,26 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
buffer_sectors = 1;
ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
- r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
+ r = get_mac(&ic->internal_shash, &ic->internal_ahash, &ic->internal_hash_alg, &ti->error,
"Invalid internal hash", "Error setting internal hash key");
if (r)
goto bad;
+ if (ic->internal_shash) {
+ ic->internal_hash = true;
+ ic->internal_hash_digestsize = crypto_shash_digestsize(ic->internal_shash);
+ }
+ if (ic->internal_ahash) {
+ ic->internal_hash = true;
+ ic->internal_hash_digestsize = crypto_ahash_digestsize(ic->internal_ahash);
+ r = mempool_init_kmalloc_pool(&ic->ahash_req_pool, AHASH_MEMPOOL,
+ sizeof(struct ahash_request) + crypto_ahash_reqsize(ic->internal_ahash));
+ if (r) {
+ ti->error = "Cannot allocate mempool";
+ goto bad;
+ }
+ }
- r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error,
+ r = get_mac(&ic->journal_mac, NULL, &ic->journal_mac_alg, &ti->error,
"Invalid journal mac", "Error setting journal mac key");
if (r)
goto bad;
@@ -4706,7 +4881,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
r = -EINVAL;
goto bad;
}
- ic->tag_size = crypto_shash_digestsize(ic->internal_hash);
+ ic->tag_size = ic->internal_hash_digestsize;
}
if (ic->tag_size > MAX_TAG_SIZE) {
ti->error = "Too big tag size";
@@ -5178,6 +5353,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
+ mempool_free(ic->journal_ahash_req, &ic->ahash_req_pool);
+ mempool_exit(&ic->ahash_req_pool);
bioset_exit(&ic->recalc_bios);
bioset_exit(&ic->recheck_bios);
mempool_exit(&ic->recheck_pool);
@@ -5215,8 +5392,10 @@ static void dm_integrity_dtr(struct dm_target *ti)
if (ic->sb)
free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
- if (ic->internal_hash)
- crypto_free_shash(ic->internal_hash);
+ if (ic->internal_shash)
+ crypto_free_shash(ic->internal_shash);
+ if (ic->internal_ahash)
+ crypto_free_ahash(ic->internal_ahash);
free_alg(&ic->internal_hash_alg);
if (ic->journal_crypt)
@@ -5233,7 +5412,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 13, 0},
+ .version = {1, 14, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 679b07dee229..7bb7174f8f4f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -414,7 +414,7 @@ static int log_super(struct log_writes_c *lc)
}
/*
- * Super sector should be writen in-order, otherwise the
+ * Super sector should be written in-order, otherwise the
* nr_entries could be rewritten incorrectly by an old bio.
*/
wait_for_completion_io(&lc->super_done);
diff --git a/drivers/md/dm-pcache/Kconfig b/drivers/md/dm-pcache/Kconfig
new file mode 100644
index 000000000000..0e251eca892e
--- /dev/null
+++ b/drivers/md/dm-pcache/Kconfig
@@ -0,0 +1,17 @@
+config DM_PCACHE
+ tristate "Persistent cache for Block Device (Experimental)"
+ depends on BLK_DEV_DM
+ depends on DEV_DAX
+ help
+ PCACHE provides a mechanism to use persistent memory (e.g., CXL persistent memory,
+ DAX-enabled devices) as a high-performance cache layer in front of
+ traditional block devices such as SSDs or HDDs.
+
+ PCACHE is implemented as a kernel module that integrates with the block
+ layer and supports direct access (DAX) to persistent memory for low-latency,
+ byte-addressable caching.
+
+ Note: This feature is experimental and should be tested thoroughly
+ before use in production environments.
+
+ If unsure, say 'N'.
diff --git a/drivers/md/dm-pcache/Makefile b/drivers/md/dm-pcache/Makefile
new file mode 100644
index 000000000000..86776e4acad2
--- /dev/null
+++ b/drivers/md/dm-pcache/Makefile
@@ -0,0 +1,3 @@
+dm-pcache-y := dm_pcache.o cache_dev.o segment.o backing_dev.o cache.o cache_gc.o cache_writeback.o cache_segment.o cache_key.o cache_req.o
+
+# Follow the DM_PCACHE tristate so that =y builds the driver in and =m builds a module.
+obj-$(CONFIG_DM_PCACHE) += dm-pcache.o
diff --git a/drivers/md/dm-pcache/backing_dev.c b/drivers/md/dm-pcache/backing_dev.c
new file mode 100644
index 000000000000..7165fc0364bb
--- /dev/null
+++ b/drivers/md/dm-pcache/backing_dev.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/blkdev.h>
+
+#include "../dm-core.h"
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "backing_dev.h"
+#include "cache.h"
+#include "dm_pcache.h"
+
+static struct kmem_cache *backing_req_cache;
+static struct kmem_cache *backing_bvec_cache;
+
+static void backing_dev_exit(struct pcache_backing_dev *backing_dev) /* release the two mempools created by backing_dev_init() */
+{
+ mempool_exit(&backing_dev->req_pool);
+ mempool_exit(&backing_dev->bvec_pool);
+}
+
+static void req_submit_fn(struct work_struct *work);
+static void req_complete_fn(struct work_struct *work);
+/*
+ * Set up the request machinery of pcache->backing_dev: the request and bvec
+ * mempools, the submit/complete lists with their locks and work items, and
+ * the in-flight accounting.
+ *
+ * Returns 0 on success or the error reported by mempool_init_slab_pool();
+ * nothing stays allocated on failure.
+ */
+static int backing_dev_init(struct dm_pcache *pcache)
+{
+	struct pcache_backing_dev *bd = &pcache->backing_dev;
+	int err;
+
+	err = mempool_init_slab_pool(&bd->req_pool, 128, backing_req_cache);
+	if (err)
+		return err;
+
+	err = mempool_init_slab_pool(&bd->bvec_pool, 128, backing_bvec_cache);
+	if (err) {
+		mempool_exit(&bd->req_pool);
+		return err;
+	}
+
+	/* Deferred submission. */
+	INIT_LIST_HEAD(&bd->submit_list);
+	INIT_LIST_HEAD(&bd->complete_list);
+	spin_lock_init(&bd->submit_lock);
+	spin_lock_init(&bd->complete_lock);
+	INIT_WORK(&bd->req_submit_work, req_submit_fn);
+	INIT_WORK(&bd->req_complete_work, req_complete_fn);
+
+	/* In-flight accounting used by backing_dev_stop(). */
+	atomic_set(&bd->inflight_reqs, 0);
+	init_waitqueue_head(&bd->inflight_wq);
+
+	return 0;
+}
+
+/*
+ * Initialise the backing device state and record the size of the underlying
+ * block device in sectors. Returns 0 or the error from backing_dev_init().
+ */
+int backing_dev_start(struct dm_pcache *pcache)
+{
+	struct pcache_backing_dev *bd = &pcache->backing_dev;
+	int err = backing_dev_init(pcache);
+
+	if (err)
+		return err;
+
+	bd->dev_size = bdev_nr_sectors(bd->dm_dev->bdev);
+	return 0;
+}
+
+/*
+ * Quiesce the backing device: wait until no request is in flight, flush the
+ * submit and complete work items, then release the mempools.
+ */
+void backing_dev_stop(struct dm_pcache *pcache)
+{
+	struct pcache_backing_dev *bd = &pcache->backing_dev;
+
+	/*
+	 * No new requests should be coming in at this point; just wait for
+	 * the in-flight ones to finish.
+	 */
+	wait_event(bd->inflight_wq, atomic_read(&bd->inflight_reqs) == 0);
+
+	flush_work(&bd->req_submit_work);
+	flush_work(&bd->req_complete_work);
+
+	backing_dev_exit(bd);
+}
+
+/* pcache_backing_dev_req functions */
+/*
+ * Finish a backing request: run its end_req callback (if any) with the stored
+ * result, drop the type-specific resources, return the request to the pool
+ * and wake backing_dev_stop() waiters once the in-flight count reaches zero.
+ */
+void backing_dev_req_end(struct pcache_backing_dev_req *backing_req)
+{
+	struct pcache_backing_dev *bd = backing_req->backing_dev;
+
+	if (backing_req->end_req)
+		backing_req->end_req(backing_req, backing_req->ret);
+
+	if (backing_req->type == BACKING_DEV_REQ_TYPE_REQ) {
+		/* Drop the reference on the upper request, passing the result along. */
+		if (backing_req->req.upper_req)
+			pcache_req_put(backing_req->req.upper_req, backing_req->ret);
+	} else if (backing_req->type == BACKING_DEV_REQ_TYPE_KMEM) {
+		/* Only a separately allocated bvec array has to be given back. */
+		if (backing_req->kmem.bvecs != backing_req->kmem.inline_bvecs)
+			mempool_free(backing_req->kmem.bvecs, &bd->bvec_pool);
+	} else {
+		BUG();
+	}
+
+	mempool_free(backing_req, &bd->req_pool);
+
+	if (atomic_dec_and_test(&bd->inflight_reqs))
+		wake_up(&bd->inflight_wq);
+}
+
+/*
+ * Work handler: detach everything queued on complete_list under the lock,
+ * then finish each request outside of it.
+ */
+static void req_complete_fn(struct work_struct *work)
+{
+	struct pcache_backing_dev *bd = container_of(work, struct pcache_backing_dev, req_complete_work);
+	struct pcache_backing_dev_req *req, *next;
+	LIST_HEAD(done);
+
+	spin_lock_irq(&bd->complete_lock);
+	list_splice_init(&bd->complete_list, &done);
+	spin_unlock_irq(&bd->complete_lock);
+
+	/* backing_dev_req_end() frees the request, hence the _safe iterator. */
+	list_for_each_entry_safe(req, next, &done, node) {
+		list_del_init(&req->node);
+		backing_dev_req_end(req);
+	}
+}
+
+/*
+ * bi_end_io handler of backing bios: store the result as an errno in the
+ * request, queue the request on complete_list and kick the completion work.
+ */
+static void backing_dev_bio_end(struct bio *bio)
+{
+	struct pcache_backing_dev_req *req = bio->bi_private;
+	struct pcache_backing_dev *bd = req->backing_dev;
+	unsigned long irq_flags;
+
+	req->ret = blk_status_to_errno(bio->bi_status);
+
+	spin_lock_irqsave(&bd->complete_lock, irq_flags);
+	list_move_tail(&req->node, &bd->complete_list);
+	queue_work(BACKING_DEV_TO_PCACHE(bd)->task_wq, &bd->req_complete_work);
+	spin_unlock_irqrestore(&bd->complete_lock, irq_flags);
+}
+
+/*
+ * Work handler: detach everything queued on submit_list under the lock, then
+ * submit each request's bio outside of it.
+ */
+static void req_submit_fn(struct work_struct *work)
+{
+	struct pcache_backing_dev *bd = container_of(work, struct pcache_backing_dev, req_submit_work);
+	struct pcache_backing_dev_req *req, *next;
+	LIST_HEAD(pending);
+
+	spin_lock(&bd->submit_lock);
+	list_splice_init(&bd->submit_list, &pending);
+	spin_unlock(&bd->submit_lock);
+
+	/* Unlink before submitting; the request is not touched afterwards. */
+	list_for_each_entry_safe(req, next, &pending, node) {
+		list_del_init(&req->node);
+		submit_bio_noacct(&req->bio);
+	}
+}
+
+/*
+ * Submit a backing request.
+ *
+ * @direct: when true the bio is submitted from the calling context; otherwise
+ *          the request is queued on submit_list and req_submit_work is
+ *          scheduled to submit it.
+ */
+void backing_dev_req_submit(struct pcache_backing_dev_req *backing_req, bool direct)
+{
+	struct pcache_backing_dev *bd = backing_req->backing_dev;
+
+	if (!direct) {
+		spin_lock(&bd->submit_lock);
+		list_add_tail(&backing_req->node, &bd->submit_list);
+		queue_work(BACKING_DEV_TO_PCACHE(bd)->task_wq, &bd->req_submit_work);
+		spin_unlock(&bd->submit_lock);
+		return;
+	}
+
+	submit_bio_noacct(&backing_req->bio);
+}
+
+/*
+ * Attach the buffer [base, base + size) to @bio.
+ *
+ * A non-vmalloc buffer is added as one segment. A vmalloc buffer is flushed
+ * with flush_kernel_vmap_range() and then added one page at a time. Failing
+ * to add a segment is treated as a bug.
+ */
+static void bio_map(struct bio *bio, void *base, size_t size)
+{
+	if (is_vmalloc_addr(base)) {
+		flush_kernel_vmap_range(base, size);
+		while (size) {
+			unsigned int in_page = offset_in_page(base);
+			unsigned int chunk = min_t(size_t, PAGE_SIZE - in_page, size);
+
+			BUG_ON(!bio_add_page(bio, vmalloc_to_page(base), chunk, in_page));
+			size -= chunk;
+			base += chunk;
+		}
+		return;
+	}
+
+	BUG_ON(!bio_add_page(bio, virt_to_page(base), size, offset_in_page(base)));
+}
+
+/*
+ * Allocate a BACKING_DEV_REQ_TYPE_REQ request whose embedded bio is a clone
+ * of the upper request's bio, targeted at the backing block device.
+ *
+ * Allocation uses opts->gfp_mask. Returns the zero-initialised request with
+ * the in-flight counter already incremented, or NULL when either the request
+ * allocation or the bio clone fails (nothing is left allocated or counted in
+ * that case).
+ */
+static struct pcache_backing_dev_req *req_type_req_alloc(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	struct pcache_request *pcache_req = opts->req.upper_req;
+	struct pcache_backing_dev_req *backing_req;
+	struct bio *orig = pcache_req->bio;
+
+	backing_req = mempool_alloc(&backing_dev->req_pool, opts->gfp_mask);
+	if (!backing_req)
+		return NULL;
+
+	memset(backing_req, 0, sizeof(struct pcache_backing_dev_req));
+
+	/*
+	 * bio_init_clone() reports failure through its return value. Do not
+	 * hand out a partially cloned bio: undo the bio initialisation and
+	 * give the request back to the pool.
+	 */
+	if (bio_init_clone(backing_dev->dm_dev->bdev, &backing_req->bio, orig, opts->gfp_mask)) {
+		bio_uninit(&backing_req->bio);
+		mempool_free(backing_req, &backing_dev->req_pool);
+		return NULL;
+	}
+
+	backing_req->type = BACKING_DEV_REQ_TYPE_REQ;
+	backing_req->backing_dev = backing_dev;
+	atomic_inc(&backing_dev->inflight_reqs);
+
+	return backing_req;
+}
+
+/*
+ * Allocate a BACKING_DEV_REQ_TYPE_KMEM request able to describe the buffer
+ * opts->kmem.data / opts->kmem.len.
+ *
+ * The bvec array is the inline one when the number of vectors reported by
+ * bio_add_max_vecs() fits in BACKING_DEV_REQ_INLINE_BVECS, otherwise it comes
+ * from bvec_pool. Returns the request with the in-flight counter incremented,
+ * or NULL when an allocation fails.
+ */
+static struct pcache_backing_dev_req *kmem_type_req_alloc(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	u32 n_vecs = bio_add_max_vecs(opts->kmem.data, opts->kmem.len);
+	struct pcache_backing_dev_req *req;
+	struct bio_vec *vecs;
+
+	req = mempool_alloc(&backing_dev->req_pool, opts->gfp_mask);
+	if (!req)
+		return NULL;
+
+	memset(req, 0, sizeof(struct pcache_backing_dev_req));
+
+	vecs = req->kmem.inline_bvecs;
+	if (n_vecs > BACKING_DEV_REQ_INLINE_BVECS) {
+		vecs = mempool_alloc(&backing_dev->bvec_pool, opts->gfp_mask);
+		if (!vecs) {
+			mempool_free(req, &backing_dev->req_pool);
+			return NULL;
+		}
+	}
+
+	req->kmem.bvecs = vecs;
+	req->kmem.n_vecs = n_vecs;
+	req->type = BACKING_DEV_REQ_TYPE_KMEM;
+	req->backing_dev = backing_dev;
+	atomic_inc(&backing_dev->inflight_reqs);
+
+	return req;
+}
+
+/*
+ * Allocate a backing request of the kind selected by opts->type. Returns
+ * NULL when the type-specific allocator fails; an unknown type is a bug.
+ */
+struct pcache_backing_dev_req *backing_dev_req_alloc(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	switch (opts->type) {
+	case BACKING_DEV_REQ_TYPE_REQ:
+		return req_type_req_alloc(backing_dev, opts);
+	case BACKING_DEV_REQ_TYPE_KMEM:
+		return kmem_type_req_alloc(backing_dev, opts);
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Finish setting up a BACKING_DEV_REQ_TYPE_REQ request: trim the cloned bio
+ * to the byte range [req_off, req_off + len) of the upper request, aim it at
+ * the matching sector, install the completion hooks and take a reference on
+ * the upper request. Both req_off and len must be sector aligned.
+ */
+static void req_type_req_init(struct pcache_backing_dev_req *backing_req,
+			struct pcache_backing_dev_req_opts *opts)
+{
+	struct pcache_request *upper = opts->req.upper_req;
+	struct bio *clone = &backing_req->bio;
+	u32 start = opts->req.req_off;
+	u32 bytes = opts->req.len;
+
+	BUG_ON(start & SECTOR_MASK);
+	BUG_ON(bytes & SECTOR_MASK);
+	bio_trim(clone, start >> SECTOR_SHIFT, bytes >> SECTOR_SHIFT);
+
+	clone->bi_iter.bi_sector = (upper->off + start) >> SECTOR_SHIFT;
+	clone->bi_private = backing_req;
+	clone->bi_end_io = backing_dev_bio_end;
+
+	INIT_LIST_HEAD(&backing_req->node);
+	backing_req->end_req = opts->end_fn;
+
+	/* Reference dropped again in backing_dev_req_end(). */
+	pcache_req_get(upper);
+	backing_req->req.upper_req = upper;
+	backing_req->req.bio_off = start;
+}
+
+/*
+ * Finish setting up a BACKING_DEV_REQ_TYPE_KMEM request: initialise the
+ * embedded bio on the request's bvec array, map the kernel buffer into it,
+ * aim it at byte offset opts->kmem.backing_off of the backing device and
+ * install the completion hooks.
+ */
+static void kmem_type_req_init(struct pcache_backing_dev_req *backing_req,
+			struct pcache_backing_dev_req_opts *opts)
+{
+	struct bio *bio = &backing_req->bio;
+
+	bio_init(bio, backing_req->backing_dev->dm_dev->bdev, backing_req->kmem.bvecs,
+		 backing_req->kmem.n_vecs, opts->kmem.opf);
+	bio_map(bio, opts->kmem.data, opts->kmem.len);
+
+	bio->bi_iter.bi_sector = (opts->kmem.backing_off) >> SECTOR_SHIFT;
+	bio->bi_private = backing_req;
+	bio->bi_end_io = backing_dev_bio_end;
+
+	INIT_LIST_HEAD(&backing_req->node);
+	backing_req->end_req = opts->end_fn;
+	backing_req->priv_data = opts->priv_data;
+}
+
+/*
+ * Initialise an allocated backing request according to opts->type; an
+ * unknown type is a bug.
+ */
+void backing_dev_req_init(struct pcache_backing_dev_req *backing_req,
+			struct pcache_backing_dev_req_opts *opts)
+{
+	switch (opts->type) {
+	case BACKING_DEV_REQ_TYPE_REQ:
+		req_type_req_init(backing_req, opts);
+		break;
+	case BACKING_DEV_REQ_TYPE_KMEM:
+		kmem_type_req_init(backing_req, opts);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Allocate and initialise a backing request in one step. Returns NULL when
+ * the allocation fails.
+ */
+struct pcache_backing_dev_req *backing_dev_req_create(struct pcache_backing_dev *backing_dev,
+						struct pcache_backing_dev_req_opts *opts)
+{
+	struct pcache_backing_dev_req *req = backing_dev_req_alloc(backing_dev, opts);
+
+	if (req)
+		backing_dev_req_init(req, opts);
+
+	return req;
+}
+
+void backing_dev_flush(struct pcache_backing_dev *backing_dev) /* issue a flush to the backing block device */
+{
+ blkdev_issue_flush(backing_dev->dm_dev->bdev); /* NOTE(review): the call's result is not propagated to the caller */
+}
+
+/*
+ * Create the two slab caches shared by all backing devices: one for
+ * struct pcache_backing_dev_req and one for bvec arrays large enough for
+ * (PCACHE_CACHE_SUBTREE_SIZE >> PAGE_SHIFT) + 1 vectors.
+ *
+ * Returns 0 on success or -ENOMEM; on failure no cache is left behind.
+ */
+int pcache_backing_init(void)
+{
+	const u32 max_bvecs = (PCACHE_CACHE_SUBTREE_SIZE >> PAGE_SHIFT) + 1;
+
+	backing_req_cache = KMEM_CACHE(pcache_backing_dev_req, 0);
+	if (!backing_req_cache)
+		return -ENOMEM;
+
+	backing_bvec_cache = kmem_cache_create("pcache-bvec-slab",
+					max_bvecs * sizeof(struct bio_vec),
+					0, 0, NULL);
+	if (backing_bvec_cache)
+		return 0;
+
+	kmem_cache_destroy(backing_req_cache);
+	return -ENOMEM;
+}
+
+void pcache_backing_exit(void) /* destroy the slab caches created by pcache_backing_init() */
+{
+ kmem_cache_destroy(backing_bvec_cache);
+ kmem_cache_destroy(backing_req_cache);
+}
diff --git a/drivers/md/dm-pcache/backing_dev.h b/drivers/md/dm-pcache/backing_dev.h
new file mode 100644
index 000000000000..b371cba483b9
--- /dev/null
+++ b/drivers/md/dm-pcache/backing_dev.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _BACKING_DEV_H
+#define _BACKING_DEV_H
+
+#include <linux/device-mapper.h>
+
+#include "pcache_internal.h"
+
+struct pcache_backing_dev_req;
+typedef void (*backing_req_end_fn_t)(struct pcache_backing_dev_req *backing_req, int ret);
+
+#define BACKING_DEV_REQ_TYPE_REQ 1
+#define BACKING_DEV_REQ_TYPE_KMEM 2
+
+#define BACKING_DEV_REQ_INLINE_BVECS 4
+
+struct pcache_request;
+/*
+ * One request issued to the backing device.
+ *
+ * The bio is embedded in the request. The anonymous union holds the state
+ * that differs between the two request types (see BACKING_DEV_REQ_TYPE_*).
+ */
+struct pcache_backing_dev_req {
+ u8 type; /* presumably one of BACKING_DEV_REQ_TYPE_* -- TODO confirm */
+ struct bio bio;
+ struct pcache_backing_dev *backing_dev;
+
+ /* Completion callback and its opaque argument, copied from the opts. */
+ void *priv_data;
+ backing_req_end_fn_t end_req;
+
+ struct list_head node;
+ int ret;
+
+ union {
+ /* BACKING_DEV_REQ_TYPE_REQ: request derived from an upper pcache_request */
+ struct {
+ struct pcache_request *upper_req;
+ u32 bio_off;
+ } req;
+ /* BACKING_DEV_REQ_TYPE_KMEM: request built over a kernel buffer */
+ struct {
+ struct bio_vec inline_bvecs[BACKING_DEV_REQ_INLINE_BVECS];
+ struct bio_vec *bvecs;
+ u32 n_vecs;
+ } kmem;
+ };
+};
+
+/*
+ * Per-target state of the backing (origin) device.
+ *
+ * NOTE(review): the submit/complete lists each come with their own spinlock
+ * and work item, which suggests requests are queued there and processed from
+ * work context -- confirm against backing_dev.c.
+ */
+struct pcache_backing_dev {
+ struct pcache_cache *cache;
+
+ struct dm_dev *dm_dev;
+ mempool_t req_pool;
+ mempool_t bvec_pool;
+
+ struct list_head submit_list;
+ spinlock_t submit_lock;
+ struct work_struct req_submit_work;
+
+ struct list_head complete_list;
+ spinlock_t complete_lock;
+ struct work_struct req_complete_work;
+
+ atomic_t inflight_reqs;
+ wait_queue_head_t inflight_wq;
+
+ /* Device size; cache.c shifts it by SECTOR_SHIFT, i.e. it is in sectors. */
+ u64 dev_size;
+};
+
+struct dm_pcache;
+int backing_dev_start(struct dm_pcache *pcache);
+void backing_dev_stop(struct dm_pcache *pcache);
+
+/*
+ * Parameters for backing_dev_req_alloc()/backing_dev_req_init().
+ *
+ * @type selects which member of the union is meaningful
+ * (BACKING_DEV_REQ_TYPE_REQ -> req, BACKING_DEV_REQ_TYPE_KMEM -> kmem).
+ */
+struct pcache_backing_dev_req_opts {
+ u32 type;
+ union {
+ struct {
+ struct pcache_request *upper_req;
+ u32 req_off;
+ u32 len;
+ } req;
+ struct {
+ void *data; /* kernel buffer mapped into the bio */
+ blk_opf_t opf;
+ u32 len; /* length of @data in bytes */
+ u64 backing_off; /* byte offset; shifted by SECTOR_SHIFT for bi_sector */
+ } kmem;
+ };
+
+ gfp_t gfp_mask;
+ backing_req_end_fn_t end_fn; /* stored as the request's end_req */
+ void *priv_data; /* stored as the request's priv_data */
+};
+
+/*
+ * Compute how many bytes, starting at @data and capped at @len, can be
+ * covered before crossing into a page for which
+ * zone_device_pages_have_same_pgmap() no longer matches the first page.
+ *
+ * Addresses that are not vmalloc addresses are not inspected at all and
+ * @len is returned unchanged.
+ */
+static inline u32 backing_dev_req_coalesced_max_len(const void *data, u32 len)
+{
+	const void *cur = data;
+	struct page *head_page;
+	u32 covered = 0;
+
+	if (!is_vmalloc_addr(data))
+		return len;
+
+	head_page = vmalloc_to_page(cur);
+	for (;;) {
+		u32 page_left = PAGE_SIZE - offset_in_page(cur);
+		u32 step = min_t(u32, page_left, len - covered);
+
+		covered += step;
+		cur += step;
+
+		/* Whole range consumed. */
+		if (covered == len)
+			break;
+
+		/* Stop at the first page that cannot be coalesced. */
+		if (!zone_device_pages_have_same_pgmap(head_page, vmalloc_to_page(cur)))
+			break;
+	}
+
+	return covered;
+}
+
+void backing_dev_req_submit(struct pcache_backing_dev_req *backing_req, bool direct);
+void backing_dev_req_end(struct pcache_backing_dev_req *backing_req);
+struct pcache_backing_dev_req *backing_dev_req_create(struct pcache_backing_dev *backing_dev,
+ struct pcache_backing_dev_req_opts *opts);
+struct pcache_backing_dev_req *backing_dev_req_alloc(struct pcache_backing_dev *backing_dev,
+ struct pcache_backing_dev_req_opts *opts);
+void backing_dev_req_init(struct pcache_backing_dev_req *backing_req,
+ struct pcache_backing_dev_req_opts *opts);
+void backing_dev_flush(struct pcache_backing_dev *backing_dev);
+
+int pcache_backing_init(void);
+void pcache_backing_exit(void);
+#endif /* _BACKING_DEV_H */
diff --git a/drivers/md/dm-pcache/cache.c b/drivers/md/dm-pcache/cache.c
new file mode 100644
index 000000000000..d8e92367d947
--- /dev/null
+++ b/drivers/md/dm-pcache/cache.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/blk_types.h>
+
+#include "cache.h"
+#include "cache_dev.h"
+#include "backing_dev.h"
+#include "dm_pcache.h"
+
+struct kmem_cache *key_cache;
+
+/*
+ * Return the on-media address of the cache_info slot selected by
+ * cache->info_index.
+ *
+ * Slots are PCACHE_CACHE_INFO_SIZE bytes apart -- the stride that
+ * cache_info_init() passes to pcache_meta_find_latest() -- and not
+ * sizeof(struct pcache_cache_info), so the offset is computed in bytes.
+ */
+static inline struct pcache_cache_info *get_cache_info_addr(struct pcache_cache *cache)
+{
+	return (void *)cache->cache_info_addr +
+		(size_t)cache->info_index * PCACHE_CACHE_INFO_SIZE;
+}
+
+/*
+ * Persist cache->cache_info to the cache device.
+ *
+ * The sequence number is bumped and the CRC recomputed, then the copy is
+ * written to the slot following the current one, so that the most recent
+ * valid copy is never the one being overwritten.
+ */
+static void cache_info_write(struct pcache_cache *cache)
+{
+	struct pcache_cache_info *cache_info = &cache->cache_info;
+
+	cache_info->header.seq++;
+	cache_info->header.crc = pcache_meta_crc(&cache_info->header,
+						sizeof(struct pcache_cache_info));
+
+	/* Advance first: info_index names the slot holding the newest copy. */
+	cache->info_index = (cache->info_index + 1) % PCACHE_META_INDEX_MAX;
+
+	memcpy_flushcache(get_cache_info_addr(cache), cache_info,
+			sizeof(struct pcache_cache_info));
+	/* Same write barrier cache_pos_encode() issues after its metadata write. */
+	pmem_wmb();
+}
+
+static void cache_info_init_default(struct pcache_cache *cache);
+
+/*
+ * Load the latest valid cache_info from the cache device, or set up
+ * defaults when none exists yet.
+ *
+ * For an existing cache the data_crc option must match what is recorded
+ * on media, otherwise -EINVAL is returned. Errors reported by
+ * pcache_meta_find_latest() are passed through. Returns 0 on success.
+ */
+static int cache_info_init(struct pcache_cache *cache, struct pcache_cache_options *opts)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_info *cache_info_addr;
+
+	cache_info_addr = pcache_meta_find_latest(&cache->cache_info_addr->header,
+						sizeof(struct pcache_cache_info),
+						PCACHE_CACHE_INFO_SIZE,
+						&cache->cache_info);
+	if (IS_ERR(cache_info_addr))
+		return PTR_ERR(cache_info_addr);
+
+	if (cache_info_addr) {
+		if (opts->data_crc !=
+		    (cache->cache_info.flags & PCACHE_CACHE_FLAGS_DATA_CRC)) {
+			pcache_dev_err(pcache, "invalid option for data_crc: %s, expected: %s",
+				       opts->data_crc ? "true" : "false",
+				       cache->cache_info.flags & PCACHE_CACHE_FLAGS_DATA_CRC ? "true" : "false");
+			return -EINVAL;
+		}
+
+		/* Remember which slot holds the copy that was just loaded. */
+		cache->info_index = ((void *)cache_info_addr - (void *)cache->cache_info_addr) /
+				    PCACHE_CACHE_INFO_SIZE;
+
+		return 0;
+	}
+
+	/* init cache_info for new cache */
+	cache_info_init_default(cache);
+	cache_mode_set(cache, opts->cache_mode);
+	if (opts->data_crc)
+		cache->cache_info.flags |= PCACHE_CACHE_FLAGS_DATA_CRC;
+
+	return 0;
+}
+
+/* Replace the GC-percent bit field inside cache_info->flags with @percent. */
+static void cache_info_set_gc_percent(struct pcache_cache_info *cache_info, u8 percent)
+{
+	u32 flags = cache_info->flags;
+
+	flags &= ~PCACHE_CACHE_FLAGS_GC_PERCENT_MASK;
+	flags |= FIELD_PREP(PCACHE_CACHE_FLAGS_GC_PERCENT_MASK, percent);
+
+	cache_info->flags = flags;
+}
+
+/*
+ * Update the GC threshold and persist it.
+ *
+ * @percent must lie within [PCACHE_CACHE_GC_PERCENT_MIN,
+ * PCACHE_CACHE_GC_PERCENT_MAX]; otherwise -EINVAL is returned and nothing
+ * is changed. The update and the metadata write happen under
+ * cache_info_lock. Returns 0 on success.
+ */
+int pcache_cache_set_gc_percent(struct pcache_cache *cache, u8 percent)
+{
+	bool in_range = percent >= PCACHE_CACHE_GC_PERCENT_MIN &&
+			percent <= PCACHE_CACHE_GC_PERCENT_MAX;
+
+	if (!in_range)
+		return -EINVAL;
+
+	mutex_lock(&cache->cache_info_lock);
+	cache_info_set_gc_percent(&cache->cache_info, percent);
+	cache_info_write(cache);
+	mutex_unlock(&cache->cache_info_lock);
+
+	return 0;
+}
+
+/*
+ * Persist a cache position into one of the PCACHE_META_INDEX_MAX on-media
+ * slots starting at @pos_onmedia_base.
+ *
+ * @index names the slot that holds the newest copy (cache_pos_decode()
+ * sets it to the slot it loaded from). It is advanced *before* writing so
+ * that the newest valid copy is never the one being overwritten; on return
+ * it names the slot that was just written.
+ */
+void cache_pos_encode(struct pcache_cache *cache,
+			struct pcache_cache_pos_onmedia *pos_onmedia_base,
+			struct pcache_cache_pos *pos, u64 seq, u32 *index)
+{
+	/*
+	 * Zero-initialised so that members not assigned below are not copied
+	 * to media (and into the CRC) as uninitialised stack contents.
+	 */
+	struct pcache_cache_pos_onmedia pos_onmedia = {};
+	struct pcache_cache_pos_onmedia *pos_onmedia_addr;
+
+	*index = (*index + 1) % PCACHE_META_INDEX_MAX;
+	pos_onmedia_addr = pos_onmedia_base + *index;
+
+	pos_onmedia.cache_seg_id = pos->cache_seg->cache_seg_id;
+	pos_onmedia.seg_off = pos->seg_off;
+	pos_onmedia.header.seq = seq;
+	pos_onmedia.header.crc = cache_pos_onmedia_crc(&pos_onmedia);
+
+	memcpy_flushcache(pos_onmedia_addr, &pos_onmedia, sizeof(struct pcache_cache_pos_onmedia));
+	pmem_wmb();
+}
+
+/*
+ * Load the newest valid cache position from the on-media slots at
+ * @pos_onmedia.
+ *
+ * On success @pos, @seq and @index (the slot the copy was found in) are
+ * filled in and 0 is returned. Errors from pcache_meta_find_latest() are
+ * passed through; -EIO is returned when no valid copy exists or when the
+ * recorded segment id does not fit cache->segments.
+ */
+int cache_pos_decode(struct pcache_cache *cache,
+			struct pcache_cache_pos_onmedia *pos_onmedia,
+			struct pcache_cache_pos *pos, u64 *seq, u32 *index)
+{
+	struct pcache_cache_pos_onmedia latest, *latest_addr;
+
+	latest_addr = pcache_meta_find_latest(&pos_onmedia->header,
+					sizeof(struct pcache_cache_pos_onmedia),
+					sizeof(struct pcache_cache_pos_onmedia),
+					&latest);
+	if (IS_ERR(latest_addr))
+		return PTR_ERR(latest_addr);
+
+	if (!latest_addr)
+		return -EIO;
+
+	/* The id comes from media; never index past the segments array. */
+	if (latest.cache_seg_id >= cache->n_segs)
+		return -EIO;
+
+	pos->cache_seg = &cache->segments[latest.cache_seg_id];
+	pos->seg_off = latest.seg_off;
+	*seq = latest.header.seq;
+	*index = (latest_addr - pos_onmedia);
+
+	return 0;
+}
+
+/*
+ * Record @seg_id in the in-memory cache_info. get_seg_id() uses this field
+ * as the id of the first segment of the cache. Nothing is written to media
+ * here.
+ */
+static inline void cache_info_set_seg_id(struct pcache_cache *cache, u32 seg_id)
+{
+ cache->cache_info.seg_id = seg_id;
+}
+
+/*
+ * Allocate the per-segment array and the segment bitmap, then initialise
+ * the locks and work items of pcache->cache.
+ *
+ * Returns 0 on success or -ENOMEM; on failure no allocation is left behind.
+ */
+static int cache_init(struct dm_pcache *pcache)
+{
+	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
+	struct pcache_cache *cache = &pcache->cache;
+
+	cache->segments = kvcalloc(cache_dev->seg_num, sizeof(struct pcache_cache_segment), GFP_KERNEL);
+	if (!cache->segments)
+		return -ENOMEM;
+
+	cache->seg_map = kvcalloc(BITS_TO_LONGS(cache_dev->seg_num), sizeof(unsigned long), GFP_KERNEL);
+	if (!cache->seg_map) {
+		kvfree(cache->segments);
+		return -ENOMEM;
+	}
+
+	cache->backing_dev = &pcache->backing_dev;
+	cache->cache_dev = cache_dev;
+	cache->n_segs = cache_dev->seg_num;
+	atomic_set(&cache->gc_errors, 0);
+
+	spin_lock_init(&cache->seg_map_lock);
+	spin_lock_init(&cache->key_head_lock);
+
+	mutex_init(&cache->cache_info_lock);
+	mutex_init(&cache->key_tail_lock);
+	mutex_init(&cache->dirty_tail_lock);
+	mutex_init(&cache->writeback_lock);
+
+	INIT_DELAYED_WORK(&cache->writeback_work, cache_writeback_fn);
+	INIT_DELAYED_WORK(&cache->gc_work, pcache_cache_gc_fn);
+	INIT_WORK(&cache->clean_work, clean_fn);
+
+	return 0;
+}
+
+/* Free the segment bitmap and the segment array allocated by cache_init(). */
+static void cache_exit(struct pcache_cache *cache)
+{
+ kvfree(cache->seg_map);
+ kvfree(cache->segments);
+}
+
+/*
+ * Fill the in-memory cache_info for a brand-new cache: sequence number 0,
+ * n_segs taken from the cache device, and the default GC percentage.
+ * Other fields (cache mode, data_crc flag) are set by the caller.
+ */
+static void cache_info_init_default(struct pcache_cache *cache)
+{
+ struct pcache_cache_info *cache_info = &cache->cache_info;
+
+ cache_info->header.seq = 0;
+ cache_info->n_segs = cache->cache_dev->seg_num;
+ cache_info_set_gc_percent(cache_info, PCACHE_CACHE_GC_PERCENT_DEFAULT);
+}
+
+/*
+ * Set up key_tail and dirty_tail.
+ *
+ * An existing cache (INIT_DONE flag set) decodes both tails from media and
+ * fails with -EIO if either cannot be decoded. A new cache claims segment 0,
+ * points key_head, key_tail and dirty_tail at its start and persists both
+ * tails.
+ */
+static int cache_tail_init(struct pcache_cache *cache)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+
+	if (cache->cache_info.flags & PCACHE_CACHE_FLAGS_INIT_DONE) {
+		if (cache_decode_key_tail(cache) || cache_decode_dirty_tail(cache)) {
+			pcache_dev_err(pcache, "Corrupted key tail or dirty tail.\n");
+			return -EIO;
+		}
+
+		return 0;
+	}
+
+	/* Brand-new cache: everything starts at offset 0 of segment 0. */
+	__set_bit(0, cache->seg_map);
+
+	cache->key_head.cache_seg = &cache->segments[0];
+	cache->key_head.seg_off = 0;
+	cache_pos_copy(&cache->key_tail, &cache->key_head);
+	cache_pos_copy(&cache->dirty_tail, &cache->key_head);
+
+	cache_encode_dirty_tail(cache);
+	cache_encode_key_tail(cache);
+
+	return 0;
+}
+
+/*
+ * Determine the cache-device segment id for the next cache segment.
+ *
+ * New cache: take an empty segment from the cache device and link it,
+ * either as the successor of @prev_cache_seg or, for the first segment,
+ * as cache_info.seg_id.
+ * Existing cache: follow the recorded chain -- cache_info.seg_id for the
+ * first segment, otherwise the next_seg of @prev_cache_seg; a missing link
+ * yields -EFAULT.
+ *
+ * Returns 0 with *seg_id set, or a negative errno.
+ */
+static int get_seg_id(struct pcache_cache *cache,
+		      struct pcache_cache_segment *prev_cache_seg,
+		      bool new_cache, u32 *seg_id)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_dev *cache_dev = cache->cache_dev;
+	int ret;
+
+	if (new_cache) {
+		ret = cache_dev_get_empty_segment_id(cache_dev, seg_id);
+		if (ret) {
+			pcache_dev_err(pcache, "no available segment\n");
+			return ret;
+		}
+
+		if (prev_cache_seg)
+			cache_seg_set_next_seg(prev_cache_seg, *seg_id);
+		else
+			cache_info_set_seg_id(cache, *seg_id);
+
+		return 0;
+	}
+
+	if (!prev_cache_seg) {
+		*seg_id = cache->cache_info.seg_id;
+		return 0;
+	}
+
+	if (!segment_info_has_next(&prev_cache_seg->cache_seg_info))
+		return -EFAULT;
+
+	*seg_id = prev_cache_seg->cache_seg_info.next_seg;
+	return 0;
+}
+
+/*
+ * Initialise every cache segment recorded in cache_info, walking (or, for
+ * a new cache, building) the segment chain via get_seg_id().
+ *
+ * cache_info.n_segs may have been loaded from media, while cache->segments
+ * was sized from the cache device; a recorded count larger than the array
+ * is rejected with -EINVAL instead of indexing past its end.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int cache_segs_init(struct pcache_cache *cache)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_segment *prev_cache_seg = NULL;
+	struct pcache_cache_info *cache_info = &cache->cache_info;
+	bool new_cache = !(cache->cache_info.flags & PCACHE_CACHE_FLAGS_INIT_DONE);
+	u32 seg_id;
+	int ret;
+	u32 i;
+
+	if (cache_info->n_segs > cache->n_segs) {
+		pcache_dev_err(pcache, "cache_info n_segs %u exceeds available segments %u\n",
+			       cache_info->n_segs, cache->n_segs);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < cache_info->n_segs; i++) {
+		ret = get_seg_id(cache, prev_cache_seg, new_cache, &seg_id);
+		if (ret)
+			return ret;
+
+		ret = cache_seg_init(cache, seg_id, i, new_cache);
+		if (ret)
+			return ret;
+
+		prev_cache_seg = &cache->segments[i];
+	}
+
+	return 0;
+}
+
+/*
+ * Set up everything needed to track cache keys for requests: the request
+ * key tree, @n_paral ksets, the per-CPU data heads, and finally replay the
+ * persisted keys.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * here is released again in reverse order.
+ */
+static int cache_init_req_keys(struct pcache_cache *cache, u32 n_paral)
+{
+ struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+ u32 n_subtrees;
+ int ret;
+ u32 i, cpu;
+
+ /* Calculate number of cache trees based on the device size */
+ n_subtrees = DIV_ROUND_UP(cache->dev_size << SECTOR_SHIFT, PCACHE_CACHE_SUBTREE_SIZE);
+ ret = cache_tree_init(cache, &cache->req_key_tree, n_subtrees);
+ if (ret)
+ goto err;
+
+ cache->n_ksets = n_paral;
+ cache->ksets = kvcalloc(cache->n_ksets, PCACHE_KSET_SIZE, GFP_KERNEL);
+ if (!cache->ksets) {
+ ret = -ENOMEM;
+ goto req_tree_exit;
+ }
+
+ /*
+ * Initialize each kset with a spinlock and delayed work for flushing.
+ * Each kset is associated with one queue to ensure independent handling
+ * of cache keys across multiple queues, maximizing multiqueue concurrency.
+ */
+ for (i = 0; i < cache->n_ksets; i++) {
+ struct pcache_cache_kset *kset = get_kset(cache, i);
+
+ kset->cache = cache;
+ spin_lock_init(&kset->kset_lock);
+ INIT_DELAYED_WORK(&kset->flush_work, kset_flush_fn);
+ }
+
+ cache->data_heads = alloc_percpu(struct pcache_cache_data_head);
+ if (!cache->data_heads) {
+ ret = -ENOMEM;
+ goto free_kset;
+ }
+
+ /* Every CPU starts without a data segment assigned to its head. */
+ for_each_possible_cpu(cpu) {
+ struct pcache_cache_data_head *h =
+ per_cpu_ptr(cache->data_heads, cpu);
+ h->head_pos.cache_seg = NULL;
+ }
+
+ /*
+ * Replay persisted cache keys using cache_replay.
+ * This function loads and replays cache keys from previously stored
+ * ksets, allowing the cache to restore its state after a restart.
+ */
+ ret = cache_replay(cache);
+ if (ret) {
+ pcache_dev_err(pcache, "failed to replay keys\n");
+ goto free_heads;
+ }
+
+ return 0;
+
+free_heads:
+ free_percpu(cache->data_heads);
+free_kset:
+ kvfree(cache->ksets);
+req_tree_exit:
+ cache_tree_exit(&cache->req_key_tree);
+err:
+ return ret;
+}
+
+/*
+ * Undo cache_init_req_keys(): wait for/cancel every kset's flush work, then
+ * free the per-CPU data heads, the ksets and the request key tree.
+ */
+static void cache_destroy_req_keys(struct pcache_cache *cache)
+{
+ u32 i;
+
+ /* The flush work touches the ksets, so stop it before freeing them. */
+ for (i = 0; i < cache->n_ksets; i++) {
+ struct pcache_cache_kset *kset = get_kset(cache, i);
+
+ cancel_delayed_work_sync(&kset->flush_work);
+ }
+
+ free_percpu(cache->data_heads);
+ kvfree(cache->ksets);
+ cache_tree_exit(&cache->req_key_tree);
+}
+
+/*
+ * Bring up the cache of @pcache.
+ *
+ * Steps, in order: allocate in-memory state, load or create cache_info,
+ * initialise the segments, the tails, the request keys (including replay)
+ * and writeback. On success the INIT_DONE flag is set and persisted and the
+ * GC work is queued immediately.
+ *
+ * Returns 0 on success or a negative errno, in which case the state set up
+ * so far is torn down again.
+ */
+int pcache_cache_start(struct dm_pcache *pcache)
+{
+ struct pcache_backing_dev *backing_dev = &pcache->backing_dev;
+ struct pcache_cache *cache = &pcache->cache;
+ struct pcache_cache_options *opts = &pcache->opts;
+ int ret;
+
+ ret = cache_init(pcache);
+ if (ret)
+ return ret;
+
+ cache->cache_info_addr = CACHE_DEV_CACHE_INFO(cache->cache_dev);
+ cache->cache_ctrl = CACHE_DEV_CACHE_CTRL(cache->cache_dev);
+ backing_dev->cache = cache;
+ cache->dev_size = backing_dev->dev_size;
+
+ ret = cache_info_init(cache, opts);
+ if (ret)
+ goto cache_exit;
+
+ ret = cache_segs_init(cache);
+ if (ret)
+ goto cache_exit;
+
+ ret = cache_tail_init(cache);
+ if (ret)
+ goto cache_exit;
+
+ /* One kset per CPU that is online at start time. */
+ ret = cache_init_req_keys(cache, num_online_cpus());
+ if (ret)
+ goto cache_exit;
+
+ ret = cache_writeback_init(cache);
+ if (ret)
+ goto destroy_keys;
+
+ /* Only now is the cache marked initialised on media. */
+ cache->cache_info.flags |= PCACHE_CACHE_FLAGS_INIT_DONE;
+ cache_info_write(cache);
+ queue_delayed_work(cache_get_wq(cache), &cache->gc_work, 0);
+
+ return 0;
+
+destroy_keys:
+ cache_destroy_req_keys(cache);
+cache_exit:
+ cache_exit(cache);
+
+ return ret;
+}
+
+/*
+ * Shut down the cache of @pcache: flush, stop GC/clean/writeback work, then
+ * release the request keys and the in-memory state.
+ *
+ * NOTE(review): the return value of cache_flush() is ignored here.
+ */
+void pcache_cache_stop(struct dm_pcache *pcache)
+{
+ struct pcache_cache *cache = &pcache->cache;
+
+ cache_flush(cache);
+
+ cancel_delayed_work_sync(&cache->gc_work);
+ flush_work(&cache->clean_work);
+ cache_writeback_exit(cache);
+
+ /* A zero n_subtrees means the request keys were never set up. */
+ if (cache->req_key_tree.n_subtrees)
+ cache_destroy_req_keys(cache);
+
+ cache_exit(cache);
+}
+
+/* Return the workqueue (task_wq) of the dm_pcache that owns @cache. */
+struct workqueue_struct *cache_get_wq(struct pcache_cache *cache)
+{
+	return CACHE_TO_PCACHE(cache)->task_wq;
+}
+
+/*
+ * Create the slab cache for struct pcache_cache_key.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int pcache_cache_init(void)
+{
+	key_cache = KMEM_CACHE(pcache_cache_key, 0);
+
+	return key_cache ? 0 : -ENOMEM;
+}
+
+/* Destroy the slab cache created by pcache_cache_init(). */
+void pcache_cache_exit(void)
+{
+ kmem_cache_destroy(key_cache);
+}
diff --git a/drivers/md/dm-pcache/cache.h b/drivers/md/dm-pcache/cache.h
new file mode 100644
index 000000000000..1136d86958c8
--- /dev/null
+++ b/drivers/md/dm-pcache/cache.h
@@ -0,0 +1,635 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_CACHE_H
+#define _PCACHE_CACHE_H
+
+#include "segment.h"
+
+/* Garbage collection thresholds */
+#define PCACHE_CACHE_GC_PERCENT_MIN 0 /* Minimum GC percentage */
+#define PCACHE_CACHE_GC_PERCENT_MAX 90 /* Maximum GC percentage */
+#define PCACHE_CACHE_GC_PERCENT_DEFAULT 70 /* Default GC percentage */
+
+#define PCACHE_CACHE_SUBTREE_SIZE (4 * PCACHE_MB) /* 4MB total tree size */
+#define PCACHE_CACHE_SUBTREE_SIZE_MASK 0x3FFFFF /* Mask for tree size */
+#define PCACHE_CACHE_SUBTREE_SIZE_SHIFT 22 /* Bit shift for tree size */
+
+/* Maximum number of keys per key set */
+#define PCACHE_KSET_KEYS_MAX 128
+#define PCACHE_CACHE_SEGS_MAX (1024 * 1024) /* maximum cache size for each device is 16T */
+#define PCACHE_KSET_ONMEDIA_SIZE_MAX struct_size_t(struct pcache_cache_kset_onmedia, data, PCACHE_KSET_KEYS_MAX)
+#define PCACHE_KSET_SIZE (sizeof(struct pcache_cache_kset) + sizeof(struct pcache_cache_key_onmedia) * PCACHE_KSET_KEYS_MAX)
+
+/* Maximum number of keys to clean in one round of clean_work */
+#define PCACHE_CLEAN_KEYS_MAX 10
+
+/* Writeback and garbage collection intervals in jiffies */
+#define PCACHE_CACHE_WRITEBACK_INTERVAL (5 * HZ)
+#define PCACHE_CACHE_GC_INTERVAL (5 * HZ)
+
+/* Macro to get the cache key structure from an rb_node pointer */
+#define CACHE_KEY(node) (container_of(node, struct pcache_cache_key, rb_node))
+
+/*
+ * Persistent form of a cache position, as written by cache_pos_encode():
+ * a metadata header (seq + crc) followed by the segment index and the
+ * offset inside that segment.
+ */
+struct pcache_cache_pos_onmedia {
+ struct pcache_meta_header header;
+ __u32 cache_seg_id;
+ __u32 seg_off;
+};
+
+/* Offset and size definitions for cache segment control */
+#define PCACHE_CACHE_SEG_CTRL_OFF (PCACHE_SEG_INFO_SIZE * PCACHE_META_INDEX_MAX)
+#define PCACHE_CACHE_SEG_CTRL_SIZE (4 * PCACHE_KB)
+
+/* A segment generation number together with its metadata header. */
+struct pcache_cache_seg_gen {
+ struct pcache_meta_header header;
+ __u64 gen;
+};
+
+/* Control structure for cache segments */
+struct pcache_cache_seg_ctrl {
+ /* One generation record per metadata slot. */
+ struct pcache_cache_seg_gen gen[PCACHE_META_INDEX_MAX];
+ __u64 res[64]; /* presumably reserved space -- TODO confirm */
+};
+
+#define PCACHE_CACHE_FLAGS_DATA_CRC BIT(0)
+#define PCACHE_CACHE_FLAGS_INIT_DONE BIT(1)
+
+#define PCACHE_CACHE_FLAGS_CACHE_MODE_MASK GENMASK(5, 2)
+#define PCACHE_CACHE_MODE_WRITEBACK 0
+#define PCACHE_CACHE_MODE_WRITETHROUGH 1
+#define PCACHE_CACHE_MODE_WRITEAROUND 2
+#define PCACHE_CACHE_MODE_WRITEONLY 3
+
+#define PCACHE_CACHE_FLAGS_GC_PERCENT_MASK GENMASK(12, 6)
+
+/*
+ * Cache-wide metadata persisted by cache_info_write().
+ *
+ * @flags packs the PCACHE_CACHE_FLAGS_* bits: data CRC, init-done, the
+ * cache mode field and the GC percent field.
+ */
+struct pcache_cache_info {
+ struct pcache_meta_header header;
+ __u32 seg_id; /* id of the first segment of the cache (see get_seg_id()) */
+ __u32 n_segs; /* number of cache segments */
+ __u32 flags;
+ __u32 reserved;
+};
+
+/* In-memory cache position: a cache segment plus a byte offset into its data. */
+struct pcache_cache_pos {
+ struct pcache_cache_segment *cache_seg;
+ u32 seg_off;
+};
+
+/*
+ * In-memory state of one cache segment.
+ *
+ * NOTE(review): info_lock/info_index and gen_lock/gen_seq/gen_index look
+ * like they guard the persisted segment info and generation respectively --
+ * confirm against the segment code.
+ */
+struct pcache_cache_segment {
+ struct pcache_cache *cache;
+ u32 cache_seg_id; /* Index in cache->segments */
+ struct pcache_segment segment;
+ atomic_t refs;
+
+ struct pcache_segment_info cache_seg_info;
+ struct mutex info_lock;
+ u32 info_index;
+
+ spinlock_t gen_lock;
+ u64 gen; /* compared against key->seg_gen in cache_key_invalid() */
+ u64 gen_seq;
+ u32 gen_index;
+
+ struct pcache_cache_seg_ctrl *cache_seg_ctrl;
+};
+
+/* rbtree for cache entries, with the spinlock that accompanies it */
+struct pcache_cache_subtree {
+ struct rb_root root;
+ spinlock_t tree_lock;
+};
+
+/*
+ * A set of subtrees indexed by logical offset; get_subtree() selects one
+ * with off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT.
+ */
+struct pcache_cache_tree {
+ struct pcache_cache *cache;
+ u32 n_subtrees;
+ mempool_t key_pool;
+ struct pcache_cache_subtree *subtrees;
+};
+
+extern struct kmem_cache *key_cache;
+
+/*
+ * In-memory cache key: maps the logical range [off, off + len) to a
+ * position in the cache (cache_pos).
+ */
+struct pcache_cache_key {
+ struct pcache_cache_tree *cache_tree;
+ struct pcache_cache_subtree *cache_subtree; /* subtree the key is linked in */
+ struct kref ref;
+ struct rb_node rb_node;
+ struct list_head list_node;
+ u64 off; /* logical start offset */
+ u32 len; /* length of the range */
+ u32 flags; /* PCACHE_CACHE_KEY_FLAGS_* */
+ struct pcache_cache_pos cache_pos;
+ u64 seg_gen; /* key is invalid once this is below the segment's gen */
+};
+
+#define PCACHE_CACHE_KEY_FLAGS_EMPTY BIT(0)
+#define PCACHE_CACHE_KEY_FLAGS_CLEAN BIT(1)
+
+/*
+ * Key record stored inside a kset (see struct pcache_cache_kset_onmedia).
+ * NOTE(review): field meanings presumably mirror struct pcache_cache_key --
+ * confirm against cache_key_decode().
+ */
+struct pcache_cache_key_onmedia {
+ __u64 off;
+ __u32 len;
+ __u32 flags;
+ __u32 cache_seg_id;
+ __u32 cache_seg_off;
+ __u64 seg_gen;
+ __u32 data_crc;
+ __u32 reserved;
+};
+
+/*
+ * Header of a key set followed by its keys.
+ *
+ * @crc covers everything after the first 4 bytes (see cache_kset_crc()).
+ * When PCACHE_KSET_FLAGS_LAST is set in @flags the CRC covers the header
+ * only; NOTE(review): in that case the union presumably carries
+ * next_cache_seg_id instead of key_num -- confirm.
+ */
+struct pcache_cache_kset_onmedia {
+ __u32 crc;
+ union {
+ __u32 key_num;
+ __u32 next_cache_seg_id;
+ };
+ __u64 magic;
+ __u64 flags;
+ struct pcache_cache_key_onmedia data[];
+};
+
+/*
+ * Top-level in-memory state of one pcache cache instance.
+ */
+struct pcache_cache {
+ struct pcache_backing_dev *backing_dev;
+ struct pcache_cache_dev *cache_dev;
+ struct pcache_cache_ctrl *cache_ctrl;
+ /* Copied from the backing device; shifted by SECTOR_SHIFT to get bytes. */
+ u64 dev_size;
+
+ struct pcache_cache_data_head __percpu *data_heads;
+
+ /* Key head and the ksets (n_ksets entries of PCACHE_KSET_SIZE bytes each). */
+ spinlock_t key_head_lock;
+ struct pcache_cache_pos key_head;
+ u32 n_ksets;
+ struct pcache_cache_kset *ksets;
+
+ /* key_tail and the seq/slot index used when persisting it. */
+ struct mutex key_tail_lock;
+ struct pcache_cache_pos key_tail;
+ u64 key_tail_seq;
+ u32 key_tail_index;
+
+ /* dirty_tail and the seq/slot index used when persisting it. */
+ struct mutex dirty_tail_lock;
+ struct pcache_cache_pos dirty_tail;
+ u64 dirty_tail_seq;
+ u32 dirty_tail_index;
+
+ struct pcache_cache_tree req_key_tree;
+ struct work_struct clean_work;
+
+ /* Writeback state. */
+ struct mutex writeback_lock;
+ char wb_kset_onmedia_buf[PCACHE_KSET_ONMEDIA_SIZE_MAX];
+ struct pcache_cache_tree writeback_key_tree;
+ struct delayed_work writeback_work;
+ struct {
+ atomic_t pending;
+ u32 advance;
+ int ret;
+ } writeback_ctx;
+
+ /* Garbage-collection state. */
+ char gc_kset_onmedia_buf[PCACHE_KSET_ONMEDIA_SIZE_MAX];
+ struct delayed_work gc_work;
+ atomic_t gc_errors;
+
+ /* In-memory cache_info, its on-media location and current slot index. */
+ struct mutex cache_info_lock;
+ struct pcache_cache_info cache_info;
+ struct pcache_cache_info *cache_info_addr;
+ u32 info_index;
+
+ /* Segment array (n_segs entries) and the bitmap that goes with it. */
+ u32 n_segs;
+ unsigned long *seg_map;
+ u32 last_cache_seg;
+ bool cache_full;
+ spinlock_t seg_map_lock;
+ struct pcache_cache_segment *segments;
+};
+
+struct workqueue_struct *cache_get_wq(struct pcache_cache *cache);
+
+struct dm_pcache;
+/*
+ * Options consumed by cache_info_init(): the cache mode (one of
+ * PCACHE_CACHE_MODE_*) and whether data CRC is enabled.
+ */
+struct pcache_cache_options {
+ u32 cache_mode:4;
+ u32 data_crc:1;
+};
+int pcache_cache_start(struct dm_pcache *pcache);
+void pcache_cache_stop(struct dm_pcache *pcache);
+
+/*
+ * Cache-wide control area holding the persisted key_tail and dirty_tail
+ * positions, each with PCACHE_META_INDEX_MAX slots.
+ */
+struct pcache_cache_ctrl {
+ /* Updated by gc_thread */
+ struct pcache_cache_pos_onmedia key_tail_pos[PCACHE_META_INDEX_MAX];
+
+ /* Updated by writeback_thread */
+ struct pcache_cache_pos_onmedia dirty_tail_pos[PCACHE_META_INDEX_MAX];
+};
+
+/*
+ * Per-CPU data head (allocated with alloc_percpu()); head_pos.cache_seg is
+ * NULL until a segment has been assigned.
+ */
+struct pcache_cache_data_head {
+ struct pcache_cache_pos head_pos;
+};
+
+/* Extract the GC percent bit field from the in-memory cache_info flags. */
+static inline u16 pcache_cache_get_gc_percent(struct pcache_cache *cache)
+{
+ return FIELD_GET(PCACHE_CACHE_FLAGS_GC_PERCENT_MASK, cache->cache_info.flags);
+}
+
+int pcache_cache_set_gc_percent(struct pcache_cache *cache, u8 percent);
+
+/* cache key */
+struct pcache_cache_key *cache_key_alloc(struct pcache_cache_tree *cache_tree, gfp_t gfp_mask);
+void cache_key_init(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key);
+void cache_key_get(struct pcache_cache_key *key);
+void cache_key_put(struct pcache_cache_key *key);
+int cache_key_append(struct pcache_cache *cache, struct pcache_cache_key *key, bool force_close);
+void cache_key_insert(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key, bool fixup);
+int cache_key_decode(struct pcache_cache *cache,
+ struct pcache_cache_key_onmedia *key_onmedia,
+ struct pcache_cache_key *key);
+void cache_pos_advance(struct pcache_cache_pos *pos, u32 len);
+
+#define PCACHE_KSET_FLAGS_LAST BIT(0)
+#define PCACHE_KSET_MAGIC 0x676894a64e164f1aULL
+
+/*
+ * In-memory kset. kset_onmedia ends in a flexible array, so each kset is
+ * allocated with PCACHE_KSET_SIZE bytes to leave room for
+ * PCACHE_KSET_KEYS_MAX keys; use get_kset() to index the array of ksets.
+ */
+struct pcache_cache_kset {
+ struct pcache_cache *cache;
+ spinlock_t kset_lock;
+ struct delayed_work flush_work;
+ struct pcache_cache_kset_onmedia kset_onmedia;
+};
+
+extern struct pcache_cache_kset_onmedia pcache_empty_kset;
+
+#define SUBTREE_WALK_RET_OK 0
+#define SUBTREE_WALK_RET_ERR 1
+#define SUBTREE_WALK_RET_NEED_KEY 2
+#define SUBTREE_WALK_RET_NEED_REQ 3
+#define SUBTREE_WALK_RET_RESEARCH 4
+
+/*
+ * Context for cache_subtree_walk().
+ *
+ * The callbacks are named after how the existing key (key_tmp) relates to
+ * the key being processed (key); the diagrams show key_tmp on the first
+ * line and key on the second.
+ * NOTE(review): return values are presumably the SUBTREE_WALK_RET_* codes --
+ * confirm against cache_subtree_walk().
+ */
+struct pcache_cache_subtree_walk_ctx {
+ struct pcache_cache_tree *cache_tree;
+ struct rb_node *start_node;
+ struct pcache_request *pcache_req;
+ struct pcache_cache_key *key;
+ u32 req_done;
+ int ret;
+
+ /* pre-allocated key and backing_dev_req */
+ struct pcache_cache_key *pre_alloc_key;
+ struct pcache_backing_dev_req *pre_alloc_req;
+
+ struct list_head *delete_key_list;
+ struct list_head *submit_req_list;
+
+ /*
+ * |--------| key_tmp
+ * |====| key
+ */
+ int (*before)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ /*
+ * |----------| key_tmp
+ * |=====| key
+ */
+ int (*after)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ /*
+ * |----------------| key_tmp
+ * |===========| key
+ */
+ int (*overlap_tail)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ /*
+ * |--------| key_tmp
+ * |==========| key
+ */
+ int (*overlap_head)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ /*
+ * |----| key_tmp
+ * |==========| key
+ */
+ int (*overlap_contain)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ /*
+ * |-----------| key_tmp
+ * |====| key
+ */
+ int (*overlap_contained)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+ struct pcache_cache_subtree_walk_ctx *ctx);
+
+ int (*walk_finally)(struct pcache_cache_subtree_walk_ctx *ctx, int ret);
+ bool (*walk_done)(struct pcache_cache_subtree_walk_ctx *ctx);
+};
+
+int cache_subtree_walk(struct pcache_cache_subtree_walk_ctx *ctx);
+struct rb_node *cache_subtree_search(struct pcache_cache_subtree *cache_subtree, struct pcache_cache_key *key,
+ struct rb_node **parentp, struct rb_node ***newp,
+ struct list_head *delete_key_list);
+int cache_kset_close(struct pcache_cache *cache, struct pcache_cache_kset *kset);
+void clean_fn(struct work_struct *work);
+void kset_flush_fn(struct work_struct *work);
+int cache_replay(struct pcache_cache *cache);
+int cache_tree_init(struct pcache_cache *cache, struct pcache_cache_tree *cache_tree, u32 n_subtrees);
+void cache_tree_clear(struct pcache_cache_tree *cache_tree);
+void cache_tree_exit(struct pcache_cache_tree *cache_tree);
+
+/* cache segments */
+struct pcache_cache_segment *get_cache_segment(struct pcache_cache *cache);
+int cache_seg_init(struct pcache_cache *cache, u32 seg_id, u32 cache_seg_id,
+ bool new_cache);
+void cache_seg_get(struct pcache_cache_segment *cache_seg);
+void cache_seg_put(struct pcache_cache_segment *cache_seg);
+void cache_seg_set_next_seg(struct pcache_cache_segment *cache_seg, u32 seg_id);
+
+/* cache request*/
+int cache_flush(struct pcache_cache *cache);
+void miss_read_end_work_fn(struct work_struct *work);
+int pcache_cache_handle_req(struct pcache_cache *cache, struct pcache_request *pcache_req);
+
+/* gc */
+void pcache_cache_gc_fn(struct work_struct *work);
+
+/* writeback */
+void cache_writeback_exit(struct pcache_cache *cache);
+int cache_writeback_init(struct pcache_cache *cache);
+void cache_writeback_fn(struct work_struct *work);
+
+/* inline functions */
+/*
+ * Pick the subtree responsible for logical offset @off. A tree with a
+ * single subtree always uses entry 0; otherwise the index is
+ * off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT.
+ */
+static inline struct pcache_cache_subtree *get_subtree(struct pcache_cache_tree *cache_tree, u64 off)
+{
+	u64 idx = 0;
+
+	if (cache_tree->n_subtrees != 1)
+		idx = off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT;
+
+	return &cache_tree->subtrees[idx];
+}
+
+/* Translate a cache position into the address inside its segment's data area. */
+static inline void *cache_pos_addr(struct pcache_cache_pos *pos)
+{
+ return (pos->cache_seg->segment.data + pos->seg_off);
+}
+
+/* Address that cache->key_head currently points at. */
+static inline void *get_key_head_addr(struct pcache_cache *cache)
+{
+ return cache_pos_addr(&cache->key_head);
+}
+
+/*
+ * Map logical offset @off to a kset index: the subtree index of @off
+ * modulo the number of ksets.
+ */
+static inline u32 get_kset_id(struct pcache_cache *cache, u64 off)
+{
+ u32 kset_id;
+
+ /* Only the remainder is needed; the quotient is discarded. */
+ div_u64_rem(off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT, cache->n_ksets, &kset_id);
+
+ return kset_id;
+}
+
+/*
+ * Return kset number @kset_id. Entries are PCACHE_KSET_SIZE bytes apart
+ * (larger than sizeof(struct pcache_cache_kset)), so plain array indexing
+ * on cache->ksets must not be used.
+ */
+static inline struct pcache_cache_kset *get_kset(struct pcache_cache *cache, u32 kset_id)
+{
+ return (void *)cache->ksets + PCACHE_KSET_SIZE * kset_id;
+}
+
+/* Return the data head belonging to the CPU this code is running on. */
+static inline struct pcache_cache_data_head *get_data_head(struct pcache_cache *cache)
+{
+ return this_cpu_ptr(cache->data_heads);
+}
+
+/* True when the key carries PCACHE_CACHE_KEY_FLAGS_EMPTY. */
+static inline bool cache_key_empty(struct pcache_cache_key *key)
+{
+	return !!(key->flags & PCACHE_CACHE_KEY_FLAGS_EMPTY);
+}
+
+/* True when the key carries PCACHE_CACHE_KEY_FLAGS_CLEAN. */
+static inline bool cache_key_clean(struct pcache_cache_key *key)
+{
+	return !!(key->flags & PCACHE_CACHE_KEY_FLAGS_CLEAN);
+}
+
+/* Copy the whole position (segment pointer and offset) from @src to @dst. */
+static inline void cache_pos_copy(struct pcache_cache_pos *dst, struct pcache_cache_pos *src)
+{
+ memcpy(dst, src, sizeof(struct pcache_cache_pos));
+}
+
+/**
+ * cache_seg_is_ctrl_seg - Tell whether a cache segment is the cache ctrl segment.
+ * @cache_seg_id: ID of the cache segment.
+ *
+ * The segment control area of the first cache segment (cache segment ID 0)
+ * is extended to double as the cache control (pcache_cache_ctrl) for the
+ * entire PCACHE cache, so only ID 0 qualifies.
+ *
+ * Return: true if @cache_seg_id is the segment storing pcache_cache_ctrl.
+ */
+static inline bool cache_seg_is_ctrl_seg(u32 cache_seg_id)
+{
+	return !cache_seg_id;
+}
+
+/**
+ * cache_key_cutfront - Cuts a specified length from the front of a cache key.
+ * @key: Pointer to pcache_cache_key structure.
+ * @cut_len: Length to cut from the front.
+ *
+ * Advances the cache key position by cut_len and adjusts offset and length accordingly.
+ * The cache position is only advanced when the key has a cache segment.
+ *
+ * Note: @cut_len is not checked against key->len here; the caller must make
+ * sure it does not exceed it.
+ */
+static inline void cache_key_cutfront(struct pcache_cache_key *key, u32 cut_len)
+{
+ if (key->cache_pos.cache_seg)
+ cache_pos_advance(&key->cache_pos, cut_len);
+
+ key->off += cut_len;
+ key->len -= cut_len;
+}
+
+/**
+ * cache_key_cutback - Cuts a specified length from the back of a cache key.
+ * @key: Pointer to pcache_cache_key structure.
+ * @cut_len: Length to cut from the back.
+ *
+ * Reduces the length of the cache key by cut_len. The offset and the cache
+ * position are left untouched.
+ *
+ * Note: @cut_len is not checked against key->len here; the caller must make
+ * sure it does not exceed it.
+ */
+static inline void cache_key_cutback(struct pcache_cache_key *key, u32 cut_len)
+{
+ key->len -= cut_len;
+}
+
+/*
+ * Unlink @key from the rbtree of its subtree, clear its flags and drop one
+ * reference. A key without a subtree is a bug (BUG_ON).
+ *
+ * NOTE(review): no lock is taken here; presumably the caller holds the
+ * subtree's tree_lock -- confirm at the call sites.
+ */
+static inline void cache_key_delete(struct pcache_cache_key *key)
+{
+ struct pcache_cache_subtree *cache_subtree;
+
+ cache_subtree = key->cache_subtree;
+ BUG_ON(!cache_subtree);
+
+ rb_erase(&key->rb_node, &cache_subtree->root);
+ key->flags = 0;
+ cache_key_put(key);
+}
+
+/* True when the data CRC flag is set in the in-memory cache_info. */
+static inline bool cache_data_crc_on(struct pcache_cache *cache)
+{
+	return !!(cache->cache_info.flags & PCACHE_CACHE_FLAGS_DATA_CRC);
+}
+
+/* Extract the cache mode (PCACHE_CACHE_MODE_*) from the in-memory cache_info flags. */
+static inline u32 cache_mode_get(struct pcache_cache *cache)
+{
+ return FIELD_GET(PCACHE_CACHE_FLAGS_CACHE_MODE_MASK, cache->cache_info.flags);
+}
+
+/*
+ * Store @cache_mode in the cache-mode bit field of the in-memory
+ * cache_info flags. Nothing is written to media here.
+ */
+static inline void cache_mode_set(struct pcache_cache *cache, u32 cache_mode)
+{
+	u32 flags = cache->cache_info.flags;
+
+	flags &= ~PCACHE_CACHE_FLAGS_CACHE_MODE_MASK;
+	flags |= FIELD_PREP(PCACHE_CACHE_FLAGS_CACHE_MODE_MASK, cache_mode);
+
+	cache->cache_info.flags = flags;
+}
+
+/**
+ * cache_key_data_crc - Compute the CRC of the data a cache key refers to.
+ * @key: Pointer to the pcache_cache_key structure.
+ *
+ * Return: crc32c, seeded with PCACHE_CRC_SEED, over key->len bytes starting
+ * at the key's cache position.
+ */
+static inline u32 cache_key_data_crc(struct pcache_cache_key *key)
+{
+	return crc32c(PCACHE_CRC_SEED, cache_pos_addr(&key->cache_pos), key->len);
+}
+
+/*
+ * Compute the CRC of an on-media kset. The first 4 bytes (the crc field
+ * itself) are skipped. A kset flagged PCACHE_KSET_FLAGS_LAST is covered up
+ * to the end of the fixed header only; otherwise the key_num trailing keys
+ * are included as well.
+ */
+static inline u32 cache_kset_crc(struct pcache_cache_kset_onmedia *kset_onmedia)
+{
+	bool last = kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST;
+	u32 total = last ? sizeof(struct pcache_cache_kset_onmedia) :
+			   struct_size(kset_onmedia, data, kset_onmedia->key_num);
+
+	return crc32c(PCACHE_CRC_SEED, (void *)kset_onmedia + 4, total - 4);
+}
+
+/* Size in bytes of the kset header plus its key_num trailing keys. */
+static inline u32 get_kset_onmedia_size(struct pcache_cache_kset_onmedia *kset_onmedia)
+{
+ return struct_size_t(struct pcache_cache_kset_onmedia, data, kset_onmedia->key_num);
+}
+
+/**
+ * cache_seg_remain - Compute the space left in a cache segment.
+ * @pos: Pointer to pcache_cache_pos structure.
+ *
+ * Return: the segment's data_size minus the position's offset, i.e. the
+ * number of bytes between @pos and the end of the segment data.
+ */
+static inline u32 cache_seg_remain(struct pcache_cache_pos *pos)
+{
+	return pos->cache_seg->segment.data_size - pos->seg_off;
+}
+
+/**
+ * cache_key_invalid - Check whether a cache key has been invalidated.
+ * @key: Pointer to pcache_cache_key structure.
+ *
+ * When the GC (garbage collection) thread identifies a segment as
+ * reclaimable it increments the segment's generation (gen) but does not
+ * immediately remove all related cache keys. A key whose recorded
+ * generation is older than that of its segment is therefore stale.
+ * Empty keys are never reported as invalid.
+ *
+ * Return: true if the key's generation is less than the generation of its
+ * segment; false otherwise.
+ */
+static inline bool cache_key_invalid(struct pcache_cache_key *key)
+{
+	return !cache_key_empty(key) &&
+	       key->seg_gen < key->cache_pos.cache_seg->gen;
+}
+
+/**
+ * cache_key_lstart - Retrieves the logical start offset of a cache key.
+ * @key: Pointer to pcache_cache_key structure.
+ *
+ * Return: the logical start offset (key->off) for the cache key.
+ */
+static inline u64 cache_key_lstart(struct pcache_cache_key *key)
+{
+ return key->off;
+}
+
+/**
+ * cache_key_lend - Retrieves the logical end offset of a cache key.
+ * @key: Pointer to pcache_cache_key structure.
+ *
+ * Return: the logical end offset (key->off + key->len) for the cache key,
+ * i.e. the first offset past the key's range.
+ */
+static inline u64 cache_key_lend(struct pcache_cache_key *key)
+{
+ return key->off + key->len;
+}
+
+/*
+ * Copy the descriptive fields of @key_src into @key_dst: range, generation,
+ * tree/subtree, flags and cache position. The reference count and the
+ * rb_node/list_node linkage of @key_dst are not touched.
+ */
+static inline void cache_key_copy(struct pcache_cache_key *key_dst, struct pcache_cache_key *key_src)
+{
+ key_dst->off = key_src->off;
+ key_dst->len = key_src->len;
+ key_dst->seg_gen = key_src->seg_gen;
+ key_dst->cache_tree = key_src->cache_tree;
+ key_dst->cache_subtree = key_src->cache_subtree;
+ key_dst->flags = key_src->flags;
+
+ cache_pos_copy(&key_dst->cache_pos, &key_src->cache_pos);
+}
+
+/**
+ * cache_pos_onmedia_crc - Calculates the CRC for an on-media cache position.
+ * @pos_om: Pointer to pcache_cache_pos_onmedia structure.
+ *
+ * Delegates to pcache_meta_crc() over the whole structure.
+ * NOTE(review): the claim that the first 4 bytes are excluded depends on
+ * pcache_meta_crc(), which is not visible here -- confirm.
+ *
+ * Return: the computed CRC value.
+ */
+static inline u32 cache_pos_onmedia_crc(struct pcache_cache_pos_onmedia *pos_om)
+{
+ return pcache_meta_crc(&pos_om->header, sizeof(struct pcache_cache_pos_onmedia));
+}
+
+void cache_pos_encode(struct pcache_cache *cache,
+ struct pcache_cache_pos_onmedia *pos_onmedia,
+ struct pcache_cache_pos *pos, u64 seq, u32 *index);
+int cache_pos_decode(struct pcache_cache *cache,
+ struct pcache_cache_pos_onmedia *pos_onmedia,
+ struct pcache_cache_pos *pos, u64 *seq, u32 *index);
+
+/*
+ * Persist cache->key_tail into the key_tail_pos slots of the cache ctrl
+ * area, using the next sequence number (key_tail_seq is incremented first).
+ */
+static inline void cache_encode_key_tail(struct pcache_cache *cache)
+{
+ cache_pos_encode(cache, cache->cache_ctrl->key_tail_pos,
+ &cache->key_tail, ++cache->key_tail_seq,
+ &cache->key_tail_index);
+}
+
+/*
+ * Load cache->key_tail, key_tail_seq and key_tail_index from the
+ * key_tail_pos slots. Returns 0 or the error from cache_pos_decode().
+ */
+static inline int cache_decode_key_tail(struct pcache_cache *cache)
+{
+ return cache_pos_decode(cache, cache->cache_ctrl->key_tail_pos,
+ &cache->key_tail, &cache->key_tail_seq,
+ &cache->key_tail_index);
+}
+
+/*
+ * Persist cache->dirty_tail into the dirty_tail_pos slots of the cache ctrl
+ * area, using the next sequence number (dirty_tail_seq is incremented first).
+ */
+static inline void cache_encode_dirty_tail(struct pcache_cache *cache)
+{
+ cache_pos_encode(cache, cache->cache_ctrl->dirty_tail_pos,
+ &cache->dirty_tail, ++cache->dirty_tail_seq,
+ &cache->dirty_tail_index);
+}
+
+/*
+ * Load cache->dirty_tail, dirty_tail_seq and dirty_tail_index from the
+ * dirty_tail_pos slots. Returns 0 or the error from cache_pos_decode().
+ */
+static inline int cache_decode_dirty_tail(struct pcache_cache *cache)
+{
+ return cache_pos_decode(cache, cache->cache_ctrl->dirty_tail_pos,
+ &cache->dirty_tail, &cache->dirty_tail_seq,
+ &cache->dirty_tail_index);
+}
+
+int pcache_cache_init(void);
+void pcache_cache_exit(void);
+#endif /* _PCACHE_CACHE_H */
diff --git a/drivers/md/dm-pcache/cache_dev.c b/drivers/md/dm-pcache/cache_dev.c
new file mode 100644
index 000000000000..ece689e6ce59
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_dev.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/blkdev.h>
+#include <linux/dax.h>
+#include <linux/vmalloc.h>
+#include <linux/parser.h>
+
+#include "cache_dev.h"
+#include "backing_dev.h"
+#include "cache.h"
+#include "dm_pcache.h"
+
+/* Tear down the vmap()ed view of the cache device, if one was built. */
+static void cache_dev_dax_exit(struct pcache_cache_dev *cache_dev)
+{
+	if (!cache_dev->use_vmap)
+		return;
+
+	vunmap(cache_dev->mapping);
+}
+
+/**
+ * build_vmap - Map a DAX range that is not contiguous into one virtual range.
+ * @dax_dev: DAX device to query.
+ * @total_pages: Number of pages to map, starting at page offset 0.
+ * @vaddr: Receives the vmap() address on success.
+ *
+ * Queries the device chunk by chunk with dax_direct_access(), records the
+ * struct page of every pfn in a temporary array and vmap()s that array.
+ * The temporary array is freed on every path.
+ *
+ * Returns 0 on success, -ENOMEM if the array or the vmap() cannot be
+ * allocated, -EOPNOTSUPP if a chunk's first pfn is not valid, the error from
+ * dax_direct_access(), or -EINVAL if it reports an empty chunk.
+ */
+static int build_vmap(struct dax_device *dax_dev, long total_pages, void **vaddr)
+{
+ struct page **pages;
+ long i = 0, chunk;
+ unsigned long pfn;
+ int ret;
+
+ pages = vmalloc_array(total_pages, sizeof(struct page *));
+ if (!pages)
+ return -ENOMEM;
+
+ do {
+ /* ask for everything that is still unmapped, starting at page i */
+ chunk = dax_direct_access(dax_dev, i, total_pages - i,
+ DAX_ACCESS, NULL, &pfn);
+ if (chunk <= 0) {
+ /* a zero-length chunk would never make progress */
+ ret = chunk ? chunk : -EINVAL;
+ goto out_free;
+ }
+
+ if (!pfn_valid(pfn)) {
+ ret = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ /* pfns inside one chunk are consecutive */
+ while (chunk-- && i < total_pages) {
+ pages[i++] = pfn_to_page(pfn);
+ pfn++;
+ /* yield the CPU every 16 pages */
+ if (!(i & 15))
+ cond_resched();
+ }
+ } while (i < total_pages);
+
+ *vaddr = vmap(pages, total_pages, VM_MAP, PAGE_KERNEL);
+ if (!*vaddr) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ ret = 0;
+
+out_free:
+ vfree(pages);
+ return ret;
+}
+
+/**
+ * cache_dev_dax_init - Establish a kernel mapping of the whole cache device.
+ * @cache_dev: Cache device whose dm_dev supplies the block and DAX devices.
+ *
+ * Checks that the device holds at least PCACHE_CACHE_DEV_SIZE_MIN bytes, then
+ * asks dax_direct_access() for the full page range under dax_read_lock().
+ * If all pages come back in one call the returned address is used directly;
+ * otherwise build_vmap() assembles the mapping and use_vmap is set.
+ *
+ * Returns 0 on success, -ENOSPC for an undersized device, -EOPNOTSUPP when
+ * the first pfn is not valid, or the error reported by dax_direct_access()
+ * or build_vmap().
+ */
+static int cache_dev_dax_init(struct pcache_cache_dev *cache_dev)
+{
+	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
+	struct dax_device *dax_dev;
+	long total_pages, mapped_pages;
+	u64 bdev_size;
+	void *vaddr;
+	int ret;
+	int id;
+	unsigned long pfn;
+
+	dax_dev = cache_dev->dm_dev->dax_dev;
+	/* total size check */
+	bdev_size = bdev_nr_bytes(cache_dev->dm_dev->bdev);
+	if (bdev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
+		/* terminate the message like every other log line in this file */
+		pcache_dev_err(pcache, "dax device is too small, required at least %llu\n",
+			       PCACHE_CACHE_DEV_SIZE_MIN);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	total_pages = bdev_size >> PAGE_SHIFT;
+	/* attempt: direct-map the whole range */
+	id = dax_read_lock();
+	mapped_pages = dax_direct_access(dax_dev, 0, total_pages,
+					 DAX_ACCESS, &vaddr, &pfn);
+	if (mapped_pages < 0) {
+		pcache_dev_err(pcache, "dax_direct_access failed: %ld\n", mapped_pages);
+		ret = mapped_pages;
+		goto unlock;
+	}
+
+	if (!pfn_valid(pfn)) {
+		ret = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	if (mapped_pages == total_pages) {
+		/* success: contiguous direct mapping */
+		cache_dev->mapping = vaddr;
+	} else {
+		/* need vmap fallback */
+		ret = build_vmap(dax_dev, total_pages, &vaddr);
+		if (ret) {
+			pcache_dev_err(pcache, "vmap fallback failed: %d\n", ret);
+			goto unlock;
+		}
+
+		cache_dev->mapping = vaddr;
+		cache_dev->use_vmap = true;
+	}
+	dax_read_unlock(id);
+
+	return 0;
+unlock:
+	dax_read_unlock(id);
+out:
+	return ret;
+}
+
+/**
+ * cache_dev_zero_range - Zero a range of the cache device mapping and flush it.
+ * @cache_dev: Cache device the range belongs to.
+ * @pos: Start address of the range.
+ * @size: Number of bytes to clear.
+ */
+void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size)
+{
+	struct dax_device *dax_dev = cache_dev->dm_dev->dax_dev;
+
+	memset(pos, 0, size);
+	dax_flush(dax_dev, pos, size);
+}
+
+/*
+ * Copy the superblock from the cache device mapping into @sb using the
+ * machine-check-safe copy. Returns 0, or -EIO if any byte was left uncopied.
+ */
+static int sb_read(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
+{
+	unsigned long uncopied;
+
+	uncopied = copy_mc_to_kernel(sb, CACHE_DEV_SB(cache_dev), sizeof(*sb));
+
+	return uncopied ? -EIO : 0;
+}
+
+/* Write @sb to its slot in the cache device mapping, then issue pmem_wmb(). */
+static void sb_write(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
+{
+	memcpy_flushcache(CACHE_DEV_SB(cache_dev), sb, sizeof(*sb));
+	pmem_wmb();
+}
+
+/**
+ * sb_init - Build a fresh superblock for an unformatted cache device.
+ * @cache_dev: Cache device being formatted.
+ * @sb: In-memory superblock to fill; its magic must currently be zero.
+ *
+ * Fills in flags, magic, segment count and CRC (computed over everything
+ * after the leading 4-byte crc field), then zeroes the cache info and cache
+ * ctrl areas on the device. The superblock itself is not written here.
+ *
+ * Returns 0 on success, -EEXIST if @sb already carries a magic, or -ENOSPC
+ * if the device is smaller than PCACHE_CACHE_DEV_SIZE_MIN.
+ */
+static int sb_init(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
+{
+	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
+	u64 nr_segs;
+	u64 cache_dev_size;
+	u64 magic;
+	u32 flags = 0;
+
+	magic = le64_to_cpu(sb->magic);
+	if (magic)
+		return -EEXIST;
+
+	cache_dev_size = bdev_nr_bytes(file_bdev(cache_dev->dm_dev->bdev_file));
+	if (cache_dev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
+		/* terminate the message like every other log line in this file */
+		pcache_dev_err(pcache, "dax device is too small, required at least %llu\n",
+			       PCACHE_CACHE_DEV_SIZE_MIN);
+		return -ENOSPC;
+	}
+
+	/* segments start after the metadata areas; a partial tail segment is dropped */
+	nr_segs = (cache_dev_size - PCACHE_SEGMENTS_OFF) / PCACHE_SEG_SIZE;
+
+#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
+	flags |= PCACHE_SB_F_BIGENDIAN;
+#endif
+	sb->flags = cpu_to_le32(flags);
+	sb->magic = cpu_to_le64(PCACHE_MAGIC);
+	sb->seg_num = cpu_to_le32(nr_segs);
+	/* crc must be computed last: it covers the fields set above */
+	sb->crc = cpu_to_le32(crc32c(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4));
+
+	cache_dev_zero_range(cache_dev, CACHE_DEV_CACHE_INFO(cache_dev),
+			     PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX +
+			     PCACHE_CACHE_CTRL_SIZE);
+
+	return 0;
+}
+
+/**
+ * sb_validate - Check that a superblock is usable on this machine.
+ * @cache_dev: Cache device the superblock was read from.
+ * @sb: In-memory copy of the superblock.
+ *
+ * Verifies the magic, the CRC over everything after the leading 4-byte crc
+ * field, and that the PCACHE_SB_F_BIGENDIAN flag matches the byte order this
+ * code was compiled for.
+ *
+ * Returns 0 if all checks pass, -EINVAL otherwise.
+ */
+static int sb_validate(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
+{
+ struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
+ u32 flags;
+ u32 crc;
+
+ if (le64_to_cpu(sb->magic) != PCACHE_MAGIC) {
+ pcache_dev_err(pcache, "unexpected magic: %llx\n",
+ le64_to_cpu(sb->magic));
+ return -EINVAL;
+ }
+
+ /* same range as used when the crc is generated: skip the crc field itself */
+ crc = crc32c(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4);
+ if (crc != le32_to_cpu(sb->crc)) {
+ pcache_dev_err(pcache, "corrupted sb: %u, expected: %u\n", crc, le32_to_cpu(sb->crc));
+ return -EINVAL;
+ }
+
+ flags = le32_to_cpu(sb->flags);
+#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
+ /* big-endian build: the device must have been formatted big-endian */
+ if (!(flags & PCACHE_SB_F_BIGENDIAN)) {
+ pcache_dev_err(pcache, "cache_dev is not big endian\n");
+ return -EINVAL;
+ }
+#else
+ /* little-endian build: refuse a device formatted big-endian */
+ if (flags & PCACHE_SB_F_BIGENDIAN) {
+ pcache_dev_err(pcache, "cache_dev is big endian\n");
+ return -EINVAL;
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Record the segment count and allocate a zeroed bitmap with one bit per
+ * segment. Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
+ */
+static int cache_dev_init(struct pcache_cache_dev *cache_dev, u32 seg_num)
+{
+	unsigned long *bitmap;
+
+	cache_dev->seg_num = seg_num;
+
+	bitmap = kvcalloc(BITS_TO_LONGS(seg_num), sizeof(*bitmap), GFP_KERNEL);
+	cache_dev->seg_bitmap = bitmap;
+
+	return bitmap ? 0 : -ENOMEM;
+}
+
+/* Release the segment bitmap allocated by cache_dev_init(). */
+static void cache_dev_exit(struct pcache_cache_dev *cache_dev)
+{
+	unsigned long *bitmap = cache_dev->seg_bitmap;
+
+	kvfree(bitmap);
+}
+
+/**
+ * cache_dev_stop - Undo cache_dev_start() for @pcache's cache device.
+ * @pcache: Owning dm_pcache instance.
+ *
+ * Frees the segment bitmap first, then drops the device mapping.
+ */
+void cache_dev_stop(struct dm_pcache *pcache)
+{
+	cache_dev_exit(&pcache->cache_dev);
+	cache_dev_dax_exit(&pcache->cache_dev);
+}
+
+/**
+ * cache_dev_start - Map the cache device and load (or create) its superblock.
+ * @pcache: Owning dm_pcache instance.
+ *
+ * Maps the device, reads the superblock, and formats it in memory when the
+ * magic is zero. The superblock is validated and the segment bitmap is
+ * allocated; a freshly formatted superblock is written to the device only
+ * after all of that has succeeded.
+ *
+ * Returns 0 on success or a negative errno; on failure the device mapping is
+ * released again.
+ */
+int cache_dev_start(struct dm_pcache *pcache)
+{
+	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
+	struct pcache_sb sb;
+	bool format = false;
+	int ret;
+
+	mutex_init(&cache_dev->seg_lock);
+
+	ret = cache_dev_dax_init(cache_dev);
+	if (ret) {
+		/* terminate the message like every other log line in this file */
+		pcache_dev_err(pcache, "failed to init cache_dev %s via dax way: %d.\n",
+			       cache_dev->dm_dev->name, ret);
+		goto err;
+	}
+
+	ret = sb_read(cache_dev, &sb);
+	if (ret)
+		goto dax_release;
+
+	/* a zero magic marks an unformatted device */
+	if (le64_to_cpu(sb.magic) == 0) {
+		format = true;
+		ret = sb_init(cache_dev, &sb);
+		if (ret < 0)
+			goto dax_release;
+	}
+
+	ret = sb_validate(cache_dev, &sb);
+	if (ret)
+		goto dax_release;
+
+	cache_dev->sb_flags = le32_to_cpu(sb.flags);
+	ret = cache_dev_init(cache_dev, le32_to_cpu(sb.seg_num));
+	if (ret)
+		goto dax_release;
+
+	/* persist the new superblock only once nothing else can fail */
+	if (format)
+		sb_write(cache_dev, &sb);
+
+	return 0;
+
+dax_release:
+	cache_dev_dax_exit(cache_dev);
+err:
+	return ret;
+}
+
+/**
+ * cache_dev_get_empty_segment_id - Claim the lowest-numbered free segment.
+ * @cache_dev: Cache device owning the segment bitmap.
+ * @seg_id: Receives the result of the bitmap search.
+ *
+ * Under seg_lock, searches seg_bitmap for the first clear bit and sets it.
+ *
+ * Returns 0 on success or -ENOSPC when no clear bit exists; in that case
+ * *seg_id is left equal to seg_num.
+ */
+int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id)
+{
+	int ret = -ENOSPC;
+
+	mutex_lock(&cache_dev->seg_lock);
+
+	*seg_id = find_next_zero_bit(cache_dev->seg_bitmap, cache_dev->seg_num, 0);
+	if (*seg_id != cache_dev->seg_num) {
+		__set_bit(*seg_id, cache_dev->seg_bitmap);
+		ret = 0;
+	}
+
+	mutex_unlock(&cache_dev->seg_lock);
+
+	return ret;
+}
diff --git a/drivers/md/dm-pcache/cache_dev.h b/drivers/md/dm-pcache/cache_dev.h
new file mode 100644
index 000000000000..6251eb4ebe96
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_dev.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_CACHE_DEV_H
+#define _PCACHE_CACHE_DEV_H
+
+#include <linux/device.h>
+#include <linux/device-mapper.h>
+
+#include "pcache_internal.h"
+
+/* Magic value stored in the superblock of a formatted cache device. */
+#define PCACHE_MAGIC 0x65B05EFA96C596EFULL
+
+/*
+ * On-device layout, as byte offsets from the start of the mapping:
+ * superblock, then PCACHE_META_INDEX_MAX cache info slots, then the cache
+ * ctrl area, then the segments.
+ */
+#define PCACHE_SB_OFF (4 * PCACHE_KB)
+#define PCACHE_SB_SIZE (4 * PCACHE_KB)
+
+#define PCACHE_CACHE_INFO_OFF (PCACHE_SB_OFF + PCACHE_SB_SIZE)
+#define PCACHE_CACHE_INFO_SIZE (4 * PCACHE_KB)
+
+#define PCACHE_CACHE_CTRL_OFF (PCACHE_CACHE_INFO_OFF + (PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX))
+#define PCACHE_CACHE_CTRL_SIZE (4 * PCACHE_KB)
+
+#define PCACHE_SEGMENTS_OFF (PCACHE_CACHE_CTRL_OFF + PCACHE_CACHE_CTRL_SIZE)
+#define PCACHE_SEG_INFO_SIZE (4 * PCACHE_KB)
+
+#define PCACHE_CACHE_DEV_SIZE_MIN (512 * PCACHE_MB) /* 512 MB */
+#define PCACHE_SEG_SIZE (16 * PCACHE_MB) /* Size of each PCACHE segment (16 MB) */
+
+/*
+ * Accessors for the regions of a mapped cache device. Every macro argument
+ * is parenthesized so that expression arguments (e.g. "&pcache->cache_dev"
+ * or "base + i") expand with the intended precedence.
+ */
+#define CACHE_DEV_SB(cache_dev) ((struct pcache_sb *)((cache_dev)->mapping + PCACHE_SB_OFF))
+#define CACHE_DEV_CACHE_INFO(cache_dev) ((void *)((cache_dev)->mapping) + PCACHE_CACHE_INFO_OFF)
+#define CACHE_DEV_CACHE_CTRL(cache_dev) ((void *)((cache_dev)->mapping) + PCACHE_CACHE_CTRL_OFF)
+#define CACHE_DEV_SEGMENTS(cache_dev) ((void *)((cache_dev)->mapping) + PCACHE_SEGMENTS_OFF)
+/* Start of segment @id; the id is widened to u64 before scaling by the segment size. */
+#define CACHE_DEV_SEGMENT(cache_dev, id) ((void *)CACHE_DEV_SEGMENTS(cache_dev) + (u64)(id) * PCACHE_SEG_SIZE)
+
+/*
+ * PCACHE SB flags configured during formatting
+ *
+ * The PCACHE_SB_F_xxx flags define registration requirements based on cache_dev
+ * formatting. For a machine to register a cache_dev:
+ * - PCACHE_SB_F_BIGENDIAN: Requires a big-endian machine.
+ */
+#define PCACHE_SB_F_BIGENDIAN BIT(0)
+
+/*
+ * On-media superblock; all fields are stored little-endian.
+ * The crc covers the structure from byte 4 onwards, which is why it is the
+ * first field.
+ */
+struct pcache_sb {
+ /* crc32c over the rest of the structure, seeded with PCACHE_CRC_SEED */
+ __le32 crc;
+ /* PCACHE_SB_F_xxx flags */
+ __le32 flags;
+ /* PCACHE_MAGIC once formatted; zero means unformatted */
+ __le64 magic;
+
+ /* number of segments on the device */
+ __le32 seg_num;
+};
+
+/* In-memory state of an opened cache device. */
+struct pcache_cache_dev {
+ /* CPU-order copies of the superblock flags and segment count */
+ u32 sb_flags;
+ u32 seg_num;
+ /* kernel address of the start of the device */
+ void *mapping;
+ /* true when mapping came from vmap() and must be vunmap()ed */
+ bool use_vmap;
+
+ struct dm_dev *dm_dev;
+
+ /* seg_lock is held while searching and updating seg_bitmap */
+ struct mutex seg_lock;
+ /* one bit per segment; a set bit marks the segment as handed out */
+ unsigned long *seg_bitmap;
+};
+
+struct dm_pcache;
+/* Map the cache device and load or create its superblock; 0 or negative errno. */
+int cache_dev_start(struct dm_pcache *pcache);
+/* Release what cache_dev_start() set up. */
+void cache_dev_stop(struct dm_pcache *pcache);
+
+/* Zero @size bytes at @pos inside the device mapping and flush them. */
+void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size);
+
+/* Claim the first free segment; returns 0 with *seg_id set, or -ENOSPC. */
+int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id);
+
+#endif /* _PCACHE_CACHE_DEV_H */
diff --git a/drivers/md/dm-pcache/cache_gc.c b/drivers/md/dm-pcache/cache_gc.c
new file mode 100644
index 000000000000..94f8b276a021
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_gc.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "cache.h"
+#include "backing_dev.h"
+#include "cache_dev.h"
+#include "dm_pcache.h"
+
+/**
+ * cache_key_gc - Drop a key's reference on its cache segment.
+ * @cache: Pointer to the pcache_cache structure (not used here).
+ * @key: Cache key being garbage collected.
+ *
+ * Calls cache_seg_put() on the segment that @key's cache position points at.
+ * NOTE(review): what happens to the segment once its last reference is
+ * dropped is decided by cache_seg_put() - confirm there.
+ */
+static void cache_key_gc(struct pcache_cache *cache, struct pcache_cache_key *key)
+{
+	struct pcache_cache_segment *seg = key->cache_pos.cache_seg;
+
+	cache_seg_put(seg);
+}
+
+/**
+ * need_gc - Decide whether the kset at the key tail should be collected now.
+ * @cache: Pointer to the pcache_cache structure.
+ * @dirty_tail: Snapshot of the dirty tail position.
+ * @key_tail: Snapshot of the key tail position.
+ *
+ * Copies the kset at @key_tail into cache->gc_kset_onmedia_buf as a side
+ * effect, so the caller can use it after a true return.
+ *
+ * Returns false when the key tail has caught up with the dirty tail, the
+ * kset cannot be read, its magic or CRC does not match, or fewer segments are
+ * in use than the configured GC percentage of cache->n_segs. Returns true
+ * otherwise.
+ */
+static bool need_gc(struct pcache_cache *cache, struct pcache_cache_pos *dirty_tail, struct pcache_cache_pos *key_tail)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_kset_onmedia *kset_onmedia;
+	void *dirty_addr, *key_addr;
+	u32 segs_used, segs_gc_threshold, to_copy;
+	int ret;
+
+	dirty_addr = cache_pos_addr(dirty_tail);
+	key_addr = cache_pos_addr(key_tail);
+	if (dirty_addr == key_addr) {
+		pcache_dev_debug(pcache, "key tail is equal to dirty tail: %u:%u\n",
+				 dirty_tail->cache_seg->cache_seg_id,
+				 dirty_tail->seg_off);
+		return false;
+	}
+
+	kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->gc_kset_onmedia_buf;
+
+	/* never read past the end of the segment */
+	to_copy = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - key_tail->seg_off);
+	ret = copy_mc_to_kernel(kset_onmedia, key_addr, to_copy);
+	if (ret) {
+		/* terminate the message like every other log line in this function */
+		pcache_dev_err(pcache, "error to read kset: %d\n", ret);
+		return false;
+	}
+
+	/* Check if kset_onmedia is corrupted */
+	if (kset_onmedia->magic != PCACHE_KSET_MAGIC) {
+		pcache_dev_debug(pcache, "gc error: magic is not as expected. key_tail: %u:%u magic: %llx, expected: %llx\n",
+				 key_tail->cache_seg->cache_seg_id, key_tail->seg_off,
+				 kset_onmedia->magic, PCACHE_KSET_MAGIC);
+		return false;
+	}
+
+	/* Verify the CRC of the kset_onmedia */
+	if (kset_onmedia->crc != cache_kset_crc(kset_onmedia)) {
+		pcache_dev_debug(pcache, "gc error: crc is not as expected. crc: %x, expected: %x\n",
+				 cache_kset_crc(kset_onmedia), kset_onmedia->crc);
+		return false;
+	}
+
+	segs_used = bitmap_weight(cache->seg_map, cache->n_segs);
+	segs_gc_threshold = cache->n_segs * pcache_cache_get_gc_percent(cache) / 100;
+	if (segs_used < segs_gc_threshold) {
+		pcache_dev_debug(pcache, "segs_used: %u, segs_gc_threshold: %u\n", segs_used, segs_gc_threshold);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * last_kset_gc - Advances the garbage collection for the last kset.
+ * @cache: Pointer to the pcache_cache structure.
+ * @kset_onmedia: Pointer to the kset_onmedia structure for the last kset.
+ *
+ * Moves cache->key_tail to offset 0 of the segment named by
+ * @kset_onmedia->next_cache_seg_id and encodes the new tail under
+ * key_tail_lock, then clears the previous tail segment's bit in
+ * cache->seg_map under seg_map_lock.
+ */
+static void last_kset_gc(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
+{
+ struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+ struct pcache_cache_segment *cur_seg, *next_seg;
+
+ cur_seg = cache->key_tail.cache_seg;
+
+ /*
+ * NOTE(review): next_cache_seg_id is used as an index without a range
+ * check here - presumably the caller has validated the kset; confirm.
+ */
+ next_seg = &cache->segments[kset_onmedia->next_cache_seg_id];
+
+ mutex_lock(&cache->key_tail_lock);
+ cache->key_tail.cache_seg = next_seg;
+ cache->key_tail.seg_off = 0;
+ cache_encode_key_tail(cache);
+ mutex_unlock(&cache->key_tail_lock);
+
+ pcache_dev_debug(pcache, "gc advance kset seg: %u\n", cur_seg->cache_seg_id);
+
+ /* the old segment's bit is cleared only after the tail has moved off it */
+ spin_lock(&cache->seg_map_lock);
+ __clear_bit(cur_seg->cache_seg_id, cache->seg_map);
+ spin_unlock(&cache->seg_map_lock);
+}
+
+/**
+ * pcache_cache_gc_fn - Delayed-work handler that garbage collects ksets.
+ * @work: The work item embedded in pcache_cache as gc_work.work.
+ *
+ * Repeatedly snapshots the dirty and key tails and, while need_gc() agrees,
+ * processes the kset at the key tail: a "last" kset moves the tail to the
+ * next segment, any other kset has each of its keys decoded and released
+ * before the tail is advanced past it.
+ *
+ * The work re-arms itself after PCACHE_CACHE_GC_INTERVAL unless the device
+ * is stopping or a GC error has been recorded.
+ */
+void pcache_cache_gc_fn(struct work_struct *work)
+{
+ struct pcache_cache *cache = container_of(work, struct pcache_cache, gc_work.work);
+ struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+ struct pcache_cache_pos dirty_tail, key_tail;
+ struct pcache_cache_kset_onmedia *kset_onmedia;
+ struct pcache_cache_key_onmedia *key_onmedia;
+ struct pcache_cache_key *key;
+ int ret;
+ int i;
+
+ /* need_gc() fills this buffer with the kset at the key tail */
+ kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->gc_kset_onmedia_buf;
+
+ while (true) {
+ /* returning here means the work is not queued again */
+ if (pcache_is_stopping(pcache) || atomic_read(&cache->gc_errors))
+ return;
+
+ /* Get new tail positions */
+ mutex_lock(&cache->dirty_tail_lock);
+ cache_pos_copy(&dirty_tail, &cache->dirty_tail);
+ mutex_unlock(&cache->dirty_tail_lock);
+
+ mutex_lock(&cache->key_tail_lock);
+ cache_pos_copy(&key_tail, &cache->key_tail);
+ mutex_unlock(&cache->key_tail_lock);
+
+ if (!need_gc(cache, &dirty_tail, &key_tail))
+ break;
+
+ if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
+ /* Don't move to the next segment if dirty_tail has not moved */
+ if (dirty_tail.cache_seg == key_tail.cache_seg)
+ break;
+
+ last_kset_gc(cache, kset_onmedia);
+ continue;
+ }
+
+ for (i = 0; i < kset_onmedia->key_num; i++) {
+ /* on-stack scratch key, only used to decode and release */
+ struct pcache_cache_key key_tmp = { 0 };
+
+ key_onmedia = &kset_onmedia->data[i];
+
+ key = &key_tmp;
+ cache_key_init(&cache->req_key_tree, key);
+
+ ret = cache_key_decode(cache, key_onmedia, key);
+ if (ret) {
+ /* return without re-arm gc work, and prevent future
+ * gc, because we can't retry the partial-gc-ed kset
+ */
+ atomic_inc(&cache->gc_errors);
+ pcache_dev_err(pcache, "failed to decode cache key in gc\n");
+ return;
+ }
+
+ cache_key_gc(cache, key);
+ }
+
+ pcache_dev_debug(pcache, "gc advance: %u:%u %u\n",
+ key_tail.cache_seg->cache_seg_id,
+ key_tail.seg_off,
+ get_kset_onmedia_size(kset_onmedia));
+
+ /* step the real key tail over the kset that was just processed */
+ mutex_lock(&cache->key_tail_lock);
+ cache_pos_advance(&cache->key_tail, get_kset_onmedia_size(kset_onmedia));
+ cache_encode_key_tail(cache);
+ mutex_unlock(&cache->key_tail_lock);
+ }
+
+ queue_delayed_work(cache_get_wq(cache), &cache->gc_work, PCACHE_CACHE_GC_INTERVAL);
+}
diff --git a/drivers/md/dm-pcache/cache_key.c b/drivers/md/dm-pcache/cache_key.c
new file mode 100644
index 000000000000..2b77e121f89b
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_key.c
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "cache.h"
+#include "backing_dev.h"
+#include "cache_dev.h"
+#include "dm_pcache.h"
+
+/* All-zero kset header; written over stale on-media ksets to invalidate them. */
+struct pcache_cache_kset_onmedia pcache_empty_kset = { 0 };
+
+/**
+ * cache_key_init - Put a cache key into its initial, unlinked state.
+ * @cache_tree: Tree the key belongs to.
+ * @key: Key to initialise.
+ *
+ * Initialises the reference count, records the owning tree and marks the
+ * key as being on no list and in no rb-tree. Other fields are left untouched.
+ */
+void cache_key_init(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key)
+{
+	key->cache_tree = cache_tree;
+	kref_init(&key->ref);
+	RB_CLEAR_NODE(&key->rb_node);
+	INIT_LIST_HEAD(&key->list_node);
+}
+
+/**
+ * cache_key_alloc - Allocate a zeroed, initialised cache key.
+ * @cache_tree: Tree whose key_pool supplies the key.
+ * @gfp_mask: Allocation flags passed to mempool_alloc().
+ *
+ * Returns the new key, or NULL if the pool could not provide one.
+ */
+struct pcache_cache_key *cache_key_alloc(struct pcache_cache_tree *cache_tree, gfp_t gfp_mask)
+{
+	struct pcache_cache_key *key = mempool_alloc(&cache_tree->key_pool, gfp_mask);
+
+	if (key) {
+		memset(key, 0, sizeof(*key));
+		cache_key_init(cache_tree, key);
+	}
+
+	return key;
+}
+
+/**
+ * cache_key_get - Take an additional reference on a cache key.
+ * @key: Key to pin.
+ *
+ * Pair every call with cache_key_put().
+ */
+void cache_key_get(struct pcache_cache_key *key)
+{
+	struct kref *ref = &key->ref;
+
+	kref_get(ref);
+}
+
+/**
+ * cache_key_destroy - kref release callback for a cache key.
+ * @ref: The kref embedded in the key.
+ *
+ * Runs when the last reference is dropped and returns the key to the
+ * key_pool mempool of the tree it belongs to.
+ */
+static void cache_key_destroy(struct kref *ref)
+{
+	struct pcache_cache_key *key = container_of(ref, struct pcache_cache_key, ref);
+
+	mempool_free(key, &key->cache_tree->key_pool);
+}
+
+/* Drop one reference on @key; the last put releases it via cache_key_destroy(). */
+void cache_key_put(struct pcache_cache_key *key)
+{
+	struct kref *ref = &key->ref;
+
+	kref_put(ref, cache_key_destroy);
+}
+
+/**
+ * cache_pos_advance - Move a cache position forward within its segment.
+ * @pos: Position to advance.
+ * @len: Number of bytes to move by.
+ *
+ * The caller must ensure @len fits in the space left in the segment;
+ * violating that triggers BUG_ON().
+ */
+void cache_pos_advance(struct pcache_cache_pos *pos, u32 len)
+{
+	/* the move must stay inside the current segment */
+	BUG_ON(len > cache_seg_remain(pos));
+
+	pos->seg_off = pos->seg_off + len;
+}
+
+/*
+ * Fill @key_onmedia from the in-memory @key. The data CRC is only stored
+ * when data CRC is enabled for @cache; otherwise that field is not written.
+ */
+static void cache_key_encode(struct pcache_cache *cache,
+			     struct pcache_cache_key_onmedia *key_onmedia,
+			     struct pcache_cache_key *key)
+{
+	/* where the cached data lives */
+	key_onmedia->cache_seg_id = key->cache_pos.cache_seg->cache_seg_id;
+	key_onmedia->cache_seg_off = key->cache_pos.seg_off;
+
+	/* which logical range it covers */
+	key_onmedia->off = key->off;
+	key_onmedia->len = key->len;
+
+	key_onmedia->flags = key->flags;
+	key_onmedia->seg_gen = key->seg_gen;
+
+	if (!cache_data_crc_on(cache))
+		return;
+
+	key_onmedia->data_crc = cache_key_data_crc(key);
+}
+
+/**
+ * cache_key_decode - Rebuild an in-memory cache key from its on-media form.
+ * @cache: Cache the key belongs to.
+ * @key_onmedia: On-media key to read.
+ * @key: In-memory key to fill.
+ *
+ * The on-media segment id is range-checked against cache->n_segs before it
+ * is used to index cache->segments, so a bad id is reported as an error
+ * instead of producing an out-of-bounds segment pointer. When data CRC is
+ * enabled the stored CRC is compared with the one computed from the cached
+ * data.
+ *
+ * Returns 0 on success or -EIO on an invalid segment id or CRC mismatch.
+ * @key may be partially filled when an error is returned.
+ */
+int cache_key_decode(struct pcache_cache *cache,
+		     struct pcache_cache_key_onmedia *key_onmedia,
+		     struct pcache_cache_key *key)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	u32 seg_id = key_onmedia->cache_seg_id;
+
+	key->off = key_onmedia->off;
+	key->len = key_onmedia->len;
+
+	/* the id comes from media: never index cache->segments with it unchecked */
+	if (seg_id >= cache->n_segs) {
+		pcache_dev_err(pcache, "key: %llu:%u invalid cache_seg_id: %u\n",
+			       key->off, key->len, seg_id);
+		return -EIO;
+	}
+
+	key->cache_pos.cache_seg = &cache->segments[seg_id];
+	key->cache_pos.seg_off = key_onmedia->cache_seg_off;
+
+	key->seg_gen = key_onmedia->seg_gen;
+	key->flags = key_onmedia->flags;
+
+	if (cache_data_crc_on(cache) &&
+	    key_onmedia->data_crc != cache_key_data_crc(key)) {
+		pcache_dev_err(pcache, "key: %llu:%u seg %u:%u data_crc error: %x, expected: %x\n",
+			       key->off, key->len, key->cache_pos.cache_seg->cache_seg_id,
+			       key->cache_pos.seg_off, cache_key_data_crc(key), key_onmedia->data_crc);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Write a "last" kset at the key head that names @next_seg as the segment
+ * where the key log continues, then advance the key head past it.
+ */
+static void append_last_kset(struct pcache_cache *cache, u32 next_seg)
+{
+	struct pcache_cache_kset_onmedia last = { 0 };
+
+	last.magic = PCACHE_KSET_MAGIC;
+	last.next_cache_seg_id = next_seg;
+	last.flags |= PCACHE_KSET_FLAGS_LAST;
+	/* compute the crc only after every other field is in place */
+	last.crc = cache_kset_crc(&last);
+
+	memcpy_flushcache(get_key_head_addr(cache), &last, sizeof(last));
+	pmem_wmb();
+
+	cache_pos_advance(&cache->key_head, sizeof(last));
+}
+
+/**
+ * cache_kset_close - Write an in-memory kset to the key log on media.
+ * @cache: Pointer to the pcache_cache structure.
+ * @kset: Kset whose accumulated keys should be written out.
+ *
+ * Does nothing for a kset without keys. Otherwise, under key_head_lock, makes
+ * sure the current key segment can hold this kset plus one more kset header
+ * (kept free for a later "last" kset); if not, a new segment is obtained, a
+ * "last" kset linking to it is appended, and the key head moves there. The
+ * kset is then stamped with magic and CRC, written at the key head, and the
+ * in-memory header is reset.
+ *
+ * Returns 0 on success or -EBUSY if no cache segment is available.
+ */
+int cache_kset_close(struct pcache_cache *cache, struct pcache_cache_kset *kset)
+{
+ struct pcache_cache_kset_onmedia *kset_onmedia;
+ u32 kset_onmedia_size;
+ int ret;
+
+ kset_onmedia = &kset->kset_onmedia;
+
+ if (!kset_onmedia->key_num)
+ return 0;
+
+ /* header plus key_num entries of the trailing data[] array */
+ kset_onmedia_size = struct_size(kset_onmedia, data, kset_onmedia->key_num);
+
+ spin_lock(&cache->key_head_lock);
+again:
+ /* Reserve space for the last kset */
+ if (cache_seg_remain(&cache->key_head) < kset_onmedia_size + sizeof(struct pcache_cache_kset_onmedia)) {
+ struct pcache_cache_segment *next_seg;
+
+ next_seg = get_cache_segment(cache);
+ if (!next_seg) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* clear outdated kset in next seg */
+ memcpy_flushcache(next_seg->segment.data, &pcache_empty_kset,
+ sizeof(struct pcache_cache_kset_onmedia));
+ append_last_kset(cache, next_seg->cache_seg_id);
+ cache->key_head.cache_seg = next_seg;
+ cache->key_head.seg_off = 0;
+ /* re-check the space against the fresh segment */
+ goto again;
+ }
+
+ kset_onmedia->magic = PCACHE_KSET_MAGIC;
+ kset_onmedia->crc = cache_kset_crc(kset_onmedia);
+
+ /* clear outdated kset after current kset */
+ memcpy_flushcache(get_key_head_addr(cache) + kset_onmedia_size, &pcache_empty_kset,
+ sizeof(struct pcache_cache_kset_onmedia));
+ /* write current kset into segment */
+ memcpy_flushcache(get_key_head_addr(cache), kset_onmedia, kset_onmedia_size);
+ pmem_wmb();
+
+ /* reset kset_onmedia */
+ memset(kset_onmedia, 0, sizeof(struct pcache_cache_kset_onmedia));
+ cache_pos_advance(&cache->key_head, kset_onmedia_size);
+
+ ret = 0;
+out:
+ spin_unlock(&cache->key_head_lock);
+
+ return ret;
+}
+
+/**
+ * cache_key_append - Append a cache key to the related kset.
+ * @cache: Pointer to the pcache_cache structure.
+ * @key: Pointer to the cache key structure to append.
+ * @force_close: Need to close current kset if true.
+ *
+ * This function encodes the key into the next free slot of the kset selected
+ * by the key's offset. If the kset is now full, or @force_close is set, the
+ * kset is closed (written to media). Otherwise a delayed flush work is queued
+ * to write the kset later.
+ *
+ * If closing fails, the key is removed from the kset again and the error is
+ * returned.
+ *
+ * Returns 0 on success, or a negative error code on failure.
+ */
+int cache_key_append(struct pcache_cache *cache, struct pcache_cache_key *key, bool force_close)
+{
+ struct pcache_cache_kset *kset;
+ struct pcache_cache_kset_onmedia *kset_onmedia;
+ struct pcache_cache_key_onmedia *key_onmedia;
+ u32 kset_id = get_kset_id(cache, key->off);
+ int ret = 0;
+
+ kset = get_kset(cache, kset_id);
+ kset_onmedia = &kset->kset_onmedia;
+
+ spin_lock(&kset->kset_lock);
+ key_onmedia = &kset_onmedia->data[kset_onmedia->key_num];
+ cache_key_encode(cache, key_onmedia, key);
+
+ /* Check if the current kset has reached the maximum number of keys */
+ if (++kset_onmedia->key_num == PCACHE_KSET_KEYS_MAX || force_close) {
+ /* If full, close the kset */
+ ret = cache_kset_close(cache, kset);
+ if (ret) {
+ /* undo the increment so the slot is reused by the next append */
+ kset_onmedia->key_num--;
+ goto out;
+ }
+ } else {
+ /* If not full, queue a delayed work to flush the kset */
+ queue_delayed_work(cache_get_wq(cache), &kset->flush_work, 1 * HZ);
+ }
+out:
+ spin_unlock(&kset->kset_lock);
+
+ return ret;
+}
+
+/**
+ * cache_subtree_walk - Traverse the cache tree.
+ * @ctx: Pointer to the context structure for traversal.
+ *
+ * This function walks the tree in order, starting from ctx->start_node, and
+ * classifies each visited key (key_tmp) by how its range relates to ctx->key.
+ * The matching callback in @ctx is invoked if it is set; a non-zero return
+ * from a callback stops the walk. The walk also stops once key_tmp reaches
+ * or passes the end of ctx->key, or when ctx->walk_done() returns true.
+ *
+ * If ctx->walk_finally is set it is always called last and its return value
+ * replaces the result of the walk.
+ *
+ * Returns SUBTREE_WALK_RET_OK, the non-zero value returned by a callback, or
+ * the value returned by ctx->walk_finally.
+ */
+int cache_subtree_walk(struct pcache_cache_subtree_walk_ctx *ctx)
+{
+ struct pcache_cache_key *key_tmp, *key;
+ struct rb_node *node_tmp;
+ int ret = SUBTREE_WALK_RET_OK;
+
+ key = ctx->key;
+ node_tmp = ctx->start_node;
+
+ while (node_tmp) {
+ if (ctx->walk_done && ctx->walk_done(ctx))
+ break;
+
+ key_tmp = CACHE_KEY(node_tmp);
+ /*
+ * If key_tmp ends before the start of key, continue to the next node.
+ * |----------|
+ * |=====|
+ */
+ if (cache_key_lend(key_tmp) <= cache_key_lstart(key)) {
+ if (ctx->after) {
+ ret = ctx->after(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+ goto next;
+ }
+
+ /*
+ * If key_tmp starts after the end of key, stop traversing.
+ * |--------|
+ * |====|
+ */
+ if (cache_key_lstart(key_tmp) >= cache_key_lend(key)) {
+ if (ctx->before) {
+ ret = ctx->before(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+ break;
+ }
+
+ /* Handle overlapping keys */
+ if (cache_key_lstart(key_tmp) >= cache_key_lstart(key)) {
+ /*
+ * key_tmp starts inside key and ends at or after key's end:
+ * the tail of key overlaps the front of key_tmp.
+ * |----------------| key_tmp
+ * |===========| key
+ */
+ if (cache_key_lend(key_tmp) >= cache_key_lend(key)) {
+ if (ctx->overlap_tail) {
+ ret = ctx->overlap_tail(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+ break;
+ }
+
+ /*
+ * If key_tmp is contained within key.
+ * |----| key_tmp
+ * |==========| key
+ */
+ if (ctx->overlap_contain) {
+ ret = ctx->overlap_contain(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+
+ goto next;
+ }
+
+ /*
+ * If key_tmp starts before key and ends after key
+ * (key is contained within key_tmp).
+ * |-----------| key_tmp
+ * |====| key
+ */
+ if (cache_key_lend(key_tmp) > cache_key_lend(key)) {
+ if (ctx->overlap_contained) {
+ ret = ctx->overlap_contained(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+ break;
+ }
+
+ /*
+ * If key_tmp starts before key and ends within key.
+ * |--------| key_tmp
+ * |==========| key
+ */
+ if (ctx->overlap_head) {
+ ret = ctx->overlap_head(key, key_tmp, ctx);
+ if (ret)
+ goto out;
+ }
+next:
+ node_tmp = rb_next(node_tmp);
+ }
+
+out:
+ /* runs on every exit path and may override ret */
+ if (ctx->walk_finally)
+ ret = ctx->walk_finally(ctx, ret);
+
+ return ret;
+}
+
+/**
+ * cache_subtree_search - Locate the insertion point for a key in a subtree.
+ * @cache_subtree: Subtree to descend.
+ * @key: Key being looked up; only its offset is compared.
+ * @parentp: If non-NULL, receives the last node visited (NULL for an empty tree).
+ * @newp: If non-NULL, receives the empty link where a new node would be attached.
+ * @delete_key_list: Every invalid key met on the way down is added here.
+ *
+ * Descends from the root, going right past nodes whose offset is smaller
+ * than @key's and left otherwise. Invalid keys are only collected, not
+ * removed; the caller is expected to delete them.
+ *
+ * Returns the last node with an offset smaller than @key's, or the first
+ * node of the tree when no such node exists (NULL for an empty tree).
+ */
+struct rb_node *cache_subtree_search(struct pcache_cache_subtree *cache_subtree, struct pcache_cache_key *key,
+				     struct rb_node **parentp, struct rb_node ***newp,
+				     struct list_head *delete_key_list)
+{
+	struct rb_node **link = &cache_subtree->root.rb_node;
+	struct rb_node *last_visited = NULL;
+	struct rb_node *pred = NULL;
+
+	while (*link) {
+		struct rb_node *cur = *link;
+		struct pcache_cache_key *cur_key;
+
+		cur_key = container_of(cur, struct pcache_cache_key, rb_node);
+		if (cache_key_invalid(cur_key))
+			list_add(&cur_key->list_node, delete_key_list);
+
+		last_visited = cur;
+		if (cur_key->off < key->off) {
+			/* smaller offset: candidate predecessor, keep looking right */
+			pred = cur;
+			link = &cur->rb_right;
+		} else {
+			link = &cur->rb_left;
+		}
+	}
+
+	/* nothing smaller than key: fall back to the leftmost node */
+	if (!pred)
+		pred = rb_first(&cache_subtree->root);
+
+	if (parentp)
+		*parentp = last_visited;
+
+	if (newp)
+		*newp = link;
+
+	return pred;
+}
+
+/*
+ * Hand out the key stashed in ctx->pre_alloc_key (clearing the stash), or
+ * fall back to a GFP_NOWAIT allocation when nothing is stashed. May return
+ * NULL in the fallback case.
+ */
+static struct pcache_cache_key *get_pre_alloc_key(struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache_key *stashed = ctx->pre_alloc_key;
+
+	if (!stashed)
+		return cache_key_alloc(ctx->cache_tree, GFP_NOWAIT);
+
+	ctx->pre_alloc_key = NULL;
+
+	return stashed;
+}
+
+/**
+ * fixup_overlap_tail - Fix up an existing key whose front is covered by the new key.
+ * @key: The new cache key being inserted; must not be empty.
+ * @key_tmp: The existing key whose front overlaps the tail of @key.
+ * @ctx: Context for the cache tree walk (unused here).
+ *
+ * An empty @key_tmp is deleted outright. Otherwise the overlapped front of
+ * @key_tmp is cut off, and @key_tmp is deleted if nothing remains.
+ *
+ * Returns SUBTREE_WALK_RET_RESEARCH when @key_tmp was deleted, otherwise
+ * SUBTREE_WALK_RET_OK.
+ */
+static int fixup_overlap_tail(struct pcache_cache_key *key,
+			      struct pcache_cache_key *key_tmp,
+			      struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	/*
+	 *     |----------------|	key_tmp
+	 * |===========|		key
+	 */
+	BUG_ON(cache_key_empty(key));
+
+	if (!cache_key_empty(key_tmp)) {
+		cache_key_cutfront(key_tmp, cache_key_lend(key) - cache_key_lstart(key_tmp));
+		if (key_tmp->len != 0)
+			return SUBTREE_WALK_RET_OK;
+	}
+
+	/* either a placeholder or fully consumed: drop it and search again */
+	cache_key_delete(key_tmp);
+
+	return SUBTREE_WALK_RET_RESEARCH;
+}
+
+/**
+ * fixup_overlap_contain - Drop an existing key that the new key fully covers.
+ * @key: The new cache key being inserted; must not be empty.
+ * @key_tmp: The existing key lying entirely inside @key.
+ * @ctx: Context for the cache tree walk (unused here).
+ *
+ * @key_tmp is deleted unconditionally. Because the tree has changed, the
+ * caller is told to search again.
+ *
+ * Always returns SUBTREE_WALK_RET_RESEARCH.
+ */
+static int fixup_overlap_contain(struct pcache_cache_key *key,
+				 struct pcache_cache_key *key_tmp,
+				 struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	/*
+	 *    |----|		key_tmp
+	 * |==========|		key
+	 */
+	BUG_ON(cache_key_empty(key));
+
+	cache_key_delete(key_tmp);
+
+	return SUBTREE_WALK_RET_RESEARCH;
+}
+
+/**
+ * fixup_overlap_contained - Handle overlap when a new key is contained in an existing key.
+ * @key: The new cache key being inserted.
+ * @key_tmp: The existing cache key that overlaps with the new key.
+ * @ctx: Context for the cache tree walk.
+ *
+ * This function adjusts the existing key if the new key is contained
+ * within it. If the existing key is empty, it indicates a placeholder key
+ * that was inserted during a miss read. This placeholder will later be
+ * updated with real data from the backing_dev, making it no longer an empty key.
+ * A placeholder is only trimmed at the back; a non-empty key is split into
+ * the part before @key (kept in @key_tmp) and the part after @key (a copy
+ * inserted as a separate key).
+ *
+ * If we delete a key or insert a key, the structure of the entire cache tree may change,
+ * requiring a full re-search of the tree to find a new insertion point.
+ *
+ * Returns SUBTREE_WALK_RET_OK if the tree is unchanged,
+ * SUBTREE_WALK_RET_RESEARCH if a key was deleted or inserted, or
+ * SUBTREE_WALK_RET_NEED_KEY if no key could be obtained for the split.
+ */
+static int fixup_overlap_contained(struct pcache_cache_key *key,
+ struct pcache_cache_key *key_tmp, struct pcache_cache_subtree_walk_ctx *ctx)
+{
+ struct pcache_cache_tree *cache_tree = ctx->cache_tree;
+
+ /*
+ * |-----------| key_tmp
+ * |====| key
+ */
+ BUG_ON(cache_key_empty(key));
+ if (cache_key_empty(key_tmp)) {
+ /* If key_tmp is empty, don't split it;
+ * it's a placeholder key for miss reads that will be updated later.
+ */
+ cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
+ if (key_tmp->len == 0) {
+ cache_key_delete(key_tmp);
+ return SUBTREE_WALK_RET_RESEARCH;
+ }
+ } else {
+ struct pcache_cache_key *key_fixup;
+ bool need_research = false;
+
+ /* the walk context is asked to supply a key before anything is modified */
+ key_fixup = get_pre_alloc_key(ctx);
+ if (!key_fixup)
+ return SUBTREE_WALK_RET_NEED_KEY;
+
+ /* copy first: key_fixup needs key_tmp's original range */
+ cache_key_copy(key_fixup, key_tmp);
+
+ /* Split key_tmp based on the new key's range */
+ cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
+ if (key_tmp->len == 0) {
+ cache_key_delete(key_tmp);
+ need_research = true;
+ }
+
+ /* Create a new portion for key_fixup */
+ cache_key_cutfront(key_fixup, cache_key_lend(key) - cache_key_lstart(key_tmp));
+ if (key_fixup->len == 0) {
+ cache_key_put(key_fixup);
+ } else {
+ /* Insert the new key into the cache */
+ cache_key_insert(cache_tree, key_fixup, false);
+ need_research = true;
+ }
+
+ if (need_research)
+ return SUBTREE_WALK_RET_RESEARCH;
+ }
+
+ return SUBTREE_WALK_RET_OK;
+}
+
+/**
+ * fixup_overlap_head - Fix up an existing key whose back is covered by the new key.
+ * @key: The new cache key being inserted; must not be empty.
+ * @key_tmp: The existing key that starts before @key and ends inside it.
+ * @ctx: Context for the cache tree walk (unused here).
+ *
+ * The part of @key_tmp from @key's start onwards is cut off. If nothing is
+ * left, @key_tmp is deleted and the caller must search the tree again.
+ *
+ * Returns SUBTREE_WALK_RET_RESEARCH when @key_tmp was deleted, otherwise
+ * SUBTREE_WALK_RET_OK.
+ */
+static int fixup_overlap_head(struct pcache_cache_key *key,
+			      struct pcache_cache_key *key_tmp, struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	/*
+	 * |--------|		key_tmp
+	 *   |==========|	key
+	 */
+	BUG_ON(cache_key_empty(key));
+
+	/* trim the back of key_tmp so it ends where key begins */
+	cache_key_cutback(key_tmp, cache_key_lend(key_tmp) - cache_key_lstart(key));
+	if (key_tmp->len != 0)
+		return SUBTREE_WALK_RET_OK;
+
+	/* nothing left of key_tmp */
+	cache_key_delete(key_tmp);
+
+	return SUBTREE_WALK_RET_RESEARCH;
+}
+
+/**
+ * cache_key_insert - Link a cache key into the red-black tree of its subtree.
+ * @cache_tree: Cache tree that owns the target subtree.
+ * @key: Key to insert; its ->cache_subtree is set by this function.
+ * @fixup: When true, existing keys that overlap @key are adjusted by a
+ *         fixup walk before @key is linked.
+ *
+ * The subtree is selected from @key->off. Every key the search puts on
+ * the local delete list is removed and the search is repeated until the
+ * list comes back empty. With @fixup set, the overlap callbacks are run
+ * over the subtree; a RESEARCH result restarts the search, and a NEED_KEY
+ * result allocates a spare key with GFP_NOIO before restarting.
+ *
+ * The NEED_KEY path drops and re-takes cache_subtree->tree_lock around the
+ * allocation, so the caller is expected to hold that lock on entry.
+ */
+void cache_key_insert(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key, bool fixup)
+{
+	struct pcache_cache *cache = cache_tree->cache;
+	struct pcache_cache_subtree_walk_ctx walk_ctx = { 0 };
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_key *key_tmp = NULL, *key_next;
+	struct rb_node **new, *parent = NULL;
+	struct rb_node *prev_node = NULL;
+	LIST_HEAD(delete_key_list);
+	int ret;
+
+	cache_subtree = get_subtree(cache_tree, key->off);
+	key->cache_subtree = cache_subtree;
+
+search:
+	prev_node = cache_subtree_search(cache_subtree, key, &parent, &new, &delete_key_list);
+	if (!list_empty(&delete_key_list)) {
+		/* The search found invalid keys: remove them, then look again. */
+		list_for_each_entry_safe(key_tmp, key_next, &delete_key_list, list_node) {
+			list_del_init(&key_tmp->list_node);
+			cache_key_delete(key_tmp);
+		}
+		goto search;
+	}
+
+	if (!fixup)
+		goto link;
+
+	/* Describe the walk: where to start, the new key, and the overlap handlers. */
+	walk_ctx.cache_tree = cache_tree;
+	walk_ctx.start_node = prev_node;
+	walk_ctx.key = key;
+
+	walk_ctx.overlap_tail = fixup_overlap_tail;
+	walk_ctx.overlap_head = fixup_overlap_head;
+	walk_ctx.overlap_contain = fixup_overlap_contain;
+	walk_ctx.overlap_contained = fixup_overlap_contained;
+
+	ret = cache_subtree_walk(&walk_ctx);
+	if (ret == SUBTREE_WALK_RET_RESEARCH)
+		goto search;
+
+	if (ret == SUBTREE_WALK_RET_NEED_KEY) {
+		/* The allocation is done with the tree lock released. */
+		spin_unlock(&cache_subtree->tree_lock);
+		pcache_dev_debug(CACHE_TO_PCACHE(cache), "allocate pre_alloc_key with GFP_NOIO");
+		walk_ctx.pre_alloc_key = cache_key_alloc(cache_tree, GFP_NOIO);
+		spin_lock(&cache_subtree->tree_lock);
+		goto search;
+	}
+
+	/* Any other walk result is unexpected for a fixup walk. */
+	BUG_ON(ret != SUBTREE_WALK_RET_OK);
+
+link:
+	/* Release a spare key that the walk ended up not consuming. */
+	if (walk_ctx.pre_alloc_key)
+		cache_key_put(walk_ctx.pre_alloc_key);
+
+	/* Link the new key at the slot found by the last search and rebalance. */
+	rb_link_node(&key->rb_node, parent, new);
+	rb_insert_color(&key->rb_node, &cache_subtree->root);
+}
+
+/**
+ * clean_fn - Work function that removes invalid keys from the request key tree.
+ * @work: The clean_work member embedded in a struct pcache_cache.
+ *
+ * Each subtree is scanned from its first node under tree_lock and every
+ * key for which cache_key_invalid() is true is deleted. Once
+ * PCACHE_CLEAN_KEYS_MAX keys have been deleted in one pass the lock is
+ * released, the worker sleeps for 1-2 ms and the same subtree is scanned
+ * again from the start. The function returns early as soon as the pcache
+ * device is found to be stopping.
+ */
+void clean_fn(struct work_struct *work)
+{
+	struct pcache_cache *cache = container_of(work, struct pcache_cache, clean_work);
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_key *key;
+	struct rb_node *node;
+	bool batch_full;
+	int i, count;
+
+	for (i = 0; i < cache->req_key_tree.n_subtrees; i++) {
+		cache_subtree = &cache->req_key_tree.subtrees[i];
+
+		do {
+			if (pcache_is_stopping(CACHE_TO_PCACHE(cache)))
+				return;
+
+			batch_full = false;
+			count = 0;
+
+			spin_lock(&cache_subtree->tree_lock);
+			node = rb_first(&cache_subtree->root);
+			while (node) {
+				key = CACHE_KEY(node);
+				/* Step to the successor first; the key may be erased below. */
+				node = rb_next(node);
+				if (cache_key_invalid(key)) {
+					count++;
+					cache_key_delete(key);
+				}
+
+				if (count >= PCACHE_CLEAN_KEYS_MAX) {
+					batch_full = true;
+					break;
+				}
+			}
+			spin_unlock(&cache_subtree->tree_lock);
+
+			/* Batch limit reached: pause with the lock released, then rescan. */
+			if (batch_full)
+				usleep_range(1000, 2000);
+		} while (batch_full);
+	}
+}
+
+/*
+ * kset_flush_fn - Delayed-work handler that closes a cache kset.
+ *
+ * Does nothing when the pcache device is stopping. Otherwise the kset is
+ * closed with cache_kset_close() while kset_lock is held. A non-zero
+ * result from cache_kset_close() is treated as a transient failure: the
+ * same work item is queued again with a 100 ms delay so that the close is
+ * retried later.
+ */
+void kset_flush_fn(struct work_struct *work)
+{
+	struct pcache_cache_kset *kset = container_of(work, struct pcache_cache_kset, flush_work.work);
+	struct pcache_cache *cache = kset->cache;
+	int ret;
+
+	if (pcache_is_stopping(CACHE_TO_PCACHE(cache)))
+		return;
+
+	spin_lock(&kset->kset_lock);
+	ret = cache_kset_close(cache, kset);
+	spin_unlock(&kset->kset_lock);
+
+	if (!ret)
+		return;
+
+	/* The kset could not be closed this time; try again shortly. */
+	queue_delayed_work(cache_get_wq(cache), &kset->flush_work, msecs_to_jiffies(100));
+}
+
+/*
+ * kset_replay - Rebuild in-memory cache keys from one on-media kset.
+ * @cache: Cache whose request key tree receives the keys.
+ * @kset_onmedia: Kset read from the cache device.
+ *
+ * For each of the kset_onmedia->key_num entries a key is allocated and
+ * decoded, and the segment it points at is marked in cache->seg_map. A key
+ * whose segment generation is older than the segment's current generation
+ * is released; any other key is inserted, with overlap fixup, into its
+ * subtree under that subtree's tree_lock. In both cases a reference on the
+ * segment is taken with cache_seg_get().
+ *
+ * Return: 0 on success, or the error returned by cache_key_decode().
+ */
+static int kset_replay(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
+{
+	struct pcache_cache_key_onmedia *key_onmedia;
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_segment *cache_seg;
+	struct pcache_cache_key *key;
+	int ret;
+	int i;
+
+	for (i = 0; i < kset_onmedia->key_num; i++) {
+		key_onmedia = &kset_onmedia->data[i];
+
+		key = cache_key_alloc(&cache->req_key_tree, GFP_NOIO);
+		ret = cache_key_decode(cache, key_onmedia, key);
+		if (ret) {
+			cache_key_put(key);
+			return ret;
+		}
+
+		/*
+		 * Remember the segment now. On the stale path below
+		 * cache_key_put() drops the reference obtained at allocation,
+		 * so the key must not be dereferenced after that point.
+		 */
+		cache_seg = key->cache_pos.cache_seg;
+
+		__set_bit(cache_seg->cache_seg_id, cache->seg_map);
+
+		/* Check if the segment generation is valid for insertion. */
+		if (key->seg_gen < cache_seg->gen) {
+			cache_key_put(key);
+		} else {
+			cache_subtree = get_subtree(&cache->req_key_tree, key->off);
+			spin_lock(&cache_subtree->tree_lock);
+			cache_key_insert(&cache->req_key_tree, key, true);
+			spin_unlock(&cache_subtree->tree_lock);
+		}
+
+		/* Taken for stale keys as well, exactly as before the fix. */
+		cache_seg_get(cache_seg);
+	}
+
+	return 0;
+}
+
+/*
+ * cache_replay - Replay the on-media kset log, starting at cache->key_tail.
+ * @cache: Cache to rebuild.
+ *
+ * Ksets are read one after another into a temporary buffer. Replay stops
+ * at the first kset whose magic or CRC does not match. A kset flagged
+ * PCACHE_KSET_FLAGS_LAST only redirects the read position to offset 0 of
+ * the segment named by next_cache_seg_id; every other kset is handed to
+ * kset_replay() and the position is advanced by its on-media size. When
+ * replay stops normally, the final position is stored in cache->key_head
+ * under key_head_lock.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated, -EIO if
+ * copy_mc_to_kernel() reports a failure, or the error from kset_replay().
+ */
+int cache_replay(struct pcache_cache *cache)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_kset_onmedia *kset_onmedia;
+	struct pcache_cache_pos pos_tail;
+	struct pcache_cache_pos *pos = &pos_tail;
+	u32 to_copy, count = 0;
+	int ret = 0;
+
+	kset_onmedia = kzalloc(PCACHE_KSET_ONMEDIA_SIZE_MAX, GFP_KERNEL);
+	if (!kset_onmedia)
+		return -ENOMEM;
+
+	/* Work on a private copy of the tail position. */
+	cache_pos_copy(pos, &cache->key_tail);
+
+	/*
+	 * Nobody else accesses cache->seg_map during the replay stage, so
+	 * the bit can be set without taking cache->seg_map_lock.
+	 */
+	__set_bit(pos->cache_seg->cache_seg_id, cache->seg_map);
+
+	for (;;) {
+		/* Never read past the end of the current segment. */
+		to_copy = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - pos->seg_off);
+		ret = copy_mc_to_kernel(kset_onmedia, cache_pos_addr(pos), to_copy);
+		if (ret) {
+			ret = -EIO;
+			goto out;
+		}
+
+		/* The first kset that fails validation marks the end of the log. */
+		if (kset_onmedia->magic != PCACHE_KSET_MAGIC)
+			break;
+		if (kset_onmedia->crc != cache_kset_crc(kset_onmedia))
+			break;
+
+		/* A "last" kset only points at the segment where the log continues. */
+		if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
+			pcache_dev_debug(pcache, "last kset replay, next: %u\n", kset_onmedia->next_cache_seg_id);
+
+			pos->cache_seg = &cache->segments[kset_onmedia->next_cache_seg_id];
+			pos->seg_off = 0;
+
+			__set_bit(pos->cache_seg->cache_seg_id, cache->seg_map);
+			continue;
+		}
+
+		ret = kset_replay(cache, kset_onmedia);
+		if (ret)
+			goto out;
+
+		/* Step over the kset that was just replayed. */
+		cache_pos_advance(pos, get_kset_onmedia_size(kset_onmedia));
+
+		/* Give the scheduler a chance once more than 512 ksets were replayed in a row. */
+		if (++count > 512) {
+			cond_resched();
+			count = 0;
+		}
+	}
+
+	/* Replay ended normally: the position reached becomes the new key head. */
+	spin_lock(&cache->key_head_lock);
+	cache_pos_copy(&cache->key_head, pos);
+	spin_unlock(&cache->key_head_lock);
+out:
+	kfree(kset_onmedia);
+	return ret;
+}
+
+/*
+ * cache_tree_init - Set up a cache tree with @n_subtrees empty subtrees.
+ * @cache: Cache that owns the tree.
+ * @cache_tree: Tree to initialise.
+ * @n_subtrees: Number of subtrees to allocate.
+ *
+ * Initialises the key mempool on top of key_cache, allocates the zeroed
+ * subtree array, and gives every subtree an empty RB root and an
+ * initialised tree_lock.
+ *
+ * Return: 0 on success, the error from mempool_init_slab_pool(), or
+ * -ENOMEM if the subtree array cannot be allocated (the mempool is torn
+ * down again in that case).
+ */
+int cache_tree_init(struct pcache_cache *cache, struct pcache_cache_tree *cache_tree, u32 n_subtrees)
+{
+	struct pcache_cache_subtree *cache_subtree;
+	int ret;
+	u32 i;
+
+	cache_tree->cache = cache;
+	cache_tree->n_subtrees = n_subtrees;
+
+	ret = mempool_init_slab_pool(&cache_tree->key_pool, 1024, key_cache);
+	if (ret)
+		return ret;
+
+	/* One entry per subtree, each holding an RB root and its spinlock. */
+	cache_tree->subtrees = kvcalloc(cache_tree->n_subtrees, sizeof(struct pcache_cache_subtree), GFP_KERNEL);
+	if (!cache_tree->subtrees) {
+		mempool_exit(&cache_tree->key_pool);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < cache_tree->n_subtrees; i++) {
+		cache_subtree = &cache_tree->subtrees[i];
+
+		cache_subtree->root = RB_ROOT;
+		spin_lock_init(&cache_subtree->tree_lock);
+	}
+
+	return 0;
+}
+
+/*
+ * cache_tree_clear - Delete every key from every subtree of a cache tree.
+ * @cache_tree: Tree to empty.
+ *
+ * Each subtree is walked in order with its tree_lock held, and every key
+ * found is passed to cache_key_delete().
+ */
+void cache_tree_clear(struct pcache_cache_tree *cache_tree)
+{
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_key *key;
+	struct rb_node *node;
+	u32 i;
+
+	for (i = 0; i < cache_tree->n_subtrees; i++) {
+		cache_subtree = &cache_tree->subtrees[i];
+
+		spin_lock(&cache_subtree->tree_lock);
+		for (node = rb_first(&cache_subtree->root); node; ) {
+			key = CACHE_KEY(node);
+			/* Fetch the successor before the current key goes away. */
+			node = rb_next(node);
+
+			cache_key_delete(key);
+		}
+		spin_unlock(&cache_subtree->tree_lock);
+	}
+}
+
+/*
+ * cache_tree_exit - Release everything held by a cache tree.
+ * @cache_tree: Tree to tear down.
+ *
+ * Deletes all keys, frees the subtree array and finally destroys the key
+ * mempool.
+ * NOTE(review): the keys are presumably cleared first so that they are
+ * released before the pool they were allocated from is destroyed - confirm
+ * against cache_key_delete().
+ */
+void cache_tree_exit(struct pcache_cache_tree *cache_tree)
+{
+ cache_tree_clear(cache_tree);
+ kvfree(cache_tree->subtrees);
+ mempool_exit(&cache_tree->key_pool);
+}
diff --git a/drivers/md/dm-pcache/cache_req.c b/drivers/md/dm-pcache/cache_req.c
new file mode 100644
index 000000000000..27f94c1fa968
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_req.c
@@ -0,0 +1,836 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "cache.h"
+#include "backing_dev.h"
+#include "cache_dev.h"
+#include "dm_pcache.h"
+
+/*
+ * cache_data_head_init - Point the current data head at a fresh cache segment.
+ * @cache: Cache to take the segment from.
+ *
+ * Obtains a segment with get_cache_segment(), takes a reference on it for
+ * the data head and resets the head position to offset 0 of that segment.
+ *
+ * Return: 0 on success, -EBUSY if no cache segment could be obtained.
+ */
+static int cache_data_head_init(struct pcache_cache *cache)
+{
+	struct pcache_cache_data_head *data_head = get_data_head(cache);
+	struct pcache_cache_segment *next_seg = get_cache_segment(cache);
+	struct pcache_cache_pos *head_pos;
+
+	if (!next_seg)
+		return -EBUSY;
+
+	/* This reference belongs to the data head. */
+	cache_seg_get(next_seg);
+
+	head_pos = &data_head->head_pos;
+	head_pos->cache_seg = next_seg;
+	head_pos->seg_off = 0;
+
+	return 0;
+}
+
+/**
+ * cache_data_alloc - Allocate data for a cache key.
+ * @cache: Pointer to the cache structure.
+ * @key: Pointer to the cache key to allocate data for.
+ *
+ * This function tries to allocate space from the cache segment specified by the
+ * data head. If the remaining space in the segment is insufficient to allocate
+ * the requested length for the cache key, it will allocate whatever is available
+ * and adjust the key's length accordingly. This function does not allocate
+ * space that crosses segment boundaries.
+ *
+ * On success key->cache_pos and key->seg_gen describe the allocated space and
+ * a reference on the segment has been taken for the key.
+ *
+ * Return: 0 on success, or the error from cache_data_head_init() when a new
+ * segment is needed for the data head and cannot be set up.
+ */
+static int cache_data_alloc(struct pcache_cache *cache, struct pcache_cache_key *key)
+{
+ struct pcache_cache_data_head *data_head;
+ struct pcache_cache_pos *head_pos;
+ struct pcache_cache_segment *cache_seg;
+ u32 seg_remain;
+ u32 allocated = 0, to_alloc;
+ int ret = 0;
+
+ /*
+ * NOTE(review): preemption is disabled for the whole allocation,
+ * presumably because get_data_head() hands out per-CPU state - confirm.
+ */
+ preempt_disable();
+ data_head = get_data_head(cache);
+again:
+ to_alloc = key->len - allocated;
+ if (!data_head->head_pos.cache_seg) {
+ /* The head has no segment: fall through to the branch that sets one up. */
+ seg_remain = 0;
+ } else {
+ /* The key's data starts at the current head position. */
+ cache_pos_copy(&key->cache_pos, &data_head->head_pos);
+ key->seg_gen = key->cache_pos.cache_seg->gen;
+
+ head_pos = &data_head->head_pos;
+ cache_seg = head_pos->cache_seg;
+ seg_remain = cache_seg_remain(head_pos);
+ }
+
+ /*
+ * The comparison is strict: a request that exactly fills the segment
+ * takes the second branch, which also detaches the head from the
+ * now-full segment.
+ */
+ if (seg_remain > to_alloc) {
+ /* If remaining space in segment is sufficient for the cache key, allocate it. */
+ cache_pos_advance(head_pos, to_alloc);
+ allocated += to_alloc;
+ cache_seg_get(cache_seg);
+ } else if (seg_remain) {
+ /* If remaining space is not enough, allocate the remaining space and adjust the cache key length. */
+ cache_pos_advance(head_pos, seg_remain);
+ key->len = seg_remain;
+
+ /* Get for key: obtain a reference to the cache segment for the key. */
+ cache_seg_get(cache_seg);
+ /* Put for head_pos->cache_seg: release the reference for the current head's segment. */
+ cache_seg_put(head_pos->cache_seg);
+ head_pos->cache_seg = NULL;
+ } else {
+ /* Initialize a new data head if no segment is available. */
+ ret = cache_data_head_init(cache);
+ if (ret)
+ goto out;
+
+ /* Retry the allocation against the newly attached segment. */
+ goto again;
+ }
+
+out:
+ preempt_enable();
+
+ return ret;
+}
+
+/*
+ * cache_copy_from_req_bio - Copy request data from the bio into the cache.
+ * @cache: Cache the key belongs to (not used here).
+ * @key: Key whose cache_pos names the destination and whose len is the
+ *       number of bytes to copy.
+ * @pcache_req: Request whose bio is the data source.
+ * @bio_off: Offset into the bio at which the source data starts.
+ *
+ * Return: the result of segment_copy_from_bio().
+ */
+static int cache_copy_from_req_bio(struct pcache_cache *cache, struct pcache_cache_key *key,
+				struct pcache_request *pcache_req, u32 bio_off)
+{
+	struct pcache_cache_pos *pos = &key->cache_pos;
+
+	return segment_copy_from_bio(&pos->cache_seg->segment, pos->seg_off,
+				     key->len, pcache_req->bio, bio_off);
+}
+
+/*
+ * cache_copy_to_req_bio - Copy cached data into the request's bio.
+ * @cache: Cache the data belongs to (not used here).
+ * @pcache_req: Request whose bio receives the data.
+ * @bio_off: Offset into the bio at which to place the data.
+ * @len: Number of bytes to copy.
+ * @pos: Cache position to read from.
+ * @key_gen: Segment generation recorded in the key being read.
+ *
+ * The generation check and the copy both run under the segment's gen_lock.
+ *
+ * Return: -EINVAL if @key_gen is older than the segment's current
+ * generation, otherwise the result of segment_copy_to_bio().
+ */
+static int cache_copy_to_req_bio(struct pcache_cache *cache, struct pcache_request *pcache_req,
+			    u32 bio_off, u32 len, struct pcache_cache_pos *pos, u64 key_gen)
+{
+	struct pcache_cache_segment *cache_seg = pos->cache_seg;
+	int ret;
+
+	spin_lock(&cache_seg->gen_lock);
+	if (key_gen < cache_seg->gen)
+		ret = -EINVAL;
+	else
+		ret = segment_copy_to_bio(&cache_seg->segment, pos->seg_off, len,
+					  pcache_req->bio, bio_off);
+	spin_unlock(&cache_seg->gen_lock);
+
+	return ret;
+}
+
+/**
+ * miss_read_end_req - Completion handler for a cache-miss read.
+ * @backing_req: The backing request that has completed.
+ * @read_ret: Result of the read from the backing device.
+ *
+ * Nothing is done when the request carries no key in ->priv_data.
+ * Otherwise, under the tree_lock of the key's subtree:
+ *  - if the key is no longer an empty placeholder it is left untouched;
+ *  - if the read failed, or cache space cannot be allocated, the key is
+ *    deleted from the tree;
+ *  - otherwise the data is copied from the request's bio into the cache,
+ *    the key is switched from EMPTY to CLEAN and appended with
+ *    cache_key_append(). If the copy or the append fails, the segment
+ *    reference is dropped and the key is deleted.
+ * In every case the reference held through ->priv_data is put at the end.
+ */
+static void miss_read_end_req(struct pcache_backing_dev_req *backing_req, int read_ret)
+{
+	struct pcache_cache_key *key = backing_req->priv_data;
+	struct pcache_request *pcache_req = backing_req->req.upper_req;
+	struct pcache_cache *cache = backing_req->backing_dev->cache;
+	struct pcache_cache_subtree *cache_subtree;
+
+	if (!key)
+		return;
+
+	cache_subtree = key->cache_subtree;
+
+	spin_lock(&cache_subtree->tree_lock);
+
+	/*
+	 * A key deleted from the subtree by a write has its flags cleared,
+	 * so a key that still tests as empty is still in the subtree.
+	 */
+	if (!cache_key_empty(key))
+		goto unlock;
+
+	/* The backing read failed: there is nothing to cache. */
+	if (read_ret)
+		goto delete_key;
+
+	/* Reserve cache space for the key. */
+	if (cache_data_alloc(cache, key))
+		goto delete_key;
+
+	/* Fill the reserved space with the data just read. */
+	if (cache_copy_from_req_bio(cache, key, pcache_req, backing_req->req.bio_off))
+		goto put_seg;
+
+	key->flags &= ~PCACHE_CACHE_KEY_FLAGS_EMPTY;
+	key->flags |= PCACHE_CACHE_KEY_FLAGS_CLEAN;
+
+	/* Append the key to the cache. */
+	if (cache_key_append(cache, key, false))
+		goto put_seg;
+
+	goto unlock;
+
+put_seg:
+	cache_seg_put(key->cache_pos.cache_seg);
+delete_key:
+	cache_key_delete(key);
+unlock:
+	spin_unlock(&cache_subtree->tree_lock);
+	cache_key_put(key);
+}
+
+/**
+ * submit_cache_miss_req - Submit a backing read for data missing from the cache.
+ * @cache: The cache whose request key tree may receive a placeholder key.
+ * @backing_req: The backing request to submit.
+ *
+ * When @backing_req->priv_data carries a key, that key is first inserted
+ * into the request key tree with overlap fixup. The backing request is
+ * then submitted in either case.
+ *
+ * NOTE(review): cache_key_insert() is called without taking a lock here;
+ * callers presumably already hold the subtree's tree_lock - confirm.
+ */
+static void submit_cache_miss_req(struct pcache_cache *cache, struct pcache_backing_dev_req *backing_req)
+{
+	struct pcache_cache_key *key = backing_req->priv_data;
+
+	/* A request that carries a key also publishes it in the tree. */
+	if (key)
+		cache_key_insert(&cache->req_key_tree, key, true);
+
+	backing_dev_req_submit(backing_req, false);
+}
+
+/*
+ * cache_miss_req_free - Release a miss request that was never submitted.
+ * @backing_req: Request to release.
+ *
+ * If a key is still attached through ->priv_data it is detached and both
+ * of its references are dropped: the one held by ->priv_data and the
+ * initial one from allocation. The backing request is then ended.
+ */
+static void cache_miss_req_free(struct pcache_backing_dev_req *backing_req)
+{
+	struct pcache_cache_key *key = backing_req->priv_data;
+
+	if (key) {
+		backing_req->priv_data = NULL;
+		cache_key_put(key); /* for ->priv_data */
+		cache_key_put(key); /* for init ref in alloc */
+	}
+
+	backing_dev_req_end(backing_req);
+}
+
+/*
+ * cache_miss_req_alloc - Allocate a backing request together with a cache key.
+ * @cache: Cache the request is issued for.
+ * @parent: Upper-level request the backing request belongs to.
+ * @gfp_mask: Allocation flags used for both the request and the key.
+ *
+ * The key is attached to the request through ->priv_data with an extra
+ * reference taken on top of the one from allocation.
+ *
+ * Return: the new backing request, or NULL if either allocation fails.
+ */
+static struct pcache_backing_dev_req *cache_miss_req_alloc(struct pcache_cache *cache,
+							   struct pcache_request *parent,
+							   gfp_t gfp_mask)
+{
+	struct pcache_backing_dev_req_opts req_opts = { 0 };
+	struct pcache_backing_dev *backing_dev = cache->backing_dev;
+	struct pcache_backing_dev_req *backing_req;
+	struct pcache_cache_key *key = NULL;
+
+	req_opts.type = BACKING_DEV_REQ_TYPE_REQ;
+	req_opts.gfp_mask = gfp_mask;
+	req_opts.req.upper_req = parent;
+
+	backing_req = backing_dev_req_alloc(backing_dev, &req_opts);
+	if (!backing_req)
+		return NULL;
+
+	key = cache_key_alloc(&cache->req_key_tree, gfp_mask);
+	if (!key) {
+		/* No key available: hand the backing request back. */
+		cache_miss_req_free(backing_req);
+		return NULL;
+	}
+
+	/* Second reference, owned by ->priv_data. */
+	cache_key_get(key);
+	backing_req->priv_data = key;
+
+	return backing_req;
+}
+
+/*
+ * cache_miss_req_init - Prepare a pre-allocated miss request for submission.
+ * @cache: Cache the request is issued for (not used here).
+ * @backing_req: Request obtained from cache_miss_req_alloc().
+ * @parent: Upper-level request being served.
+ * @off: Offset of the missing range within @parent.
+ * @len: Length of the missing range.
+ * @insert_key: Whether the attached key is to be used as a placeholder.
+ *
+ * The backing request is initialised to read @len bytes at @off with
+ * miss_read_end_req() as its completion handler. With @insert_key set the
+ * attached key is filled in as an EMPTY placeholder for the range;
+ * otherwise the key is detached and both of its references are dropped.
+ */
+static void cache_miss_req_init(struct pcache_cache *cache,
+				struct pcache_backing_dev_req *backing_req,
+				struct pcache_request *parent,
+				u32 off, u32 len, bool insert_key)
+{
+	struct pcache_backing_dev_req_opts req_opts = { 0 };
+	struct pcache_cache_key *key;
+
+	req_opts.type = BACKING_DEV_REQ_TYPE_REQ;
+	req_opts.req.upper_req = parent;
+	req_opts.req.req_off = off;
+	req_opts.req.len = len;
+	req_opts.end_fn = miss_read_end_req;
+
+	backing_dev_req_init(backing_req, &req_opts);
+
+	key = backing_req->priv_data;
+	if (!insert_key) {
+		/* No placeholder wanted: drop the ->priv_data and allocation references. */
+		backing_req->priv_data = NULL;
+		cache_key_put(key);
+		cache_key_put(key);
+		return;
+	}
+
+	/* Describe the missing range; the key carries no data yet. */
+	key->off = parent->off + off;
+	key->len = len;
+	key->flags |= PCACHE_CACHE_KEY_FLAGS_EMPTY;
+}
+
+/*
+ * get_pre_alloc_req - Get a miss request for use during a tree walk.
+ * @ctx: Walk context.
+ *
+ * A request stashed in ctx->pre_alloc_req is handed out first and the
+ * slot is cleared. Without one, a GFP_NOWAIT allocation is attempted.
+ *
+ * Return: a backing request, or NULL if none was stashed and the
+ * GFP_NOWAIT allocation failed.
+ */
+static struct pcache_backing_dev_req *get_pre_alloc_req(struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_backing_dev_req *backing_req = ctx->pre_alloc_req;
+
+	if (!backing_req)
+		return cache_miss_req_alloc(ctx->cache_tree->cache, ctx->pcache_req, GFP_NOWAIT);
+
+	/* Hand over the stashed request; it can be used only once. */
+	ctx->pre_alloc_req = NULL;
+	return backing_req;
+}
+
+/*
+ * Walk callback for a requested range that lies entirely before an
+ * existing cache node, i.e. a complete cache miss for that range:
+ *
+ *          |--------|   key_tmp (existing cached range)
+ * |====|                key (requested range, preceding key_tmp)
+ *
+ * A backing request covering the whole of `key` is set up with a
+ * placeholder key and queued on ctx->submit_req_list for later
+ * submission. ctx->req_done is advanced by the handled length and `key`
+ * is trimmed by the same amount.
+ *
+ * Returns SUBTREE_WALK_RET_NEED_REQ when no backing request is available,
+ * SUBTREE_WALK_RET_OK otherwise.
+ */
+static int read_before(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+				struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req;
+	u32 miss_len;
+
+	backing_req = get_pre_alloc_req(ctx);
+	if (!backing_req)
+		return SUBTREE_WALK_RET_NEED_REQ;
+
+	/* The whole of key is missing and must come from the backing_dev. */
+	miss_len = key->len;
+	cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, miss_len, true);
+	list_add(&backing_req->node, ctx->submit_req_list);
+
+	ctx->req_done += miss_len;
+	cache_key_cutfront(key, miss_len);
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/*
+ * Walk callback for a requested range that starts at or before an existing
+ * cache node and ends inside it:
+ *
+ *      |----------------|   key_tmp (existing cached range)
+ * |===========|             key (requested range)
+ *
+ * The part of `key` in front of `key_tmp` is a miss and is queued as a
+ * backing request with a placeholder key. The overlapping part is either
+ * copied out of the cache or, when `key_tmp` is itself an empty
+ * placeholder, read from the backing device without inserting a new key.
+ */
+static int read_overlap_tail(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+				struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req;
+	u32 io_len;
+	int ret;
+
+	/* Length of the leading part of key that key_tmp does not cover. */
+	io_len = cache_key_lstart(key_tmp) - cache_key_lstart(key);
+	if (io_len) {
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, io_len, true);
+		list_add(&backing_req->node, ctx->submit_req_list);
+
+		ctx->req_done += io_len;
+		cache_key_cutfront(key, io_len);
+	}
+
+	/* Length of the part of key that coincides with key_tmp. */
+	io_len = cache_key_lend(key) - cache_key_lstart(key_tmp);
+	if (!cache_key_empty(key_tmp)) {
+		ret = cache_copy_to_req_bio(cache, ctx->pcache_req, ctx->req_done,
+					    io_len, &key_tmp->cache_pos, key_tmp->seg_gen);
+		if (ret == -EINVAL) {
+			/* The copy was refused with -EINVAL: drop key_tmp and search again. */
+			cache_key_delete(key_tmp);
+			return SUBTREE_WALK_RET_RESEARCH;
+		}
+		if (ret) {
+			ctx->ret = ret;
+			return SUBTREE_WALK_RET_ERR;
+		}
+	} else {
+		/* key_tmp holds no data yet: read this range from the backing_dev. */
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, io_len, false);
+		submit_cache_miss_req(cache, backing_req);
+	}
+
+	ctx->req_done += io_len;
+	cache_key_cutfront(key, io_len);
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/*
+ * Walk callback for a requested range that fully contains an existing
+ * cache node:
+ *
+ *    |----|          key_tmp (existing cached range)
+ * |==========|       key (requested range)
+ *
+ * The part of `key` in front of `key_tmp` is queued as a miss with a
+ * placeholder key. The whole of `key_tmp` is then served, either by
+ * copying from the cache or, when `key_tmp` is an empty placeholder, by a
+ * backing read without a new key. What remains of `key` behind `key_tmp`
+ * is left in `key`.
+ */
+static int read_overlap_contain(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+				struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req;
+	u32 io_len;
+	int ret;
+
+	/* Length of the leading part of key that key_tmp does not cover. */
+	io_len = cache_key_lstart(key_tmp) - cache_key_lstart(key);
+	if (io_len) {
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, io_len, true);
+		list_add(&backing_req->node, ctx->submit_req_list);
+
+		ctx->req_done += io_len;
+		cache_key_cutfront(key, io_len);
+	}
+
+	/* key_tmp lies completely inside key, so its full length is handled. */
+	io_len = key_tmp->len;
+	if (!cache_key_empty(key_tmp)) {
+		ret = cache_copy_to_req_bio(cache, ctx->pcache_req, ctx->req_done,
+					    io_len, &key_tmp->cache_pos, key_tmp->seg_gen);
+		if (ret == -EINVAL) {
+			/* The copy was refused with -EINVAL: drop key_tmp and search again. */
+			cache_key_delete(key_tmp);
+			return SUBTREE_WALK_RET_RESEARCH;
+		}
+		if (ret) {
+			ctx->ret = ret;
+			return SUBTREE_WALK_RET_ERR;
+		}
+	} else {
+		/* key_tmp holds no data yet: read this range from the backing_dev. */
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, io_len, false);
+		submit_cache_miss_req(cache, backing_req);
+	}
+
+	ctx->req_done += io_len;
+	cache_key_cutfront(key, io_len);
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/*
+ * Walk callback for a requested range that lies fully inside an existing
+ * cache node:
+ *
+ * |-----------|      key_tmp (existing cached range)
+ *   |====|           key (requested range, fully within key_tmp)
+ *
+ * When `key_tmp` holds data, the matching part is copied into the
+ * request's bio, starting at the offset of `key` within `key_tmp`. When
+ * `key_tmp` is an empty placeholder, the range is read from the backing
+ * device without inserting a new key. Either way the whole of `key` is
+ * consumed.
+ */
+static int read_overlap_contained(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+				struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req;
+	struct pcache_cache_pos pos;
+	int ret;
+
+	if (!cache_key_empty(key_tmp)) {
+		/* Start reading where key begins inside key_tmp's data. */
+		cache_pos_copy(&pos, &key_tmp->cache_pos);
+		cache_pos_advance(&pos, cache_key_lstart(key) - cache_key_lstart(key_tmp));
+
+		ret = cache_copy_to_req_bio(cache, ctx->pcache_req, ctx->req_done,
+					    key->len, &pos, key_tmp->seg_gen);
+		if (ret == -EINVAL) {
+			/* The copy was refused with -EINVAL: drop key_tmp and search again. */
+			cache_key_delete(key_tmp);
+			return SUBTREE_WALK_RET_RESEARCH;
+		}
+		if (ret) {
+			ctx->ret = ret;
+			return SUBTREE_WALK_RET_ERR;
+		}
+	} else {
+		/* key_tmp holds no data yet: read this range from the backing_dev. */
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, key->len, false);
+		submit_cache_miss_req(cache, backing_req);
+	}
+
+	ctx->req_done += key->len;
+	cache_key_cutfront(key, key->len);
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/*
+ * Walk callback for a requested range that starts inside an existing cache
+ * node and runs past its end:
+ *
+ * |--------|         key_tmp (existing cached range)
+ *     |==========|   key (requested range)
+ *
+ * Only the part of `key` up to the end of `key_tmp` is handled here. It is
+ * copied from the cache, starting at the offset of `key` within `key_tmp`,
+ * or read from the backing device without a new key when `key_tmp` is an
+ * empty placeholder. The remainder stays in `key`.
+ */
+static int read_overlap_head(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
+				struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req;
+	struct pcache_cache_pos pos;
+	u32 io_len;
+	int ret;
+
+	/* Length from the start of key to the end of key_tmp. */
+	io_len = cache_key_lend(key_tmp) - cache_key_lstart(key);
+
+	if (!cache_key_empty(key_tmp)) {
+		/* Start reading where key begins inside key_tmp's data. */
+		cache_pos_copy(&pos, &key_tmp->cache_pos);
+		cache_pos_advance(&pos, cache_key_lstart(key) - cache_key_lstart(key_tmp));
+
+		ret = cache_copy_to_req_bio(cache, ctx->pcache_req, ctx->req_done,
+					    io_len, &pos, key_tmp->seg_gen);
+		if (ret == -EINVAL) {
+			/* The copy was refused with -EINVAL: drop key_tmp and search again. */
+			cache_key_delete(key_tmp);
+			return SUBTREE_WALK_RET_RESEARCH;
+		}
+		if (ret) {
+			ctx->ret = ret;
+			return SUBTREE_WALK_RET_ERR;
+		}
+	} else {
+		/* key_tmp holds no data yet: read this range from the backing_dev. */
+		backing_req = get_pre_alloc_req(ctx);
+		if (!backing_req)
+			return SUBTREE_WALK_RET_NEED_REQ;
+
+		cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, io_len, false);
+		submit_cache_miss_req(cache, backing_req);
+	}
+
+	ctx->req_done += io_len;
+	cache_key_cutfront(key, io_len);
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/**
+ * read_walk_finally - Finish a read walk over one subtree.
+ * @ctx: Walk context holding the request, the remaining key and the list
+ *       of backing requests queued during the walk.
+ * @ret: Result of the walk so far.
+ *
+ * Every backing request queued on ctx->submit_req_list is taken off the
+ * list and submitted, whatever @ret is. If the walk did not end with
+ * SUBTREE_WALK_RET_OK, @ret is passed through unchanged. Otherwise, any
+ * length still left in ctx->key was not found in the tree: one more miss
+ * request with a placeholder key is submitted for it and ctx->req_done is
+ * advanced.
+ *
+ * Return: @ret if it is not SUBTREE_WALK_RET_OK, SUBTREE_WALK_RET_NEED_REQ
+ * if no backing request is available for the remainder, and
+ * SUBTREE_WALK_RET_OK otherwise.
+ */
+static int read_walk_finally(struct pcache_cache_subtree_walk_ctx *ctx, int ret)
+{
+	struct pcache_cache *cache = ctx->cache_tree->cache;
+	struct pcache_backing_dev_req *backing_req, *next_req;
+	struct pcache_cache_key *key = ctx->key;
+
+	/* Submit everything the walk callbacks queued. */
+	list_for_each_entry_safe(backing_req, next_req, ctx->submit_req_list, node) {
+		list_del_init(&backing_req->node);
+		submit_cache_miss_req(cache, backing_req);
+	}
+
+	if (ret != SUBTREE_WALK_RET_OK)
+		return ret;
+
+	/* The key was fully consumed by the walk: nothing more to fetch. */
+	if (!key->len)
+		return SUBTREE_WALK_RET_OK;
+
+	backing_req = get_pre_alloc_req(ctx);
+	if (!backing_req)
+		return SUBTREE_WALK_RET_NEED_REQ;
+
+	/* The rest of the range is a miss: fetch it and leave a placeholder key. */
+	cache_miss_req_init(cache, backing_req, ctx->pcache_req, ctx->req_done, key->len, true);
+	submit_cache_miss_req(cache, backing_req);
+	ctx->req_done += key->len;
+
+	return SUBTREE_WALK_RET_OK;
+}
+
+/*
+ * Walk callback that reports whether the read has been fully handled:
+ * true once the amount processed so far (`ctx->req_done`) has reached the
+ * length of the original request (`ctx->pcache_req->data_len`), false
+ * while part of the request is still outstanding.
+ */
+static bool read_walk_done(struct pcache_cache_subtree_walk_ctx *ctx)
+{
+	return ctx->req_done >= ctx->pcache_req->data_len;
+}
+
+/**
+ * cache_read - Process a read request by traversing the cache tree
+ * @cache: Cache structure holding cache trees and related configurations
+ * @pcache_req: Request structure with information about the data to read
+ *
+ * This function attempts to fulfill a read request by traversing the cache tree(s)
+ * to locate cached data for the requested range. If parts of the data are missing
+ * in the cache, backing requests are generated to retrieve the required segments.
+ *
+ * The function operates by initializing a key for the requested data range and
+ * preparing a context (`walk_ctx`) to manage the cache tree traversal. The context
+ * includes pointers to functions (e.g., `read_before`, `read_overlap_tail`) that handle
+ * specific conditions encountered during the traversal. The `walk_finally` and `walk_done`
+ * functions manage the end stages of the traversal, while the `delete_key_list` and
+ * `submit_req_list` lists track any keys to be deleted or requests to be submitted.
+ *
+ * For each pass the key is clamped so that it does not cross the end of the
+ * subtree its offset falls into. The subtree's lock is then taken and a search
+ * locates the starting node. If the search reports outdated keys, these are
+ * deleted and the search is restarted to ensure accurate data retrieval.
+ *
+ * If the requested range spans multiple subtrees, the function moves on to the
+ * next one once the current range has been processed. This continues until the
+ * entire requested data length has been handled.
+ *
+ * When a walk runs out of backing requests, one is allocated with GFP_NOIO
+ * after the subtree lock has been released and the pass is repeated.
+ *
+ * Return: 0 on success, or the error stored in walk_ctx.ret when a walk
+ * ends with SUBTREE_WALK_RET_ERR.
+ */
+static int cache_read(struct pcache_cache *cache, struct pcache_request *pcache_req)
+{
+ struct pcache_cache_key key_data = { .off = pcache_req->off, .len = pcache_req->data_len };
+ struct pcache_cache_subtree *cache_subtree;
+ struct pcache_cache_key *key_tmp = NULL, *key_next;
+ struct rb_node *prev_node = NULL;
+ struct pcache_cache_key *key = &key_data;
+ struct pcache_cache_subtree_walk_ctx walk_ctx = { 0 };
+ struct pcache_backing_dev_req *backing_req, *next_req;
+ LIST_HEAD(delete_key_list);
+ LIST_HEAD(submit_req_list);
+ int ret;
+
+ /* Walk setup that stays the same for every subtree visited. */
+ walk_ctx.cache_tree = &cache->req_key_tree;
+ walk_ctx.req_done = 0;
+ walk_ctx.pcache_req = pcache_req;
+ walk_ctx.before = read_before;
+ walk_ctx.overlap_tail = read_overlap_tail;
+ walk_ctx.overlap_head = read_overlap_head;
+ walk_ctx.overlap_contain = read_overlap_contain;
+ walk_ctx.overlap_contained = read_overlap_contained;
+ walk_ctx.walk_finally = read_walk_finally;
+ walk_ctx.walk_done = read_walk_done;
+ walk_ctx.delete_key_list = &delete_key_list;
+ walk_ctx.submit_req_list = &submit_req_list;
+
+next:
+ /* The key covers what is still unhandled, clamped to the end of its subtree. */
+ key->off = pcache_req->off + walk_ctx.req_done;
+ key->len = pcache_req->data_len - walk_ctx.req_done;
+ if (key->len > PCACHE_CACHE_SUBTREE_SIZE - (key->off & PCACHE_CACHE_SUBTREE_SIZE_MASK))
+ key->len = PCACHE_CACHE_SUBTREE_SIZE - (key->off & PCACHE_CACHE_SUBTREE_SIZE_MASK);
+
+ cache_subtree = get_subtree(&cache->req_key_tree, key->off);
+ spin_lock(&cache_subtree->tree_lock);
+search:
+ prev_node = cache_subtree_search(cache_subtree, key, NULL, NULL, &delete_key_list);
+ if (!list_empty(&delete_key_list)) {
+ /* Remove the keys the search flagged for deletion, then search again. */
+ list_for_each_entry_safe(key_tmp, key_next, &delete_key_list, list_node) {
+ list_del_init(&key_tmp->list_node);
+ cache_key_delete(key_tmp);
+ }
+ goto search;
+ }
+
+ walk_ctx.start_node = prev_node;
+ walk_ctx.key = key;
+
+ ret = cache_subtree_walk(&walk_ctx);
+ /* A callback changed the tree: search again with the lock still held. */
+ if (ret == SUBTREE_WALK_RET_RESEARCH)
+ goto search;
+ spin_unlock(&cache_subtree->tree_lock);
+
+ if (ret == SUBTREE_WALK_RET_ERR) {
+ ret = walk_ctx.ret;
+ goto out;
+ }
+
+ if (ret == SUBTREE_WALK_RET_NEED_REQ) {
+ /* The lock has been released above; allocate the spare request with GFP_NOIO. */
+ walk_ctx.pre_alloc_req = cache_miss_req_alloc(cache, pcache_req, GFP_NOIO);
+ pcache_dev_debug(CACHE_TO_PCACHE(cache), "allocate pre_alloc_req with GFP_NOIO");
+ }
+
+ /* Part of the request is still unhandled: run another pass. */
+ if (walk_ctx.req_done < pcache_req->data_len)
+ goto next;
+ ret = 0;
+out:
+ /* Free a spare request that was allocated but never consumed. */
+ if (walk_ctx.pre_alloc_req)
+ cache_miss_req_free(walk_ctx.pre_alloc_req);
+
+ /* End any backing requests that are still queued and were not submitted. */
+ list_for_each_entry_safe(backing_req, next_req, &submit_req_list, node) {
+ list_del_init(&backing_req->node);
+ backing_dev_req_end(backing_req);
+ }
+
+ return ret;
+}
+
+/*
+ * cache_write - store the payload of a write request in the cache
+ * @cache: cache receiving the data
+ * @pcache_req: write request; ->off, ->data_len and ->bio are used
+ *
+ * The request is processed in pieces that never cross a subtree boundary.
+ * For every piece a key is allocated, cache space is obtained with
+ * cache_data_alloc(), the bio data is copied in, and the key is inserted
+ * into the request key tree and appended via cache_key_append() while the
+ * subtree's tree_lock is held.
+ *
+ * Return: 0 on success, otherwise the error returned by cache_data_alloc(),
+ * cache_copy_from_req_bio() or cache_key_append().
+ */
+static int cache_write(struct pcache_cache *cache, struct pcache_request *pcache_req)
+{
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_key *key;
+	u64 req_off = pcache_req->off;
+	u32 req_len = pcache_req->data_len;
+	u32 done = 0;
+	int ret;
+
+	while (done < req_len) {
+		key = cache_key_alloc(&cache->req_key_tree, GFP_NOIO);
+		key->off = req_off + done;
+		key->len = req_len - done;
+		/* keep the key inside a single subtree */
+		if (key->len > PCACHE_CACHE_SUBTREE_SIZE - (key->off & PCACHE_CACHE_SUBTREE_SIZE_MASK))
+			key->len = PCACHE_CACHE_SUBTREE_SIZE - (key->off & PCACHE_CACHE_SUBTREE_SIZE_MASK);
+
+		ret = cache_data_alloc(cache, key);
+		if (ret) {
+			cache_key_put(key);
+			return ret;
+		}
+
+		ret = cache_copy_from_req_bio(cache, key, pcache_req, done);
+		if (ret) {
+			cache_seg_put(key->cache_pos.cache_seg);
+			cache_key_put(key);
+			return ret;
+		}
+
+		cache_subtree = get_subtree(&cache->req_key_tree, key->off);
+		spin_lock(&cache_subtree->tree_lock);
+		cache_key_insert(&cache->req_key_tree, key, true);
+		ret = cache_key_append(cache, key, pcache_req->bio->bi_opf & REQ_FUA);
+		if (ret) {
+			/* undo the insertion before dropping the lock */
+			cache_seg_put(key->cache_pos.cache_seg);
+			cache_key_delete(key);
+			spin_unlock(&cache_subtree->tree_lock);
+			return ret;
+		}
+
+		done += key->len;
+		spin_unlock(&cache_subtree->tree_lock);
+	}
+
+	return 0;
+}
+
+/**
+ * cache_flush - close every kset of a cache
+ * @cache: Pointer to the cache structure
+ *
+ * Walks the ksets of @cache in index order. Each kset is closed with
+ * cache_kset_close() while its kset_lock is held.
+ *
+ * Return: 0 when every kset was closed, otherwise the first non-zero value
+ * returned by cache_kset_close(); later ksets are not visited in that case.
+ */
+int cache_flush(struct pcache_cache *cache)
+{
+	u32 idx = 0;
+	int err = 0;
+
+	while (!err && idx < cache->n_ksets) {
+		struct pcache_cache_kset *kset = get_kset(cache, idx);
+
+		spin_lock(&kset->kset_lock);
+		err = cache_kset_close(cache, kset);
+		spin_unlock(&kset->kset_lock);
+		idx++;
+	}
+
+	return err;
+}
+
+/*
+ * pcache_cache_handle_req - dispatch a request to the flush, read or write path
+ * @cache: cache handling the request
+ * @pcache_req: request wrapping the bio
+ *
+ * A bio carrying REQ_PREFLUSH is served by cache_flush() only; otherwise the
+ * bio direction selects cache_read() or cache_write().
+ *
+ * Return: the value returned by the selected handler.
+ */
+int pcache_cache_handle_req(struct pcache_cache *cache, struct pcache_request *pcache_req)
+{
+	struct bio *bio = pcache_req->bio;
+
+	if (unlikely(bio->bi_opf & REQ_PREFLUSH))
+		return cache_flush(cache);
+
+	if (bio_data_dir(bio) != READ)
+		return cache_write(cache, pcache_req);
+
+	return cache_read(cache, pcache_req);
+}
diff --git a/drivers/md/dm-pcache/cache_segment.c b/drivers/md/dm-pcache/cache_segment.c
new file mode 100644
index 000000000000..f0b58980806e
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_segment.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "cache_dev.h"
+#include "cache.h"
+#include "backing_dev.h"
+#include "dm_pcache.h"
+
+/*
+ * Return the address of the segment-info slot selected by
+ * cache_seg->info_index. Slots start at the beginning of the segment and
+ * are PCACHE_SEG_INFO_SIZE bytes apart.
+ */
+static inline struct pcache_segment_info *get_seg_info_addr(struct pcache_cache_segment *cache_seg)
+{
+	u32 seg_id = cache_seg->segment.seg_id;
+	void *seg_base = CACHE_DEV_SEGMENT(cache_seg->cache->cache_dev, seg_id);
+
+	return seg_base + PCACHE_SEG_INFO_SIZE * cache_seg->info_index;
+}
+
+/*
+ * cache_seg_info_write - persist the in-memory segment info of @cache_seg
+ *
+ * With info_lock held: bump the sequence number, recompute the CRC, copy
+ * the record into the slot selected by info_index, issue pmem_wmb(), and
+ * finally advance info_index (modulo PCACHE_META_INDEX_MAX) so the next
+ * write targets the following slot.
+ */
+static void cache_seg_info_write(struct pcache_cache_segment *cache_seg)
+{
+	struct pcache_segment_info *info = &cache_seg->cache_seg_info;
+	struct pcache_segment_info *slot;
+
+	mutex_lock(&cache_seg->info_lock);
+	info->header.seq++;
+	info->header.crc = pcache_meta_crc(&info->header, sizeof(*info));
+
+	slot = get_seg_info_addr(cache_seg);
+	memcpy_flushcache(slot, info, sizeof(*info));
+	pmem_wmb();
+
+	cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX;
+	mutex_unlock(&cache_seg->info_lock);
+}
+
+/*
+ * cache_seg_info_load - load the newest segment info copy from media
+ * @cache_seg: cache segment whose ->cache_seg_info is filled in
+ *
+ * Looks up the latest copy with pcache_meta_find_latest() under info_lock
+ * and positions info_index for the next cache_seg_info_write().
+ *
+ * Return: 0 on success, the error encoded in the lookup result, or -EIO
+ * when the lookup returns NULL. A failure is also logged.
+ */
+static int cache_seg_info_load(struct pcache_cache_segment *cache_seg)
+{
+	struct pcache_segment_info *cache_seg_info_addr_base, *cache_seg_info_addr;
+	struct pcache_cache_dev *cache_dev = cache_seg->cache->cache_dev;
+	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
+	u32 seg_id = cache_seg->segment.seg_id;
+	int ret = 0;
+
+	cache_seg_info_addr_base = CACHE_DEV_SEGMENT(cache_dev, seg_id);
+
+	mutex_lock(&cache_seg->info_lock);
+	cache_seg_info_addr = pcache_meta_find_latest(&cache_seg_info_addr_base->header,
+						sizeof(struct pcache_segment_info),
+						PCACHE_SEG_INFO_SIZE,
+						&cache_seg->cache_seg_info);
+	if (IS_ERR(cache_seg_info_addr)) {
+		ret = PTR_ERR(cache_seg_info_addr);
+		goto out;
+	} else if (!cache_seg_info_addr) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * Slots are PCACHE_SEG_INFO_SIZE bytes apart (see get_seg_info_addr()),
+	 * not sizeof(struct pcache_segment_info), so the slot number must be
+	 * derived from the byte distance rather than from struct-pointer
+	 * subtraction.
+	 */
+	cache_seg->info_index = ((char *)cache_seg_info_addr -
+				 (char *)cache_seg_info_addr_base) / PCACHE_SEG_INFO_SIZE;
+	/*
+	 * cache_seg_info_write() stores into the slot at info_index, so move
+	 * past the slot holding the newest copy to avoid overwriting it.
+	 */
+	cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX;
+out:
+	mutex_unlock(&cache_seg->info_lock);
+
+	if (ret)
+		pcache_dev_err(pcache, "can't read segment info of segment: %u, ret: %d\n",
+			      cache_seg->segment.seg_id, ret);
+	return ret;
+}
+
+/*
+ * cache_seg_ctrl_load - load the segment generation record from media
+ * @cache_seg: cache segment whose gen, gen_seq and gen_index are set
+ *
+ * When pcache_meta_find_latest() returns NULL the three fields are reset to
+ * zero; otherwise they are taken from the record that was found and from its
+ * position in the gen array.
+ *
+ * Return: 0 on success or the error encoded in the lookup result.
+ */
+static int cache_seg_ctrl_load(struct pcache_cache_segment *cache_seg)
+{
+	struct pcache_cache_seg_ctrl *ctrl = cache_seg->cache_seg_ctrl;
+	struct pcache_cache_seg_gen latest, *latest_addr;
+
+	latest_addr = pcache_meta_find_latest(&ctrl->gen->header,
+					     sizeof(struct pcache_cache_seg_gen),
+					     sizeof(struct pcache_cache_seg_gen),
+					     &latest);
+	if (IS_ERR(latest_addr))
+		return PTR_ERR(latest_addr);
+
+	if (!latest_addr) {
+		cache_seg->gen = 0;
+		cache_seg->gen_seq = 0;
+		cache_seg->gen_index = 0;
+		return 0;
+	}
+
+	cache_seg->gen = latest.gen;
+	cache_seg->gen_seq = latest.header.seq;
+	cache_seg->gen_index = (latest_addr - ctrl->gen);
+
+	return 0;
+}
+
+/* Return the address of the generation record selected by cache_seg->gen_index. */
+static inline struct pcache_cache_seg_gen *get_cache_seg_gen_addr(struct pcache_cache_segment *cache_seg)
+{
+	return &cache_seg->cache_seg_ctrl->gen[cache_seg->gen_index];
+}
+
+/*
+ * cache_seg_ctrl_write - write cache segment control information
+ * @cache_seg: the cache segment to update
+ *
+ * Builds a generation record from cache_seg->gen and the incremented
+ * cache_seg->gen_seq, stores it in the slot selected by gen_index, and then
+ * advances gen_index modulo PCACHE_META_INDEX_MAX.
+ *
+ * No lock is taken here even though control data is shared. Per the original
+ * design note, all accesses to control information are single-threaded:
+ *
+ * - All reads occur during the init phase, where no concurrent writes
+ *   can happen.
+ * - Writes happen once during init and once when the last reference
+ *   to the segment is dropped in cache_seg_put().
+ *
+ * Both cases are guaranteed to be single-threaded, so there is no risk
+ * of concurrent read/write races.
+ */
+static void cache_seg_ctrl_write(struct pcache_cache_segment *cache_seg)
+{
+	struct pcache_cache_seg_gen gen_rec;
+
+	gen_rec.gen = cache_seg->gen;
+	gen_rec.header.seq = ++cache_seg->gen_seq;
+	gen_rec.header.crc = pcache_meta_crc(&gen_rec.header, sizeof(gen_rec));
+
+	memcpy_flushcache(get_cache_seg_gen_addr(cache_seg), &gen_rec, sizeof(gen_rec));
+	pmem_wmb();
+
+	cache_seg->gen_index = (cache_seg->gen_index + 1) % PCACHE_META_INDEX_MAX;
+}
+
+/* Reset the generation state of @cache_seg to zero and write it to media. */
+static void cache_seg_ctrl_init(struct pcache_cache_segment *cache_seg)
+{
+	cache_seg->gen_index = 0;
+	cache_seg->gen_seq = 0;
+	cache_seg->gen = 0;
+
+	cache_seg_ctrl_write(cache_seg);
+}
+
+/*
+ * Load the segment info and then the segment control data of @cache_seg.
+ * Returns 0 on success or the first error from either loader.
+ */
+static int cache_seg_meta_load(struct pcache_cache_segment *cache_seg)
+{
+	int ret = cache_seg_info_load(cache_seg);
+
+	if (ret)
+		return ret;
+
+	return cache_seg_ctrl_load(cache_seg);
+}
+
+/**
+ * cache_seg_set_next_seg - record the ID of the segment following @cache_seg
+ * @cache_seg: Pointer to the cache segment structure.
+ * @seg_id: The segment ID to set as the next segment.
+ *
+ * Sets PCACHE_SEG_INFO_FLAGS_HAS_NEXT and next_seg in the segment info and
+ * writes the info to media. As described by the original author, the cache
+ * segments of a pcache_cache are linked through next_seg so that, starting
+ * from cache->seg_id, all of them can be found when the cache is loaded.
+ */
+void cache_seg_set_next_seg(struct pcache_cache_segment *cache_seg, u32 seg_id)
+{
+	struct pcache_segment_info *info = &cache_seg->cache_seg_info;
+
+	info->next_seg = seg_id;
+	info->flags |= PCACHE_SEG_INFO_FLAGS_HAS_NEXT;
+
+	cache_seg_info_write(cache_seg);
+}
+
+/*
+ * cache_seg_init - set up one cache segment of @cache
+ * @cache: owning cache
+ * @seg_id: segment ID on the cache device
+ * @cache_seg_id: index into cache->segments
+ * @new_cache: true to format the segment metadata, false to load it
+ *
+ * For a new cache the metadata area is zeroed, the control and info records
+ * are written, and an empty kset is stored at the start of the data area.
+ * Otherwise the existing metadata is loaded with cache_seg_meta_load().
+ *
+ * Return: 0 on success or the error from cache_seg_meta_load().
+ */
+int cache_seg_init(struct pcache_cache *cache, u32 seg_id, u32 cache_seg_id,
+		   bool new_cache)
+{
+	struct pcache_cache_dev *cache_dev = cache->cache_dev;
+	struct pcache_cache_segment *cache_seg = &cache->segments[cache_seg_id];
+	struct pcache_segment_init_options seg_options = { 0 };
+	struct pcache_segment *segment = &cache_seg->segment;
+
+	cache_seg->cache = cache;
+	cache_seg->cache_seg_id = cache_seg_id;
+	spin_lock_init(&cache_seg->gen_lock);
+	atomic_set(&cache_seg->refs, 0);
+	mutex_init(&cache_seg->info_lock);
+
+	/* generic segment setup */
+	seg_options.type = PCACHE_SEGMENT_TYPE_CACHE_DATA;
+	seg_options.data_off = PCACHE_CACHE_SEG_CTRL_OFF + PCACHE_CACHE_SEG_CTRL_SIZE;
+	seg_options.seg_id = seg_id;
+	seg_options.seg_info = &cache_seg->cache_seg_info;
+	pcache_segment_init(cache_dev, segment, &seg_options);
+
+	cache_seg->cache_seg_ctrl = CACHE_DEV_SEGMENT(cache_dev, seg_id) + PCACHE_CACHE_SEG_CTRL_OFF;
+
+	if (!new_cache)
+		return cache_seg_meta_load(cache_seg);
+
+	cache_dev_zero_range(cache_dev, CACHE_DEV_SEGMENT(cache_dev, seg_id),
+			     PCACHE_SEG_INFO_SIZE * PCACHE_META_INDEX_MAX +
+			     PCACHE_CACHE_SEG_CTRL_SIZE);
+
+	cache_seg_ctrl_init(cache_seg);
+
+	cache_seg->info_index = 0;
+	cache_seg_info_write(cache_seg);
+
+	/* overwrite any stale kset at the start of the data area */
+	memcpy_flushcache(segment->data, &pcache_empty_kset, sizeof(struct pcache_cache_kset_onmedia));
+	pmem_wmb();
+
+	return 0;
+}
+
+/**
+ * get_cache_segment - claim a free cache segment
+ * @cache: Pointer to the cache structure.
+ *
+ * Searches cache->seg_map for a clear bit under seg_map_lock, starting at
+ * the hint ->last_cache_seg and retrying once from bit 0 when the hint is
+ * non-zero. The bit found is set and becomes the new hint.
+ *
+ * Return: the matching entry of cache->segments, or NULL when no bit is
+ * free, in which case cache->cache_full is set.
+ */
+struct pcache_cache_segment *get_cache_segment(struct pcache_cache *cache)
+{
+	struct pcache_cache_segment *cache_seg;
+	u32 seg_id;
+
+	spin_lock(&cache->seg_map_lock);
+	for (;;) {
+		seg_id = find_next_zero_bit(cache->seg_map, cache->n_segs, cache->last_cache_seg);
+		if (seg_id != cache->n_segs)
+			break;
+
+		if (!cache->last_cache_seg) {
+			/* the whole map was scanned and nothing is free */
+			cache->cache_full = true;
+			spin_unlock(&cache->seg_map_lock);
+			return NULL;
+		}
+
+		/* drop the hint and scan again from the beginning */
+		cache->last_cache_seg = 0;
+	}
+
+	/* claim the segment and remember where the next search should start */
+	__set_bit(seg_id, cache->seg_map);
+	cache->last_cache_seg = seg_id;
+	spin_unlock(&cache->seg_map_lock);
+
+	cache_seg = &cache->segments[seg_id];
+	cache_seg->cache_seg_id = seg_id;
+
+	return cache_seg;
+}
+
+/*
+ * Increment the generation of @cache_seg under gen_lock, then write the
+ * control information to media outside the lock.
+ */
+static void cache_seg_gen_increase(struct pcache_cache_segment *cache_seg)
+{
+	spin_lock(&cache_seg->gen_lock);
+	++cache_seg->gen;
+	spin_unlock(&cache_seg->gen_lock);
+
+	cache_seg_ctrl_write(cache_seg);
+}
+
+/* Take one reference on @cache_seg. */
+void cache_seg_get(struct pcache_cache_segment *cache_seg)
+{
+	atomic_inc(&cache_seg->refs);
+}
+
+/*
+ * cache_seg_invalidate - return @cache_seg to the free pool
+ *
+ * Bumps the segment generation, clears the segment's bit in seg_map (and
+ * the cache_full flag) under seg_map_lock, kicks deferred requests and
+ * queues clean_work.
+ */
+static void cache_seg_invalidate(struct pcache_cache_segment *cache_seg)
+{
+	struct pcache_cache *cache = cache_seg->cache;
+
+	cache_seg_gen_increase(cache_seg);
+
+	spin_lock(&cache->seg_map_lock);
+	if (cache->cache_full)
+		cache->cache_full = false;
+	__clear_bit(cache_seg->cache_seg_id, cache->seg_map);
+	spin_unlock(&cache->seg_map_lock);
+
+	pcache_defer_reqs_kick(CACHE_TO_PCACHE(cache));
+	/* clean_work removes the keys in key_tree that are now bad */
+	queue_work(cache_get_wq(cache), &cache->clean_work);
+}
+
+/* Drop one reference on @cache_seg; the last put invalidates the segment. */
+void cache_seg_put(struct pcache_cache_segment *cache_seg)
+{
+	if (!atomic_dec_and_test(&cache_seg->refs))
+		return;
+
+	cache_seg_invalidate(cache_seg);
+}
diff --git a/drivers/md/dm-pcache/cache_writeback.c b/drivers/md/dm-pcache/cache_writeback.c
new file mode 100644
index 000000000000..87a82b3fe836
--- /dev/null
+++ b/drivers/md/dm-pcache/cache_writeback.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/bio.h>
+
+#include "cache.h"
+#include "backing_dev.h"
+#include "cache_dev.h"
+#include "dm_pcache.h"
+
+/*
+ * writeback_ctx_end - account the completion of one writeback unit
+ * @cache: cache owning the writeback context
+ * @ret: result of the completed unit, 0 on success
+ *
+ * Records the first non-zero @ret and drops one pending count. When the
+ * count reaches zero and no error was recorded, the backing device is
+ * flushed and dirty_tail is advanced and encoded under dirty_tail_lock.
+ * In either case the writeback work is then queued again without delay.
+ */
+static void writeback_ctx_end(struct pcache_cache *cache, int ret)
+{
+	if (ret && !cache->writeback_ctx.ret) {
+		pcache_dev_err(CACHE_TO_PCACHE(cache), "writeback error: %d", ret);
+		cache->writeback_ctx.ret = ret;
+	}
+
+	if (atomic_dec_and_test(&cache->writeback_ctx.pending)) {
+		if (!cache->writeback_ctx.ret) {
+			backing_dev_flush(cache->backing_dev);
+
+			mutex_lock(&cache->dirty_tail_lock);
+			cache_pos_advance(&cache->dirty_tail, cache->writeback_ctx.advance);
+			cache_encode_dirty_tail(cache);
+			mutex_unlock(&cache->dirty_tail_lock);
+		}
+		queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, 0);
+	}
+}
+
+/*
+ * Completion callback of a writeback backing request: forwards @ret to
+ * writeback_ctx_end() with writeback_lock held. The cache is taken from
+ * backing_req->priv_data.
+ */
+static void writeback_end_req(struct pcache_backing_dev_req *backing_req, int ret)
+{
+	struct pcache_cache *owner = backing_req->priv_data;
+
+	mutex_lock(&owner->writeback_lock);
+	writeback_ctx_end(owner, ret);
+	mutex_unlock(&owner->writeback_lock);
+}
+
+/*
+ * is_cache_clean - check whether a valid kset exists at @dirty_tail
+ * @cache: cache whose wb_kset_onmedia_buf receives the kset copy
+ * @dirty_tail: position to read from
+ *
+ * Copies up to PCACHE_KSET_ONMEDIA_SIZE_MAX bytes (bounded by the rest of
+ * the segment) into the writeback buffer and validates magic and CRC.
+ *
+ * Return: false when a kset with matching magic and CRC was read; true when
+ * the copy failed or either check did not match.
+ */
+static inline bool is_cache_clean(struct pcache_cache *cache, struct pcache_cache_pos *dirty_tail)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_kset_onmedia *kset_onmedia;
+	void *src;
+	u32 copy_len;
+	int err;
+
+	kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;
+	src = cache_pos_addr(dirty_tail);
+
+	copy_len = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - dirty_tail->seg_off);
+	err = copy_mc_to_kernel(kset_onmedia, src, copy_len);
+	if (err) {
+		pcache_dev_err(pcache, "error to read kset: %d", err);
+		return true;
+	}
+
+	/* a kset must start with the expected magic */
+	if (kset_onmedia->magic != PCACHE_KSET_MAGIC) {
+		pcache_dev_debug(pcache, "dirty_tail: %u:%u magic: %llx, not expected: %llx\n",
+				dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
+				kset_onmedia->magic, PCACHE_KSET_MAGIC);
+		return true;
+	}
+
+	/* and its stored CRC must match the recomputed one */
+	if (kset_onmedia->crc != cache_kset_crc(kset_onmedia)) {
+		pcache_dev_debug(pcache, "dirty_tail: %u:%u crc: %x, not expected: %x\n",
+				dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
+				cache_kset_crc(kset_onmedia), kset_onmedia->crc);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Stop writeback for @cache: cancel the delayed work synchronously, flush
+ * the backing device and tear down the writeback key tree.
+ */
+void cache_writeback_exit(struct pcache_cache *cache)
+{
+	cancel_delayed_work_sync(&cache->writeback_work);
+
+	backing_dev_flush(cache->backing_dev);
+
+	cache_tree_exit(&cache->writeback_key_tree);
+}
+
+/*
+ * cache_writeback_init - prepare and start writeback for @cache
+ *
+ * Initializes the writeback key tree with a single subtree, clears the
+ * pending counter and queues the writeback work immediately.
+ *
+ * Return: 0 on success or the error from cache_tree_init().
+ */
+int cache_writeback_init(struct pcache_cache *cache)
+{
+	int ret = cache_tree_init(cache, &cache->writeback_key_tree, 1);
+
+	if (ret)
+		return ret;
+
+	atomic_set(&cache->writeback_ctx.pending, 0);
+
+	/* kick off the first writeback pass */
+	queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, 0);
+
+	return 0;
+}
+
+/*
+ * cache_key_writeback - submit backing-device writes for one key
+ * @cache: cache owning the key
+ * @key: key whose cached data is written back
+ *
+ * Clean keys are skipped. Otherwise the key's data is written in one or
+ * more KMEM requests, each limited by backing_dev_req_coalesced_max_len().
+ * Every request increments writeback_ctx.pending and completes through
+ * writeback_end_req().
+ */
+static void cache_key_writeback(struct pcache_cache *cache, struct pcache_cache_key *key)
+{
+	struct pcache_backing_dev_req_opts writeback_req_opts = { 0 };
+	struct pcache_backing_dev_req *writeback_req;
+	struct pcache_cache_pos *pos = &key->cache_pos;
+	u32 seg_remain, req_len, done = 0;
+	void *addr;
+
+	if (cache_key_clean(key))
+		return;
+
+	seg_remain = cache_seg_remain(pos);
+	BUG_ON(seg_remain < key->len);
+
+	do {
+		addr = cache_pos_addr(pos) + done;
+		req_len = backing_dev_req_coalesced_max_len(addr, key->len - done);
+
+		writeback_req_opts.type = BACKING_DEV_REQ_TYPE_KMEM;
+		writeback_req_opts.gfp_mask = GFP_NOIO;
+		writeback_req_opts.end_fn = writeback_end_req;
+		writeback_req_opts.priv_data = cache;
+
+		writeback_req_opts.kmem.data = addr;
+		writeback_req_opts.kmem.opf = REQ_OP_WRITE;
+		writeback_req_opts.kmem.len = req_len;
+		writeback_req_opts.kmem.backing_off = key->off + done;
+
+		writeback_req = backing_dev_req_create(cache->backing_dev, &writeback_req_opts);
+
+		atomic_inc(&cache->writeback_ctx.pending);
+		backing_dev_req_submit(writeback_req, true);
+
+		done += req_len;
+	} while (done < key->len);
+}
+
+/*
+ * cache_wb_tree_writeback - write back and drop every key in the writeback tree
+ * @cache: cache owning the writeback tree
+ * @advance: amount stored in writeback_ctx.advance for the dirty_tail update
+ *
+ * The pending counter starts at 1 so the context cannot complete while keys
+ * are still being submitted; the final writeback_ctx_end() call drops that
+ * initial count.
+ */
+static void cache_wb_tree_writeback(struct pcache_cache *cache, u32 advance)
+{
+	struct pcache_cache_tree *cache_tree = &cache->writeback_key_tree;
+	struct rb_node *node, *next;
+	u32 i;
+
+	cache->writeback_ctx.ret = 0;
+	cache->writeback_ctx.advance = advance;
+	atomic_set(&cache->writeback_ctx.pending, 1);
+
+	for (i = 0; i < cache_tree->n_subtrees; i++) {
+		struct pcache_cache_subtree *cache_subtree = &cache_tree->subtrees[i];
+
+		for (node = rb_first(&cache_subtree->root); node; node = next) {
+			struct pcache_cache_key *key = CACHE_KEY(node);
+
+			/* fetch the successor first: the key is deleted below */
+			next = rb_next(node);
+
+			cache_key_writeback(cache, key);
+			cache_key_delete(key);
+		}
+	}
+	writeback_ctx_end(cache, 0);
+}
+
+/*
+ * cache_kset_insert_tree - decode a kset into the writeback key tree
+ * @cache: cache owning the writeback tree
+ * @kset_onmedia: kset whose keys are decoded
+ *
+ * Each on-media key is decoded into a newly allocated key and inserted into
+ * the writeback tree under the subtree's tree_lock.
+ *
+ * Return: 0 on success. If decoding a key fails, that key is released, the
+ * writeback tree is cleared and the decode error is returned.
+ */
+static int cache_kset_insert_tree(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
+{
+	struct pcache_cache_subtree *cache_subtree;
+	struct pcache_cache_key *key;
+	int ret;
+	u32 idx;
+
+	for (idx = 0; idx < kset_onmedia->key_num; idx++) {
+		struct pcache_cache_key_onmedia *key_onmedia = &kset_onmedia->data[idx];
+
+		key = cache_key_alloc(&cache->writeback_key_tree, GFP_NOIO);
+		ret = cache_key_decode(cache, key_onmedia, key);
+		if (ret) {
+			cache_key_put(key);
+			cache_tree_clear(&cache->writeback_key_tree);
+			return ret;
+		}
+
+		cache_subtree = get_subtree(&cache->writeback_key_tree, key->off);
+		spin_lock(&cache_subtree->tree_lock);
+		cache_key_insert(&cache->writeback_key_tree, key, true);
+		spin_unlock(&cache_subtree->tree_lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a kset flagged as the last one of its segment: move dirty_tail to
+ * offset 0 of the segment named by next_cache_seg_id and encode it, all
+ * under dirty_tail_lock.
+ */
+static void last_kset_writeback(struct pcache_cache *cache,
+		struct pcache_cache_kset_onmedia *last_kset_onmedia)
+{
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_segment *target;
+
+	pcache_dev_debug(pcache, "last kset, next: %u\n", last_kset_onmedia->next_cache_seg_id);
+
+	target = &cache->segments[last_kset_onmedia->next_cache_seg_id];
+
+	mutex_lock(&cache->dirty_tail_lock);
+	cache->dirty_tail.cache_seg = target;
+	cache->dirty_tail.seg_off = 0;
+	cache_encode_dirty_tail(cache);
+	mutex_unlock(&cache->dirty_tail_lock);
+}
+
+/*
+ * cache_writeback_fn - delayed-work handler driving writeback
+ * @work: the work embedded in pcache_cache.writeback_work
+ *
+ * Runs under writeback_lock and does nothing while a previous writeback is
+ * still pending or the target is stopping. Otherwise it inspects the kset
+ * at dirty_tail:
+ *  - no valid kset, or inserting its keys failed: retry after
+ *    PCACHE_CACHE_WRITEBACK_INTERVAL;
+ *  - kset flagged LAST: move dirty_tail to the next segment, requeue at once;
+ *  - otherwise: write the keys back and requeue at once.
+ */
+void cache_writeback_fn(struct work_struct *work)
+{
+	struct pcache_cache *cache = container_of(work, struct pcache_cache, writeback_work.work);
+	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
+	struct pcache_cache_kset_onmedia *kset_onmedia;
+	struct pcache_cache_pos dirty_tail;
+	u32 delay = PCACHE_CACHE_WRITEBACK_INTERVAL;
+
+	mutex_lock(&cache->writeback_lock);
+	if (atomic_read(&cache->writeback_ctx.pending) || pcache_is_stopping(pcache))
+		goto unlock;
+
+	kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;
+
+	/* work on a snapshot of dirty_tail */
+	mutex_lock(&cache->dirty_tail_lock);
+	cache_pos_copy(&dirty_tail, &cache->dirty_tail);
+	mutex_unlock(&cache->dirty_tail_lock);
+
+	if (is_cache_clean(cache, &dirty_tail))
+		goto requeue;
+
+	if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
+		last_kset_writeback(cache, kset_onmedia);
+		delay = 0;
+		goto requeue;
+	}
+
+	if (cache_kset_insert_tree(cache, kset_onmedia))
+		goto requeue;
+
+	cache_wb_tree_writeback(cache, get_kset_onmedia_size(kset_onmedia));
+	delay = 0;
+requeue:
+	queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, delay);
+unlock:
+	mutex_unlock(&cache->writeback_lock);
+}
diff --git a/drivers/md/dm-pcache/dm_pcache.c b/drivers/md/dm-pcache/dm_pcache.c
new file mode 100644
index 000000000000..e5f5936fa6f0
--- /dev/null
+++ b/drivers/md/dm-pcache/dm_pcache.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+
+#include "../dm-core.h"
+#include "cache_dev.h"
+#include "backing_dev.h"
+#include "cache.h"
+#include "dm_pcache.h"
+
+/*
+ * Queue the deferred-request work unless the cache is currently marked
+ * full. The flag is checked with seg_map_lock held.
+ */
+void pcache_defer_reqs_kick(struct dm_pcache *pcache)
+{
+	struct pcache_cache *cache = &pcache->cache;
+
+	spin_lock(&cache->seg_map_lock);
+	if (cache->cache_full == false)
+		queue_work(pcache->task_wq, &pcache->defered_req_work);
+	spin_unlock(&cache->seg_map_lock);
+}
+
+/*
+ * Put @pcache_req on the deferred list of its pcache and kick the deferred
+ * work. The request must not already be on a list.
+ */
+static void defer_req(struct pcache_request *pcache_req)
+{
+	struct dm_pcache *owner = pcache_req->pcache;
+
+	BUG_ON(!list_empty(&pcache_req->list_node));
+
+	spin_lock(&owner->defered_req_list_lock);
+	list_add(&pcache_req->list_node, &owner->defered_req_list);
+	pcache_defer_reqs_kick(owner);
+	spin_unlock(&owner->defered_req_list_lock);
+}
+
+/*
+ * defered_req_fn - work handler retrying deferred requests
+ *
+ * Takes over the whole deferred list and hands each request to
+ * pcache_cache_handle_req() again. A request that still gets -EBUSY is
+ * deferred again; any other result completes it through pcache_req_put().
+ * Nothing is done while the target is stopping.
+ */
+static void defered_req_fn(struct work_struct *work)
+{
+	struct dm_pcache *pcache = container_of(work, struct dm_pcache, defered_req_work);
+	struct pcache_request *req;
+	LIST_HEAD(retry_list);
+	int ret;
+
+	if (pcache_is_stopping(pcache))
+		return;
+
+	spin_lock(&pcache->defered_req_list_lock);
+	list_splice_init(&pcache->defered_req_list, &retry_list);
+	spin_unlock(&pcache->defered_req_list_lock);
+
+	while (!list_empty(&retry_list)) {
+		req = list_first_entry(&retry_list, struct pcache_request, list_node);
+		list_del_init(&req->list_node);
+		req->ret = 0;
+
+		ret = pcache_cache_handle_req(&pcache->cache, req);
+		if (ret != -EBUSY)
+			pcache_req_put(req, ret);
+		else
+			defer_req(req);
+	}
+}
+
+/* Take one reference on @pcache_req. */
+void pcache_req_get(struct pcache_request *pcache_req)
+{
+	kref_get(&pcache_req->ref);
+}
+
+/*
+ * end_req - kref release callback of a pcache request
+ *
+ * A request whose result is -EBUSY is given a new reference and deferred.
+ * Any other result ends the bio with the translated status and drops the
+ * in-flight count, waking inflight_wq when it reaches zero.
+ */
+static void end_req(struct kref *ref)
+{
+	struct pcache_request *pcache_req = container_of(ref, struct pcache_request, ref);
+	struct dm_pcache *pcache = pcache_req->pcache;
+	struct bio *bio = pcache_req->bio;
+	int ret = pcache_req->ret;
+
+	if (ret != -EBUSY) {
+		bio->bi_status = errno_to_blk_status(ret);
+		bio_endio(bio);
+
+		if (atomic_dec_and_test(&pcache->inflight_reqs))
+			wake_up(&pcache->inflight_wq);
+		return;
+	}
+
+	pcache_req_get(pcache_req);
+	defer_req(pcache_req);
+}
+
+/*
+ * Drop one reference on @pcache_req. A non-zero @ret is stored as the
+ * request result only if no result was recorded before; the last put runs
+ * end_req().
+ */
+void pcache_req_put(struct pcache_request *pcache_req, int ret)
+{
+	if (!pcache_req->ret && ret)
+		pcache_req->ret = ret;
+
+	kref_put(&pcache_req->ref, end_req);
+}
+
+static bool at_least_one_arg(struct dm_arg_set *as, char **error)
+{
+ if (!as->argc) {
+ *error = "Insufficient args";
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Consume the next argument as the cache device path and open it
+ * read/write. Returns 0, -EINVAL when no argument is left, or the
+ * dm_get_device() error (with *@error set).
+ */
+static int parse_cache_dev(struct dm_pcache *pcache, struct dm_arg_set *as,
+			   char **error)
+{
+	int ret;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	ret = dm_get_device(pcache->ti, dm_shift_arg(as),
+			    BLK_OPEN_READ | BLK_OPEN_WRITE,
+			    &pcache->cache_dev.dm_dev);
+	if (!ret)
+		return 0;
+
+	*error = "Error opening cache device";
+	return ret;
+}
+
+/*
+ * Consume the next argument as the backing device path and open it
+ * read/write. Returns 0, -EINVAL when no argument is left, or the
+ * dm_get_device() error (with *@error set).
+ */
+static int parse_backing_dev(struct dm_pcache *pcache, struct dm_arg_set *as,
+			     char **error)
+{
+	int ret;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	ret = dm_get_device(pcache->ti, dm_shift_arg(as),
+			    BLK_OPEN_READ | BLK_OPEN_WRITE,
+			    &pcache->backing_dev.dm_dev);
+	if (!ret)
+		return 0;
+
+	*error = "Error opening backing device";
+	return ret;
+}
+
+/* Set the defaults: writeback cache mode, data CRC disabled. */
+static void pcache_init_opts(struct pcache_cache_options *opts)
+{
+	opts->data_crc = false;
+	opts->cache_mode = PCACHE_CACHE_MODE_WRITEBACK;
+}
+
+/*
+ * parse_cache_opts - parse the optional "<#opt args> [name value]..." group
+ * @pcache: target whose ->opts is filled in
+ * @as: remaining constructor arguments
+ * @error: set to a static message on failure
+ *
+ * Options start from the defaults of pcache_init_opts(). Recognised
+ * options are "cache_mode writeback" and "data_crc true|false"; each takes
+ * exactly one value, which must lie inside the counted group.
+ *
+ * Return: 0 on success, -EINVAL on any malformed or unknown option.
+ */
+static int parse_cache_opts(struct dm_pcache *pcache, struct dm_arg_set *as,
+			    char **error)
+{
+	struct pcache_cache_options *opts = &pcache->opts;
+	static const struct dm_arg _args[] = {
+		{0, 4, "Invalid number of cache option arguments"},
+	};
+	unsigned int argc;
+	const char *arg;
+	const char *value;
+	int ret;
+
+	pcache_init_opts(opts);
+	if (!as->argc)
+		return 0;
+
+	ret = dm_read_arg_group(_args, as, &argc, error);
+	if (ret)
+		return -EINVAL;
+
+	while (argc) {
+		arg = dm_shift_arg(as);
+		argc--;
+
+		if (strcmp(arg, "cache_mode") && strcmp(arg, "data_crc")) {
+			*error = "Unrecognised cache option requested";
+			return -EINVAL;
+		}
+
+		/*
+		 * The value must be part of the counted group. Without this
+		 * check dm_shift_arg() could return NULL or an argument from
+		 * outside the group, and argc would wrap around below.
+		 */
+		if (!argc) {
+			*error = "Missing value for cache option";
+			return -EINVAL;
+		}
+		value = dm_shift_arg(as);
+		argc--;
+
+		if (!strcmp(arg, "cache_mode")) {
+			if (!strcmp(value, "writeback")) {
+				opts->cache_mode = PCACHE_CACHE_MODE_WRITEBACK;
+			} else {
+				*error = "Invalid cache mode parameter";
+				return -EINVAL;
+			}
+		} else {
+			if (!strcmp(value, "true")) {
+				opts->data_crc = true;
+			} else if (!strcmp(value, "false")) {
+				opts->data_crc = false;
+			} else {
+				*error = "Invalid data crc parameter";
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * pcache_start - bring up cache device, backing device and cache, in that order
+ *
+ * On failure the components already started are stopped in reverse order and
+ * *@error names the step that failed.
+ *
+ * Return: 0 on success or the error of the failing step.
+ */
+static int pcache_start(struct dm_pcache *pcache, char **error)
+{
+	int ret;
+
+	ret = cache_dev_start(pcache);
+	if (ret) {
+		*error = "Failed to start cache dev";
+		return ret;
+	}
+
+	ret = backing_dev_start(pcache);
+	if (ret) {
+		*error = "Failed to start backing dev";
+		goto err_cache_dev;
+	}
+
+	ret = pcache_cache_start(pcache);
+	if (!ret)
+		return 0;
+
+	*error = "Failed to start pcache";
+	backing_dev_stop(pcache);
+err_cache_dev:
+	cache_dev_stop(pcache);
+
+	return ret;
+}
+
+/* Release the cache and backing device references that were acquired, if any. */
+static void pcache_destroy_args(struct dm_pcache *pcache)
+{
+	struct dm_target *ti = pcache->ti;
+
+	if (pcache->cache_dev.dm_dev)
+		dm_put_device(ti, pcache->cache_dev.dm_dev);
+	if (pcache->backing_dev.dm_dev)
+		dm_put_device(ti, pcache->backing_dev.dm_dev);
+}
+
+/*
+ * pcache_parse_args - parse "<cache dev> <backing dev> [options]"
+ *
+ * Devices opened so far are released again when a later step fails.
+ *
+ * Return: 0 on success or the error of the failing parser.
+ */
+static int pcache_parse_args(struct dm_pcache *pcache, unsigned int argc, char **argv,
+			     char **error)
+{
+	struct dm_arg_set as = { .argc = argc, .argv = argv };
+	int ret;
+
+	/* first argument: cache device */
+	ret = parse_cache_dev(pcache, &as, error);
+	if (ret)
+		return ret;
+
+	/* second argument: backing device */
+	ret = parse_backing_dev(pcache, &as, error);
+	if (!ret) {
+		/* everything after that: optional arguments */
+		ret = parse_cache_opts(pcache, &as, error);
+		if (!ret)
+			return 0;
+	}
+
+	pcache_destroy_args(pcache);
+	return ret;
+}
+
+/*
+ * dm_pcache_ctr - constructor of the pcache target
+ *
+ * Refuses to load onto a mapped device that already has a live table.
+ * Allocates the target state and its workqueue, parses the arguments,
+ * starts the cache and finally publishes the state in ti->private.
+ *
+ * Return: 0 on success or a negative errno; everything set up before the
+ * failure is torn down again.
+ */
+static int dm_pcache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct mapped_device *md = ti->table->md;
+	struct dm_pcache *pcache;
+	int ret;
+
+	if (md->map) {
+		ti->error = "Don't support table loading for live md";
+		return -EOPNOTSUPP;
+	}
+
+	pcache = kzalloc(sizeof(*pcache), GFP_KERNEL);
+	if (!pcache)
+		return -ENOMEM;
+
+	pcache->task_wq = alloc_workqueue("pcache-%s-wq", WQ_UNBOUND | WQ_MEM_RECLAIM,
+					  0, md->name);
+	if (!pcache->task_wq) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	spin_lock_init(&pcache->defered_req_list_lock);
+	INIT_LIST_HEAD(&pcache->defered_req_list);
+	INIT_WORK(&pcache->defered_req_work, defered_req_fn);
+	pcache->ti = ti;
+
+	/* on failure pcache_parse_args() has already released the devices */
+	ret = pcache_parse_args(pcache, argc, argv, &ti->error);
+	if (ret)
+		goto err_wq;
+
+	ret = pcache_start(pcache, &ti->error);
+	if (ret)
+		goto err_args;
+
+	ti->num_flush_bios = 1;
+	ti->flush_supported = true;
+	ti->per_io_data_size = sizeof(struct pcache_request);
+	ti->private = pcache;
+	atomic_set(&pcache->inflight_reqs, 0);
+	atomic_set(&pcache->state, PCACHE_STATE_RUNNING);
+	init_waitqueue_head(&pcache->inflight_wq);
+
+	return 0;
+err_args:
+	pcache_destroy_args(pcache);
+err_wq:
+	destroy_workqueue(pcache->task_wq);
+err_free:
+	kfree(pcache);
+
+	return ret;
+}
+
+/*
+ * Flush the deferred-request work, then complete every request still on
+ * the deferred list with -EIO.
+ */
+static void defer_req_stop(struct dm_pcache *pcache)
+{
+	struct pcache_request *req;
+	LIST_HEAD(fail_list);
+
+	flush_work(&pcache->defered_req_work);
+
+	spin_lock(&pcache->defered_req_list_lock);
+	list_splice_init(&pcache->defered_req_list, &fail_list);
+	spin_unlock(&pcache->defered_req_list_lock);
+
+	while (!list_empty(&fail_list)) {
+		req = list_first_entry(&fail_list, struct pcache_request, list_node);
+		list_del_init(&req->list_node);
+		pcache_req_put(req, -EIO);
+	}
+}
+
+/*
+ * dm_pcache_dtr - destructor of the pcache target
+ *
+ * Marks the target as stopping, fails the deferred requests, waits until no
+ * request is in flight, then stops cache, backing device and cache device,
+ * releases the devices, and destroys the workqueue and the target state.
+ */
+static void dm_pcache_dtr(struct dm_target *ti)
+{
+	struct dm_pcache *pcache = ti->private;
+
+	atomic_set(&pcache->state, PCACHE_STATE_STOPPING);
+	defer_req_stop(pcache);
+
+	wait_event(pcache->inflight_wq,
+		   atomic_read(&pcache->inflight_reqs) == 0);
+
+	pcache_cache_stop(pcache);
+	backing_dev_stop(pcache);
+	cache_dev_stop(pcache);
+
+	pcache_destroy_args(pcache);
+
+	drain_workqueue(pcache->task_wq);
+	destroy_workqueue(pcache->task_wq);
+
+	kfree(pcache);
+}
+
+/*
+ * dm_pcache_map_bio - map callback of the pcache target
+ *
+ * Initializes the per-bio pcache_request (byte offset and length taken from
+ * the bio iterator), counts it as in flight and hands it to the cache. A
+ * request that gets -EBUSY is deferred; any other result drops the initial
+ * reference with that result.
+ *
+ * Return: always DM_MAPIO_SUBMITTED.
+ */
+static int dm_pcache_map_bio(struct dm_target *ti, struct bio *bio)
+{
+	struct pcache_request *req = dm_per_bio_data(bio, sizeof(struct pcache_request));
+	struct dm_pcache *pcache = ti->private;
+	int ret;
+
+	req->pcache = pcache;
+	kref_init(&req->ref);
+	req->ret = 0;
+	req->bio = bio;
+	req->off = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
+	req->data_len = bio->bi_iter.bi_size;
+	INIT_LIST_HEAD(&req->list_node);
+	atomic_inc(&pcache->inflight_reqs);
+
+	ret = pcache_cache_handle_req(&pcache->cache, req);
+	if (ret != -EBUSY)
+		pcache_req_put(req, ret);
+	else
+		defer_req(req);
+
+	return DM_MAPIO_SUBMITTED;
+}
+
+/*
+ * dm_pcache_status - status callback of the pcache target
+ *
+ * STATUSTYPE_INFO emits, in order: superblock flags, segment count of the
+ * cache device, segment count of the cache, number of segments in use, the
+ * gc percent, the cache info flags, and the key_head, dirty_tail and
+ * key_tail positions as "<cache_seg_id>:<seg_off>".
+ *
+ * STATUSTYPE_TABLE emits the constructor line. The option is spelled
+ * "data_crc" because that is the only spelling parse_cache_opts() accepts;
+ * emitting "crc" would produce a table line that cannot be loaded again.
+ *
+ * STATUSTYPE_IMA emits an empty string.
+ */
+static void dm_pcache_status(struct dm_target *ti, status_type_t type,
+			     unsigned int status_flags, char *result,
+			     unsigned int maxlen)
+{
+	struct dm_pcache *pcache = ti->private;
+	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
+	struct pcache_backing_dev *backing_dev = &pcache->backing_dev;
+	struct pcache_cache *cache = &pcache->cache;
+	unsigned int sz = 0;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		DMEMIT("%x %u %u %u %u %x %u:%u %u:%u %u:%u",
+		       cache_dev->sb_flags,
+		       cache_dev->seg_num,
+		       cache->n_segs,
+		       bitmap_weight(cache->seg_map, cache->n_segs),
+		       pcache_cache_get_gc_percent(cache),
+		       cache->cache_info.flags,
+		       cache->key_head.cache_seg->cache_seg_id,
+		       cache->key_head.seg_off,
+		       cache->dirty_tail.cache_seg->cache_seg_id,
+		       cache->dirty_tail.seg_off,
+		       cache->key_tail.cache_seg->cache_seg_id,
+		       cache->key_tail.seg_off);
+		break;
+	case STATUSTYPE_TABLE:
+		DMEMIT("%s %s 4 cache_mode writeback data_crc %s",
+		       cache_dev->dm_dev->name,
+		       backing_dev->dm_dev->name,
+		       cache_data_crc_on(cache) ? "true" : "false");
+		break;
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
+	}
+}
+
+/*
+ * dm_pcache_message - message callback of the pcache target
+ *
+ * Only "gc_percent <decimal>" is understood (the keyword is matched without
+ * regard to case).
+ *
+ * Return: the result of pcache_cache_set_gc_percent(), or -EINVAL for any
+ * other message or an unparsable value.
+ */
+static int dm_pcache_message(struct dm_target *ti, unsigned int argc,
+			     char **argv, char *result, unsigned int maxlen)
+{
+	struct dm_pcache *pcache = ti->private;
+	unsigned long val;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	if (strcasecmp(argv[0], "gc_percent"))
+		return -EINVAL;
+
+	if (kstrtoul(argv[1], 10, &val))
+		return -EINVAL;
+
+	return pcache_cache_set_gc_percent(&pcache->cache, val);
+}
+
+static struct target_type dm_pcache_target = { /* device-mapper target descriptor registered in dm_pcache_init() */
+ .name = "pcache",
+ .version = {0, 1, 0},
+ .module = THIS_MODULE,
+ .features = DM_TARGET_SINGLETON,
+ .ctr = dm_pcache_ctr, /* constructor: parses the table line and starts the cache */
+ .dtr = dm_pcache_dtr, /* destructor: stops the cache and frees the target state */
+ .map = dm_pcache_map_bio, /* per-bio entry point */
+ .status = dm_pcache_status,
+ .message = dm_pcache_message, /* handles "gc_percent <value>" */
+};
+
+/*
+ * Module init: set up the backing and cache subsystems, then register the
+ * target. Steps already completed are undone in reverse order on failure.
+ */
+static int __init dm_pcache_init(void)
+{
+	int ret;
+
+	ret = pcache_backing_init();
+	if (ret)
+		return ret;
+
+	ret = pcache_cache_init();
+	if (ret)
+		goto undo_backing;
+
+	ret = dm_register_target(&dm_pcache_target);
+	if (ret)
+		goto undo_cache;
+
+	return 0;
+
+undo_cache:
+	pcache_cache_exit();
+undo_backing:
+	pcache_backing_exit();
+	return ret;
+}
+module_init(dm_pcache_init);
+
+/* Module exit: unregister the target, then tear down in reverse order of init. */
+static void __exit dm_pcache_exit(void)
+{
+	dm_unregister_target(&dm_pcache_target);
+
+	pcache_cache_exit();
+	pcache_backing_exit();
+}
+module_exit(dm_pcache_exit);
+
+MODULE_DESCRIPTION("dm-pcache Persistent Cache for block device");
+MODULE_AUTHOR("Dongsheng Yang <dongsheng.yang@linux.dev>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-pcache/dm_pcache.h b/drivers/md/dm-pcache/dm_pcache.h
new file mode 100644
index 000000000000..b4e06be0c0b9
--- /dev/null
+++ b/drivers/md/dm-pcache/dm_pcache.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _DM_PCACHE_H
+#define _DM_PCACHE_H
+#include <linux/device-mapper.h>
+
+#include "../dm-core.h"
+
+/*
+ * Map a pointer to an embedded member back to its containing
+ * struct dm_pcache.
+ *
+ * The macro parameter is deliberately NOT named after the member.  The
+ * preprocessor substitutes the parameter everywhere in the expansion, so a
+ * parameter called "cache_dev" would also rewrite the member name handed to
+ * container_of(), and the macro would only compile when the argument
+ * happened to be spelled exactly like the member.
+ */
+#define CACHE_DEV_TO_PCACHE(ptr)	(container_of((ptr), struct dm_pcache, cache_dev))
+#define BACKING_DEV_TO_PCACHE(ptr)	(container_of((ptr), struct dm_pcache, backing_dev))
+#define CACHE_TO_PCACHE(ptr)		(container_of((ptr), struct dm_pcache, cache))
+
+/* Values held in dm_pcache.state (an atomic_t); see pcache_is_stopping(). */
+#define PCACHE_STATE_RUNNING 1
+#define PCACHE_STATE_STOPPING 2
+
+/*
+ * NOTE(review): these forward declarations only introduce the struct tags.
+ * The members embedded by value in struct dm_pcache need the complete
+ * types, so includers presumably pull in the defining headers before this
+ * one - confirm.
+ */
+struct pcache_cache_dev;
+struct pcache_backing_dev;
+struct pcache_cache;
+struct pcache_cache_options;
+/*
+ * struct dm_pcache - state of one pcache target instance.
+ *
+ * The cache_dev, backing_dev and cache members are embedded by value so the
+ * *_TO_PCACHE() container_of() helpers can recover this structure from a
+ * pointer to any of them.
+ */
+struct dm_pcache {
+ struct dm_target *ti; /* target this instance belongs to; the pcache_dev_*() log macros read the device name through it */
+ struct pcache_cache_dev cache_dev;
+ struct pcache_backing_dev backing_dev;
+ struct pcache_cache cache;
+ struct pcache_cache_options opts;
+
+ spinlock_t defered_req_list_lock; /* presumably guards defered_req_list - confirm */
+ struct list_head defered_req_list;
+ struct workqueue_struct *task_wq;
+
+ struct work_struct defered_req_work;
+
+ atomic_t state; /* PCACHE_STATE_*; read by pcache_is_stopping() */
+ atomic_t inflight_reqs;
+ wait_queue_head_t inflight_wq; /* presumably woken as inflight_reqs drains - confirm */
+};
+
+/* Report whether @pcache's state currently reads PCACHE_STATE_STOPPING. */
+static inline bool pcache_is_stopping(struct dm_pcache *pcache)
+{
+	int cur_state = atomic_read(&pcache->state);
+
+	return cur_state == PCACHE_STATE_STOPPING;
+}
+
+/*
+ * Per-device logging helpers: same as pcache_err()/pcache_info()/
+ * pcache_debug(), with the mapped device name put in front of the message.
+ * @pcache is parenthesized so any pointer expression (not just a plain
+ * identifier) can be passed safely.
+ */
+#define pcache_dev_err(pcache, fmt, ...)				\
+	pcache_err("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__)
+#define pcache_dev_info(pcache, fmt, ...)				\
+	pcache_info("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__)
+#define pcache_dev_debug(pcache, fmt, ...)				\
+	pcache_debug("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__)
+
+/*
+ * struct pcache_request - per-bio request context.
+ *
+ * NOTE(review): the field notes below are inferred from names and types
+ * only; the code that fills them in is not part of this header.
+ */
+struct pcache_request {
+ struct dm_pcache *pcache; /* owning target instance */
+ struct bio *bio; /* bio this request describes */
+
+ u64 off; /* presumably the start offset of the I/O - unit to be confirmed */
+ u32 data_len; /* presumably the payload length in bytes - confirm */
+
+ struct kref ref; /* reference count; see pcache_req_get()/pcache_req_put() */
+ int ret; /* presumably the completion status - confirm */
+
+ struct list_head list_node;
+};
+
+/*
+ * Reference helpers for struct pcache_request (implemented elsewhere).
+ * NOTE(review): presumably get/put operate on pcache_request.ref and @ret
+ * carries a completion status - confirm against the definitions.
+ */
+void pcache_req_get(struct pcache_request *pcache_req);
+void pcache_req_put(struct pcache_request *pcache_req, int ret);
+
+/* Implemented elsewhere; presumably schedules processing of deferred requests - confirm. */
+void pcache_defer_reqs_kick(struct dm_pcache *pcache);
+
+#endif /* _DM_PCACHE_H */
diff --git a/drivers/md/dm-pcache/pcache_internal.h b/drivers/md/dm-pcache/pcache_internal.h
new file mode 100644
index 000000000000..d427e534727c
--- /dev/null
+++ b/drivers/md/dm-pcache/pcache_internal.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_INTERNAL_H
+#define _PCACHE_INTERNAL_H
+
+#include <linux/delay.h>
+#include <linux/crc32c.h>
+
+/*
+ * Logging wrappers around pr_err()/pr_info()/pr_debug().  Each message is
+ * prefixed with "dm-pcache: <function>:<line> ".  Nothing is appended after
+ * @fmt, so the caller's format string has to supply the trailing newline.
+ */
+#define pcache_err(fmt, ...) \
+ pr_err("dm-pcache: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define pcache_info(fmt, ...) \
+ pr_info("dm-pcache: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define pcache_debug(fmt, ...) \
+ pr_debug("dm-pcache: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+/* Size units; the ULL suffix makes every expression built from them 64-bit. */
+#define PCACHE_KB (1024ULL)
+#define PCACHE_MB (1024 * PCACHE_KB)
+
+/* Maximum number of metadata indices (slots scanned by pcache_meta_find_latest()) */
+#define PCACHE_META_INDEX_MAX 2
+
+/* Seed value passed to crc32c() by pcache_meta_crc() */
+#define PCACHE_CRC_SEED 0x3B15A
+/*
+ * struct pcache_meta_header - PCACHE metadata header structure
+ * @crc: CRC checksum for validating metadata integrity.
+ * @seq: Sequence number to track metadata updates.
+ * @version: Metadata version.
+ * @res: Reserved space for future use.
+ *
+ * @crc must stay the first member: pcache_meta_crc() checksums everything
+ * that follows the first 4 bytes of the header.  @seq is 8 bits wide and is
+ * compared with wrap-around by pcache_meta_seq_after().
+ */
+struct pcache_meta_header {
+ __u32 crc;
+ __u8 seq;
+ __u8 version;
+ __u16 res;
+};
+
+/*
+ * pcache_meta_crc - Checksum a metadata block, leaving out its crc field.
+ * @header: Start of the metadata block (the header is its first member).
+ * @meta_size: Total size of the metadata block in bytes.
+ *
+ * The leading crc field is skipped, so the stored checksum never covers
+ * itself.  Returns crc32c() over the remaining @meta_size - sizeof(crc)
+ * bytes, seeded with PCACHE_CRC_SEED.
+ */
+static inline u32 pcache_meta_crc(struct pcache_meta_header *header, u32 meta_size)
+{
+	void *payload = (void *)header + sizeof(header->crc);
+	u32 payload_len = meta_size - sizeof(header->crc);
+
+	return crc32c(PCACHE_CRC_SEED, payload, payload_len);
+}
+
+/*
+ * pcache_meta_seq_after - Wrap-aware "is newer than" for 8-bit sequences.
+ * @seq1: Sequence number being tested.
+ * @seq2: Sequence number it is compared against.
+ *
+ * The difference seq1 - seq2 is reduced to 8 bits and read as a signed
+ * value; a positive result means @seq1 is ahead of @seq2.  Because the
+ * difference wraps, a counter that overflowed still compares correctly,
+ * e.g. seq1 = 1 is after seq2 = 255 (difference 2).  Two values exactly
+ * 128 apart compare as "not after" in both directions.
+ *
+ * Returns true if @seq1 is more recent than @seq2, false otherwise.
+ */
+static inline bool pcache_meta_seq_after(u8 seq1, u8 seq2)
+{
+	s8 delta = (s8)(seq1 - seq2);
+
+	return delta > 0;
+}
+
+/*
+ * pcache_meta_find_latest - Find the latest valid metadata copy.
+ * @header: Address of the first metadata slot; slot i starts at
+ *          @header + i * @meta_max_size.
+ * @meta_size: Number of bytes of each slot that are copied and CRC-checked.
+ * @meta_max_size: Distance in bytes between consecutive slots.
+ * @meta_ret: Caller buffer of at least @meta_size bytes.  It is used as
+ *            scratch space while scanning, so its previous contents are
+ *            overwritten even when nothing valid is found; on success it
+ *            holds a copy of the winning slot.
+ *
+ * Each of the PCACHE_META_INDEX_MAX slots is copied with
+ * copy_mc_to_kernel().  Slots whose stored crc does not match
+ * pcache_meta_crc() are skipped; among the rest, the one with the most
+ * recent sequence number (per pcache_meta_seq_after()) wins.
+ *
+ * Returns the address of the winning slot inside the @header area, NULL if
+ * no slot passes the CRC check, or ERR_PTR(-EIO) if a copy fails.
+ */
+static inline void __must_check *pcache_meta_find_latest(struct pcache_meta_header *header,
+					u32 meta_size, u32 meta_max_size,
+					void *meta_ret)
+{
+	struct pcache_meta_header *meta = meta_ret;
+	void *meta_addr, *latest = NULL;
+	u8 seq_latest = 0;
+	u32 i;
+
+	for (i = 0; i < PCACHE_META_INDEX_MAX; i++) {
+		meta_addr = (void *)header + (i * meta_max_size);
+		if (copy_mc_to_kernel(meta, meta_addr, meta_size)) {
+			pcache_err("hardware memory error when copy meta\n");
+			return ERR_PTR(-EIO);
+		}
+
+		/* Skip if CRC check fails, which means corrupted */
+		if (meta->crc != pcache_meta_crc(meta, meta_size))
+			continue;
+
+		/* Update latest if a more recent sequence is found */
+		if (!latest || pcache_meta_seq_after(meta->seq, seq_latest)) {
+			seq_latest = meta->seq;
+			latest = meta_addr;
+		}
+	}
+
+	if (!latest)
+		return NULL;
+
+	/*
+	 * @meta_ret holds whichever slot was scanned last, which is not
+	 * necessarily the winner, so copy the winner in again.
+	 */
+	if (copy_mc_to_kernel(meta_ret, latest, meta_size)) {
+		pcache_err("hardware memory error\n");
+		return ERR_PTR(-EIO);
+	}
+
+	return latest;
+}
+
+#endif /* _PCACHE_INTERNAL_H */
diff --git a/drivers/md/dm-pcache/segment.c b/drivers/md/dm-pcache/segment.c
new file mode 100644
index 000000000000..7e9818701445
--- /dev/null
+++ b/drivers/md/dm-pcache/segment.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/dax.h>
+
+#include "pcache_internal.h"
+#include "cache_dev.h"
+#include "segment.h"
+
+/*
+ * segment_copy_to_bio - copy data out of a segment into a bio's pages.
+ * @segment: source segment
+ * @data_off: byte offset into the segment's data area
+ * @data_len: number of bytes to copy
+ * @bio: destination bio
+ * @bio_off: extra bytes to skip from the bio's current iterator position
+ *
+ * Returns 0 when all @data_len bytes were copied by _copy_mc_to_iter(),
+ * -EIO otherwise.
+ */
+int segment_copy_to_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct iov_iter dst_iter;
+
+	/* Start the iterator where the bio's own iterator currently stands. */
+	iov_iter_bvec(&dst_iter, ITER_DEST, &bio->bi_io_vec[bio->bi_iter.bi_idx],
+		      bio_segments(bio), bio->bi_iter.bi_size);
+	dst_iter.iov_offset = bio->bi_iter.bi_bvec_done;
+	if (bio_off)
+		iov_iter_advance(&dst_iter, bio_off);
+
+	/* A short copy is reported as an I/O error. */
+	if (_copy_mc_to_iter(segment->data + data_off, data_len, &dst_iter) != data_len)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * segment_copy_from_bio - copy data from a bio's pages into a segment.
+ * @segment: destination segment
+ * @data_off: byte offset into the segment's data area
+ * @data_len: number of bytes to copy
+ * @bio: source bio
+ * @bio_off: extra bytes to skip from the bio's current iterator position
+ *
+ * The copy goes through _copy_from_iter_flushcache(); pmem_wmb() is issued
+ * only after a complete copy.  Returns 0 on success, -EIO on a short copy.
+ */
+int segment_copy_from_bio(struct pcache_segment *segment,
+		u32 data_off, u32 data_len, struct bio *bio, u32 bio_off)
+{
+	struct iov_iter src_iter;
+
+	/* Start the iterator where the bio's own iterator currently stands. */
+	iov_iter_bvec(&src_iter, ITER_SOURCE, &bio->bi_io_vec[bio->bi_iter.bi_idx],
+		      bio_segments(bio), bio->bi_iter.bi_size);
+	src_iter.iov_offset = bio->bi_iter.bi_bvec_done;
+	if (bio_off)
+		iov_iter_advance(&src_iter, bio_off);
+
+	if (_copy_from_iter_flushcache(segment->data + data_off, data_len,
+				       &src_iter) != data_len)
+		return -EIO;
+
+	pmem_wmb();
+	return 0;
+}
+
+/*
+ * pcache_segment_init - fill in @segment from @options.
+ *
+ * Records the segment type in the caller-supplied seg_info (which is
+ * therefore modified), then points @segment at its data area: the segment
+ * selected by options->seg_id on @cache_dev, starting options->data_off
+ * bytes in, with the rest of PCACHE_SEG_SIZE as its size.
+ */
+void pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+			 struct pcache_segment_init_options *options)
+{
+	struct pcache_segment_info *info = options->seg_info;
+	u32 data_off = options->data_off;
+
+	segment_info_set_type(info, options->type);
+
+	segment->cache_dev = cache_dev;
+	segment->seg_info = info;
+	segment->seg_id = options->seg_id;
+	segment->data = CACHE_DEV_SEGMENT(cache_dev, options->seg_id) + data_off;
+	segment->data_size = PCACHE_SEG_SIZE - data_off;
+}
diff --git a/drivers/md/dm-pcache/segment.h b/drivers/md/dm-pcache/segment.h
new file mode 100644
index 000000000000..deca1ddcb02b
--- /dev/null
+++ b/drivers/md/dm-pcache/segment.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PCACHE_SEGMENT_H
+#define _PCACHE_SEGMENT_H
+
+#include <linux/bio.h>
+#include <linux/bitfield.h>
+
+#include "pcache_internal.h"
+
+/*
+ * struct pcache_segment_info - per-segment metadata record.
+ * Begins with the common metadata header (crc/seq/version).
+ */
+struct pcache_segment_info {
+ struct pcache_meta_header header;
+ __u32 flags; /* PCACHE_SEG_INFO_FLAGS_*: bit 0 has-next, bits 4:1 segment type */
+ __u32 next_seg; /* presumably the id of the following segment when has-next is set - confirm */
+};
+
+/* Layout of pcache_segment_info.flags */
+#define PCACHE_SEG_INFO_FLAGS_HAS_NEXT BIT(0) /* tested by segment_info_has_next() */
+
+#define PCACHE_SEG_INFO_FLAGS_TYPE_MASK GENMASK(4, 1) /* 4-bit segment type field */
+#define PCACHE_SEGMENT_TYPE_CACHE_DATA 1 /* a value for the type field */
+
+/* True when the HAS_NEXT bit is set in @seg_info's flags. */
+static inline bool segment_info_has_next(struct pcache_segment_info *seg_info)
+{
+	return !!(seg_info->flags & PCACHE_SEG_INFO_FLAGS_HAS_NEXT);
+}
+
+/* Replace the type field of @seg_info's flags with @type; other bits are kept. */
+static inline void segment_info_set_type(struct pcache_segment_info *seg_info, u8 type)
+{
+	u32 flags = seg_info->flags;
+
+	flags &= ~PCACHE_SEG_INFO_FLAGS_TYPE_MASK;
+	flags |= FIELD_PREP(PCACHE_SEG_INFO_FLAGS_TYPE_MASK, type);
+	seg_info->flags = flags;
+}
+
+/* Extract the type field from @seg_info's flags. */
+static inline u8 segment_info_get_type(struct pcache_segment_info *seg_info)
+{
+	u32 flags = seg_info->flags;
+
+	return FIELD_GET(PCACHE_SEG_INFO_FLAGS_TYPE_MASK, flags);
+}
+
+/* A position inside a segment; moved forward with segment_pos_advance(). */
+struct pcache_segment_pos {
+ struct pcache_segment *segment; /* Segment associated with the position */
+ u32 off; /* Offset within the segment */
+};
+
+/* Arguments consumed by pcache_segment_init(). */
+struct pcache_segment_init_options {
+ u8 type; /* segment type, e.g. PCACHE_SEGMENT_TYPE_CACHE_DATA */
+ u32 seg_id; /* segment id, copied into pcache_segment.seg_id */
+ u32 data_off; /* presumably where the data area starts inside the segment - confirm */
+
+ struct pcache_segment_info *seg_info; /* metadata record the segment will point at */
+};
+
+/* In-memory handle for one segment of the cache device. */
+struct pcache_segment {
+ struct pcache_cache_dev *cache_dev;
+
+ void *data; /* base address used by segment_copy_to_bio()/segment_copy_from_bio() */
+ u32 data_size; /* upper bound enforced by segment_pos_advance() */
+ u32 seg_id;
+
+ struct pcache_segment_info *seg_info;
+};
+
+/*
+ * Copy @data_len bytes between a segment (starting @data_off into its data
+ * area) and @bio (starting @bio_off into the bio).
+ */
+int segment_copy_to_bio(struct pcache_segment *segment,
+ u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+int segment_copy_from_bio(struct pcache_segment *segment,
+ u32 data_off, u32 data_len, struct bio *bio, u32 bio_off);
+
+/*
+ * segment_pos_advance - move a segment position forward by @len bytes.
+ * @seg_pos: position to advance
+ * @len: number of bytes to advance by
+ *
+ * BUGs if the new offset would go past the segment's data_size.
+ */
+static inline void segment_pos_advance(struct pcache_segment_pos *seg_pos, u32 len)
+{
+	/*
+	 * Add in 64 bits: a u32 sum could wrap for a huge @len and slip
+	 * past the bounds check.
+	 */
+	BUG_ON((u64)seg_pos->off + len > seg_pos->segment->data_size);
+
+	seg_pos->off += len;
+}
+
+/* Fill in @segment for the segment described by @options on @cache_dev. */
+void pcache_segment_init(struct pcache_cache_dev *cache_dev, struct pcache_segment *segment,
+ struct pcache_segment_init_options *options);
+#endif /* _PCACHE_SEGMENT_H */
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 0a1788fed68c..c6f7129e43d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3247,7 +3247,7 @@ size_check:
rs_reset_inconclusive_reshape(rs);
/* Start raid set read-only and assumed clean to change in raid_resume() */
- rs->md.ro = 1;
+ rs->md.ro = MD_RDONLY;
rs->md.in_sync = 1;
/* Has to be held on running the array */
@@ -3385,7 +3385,7 @@ static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long r
/* The MD sync thread can be done with io or be interrupted but still be running */
if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
(test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
+ (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
return st_reshape;
@@ -3775,11 +3775,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
} else
return -EINVAL;
}
- if (mddev->ro == 2) {
+ if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
if (!mddev->suspended)
md_wakeup_thread(mddev->sync_thread);
}
@@ -3860,6 +3860,7 @@ static void raid_postsuspend(struct dm_target *ti)
*/
md_stop_writes(&rs->md);
mddev_suspend(&rs->md, false);
+ rs->md.ro = MD_RDONLY;
}
}
@@ -3972,7 +3973,7 @@ static void rs_update_sbs(struct raid_set *rs)
int ro = mddev->ro;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
md_update_sb(mddev, 1);
mddev->ro = ro;
}
@@ -4131,7 +4132,7 @@ static void raid_resume(struct dm_target *ti)
WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread,
lockdep_is_held(&mddev->reconfig_mutex)));
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
mddev->in_sync = 0;
md_unfrozen_sync_thread(mddev);
mddev_unlock_and_resume(mddev);
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index a4550975c27d..e9b47b659976 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -206,7 +206,7 @@ struct dm_region_hash *dm_region_hash_create(
rh->shift = RH_HASH_SHIFT;
rh->prime = RH_HASH_MULT;
- rh->buckets = vmalloc(array_size(nr_buckets, sizeof(*rh->buckets)));
+ rh->buckets = vmalloc_array(nr_buckets, sizeof(*rh->buckets));
if (!rh->buckets) {
DMERR("unable to allocate region hash bucket memory");
kfree(rh);
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index bb1a70b5a215..50a52ca50b34 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -114,8 +114,8 @@ static int alloc_region_table(struct dm_target *ti, unsigned int nr_paths)
return -EINVAL;
}
- sctx->region_table = vmalloc(array_size(nr_slots,
- sizeof(region_table_slot_t)));
+ sctx->region_table = vmalloc_array(nr_slots,
+ sizeof(region_table_slot_t));
if (!sctx->region_table) {
ti->error = "Cannot allocate region table";
return -ENOMEM;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 2af5a9514c05..8fede41adec0 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -263,7 +263,8 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
static struct target_type error_target = {
.name = "error",
.version = {1, 7, 0},
- .features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM,
+ .features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM |
+ DM_TARGET_PASSES_INTEGRITY,
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 007bb93e5fca..c84149ba4e38 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3031,8 +3031,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
}
pool->cell_sort_array =
- vmalloc(array_size(CELL_SORT_ARRAY_SIZE,
- sizeof(*pool->cell_sort_array)));
+ vmalloc_array(CELL_SORT_ARRAY_SIZE,
+ sizeof(*pool->cell_sort_array));
if (!pool->cell_sort_array) {
*error = "Error allocating cell sort array";
err_p = ERR_PTR(-ENOMEM);
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index 810002747091..262e11581f2d 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -17,6 +17,7 @@
#include <linux/minmax.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
+#include <linux/string.h>
#include <linux/wait.h>
#include "logger.h"
@@ -509,18 +510,6 @@ static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lb
vdo_enqueue_completion(completion, VDO_DEFAULT_Q_MAP_BIO_PRIORITY);
}
-static bool is_zero_block(char *block)
-{
- int i;
-
- for (i = 0; i < VDO_BLOCK_SIZE; i += sizeof(u64)) {
- if (*((u64 *) &block[i]))
- return false;
- }
-
- return true;
-}
-
static void copy_from_bio(struct bio *bio, char *data_ptr)
{
struct bio_vec biovec;
@@ -572,7 +561,7 @@ static void launch_bio(struct vdo *vdo, struct data_vio *data_vio, struct bio *b
* we acknowledge the bio.
*/
copy_from_bio(bio, data_vio->vio.data);
- data_vio->is_zero = is_zero_block(data_vio->vio.data);
+ data_vio->is_zero = mem_is_zero(data_vio->vio.data, VDO_BLOCK_SIZE);
data_vio->write = true;
}
@@ -1459,7 +1448,7 @@ static void modify_for_partial_write(struct vdo_completion *completion)
copy_from_bio(bio, data + data_vio->offset);
}
- data_vio->is_zero = is_zero_block(data);
+ data_vio->is_zero = mem_is_zero(data, VDO_BLOCK_SIZE);
data_vio->read = false;
launch_data_vio_logical_callback(data_vio,
continue_data_vio_with_block_map_slot);
diff --git a/drivers/md/dm-vdo/indexer/volume-index.c b/drivers/md/dm-vdo/indexer/volume-index.c
index 12f954a0c532..afb062e1f1fb 100644
--- a/drivers/md/dm-vdo/indexer/volume-index.c
+++ b/drivers/md/dm-vdo/indexer/volume-index.c
@@ -836,7 +836,7 @@ static int start_restoring_volume_sub_index(struct volume_sub_index *sub_index,
"%zu bytes decoded of %zu expected", offset,
sizeof(buffer));
if (result != VDO_SUCCESS)
- result = UDS_CORRUPT_DATA;
+ return UDS_CORRUPT_DATA;
if (memcmp(header.magic, MAGIC_START_5, MAGIC_SIZE) != 0) {
return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
@@ -928,7 +928,7 @@ static int start_restoring_volume_index(struct volume_index *volume_index,
"%zu bytes decoded of %zu expected", offset,
sizeof(buffer));
if (result != VDO_SUCCESS)
- result = UDS_CORRUPT_DATA;
+ return UDS_CORRUPT_DATA;
if (memcmp(header.magic, MAGIC_START_6, MAGIC_SIZE) != 0)
return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7bd6fa05b00a..f5e5e59b232b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -490,18 +490,13 @@ u64 dm_start_time_ns_from_clone(struct bio *bio)
}
EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
-static inline bool bio_is_flush_with_data(struct bio *bio)
-{
- return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
-}
-
static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
{
/*
* If REQ_PREFLUSH set, don't account payload, it will be
* submitted (and accounted) after this flush completes.
*/
- if (bio_is_flush_with_data(bio))
+ if (io->requeue_flush_with_data)
return 0;
if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
return io->sectors;
@@ -590,6 +585,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t g
io = container_of(tio, struct dm_io, tio);
io->magic = DM_IO_MAGIC;
io->status = BLK_STS_OK;
+ io->requeue_flush_with_data = false;
/* one ref is for submission, the other is for completion */
atomic_set(&io->io_count, 2);
@@ -948,6 +944,7 @@ static void __dm_io_complete(struct dm_io *io, bool first_stage)
struct mapped_device *md = io->md;
blk_status_t io_error;
bool requeued;
+ bool requeue_flush_with_data;
requeued = dm_handle_requeue(io, first_stage);
if (requeued && first_stage)
@@ -964,6 +961,7 @@ static void __dm_io_complete(struct dm_io *io, bool first_stage)
__dm_start_io_acct(io);
dm_end_io_acct(io);
}
+ requeue_flush_with_data = io->requeue_flush_with_data;
free_io(io);
smp_wmb();
this_cpu_dec(*md->pending_io);
@@ -976,7 +974,7 @@ static void __dm_io_complete(struct dm_io *io, bool first_stage)
if (requeued)
return;
- if (bio_is_flush_with_data(bio)) {
+ if (unlikely(requeue_flush_with_data)) {
/*
* Preflush done for flush with data, reissue
* without REQ_PREFLUSH.
@@ -1996,12 +1994,30 @@ static void dm_split_and_process_bio(struct mapped_device *md,
}
init_clone_info(&ci, io, map, bio, is_abnormal);
- if (bio->bi_opf & REQ_PREFLUSH) {
+ if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) {
+ /*
+ * The "flush_bypasses_map" is set on targets where it is safe
+ * to skip the map function and submit bios directly to the
+ * underlying block devices - currently, it is set for dm-linear
+ * and dm-stripe.
+ *
+ * If we have just one underlying device (i.e. there is one
+ * linear target or multiple linear targets pointing to the same
+ * device), we can send the flush with data directly to it.
+ */
+ if (map->flush_bypasses_map) {
+ struct list_head *devices = dm_table_get_devices(map);
+ if (devices->next == devices->prev)
+ goto send_preflush_with_data;
+ }
+ if (bio->bi_iter.bi_size)
+ io->requeue_flush_with_data = true;
__send_empty_flush(&ci);
/* dm_io_complete submits any data associated with flush */
goto out;
}
+send_preflush_with_data:
if (static_branch_unlikely(&zoned_enabled) &&
(bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
error = __send_zone_reset_all(&ci);
@@ -2908,7 +2924,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
{
bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
- int r;
+ int r = 0;
lockdep_assert_held(&md->suspend_lock);
@@ -2960,8 +2976,10 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
* Stop md->queue before flushing md->wq in case request-based
* dm defers requests to md->wq from md->queue.
*/
- if (dm_request_based(md))
+ if (map && dm_request_based(md)) {
dm_stop_queue(md->queue);
+ set_bit(DMF_QUEUE_STOPPED, &md->flags);
+ }
flush_workqueue(md->wq);
@@ -2970,7 +2988,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
* We call dm_wait_for_completion to wait for all existing requests
* to finish.
*/
- r = dm_wait_for_completion(md, task_state);
+ if (map)
+ r = dm_wait_for_completion(md, task_state);
if (!r)
set_bit(dmf_suspended_flag, &md->flags);
@@ -2983,7 +3002,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
if (r < 0) {
dm_queue_flush(md);
- if (dm_request_based(md))
+ if (test_and_clear_bit(DMF_QUEUE_STOPPED, &md->flags))
dm_start_queue(md->queue);
unlock_fs(md);
@@ -3067,7 +3086,7 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map)
* so that mapping of targets can work correctly.
* Request-based dm is queueing the deferred I/Os in its request_queue.
*/
- if (dm_request_based(md))
+ if (test_and_clear_bit(DMF_QUEUE_STOPPED, &md->flags))
dm_start_queue(md->queue);
unlock_fs(md);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1d0e0e7362bd..3fc33b1b4dfb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9705,6 +9705,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
flags_ext3 = le32_to_cpu(resp->flags_ext3);
if (flags_ext3 & FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT)
bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_DYN_ALLOC_SUPPORT;
+ if (flags_ext3 & FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_MIRROR_ON_ROCE;
bp->tx_push_thresh = 0;
if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 06a4c2afdf8a..741b2d854789 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2514,6 +2514,7 @@ struct bnxt {
#define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40)
#define BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS BIT_ULL(41)
#define BNXT_FW_CAP_NPAR_1_2 BIT_ULL(42)
+ #define BNXT_FW_CAP_MIRROR_ON_ROCE BIT_ULL(43)
u32 fw_dbg_cap;
@@ -2537,6 +2538,8 @@ struct bnxt {
((bp)->fw_cap & BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED)
#define BNXT_SW_RES_LMT(bp) \
((bp)->fw_cap & BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS)
+#define BNXT_MIRROR_ON_ROCE_CAP(bp) \
+ ((bp)->fw_cap & BNXT_FW_CAP_MIRROR_ON_ROCE)
u32 hwrm_spec_code;
u16 hwrm_cmd_seq;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 61cf201bb0dc..f8c2c72b382d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -100,6 +100,12 @@ void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp)
if (BNXT_PF(bp) && !bp->pf.port_id &&
bp->port_count > 1)
bp->edev->ulp_num_ctxs++;
+
+ /* Reserve one additional stat_ctx when the device is capable
+ * of supporting port mirroring on RDMA device.
+ */
+ if (BNXT_MIRROR_ON_ROCE_CAP(bp))
+ bp->edev->ulp_num_ctxs++;
}
}
diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
index 01fe76786f77..c99758adf3ad 100644
--- a/drivers/net/ethernet/pensando/Kconfig
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -24,6 +24,7 @@ config IONIC
select NET_DEVLINK
select DIMLIB
select PAGE_POOL
+ select AUXILIARY_BUS
help
This enables the support for the Pensando family of Ethernet
adapters. More specific information on this driver can be
diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile
index 4e7642a2d25f..a598972fef41 100644
--- a/drivers/net/ethernet/pensando/ionic/Makefile
+++ b/drivers/net/ethernet/pensando/ionic/Makefile
@@ -5,5 +5,5 @@ obj-$(CONFIG_IONIC) := ionic.o
ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \
ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \
- ionic_txrx.o ionic_stats.o ionic_fw.o
+ ionic_txrx.o ionic_stats.o ionic_fw.o ionic_aux.o
ionic-$(CONFIG_PTP_1588_CLOCK) += ionic_phc.o
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 04f00ea94230..85198e6a806e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -65,16 +65,9 @@ struct ionic {
int watchdog_period;
};
-struct ionic_admin_ctx {
- struct completion work;
- union ionic_adminq_cmd cmd;
- union ionic_adminq_comp comp;
-};
-
int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
const int err, const bool do_msg);
-int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
u8 status, int err);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h
new file mode 100644
index 000000000000..bd88666836b8
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_API_H_
+#define _IONIC_API_H_
+
+#include <linux/auxiliary_bus.h>
+#include "ionic_if.h"
+#include "ionic_regs.h"
+
+/**
+ * struct ionic_aux_dev - Auxiliary device information
+ * @lif: Logical interface
+ * @idx: Index identifier
+ * @adev: Auxiliary device; the container is recovered from a pointer to
+ *        @adev.dev with container_of()
+ */
+struct ionic_aux_dev {
+ struct ionic_lif *lif;
+ int idx;
+ struct auxiliary_device adev;
+};
+
+/**
+ * struct ionic_admin_ctx - Admin command context
+ * @work: Work completion wait queue element
+ * @cmd: Admin command (64B) to be copied to the queue
+ * @comp: Admin completion (16B) copied from the queue
+ */
+struct ionic_admin_ctx {
+ struct completion work;
+ union ionic_adminq_cmd cmd;
+ union ionic_adminq_comp comp;
+};
+
+/*
+ * Sentinel for "no interrupt index assigned".  It is parenthesized so the
+ * leading minus cannot bind to neighbouring tokens after expansion.
+ * struct ionic_intr_info stores its index as unsigned int, so a comparison
+ * against that field sees this value converted to UINT_MAX.
+ */
+#define IONIC_INTR_INDEX_NOT_ASSIGNED	(-1)
+/* Size in bytes of ionic_intr_info.name */
+#define IONIC_INTR_NAME_MAX_SZ		32
+
+/**
+ * struct ionic_intr_info - Interrupt information
+ * @name: Name identifier
+ * @rearm_count: Interrupt rearm count
+ * @index: Interrupt index position
+ * @vector: Interrupt number
+ * @dim_coal_hw: Interrupt coalesce value in hardware units
+ * @affinity_mask: CPU affinity mask
+ * @aff_notify: context for notification of IRQ affinity changes
+ */
+struct ionic_intr_info {
+ char name[IONIC_INTR_NAME_MAX_SZ];
+ u64 rearm_count;
+ unsigned int index;
+ unsigned int vector;
+ u32 dim_coal_hw;
+ cpumask_var_t *affinity_mask;
+ struct irq_affinity_notify aff_notify;
+};
+
+/**
+ * ionic_adminq_post_wait - Post an admin command and wait for response
+ * @lif: Logical interface
+ * @ctx: API admin command context
+ *
+ * Post the command to an admin queue in the ethernet driver. If this command
+ * succeeds, then the command has been posted, but that does not indicate a
+ * completion. If this command returns success, then the completion callback
+ * will eventually be called.
+ *
+ * NOTE(review): the summary line says this call waits for the response,
+ * while the paragraph above describes a post-only call with a later
+ * completion callback. The implementation is not in this header; confirm
+ * which description is right and align the text.
+ *
+ * Return: zero or negative error status
+ */
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
+
+/**
+ * ionic_error_to_errno - Transform ionic_if errors to os errno
+ * @code: Ionic error number
+ *
+ * Return: Negative OS error number or zero
+ */
+int ionic_error_to_errno(enum ionic_status_code code);
+
+/**
+ * ionic_request_rdma_reset - request reset or disable the device or lif
+ * @lif: Logical interface
+ *
+ * Issues an IONIC_CMD_RDMA_RESET_LIF device command for @lif while holding
+ * the device command lock, and does not return until that command
+ * completes or DEVCMD_TIMEOUT expires. A failure is only logged; nothing
+ * is reported back to the caller.
+ */
+void ionic_request_rdma_reset(struct ionic_lif *lif);
+
+/**
+ * ionic_intr_alloc - Reserve a device interrupt
+ * @lif: Logical interface
+ * @intr: Reserved ionic interrupt structure
+ *
+ * Reserve an interrupt index and get irq number for that index.
+ *
+ * Return: zero or negative error status
+ */
+int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr);
+
+/**
+ * ionic_intr_free - Release a device interrupt index
+ * @lif: Logical interface
+ * @intr: Interrupt index
+ *
+ * Mark the interrupt index unused so that it can be reserved again.
+ */
+void ionic_intr_free(struct ionic_lif *lif, int intr);
+
+/**
+ * ionic_get_cmb - Reserve cmb pages
+ * @lif: Logical interface
+ * @pgid: First page index
+ * @pgaddr: First page bus addr (contiguous)
+ * @order: Log base two number of pages (PAGE_SIZE)
+ * @stride_log2: Size of stride to determine CMB pool
+ * @expdb: Will be set to true if this CMB region has expdb enabled
+ *
+ * Return: zero or negative error status
+ */
+int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr,
+ int order, u8 stride_log2, bool *expdb);
+
+/**
+ * ionic_put_cmb - Release cmb pages
+ * @lif: Logical interface
+ * @pgid: First page index
+ * @order: Log base two number of pages (PAGE_SIZE)
+ */
+void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order);
+
+#endif /* _IONIC_API_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_aux.c b/drivers/net/ethernet/pensando/ionic/ionic_aux.c
new file mode 100644
index 000000000000..a2be338eb3e5
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_aux.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/kernel.h>
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_aux.h"
+
+static DEFINE_IDA(aux_ida);
+
+/*
+ * Release callback installed on the auxiliary device's struct device:
+ * returns the device id to aux_ida and frees the containing ionic_aux_dev.
+ */
+static void ionic_auxbus_release(struct device *dev)
+{
+	struct ionic_aux_dev *ionic_adev =
+		container_of(dev, struct ionic_aux_dev, adev.dev);
+
+	ida_free(&aux_ida, ionic_adev->adev.id);
+	kfree(ionic_adev);
+}
+
+/*
+ * ionic_auxbus_register - create and add the "rdma" auxiliary device.
+ * @lif: Logical interface the auxiliary device is attached to
+ *
+ * Does nothing and returns 0 when the LIF does not advertise
+ * IONIC_LIF_CAP_RDMA.  Otherwise it allocates an ionic_aux_dev and an id,
+ * initializes and adds the auxiliary device, and stores it in
+ * lif->ionic_adev.
+ *
+ * Return: zero or negative error status
+ */
+int ionic_auxbus_register(struct ionic_lif *lif)
+{
+	struct ionic_aux_dev *ionic_adev;
+	struct auxiliary_device *aux_dev;
+	int err, id;
+
+	if (!(le64_to_cpu(lif->ionic->ident.lif.capabilities) & IONIC_LIF_CAP_RDMA))
+		return 0;
+
+	ionic_adev = kzalloc(sizeof(*ionic_adev), GFP_KERNEL);
+	if (!ionic_adev)
+		return -ENOMEM;
+
+	id = ida_alloc(&aux_ida, GFP_KERNEL);
+	if (id < 0) {
+		dev_err(lif->ionic->dev, "Failed to allocate aux id: %d\n", id);
+		err = id;
+		goto free_adev;
+	}
+
+	aux_dev = &ionic_adev->adev;
+	aux_dev->id = id;
+	aux_dev->name = "rdma";
+	aux_dev->dev.parent = &lif->ionic->pdev->dev;
+	aux_dev->dev.release = ionic_auxbus_release;
+	ionic_adev->lif = lif;
+
+	err = auxiliary_device_init(aux_dev);
+	if (err) {
+		dev_err(lif->ionic->dev, "Failed to initialize %s aux device: %d\n",
+			aux_dev->name, err);
+		goto free_id;
+	}
+
+	err = auxiliary_device_add(aux_dev);
+	if (err) {
+		dev_err(lif->ionic->dev, "Failed to add %s aux device: %d\n",
+			aux_dev->name, err);
+		/*
+		 * Once init has succeeded, the id and the memory are given
+		 * back by ionic_auxbus_release(), not freed by hand here.
+		 */
+		auxiliary_device_uninit(aux_dev);
+		return err;
+	}
+
+	lif->ionic_adev = ionic_adev;
+	return 0;
+
+free_id:
+	ida_free(&aux_ida, id);
+free_adev:
+	kfree(ionic_adev);
+	return err;
+}
+
+/*
+ * ionic_auxbus_unregister - remove the LIF's auxiliary device, if any.
+ * @lif: Logical interface
+ *
+ * Under lif->adev_lock: deletes and uninitializes the auxiliary device and
+ * clears lif->ionic_adev.  A LIF with no auxiliary device is left alone,
+ * so the call is safe to repeat.
+ */
+void ionic_auxbus_unregister(struct ionic_lif *lif)
+{
+	mutex_lock(&lif->adev_lock);
+
+	if (lif->ionic_adev) {
+		auxiliary_device_delete(&lif->ionic_adev->adev);
+		auxiliary_device_uninit(&lif->ionic_adev->adev);
+		lif->ionic_adev = NULL;
+	}
+
+	mutex_unlock(&lif->adev_lock);
+}
+
+/*
+ * Send an IONIC_CMD_RDMA_RESET_LIF device command for @lif and wait for it
+ * (up to DEVCMD_TIMEOUT) while holding the device command lock.  A failure
+ * is logged and otherwise ignored.
+ */
+void ionic_request_rdma_reset(struct ionic_lif *lif)
+{
+	union ionic_dev_cmd reset_cmd = {
+		.cmd.opcode = IONIC_CMD_RDMA_RESET_LIF,
+		.cmd.lif_index = cpu_to_le16(lif->index),
+	};
+	struct ionic *ionic = lif->ionic;
+	int rc;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_go(&ionic->idev, &reset_cmd);
+	rc = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	if (!rc)
+		return;
+
+	pr_warn("%s request_reset: error %d\n", __func__, rc);
+}
+EXPORT_SYMBOL_NS(ionic_request_rdma_reset, "NET_IONIC");
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_aux.h b/drivers/net/ethernet/pensando/ionic/ionic_aux.h
new file mode 100644
index 000000000000..f5528a9f187d
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_aux.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_AUX_H_
+#define _IONIC_AUX_H_
+
+/* Create and add the "rdma" auxiliary device for @lif; returns 0 or a negative errno. */
+int ionic_auxbus_register(struct ionic_lif *lif);
+/* Delete the auxiliary device of @lif, if one is registered. */
+void ionic_auxbus_unregister(struct ionic_lif *lif);
+
+#endif /* _IONIC_AUX_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 136bfa3516d0..70d86c5f52fb 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -9,6 +9,7 @@
#include "ionic.h"
#include "ionic_bus.h"
#include "ionic_lif.h"
+#include "ionic_aux.h"
#include "ionic_debugfs.h"
/* Supported devices */
@@ -271,6 +272,8 @@ static int ionic_setup_one(struct ionic *ionic)
}
ionic_debugfs_add_ident(ionic);
+ ionic_map_cmb(ionic);
+
err = ionic_init(ionic);
if (err) {
dev_err(dev, "Cannot init device: %d, aborting\n", err);
@@ -375,6 +378,8 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_deregister_devlink;
}
+ ionic_auxbus_register(ionic->lif);
+
mod_timer(&ionic->watchdog_timer,
round_jiffies(jiffies + ionic->watchdog_period));
ionic_queue_doorbell_check(ionic, IONIC_NAPI_DEADLINE);
@@ -416,6 +421,7 @@ static void ionic_remove(struct pci_dev *pdev)
if (ionic->lif->doorbell_wa)
cancel_delayed_work_sync(&ionic->doorbell_check_dwork);
+ ionic_auxbus_unregister(ionic->lif);
ionic_lif_unregister(ionic->lif);
ionic_devlink_unregister(ionic);
ionic_lif_deinit(ionic->lif);
@@ -445,6 +451,7 @@ static void ionic_reset_prepare(struct pci_dev *pdev)
timer_delete_sync(&ionic->watchdog_timer);
cancel_work_sync(&lif->deferred.work);
+ ionic_auxbus_unregister(ionic->lif);
mutex_lock(&lif->queue_lock);
ionic_stop_queues_reconfig(lif);
ionic_txrx_free(lif);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 093c5358b6e8..ab27e9225c1e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -199,13 +199,201 @@ void ionic_init_devinfo(struct ionic *ionic)
dev_dbg(ionic->dev, "fw_version %s\n", idev->dev_info.fw_version);
}
+/**
+ * ionic_map_disc_cmb - set up CMB mappings from the firmware-reported layout
+ * @ionic: device private data
+ *
+ * Issues the DISCOVER_CMB dev command under ionic->dev_cmd_lock, copies the
+ * reply into ionic->ident.cmb_layout and validates every region against
+ * ionic->bars[2].  It then allocates the in-use bitmap and records the bus
+ * address of the regular (DEVMEM) region and of each express doorbell
+ * region in ionic->idev.
+ *
+ * Region offsets and lengths are handled in units of 64KB pages.  Every
+ * validation failure warns and returns before any CMB field of idev has
+ * been written.
+ */
+static void ionic_map_disc_cmb(struct ionic *ionic)
+{
+	struct ionic_identity *ident = &ionic->ident;
+	u32 length_reg0, length, offset, num_regions;
+	struct ionic_dev_bar *bar = ionic->bars;
+	struct ionic_dev *idev = &ionic->idev;
+	struct device *dev = ionic->dev;
+	phys_addr_t region_addr;
+	int err, sz, i;
+	u64 end;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+
+	ionic_dev_cmd_discover_cmb(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	if (!err) {
+		/* never copy more than either side can hold */
+		sz = min(sizeof(ident->cmb_layout),
+			 sizeof(idev->dev_cmd_regs->data));
+		memcpy_fromio(&ident->cmb_layout,
+			      &idev->dev_cmd_regs->data, sz);
+	}
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	if (err) {
+		dev_warn(dev, "Cannot discover CMB layout, disabling CMB\n");
+		return;
+	}
+
+	bar += 2;
+
+	num_regions = le32_to_cpu(ident->cmb_layout.num_regions);
+	if (!num_regions || num_regions > IONIC_MAX_CMB_REGIONS) {
+		dev_warn(dev, "Invalid number of CMB entries (%d)\n",
+			 num_regions);
+		return;
+	}
+
+	dev_dbg(dev, "ionic_cmb_layout_identity num_regions %d flags %x:\n",
+		num_regions, le32_to_cpu(ident->cmb_layout.flags));
+
+	for (i = 0; i < num_regions; i++) {
+		offset = le32_to_cpu(ident->cmb_layout.region[i].offset);
+		length = le32_to_cpu(ident->cmb_layout.region[i].length);
+		/*
+		 * Widen before adding: both values come from firmware and
+		 * a 32-bit sum could wrap and slip past the bounds check.
+		 */
+		end = (u64)offset + length;
+
+		dev_dbg(dev, "CMB entry %d: bar_num %u cmb_type %u offset %x length %u\n",
+			i, ident->cmb_layout.region[i].bar_num,
+			ident->cmb_layout.region[i].cmb_type,
+			offset, length);
+
+		if (end > (bar->len >> IONIC_CMB_SHIFT_64K)) {
+			dev_warn(dev, "Out of bounds CMB region %d offset %x length %u\n",
+				 i, offset, length);
+			return;
+		}
+	}
+
+	/* if first entry matches PCI config, expdb is not supported */
+	if (ident->cmb_layout.region[0].bar_num == bar->res_index &&
+	    le32_to_cpu(ident->cmb_layout.region[0].length) == bar->len &&
+	    !ident->cmb_layout.region[0].offset) {
+		dev_warn(dev, "No CMB mapping discovered\n");
+		return;
+	}
+
+	/* process first entry for regular mapping */
+	length_reg0 = le32_to_cpu(ident->cmb_layout.region[0].length);
+	if (!length_reg0) {
+		dev_warn(dev, "region len = 0. No CMB mapping discovered\n");
+		return;
+	}
+
+	/* Verify first entry size matches expected 8MB size (in 64KB pages) */
+	if (length_reg0 != IONIC_BAR2_CMB_ENTRY_SIZE >> IONIC_CMB_SHIFT_64K) {
+		dev_warn(dev, "Unexpected CMB size in entry 0: %u pages\n",
+			 length_reg0);
+		return;
+	}
+
+	/* one bit per PAGE_SIZE page of the regular region */
+	sz = BITS_TO_LONGS((length_reg0 << IONIC_CMB_SHIFT_64K) /
+			    PAGE_SIZE) * sizeof(long);
+	idev->cmb_inuse = kzalloc(sz, GFP_KERNEL);
+	if (!idev->cmb_inuse) {
+		dev_warn(dev, "No memory for CMB, disabling\n");
+		idev->phy_cmb_pages = 0;
+		idev->phy_cmb_expdb64_pages = 0;
+		idev->phy_cmb_expdb128_pages = 0;
+		idev->phy_cmb_expdb256_pages = 0;
+		idev->phy_cmb_expdb512_pages = 0;
+		idev->cmb_npages = 0;
+		return;
+	}
+
+	for (i = 0; i < num_regions; i++) {
+		/* check this region matches first region length as to
+		 * ease implementation
+		 */
+		if (le32_to_cpu(ident->cmb_layout.region[i].length) !=
+		    length_reg0)
+			continue;
+
+		offset = le32_to_cpu(ident->cmb_layout.region[i].offset);
+
+		/*
+		 * The offset counts 64KB pages for every region type, the
+		 * same unit the bounds check above uses.  Widen before
+		 * shifting so a large offset cannot lose its upper bits.
+		 */
+		region_addr = bar->bus_addr +
+			      ((phys_addr_t)offset << IONIC_CMB_SHIFT_64K);
+
+		switch (ident->cmb_layout.region[i].cmb_type) {
+		case IONIC_CMB_TYPE_DEVMEM:
+			idev->phy_cmb_pages = region_addr;
+			idev->cmb_npages =
+			    (length_reg0 << IONIC_CMB_SHIFT_64K) / PAGE_SIZE;
+			/* regions reaching here all have length_reg0 pages */
+			dev_dbg(dev, "regular cmb mapping: bar->bus_addr %pa region[%d].length %u\n",
+				&bar->bus_addr, i, length_reg0);
+			dev_dbg(dev, "idev->phy_cmb_pages %pad, idev->cmb_npages %u\n",
+				&idev->phy_cmb_pages, idev->cmb_npages);
+			break;
+
+		case IONIC_CMB_TYPE_EXPDB64:
+			idev->phy_cmb_expdb64_pages = region_addr;
+			dev_dbg(dev, "idev->phy_cmb_expdb64_pages %pad\n",
+				&idev->phy_cmb_expdb64_pages);
+			break;
+
+		case IONIC_CMB_TYPE_EXPDB128:
+			idev->phy_cmb_expdb128_pages = region_addr;
+			dev_dbg(dev, "idev->phy_cmb_expdb128_pages %pad\n",
+				&idev->phy_cmb_expdb128_pages);
+			break;
+
+		case IONIC_CMB_TYPE_EXPDB256:
+			idev->phy_cmb_expdb256_pages = region_addr;
+			dev_dbg(dev, "idev->phy_cmb_expdb256_pages %pad\n",
+				&idev->phy_cmb_expdb256_pages);
+			break;
+
+		case IONIC_CMB_TYPE_EXPDB512:
+			idev->phy_cmb_expdb512_pages = region_addr;
+			dev_dbg(dev, "idev->phy_cmb_expdb512_pages %pad\n",
+				&idev->phy_cmb_expdb512_pages);
+			break;
+
+		default:
+			dev_warn(dev, "[%d] Invalid cmb_type (%d)\n",
+				 i, ident->cmb_layout.region[i].cmb_type);
+			break;
+		}
+	}
+}
+
+/*
+ * Classic CMB layout: all of ionic->bars[2] is treated as one regular
+ * region.  Record its bus address and page count, then allocate the in-use
+ * bitmap.  If the bitmap cannot be allocated, the address and page count
+ * are cleared again.
+ */
+static void ionic_map_classic_cmb(struct ionic *ionic)
+{
+	struct ionic_dev_bar *cmb_bar = &ionic->bars[2];
+	struct ionic_dev *idev = &ionic->idev;
+	struct device *dev = ionic->dev;
+	int bitmap_bytes;
+
+	/* classic CMB mapping */
+	idev->phy_cmb_pages = cmb_bar->bus_addr;
+	idev->cmb_npages = cmb_bar->len / PAGE_SIZE;
+	dev_dbg(dev, "classic cmb mapping: bar->bus_addr %pa bar->len %lu\n",
+		&cmb_bar->bus_addr, cmb_bar->len);
+	dev_dbg(dev, "idev->phy_cmb_pages %pad, idev->cmb_npages %u\n",
+		&idev->phy_cmb_pages, idev->cmb_npages);
+
+	/* one bit per CMB page */
+	bitmap_bytes = BITS_TO_LONGS(idev->cmb_npages) * sizeof(long);
+	idev->cmb_inuse = kzalloc(bitmap_bytes, GFP_KERNEL);
+	if (idev->cmb_inuse)
+		return;
+
+	idev->phy_cmb_pages = 0;
+	idev->cmb_npages = 0;
+}
+
+/**
+ * ionic_map_cmb - choose and run the CMB mapping method for this device
+ * @ionic: device private data
+ *
+ * Returns without mapping anything when PCI resource 4 is not a memory
+ * resource.  Otherwise the discovered layout is used if the device
+ * advertises IONIC_DEV_CAP_DISC_CMB, and the classic layout if it does not.
+ */
+void ionic_map_cmb(struct ionic *ionic)
+{
+	if (!(pci_resource_flags(ionic->pdev, 4) & IORESOURCE_MEM)) {
+		dev_dbg(ionic->dev, "No CMB, disabling\n");
+		return;
+	}
+
+	if (ionic->ident.dev.capabilities &
+	    cpu_to_le64(IONIC_DEV_CAP_DISC_CMB)) {
+		ionic_map_disc_cmb(ionic);
+		return;
+	}
+
+	ionic_map_classic_cmb(ionic);
+}
+
int ionic_dev_setup(struct ionic *ionic)
{
struct ionic_dev_bar *bar = ionic->bars;
unsigned int num_bars = ionic->num_bars;
struct ionic_dev *idev = &ionic->idev;
struct device *dev = ionic->dev;
- int size;
u32 sig;
int err;
@@ -255,16 +443,11 @@ int ionic_dev_setup(struct ionic *ionic)
mutex_init(&idev->cmb_inuse_lock);
if (num_bars < 3 || !ionic->bars[IONIC_PCI_BAR_CMB].len) {
idev->cmb_inuse = NULL;
+ idev->phy_cmb_pages = 0;
+ idev->cmb_npages = 0;
return 0;
}
- idev->phy_cmb_pages = bar->bus_addr;
- idev->cmb_npages = bar->len / PAGE_SIZE;
- size = BITS_TO_LONGS(idev->cmb_npages) * sizeof(long);
- idev->cmb_inuse = kzalloc(size, GFP_KERNEL);
- if (!idev->cmb_inuse)
- dev_warn(dev, "No memory for CMB, disabling\n");
-
return 0;
}
@@ -277,6 +460,11 @@ void ionic_dev_teardown(struct ionic *ionic)
idev->phy_cmb_pages = 0;
idev->cmb_npages = 0;
+ idev->phy_cmb_expdb64_pages = 0;
+ idev->phy_cmb_expdb128_pages = 0;
+ idev->phy_cmb_expdb256_pages = 0;
+ idev->phy_cmb_expdb512_pages = 0;
+
if (ionic->wq) {
destroy_workqueue(ionic->wq);
ionic->wq = NULL;
@@ -698,28 +886,79 @@ void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
ionic_dev_cmd_go(idev, &cmd);
}
+/*
+ * Build a DISCOVER_CMB dev command (only the opcode is set) and hand it to
+ * ionic_dev_cmd_go().  This function does not wait for the result itself.
+ */
+void ionic_dev_cmd_discover_cmb(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .discover_cmb.opcode = IONIC_CMD_DISCOVER_CMB,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
int ionic_db_page_num(struct ionic_lif *lif, int pid)
{
return (lif->hw_index * lif->dbid_count) + pid;
}
-int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, int order)
+int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr,
+ int order, u8 stride_log2, bool *expdb)
{
struct ionic_dev *idev = &lif->ionic->idev;
- int ret;
+ void __iomem *nonexpdb_pgptr;
+ phys_addr_t nonexpdb_pgaddr;
+ int i, idx;
mutex_lock(&idev->cmb_inuse_lock);
- ret = bitmap_find_free_region(idev->cmb_inuse, idev->cmb_npages, order);
+ idx = bitmap_find_free_region(idev->cmb_inuse, idev->cmb_npages, order);
mutex_unlock(&idev->cmb_inuse_lock);
- if (ret < 0)
- return ret;
+ if (idx < 0)
+ return idx;
+
+ *pgid = (u32)idx;
+
+ if (idev->phy_cmb_expdb64_pages &&
+ stride_log2 == IONIC_EXPDB_64B_WQE_LG2) {
+ *pgaddr = idev->phy_cmb_expdb64_pages + idx * PAGE_SIZE;
+ if (expdb)
+ *expdb = true;
+ } else if (idev->phy_cmb_expdb128_pages &&
+ stride_log2 == IONIC_EXPDB_128B_WQE_LG2) {
+ *pgaddr = idev->phy_cmb_expdb128_pages + idx * PAGE_SIZE;
+ if (expdb)
+ *expdb = true;
+ } else if (idev->phy_cmb_expdb256_pages &&
+ stride_log2 == IONIC_EXPDB_256B_WQE_LG2) {
+ *pgaddr = idev->phy_cmb_expdb256_pages + idx * PAGE_SIZE;
+ if (expdb)
+ *expdb = true;
+ } else if (idev->phy_cmb_expdb512_pages &&
+ stride_log2 == IONIC_EXPDB_512B_WQE_LG2) {
+ *pgaddr = idev->phy_cmb_expdb512_pages + idx * PAGE_SIZE;
+ if (expdb)
+ *expdb = true;
+ } else {
+ *pgaddr = idev->phy_cmb_pages + idx * PAGE_SIZE;
+ if (expdb)
+ *expdb = false;
+ }
- *pgid = ret;
- *pgaddr = idev->phy_cmb_pages + ret * PAGE_SIZE;
+ /* clear the requested CMB region, 1 PAGE_SIZE ioremap at a time */
+ nonexpdb_pgaddr = idev->phy_cmb_pages + idx * PAGE_SIZE;
+ for (i = 0; i < (1 << order); i++) {
+ nonexpdb_pgptr =
+ ioremap_wc(nonexpdb_pgaddr + i * PAGE_SIZE, PAGE_SIZE);
+ if (!nonexpdb_pgptr) {
+ ionic_put_cmb(lif, *pgid, order);
+ return -ENOMEM;
+ }
+ memset_io(nonexpdb_pgptr, 0, PAGE_SIZE);
+ iounmap(nonexpdb_pgptr);
+ }
return 0;
}
+EXPORT_SYMBOL_NS(ionic_get_cmb, "NET_IONIC");
void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order)
{
@@ -729,6 +968,7 @@ void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order)
bitmap_release_region(idev->cmb_inuse, pgid, order);
mutex_unlock(&idev->cmb_inuse_lock);
}
+EXPORT_SYMBOL_NS(ionic_put_cmb, "NET_IONIC");
int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
struct ionic_intr_info *intr,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index c8c710cfe70c..35566f97eaea 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -12,6 +12,7 @@
#include "ionic_if.h"
#include "ionic_regs.h"
+#include "ionic_api.h"
#define IONIC_MAX_TX_DESC 8192
#define IONIC_MAX_RX_DESC 16384
@@ -34,6 +35,11 @@
#define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */
#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 4) /* 4s */
+#define IONIC_EXPDB_64B_WQE_LG2 6
+#define IONIC_EXPDB_128B_WQE_LG2 7
+#define IONIC_EXPDB_256B_WQE_LG2 8
+#define IONIC_EXPDB_512B_WQE_LG2 9
+
struct ionic_dev_bar {
void __iomem *vaddr;
phys_addr_t bus_addr;
@@ -170,6 +176,11 @@ struct ionic_dev {
dma_addr_t phy_cmb_pages;
u32 cmb_npages;
+ dma_addr_t phy_cmb_expdb64_pages;
+ dma_addr_t phy_cmb_expdb128_pages;
+ dma_addr_t phy_cmb_expdb256_pages;
+ dma_addr_t phy_cmb_expdb512_pages;
+
u32 port_info_sz;
struct ionic_port_info *port_info;
dma_addr_t port_info_pa;
@@ -273,19 +284,6 @@ struct ionic_queue {
char name[IONIC_QUEUE_NAME_MAX_SZ];
} ____cacheline_aligned_in_smp;
-#define IONIC_INTR_INDEX_NOT_ASSIGNED -1
-#define IONIC_INTR_NAME_MAX_SZ 32
-
-struct ionic_intr_info {
- char name[IONIC_INTR_NAME_MAX_SZ];
- u64 rearm_count;
- unsigned int index;
- unsigned int vector;
- u32 dim_coal_hw;
- cpumask_var_t *affinity_mask;
- struct irq_affinity_notify aff_notify;
-};
-
struct ionic_cq {
struct ionic_lif *lif;
struct ionic_queue *bound_q;
@@ -363,8 +361,8 @@ void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
int ionic_db_page_num(struct ionic_lif *lif, int pid);
-int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, int order);
-void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order);
+void ionic_dev_cmd_discover_cmb(struct ionic_dev *idev);
+void ionic_map_cmb(struct ionic *ionic);
int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
struct ionic_intr_info *intr,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 9886cd66ce68..47559c909c8b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -56,6 +56,9 @@ enum ionic_cmd_opcode {
IONIC_CMD_VF_SETATTR = 61,
IONIC_CMD_VF_CTRL = 62,
+ /* CMB command */
+ IONIC_CMD_DISCOVER_CMB = 80,
+
/* QoS commands */
IONIC_CMD_QOS_CLASS_IDENTIFY = 240,
IONIC_CMD_QOS_CLASS_INIT = 241,
@@ -269,9 +272,11 @@ union ionic_drv_identity {
/**
* enum ionic_dev_capability - Device capabilities
* @IONIC_DEV_CAP_VF_CTRL: Device supports VF ctrl operations
+ * @IONIC_DEV_CAP_DISC_CMB: Device supports CMB discovery operations
*/
enum ionic_dev_capability {
IONIC_DEV_CAP_VF_CTRL = BIT(0),
+ IONIC_DEV_CAP_DISC_CMB = BIT(1),
};
/**
@@ -395,6 +400,7 @@ enum ionic_logical_qtype {
* @IONIC_Q_F_4X_DESC: Quadruple main descriptor size
* @IONIC_Q_F_4X_CQ_DESC: Quadruple cq descriptor size
* @IONIC_Q_F_4X_SG_DESC: Quadruple sg descriptor size
+ * @IONIC_QIDENT_F_EXPDB: Queue supports express doorbell
*/
enum ionic_q_feature {
IONIC_QIDENT_F_CQ = BIT_ULL(0),
@@ -407,6 +413,7 @@ enum ionic_q_feature {
IONIC_Q_F_4X_DESC = BIT_ULL(7),
IONIC_Q_F_4X_CQ_DESC = BIT_ULL(8),
IONIC_Q_F_4X_SG_DESC = BIT_ULL(9),
+ IONIC_QIDENT_F_EXPDB = BIT_ULL(10),
};
/**
@@ -495,6 +502,16 @@ union ionic_lif_config {
};
/**
+ * enum ionic_lif_rdma_cap_stats - LIF stat type
+ * @IONIC_LIF_RDMA_STAT_GLOBAL: Global stats
+ * @IONIC_LIF_RDMA_STAT_QP: Queue pair stats
+ */
+enum ionic_lif_rdma_cap_stats {
+ IONIC_LIF_RDMA_STAT_GLOBAL = BIT(0),
+ IONIC_LIF_RDMA_STAT_QP = BIT(1),
+};
+
+/**
* struct ionic_lif_identity - LIF identity information (type-specific)
*
* @capabilities: LIF capabilities
@@ -513,10 +530,10 @@ union ionic_lif_config {
* @eth.config: LIF config struct with features, mtu, mac, q counts
*
* @rdma: RDMA identify structure
- * @rdma.version: RDMA version of opcodes and queue descriptors
+ * @rdma.version: RDMA capability version
* @rdma.qp_opcodes: Number of RDMA queue pair opcodes supported
* @rdma.admin_opcodes: Number of RDMA admin opcodes supported
- * @rdma.rsvd: reserved byte(s)
+ * @rdma.minor_version: RDMA capability minor version
* @rdma.npts_per_lif: Page table size per LIF
* @rdma.nmrs_per_lif: Number of memory regions per LIF
* @rdma.nahs_per_lif: Number of address handles per LIF
@@ -526,12 +543,17 @@ union ionic_lif_config {
* @rdma.rrq_stride: Remote RQ work request stride
* @rdma.rsq_stride: Remote SQ work request stride
* @rdma.dcqcn_profiles: Number of DCQCN profiles
- * @rdma.rsvd_dimensions: reserved byte(s)
+ * @rdma.udma_shift: Log2 number of queues per queue group
+ * @rdma.rsvd_dimensions: Reserved byte
+ * @rdma.page_size_cap: Supported page sizes
* @rdma.aq_qtype: RDMA Admin Qtype
* @rdma.sq_qtype: RDMA Send Qtype
* @rdma.rq_qtype: RDMA Receive Qtype
* @rdma.cq_qtype: RDMA Completion Qtype
* @rdma.eq_qtype: RDMA Event Qtype
+ * @rdma.stats_type: Supported statistics type
+ * (enum ionic_lif_rdma_cap_stats)
+ * @rdma.rsvd1: Reserved byte(s)
* @words: word access to struct contents
*/
union ionic_lif_identity {
@@ -557,7 +579,7 @@ union ionic_lif_identity {
u8 version;
u8 qp_opcodes;
u8 admin_opcodes;
- u8 rsvd;
+ u8 minor_version;
__le32 npts_per_lif;
__le32 nmrs_per_lif;
__le32 nahs_per_lif;
@@ -567,12 +589,16 @@ union ionic_lif_identity {
u8 rrq_stride;
u8 rsq_stride;
u8 dcqcn_profiles;
- u8 rsvd_dimensions[10];
+ u8 udma_shift;
+ u8 rsvd_dimensions;
+ __le64 page_size_cap;
struct ionic_lif_logical_qtype aq_qtype;
struct ionic_lif_logical_qtype sq_qtype;
struct ionic_lif_logical_qtype rq_qtype;
struct ionic_lif_logical_qtype cq_qtype;
struct ionic_lif_logical_qtype eq_qtype;
+ __le16 stats_type;
+ u8 rsvd1[162];
} __packed rdma;
} __packed;
__le32 words[478];
@@ -2195,6 +2221,80 @@ struct ionic_vf_ctrl_comp {
};
/**
+ * struct ionic_discover_cmb_cmd - CMB discovery command
+ * @opcode: Opcode for the command
+ * @rsvd: Reserved bytes
+ */
+struct ionic_discover_cmb_cmd {
+ u8 opcode;
+ u8 rsvd[63];
+};
+
+/**
+ * struct ionic_discover_cmb_comp - CMB discover command completion.
+ * @status: Status of the command (enum ionic_status_code)
+ * @rsvd: Reserved bytes
+ */
+struct ionic_discover_cmb_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+#define IONIC_MAX_CMB_REGIONS 16
+#define IONIC_CMB_SHIFT_64K 16
+
+/**
+ * enum ionic_cmb_type - CMB region type (ionic_cmb_region.cmb_type)
+ * @IONIC_CMB_TYPE_DEVMEM: Regular CMB mapping
+ * @IONIC_CMB_TYPE_EXPDB64: Express doorbell region for 64B WQEs
+ * @IONIC_CMB_TYPE_EXPDB128: Express doorbell region for 128B WQEs
+ * @IONIC_CMB_TYPE_EXPDB256: Express doorbell region for 256B WQEs
+ * @IONIC_CMB_TYPE_EXPDB512: Express doorbell region for 512B WQEs
+ */
+enum ionic_cmb_type {
+ IONIC_CMB_TYPE_DEVMEM = 0,
+ IONIC_CMB_TYPE_EXPDB64 = 1,
+ IONIC_CMB_TYPE_EXPDB128 = 2,
+ IONIC_CMB_TYPE_EXPDB256 = 3,
+ IONIC_CMB_TYPE_EXPDB512 = 4,
+};
+
+/**
+ * union ionic_cmb_region - Configuration for CMB region
+ * @bar_num: CMB mapping number from FW
+ * @cmb_type: Type of CMB this region describes (enum ionic_cmb_type)
+ * @rsvd: Reserved
+ * @offset: Offset within BAR in 64KB pages
+ * @length: Length of the CMB region in 64KB pages
+ * @words: 32-bit words for direct access to the entire region
+ */
+union ionic_cmb_region {
+ struct {
+ u8 bar_num;
+ u8 cmb_type;
+ u8 rsvd[6];
+ __le32 offset;
+ __le32 length;
+ } __packed;
+ __le32 words[4];
+};
+
+/**
+ * union ionic_discover_cmb_identity - CMB layout identity structure
+ * @num_regions: Number of CMB regions, up to 16
+ * @flags: Feature and capability bits (bit 0 for express
+ * doorbell, bit 1 for 4K alignment indicator,
+ * bits 31-24 for version information)
+ * @region: CMB mappings region, entry 0 for regular
+ * mapping, entries 1-7 for WQE sizes 64,
+ * 128, 256, 512, 1024, 2048 and 4096 bytes
+ * @words: Full union buffer size
+ */
+union ionic_discover_cmb_identity {
+ struct {
+ __le32 num_regions;
+#define IONIC_CMB_FLAG_EXPDB BIT(0)
+#define IONIC_CMB_FLAG_4KALIGN BIT(1)
+#define IONIC_CMB_FLAG_VERSION 0xff000000
+ __le32 flags;
+ union ionic_cmb_region region[IONIC_MAX_CMB_REGIONS];
+ };
+ __le32 words[478];
+};
+
+/**
* struct ionic_qos_identify_cmd - QoS identify command
* @opcode: opcode
* @ver: Highest version of identify supported by driver
@@ -3054,6 +3154,8 @@ union ionic_dev_cmd {
struct ionic_vf_getattr_cmd vf_getattr;
struct ionic_vf_ctrl_cmd vf_ctrl;
+ struct ionic_discover_cmb_cmd discover_cmb;
+
struct ionic_lif_identify_cmd lif_identify;
struct ionic_lif_init_cmd lif_init;
struct ionic_lif_reset_cmd lif_reset;
@@ -3093,6 +3195,8 @@ union ionic_dev_cmd_comp {
struct ionic_vf_getattr_comp vf_getattr;
struct ionic_vf_ctrl_comp vf_ctrl;
+ struct ionic_discover_cmb_comp discover_cmb;
+
struct ionic_lif_identify_comp lif_identify;
struct ionic_lif_init_comp lif_init;
ionic_lif_reset_comp lif_reset;
@@ -3234,6 +3338,9 @@ union ionic_adminq_comp {
#define IONIC_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00
#define IONIC_BAR0_INTR_STATUS_OFFSET 0x1000
#define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000
+
+/* BAR2 */
+#define IONIC_BAR2_CMB_ENTRY_SIZE 0x800000
#define IONIC_DEV_CMD_DONE 0x00000001
#define IONIC_ASIC_TYPE_NONE 0
@@ -3287,6 +3394,7 @@ struct ionic_identity {
union ionic_port_identity port;
union ionic_qos_identity qos;
union ionic_q_identity txq;
+ union ionic_discover_cmb_identity cmb_layout;
};
#endif /* _IONIC_IF_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 48cb5d30b5f6..b28966ae50c2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -19,6 +19,7 @@
#include "ionic_bus.h"
#include "ionic_dev.h"
#include "ionic_lif.h"
+#include "ionic_aux.h"
#include "ionic_txrx.h"
#include "ionic_ethtool.h"
#include "ionic_debugfs.h"
@@ -243,29 +244,36 @@ static int ionic_request_irq(struct ionic_lif *lif, struct ionic_qcq *qcq)
0, intr->name, &qcq->napi);
}
-static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
+int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
{
struct ionic *ionic = lif->ionic;
- int index;
+ int index, err;
index = find_first_zero_bit(ionic->intrs, ionic->nintrs);
- if (index == ionic->nintrs) {
- netdev_warn(lif->netdev, "%s: no intr, index=%d nintrs=%d\n",
- __func__, index, ionic->nintrs);
+ if (index == ionic->nintrs)
return -ENOSPC;
- }
set_bit(index, ionic->intrs);
ionic_intr_init(&ionic->idev, intr, index);
+ err = ionic_bus_get_irq(ionic, intr->index);
+ if (err < 0) {
+ clear_bit(index, ionic->intrs);
+ return err;
+ }
+
+ intr->vector = err;
+
return 0;
}
+EXPORT_SYMBOL_NS(ionic_intr_alloc, "NET_IONIC");
-static void ionic_intr_free(struct ionic *ionic, int index)
+void ionic_intr_free(struct ionic_lif *lif, int index)
{
- if (index != IONIC_INTR_INDEX_NOT_ASSIGNED && index < ionic->nintrs)
- clear_bit(index, ionic->intrs);
+ if (index != IONIC_INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs)
+ clear_bit(index, lif->ionic->intrs);
}
+EXPORT_SYMBOL_NS(ionic_intr_free, "NET_IONIC");
static void ionic_irq_aff_notify(struct irq_affinity_notify *notify,
const cpumask_t *mask)
@@ -400,7 +408,7 @@ static void ionic_qcq_intr_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
irq_set_affinity_hint(qcq->intr.vector, NULL);
devm_free_irq(lif->ionic->dev, qcq->intr.vector, &qcq->napi);
qcq->intr.vector = 0;
- ionic_intr_free(lif->ionic, qcq->intr.index);
+ ionic_intr_free(lif, qcq->intr.index);
qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED;
}
@@ -510,13 +518,6 @@ static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qc
goto err_out;
}
- err = ionic_bus_get_irq(lif->ionic, qcq->intr.index);
- if (err < 0) {
- netdev_warn(lif->netdev, "no vector for %s: %d\n",
- qcq->q.name, err);
- goto err_out_free_intr;
- }
- qcq->intr.vector = err;
ionic_intr_mask_assert(lif->ionic->idev.intr_ctrl, qcq->intr.index,
IONIC_INTR_MASK_SET);
@@ -545,7 +546,7 @@ static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qc
return 0;
err_out_free_intr:
- ionic_intr_free(lif->ionic, qcq->intr.index);
+ ionic_intr_free(lif, qcq->intr.index);
err_out:
return err;
}
@@ -672,7 +673,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
new->cmb_order = order_base_2(new->cmb_q_size / PAGE_SIZE);
err = ionic_get_cmb(lif, &new->cmb_pgid, &new->cmb_q_base_pa,
- new->cmb_order);
+ new->cmb_order, 0, NULL);
if (err) {
netdev_err(lif->netdev,
"Cannot allocate queue order %d from cmb: err %d\n",
@@ -740,7 +741,7 @@ err_out_free_q:
err_out_free_irq:
if (flags & IONIC_QCQ_F_INTR) {
devm_free_irq(dev, new->intr.vector, &new->napi);
- ionic_intr_free(lif->ionic, new->intr.index);
+ ionic_intr_free(lif, new->intr.index);
}
err_out_free_page_pool:
page_pool_destroy(new->q.page_pool);
@@ -3293,6 +3294,7 @@ int ionic_lif_alloc(struct ionic *ionic)
mutex_init(&lif->queue_lock);
mutex_init(&lif->config_lock);
+ mutex_init(&lif->adev_lock);
spin_lock_init(&lif->adminq_lock);
@@ -3349,6 +3351,7 @@ err_out_free_lif_info:
lif->info = NULL;
lif->info_pa = 0;
err_out_free_mutex:
+ mutex_destroy(&lif->adev_lock);
mutex_destroy(&lif->config_lock);
mutex_destroy(&lif->queue_lock);
err_out_free_netdev:
@@ -3384,6 +3387,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
netif_device_detach(lif->netdev);
+ ionic_auxbus_unregister(ionic->lif);
mutex_lock(&lif->queue_lock);
if (test_bit(IONIC_LIF_F_UP, lif->state)) {
dev_info(ionic->dev, "Surprise FW stop, stopping queues\n");
@@ -3446,6 +3450,8 @@ int ionic_restart_lif(struct ionic_lif *lif)
netif_device_attach(lif->netdev);
ionic_queue_doorbell_check(ionic, IONIC_NAPI_DEADLINE);
+ ionic_auxbus_register(ionic->lif);
+
return 0;
err_txrx_free:
@@ -3528,6 +3534,7 @@ void ionic_lif_free(struct ionic_lif *lif)
mutex_destroy(&lif->config_lock);
mutex_destroy(&lif->queue_lock);
+ mutex_destroy(&lif->adev_lock);
/* free netdev & lif */
ionic_debugfs_del_lif(lif);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index e01756fb7fdd..43bdd0fb8733 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -10,6 +10,7 @@
#include <linux/dim.h>
#include <linux/pci.h>
#include "ionic_rx_filter.h"
+#include "ionic_api.h"
#define IONIC_ADMINQ_LENGTH 16 /* must be a power of two */
#define IONIC_NOTIFYQ_LENGTH 64 /* must be a power of two */
@@ -225,6 +226,8 @@ struct ionic_lif {
dma_addr_t info_pa;
u32 info_sz;
struct ionic_qtype_info qtype_info[IONIC_QTYPE_MAX];
+ struct ionic_aux_dev *ionic_adev;
+ struct mutex adev_lock; /* lock for aux_dev actions */
u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE];
u8 *rss_ind_tbl;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 0e60a6bef99a..14dc055be3e9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -72,7 +72,7 @@ static const char *ionic_error_to_str(enum ionic_status_code code)
}
}
-static int ionic_error_to_errno(enum ionic_status_code code)
+int ionic_error_to_errno(enum ionic_status_code code)
{
switch (code) {
case IONIC_RC_SUCCESS:
@@ -114,6 +114,7 @@ static int ionic_error_to_errno(enum ionic_status_code code)
return -EIO;
}
}
+EXPORT_SYMBOL_NS(ionic_error_to_errno, "NET_IONIC");
static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
{
@@ -480,6 +481,7 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
{
return __ionic_adminq_post_wait(lif, ctx, true);
}
+EXPORT_SYMBOL_NS(ionic_adminq_post_wait, "NET_IONIC");
int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
{
diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
index 4bfd03724ad6..b26a468ddc98 100644
--- a/drivers/scsi/aic94xx/aic94xx_task.c
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
@@ -488,7 +488,6 @@ static int asd_build_ssp_ascb(struct asd_ascb *ascb, struct sas_task *task,
scb->ssp_task.conn_handle = cpu_to_le16(
(u16)(unsigned long)dev->lldd_dev);
scb->ssp_task.data_dir = data_dir_flags[task->data_dir];
- scb->ssp_task.retry_count = scb->ssp_task.retry_count;
ascb->tasklet_complete = asd_task_tasklet_complete;
diff --git a/drivers/scsi/bfa/bfa_core.c b/drivers/scsi/bfa/bfa_core.c
index a99a101b95ef..2559df8baa05 100644
--- a/drivers/scsi/bfa/bfa_core.c
+++ b/drivers/scsi/bfa/bfa_core.c
@@ -1282,7 +1282,6 @@ bfa_iocfc_cfgrsp(struct bfa_s *bfa)
struct bfi_iocfc_cfgrsp_s *cfgrsp = iocfc->cfgrsp;
struct bfa_iocfc_fwcfg_s *fwcfg = &cfgrsp->fwcfg;
- fwcfg->num_cqs = fwcfg->num_cqs;
fwcfg->num_ioim_reqs = be16_to_cpu(fwcfg->num_ioim_reqs);
fwcfg->num_fwtio_reqs = be16_to_cpu(fwcfg->num_fwtio_reqs);
fwcfg->num_tskim_reqs = be16_to_cpu(fwcfg->num_tskim_reqs);
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index a516df019c22..010a1df37f15 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -960,7 +960,7 @@ csio_wr_copy_to_wrp(void *data_buf, struct csio_wr_pair *wrp,
memcpy((uint8_t *) wrp->addr1 + wr_off, data_buf, nbytes);
data_len -= nbytes;
- /* Write the remaining data from the begining of circular buffer */
+ /* Write the remaining data from the beginning of circular buffer */
if (data_len) {
CSIO_DB_ASSERT(data_len <= wrp->size2);
CSIO_DB_ASSERT(wrp->addr2 != NULL);
@@ -1224,7 +1224,7 @@ csio_wr_process_iq(struct csio_hw *hw, struct csio_q *q,
/*
* We need to re-arm SGE interrupts in case we got a stray interrupt,
- * especially in msix mode. With INTx, this may be a common occurence.
+ * especially in msix mode. With INTx, this may be a common occurrence.
*/
if (unlikely(!q->inc_idx)) {
CSIO_INC_STATS(q, n_stray_comp);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index d1a4cc69d408..30a9c6612651 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -876,7 +876,7 @@ static int hisi_sas_dev_found(struct domain_device *device)
device->lldd_dev = sas_dev;
hisi_hba->hw->setup_itct(hisi_hba, sas_dev);
- if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
+ if (dev_parent_is_expander(device)) {
int phy_no;
phy_no = sas_find_attached_phy_id(&parent_dev->ex_dev, device);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index 4431698a5d78..f3516a0611dd 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -925,7 +925,6 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba,
struct device *dev = hisi_hba->dev;
u64 qw0, device_id = sas_dev->device_id;
struct hisi_sas_itct *itct = &hisi_hba->itct[device_id];
- struct domain_device *parent_dev = device->parent;
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_port *port = to_hisi_sas_port(sas_port);
u64 sas_addr;
@@ -942,7 +941,7 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba,
break;
case SAS_SATA_DEV:
case SAS_SATA_PENDING:
- if (parent_dev && dev_is_expander(parent_dev->dev_type))
+ if (dev_parent_is_expander(device))
qw0 = HISI_SAS_DEV_TYPE_STP << ITCT_HDR_DEV_TYPE_OFF;
else
qw0 = HISI_SAS_DEV_TYPE_SATA << ITCT_HDR_DEV_TYPE_OFF;
@@ -2494,7 +2493,6 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba,
{
struct sas_task *task = slot->task;
struct domain_device *device = task->dev;
- struct domain_device *parent_dev = device->parent;
struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr;
struct asd_sas_port *sas_port = device->port;
@@ -2509,7 +2507,7 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba,
/* create header */
/* dw0 */
dw0 = port->id << CMD_HDR_PORT_OFF;
- if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
+ if (dev_parent_is_expander(device)) {
dw0 |= 3 << CMD_HDR_CMD_OFF;
} else {
phy_id = device->phy->identify.phy_identifier;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 2f3d61abab3a..2f9e01717ef3 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -874,7 +874,6 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba,
struct device *dev = hisi_hba->dev;
u64 qw0, device_id = sas_dev->device_id;
struct hisi_sas_itct *itct = &hisi_hba->itct[device_id];
- struct domain_device *parent_dev = device->parent;
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_port *port = to_hisi_sas_port(sas_port);
u64 sas_addr;
@@ -891,7 +890,7 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba,
break;
case SAS_SATA_DEV:
case SAS_SATA_PENDING:
- if (parent_dev && dev_is_expander(parent_dev->dev_type))
+ if (dev_parent_is_expander(device))
qw0 = HISI_SAS_DEV_TYPE_STP << ITCT_HDR_DEV_TYPE_OFF;
else
qw0 = HISI_SAS_DEV_TYPE_SATA << ITCT_HDR_DEV_TYPE_OFF;
@@ -1476,7 +1475,6 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba,
{
struct sas_task *task = slot->task;
struct domain_device *device = task->dev;
- struct domain_device *parent_dev = device->parent;
struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr;
struct asd_sas_port *sas_port = device->port;
@@ -1487,7 +1485,7 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba,
u32 dw1 = 0, dw2 = 0;
hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF);
- if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
+ if (dev_parent_is_expander(device)) {
hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF);
} else {
phy_id = device->phy->identify.phy_identifier;
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c73a71ac3c29..3654b12c5d5a 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2662,10 +2662,8 @@ static void complete_scsi_command(struct CommandList *cp)
case CMD_TARGET_STATUS:
cmd->result |= ei->ScsiStatus;
/* copy the sense data */
- if (SCSI_SENSE_BUFFERSIZE < sizeof(ei->SenseInfo))
- sense_data_size = SCSI_SENSE_BUFFERSIZE;
- else
- sense_data_size = sizeof(ei->SenseInfo);
+ sense_data_size = min_t(unsigned long, SCSI_SENSE_BUFFERSIZE,
+ sizeof(ei->SenseInfo));
if (ei->SenseLen < sense_data_size)
sense_data_size = ei->SenseLen;
memcpy(cmd->sense_buffer, ei->SenseInfo, sense_data_size);
@@ -3628,10 +3626,7 @@ static bool hpsa_vpd_page_supported(struct ctlr_info *h,
if (rc != 0)
goto exit_unsupported;
pages = buf[3];
- if ((pages + HPSA_VPD_HEADER_SZ) <= 255)
- bufsize = pages + HPSA_VPD_HEADER_SZ;
- else
- bufsize = 255;
+ bufsize = min(pages + HPSA_VPD_HEADER_SZ, 255);
/* Get the whole VPD page list */
rc = hpsa_scsi_do_inquiry(h, scsi3addr,
@@ -6407,18 +6402,14 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h,
return -EINVAL;
}
if (iocommand->buf_size > 0) {
- buff = kmalloc(iocommand->buf_size, GFP_KERNEL);
- if (buff == NULL)
- return -ENOMEM;
if (iocommand->Request.Type.Direction & XFER_WRITE) {
- /* Copy the data into the buffer we created */
- if (copy_from_user(buff, iocommand->buf,
- iocommand->buf_size)) {
- rc = -EFAULT;
- goto out_kfree;
- }
+ buff = memdup_user(iocommand->buf, iocommand->buf_size);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
} else {
- memset(buff, 0, iocommand->buf_size);
+ buff = kzalloc(iocommand->buf_size, GFP_KERNEL);
+ if (!buff)
+ return -ENOMEM;
}
}
c = cmd_alloc(h);
@@ -6478,7 +6469,6 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h,
}
out:
cmd_free(h, c);
-out_kfree:
kfree(buff);
return rc;
}
@@ -6522,18 +6512,21 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h,
while (left) {
sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
buff_size[sg_used] = sz;
- buff[sg_used] = kmalloc(sz, GFP_KERNEL);
- if (buff[sg_used] == NULL) {
- status = -ENOMEM;
- goto cleanup1;
- }
+
if (ioc->Request.Type.Direction & XFER_WRITE) {
- if (copy_from_user(buff[sg_used], data_ptr, sz)) {
- status = -EFAULT;
+ buff[sg_used] = memdup_user(data_ptr, sz);
+ if (IS_ERR(buff[sg_used])) {
+ status = PTR_ERR(buff[sg_used]);
goto cleanup1;
}
- } else
- memset(buff[sg_used], 0, sz);
+ } else {
+ buff[sg_used] = kzalloc(sz, GFP_KERNEL);
+ if (!buff[sg_used]) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ }
+
left -= sz;
data_ptr += sz;
sg_used++;
@@ -7632,8 +7625,8 @@ static void hpsa_free_cfgtables(struct ctlr_info *h)
}
/* Find and map CISS config table and transfer table
-+ * several items must be unmapped (freed) later
-+ * */
+ * several items must be unmapped (freed) later
+ */
static int hpsa_find_cfgtables(struct ctlr_info *h)
{
u64 cfg_offset;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index dd6754db7e4c..44214884deaf 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4281,11 +4281,11 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
}
if (ioa_cfg->sis64)
- ioa_data = vmalloc(array_size(IPR_FMT3_MAX_NUM_DUMP_PAGES,
- sizeof(__be32 *)));
+ ioa_data = vmalloc_array(IPR_FMT3_MAX_NUM_DUMP_PAGES,
+ sizeof(__be32 *));
else
- ioa_data = vmalloc(array_size(IPR_FMT2_MAX_NUM_DUMP_PAGES,
- sizeof(__be32 *)));
+ ioa_data = vmalloc_array(IPR_FMT2_MAX_NUM_DUMP_PAGES,
+ sizeof(__be32 *));
if (!ioa_data) {
ipr_err("Dump memory allocation failed\n");
diff --git a/drivers/scsi/isci/remote_device.c b/drivers/scsi/isci/remote_device.c
index 82deb6a83a8c..4c7462965ea1 100644
--- a/drivers/scsi/isci/remote_device.c
+++ b/drivers/scsi/isci/remote_device.c
@@ -1434,7 +1434,7 @@ static enum sci_status isci_remote_device_construct(struct isci_port *iport,
struct domain_device *dev = idev->domain_dev;
enum sci_status status;
- if (dev->parent && dev_is_expander(dev->parent->dev_type))
+ if (dev_parent_is_expander(dev))
status = sci_remote_device_ea_construct(iport, idev);
else
status = sci_remote_device_da_construct(iport, idev);
diff --git a/drivers/scsi/libfc/fc_encode.h b/drivers/scsi/libfc/fc_encode.h
index 02e31db31d68..e046091a549a 100644
--- a/drivers/scsi/libfc/fc_encode.h
+++ b/drivers/scsi/libfc/fc_encode.h
@@ -356,7 +356,7 @@ static inline int fc_ct_ms_fill(struct fc_lport *lport,
put_unaligned_be16(len, &entry->len);
snprintf((char *)&entry->value,
FC_FDMI_HBA_ATTR_OSNAMEVERSION_LEN,
- "%s v%s",
+ "%.62s v%.62s",
init_utsname()->sysname,
init_utsname()->release);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 869b5d4db44c..d953225f6cc2 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1313,10 +1313,7 @@ static int sas_check_parent_topology(struct domain_device *child)
int i;
int res = 0;
- if (!child->parent)
- return 0;
-
- if (!dev_is_expander(child->parent->dev_type))
+ if (!dev_parent_is_expander(child))
return 0;
parent_ex = &child->parent->ex_dev;
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index fe4fb67eb50c..224edacf2d8e 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -661,15 +661,12 @@ struct lpfc_vport {
uint32_t num_disc_nodes; /* in addition to hba_state */
uint32_t gidft_inp; /* cnt of outstanding GID_FTs */
- uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */
uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */
uint32_t fc_rscn_flush; /* flag use of fc_rscn_id_list */
struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
struct lpfc_name fc_nodename; /* fc nodename */
struct lpfc_name fc_portname; /* fc portname */
- struct lpfc_work_evt disc_timeout_evt;
-
struct timer_list fc_disctmo; /* Discovery rescue timer */
uint8_t fc_ns_retry; /* retries for fabric nameserver */
uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */
@@ -744,12 +741,6 @@ struct lpfc_vport {
struct lpfc_vmid_priority_info vmid_priority;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
- struct dentry *debug_disc_trc;
- struct dentry *debug_nodelist;
- struct dentry *debug_nvmestat;
- struct dentry *debug_scsistat;
- struct dentry *debug_ioktime;
- struct dentry *debug_hdwqstat;
struct dentry *vport_debugfs_root;
struct lpfc_debugfs_trc *disc_trc;
atomic_t disc_trc_cnt;
@@ -767,7 +758,6 @@ struct lpfc_vport {
/* There is a single nvme instance per vport. */
struct nvme_fc_local_port *localport;
uint8_t nvmei_support; /* driver supports NVME Initiator */
- uint32_t last_fcp_wqidx;
uint32_t rcv_flogi_cnt; /* How many unsol FLOGIs ACK'd. */
};
@@ -1060,8 +1050,6 @@ struct lpfc_hba {
struct lpfc_dmabuf hbqslimp;
- uint16_t pci_cfg_value;
-
uint8_t fc_linkspeed; /* Link speed after last READ_LA */
uint32_t fc_eventTag; /* event tag for link attention */
@@ -1088,7 +1076,6 @@ struct lpfc_hba {
struct lpfc_stats fc_stat;
- struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */
uint32_t nport_event_cnt; /* timestamp for nlplist entry */
uint8_t wwnn[8];
@@ -1229,9 +1216,6 @@ struct lpfc_hba {
uint32_t hbq_count; /* Count of configured HBQs */
struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */
- atomic_t fcp_qidx; /* next FCP WQ (RR Policy) */
- atomic_t nvme_qidx; /* next NVME WQ (RR Policy) */
-
phys_addr_t pci_bar0_map; /* Physical address for PCI BAR0 */
phys_addr_t pci_bar1_map; /* Physical address for PCI BAR1 */
phys_addr_t pci_bar2_map; /* Physical address for PCI BAR2 */
@@ -1348,30 +1332,9 @@ struct lpfc_hba {
unsigned long last_ramp_down_time;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct dentry *hba_debugfs_root;
- atomic_t debugfs_vport_count;
- struct dentry *debug_multixri_pools;
- struct dentry *debug_hbqinfo;
- struct dentry *debug_dumpHostSlim;
- struct dentry *debug_dumpHBASlim;
- struct dentry *debug_InjErrLBA; /* LBA to inject errors at */
- struct dentry *debug_InjErrNPortID; /* NPortID to inject errors at */
- struct dentry *debug_InjErrWWPN; /* WWPN to inject errors at */
- struct dentry *debug_writeGuard; /* inject write guard_tag errors */
- struct dentry *debug_writeApp; /* inject write app_tag errors */
- struct dentry *debug_writeRef; /* inject write ref_tag errors */
- struct dentry *debug_readGuard; /* inject read guard_tag errors */
- struct dentry *debug_readApp; /* inject read app_tag errors */
- struct dentry *debug_readRef; /* inject read ref_tag errors */
-
- struct dentry *debug_nvmeio_trc;
+ unsigned int debugfs_vport_count;
+
struct lpfc_debugfs_nvmeio_trc *nvmeio_trc;
- struct dentry *debug_hdwqinfo;
-#ifdef LPFC_HDWQ_LOCK_STAT
- struct dentry *debug_lockstat;
-#endif
- struct dentry *debug_cgn_buffer;
- struct dentry *debug_rx_monitor;
- struct dentry *debug_ras_log;
atomic_t nvmeio_trc_cnt;
uint32_t nvmeio_trc_size;
uint32_t nvmeio_trc_output_idx;
@@ -1388,19 +1351,10 @@ struct lpfc_hba {
sector_t lpfc_injerr_lba;
#define LPFC_INJERR_LBA_OFF (sector_t)(-1)
- struct dentry *debug_slow_ring_trc;
struct lpfc_debugfs_trc *slow_ring_trc;
atomic_t slow_ring_trc_cnt;
/* iDiag debugfs sub-directory */
struct dentry *idiag_root;
- struct dentry *idiag_pci_cfg;
- struct dentry *idiag_bar_acc;
- struct dentry *idiag_que_info;
- struct dentry *idiag_que_acc;
- struct dentry *idiag_drb_acc;
- struct dentry *idiag_ctl_acc;
- struct dentry *idiag_mbx_acc;
- struct dentry *idiag_ext_acc;
uint8_t lpfc_idiag_last_eq;
#endif
uint16_t nvmeio_trc_on;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 7c4d7bb3a56f..92b5b2dbe847 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2373,93 +2373,117 @@ out:
static ssize_t
lpfc_debugfs_dif_err_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
struct lpfc_hba *phba = file->private_data;
int kind = debugfs_get_aux_num(file);
- char cbuf[32];
- uint64_t tmp = 0;
+ char cbuf[32] = {0};
int cnt = 0;
- if (kind == writeGuard)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wgrd_cnt);
- else if (kind == writeApp)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wapp_cnt);
- else if (kind == writeRef)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wref_cnt);
- else if (kind == readGuard)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rgrd_cnt);
- else if (kind == readApp)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rapp_cnt);
- else if (kind == readRef)
- cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rref_cnt);
- else if (kind == InjErrNPortID)
- cnt = scnprintf(cbuf, 32, "0x%06x\n",
+ switch (kind) {
+ case writeGuard:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_wgrd_cnt);
+ break;
+ case writeApp:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_wapp_cnt);
+ break;
+ case writeRef:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_wref_cnt);
+ break;
+ case readGuard:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_rgrd_cnt);
+ break;
+ case readApp:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_rapp_cnt);
+ break;
+ case readRef:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "%u\n",
+ phba->lpfc_injerr_rref_cnt);
+ break;
+ case InjErrNPortID:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "0x%06x\n",
phba->lpfc_injerr_nportid);
- else if (kind == InjErrWWPN) {
- memcpy(&tmp, &phba->lpfc_injerr_wwpn, sizeof(struct lpfc_name));
- tmp = cpu_to_be64(tmp);
- cnt = scnprintf(cbuf, 32, "0x%016llx\n", tmp);
- } else if (kind == InjErrLBA) {
- if (phba->lpfc_injerr_lba == (sector_t)(-1))
- cnt = scnprintf(cbuf, 32, "off\n");
+ break;
+ case InjErrWWPN:
+ cnt = scnprintf(cbuf, sizeof(cbuf), "0x%016llx\n",
+ be64_to_cpu(phba->lpfc_injerr_wwpn.u.wwn_be));
+ break;
+ case InjErrLBA:
+ if (phba->lpfc_injerr_lba == LPFC_INJERR_LBA_OFF)
+ cnt = scnprintf(cbuf, sizeof(cbuf), "off\n");
else
- cnt = scnprintf(cbuf, 32, "0x%llx\n",
- (uint64_t) phba->lpfc_injerr_lba);
- } else
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0547 Unknown debugfs error injection entry\n");
+ cnt = scnprintf(cbuf, sizeof(cbuf), "0x%llx\n",
+ (uint64_t)phba->lpfc_injerr_lba);
+ break;
+ default:
+ lpfc_log_msg(phba, KERN_WARNING, LOG_INIT,
+ "0547 Unknown debugfs error injection entry\n");
+ break;
+ }
return simple_read_from_buffer(buf, nbytes, ppos, &cbuf, cnt);
}
static ssize_t
lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *ppos)
+ size_t nbytes, loff_t *ppos)
{
struct lpfc_hba *phba = file->private_data;
int kind = debugfs_get_aux_num(file);
- char dstbuf[33];
- uint64_t tmp = 0;
- int size;
+ char dstbuf[33] = {0};
+ unsigned long long tmp;
+ unsigned long size;
- memset(dstbuf, 0, 33);
- size = (nbytes < 32) ? nbytes : 32;
+ size = (nbytes < (sizeof(dstbuf) - 1)) ? nbytes : (sizeof(dstbuf) - 1);
if (copy_from_user(dstbuf, buf, size))
return -EFAULT;
- if (kind == InjErrLBA) {
- if ((dstbuf[0] == 'o') && (dstbuf[1] == 'f') &&
- (dstbuf[2] == 'f'))
- tmp = (uint64_t)(-1);
+ if (kstrtoull(dstbuf, 0, &tmp)) {
+ if (kind != InjErrLBA || !strstr(dstbuf, "off"))
+ return -EINVAL;
}
- if ((tmp == 0) && (kstrtoull(dstbuf, 0, &tmp)))
- return -EINVAL;
-
- if (kind == writeGuard)
+ switch (kind) {
+ case writeGuard:
phba->lpfc_injerr_wgrd_cnt = (uint32_t)tmp;
- else if (kind == writeApp)
+ break;
+ case writeApp:
phba->lpfc_injerr_wapp_cnt = (uint32_t)tmp;
- else if (kind == writeRef)
+ break;
+ case writeRef:
phba->lpfc_injerr_wref_cnt = (uint32_t)tmp;
- else if (kind == readGuard)
+ break;
+ case readGuard:
phba->lpfc_injerr_rgrd_cnt = (uint32_t)tmp;
- else if (kind == readApp)
+ break;
+ case readApp:
phba->lpfc_injerr_rapp_cnt = (uint32_t)tmp;
- else if (kind == readRef)
+ break;
+ case readRef:
phba->lpfc_injerr_rref_cnt = (uint32_t)tmp;
- else if (kind == InjErrLBA)
- phba->lpfc_injerr_lba = (sector_t)tmp;
- else if (kind == InjErrNPortID)
+ break;
+ case InjErrLBA:
+ if (strstr(dstbuf, "off"))
+ phba->lpfc_injerr_lba = LPFC_INJERR_LBA_OFF;
+ else
+ phba->lpfc_injerr_lba = (sector_t)tmp;
+ break;
+ case InjErrNPortID:
phba->lpfc_injerr_nportid = (uint32_t)(tmp & Mask_DID);
- else if (kind == InjErrWWPN) {
- tmp = cpu_to_be64(tmp);
- memcpy(&phba->lpfc_injerr_wwpn, &tmp, sizeof(struct lpfc_name));
- } else
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0548 Unknown debugfs error injection entry\n");
-
+ break;
+ case InjErrWWPN:
+ phba->lpfc_injerr_wwpn.u.wwn_be = cpu_to_be64(tmp);
+ break;
+ default:
+ lpfc_log_msg(phba, KERN_WARNING, LOG_INIT,
+ "0548 Unknown debugfs error injection entry\n");
+ break;
+ }
return nbytes;
}
@@ -5728,7 +5752,7 @@ static const struct file_operations lpfc_debugfs_op_slow_ring_trc = {
};
static struct dentry *lpfc_debugfs_root = NULL;
-static atomic_t lpfc_debugfs_hba_count;
+static unsigned int lpfc_debugfs_hba_count;
/*
* File operations for the iDiag debugfs
@@ -6050,7 +6074,12 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
/* Setup lpfc root directory */
if (!lpfc_debugfs_root) {
lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL);
- atomic_set(&lpfc_debugfs_hba_count, 0);
+ lpfc_debugfs_hba_count = 0;
+ if (IS_ERR(lpfc_debugfs_root)) {
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_INIT,
+ "0527 Cannot create debugfs lpfc\n");
+ return;
+ }
}
if (!lpfc_debugfs_start_time)
lpfc_debugfs_start_time = jiffies;
@@ -6061,150 +6090,96 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
pport_setup = true;
phba->hba_debugfs_root =
debugfs_create_dir(name, lpfc_debugfs_root);
- atomic_inc(&lpfc_debugfs_hba_count);
- atomic_set(&phba->debugfs_vport_count, 0);
+ phba->debugfs_vport_count = 0;
+ if (IS_ERR(phba->hba_debugfs_root)) {
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_INIT,
+ "0528 Cannot create debugfs %s\n", name);
+ return;
+ }
+ lpfc_debugfs_hba_count++;
/* Multi-XRI pools */
- snprintf(name, sizeof(name), "multixripools");
- phba->debug_multixri_pools =
- debugfs_create_file(name, S_IFREG | 0644,
- phba->hba_debugfs_root,
- phba,
- &lpfc_debugfs_op_multixripools);
- if (IS_ERR(phba->debug_multixri_pools)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0527 Cannot create debugfs multixripools\n");
- goto debug_failed;
- }
+ debugfs_create_file("multixripools", 0644,
+ phba->hba_debugfs_root, phba,
+ &lpfc_debugfs_op_multixripools);
/* Congestion Info Buffer */
- scnprintf(name, sizeof(name), "cgn_buffer");
- phba->debug_cgn_buffer =
- debugfs_create_file(name, S_IFREG | 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_cgn_buffer_op);
- if (IS_ERR(phba->debug_cgn_buffer)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "6527 Cannot create debugfs "
- "cgn_buffer\n");
- goto debug_failed;
- }
+ debugfs_create_file("cgn_buffer", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_cgn_buffer_op);
/* RX Monitor */
- scnprintf(name, sizeof(name), "rx_monitor");
- phba->debug_rx_monitor =
- debugfs_create_file(name, S_IFREG | 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_rx_monitor_op);
- if (IS_ERR(phba->debug_rx_monitor)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "6528 Cannot create debugfs "
- "rx_monitor\n");
- goto debug_failed;
- }
+ debugfs_create_file("rx_monitor", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_rx_monitor_op);
/* RAS log */
- snprintf(name, sizeof(name), "ras_log");
- phba->debug_ras_log =
- debugfs_create_file(name, 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_ras_log);
- if (IS_ERR(phba->debug_ras_log)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "6148 Cannot create debugfs"
- " ras_log\n");
- goto debug_failed;
- }
+ debugfs_create_file("ras_log", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_debugfs_ras_log);
/* Setup hbqinfo */
- snprintf(name, sizeof(name), "hbqinfo");
- phba->debug_hbqinfo =
- debugfs_create_file(name, S_IFREG | 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_hbqinfo);
+ debugfs_create_file("hbqinfo", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_debugfs_op_hbqinfo);
#ifdef LPFC_HDWQ_LOCK_STAT
/* Setup lockstat */
- snprintf(name, sizeof(name), "lockstat");
- phba->debug_lockstat =
- debugfs_create_file(name, S_IFREG | 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_lockstat);
- if (IS_ERR(phba->debug_lockstat)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "4610 Can't create debugfs lockstat\n");
- goto debug_failed;
- }
+ debugfs_create_file("lockstat", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_debugfs_op_lockstat);
#endif
-
- /* Setup dumpHBASlim */
if (phba->sli_rev < LPFC_SLI_REV4) {
- snprintf(name, sizeof(name), "dumpHBASlim");
- phba->debug_dumpHBASlim =
- debugfs_create_file(name,
- S_IFREG|S_IRUGO|S_IWUSR,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_dumpHBASlim);
- } else
- phba->debug_dumpHBASlim = NULL;
+ /* Setup dumpHBASlim */
+ debugfs_create_file("dumpHBASlim", 0644,
+ phba->hba_debugfs_root, phba,
+ &lpfc_debugfs_op_dumpHBASlim);
+ }
- /* Setup dumpHostSlim */
if (phba->sli_rev < LPFC_SLI_REV4) {
- snprintf(name, sizeof(name), "dumpHostSlim");
- phba->debug_dumpHostSlim =
- debugfs_create_file(name,
- S_IFREG|S_IRUGO|S_IWUSR,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_dumpHostSlim);
- } else
- phba->debug_dumpHostSlim = NULL;
+ /* Setup dumpHostSlim */
+ debugfs_create_file("dumpHostSlim", 0644,
+ phba->hba_debugfs_root, phba,
+ &lpfc_debugfs_op_dumpHostSlim);
+ }
/* Setup DIF Error Injections */
- phba->debug_InjErrLBA =
- debugfs_create_file_aux_num("InjErrLBA", 0644,
- phba->hba_debugfs_root,
- phba, InjErrLBA, &lpfc_debugfs_op_dif_err);
+ debugfs_create_file_aux_num("InjErrLBA", 0644,
+ phba->hba_debugfs_root, phba,
+ InjErrLBA,
+ &lpfc_debugfs_op_dif_err);
phba->lpfc_injerr_lba = LPFC_INJERR_LBA_OFF;
- phba->debug_InjErrNPortID =
- debugfs_create_file_aux_num("InjErrNPortID", 0644,
- phba->hba_debugfs_root,
- phba, InjErrNPortID, &lpfc_debugfs_op_dif_err);
-
- phba->debug_InjErrWWPN =
- debugfs_create_file_aux_num("InjErrWWPN", 0644,
- phba->hba_debugfs_root,
- phba, InjErrWWPN, &lpfc_debugfs_op_dif_err);
-
- phba->debug_writeGuard =
- debugfs_create_file_aux_num("writeGuardInjErr", 0644,
- phba->hba_debugfs_root,
- phba, writeGuard, &lpfc_debugfs_op_dif_err);
-
- phba->debug_writeApp =
- debugfs_create_file_aux_num("writeAppInjErr", 0644,
- phba->hba_debugfs_root,
- phba, writeApp, &lpfc_debugfs_op_dif_err);
-
- phba->debug_writeRef =
- debugfs_create_file_aux_num("writeRefInjErr", 0644,
- phba->hba_debugfs_root,
- phba, writeRef, &lpfc_debugfs_op_dif_err);
-
- phba->debug_readGuard =
- debugfs_create_file_aux_num("readGuardInjErr", 0644,
- phba->hba_debugfs_root,
- phba, readGuard, &lpfc_debugfs_op_dif_err);
-
- phba->debug_readApp =
- debugfs_create_file_aux_num("readAppInjErr", 0644,
- phba->hba_debugfs_root,
- phba, readApp, &lpfc_debugfs_op_dif_err);
-
- phba->debug_readRef =
- debugfs_create_file_aux_num("readRefInjErr", 0644,
- phba->hba_debugfs_root,
- phba, readRef, &lpfc_debugfs_op_dif_err);
+ debugfs_create_file_aux_num("InjErrNPortID", 0644,
+ phba->hba_debugfs_root, phba,
+ InjErrNPortID,
+ &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("InjErrWWPN", 0644,
+ phba->hba_debugfs_root, phba,
+ InjErrWWPN,
+ &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("writeGuardInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ writeGuard,
+ &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("writeAppInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ writeApp, &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("writeRefInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ writeRef, &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("readGuardInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ readGuard,
+ &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("readAppInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ readApp, &lpfc_debugfs_op_dif_err);
+
+ debugfs_create_file_aux_num("readRefInjErr", 0644,
+ phba->hba_debugfs_root, phba,
+ readRef, &lpfc_debugfs_op_dif_err);
/* Setup slow ring trace */
if (lpfc_debugfs_max_slow_ring_trc) {
@@ -6224,11 +6199,9 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
}
}
- snprintf(name, sizeof(name), "slow_ring_trace");
- phba->debug_slow_ring_trc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_slow_ring_trc);
+ debugfs_create_file("slow_ring_trace", 0644,
+ phba->hba_debugfs_root, phba,
+ &lpfc_debugfs_op_slow_ring_trc);
if (!phba->slow_ring_trc) {
phba->slow_ring_trc = kcalloc(
lpfc_debugfs_max_slow_ring_trc,
@@ -6238,16 +6211,13 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
"0416 Cannot create debugfs "
"slow_ring buffer\n");
- goto debug_failed;
+ goto out;
}
atomic_set(&phba->slow_ring_trc_cnt, 0);
}
- snprintf(name, sizeof(name), "nvmeio_trc");
- phba->debug_nvmeio_trc =
- debugfs_create_file(name, 0644,
- phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_nvmeio_trc);
+ debugfs_create_file("nvmeio_trc", 0644, phba->hba_debugfs_root,
+ phba, &lpfc_debugfs_op_nvmeio_trc);
atomic_set(&phba->nvmeio_trc_cnt, 0);
if (lpfc_debugfs_max_nvmeio_trc) {
@@ -6293,7 +6263,12 @@ nvmeio_off:
if (!vport->vport_debugfs_root) {
vport->vport_debugfs_root =
debugfs_create_dir(name, phba->hba_debugfs_root);
- atomic_inc(&phba->debugfs_vport_count);
+ if (IS_ERR(vport->vport_debugfs_root)) {
+ lpfc_vlog_msg(vport, KERN_WARNING, LOG_INIT,
+ "0529 Cannot create debugfs %s\n", name);
+ return;
+ }
+ phba->debugfs_vport_count++;
}
if (lpfc_debugfs_max_disc_trc) {
@@ -6320,54 +6295,27 @@ nvmeio_off:
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
"0418 Cannot create debugfs disc trace "
"buffer\n");
- goto debug_failed;
+ goto out;
}
atomic_set(&vport->disc_trc_cnt, 0);
- snprintf(name, sizeof(name), "discovery_trace");
- vport->debug_disc_trc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_disc_trc);
- snprintf(name, sizeof(name), "nodelist");
- vport->debug_nodelist =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_nodelist);
-
- snprintf(name, sizeof(name), "nvmestat");
- vport->debug_nvmestat =
- debugfs_create_file(name, 0644,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_nvmestat);
-
- snprintf(name, sizeof(name), "scsistat");
- vport->debug_scsistat =
- debugfs_create_file(name, 0644,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_scsistat);
- if (IS_ERR(vport->debug_scsistat)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "4611 Cannot create debugfs scsistat\n");
- goto debug_failed;
- }
+ debugfs_create_file("discovery_trace", 0644, vport->vport_debugfs_root,
+ vport, &lpfc_debugfs_op_disc_trc);
- snprintf(name, sizeof(name), "ioktime");
- vport->debug_ioktime =
- debugfs_create_file(name, 0644,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_ioktime);
- if (IS_ERR(vport->debug_ioktime)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0815 Cannot create debugfs ioktime\n");
- goto debug_failed;
- }
+ debugfs_create_file("nodelist", 0644, vport->vport_debugfs_root, vport,
+ &lpfc_debugfs_op_nodelist);
+
+ debugfs_create_file("nvmestat", 0644, vport->vport_debugfs_root, vport,
+ &lpfc_debugfs_op_nvmestat);
- snprintf(name, sizeof(name), "hdwqstat");
- vport->debug_hdwqstat =
- debugfs_create_file(name, 0644,
- vport->vport_debugfs_root,
- vport, &lpfc_debugfs_op_hdwqstat);
+ debugfs_create_file("scsistat", 0644, vport->vport_debugfs_root, vport,
+ &lpfc_debugfs_op_scsistat);
+
+ debugfs_create_file("ioktime", 0644, vport->vport_debugfs_root, vport,
+ &lpfc_debugfs_op_ioktime);
+
+ debugfs_create_file("hdwqstat", 0644, vport->vport_debugfs_root, vport,
+ &lpfc_debugfs_op_hdwqstat);
/*
* The following section is for additional directories/files for the
@@ -6375,93 +6323,58 @@ nvmeio_off:
*/
if (!pport_setup)
- goto debug_failed;
+ return;
/*
* iDiag debugfs root entry points for SLI4 device only
*/
if (phba->sli_rev < LPFC_SLI_REV4)
- goto debug_failed;
+ return;
- snprintf(name, sizeof(name), "iDiag");
if (!phba->idiag_root) {
phba->idiag_root =
- debugfs_create_dir(name, phba->hba_debugfs_root);
+ debugfs_create_dir("iDiag", phba->hba_debugfs_root);
/* Initialize iDiag data structure */
memset(&idiag, 0, sizeof(idiag));
}
/* iDiag read PCI config space */
- snprintf(name, sizeof(name), "pciCfg");
- if (!phba->idiag_pci_cfg) {
- phba->idiag_pci_cfg =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_pciCfg);
- idiag.offset.last_rd = 0;
- }
+ debugfs_create_file("pciCfg", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_pciCfg);
+ idiag.offset.last_rd = 0;
/* iDiag PCI BAR access */
- snprintf(name, sizeof(name), "barAcc");
- if (!phba->idiag_bar_acc) {
- phba->idiag_bar_acc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_barAcc);
- idiag.offset.last_rd = 0;
- }
+ debugfs_create_file("barAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_barAcc);
+ idiag.offset.last_rd = 0;
/* iDiag get PCI function queue information */
- snprintf(name, sizeof(name), "queInfo");
- if (!phba->idiag_que_info) {
- phba->idiag_que_info =
- debugfs_create_file(name, S_IFREG|S_IRUGO,
- phba->idiag_root, phba, &lpfc_idiag_op_queInfo);
- }
+ debugfs_create_file("queInfo", 0444, phba->idiag_root, phba,
+ &lpfc_idiag_op_queInfo);
/* iDiag access PCI function queue */
- snprintf(name, sizeof(name), "queAcc");
- if (!phba->idiag_que_acc) {
- phba->idiag_que_acc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_queAcc);
- }
+ debugfs_create_file("queAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_queAcc);
/* iDiag access PCI function doorbell registers */
- snprintf(name, sizeof(name), "drbAcc");
- if (!phba->idiag_drb_acc) {
- phba->idiag_drb_acc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_drbAcc);
- }
+ debugfs_create_file("drbAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_drbAcc);
/* iDiag access PCI function control registers */
- snprintf(name, sizeof(name), "ctlAcc");
- if (!phba->idiag_ctl_acc) {
- phba->idiag_ctl_acc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_ctlAcc);
- }
+ debugfs_create_file("ctlAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_ctlAcc);
/* iDiag access mbox commands */
- snprintf(name, sizeof(name), "mbxAcc");
- if (!phba->idiag_mbx_acc) {
- phba->idiag_mbx_acc =
- debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba, &lpfc_idiag_op_mbxAcc);
- }
+ debugfs_create_file("mbxAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_mbxAcc);
/* iDiag extents access commands */
if (phba->sli4_hba.extents_in_use) {
- snprintf(name, sizeof(name), "extAcc");
- if (!phba->idiag_ext_acc) {
- phba->idiag_ext_acc =
- debugfs_create_file(name,
- S_IFREG|S_IRUGO|S_IWUSR,
- phba->idiag_root, phba,
- &lpfc_idiag_op_extAcc);
- }
+ debugfs_create_file("extAcc", 0644, phba->idiag_root, phba,
+ &lpfc_idiag_op_extAcc);
}
-
-debug_failed:
+out:
+ /* alloc'ed items are kfree'd in lpfc_debugfs_terminate */
return;
#endif
}
@@ -6486,145 +6399,26 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
kfree(vport->disc_trc);
vport->disc_trc = NULL;
- debugfs_remove(vport->debug_disc_trc); /* discovery_trace */
- vport->debug_disc_trc = NULL;
-
- debugfs_remove(vport->debug_nodelist); /* nodelist */
- vport->debug_nodelist = NULL;
-
- debugfs_remove(vport->debug_nvmestat); /* nvmestat */
- vport->debug_nvmestat = NULL;
-
- debugfs_remove(vport->debug_scsistat); /* scsistat */
- vport->debug_scsistat = NULL;
-
- debugfs_remove(vport->debug_ioktime); /* ioktime */
- vport->debug_ioktime = NULL;
-
- debugfs_remove(vport->debug_hdwqstat); /* hdwqstat */
- vport->debug_hdwqstat = NULL;
-
if (vport->vport_debugfs_root) {
debugfs_remove(vport->vport_debugfs_root); /* vportX */
vport->vport_debugfs_root = NULL;
- atomic_dec(&phba->debugfs_vport_count);
+ phba->debugfs_vport_count--;
}
- if (atomic_read(&phba->debugfs_vport_count) == 0) {
-
- debugfs_remove(phba->debug_multixri_pools); /* multixripools*/
- phba->debug_multixri_pools = NULL;
-
- debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
- phba->debug_hbqinfo = NULL;
-
- debugfs_remove(phba->debug_cgn_buffer);
- phba->debug_cgn_buffer = NULL;
-
- debugfs_remove(phba->debug_rx_monitor);
- phba->debug_rx_monitor = NULL;
-
- debugfs_remove(phba->debug_ras_log);
- phba->debug_ras_log = NULL;
-
-#ifdef LPFC_HDWQ_LOCK_STAT
- debugfs_remove(phba->debug_lockstat); /* lockstat */
- phba->debug_lockstat = NULL;
-#endif
- debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */
- phba->debug_dumpHBASlim = NULL;
-
- debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */
- phba->debug_dumpHostSlim = NULL;
-
- debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */
- phba->debug_InjErrLBA = NULL;
-
- debugfs_remove(phba->debug_InjErrNPortID);
- phba->debug_InjErrNPortID = NULL;
-
- debugfs_remove(phba->debug_InjErrWWPN); /* InjErrWWPN */
- phba->debug_InjErrWWPN = NULL;
-
- debugfs_remove(phba->debug_writeGuard); /* writeGuard */
- phba->debug_writeGuard = NULL;
-
- debugfs_remove(phba->debug_writeApp); /* writeApp */
- phba->debug_writeApp = NULL;
-
- debugfs_remove(phba->debug_writeRef); /* writeRef */
- phba->debug_writeRef = NULL;
-
- debugfs_remove(phba->debug_readGuard); /* readGuard */
- phba->debug_readGuard = NULL;
-
- debugfs_remove(phba->debug_readApp); /* readApp */
- phba->debug_readApp = NULL;
-
- debugfs_remove(phba->debug_readRef); /* readRef */
- phba->debug_readRef = NULL;
-
+ if (!phba->debugfs_vport_count) {
kfree(phba->slow_ring_trc);
phba->slow_ring_trc = NULL;
- /* slow_ring_trace */
- debugfs_remove(phba->debug_slow_ring_trc);
- phba->debug_slow_ring_trc = NULL;
-
- debugfs_remove(phba->debug_nvmeio_trc);
- phba->debug_nvmeio_trc = NULL;
-
kfree(phba->nvmeio_trc);
phba->nvmeio_trc = NULL;
- /*
- * iDiag release
- */
- if (phba->sli_rev == LPFC_SLI_REV4) {
- /* iDiag extAcc */
- debugfs_remove(phba->idiag_ext_acc);
- phba->idiag_ext_acc = NULL;
-
- /* iDiag mbxAcc */
- debugfs_remove(phba->idiag_mbx_acc);
- phba->idiag_mbx_acc = NULL;
-
- /* iDiag ctlAcc */
- debugfs_remove(phba->idiag_ctl_acc);
- phba->idiag_ctl_acc = NULL;
-
- /* iDiag drbAcc */
- debugfs_remove(phba->idiag_drb_acc);
- phba->idiag_drb_acc = NULL;
-
- /* iDiag queAcc */
- debugfs_remove(phba->idiag_que_acc);
- phba->idiag_que_acc = NULL;
-
- /* iDiag queInfo */
- debugfs_remove(phba->idiag_que_info);
- phba->idiag_que_info = NULL;
-
- /* iDiag barAcc */
- debugfs_remove(phba->idiag_bar_acc);
- phba->idiag_bar_acc = NULL;
-
- /* iDiag pciCfg */
- debugfs_remove(phba->idiag_pci_cfg);
- phba->idiag_pci_cfg = NULL;
-
- /* Finally remove the iDiag debugfs root */
- debugfs_remove(phba->idiag_root);
- phba->idiag_root = NULL;
- }
-
if (phba->hba_debugfs_root) {
debugfs_remove(phba->hba_debugfs_root); /* fnX */
phba->hba_debugfs_root = NULL;
- atomic_dec(&lpfc_debugfs_hba_count);
+ lpfc_debugfs_hba_count--;
}
- if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
+ if (!lpfc_debugfs_hba_count) {
debugfs_remove(lpfc_debugfs_root); /* lpfc */
lpfc_debugfs_root = NULL;
}
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index f319f3af0400..a1464f8ac331 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2022 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2007-2011 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -44,6 +44,9 @@
/* hbqinfo output buffer size */
#define LPFC_HBQINFO_SIZE 8192
+/* hdwqinfo output buffer size */
+#define LPFC_HDWQINFO_SIZE 8192
+
/* nvmestat output buffer size */
#define LPFC_NVMESTAT_SIZE 8192
#define LPFC_IOKTIME_SIZE 8192
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index fca81e0c7c2e..b71db7d7d747 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -3762,7 +3762,7 @@ lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry)
memset(prdf, 0, cmdsize);
prdf->rdf.fpin_cmd = ELS_RDF;
prdf->rdf.desc_len = cpu_to_be32(sizeof(struct lpfc_els_rdf_req) -
- sizeof(struct fc_els_rdf));
+ sizeof(struct fc_els_rdf_hdr));
prdf->reg_d1.reg_desc.desc_tag = cpu_to_be32(ELS_DTAG_FPIN_REGISTER);
prdf->reg_d1.reg_desc.desc_len = cpu_to_be32(
FC_TLV_DESC_LENGTH_FROM_SZ(prdf->reg_d1));
@@ -5339,12 +5339,12 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
ulp_status, ulp_word4, did);
/* ELS response tag <ulpIoTag> completes */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
- "0110 ELS response tag x%x completes "
+ "0110 ELS response tag x%x completes fc_flag x%lx"
"Data: x%x x%x x%x x%x x%lx x%x x%x x%x %p %p\n",
- iotag, ulp_status, ulp_word4, tmo,
+ iotag, vport->fc_flag, ulp_status, ulp_word4, tmo,
ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
ndlp->nlp_rpi, kref_read(&ndlp->kref), mbox, ndlp);
- if (mbox) {
+ if (mbox && !test_bit(FC_PT2PT, &vport->fc_flag)) {
if (ulp_status == 0 &&
test_bit(NLP_ACC_REGLOGIN, &ndlp->nlp_flag)) {
if (!lpfc_unreg_rpi(vport, ndlp) &&
@@ -5403,6 +5403,10 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
out_free_mbox:
lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED);
+ } else if (mbox && test_bit(FC_PT2PT, &vport->fc_flag) &&
+ test_bit(NLP_ACC_REGLOGIN, &ndlp->nlp_flag)) {
+ lpfc_mbx_cmpl_reg_login(phba, mbox);
+ clear_bit(NLP_ACC_REGLOGIN, &ndlp->nlp_flag);
}
out:
if (ndlp && shost) {
@@ -11259,6 +11263,11 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
"0126 FDISC cmpl status: x%x/x%x)\n",
ulp_status, ulp_word4);
+
+ /* drop initial reference */
+ if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag))
+ lpfc_nlp_put(ndlp);
+
goto fdisc_failed;
}
@@ -12008,7 +12017,11 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba,
sglq_entry->state = SGL_FREED;
spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock,
iflag);
-
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_SLI |
+ LOG_DISCOVERY | LOG_NODE,
+ "0732 ELS XRI ABORT on Node: ndlp=x%px "
+ "xri=x%x\n",
+ ndlp, xri);
if (ndlp) {
lpfc_set_rrq_active(phba, ndlp,
sglq_entry->sli4_lxritag,
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 32298285ea5e..3bc0efa7453e 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -366,6 +366,7 @@ struct lpfc_name {
} s;
uint8_t wwn[8];
uint64_t name __packed __aligned(4);
+ __be64 wwn_be __packed __aligned(4);
} u;
};
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index bc709786e6af..a7f7ed86d2b0 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -4909,18 +4909,18 @@ struct send_frame_wqe {
#define ELS_RDF_REG_TAG_CNT 4
struct lpfc_els_rdf_reg_desc {
- struct fc_df_desc_fpin_reg reg_desc; /* descriptor header */
+ struct fc_df_desc_fpin_reg_hdr reg_desc; /* descriptor header */
__be32 desc_tags[ELS_RDF_REG_TAG_CNT];
/* tags in reg_desc */
};
struct lpfc_els_rdf_req {
- struct fc_els_rdf rdf; /* hdr up to descriptors */
+ struct fc_els_rdf_hdr rdf; /* hdr up to descriptors */
struct lpfc_els_rdf_reg_desc reg_d1; /* 1st descriptor */
};
struct lpfc_els_rdf_rsp {
- struct fc_els_rdf_resp rdf_resp; /* hdr up to descriptors */
+ struct fc_els_rdf_resp_hdr rdf_resp; /* hdr up to descriptors */
struct lpfc_els_rdf_reg_desc reg_d1; /* 1st descriptor */
};
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4081d2a358ee..0ca7429d86b8 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3057,13 +3057,6 @@ lpfc_cleanup(struct lpfc_vport *vport)
lpfc_vmid_vport_cleanup(vport);
list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
- if (vport->port_type != LPFC_PHYSICAL_PORT &&
- ndlp->nlp_DID == Fabric_DID) {
- /* Just free up ndlp with Fabric_DID for vports */
- lpfc_nlp_put(ndlp);
- continue;
- }
-
if (ndlp->nlp_DID == Fabric_Cntl_DID &&
ndlp->nlp_state == NLP_STE_UNUSED_NODE) {
lpfc_nlp_put(ndlp);
@@ -8300,10 +8293,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt,
phba->cfg_nvme_seg_cnt);
- if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
- i = phba->cfg_sg_dma_buf_size;
- else
- i = SLI4_PAGE_SIZE;
+ i = min(phba->cfg_sg_dma_buf_size, SLI4_PAGE_SIZE);
phba->lpfc_sg_dma_buf_pool =
dma_pool_create("lpfc_sg_dma_buf_pool",
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index a596b80d03d4..1e5ef93e67e3 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -326,8 +326,14 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox)
/* Now that REG_RPI completed successfully,
* we can now proceed with sending the PLOGI ACC.
*/
- rc = lpfc_els_rsp_acc(login_mbox->vport, ELS_CMD_PLOGI,
- save_iocb, ndlp, NULL);
+ if (test_bit(FC_PT2PT, &ndlp->vport->fc_flag)) {
+ rc = lpfc_els_rsp_acc(login_mbox->vport, ELS_CMD_PLOGI,
+ save_iocb, ndlp, login_mbox);
+ } else {
+ rc = lpfc_els_rsp_acc(login_mbox->vport, ELS_CMD_PLOGI,
+ save_iocb, ndlp, NULL);
+ }
+
if (rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"4576 PLOGI ACC fails pt2pt discovery: "
@@ -335,9 +341,16 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox)
}
}
- /* Now process the REG_RPI cmpl */
- lpfc_mbx_cmpl_reg_login(phba, login_mbox);
- clear_bit(NLP_ACC_REGLOGIN, &ndlp->nlp_flag);
+ /* If this is a fabric topology, complete the reg_rpi and prli now.
+ * For Pt2Pt, the reg_rpi and PRLI are deferred until after the LS_ACC
+ * completes. This ensures, in Pt2Pt, that the PLOGI LS_ACC is sent
+ * before the PRLI.
+ */
+ if (!test_bit(FC_PT2PT, &ndlp->vport->fc_flag)) {
+ /* Now process the REG_RPI cmpl */
+ lpfc_mbx_cmpl_reg_login(phba, login_mbox);
+ clear_bit(NLP_ACC_REGLOGIN, &ndlp->nlp_flag);
+ }
kfree(save_iocb);
}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index a6647dd360d1..e6f632521cff 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1234,12 +1234,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
if ((phba->cfg_nvme_enable_fb) &&
test_bit(NLP_FIRSTBURST, &pnode->nlp_flag)) {
req_len = lpfc_ncmd->nvmeCmd->payload_length;
- if (req_len < pnode->nvme_fb_size)
- wqe->fcp_iwrite.initial_xfer_len =
- req_len;
- else
- wqe->fcp_iwrite.initial_xfer_len =
- pnode->nvme_fb_size;
+ wqe->fcp_iwrite.initial_xfer_len = min(req_len,
+ pnode->nvme_fb_size);
} else {
wqe->fcp_iwrite.initial_xfer_len = 0;
}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 508ceeecf2d9..6d9d8c196936 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5935,7 +5935,7 @@ lpfc_chk_tgt_mapped(struct lpfc_vport *vport, struct fc_rport *rport)
/**
* lpfc_reset_flush_io_context -
* @vport: The virtual port (scsi_host) for the flush context
- * @tgt_id: If aborting by Target contect - specifies the target id
+ * @tgt_id: If aborting by Target context - specifies the target id
* @lun_id: If aborting by Lun context - specifies the lun id
* @context: specifies the context level to flush at.
*
@@ -6109,8 +6109,14 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
spin_unlock_irqrestore(&pnode->lock, flags);
}
- lpfc_reset_flush_io_context(vport, tgt_id, lun_id,
- LPFC_CTX_TGT);
+ status = lpfc_reset_flush_io_context(vport, tgt_id, lun_id,
+ LPFC_CTX_TGT);
+ if (status != SUCCESS) {
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
+ "0726 Target Reset flush status x%x\n",
+ status);
+ return status;
+ }
return FAST_IO_FAIL;
}
@@ -6202,7 +6208,7 @@ lpfc_host_reset_handler(struct scsi_cmnd *cmnd)
int rc, ret = SUCCESS;
lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
- "3172 SCSI layer issued Host Reset Data:\n");
+ "3172 SCSI layer issued Host Reset\n");
lpfc_offline_prep(phba, LPFC_MBX_WAIT);
lpfc_offline(phba);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index a8fbdf7119d8..7ea7c4245c69 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -8820,7 +8820,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
if (unlikely(rc)) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"0381 Error %d during queue setup.\n", rc);
- goto out_stop_timers;
+ goto out_destroy_queue;
}
/* Initialize the driver internal SLI layer lists. */
lpfc_sli4_setup(phba);
@@ -9103,7 +9103,6 @@ out_free_iocblist:
lpfc_free_iocb_list(phba);
out_destroy_queue:
lpfc_sli4_queue_destroy(phba);
-out_stop_timers:
lpfc_stop_hba_timers(phba);
out_free_mbox:
mempool_free(mboxq, phba->mbox_mem_pool);
@@ -12439,19 +12438,11 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
}
/*
- * If we're unloading, don't abort iocb on the ELS ring, but change
- * the callback so that nothing happens when it finishes.
+ * Always abort the outstanding WQE and set the IA bit correctly
+ * for the context. This is necessary for correctly removing
+ * outstanding ndlp reference counts when the CQE completes with
+ * the XB bit set.
*/
- if (test_bit(FC_UNLOADING, &vport->load_flag) &&
- pring->ringno == LPFC_ELS_RING) {
- if (cmdiocb->cmd_flag & LPFC_IO_FABRIC)
- cmdiocb->fabric_cmd_cmpl = lpfc_ignore_els_cmpl;
- else
- cmdiocb->cmd_cmpl = lpfc_ignore_els_cmpl;
- return retval;
- }
-
- /* issue ABTS for this IOCB based on iotag */
abtsiocbp = __lpfc_sli_get_iocbq(phba);
if (abtsiocbp == NULL)
return IOCB_NORESOURCE;
@@ -21373,7 +21364,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
struct lpfc_sglq *sglq;
struct lpfc_sli_ring *pring;
unsigned long iflags;
- uint32_t ret = 0;
+ int ret = 0;
/* NVME_LS and NVME_LS ABTS requests. */
if (pwqe->cmd_flag & LPFC_IO_NVME_LS) {
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 9ee3a3a4ec4d..31c3c5abdca6 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.4.0.10"
+#define LPFC_DRIVER_VERSION "14.4.0.11"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
index 96401eb7e231..8c8bfbbdd34e 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h
@@ -322,6 +322,9 @@ struct mpi3_man6_gpio_entry {
#define MPI3_MAN6_GPIO_EXTINT_PARAM1_FLAGS_TRIGGER_MASK (0x01)
#define MPI3_MAN6_GPIO_EXTINT_PARAM1_FLAGS_TRIGGER_EDGE (0x00)
#define MPI3_MAN6_GPIO_EXTINT_PARAM1_FLAGS_TRIGGER_LEVEL (0x01)
+#define MPI3_MAN6_GPIO_OVER_TEMP_PARAM1_LEVEL_WARNING (0x00)
+#define MPI3_MAN6_GPIO_OVER_TEMP_PARAM1_LEVEL_CRITICAL (0x01)
+#define MPI3_MAN6_GPIO_OVER_TEMP_PARAM1_LEVEL_FATAL (0x02)
#define MPI3_MAN6_GPIO_PORT_GREEN_PARAM1_PHY_STATUS_ALL_UP (0x00)
#define MPI3_MAN6_GPIO_PORT_GREEN_PARAM1_PHY_STATUS_ONE_OR_MORE_UP (0x01)
#define MPI3_MAN6_GPIO_CABLE_MGMT_PARAM1_INTERFACE_MODULE_PRESENT (0x00)
@@ -1250,6 +1253,37 @@ struct mpi3_io_unit_page17 {
__le32 current_key[];
};
#define MPI3_IOUNIT17_PAGEVERSION (0x00)
+struct mpi3_io_unit_page18 {
+ struct mpi3_config_page_header header;
+ u8 flags;
+ u8 poll_interval;
+ __le16 reserved0a;
+ __le32 reserved0c;
+};
+
+#define MPI3_IOUNIT18_PAGEVERSION (0x00)
+#define MPI3_IOUNIT18_FLAGS_DIRECTATTACHED_ENABLE (0x01)
+#define MPI3_IOUNIT18_POLLINTERVAL_DISABLE (0x00)
+#ifndef MPI3_IOUNIT19_DEVICE_MAX
+#define MPI3_IOUNIT19_DEVICE_MAX (1)
+#endif
+struct mpi3_iounit19_device {
+ __le16 temperature;
+ __le16 dev_handle;
+ __le16 persistent_id;
+ __le16 reserved06;
+};
+
+#define MPI3_IOUNIT19_DEVICE_TEMPERATURE_UNAVAILABLE (0x8000)
+struct mpi3_io_unit_page19 {
+ struct mpi3_config_page_header header;
+ __le16 num_devices;
+ __le16 reserved0a;
+ __le32 reserved0c;
+ struct mpi3_iounit19_device device[MPI3_IOUNIT19_DEVICE_MAX];
+};
+
+#define MPI3_IOUNIT19_PAGEVERSION (0x00)
struct mpi3_ioc_page0 {
struct mpi3_config_page_header header;
__le32 reserved08;
@@ -2356,7 +2390,9 @@ struct mpi3_device0_vd_format {
__le16 io_throttle_group;
__le16 io_throttle_group_low;
__le16 io_throttle_group_high;
- __le32 reserved0c;
+ u8 vd_abort_to;
+ u8 vd_reset_to;
+ __le16 reserved0e;
};
#define MPI3_DEVICE0_VD_STATE_OFFLINE (0x00)
#define MPI3_DEVICE0_VD_STATE_PARTIALLY_DEGRADED (0x01)
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_pci.h b/drivers/scsi/mpi3mr/mpi/mpi30_pci.h
index 7c15e5851ce4..4eeb11c3c73e 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_pci.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_pci.h
@@ -9,9 +9,11 @@
#define MPI3_NVME_ENCAP_CMD_MAX (1)
#endif
#define MPI3_NVME_FLAGS_FORCE_ADMIN_ERR_REPLY_MASK (0x0002)
+#define MPI3_NVME_FLAGS_FORCE_ADMIN_ERR_REPLY_SHIFT (1)
#define MPI3_NVME_FLAGS_FORCE_ADMIN_ERR_REPLY_FAIL_ONLY (0x0000)
#define MPI3_NVME_FLAGS_FORCE_ADMIN_ERR_REPLY_ALL (0x0002)
#define MPI3_NVME_FLAGS_SUBMISSIONQ_MASK (0x0001)
+#define MPI3_NVME_FLAGS_SUBMISSIONQ_SHIFT (0)
#define MPI3_NVME_FLAGS_SUBMISSIONQ_IO (0x0000)
#define MPI3_NVME_FLAGS_SUBMISSIONQ_ADMIN (0x0001)
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_sas.h b/drivers/scsi/mpi3mr/mpi/mpi30_sas.h
index 4a93c67d335f..190b06508b00 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_sas.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_sas.h
@@ -11,6 +11,7 @@
#define MPI3_SAS_DEVICE_INFO_STP_INITIATOR (0x00000010)
#define MPI3_SAS_DEVICE_INFO_SMP_INITIATOR (0x00000008)
#define MPI3_SAS_DEVICE_INFO_DEVICE_TYPE_MASK (0x00000007)
+#define MPI3_SAS_DEVICE_INFO_DEVICE_TYPE_SHIFT (0)
#define MPI3_SAS_DEVICE_INFO_DEVICE_TYPE_NO_DEVICE (0x00000000)
#define MPI3_SAS_DEVICE_INFO_DEVICE_TYPE_END_DEVICE (0x00000001)
#define MPI3_SAS_DEVICE_INFO_DEVICE_TYPE_EXPANDER (0x00000002)
diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
index 5c522e2531c3..28ab2efb3baa 100644
--- a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
+++ b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h
@@ -18,7 +18,7 @@ union mpi3_version_union {
#define MPI3_VERSION_MAJOR (3)
#define MPI3_VERSION_MINOR (0)
-#define MPI3_VERSION_UNIT (35)
+#define MPI3_VERSION_UNIT (37)
#define MPI3_VERSION_DEV (0)
#define MPI3_DEVHANDLE_INVALID (0xffff)
struct mpi3_sysif_oper_queue_indexes {
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 8d4ef49e04d1..6742684e2990 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -56,8 +56,8 @@ extern struct list_head mrioc_list;
extern int prot_mask;
extern atomic64_t event_counter;
-#define MPI3MR_DRIVER_VERSION "8.14.0.5.50"
-#define MPI3MR_DRIVER_RELDATE "27-June-2025"
+#define MPI3MR_DRIVER_VERSION "8.15.0.5.50"
+#define MPI3MR_DRIVER_RELDATE "12-August-2025"
#define MPI3MR_DRIVER_NAME "mpi3mr"
#define MPI3MR_DRIVER_LICENSE "GPL"
@@ -697,6 +697,8 @@ struct tgt_dev_vd {
u16 tg_id;
u32 tg_high;
u32 tg_low;
+ u8 abort_to;
+ u8 reset_to;
struct mpi3mr_throttle_group_info *tg;
};
@@ -738,6 +740,8 @@ enum mpi3mr_dev_state {
* @wwid: World wide ID
* @enclosure_logical_id: Enclosure logical identifier
* @dev_spec: Device type specific information
+ * @abort_to: Timeout for abort TM
+ * @reset_to: Timeout for Target/LUN reset TM
* @ref_count: Reference count
* @state: device state
*/
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 0152d31d430a..8fe6e0bf342e 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -2353,6 +2353,8 @@ static int mpi3mr_create_op_queues(struct mpi3mr_ioc *mrioc)
{
int retval = 0;
u16 num_queues = 0, i = 0, msix_count_op_q = 1;
+ u32 ioc_status;
+ enum mpi3mr_iocstate ioc_state;
num_queues = min_t(int, mrioc->facts.max_op_reply_q,
mrioc->facts.max_op_req_q);
@@ -2408,6 +2410,14 @@ static int mpi3mr_create_op_queues(struct mpi3mr_ioc *mrioc)
retval = -1;
goto out_failed;
}
+ ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+ ioc_state = mpi3mr_get_iocstate(mrioc);
+ if ((ioc_status & MPI3_SYSIF_IOC_STATUS_RESET_HISTORY) ||
+ ioc_state != MRIOC_STATE_READY) {
+ mpi3mr_print_fault_info(mrioc);
+ retval = -1;
+ goto out_failed;
+ }
mrioc->num_op_reply_q = mrioc->num_op_req_q = i;
ioc_info(mrioc,
"successfully created %d operational queue pairs(default/polled) queue = (%d/%d)\n",
@@ -5420,6 +5430,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
mpi3mr_reset_rc_name(reset_reason));
mrioc->device_refresh_on = 0;
+ scsi_block_requests(mrioc->shost);
mrioc->reset_in_progress = 1;
mrioc->stop_bsgs = 1;
mrioc->prev_reset_result = -1;
@@ -5528,6 +5539,7 @@ out:
if (!retval) {
mrioc->diagsave_timeout = 0;
mrioc->reset_in_progress = 0;
+ scsi_unblock_requests(mrioc->shost);
mrioc->pel_abort_requested = 0;
if (mrioc->pel_enabled) {
mrioc->pel_cmds.retry_count = 0;
@@ -5552,6 +5564,7 @@ out:
mrioc->device_refresh_on = 0;
mrioc->unrecoverable = 1;
mrioc->reset_in_progress = 0;
+ scsi_unblock_requests(mrioc->shost);
mrioc->stop_bsgs = 0;
retval = -1;
mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index 3df52a3b435b..b88633e1efe2 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -1308,6 +1308,12 @@ static void mpi3mr_update_tgtdev(struct mpi3mr_ioc *mrioc,
if (vdinf->vd_state == MPI3_DEVICE0_VD_STATE_OFFLINE)
tgtdev->is_hidden = 1;
tgtdev->non_stl = 1;
+ tgtdev->dev_spec.vd_inf.reset_to =
+ max_t(u8, vdinf->vd_reset_to,
+ MPI3MR_INTADMCMD_TIMEOUT);
+ tgtdev->dev_spec.vd_inf.abort_to =
+ max_t(u8, vdinf->vd_abort_to,
+ MPI3MR_INTADMCMD_TIMEOUT);
tgtdev->dev_spec.vd_inf.tg_id = vdinf_io_throttle_group;
tgtdev->dev_spec.vd_inf.tg_high =
le16_to_cpu(vdinf->io_throttle_group_high) * 2048;
@@ -2049,8 +2055,8 @@ static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc,
if (!fwevt->process_evt)
goto evt_ack;
- dprint_event_bh(mrioc, "processing event(0x%02x) in the bottom half handler\n",
- fwevt->event_id);
+ dprint_event_bh(mrioc, "processing event(0x%02x) -(0x%08x) in the bottom half handler\n",
+ fwevt->event_id, fwevt->evt_ctx);
switch (fwevt->event_id) {
case MPI3_EVENT_DEVICE_ADDED:
@@ -2866,12 +2872,14 @@ static void mpi3mr_preparereset_evt_th(struct mpi3mr_ioc *mrioc,
"prepare for reset event top half with rc=start\n");
if (mrioc->prepare_for_reset)
return;
+ scsi_block_requests(mrioc->shost);
mrioc->prepare_for_reset = 1;
mrioc->prepare_for_reset_timeout_counter = 0;
} else if (evtdata->reason_code == MPI3_EVENT_PREPARE_RESET_RC_ABORT) {
dprint_event_th(mrioc,
"prepare for reset top half with rc=abort\n");
mrioc->prepare_for_reset = 0;
+ scsi_unblock_requests(mrioc->shost);
mrioc->prepare_for_reset_timeout_counter = 0;
}
if ((event_reply->msg_flags & MPI3_EVENT_NOTIFY_MSGFLAGS_ACK_MASK)
@@ -3076,8 +3084,8 @@ void mpi3mr_os_handle_events(struct mpi3mr_ioc *mrioc,
}
if (process_evt_bh || ack_req) {
dprint_event_th(mrioc,
- "scheduling bottom half handler for event(0x%02x),ack_required=%d\n",
- evt_type, ack_req);
+ "scheduling bottom half handler for event(0x%02x) - (0x%08x), ack_required=%d\n",
+ evt_type, le32_to_cpu(event_reply->event_context), ack_req);
sz = event_reply->event_data_length * 4;
fwevt = mpi3mr_alloc_fwevt(sz);
if (!fwevt) {
@@ -3915,11 +3923,13 @@ int mpi3mr_issue_tm(struct mpi3mr_ioc *mrioc, u8 tm_type,
if (scsi_tgt_priv_data)
atomic_inc(&scsi_tgt_priv_data->block_io);
- if (tgtdev && (tgtdev->dev_type == MPI3_DEVICE_DEVFORM_PCIE)) {
- if (cmd_priv && tgtdev->dev_spec.pcie_inf.abort_to)
- timeout = tgtdev->dev_spec.pcie_inf.abort_to;
- else if (!cmd_priv && tgtdev->dev_spec.pcie_inf.reset_to)
- timeout = tgtdev->dev_spec.pcie_inf.reset_to;
+ if (tgtdev) {
+ if (tgtdev->dev_type == MPI3_DEVICE_DEVFORM_PCIE)
+ timeout = cmd_priv ? tgtdev->dev_spec.pcie_inf.abort_to
+ : tgtdev->dev_spec.pcie_inf.reset_to;
+ else if (tgtdev->dev_type == MPI3_DEVICE_DEVFORM_VD)
+ timeout = cmd_priv ? tgtdev->dev_spec.vd_inf.abort_to
+ : tgtdev->dev_spec.vd_inf.reset_to;
}
init_completion(&drv_cmd->done);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c
index c8d6ced5640e..d70f002d6487 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -413,9 +413,11 @@ static void mpi3mr_remove_device_by_sas_address(struct mpi3mr_ioc *mrioc,
sas_address, hba_port);
if (tgtdev) {
if (!list_empty(&tgtdev->list)) {
- list_del_init(&tgtdev->list);
was_on_tgtdev_list = 1;
- mpi3mr_tgtdev_put(tgtdev);
+ if (tgtdev->state == MPI3MR_DEV_REMOVE_HS_STARTED) {
+ list_del_init(&tgtdev->list);
+ mpi3mr_tgtdev_put(tgtdev);
+ }
}
}
spin_unlock_irqrestore(&mrioc->tgtdev_lock, flags);
@@ -2079,6 +2081,8 @@ int mpi3mr_expander_add(struct mpi3mr_ioc *mrioc, u16 handle)
link_rate = (expander_pg1.negotiated_link_rate &
MPI3_SAS_NEG_LINK_RATE_LOGICAL_MASK) >>
MPI3_SAS_NEG_LINK_RATE_LOGICAL_SHIFT;
+ if (link_rate < MPI3_SAS_NEG_LINK_RATE_1_5)
+ link_rate = MPI3_SAS_NEG_LINK_RATE_1_5;
mpi3mr_update_links(mrioc, sas_address_parent,
handle, i, link_rate, hba_port);
}
@@ -2388,6 +2392,9 @@ int mpi3mr_report_tgtdev_to_sas_transport(struct mpi3mr_ioc *mrioc,
link_rate = mpi3mr_get_sas_negotiated_logical_linkrate(mrioc, tgtdev);
+ if (link_rate < MPI3_SAS_NEG_LINK_RATE_1_5)
+ link_rate = MPI3_SAS_NEG_LINK_RATE_1_5;
+
mpi3mr_update_links(mrioc, sas_address_parent, tgtdev->dev_handle,
parent_phy_number, link_rate, hba_port);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index bd3efa5b46c7..0d652db8fe24 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -1420,7 +1420,13 @@ _base_display_reply_info(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
loginfo = le32_to_cpu(mpi_reply->IOCLogInfo);
- _base_sas_log_info(ioc, loginfo);
+ if (ioc->logging_level & MPT_DEBUG_REPLY)
+ _base_sas_log_info(ioc, loginfo);
+ else {
+ if (!((ioc_status & MPI2_IOCSTATUS_MASK) &
+ MPI2_IOCSTATUS_CONFIG_INVALID_PAGE))
+ _base_sas_log_info(ioc, loginfo);
+ }
}
if (ioc_status || loginfo) {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 939141cde3ca..e6a6f21d309b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,8 +77,8 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "52.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 52
+#define MPT3SAS_DRIVER_VERSION "54.100.00.00"
+#define MPT3SAS_MAJOR_VERSION 54
#define MPT3SAS_MINOR_VERSION 100
#define MPT3SAS_BUILD_VERSION 00
#define MPT3SAS_RELEASE_VERSION 00
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index dc74ebc6405a..f3400d01cc2a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -166,6 +166,9 @@ _transport_convert_phy_link_rate(u8 link_rate)
case MPI25_SAS_NEG_LINK_RATE_12_0:
rc = SAS_LINK_RATE_12_0_GBPS;
break;
+ case MPI26_SAS_NEG_LINK_RATE_22_5:
+ rc = SAS_LINK_RATE_22_5_GBPS;
+ break;
case MPI2_SAS_NEG_LINK_RATE_PHY_DISABLED:
rc = SAS_PHY_DISABLED;
break;
@@ -987,11 +990,9 @@ mpt3sas_transport_port_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address,
list_for_each_entry_safe(mpt3sas_phy, next_phy,
&mpt3sas_port->phy_list, port_siblings) {
if ((ioc->logging_level & MPT_DEBUG_TRANSPORT))
- dev_printk(KERN_INFO, &mpt3sas_port->port->dev,
- "remove: sas_addr(0x%016llx), phy(%d)\n",
- (unsigned long long)
- mpt3sas_port->remote_identify.sas_address,
- mpt3sas_phy->phy_id);
+ ioc_info(ioc, "remove: sas_addr(0x%016llx), phy(%d)\n",
+ (unsigned long long) mpt3sas_port->remote_identify.sas_address,
+ mpt3sas_phy->phy_id);
mpt3sas_phy->phy_belongs_to_port = 0;
if (!ioc->remove_host)
sas_port_delete_phy(mpt3sas_port->port,
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 15b3d9d55a4b..f2e7997d5b9d 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1175,7 +1175,7 @@ static int mvs_dev_found_notify(struct domain_device *dev, int lock)
mvi_device->dev_type = dev->dev_type;
mvi_device->mvi_info = mvi;
mvi_device->sas_device = dev;
- if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
+ if (dev_parent_is_expander(dev)) {
int phy_id;
phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev);
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index 95af3bb03834..a58abd796603 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -498,14 +498,14 @@ static bool myrs_enable_mmio_mbox(struct myrs_hba *cs,
/* Temporary dma mapping, used only in the scope of this function */
mbox = dma_alloc_coherent(&pdev->dev, sizeof(union myrs_cmd_mbox),
&mbox_addr, GFP_KERNEL);
- if (dma_mapping_error(&pdev->dev, mbox_addr))
+ if (!mbox)
return false;
/* These are the base addresses for the command memory mailbox array */
cs->cmd_mbox_size = MYRS_MAX_CMD_MBOX * sizeof(union myrs_cmd_mbox);
cmd_mbox = dma_alloc_coherent(&pdev->dev, cs->cmd_mbox_size,
&cs->cmd_mbox_addr, GFP_KERNEL);
- if (dma_mapping_error(&pdev->dev, cs->cmd_mbox_addr)) {
+ if (!cmd_mbox) {
dev_err(&pdev->dev, "Failed to map command mailbox\n");
goto out_free;
}
@@ -520,7 +520,7 @@ static bool myrs_enable_mmio_mbox(struct myrs_hba *cs,
cs->stat_mbox_size = MYRS_MAX_STAT_MBOX * sizeof(struct myrs_stat_mbox);
stat_mbox = dma_alloc_coherent(&pdev->dev, cs->stat_mbox_size,
&cs->stat_mbox_addr, GFP_KERNEL);
- if (dma_mapping_error(&pdev->dev, cs->stat_mbox_addr)) {
+ if (!stat_mbox) {
dev_err(&pdev->dev, "Failed to map status mailbox\n");
goto out_free;
}
@@ -533,7 +533,7 @@ static bool myrs_enable_mmio_mbox(struct myrs_hba *cs,
cs->fwstat_buf = dma_alloc_coherent(&pdev->dev,
sizeof(struct myrs_fwstat),
&cs->fwstat_addr, GFP_KERNEL);
- if (dma_mapping_error(&pdev->dev, cs->fwstat_addr)) {
+ if (!cs->fwstat_buf) {
dev_err(&pdev->dev, "Failed to map firmware health buffer\n");
cs->fwstat_buf = NULL;
goto out_free;
diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c
index 7618f9cc9986..cbfda8c04e95 100644
--- a/drivers/scsi/pm8001/pm8001_ctl.c
+++ b/drivers/scsi/pm8001/pm8001_ctl.c
@@ -534,23 +534,25 @@ static ssize_t pm8001_ctl_iop_log_show(struct device *cdev,
char *str = buf;
u32 read_size =
pm8001_ha->main_cfg_tbl.pm80xx_tbl.event_log_size / 1024;
- static u32 start, end, count;
u32 max_read_times = 32;
u32 max_count = (read_size * 1024) / (max_read_times * 4);
u32 *temp = (u32 *)pm8001_ha->memoryMap.region[IOP].virt_ptr;
- if ((count % max_count) == 0) {
- start = 0;
- end = max_read_times;
- count = 0;
+ mutex_lock(&pm8001_ha->iop_log_lock);
+
+ if ((pm8001_ha->iop_log_count % max_count) == 0) {
+ pm8001_ha->iop_log_start = 0;
+ pm8001_ha->iop_log_end = max_read_times;
+ pm8001_ha->iop_log_count = 0;
} else {
- start = end;
- end = end + max_read_times;
+ pm8001_ha->iop_log_start = pm8001_ha->iop_log_end;
+ pm8001_ha->iop_log_end = pm8001_ha->iop_log_end + max_read_times;
}
- for (; start < end; start++)
- str += sprintf(str, "%08x ", *(temp+start));
- count++;
+ for (; pm8001_ha->iop_log_start < pm8001_ha->iop_log_end; pm8001_ha->iop_log_start++)
+ str += sprintf(str, "%08x ", *(temp+pm8001_ha->iop_log_start));
+ pm8001_ha->iop_log_count++;
+ mutex_unlock(&pm8001_ha->iop_log_lock);
return str - buf;
}
static DEVICE_ATTR(iop_log, S_IRUGO, pm8001_ctl_iop_log_show, NULL);
@@ -680,7 +682,7 @@ static int pm8001_set_nvmd(struct pm8001_hba_info *pm8001_ha)
struct pm8001_ioctl_payload *payload;
DECLARE_COMPLETION_ONSTACK(completion);
u8 *ioctlbuffer;
- u32 ret;
+ int ret;
u32 length = 1024 * 5 + sizeof(*payload) - 1;
if (pm8001_ha->fw_image->size > 4096) {
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 42a4eeac24c9..8005995a317c 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -2163,8 +2163,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
/* Print sas address of IO failed device */
if ((status != IO_SUCCESS) && (status != IO_OVERFLOW) &&
(status != IO_UNDERFLOW)) {
- if (!((t->dev->parent) &&
- (dev_is_expander(t->dev->parent->dev_type)))) {
+ if (!dev_parent_is_expander(t->dev)) {
for (i = 0, j = 4; j <= 7 && i <= 3; i++, j++)
sata_addr_low[i] = pm8001_ha->sas_addr[j];
for (i = 0, j = 0; j <= 3 && i <= 3; i++, j++)
@@ -4168,7 +4167,6 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
u16 firstBurstSize = 0;
u16 ITNT = 2000;
struct domain_device *dev = pm8001_dev->sas_device;
- struct domain_device *parent_dev = dev->parent;
struct pm8001_port *port = dev->port->lldd_port;
memset(&payload, 0, sizeof(payload));
@@ -4186,10 +4184,9 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
dev_is_expander(pm8001_dev->dev_type))
stp_sspsmp_sata = 0x01; /*ssp or smp*/
}
- if (parent_dev && dev_is_expander(parent_dev->dev_type))
- phy_id = parent_dev->ex_dev.ex_phy->phy_id;
- else
- phy_id = pm8001_dev->attached_phy;
+
+ phy_id = pm80xx_get_local_phy_id(dev);
+
opc = OPC_INB_REG_DEV;
linkrate = (pm8001_dev->sas_device->linkrate < dev->port->linkrate) ?
pm8001_dev->sas_device->linkrate : dev->port->linkrate;
diff --git a/drivers/scsi/pm8001/pm8001_hwi.h b/drivers/scsi/pm8001/pm8001_hwi.h
index fc2127dcb58d..f1ce8df082b0 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.h
+++ b/drivers/scsi/pm8001/pm8001_hwi.h
@@ -339,8 +339,10 @@ struct ssp_completion_resp {
__le32 status;
__le32 param;
__le32 ssptag_rescv_rescpad;
+
+ /* Must be last --ends in a flexible-array member. */
struct ssp_response_iu ssp_resp_iu;
- __le32 residual_count;
+ /* __le32 residual_count; */
} __attribute__((packed, aligned(4)));
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 599410bcdfea..8ff4b89ff81e 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -552,6 +552,7 @@ static struct pm8001_hba_info *pm8001_pci_alloc(struct pci_dev *pdev,
pm8001_ha->id = pm8001_id++;
pm8001_ha->logging_level = logging_level;
pm8001_ha->non_fatal_count = 0;
+ mutex_init(&pm8001_ha->iop_log_lock);
if (link_rate >= 1 && link_rate <= 15)
pm8001_ha->link_rate = (link_rate << 8);
else {
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index f7067878b34f..6a8d35aea93a 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -130,6 +130,16 @@ static void pm80xx_get_tag_opcodes(struct sas_task *task, int *ata_op,
}
}
+u32 pm80xx_get_local_phy_id(struct domain_device *dev)
+{
+ struct pm8001_device *pm8001_dev = dev->lldd_dev;
+
+ if (dev_parent_is_expander(dev))
+ return dev->parent->ex_dev.ex_phy->phy_id;
+
+ return pm8001_dev->attached_phy;
+}
+
void pm80xx_show_pending_commands(struct pm8001_hba_info *pm8001_ha,
struct pm8001_device *target_pm8001_dev)
{
@@ -477,7 +487,7 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags)
struct pm8001_device *pm8001_dev = dev->lldd_dev;
bool internal_abort = sas_is_internal_abort(task);
struct pm8001_hba_info *pm8001_ha;
- struct pm8001_port *port = NULL;
+ struct pm8001_port *port;
struct pm8001_ccb_info *ccb;
unsigned long flags;
u32 n_elem = 0;
@@ -502,8 +512,7 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags)
spin_lock_irqsave(&pm8001_ha->lock, flags);
- pm8001_dev = dev->lldd_dev;
- port = pm8001_ha->phy[pm8001_dev->attached_phy].port;
+ port = dev->port->lldd_port;
if (!internal_abort &&
(DEV_IS_GONE(pm8001_dev) || !port || !port->port_attached)) {
@@ -701,7 +710,7 @@ static int pm8001_dev_found_notify(struct domain_device *dev)
dev->lldd_dev = pm8001_device;
pm8001_device->dev_type = dev->dev_type;
pm8001_device->dcompletion = &completion;
- if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
+ if (dev_parent_is_expander(dev)) {
int phy_id;
phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev);
@@ -766,7 +775,16 @@ static void pm8001_dev_gone_notify(struct domain_device *dev)
spin_lock_irqsave(&pm8001_ha->lock, flags);
}
PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id);
- pm8001_ha->phy[pm8001_dev->attached_phy].phy_attached = 0;
+
+ /*
+ * The phy array only contains local phys. Thus, we cannot clear
+ * phy_attached for a device behind an expander.
+ */
+ if (!dev_parent_is_expander(dev)) {
+ u32 phy_id = pm80xx_get_local_phy_id(dev);
+
+ pm8001_ha->phy[phy_id].phy_attached = 0;
+ }
pm8001_free_dev(pm8001_dev);
} else {
pm8001_dbg(pm8001_ha, DISC, "Found dev has gone.\n");
@@ -1048,7 +1066,7 @@ int pm8001_abort_task(struct sas_task *task)
struct pm8001_hba_info *pm8001_ha;
struct pm8001_device *pm8001_dev;
int rc = TMF_RESP_FUNC_FAILED, ret;
- u32 phy_id, port_id;
+ u32 port_id;
struct sas_task_slow slow_task;
if (!task->lldd_task || !task->dev)
@@ -1057,7 +1075,6 @@ int pm8001_abort_task(struct sas_task *task)
dev = task->dev;
pm8001_dev = dev->lldd_dev;
pm8001_ha = pm8001_find_ha_by_dev(dev);
- phy_id = pm8001_dev->attached_phy;
if (PM8001_CHIP_DISP->fatal_errors(pm8001_ha)) {
// If the controller is seeing fatal errors
@@ -1089,7 +1106,8 @@ int pm8001_abort_task(struct sas_task *task)
if (pm8001_ha->chip_id == chip_8006) {
DECLARE_COMPLETION_ONSTACK(completion_reset);
DECLARE_COMPLETION_ONSTACK(completion);
- struct pm8001_phy *phy = pm8001_ha->phy + phy_id;
+ u32 phy_id = pm80xx_get_local_phy_id(dev);
+ struct pm8001_phy *phy = &pm8001_ha->phy[phy_id];
port_id = phy->port->port_id;
/* 1. Set Device state as Recovery */
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 334485bb2c12..b63b6ffcaaf5 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -547,6 +547,10 @@ struct pm8001_hba_info {
u32 ci_offset;
u32 pi_offset;
u32 max_memcnt;
+ u32 iop_log_start;
+ u32 iop_log_end;
+ u32 iop_log_count;
+ struct mutex iop_log_lock;
};
struct pm8001_work {
@@ -798,6 +802,7 @@ void pm8001_setds_completion(struct domain_device *dev);
void pm8001_tmf_aborted(struct sas_task *task);
void pm80xx_show_pending_commands(struct pm8001_hba_info *pm8001_ha,
struct pm8001_device *dev);
+u32 pm80xx_get_local_phy_id(struct domain_device *dev);
#endif
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index c1bae995a412..31960b72c1e9 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -2340,8 +2340,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
/* Print sas address of IO failed device */
if ((status != IO_SUCCESS) && (status != IO_OVERFLOW) &&
(status != IO_UNDERFLOW)) {
- if (!((t->dev->parent) &&
- (dev_is_expander(t->dev->parent->dev_type)))) {
+ if (!dev_parent_is_expander(t->dev)) {
for (i = 0, j = 4; i <= 3 && j <= 7; i++, j++)
sata_addr_low[i] = pm8001_ha->sas_addr[j];
for (i = 0, j = 0; i <= 3 && j <= 3; i++, j++)
@@ -4780,7 +4779,6 @@ static int pm80xx_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
u16 firstBurstSize = 0;
u16 ITNT = 2000;
struct domain_device *dev = pm8001_dev->sas_device;
- struct domain_device *parent_dev = dev->parent;
struct pm8001_port *port = dev->port->lldd_port;
memset(&payload, 0, sizeof(payload));
@@ -4799,10 +4797,8 @@ static int pm80xx_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
dev_is_expander(pm8001_dev->dev_type))
stp_sspsmp_sata = 0x01; /*ssp or smp*/
}
- if (parent_dev && dev_is_expander(parent_dev->dev_type))
- phy_id = parent_dev->ex_dev.ex_phy->phy_id;
- else
- phy_id = pm8001_dev->attached_phy;
+
+ phy_id = pm80xx_get_local_phy_id(dev);
opc = OPC_INB_REG_DEV;
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
index eb8fd37b2066..d8a63b7fed6a 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.h
+++ b/drivers/scsi/pm8001/pm80xx_hwi.h
@@ -558,8 +558,10 @@ struct ssp_completion_resp {
__le32 status;
__le32 param;
__le32 ssptag_rescv_rescpad;
+
+ /* Must be last --ends in a flexible-array member. */
struct ssp_response_iu ssp_resp_iu;
- __le32 residual_count;
+ /* __le32 residual_count; */
} __attribute__((packed, aligned(4)));
#define SSP_RESCV_BIT 0x00010000
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 10431a67d202..ccfc2d26dd37 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -3106,8 +3106,8 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
switch (rval) {
case QLA_SUCCESS:
/* Wait for the command completion. */
- ratov_j = ha->r_a_tov / 10 * 4 * 1000;
- ratov_j = msecs_to_jiffies(ratov_j);
+ ratov_j = ha->r_a_tov / 10 * 4;
+ ratov_j = secs_to_jiffies(ratov_j);
if (!wait_for_completion_timeout(&comp, ratov_j)) {
ql_log(ql_log_info, vha, 0x7089,
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index cb95b7b12051..604e66bead1e 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -4890,9 +4890,7 @@ struct purex_item {
struct purex_item *pkt);
atomic_t in_use;
uint16_t size;
- struct {
- uint8_t iocb[64];
- } iocb;
+ uint8_t iocb[] __counted_by(size);
};
#include "qla_edif.h"
@@ -5101,7 +5099,6 @@ typedef struct scsi_qla_host {
struct list_head head;
spinlock_t lock;
} purex_list;
- struct purex_item default_item;
struct name_list_extended gnl;
/* Count of active session/fcport */
@@ -5130,6 +5127,11 @@ typedef struct scsi_qla_host {
#define DPORT_DIAG_IN_PROGRESS BIT_0
#define DPORT_DIAG_CHIP_RESET_IN_PROGRESS BIT_1
uint16_t dport_status;
+
+ /* Must be last --ends in a flexible-array member. */
+ TRAILING_OVERLAP(struct purex_item, default_item, iocb,
+ uint8_t __default_item_iocb[QLA_DEFAULT_PAYLOAD_SIZE];
+ );
} scsi_qla_host_t;
struct qla27xx_image_status {
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 91bbd3b75bff..ccd4485087a1 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -1798,7 +1798,7 @@ retry:
switch (rval) {
case QLA_SUCCESS:
break;
- case EAGAIN:
+ case -EAGAIN:
msleep(EDIF_MSLEEP_INTERVAL);
cnt++;
if (cnt < EDIF_RETRY_COUNT)
@@ -3649,7 +3649,7 @@ retry:
p->e.extra_rx_xchg_address, p->e.extra_control_flags,
sp->handle, sp->remap.req.len, bsg_job);
break;
- case EAGAIN:
+ case -EAGAIN:
msleep(EDIF_MSLEEP_INTERVAL);
cnt++;
if (cnt < EDIF_RETRY_COUNT)
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index be211ff22acb..6a2e1c7fd125 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2059,11 +2059,11 @@ static void qla_marker_sp_done(srb_t *sp, int res)
int cnt = 5; \
do { \
if (_chip_gen != sp->vha->hw->chip_reset || _login_gen != sp->fcport->login_gen) {\
- _rval = EINVAL; \
+ _rval = -EINVAL; \
break; \
} \
_rval = qla2x00_start_sp(_sp); \
- if (_rval == EAGAIN) \
+ if (_rval == -EAGAIN) \
msleep(1); \
else \
break; \
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index c4c6b5c6658c..4559b490614d 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1077,17 +1077,17 @@ static struct purex_item *
qla24xx_alloc_purex_item(scsi_qla_host_t *vha, uint16_t size)
{
struct purex_item *item = NULL;
- uint8_t item_hdr_size = sizeof(*item);
if (size > QLA_DEFAULT_PAYLOAD_SIZE) {
- item = kzalloc(item_hdr_size +
- (size - QLA_DEFAULT_PAYLOAD_SIZE), GFP_ATOMIC);
+ item = kzalloc(struct_size(item, iocb, size), GFP_ATOMIC);
} else {
if (atomic_inc_return(&vha->default_item.in_use) == 1) {
item = &vha->default_item;
goto initialize_purex_header;
} else {
- item = kzalloc(item_hdr_size, GFP_ATOMIC);
+ item = kzalloc(
+ struct_size(item, iocb, QLA_DEFAULT_PAYLOAD_SIZE),
+ GFP_ATOMIC);
}
}
if (!item) {
@@ -1127,17 +1127,16 @@ qla24xx_queue_purex_item(scsi_qla_host_t *vha, struct purex_item *pkt,
* @vha: SCSI driver HA context
* @pkt: ELS packet
*/
-static struct purex_item
-*qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt)
+static struct purex_item *
+qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt)
{
struct purex_item *item;
- item = qla24xx_alloc_purex_item(vha,
- QLA_DEFAULT_PAYLOAD_SIZE);
+ item = qla24xx_alloc_purex_item(vha, QLA_DEFAULT_PAYLOAD_SIZE);
if (!item)
return item;
- memcpy(&item->iocb, pkt, sizeof(item->iocb));
+ memcpy(&item->iocb, pkt, QLA_DEFAULT_PAYLOAD_SIZE);
return item;
}
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 8ee2e337c9e1..065f9bcca26f 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -419,7 +419,7 @@ retry:
switch (rval) {
case QLA_SUCCESS:
break;
- case EAGAIN:
+ case -EAGAIN:
msleep(PURLS_MSLEEP_INTERVAL);
cnt++;
if (cnt < PURLS_RETRY_COUNT)
@@ -1308,7 +1308,7 @@ void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp)
ql_dbg(ql_dbg_unsol, vha, 0x2121,
"PURLS OP[%01x] size %d xchg addr 0x%x portid %06x\n",
- item->iocb.iocb[3], item->size, uctx->exchange_address,
+ item->iocb[3], item->size, uctx->exchange_address,
fcport->d_id.b24);
/* +48 0 1 2 3 4 5 6 7 8 9 A B C D E F
* ----- -----------------------------------------------
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index d4b484c0fd9d..98a5c105fdfd 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1291,8 +1291,8 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
"Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
/* Wait for the command completion. */
- ratov_j = ha->r_a_tov/10 * 4 * 1000;
- ratov_j = msecs_to_jiffies(ratov_j);
+ ratov_j = ha->r_a_tov / 10 * 4;
+ ratov_j = secs_to_jiffies(ratov_j);
switch (rval) {
case QLA_SUCCESS:
if (!wait_for_completion_timeout(&comp, ratov_j)) {
@@ -1806,8 +1806,8 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
rval = ha->isp_ops->abort_command(sp);
/* Wait for command completion. */
ret_cmd = false;
- ratov_j = ha->r_a_tov/10 * 4 * 1000;
- ratov_j = msecs_to_jiffies(ratov_j);
+ ratov_j = ha->r_a_tov / 10 * 4;
+ ratov_j = secs_to_jiffies(ratov_j);
switch (rval) {
case QLA_SUCCESS:
if (wait_for_completion_timeout(&comp, ratov_j)) {
@@ -6459,9 +6459,10 @@ dealloc:
void
qla24xx_free_purex_item(struct purex_item *item)
{
- if (item == &item->vha->default_item)
+ if (item == &item->vha->default_item) {
memset(&item->vha->default_item, 0, sizeof(struct purex_item));
- else
+ memset(&item->vha->__default_item_iocb, 0, QLA_DEFAULT_PAYLOAD_SIZE);
+ } else
kfree(item);
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 353cb60e1abe..b2ab97be5db3 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1155,14 +1155,9 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf,
struct sdebug_err_inject *inject;
struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
- buf = kzalloc(count + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (copy_from_user(buf, ubuf, count)) {
- kfree(buf);
- return -EFAULT;
- }
+ buf = memdup_user_nul(ubuf, count);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
if (buf[0] == '-')
return sdebug_err_remove(sdev, buf, count);
@@ -8805,8 +8800,8 @@ static int sdebug_add_store(void)
/* Logical Block Provisioning */
if (scsi_debug_lbp()) {
map_size = lba_to_map_index(sdebug_store_sectors - 1) + 1;
- sip->map_storep = vmalloc(array_size(sizeof(long),
- BITS_TO_LONGS(map_size)));
+ sip->map_storep = vcalloc(BITS_TO_LONGS(map_size),
+ sizeof(long));
pr_info("%lu provisioning blocks\n", map_size);
@@ -8815,8 +8810,6 @@ static int sdebug_add_store(void)
goto err;
}
- bitmap_zero(sip->map_storep, map_size);
-
/* Map first 1KB for partition table */
if (sdebug_num_parts)
map_region(sip, 0, 2);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 00ad574ce61c..0252d3f6bed1 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -106,7 +106,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
unsigned int mode);
static void sd_config_write_same(struct scsi_disk *sdkp,
struct queue_limits *lim);
-static int sd_revalidate_disk(struct gendisk *);
+static void sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
static void sd_shutdown(struct device *);
static void scsi_disk_release(struct device *cdev);
@@ -3691,13 +3691,13 @@ static void sd_read_block_zero(struct scsi_disk *sdkp)
* performs disk spin up, read_capacity, etc.
* @disk: struct gendisk we care about
**/
-static int sd_revalidate_disk(struct gendisk *disk)
+static void sd_revalidate_disk(struct gendisk *disk)
{
struct scsi_disk *sdkp = scsi_disk(disk);
struct scsi_device *sdp = sdkp->device;
sector_t old_capacity = sdkp->capacity;
- struct queue_limits lim;
- unsigned char *buffer;
+ struct queue_limits *lim = NULL;
+ unsigned char *buffer = NULL;
unsigned int dev_max;
int err;
@@ -3709,25 +3709,26 @@ static int sd_revalidate_disk(struct gendisk *disk)
* of the other niceties.
*/
if (!scsi_device_online(sdp))
- goto out;
+ return;
+
+ lim = kmalloc(sizeof(*lim), GFP_KERNEL);
+ if (!lim)
+ return;
buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL);
- if (!buffer) {
- sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "
- "allocation failure.\n");
+ if (!buffer)
goto out;
- }
sd_spinup_disk(sdkp);
- lim = queue_limits_start_update(sdkp->disk->queue);
+ *lim = queue_limits_start_update(sdkp->disk->queue);
/*
* Without media there is no reason to ask; moreover, some devices
* react badly if we do.
*/
if (sdkp->media_present) {
- sd_read_capacity(sdkp, &lim, buffer);
+ sd_read_capacity(sdkp, lim, buffer);
/*
* Some USB/UAS devices return generic values for mode pages
* until the media has been accessed. Trigger a READ operation
@@ -3741,17 +3742,17 @@ static int sd_revalidate_disk(struct gendisk *disk)
* cause this to be updated correctly and any device which
* doesn't support it should be treated as rotational.
*/
- lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
+ lim->features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
if (scsi_device_supports_vpd(sdp)) {
sd_read_block_provisioning(sdkp);
- sd_read_block_limits(sdkp, &lim);
+ sd_read_block_limits(sdkp, lim);
sd_read_block_limits_ext(sdkp);
- sd_read_block_characteristics(sdkp, &lim);
- sd_zbc_read_zones(sdkp, &lim, buffer);
+ sd_read_block_characteristics(sdkp, lim);
+ sd_zbc_read_zones(sdkp, lim, buffer);
}
- sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp));
+ sd_config_discard(sdkp, lim, sd_discard_mode(sdkp));
sd_print_capacity(sdkp, old_capacity);
@@ -3761,47 +3762,46 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_app_tag_own(sdkp, buffer);
sd_read_write_same(sdkp, buffer);
sd_read_security(sdkp, buffer);
- sd_config_protection(sdkp, &lim);
+ sd_config_protection(sdkp, lim);
}
/*
* We now have all cache related info, determine how we deal
* with flush requests.
*/
- sd_set_flush_flag(sdkp, &lim);
+ sd_set_flush_flag(sdkp, lim);
/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
/* Some devices report a maximum block count for READ/WRITE requests. */
dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
- lim.max_dev_sectors = logical_to_sectors(sdp, dev_max);
+ lim->max_dev_sectors = logical_to_sectors(sdp, dev_max);
if (sd_validate_min_xfer_size(sdkp))
- lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks);
+ lim->io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks);
else
- lim.io_min = 0;
+ lim->io_min = 0;
/*
* Limit default to SCSI host optimal sector limit if set. There may be
* an impact on performance for when the size of a request exceeds this
* host limit.
*/
- lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT;
+ lim->io_opt = sdp->host->opt_sectors << SECTOR_SHIFT;
if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
- lim.io_opt = min_not_zero(lim.io_opt,
+ lim->io_opt = min_not_zero(lim->io_opt,
logical_to_bytes(sdp, sdkp->opt_xfer_blocks));
}
sdkp->first_scan = 0;
set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity));
- sd_config_write_same(sdkp, &lim);
- kfree(buffer);
+ sd_config_write_same(sdkp, lim);
- err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim);
+ err = queue_limits_commit_update_frozen(sdkp->disk->queue, lim);
if (err)
- return err;
+ goto out;
/*
* Query concurrent positioning ranges after
@@ -3820,7 +3820,9 @@ static int sd_revalidate_disk(struct gendisk *disk)
set_capacity_and_notify(disk, 0);
out:
- return 0;
+ kfree(buffer);
+ kfree(lim);
+
}
/**
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 125944941601..03c97e60d36f 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -20,6 +20,7 @@
#include <linux/reboot.h>
#include <linux/cciss_ioctl.h>
#include <linux/crash_dump.h>
+#include <linux/string.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
@@ -6774,17 +6775,15 @@ static int pqi_passthru_ioctl(struct pqi_ctrl_info *ctrl_info, void __user *arg)
}
if (iocommand.buf_size > 0) {
- kernel_buffer = kmalloc(iocommand.buf_size, GFP_KERNEL);
- if (!kernel_buffer)
- return -ENOMEM;
if (iocommand.Request.Type.Direction & XFER_WRITE) {
- if (copy_from_user(kernel_buffer, iocommand.buf,
- iocommand.buf_size)) {
- rc = -EFAULT;
- goto out;
- }
+ kernel_buffer = memdup_user(iocommand.buf,
+ iocommand.buf_size);
+ if (IS_ERR(kernel_buffer))
+ return PTR_ERR(kernel_buffer);
} else {
- memset(kernel_buffer, 0, iocommand.buf_size);
+ kernel_buffer = kzalloc(iocommand.buf_size, GFP_KERNEL);
+ if (!kernel_buffer)
+ return -ENOMEM;
}
}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index dc51ea352198..567f9cd29102 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1941,8 +1941,8 @@ static int storvsc_probe(struct hv_device *device,
int num_present_cpus = num_present_cpus();
struct Scsi_Host *host;
struct hv_host_device *host_dev;
- bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false);
- bool is_fc = ((dev_id->driver_data == SFC_GUID) ? true : false);
+ bool dev_is_ide = dev_id->driver_data == IDE_GUID;
+ bool is_fc = dev_id->driver_data == SFC_GUID;
int target = 0;
struct storvsc_device *stor_device;
int max_sub_channels = 0;
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 88db94f382bb..efe8cdb20060 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -665,7 +665,7 @@ static ssize_t lio_target_nacl_cmdsn_depth_store(struct config_item *item,
}
acl_ci = &se_nacl->acl_group.cg_item;
if (!acl_ci) {
- pr_err("Unable to locatel acl_ci\n");
+ pr_err("Unable to locate acl_ci\n");
return -EINVAL;
}
tpg_ci = &acl_ci->ci_parent->ci_group->cg_item;
@@ -684,7 +684,7 @@ static ssize_t lio_target_nacl_cmdsn_depth_store(struct config_item *item,
ret = core_tpg_set_initiator_node_queue_depth(se_nacl, cmdsn_depth);
- pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for"
+ pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for "
"InitiatorName: %s\n", config_item_name(wwn_ci),
config_item_name(tpg_ci), cmdsn_depth,
config_item_name(acl_ci));
@@ -1131,7 +1131,7 @@ static void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg)
/* End items for lio_target_tiqn_cit */
-/* Start LIO-Target TIQN struct contig_item lio_target_cit */
+/* Start LIO-Target TIQN struct config_item lio_target_cit */
static ssize_t lio_target_wwn_lio_version_show(struct config_item *item,
char *page)
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
index f60b156ede12..620de3910599 100644
--- a/drivers/target/iscsi/iscsi_target_tmr.c
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -112,7 +112,8 @@ u8 iscsit_tmr_task_reassign(
struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
- u64 ret, ref_lun;
+ u64 ref_lun;
+ int ret;
pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x,"
" RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n",
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index cc88aaa106da..c9bdd4140fd0 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -29,6 +29,10 @@
#define MCQ_ENTRY_SIZE_IN_DWORD 8
#define CQE_UCD_BA GENMASK_ULL(63, 7)
+#define UFSHCD_ENABLE_MCQ_INTRS (UTP_TASK_REQ_COMPL |\
+ UFSHCD_ERROR_MASK |\
+ MCQ_CQ_EVENT_STATUS)
+
/* Max mcq register polling time in microseconds */
#define MCQ_POLL_US 500000
@@ -355,9 +359,16 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_lock);
void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
{
struct ufs_hw_queue *hwq;
+ u32 intrs;
u16 qsize;
int i;
+ /* Enable required interrupts */
+ intrs = UFSHCD_ENABLE_MCQ_INTRS;
+ if (hba->quirks & UFSHCD_QUIRK_MCQ_BROKEN_INTR)
+ intrs &= ~MCQ_CQ_EVENT_STATUS;
+ ufshcd_enable_intr(hba, intrs);
+
for (i = 0; i < hba->nr_hw_queues; i++) {
hwq = &hba->uhq[i];
hwq->id = i;
diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index 4bd7d491e3c5..0086816b27cd 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -512,6 +512,8 @@ static ssize_t pm_qos_enable_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
+ guard(mutex)(&hba->pm_qos_mutex);
+
return sysfs_emit(buf, "%d\n", hba->pm_qos_enabled);
}
diff --git a/drivers/ufs/core/ufs_trace.h b/drivers/ufs/core/ufs_trace.h
index caa32e23ffa5..584c2b5c6ad9 100644
--- a/drivers/ufs/core/ufs_trace.h
+++ b/drivers/ufs/core/ufs_trace.h
@@ -11,6 +11,7 @@
#include <ufs/ufs.h>
#include <linux/tracepoint.h>
+#include "ufs_trace_types.h"
#define str_opcode(opcode) \
__print_symbolic(opcode, \
diff --git a/drivers/ufs/core/ufs_trace_types.h b/drivers/ufs/core/ufs_trace_types.h
new file mode 100644
index 000000000000..f2d5ad1d92b9
--- /dev/null
+++ b/drivers/ufs/core/ufs_trace_types.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _UFS_TRACE_TYPES_H_
+#define _UFS_TRACE_TYPES_H_
+
+enum ufs_trace_str_t {
+ UFS_CMD_SEND,
+ UFS_CMD_COMP,
+ UFS_DEV_COMP,
+ UFS_QUERY_SEND,
+ UFS_QUERY_COMP,
+ UFS_QUERY_ERR,
+ UFS_TM_SEND,
+ UFS_TM_COMP,
+ UFS_TM_ERR
+};
+
+enum ufs_trace_tsf_t {
+ UFS_TSF_CDB,
+ UFS_TSF_OSF,
+ UFS_TSF_TM_INPUT,
+ UFS_TSF_TM_OUTPUT
+};
+
+#endif /* _UFS_TRACE_TYPES_H_ */
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 9a43102b2b21..d9632d7c5f01 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -45,11 +45,6 @@
UTP_TASK_REQ_COMPL |\
UFSHCD_ERROR_MASK)
-#define UFSHCD_ENABLE_MCQ_INTRS (UTP_TASK_REQ_COMPL |\
- UFSHCD_ERROR_MASK |\
- MCQ_CQ_EVENT_STATUS)
-
-
/* UIC command timeout, unit: ms */
enum {
UIC_CMD_TIMEOUT_DEFAULT = 500,
@@ -316,6 +311,9 @@ static const struct ufs_dev_quirk ufs_fixups[] = {
{ .wmanufacturerid = UFS_VENDOR_TOSHIBA,
.model = "THGLF2G9D8KBADG",
.quirk = UFS_DEVICE_QUIRK_PA_TACTIVATE },
+ { .wmanufacturerid = UFS_VENDOR_TOSHIBA,
+ .model = "THGJFJT1E45BATP",
+ .quirk = UFS_DEVICE_QUIRK_NO_TIMESTAMP_SUPPORT },
{}
};
@@ -369,7 +367,7 @@ EXPORT_SYMBOL_GPL(ufshcd_disable_irq);
* @hba: per adapter instance
* @intrs: interrupt bits
*/
-static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs)
+void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs)
{
u32 old_val = ufshcd_readl(hba, REG_INTERRUPT_ENABLE);
u32 new_val = old_val | intrs;
@@ -606,10 +604,12 @@ void ufshcd_print_tr(struct ufs_hba *hba, int tag, bool pr_prdt)
lrbp = &hba->lrb[tag];
- dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n",
- tag, div_u64(lrbp->issue_time_stamp_local_clock, 1000));
- dev_err(hba->dev, "UPIU[%d] - complete time %lld us\n",
- tag, div_u64(lrbp->compl_time_stamp_local_clock, 1000));
+ if (hba->monitor.enabled) {
+ dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n", tag,
+ div_u64(lrbp->issue_time_stamp_local_clock, 1000));
+ dev_err(hba->dev, "UPIU[%d] - complete time %lld us\n", tag,
+ div_u64(lrbp->compl_time_stamp_local_clock, 1000));
+ }
dev_err(hba->dev,
"UPIU[%d] - Transfer Request Descriptor phys@0x%llx\n",
tag, (u64)lrbp->utrd_dma_addr);
@@ -1045,6 +1045,7 @@ EXPORT_SYMBOL_GPL(ufshcd_is_hba_active);
*/
void ufshcd_pm_qos_init(struct ufs_hba *hba)
{
+ guard(mutex)(&hba->pm_qos_mutex);
if (hba->pm_qos_enabled)
return;
@@ -1061,6 +1062,8 @@ void ufshcd_pm_qos_init(struct ufs_hba *hba)
*/
void ufshcd_pm_qos_exit(struct ufs_hba *hba)
{
+ guard(mutex)(&hba->pm_qos_mutex);
+
if (!hba->pm_qos_enabled)
return;
@@ -1075,6 +1078,8 @@ void ufshcd_pm_qos_exit(struct ufs_hba *hba)
*/
static void ufshcd_pm_qos_update(struct ufs_hba *hba, bool on)
{
+ guard(mutex)(&hba->pm_qos_mutex);
+
if (!hba->pm_qos_enabled)
return;
@@ -2230,11 +2235,13 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba)
static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba)
{
bool queue_resume_work = false;
- ktime_t curr_t = ktime_get();
+ ktime_t curr_t;
if (!ufshcd_is_clkscaling_supported(hba))
return;
+ curr_t = ktime_get();
+
guard(spinlock_irqsave)(&hba->clk_scaling.lock);
if (!hba->clk_scaling.active_reqs++)
@@ -2354,10 +2361,12 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag,
struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
unsigned long flags;
- lrbp->issue_time_stamp = ktime_get();
- lrbp->issue_time_stamp_local_clock = local_clock();
- lrbp->compl_time_stamp = ktime_set(0, 0);
- lrbp->compl_time_stamp_local_clock = 0;
+ if (hba->monitor.enabled) {
+ lrbp->issue_time_stamp = ktime_get();
+ lrbp->issue_time_stamp_local_clock = local_clock();
+ lrbp->compl_time_stamp = ktime_set(0, 0);
+ lrbp->compl_time_stamp_local_clock = 0;
+ }
ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
if (lrbp->cmd)
ufshcd_clk_scaling_start_busy(hba);
@@ -5622,8 +5631,10 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
enum utp_ocs ocs;
lrbp = &hba->lrb[task_tag];
- lrbp->compl_time_stamp = ktime_get();
- lrbp->compl_time_stamp_local_clock = local_clock();
+ if (hba->monitor.enabled) {
+ lrbp->compl_time_stamp = ktime_get();
+ lrbp->compl_time_stamp_local_clock = local_clock();
+ }
cmd = lrbp->cmd;
if (cmd) {
if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
@@ -6457,13 +6468,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba)
}
}
-static void ufshcd_force_error_recovery(struct ufs_hba *hba)
+void ufshcd_force_error_recovery(struct ufs_hba *hba)
{
spin_lock_irq(hba->host->host_lock);
hba->force_reset = true;
ufshcd_schedule_eh_work(hba);
spin_unlock_irq(hba->host->host_lock);
}
+EXPORT_SYMBOL_GPL(ufshcd_force_error_recovery);
static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
{
@@ -8786,7 +8798,8 @@ static void ufshcd_set_timestamp_attr(struct ufs_hba *hba)
struct ufs_dev_info *dev_info = &hba->dev_info;
struct utp_upiu_query_v4_0 *upiu_data;
- if (dev_info->wspecversion < 0x400)
+ if (dev_info->wspecversion < 0x400 ||
+ hba->dev_quirks & UFS_DEVICE_QUIRK_NO_TIMESTAMP_SUPPORT)
return;
ufshcd_dev_man_lock(hba);
@@ -8913,16 +8926,11 @@ err:
static void ufshcd_config_mcq(struct ufs_hba *hba)
{
int ret;
- u32 intrs;
ret = ufshcd_mcq_vops_config_esi(hba);
hba->mcq_esi_enabled = !ret;
dev_info(hba->dev, "ESI %sconfigured\n", ret ? "is not " : "");
- intrs = UFSHCD_ENABLE_MCQ_INTRS;
- if (hba->quirks & UFSHCD_QUIRK_MCQ_BROKEN_INTR)
- intrs &= ~MCQ_CQ_EVENT_STATUS;
- ufshcd_enable_intr(hba, intrs);
ufshcd_mcq_make_queues_operational(hba);
ufshcd_mcq_config_mac(hba, hba->nutrs);
@@ -10756,6 +10764,10 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
mutex_init(&hba->ee_ctrl_mutex);
mutex_init(&hba->wb_mutex);
+
+ /* Initialize mutex for PM QoS request synchronization */
+ mutex_init(&hba->pm_qos_mutex);
+
init_rwsem(&hba->clk_scaling_lock);
ufshcd_init_clk_gating(hba);
diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
index f0adcd9dd553..70d195179eba 100644
--- a/drivers/ufs/host/ufs-exynos.c
+++ b/drivers/ufs/host/ufs-exynos.c
@@ -776,7 +776,7 @@ static void exynos_ufs_config_sync_pattern_mask(struct exynos_ufs *ufs,
u32 mask, sync_len;
enum {
SYNC_LEN_G1 = 80 * 1000, /* 80us */
- SYNC_LEN_G2 = 40 * 1000, /* 44us */
+ SYNC_LEN_G2 = 40 * 1000, /* 40us */
SYNC_LEN_G3 = 20 * 1000, /* 20us */
};
int i;
@@ -1896,6 +1896,13 @@ static int fsd_ufs_pre_pwr_change(struct exynos_ufs *ufs,
return 0;
}
+/*
+ * Suspend hook for the FSD variant (wired up as .suspend in fsd_ufs_drvs).
+ *
+ * Gates the host clocks through exynos_ufs_gate_clks() and then writes 0 to
+ * the HCI_GPIO_OUT register. Always returns 0.
+ *
+ * NOTE(review): what the HCI_GPIO_OUT line drives is not visible here -
+ * confirm against the FSD hardware documentation.
+ */
+static int fsd_ufs_suspend(struct exynos_ufs *ufs)
+{
+ exynos_ufs_gate_clks(ufs);
+ hci_writel(ufs, 0, HCI_GPIO_OUT);
+ return 0;
+}
+
static inline u32 get_mclk_period_unipro_18(struct exynos_ufs *ufs)
{
return (16 * 1000 * 1000000UL / ufs->mclk_rate);
@@ -2162,6 +2169,7 @@ static const struct exynos_ufs_drv_data fsd_ufs_drvs = {
.pre_link = fsd_ufs_pre_link,
.post_link = fsd_ufs_post_link,
.pre_pwr_change = fsd_ufs_pre_pwr_change,
+ .suspend = fsd_ufs_suspend,
};
static const struct exynos_ufs_drv_data gs101_ufs_drvs = {
diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
index f902ce08c95a..758a393a9de1 100644
--- a/drivers/ufs/host/ufs-mediatek.c
+++ b/drivers/ufs/host/ufs-mediatek.c
@@ -29,6 +29,7 @@
#include "ufs-mediatek-sip.h"
static int ufs_mtk_config_mcq(struct ufs_hba *hba, bool irq);
+static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up);
#define CREATE_TRACE_POINTS
#include "ufs-mediatek-trace.h"
@@ -415,7 +416,7 @@ static void ufs_mtk_dbg_sel(struct ufs_hba *hba)
}
}
-static void ufs_mtk_wait_idle_state(struct ufs_hba *hba,
+static int ufs_mtk_wait_idle_state(struct ufs_hba *hba,
unsigned long retry_ms)
{
u64 timeout, time_checked;
@@ -451,8 +452,12 @@ static void ufs_mtk_wait_idle_state(struct ufs_hba *hba,
break;
} while (time_checked < timeout);
- if (wait_idle && sm != VS_HCE_BASE)
+ if (wait_idle && sm != VS_HCE_BASE) {
dev_info(hba->dev, "wait idle tmo: 0x%x\n", val);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
}
static int ufs_mtk_wait_link_state(struct ufs_hba *hba, u32 state,
@@ -798,8 +803,14 @@ static int ufs_mtk_setup_clocks(struct ufs_hba *hba, bool on,
clk_pwr_off = true;
}
- if (clk_pwr_off)
+ if (clk_pwr_off) {
ufs_mtk_pwr_ctrl(hba, false);
+ } else {
+ dev_warn(hba->dev, "Clock is not turned off, hba->ahit = 0x%x, AHIT = 0x%x\n",
+ hba->ahit,
+ ufshcd_readl(hba,
+ REG_AUTO_HIBERNATE_IDLE_TIMER));
+ }
ufs_mtk_mcq_disable_irq(hba);
} else if (on && status == POST_CHANGE) {
ufs_mtk_pwr_ctrl(hba, true);
@@ -1018,7 +1029,7 @@ static int ufs_mtk_vreg_fix_vcc(struct ufs_hba *hba)
struct arm_smccc_res res;
int err, ver;
- if (hba->vreg_info.vcc)
+ if (info->vcc)
return 0;
if (of_property_read_bool(np, "mediatek,ufs-vcc-by-num")) {
@@ -1075,6 +1086,80 @@ static void ufs_mtk_vreg_fix_vccqx(struct ufs_hba *hba)
}
}
+/*
+ * Derive hba->clk_gating.delay_ms from the auto-hibern8 idle timer.
+ *
+ * Does nothing when clock gating is not allowed. Otherwise the delay starts
+ * at 10 ms; if auto-hibern8 is supported and hba->ahit is non-zero, the AHIT
+ * timer/scale fields are converted to milliseconds (integer division, so
+ * sub-millisecond values become 0). The result is clamped to at least 10 ms
+ * and stored under the SCSI host lock.
+ */
+static void ufs_mtk_setup_clk_gating(struct ufs_hba *hba)
+{
+	/* microseconds per timer tick, indexed by the AHIT scale field */
+	u32 unit_us[] = {1, 10, 100, 1000, 10000, 100000};
+	u32 delay_ms = 10;
+	unsigned long irq_flags;
+	u32 scale, ticks;
+
+	if (!ufshcd_is_clkgating_allowed(hba))
+		return;
+
+	if (ufshcd_is_auto_hibern8_supported(hba) && hba->ahit) {
+		scale = FIELD_GET(UFSHCI_AHIBERN8_SCALE_MASK, hba->ahit);
+		ticks = FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit);
+		/* scale values beyond the table keep the 10 ms default */
+		if (scale <= 5)
+			delay_ms = ticks * unit_us[scale] / 1000;
+	}
+
+	/* never use a gating delay shorter than 10 ms */
+	delay_ms = max(delay_ms, 10U);
+
+	spin_lock_irqsave(hba->host->host_lock, irq_flags);
+	hba->clk_gating.delay_ms = delay_ms;
+	spin_unlock_irqrestore(hba->host->host_lock, irq_flags);
+}
+
+/*
+ * Encode a duration in microseconds as an Auto-Hibernate Idle Timer register
+ * value.
+ *
+ * The count is divided by UFSHCI_AHIBERN8_SCALE_FACTOR (truncating) until it
+ * no longer exceeds UFSHCI_AHIBERN8_TIMER_MASK; the number of divisions is
+ * placed in the scale field and the remaining count in the timer field.
+ */
+static u32 ufs_mtk_us_to_ahit(unsigned int timer)
+{
+	unsigned int scale = 0;
+
+	while (timer > UFSHCI_AHIBERN8_TIMER_MASK) {
+		timer /= UFSHCI_AHIBERN8_SCALE_FACTOR;
+		scale++;
+	}
+
+	return FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, scale) |
+	       FIELD_PREP(UFSHCI_AHIBERN8_TIMER_MASK, timer);
+}
+
+/*
+ * Choose the auto-hibern8 idle time from the device manufacturer ID and
+ * refresh the clock-gating delay.
+ *
+ * When auto-hibern8 is supported, hba->ahit is set to 3.5 ms for Samsung,
+ * 2 ms for Micron and 1 ms for every other vendor. The clock-gating setup is
+ * run afterwards whether or not auto-hibern8 is supported.
+ */
+static void ufs_mtk_fix_ahit(struct ufs_hba *hba)
+{
+	/* 1 ms unless the vendor is listed below */
+	unsigned int idle_us = 1000;
+
+	if (ufshcd_is_auto_hibern8_supported(hba)) {
+		switch (hba->dev_info.wmanufacturerid) {
+		case UFS_VENDOR_SAMSUNG:
+			idle_us = 3500;	/* 3.5 ms */
+			break;
+		case UFS_VENDOR_MICRON:
+			idle_us = 2000;	/* 2 ms */
+			break;
+		default:
+			break;
+		}
+
+		hba->ahit = ufs_mtk_us_to_ahit(idle_us);
+	}
+
+	ufs_mtk_setup_clk_gating(hba);
+}
+
+/*
+ * Disable clock scaling for devices whose spec version is below 4.0.
+ *
+ * Acts only when wspecversion < 0x0400 and ufs_mtk_is_clk_scale_ready()
+ * reports true: the UFSHCD_CAP_CLK_SCALING capability bit is cleared and
+ * _ufs_mtk_clk_scale() is called with scale_up == false.
+ */
+static void ufs_mtk_fix_clock_scaling(struct ufs_hba *hba)
+{
+	/* UFS 4.0 and newer keep clock scaling */
+	if (hba->dev_info.wspecversion >= 0x0400)
+		return;
+
+	if (!ufs_mtk_is_clk_scale_ready(hba))
+		return;
+
+	hba->caps &= ~UFSHCD_CAP_CLK_SCALING;
+	_ufs_mtk_clk_scale(hba, false);
+}
+
static void ufs_mtk_init_mcq_irq(struct ufs_hba *hba)
{
struct ufs_mtk_host *host = ufshcd_get_variant(hba);
@@ -1240,6 +1325,10 @@ static bool ufs_mtk_pmc_via_fastauto(struct ufs_hba *hba,
dev_req_params->gear_rx < UFS_HS_G4)
return false;
+ if (dev_req_params->pwr_tx == SLOW_MODE ||
+ dev_req_params->pwr_rx == SLOW_MODE)
+ return false;
+
return true;
}
@@ -1255,6 +1344,10 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba,
host_params.hs_rx_gear = UFS_HS_G5;
host_params.hs_tx_gear = UFS_HS_G5;
+ if (dev_max_params->pwr_rx == SLOW_MODE ||
+ dev_max_params->pwr_tx == SLOW_MODE)
+ host_params.desired_working_mode = UFS_PWM_MODE;
+
ret = ufshcd_negotiate_pwr_params(&host_params, dev_max_params, dev_req_params);
if (ret) {
pr_info("%s: failed to determine capabilities\n",
@@ -1278,6 +1371,28 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba,
ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TXHSADAPTTYPE),
PA_NO_ADAPT);
+ if (!(hba->quirks & UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING)) {
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0),
+ DL_FC0ProtectionTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1),
+ DL_TC0ReplayTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2),
+ DL_AFC0ReqTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA3),
+ DL_FC1ProtectionTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA4),
+ DL_TC1ReplayTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA5),
+ DL_AFC1ReqTimeOutVal_Default);
+
+ ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalFC0ProtectionTimeOutVal),
+ DL_FC0ProtectionTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalTC0ReplayTimeOutVal),
+ DL_TC0ReplayTimeOutVal_Default);
+ ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalAFC0ReqTimeOutVal),
+ DL_AFC0ReqTimeOutVal_Default);
+ }
+
ret = ufshcd_uic_change_pwr_mode(hba,
FASTAUTO_MODE << 4 | FASTAUTO_MODE);
@@ -1287,10 +1402,59 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba,
}
}
- if (host->hw_ver.major >= 3) {
+ /* if already configured to the requested pwr_mode, skip adapt */
+ if (dev_req_params->gear_rx == hba->pwr_info.gear_rx &&
+ dev_req_params->gear_tx == hba->pwr_info.gear_tx &&
+ dev_req_params->lane_rx == hba->pwr_info.lane_rx &&
+ dev_req_params->lane_tx == hba->pwr_info.lane_tx &&
+ dev_req_params->pwr_rx == hba->pwr_info.pwr_rx &&
+ dev_req_params->pwr_tx == hba->pwr_info.pwr_tx &&
+ dev_req_params->hs_rate == hba->pwr_info.hs_rate) {
+ return ret;
+ }
+
+ if (dev_req_params->pwr_rx == FAST_MODE ||
+ dev_req_params->pwr_rx == FASTAUTO_MODE) {
+ if (host->hw_ver.major >= 3) {
+ ret = ufshcd_dme_configure_adapt(hba,
+ dev_req_params->gear_tx,
+ PA_INITIAL_ADAPT);
+ } else {
+ ret = ufshcd_dme_configure_adapt(hba,
+ dev_req_params->gear_tx,
+ PA_NO_ADAPT);
+ }
+ } else {
ret = ufshcd_dme_configure_adapt(hba,
- dev_req_params->gear_tx,
- PA_INITIAL_ADAPT);
+ dev_req_params->gear_tx,
+ PA_NO_ADAPT);
+ }
+
+ return ret;
+}
+
+static int ufs_mtk_auto_hibern8_disable(struct ufs_hba *hba)
+{
+ int ret;
+
+ /* disable auto-hibern8 */
+ ufshcd_writel(hba, 0, REG_AUTO_HIBERNATE_IDLE_TIMER);
+
+ /* wait host return to idle state when auto-hibern8 off */
+ ret = ufs_mtk_wait_idle_state(hba, 5);
+ if (ret)
+ goto out;
+
+ ret = ufs_mtk_wait_link_state(hba, VS_LINK_UP, 100);
+
+out:
+ if (ret) {
+ dev_warn(hba->dev, "exit h8 state fail, ret=%d\n", ret);
+
+ ufshcd_force_error_recovery(hba);
+
+ /* trigger error handler and break suspend */
+ ret = -EBUSY;
}
return ret;
@@ -1302,13 +1466,20 @@ static int ufs_mtk_pwr_change_notify(struct ufs_hba *hba,
struct ufs_pa_layer_attr *dev_req_params)
{
int ret = 0;
+ static u32 reg;
switch (stage) {
case PRE_CHANGE:
+ if (ufshcd_is_auto_hibern8_supported(hba)) {
+ reg = ufshcd_readl(hba, REG_AUTO_HIBERNATE_IDLE_TIMER);
+ ufs_mtk_auto_hibern8_disable(hba);
+ }
ret = ufs_mtk_pre_pwr_change(hba, dev_max_params,
dev_req_params);
break;
case POST_CHANGE:
+ if (ufshcd_is_auto_hibern8_supported(hba))
+ ufshcd_writel(hba, reg, REG_AUTO_HIBERNATE_IDLE_TIMER);
break;
default:
ret = -EINVAL;
@@ -1342,6 +1513,7 @@ static int ufs_mtk_pre_link(struct ufs_hba *hba)
{
int ret;
u32 tmp;
+ struct ufs_mtk_host *host = ufshcd_get_variant(hba);
ufs_mtk_get_controller_version(hba);
@@ -1367,34 +1539,33 @@ static int ufs_mtk_pre_link(struct ufs_hba *hba)
ret = ufshcd_dme_set(hba, UIC_ARG_MIB(VS_SAVEPOWERCONTROL), tmp);
+ /* Enable the 1144 functions setting */
+ if (host->ip_ver == IP_VER_MT6989) {
+ ret = ufshcd_dme_get(hba, UIC_ARG_MIB(VS_DEBUGOMC), &tmp);
+ if (ret)
+ return ret;
+
+ tmp |= 0x10;
+ ret = ufshcd_dme_set(hba, UIC_ARG_MIB(VS_DEBUGOMC), tmp);
+ }
+
return ret;
}
-static void ufs_mtk_setup_clk_gating(struct ufs_hba *hba)
+/*
+ * Post link-startup fixups: on IP versions at or above IP_VER_MT6899 set
+ * bit 0x100 in VS_DEBUGOMC, then enable the UniPro clock gating feature.
+ */
+static void ufs_mtk_post_link(struct ufs_hba *hba)
{
- u32 ah_ms;
+ struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+ u32 tmp;
+ int ret;
- if (ufshcd_is_clkgating_allowed(hba)) {
- if (ufshcd_is_auto_hibern8_supported(hba) && hba->ahit)
- ah_ms = FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK,
- hba->ahit);
- else
- ah_ms = 10;
- ufshcd_clkgate_delay_set(hba->dev, ah_ms + 5);
+ /* fix device PA_INIT no adapt */
+ if (host->ip_ver >= IP_VER_MT6899) {
+		/*
+		 * Read-modify-write of VS_DEBUGOMC. tmp has no initializer,
+		 * so it is only written back when the read succeeded; a
+		 * failed read is reported and the attribute is left alone.
+		 */
+		ret = ufshcd_dme_get(hba, UIC_ARG_MIB(VS_DEBUGOMC), &tmp);
+		if (ret)
+			dev_warn(hba->dev, "VS_DEBUGOMC read fail, ret=%d\n", ret);
+		else
+			ufshcd_dme_set(hba, UIC_ARG_MIB(VS_DEBUGOMC), tmp | 0x100);
}
-}
-static void ufs_mtk_post_link(struct ufs_hba *hba)
-{
/* enable unipro clock gating feature */
ufs_mtk_cfg_unipro_cg(hba, true);
-
- /* will be configured during probe hba */
- if (ufshcd_is_auto_hibern8_supported(hba))
- hba->ahit = FIELD_PREP(UFSHCI_AHIBERN8_TIMER_MASK, 10) |
- FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, 3);
-
- ufs_mtk_setup_clk_gating(hba);
}
static int ufs_mtk_link_startup_notify(struct ufs_hba *hba,
@@ -1421,11 +1592,11 @@ static int ufs_mtk_device_reset(struct ufs_hba *hba)
{
struct arm_smccc_res res;
- /* disable hba before device reset */
- ufshcd_hba_stop(hba);
-
ufs_mtk_device_reset_ctrl(0, res);
+ /* disable hba in middle of device reset */
+ ufshcd_hba_stop(hba);
+
/*
* The reset signal is active low. UFS devices shall detect
* more than or equal to 1us of positive or negative RST_n
@@ -1462,7 +1633,11 @@ static int ufs_mtk_link_set_hpm(struct ufs_hba *hba)
return err;
/* Check link state to make sure exit h8 success */
- ufs_mtk_wait_idle_state(hba, 5);
+ err = ufs_mtk_wait_idle_state(hba, 5);
+ if (err) {
+ dev_warn(hba->dev, "wait idle fail, err=%d\n", err);
+ return err;
+ }
err = ufs_mtk_wait_link_state(hba, VS_LINK_UP, 100);
if (err) {
dev_warn(hba->dev, "exit h8 state fail, err=%d\n", err);
@@ -1507,6 +1682,9 @@ static void ufs_mtk_vccqx_set_lpm(struct ufs_hba *hba, bool lpm)
{
struct ufs_vreg *vccqx = NULL;
+ if (!hba->vreg_info.vccq && !hba->vreg_info.vccq2)
+ return;
+
if (hba->vreg_info.vccq)
vccqx = hba->vreg_info.vccq;
else
@@ -1561,21 +1739,6 @@ static void ufs_mtk_dev_vreg_set_lpm(struct ufs_hba *hba, bool lpm)
}
}
-static void ufs_mtk_auto_hibern8_disable(struct ufs_hba *hba)
-{
- int ret;
-
- /* disable auto-hibern8 */
- ufshcd_writel(hba, 0, REG_AUTO_HIBERNATE_IDLE_TIMER);
-
- /* wait host return to idle state when auto-hibern8 off */
- ufs_mtk_wait_idle_state(hba, 5);
-
- ret = ufs_mtk_wait_link_state(hba, VS_LINK_UP, 100);
- if (ret)
- dev_warn(hba->dev, "exit h8 state fail, ret=%d\n", ret);
-}
-
static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op,
enum ufs_notify_change_status status)
{
@@ -1584,7 +1747,7 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op,
if (status == PRE_CHANGE) {
if (ufshcd_is_auto_hibern8_supported(hba))
- ufs_mtk_auto_hibern8_disable(hba);
+ return ufs_mtk_auto_hibern8_disable(hba);
return 0;
}
@@ -1642,8 +1805,21 @@ static int ufs_mtk_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
}
return 0;
+
fail:
- return ufshcd_link_recovery(hba);
+ /*
+ * Check if the platform (parent) device has resumed, and ensure that
+ * power, clock, and MTCMOS are all turned on.
+ */
+ err = ufshcd_link_recovery(hba);
+ if (err) {
+ dev_err(hba->dev, "Device PM: req=%d, status:%d, err:%d\n",
+ hba->dev->power.request,
+ hba->dev->power.runtime_status,
+ hba->dev->power.runtime_error);
+ }
+
+ return 0; /* Cannot return a failure, otherwise, the I/O will hang. */
}
static void ufs_mtk_dbg_register_dump(struct ufs_hba *hba)
@@ -1726,6 +1902,8 @@ static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba)
ufs_mtk_vreg_fix_vcc(hba);
ufs_mtk_vreg_fix_vccqx(hba);
+ ufs_mtk_fix_ahit(hba);
+ ufs_mtk_fix_clock_scaling(hba);
}
static void ufs_mtk_event_notify(struct ufs_hba *hba,
@@ -2012,6 +2190,7 @@ static int ufs_mtk_config_mcq_irq(struct ufs_hba *hba)
return ret;
}
}
+ host->is_mcq_intr_enabled = true;
return 0;
}
@@ -2095,10 +2274,12 @@ static const struct ufs_hba_variant_ops ufs_hba_mtk_vops = {
static int ufs_mtk_probe(struct platform_device *pdev)
{
int err;
- struct device *dev = &pdev->dev;
- struct device_node *reset_node;
- struct platform_device *reset_pdev;
+ struct device *dev = &pdev->dev, *phy_dev = NULL;
+ struct device_node *reset_node, *phy_node = NULL;
+ struct platform_device *reset_pdev, *phy_pdev = NULL;
struct device_link *link;
+ struct ufs_hba *hba;
+ struct ufs_mtk_host *host;
reset_node = of_find_compatible_node(NULL, NULL,
"ti,syscon-reset");
@@ -2125,13 +2306,51 @@ static int ufs_mtk_probe(struct platform_device *pdev)
}
skip_reset:
+ /* find phy node */
+ phy_node = of_parse_phandle(dev->of_node, "phys", 0);
+
+ if (phy_node) {
+ phy_pdev = of_find_device_by_node(phy_node);
+ if (!phy_pdev)
+ goto skip_phy;
+ phy_dev = &phy_pdev->dev;
+
+ pm_runtime_set_active(phy_dev);
+ pm_runtime_enable(phy_dev);
+ pm_runtime_get_sync(phy_dev);
+
+ put_device(phy_dev);
+ dev_info(dev, "phys node found\n");
+ } else {
+ dev_notice(dev, "phys node not found\n");
+ }
+
+skip_phy:
/* perform generic probe */
err = ufshcd_pltfrm_init(pdev, &ufs_hba_mtk_vops);
-
-out:
- if (err)
+ if (err) {
dev_err(dev, "probe failed %d\n", err);
+ goto out;
+ }
+
+ hba = platform_get_drvdata(pdev);
+ if (!hba)
+ goto out;
+
+ if (phy_node && phy_dev) {
+ host = ufshcd_get_variant(hba);
+ host->phy_dev = phy_dev;
+ }
+
+ /*
+ * Because the default power setting of VSx (the upper layer of
+ * VCCQ/VCCQ2) is HWLP, we need to prevent VCCQ/VCCQ2 from
+ * entering LPM.
+ */
+ ufs_mtk_dev_vreg_set_lpm(hba, false);
+out:
+ of_node_put(phy_node);
of_node_put(reset_node);
return err;
}
@@ -2156,27 +2375,38 @@ static int ufs_mtk_system_suspend(struct device *dev)
ret = ufshcd_system_suspend(dev);
if (ret)
- return ret;
+ goto out;
+
+ if (pm_runtime_suspended(hba->dev))
+ goto out;
ufs_mtk_dev_vreg_set_lpm(hba, true);
if (ufs_mtk_is_rtff_mtcmos(hba))
ufs_mtk_mtcmos_ctrl(false, res);
- return 0;
+out:
+ return ret;
}
static int ufs_mtk_system_resume(struct device *dev)
{
+ int ret = 0;
struct ufs_hba *hba = dev_get_drvdata(dev);
struct arm_smccc_res res;
- ufs_mtk_dev_vreg_set_lpm(hba, false);
+ if (pm_runtime_suspended(hba->dev))
+ goto out;
if (ufs_mtk_is_rtff_mtcmos(hba))
ufs_mtk_mtcmos_ctrl(true, res);
- return ufshcd_system_resume(dev);
+ ufs_mtk_dev_vreg_set_lpm(hba, false);
+
+out:
+ ret = ufshcd_system_resume(dev);
+
+ return ret;
}
#endif
@@ -2184,6 +2414,7 @@ static int ufs_mtk_system_resume(struct device *dev)
static int ufs_mtk_runtime_suspend(struct device *dev)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
+ struct ufs_mtk_host *host = ufshcd_get_variant(hba);
struct arm_smccc_res res;
int ret = 0;
@@ -2196,17 +2427,24 @@ static int ufs_mtk_runtime_suspend(struct device *dev)
if (ufs_mtk_is_rtff_mtcmos(hba))
ufs_mtk_mtcmos_ctrl(false, res);
+ if (host->phy_dev)
+ pm_runtime_put_sync(host->phy_dev);
+
return 0;
}
static int ufs_mtk_runtime_resume(struct device *dev)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
+ struct ufs_mtk_host *host = ufshcd_get_variant(hba);
struct arm_smccc_res res;
if (ufs_mtk_is_rtff_mtcmos(hba))
ufs_mtk_mtcmos_ctrl(true, res);
+ if (host->phy_dev)
+ pm_runtime_get_sync(host->phy_dev);
+
ufs_mtk_dev_vreg_set_lpm(hba, false);
return ufshcd_runtime_resume(dev);
diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h
index e46dc5fa209d..dfbf78bd8664 100644
--- a/drivers/ufs/host/ufs-mediatek.h
+++ b/drivers/ufs/host/ufs-mediatek.h
@@ -193,6 +193,7 @@ struct ufs_mtk_host {
bool is_mcq_intr_enabled;
int mcq_nr_intr;
struct ufs_mtk_mcq_intr_info mcq_intr_info[UFSHCD_MAX_Q_NR];
+ struct device *phy_dev;
};
/* MTK delay of autosuspend: 500 ms */
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 9574fdc2bb0f..3e83dc51d538 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -38,6 +38,9 @@
#define DEEMPHASIS_3_5_dB 0x04
#define NO_DEEMPHASIS 0x0
+#define UFS_ICE_SYNC_RST_SEL BIT(3)
+#define UFS_ICE_SYNC_RST_SW BIT(4)
+
enum {
TSTBUS_UAWM,
TSTBUS_UARM,
@@ -494,12 +497,8 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba)
* If the HS-G5 PHY gear is used, update host_params->hs_rate to Rate-A,
* so that the subsequent power mode change shall stick to Rate-A.
*/
- if (host->hw_ver.major == 0x5) {
- if (host->phy_gear == UFS_HS_G5)
- host_params->hs_rate = PA_HS_MODE_A;
- else
- host_params->hs_rate = PA_HS_MODE_B;
- }
+ if (host->hw_ver.major == 0x5 && host->phy_gear == UFS_HS_G5)
+ host_params->hs_rate = PA_HS_MODE_A;
mode = host_params->hs_rate == PA_HS_MODE_B ? PHY_MODE_UFS_HS_B : PHY_MODE_UFS_HS_A;
@@ -751,11 +750,29 @@ static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
{
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
int err;
+ u32 reg_val;
err = ufs_qcom_enable_lane_clks(host);
if (err)
return err;
+ if ((!ufs_qcom_is_link_active(hba)) &&
+ host->hw_ver.major == 5 &&
+ host->hw_ver.minor == 0 &&
+ host->hw_ver.step == 0) {
+ ufshcd_writel(hba, UFS_ICE_SYNC_RST_SEL | UFS_ICE_SYNC_RST_SW, UFS_MEM_ICE_CFG);
+ reg_val = ufshcd_readl(hba, UFS_MEM_ICE_CFG);
+ reg_val &= ~(UFS_ICE_SYNC_RST_SEL | UFS_ICE_SYNC_RST_SW);
+ /*
+ * HW documentation doesn't recommend any delay between the
+ * reset set and clear. But we are enforcing an arbitrary delay
+ * to give flops enough time to settle in.
+ */
+ usleep_range(50, 100);
+ ufshcd_writel(hba, reg_val, UFS_MEM_ICE_CFG);
+ ufshcd_readl(hba, UFS_MEM_ICE_CFG);
+ }
+
return ufs_qcom_ice_resume(host);
}
@@ -1096,6 +1113,18 @@ static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host)
}
}
+/*
+ * Apply the gear/rate limits parsed by ufshcd_parse_gear_limits() to the
+ * host parameters and, if that changed the HS TX gear, make host->phy_gear
+ * follow the new value.
+ */
+static void ufs_qcom_parse_gear_limits(struct ufs_hba *hba)
+{
+	struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+	struct ufs_host_params *params = &host->host_params;
+	u32 prev_tx_gear = params->hs_tx_gear;
+
+	ufshcd_parse_gear_limits(hba, params);
+
+	/* TX gear untouched: leave phy_gear as it is */
+	if (params->hs_tx_gear == prev_tx_gear)
+		return;
+
+	host->phy_gear = params->hs_tx_gear;
+}
+
static void ufs_qcom_set_host_params(struct ufs_hba *hba)
{
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
@@ -1162,6 +1191,13 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
case PRE_CHANGE:
if (on) {
ufs_qcom_icc_update_bw(host);
+ if (ufs_qcom_is_link_hibern8(hba)) {
+ err = ufs_qcom_enable_lane_clks(host);
+ if (err) {
+ dev_err(hba->dev, "enable lane clks failed, ret=%d\n", err);
+ return err;
+ }
+ }
} else {
if (!ufs_qcom_is_link_active(hba)) {
/* disable device ref_clk */
@@ -1187,6 +1223,9 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
if (ufshcd_is_hs_mode(&hba->pwr_info))
ufs_qcom_dev_ref_clk_ctrl(host, true);
} else {
+ if (ufs_qcom_is_link_hibern8(hba))
+ ufs_qcom_disable_lane_clks(host);
+
ufs_qcom_icc_set_bw(host, ufs_qcom_bw_table[MODE_MIN][0][0].mem_bw,
ufs_qcom_bw_table[MODE_MIN][0][0].cfg_bw);
}
@@ -1337,6 +1376,7 @@ static int ufs_qcom_init(struct ufs_hba *hba)
ufs_qcom_advertise_quirks(hba);
ufs_qcom_set_host_params(hba);
ufs_qcom_set_phy_gear(host);
+ ufs_qcom_parse_gear_limits(hba);
err = ufs_qcom_ice_init(host);
if (err)
@@ -1742,7 +1782,7 @@ static void ufs_qcom_dump_testbus(struct ufs_hba *hba)
}
static int ufs_qcom_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
- const char *prefix, enum ufshcd_res id)
+ const char *prefix, void __iomem *base)
{
u32 *regs __free(kfree) = NULL;
size_t pos;
@@ -1755,7 +1795,7 @@ static int ufs_qcom_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
return -ENOMEM;
for (pos = 0; pos < len; pos += 4)
- regs[pos / 4] = readl(hba->res[id].base + offset + pos);
+ regs[pos / 4] = readl(base + offset + pos);
print_hex_dump(KERN_ERR, prefix,
len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE,
@@ -1766,30 +1806,34 @@ static int ufs_qcom_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
static void ufs_qcom_dump_mcq_hci_regs(struct ufs_hba *hba)
{
+ struct ufshcd_mcq_opr_info_t *opr = &hba->mcq_opr[0];
+ void __iomem *mcq_vs_base = hba->mcq_base + UFS_MEM_VS_BASE;
+
struct dump_info {
+ void __iomem *base;
size_t offset;
size_t len;
const char *prefix;
- enum ufshcd_res id;
};
struct dump_info mcq_dumps[] = {
- {0x0, 256 * 4, "MCQ HCI-0 ", RES_MCQ},
- {0x400, 256 * 4, "MCQ HCI-1 ", RES_MCQ},
- {0x0, 5 * 4, "MCQ VS-0 ", RES_MCQ_VS},
- {0x0, 256 * 4, "MCQ SQD-0 ", RES_MCQ_SQD},
- {0x400, 256 * 4, "MCQ SQD-1 ", RES_MCQ_SQD},
- {0x800, 256 * 4, "MCQ SQD-2 ", RES_MCQ_SQD},
- {0xc00, 256 * 4, "MCQ SQD-3 ", RES_MCQ_SQD},
- {0x1000, 256 * 4, "MCQ SQD-4 ", RES_MCQ_SQD},
- {0x1400, 256 * 4, "MCQ SQD-5 ", RES_MCQ_SQD},
- {0x1800, 256 * 4, "MCQ SQD-6 ", RES_MCQ_SQD},
- {0x1c00, 256 * 4, "MCQ SQD-7 ", RES_MCQ_SQD},
+ {hba->mcq_base, 0x0, 256 * 4, "MCQ HCI-0 "},
+ {hba->mcq_base, 0x400, 256 * 4, "MCQ HCI-1 "},
+ {mcq_vs_base, 0x0, 5 * 4, "MCQ VS-0 "},
+ {opr->base, 0x0, 256 * 4, "MCQ SQD-0 "},
+ {opr->base, 0x400, 256 * 4, "MCQ SQD-1 "},
+ {opr->base, 0x800, 256 * 4, "MCQ SQD-2 "},
+ {opr->base, 0xc00, 256 * 4, "MCQ SQD-3 "},
+ {opr->base, 0x1000, 256 * 4, "MCQ SQD-4 "},
+ {opr->base, 0x1400, 256 * 4, "MCQ SQD-5 "},
+ {opr->base, 0x1800, 256 * 4, "MCQ SQD-6 "},
+ {opr->base, 0x1c00, 256 * 4, "MCQ SQD-7 "},
+
};
for (int i = 0; i < ARRAY_SIZE(mcq_dumps); i++) {
ufs_qcom_dump_regs(hba, mcq_dumps[i].offset, mcq_dumps[i].len,
- mcq_dumps[i].prefix, mcq_dumps[i].id);
+ mcq_dumps[i].prefix, mcq_dumps[i].base);
cond_resched();
}
}
@@ -1910,116 +1954,68 @@ static void ufs_qcom_config_scaling_param(struct ufs_hba *hba,
hba->clk_scaling.suspend_on_no_request = true;
}
-/* Resources */
-static const struct ufshcd_res_info ufs_res_info[RES_MAX] = {
- {.name = "ufs_mem",},
- {.name = "mcq",},
- /* Submission Queue DAO */
- {.name = "mcq_sqd",},
- /* Submission Queue Interrupt Status */
- {.name = "mcq_sqis",},
- /* Completion Queue DAO */
- {.name = "mcq_cqd",},
- /* Completion Queue Interrupt Status */
- {.name = "mcq_cqis",},
- /* MCQ vendor specific */
- {.name = "mcq_vs",},
-};
-
static int ufs_qcom_mcq_config_resource(struct ufs_hba *hba)
{
struct platform_device *pdev = to_platform_device(hba->dev);
- struct ufshcd_res_info *res;
- struct resource *res_mem, *res_mcq;
- int i, ret;
-
- memcpy(hba->res, ufs_res_info, sizeof(ufs_res_info));
-
- for (i = 0; i < RES_MAX; i++) {
- res = &hba->res[i];
- res->resource = platform_get_resource_byname(pdev,
- IORESOURCE_MEM,
- res->name);
- if (!res->resource) {
- dev_info(hba->dev, "Resource %s not provided\n", res->name);
- if (i == RES_UFS)
- return -ENODEV;
- continue;
- } else if (i == RES_UFS) {
- res_mem = res->resource;
- res->base = hba->mmio_base;
- continue;
- }
+ struct resource *res;
- res->base = devm_ioremap_resource(hba->dev, res->resource);
- if (IS_ERR(res->base)) {
- dev_err(hba->dev, "Failed to map res %s, err=%d\n",
- res->name, (int)PTR_ERR(res->base));
- ret = PTR_ERR(res->base);
- res->base = NULL;
- return ret;
- }
+ /* Map the MCQ configuration region */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mcq");
+ if (!res) {
+ dev_err(hba->dev, "MCQ resource not found in device tree\n");
+ return -ENODEV;
}
- /* MCQ resource provided in DT */
- res = &hba->res[RES_MCQ];
- /* Bail if MCQ resource is provided */
- if (res->base)
- goto out;
-
- /* Explicitly allocate MCQ resource from ufs_mem */
- res_mcq = devm_kzalloc(hba->dev, sizeof(*res_mcq), GFP_KERNEL);
- if (!res_mcq)
- return -ENOMEM;
-
- res_mcq->start = res_mem->start +
- MCQ_SQATTR_OFFSET(hba->mcq_capabilities);
- res_mcq->end = res_mcq->start + hba->nr_hw_queues * MCQ_QCFG_SIZE - 1;
- res_mcq->flags = res_mem->flags;
- res_mcq->name = "mcq";
-
- ret = insert_resource(&iomem_resource, res_mcq);
- if (ret) {
- dev_err(hba->dev, "Failed to insert MCQ resource, err=%d\n",
- ret);
- return ret;
- }
-
- res->base = devm_ioremap_resource(hba->dev, res_mcq);
- if (IS_ERR(res->base)) {
- dev_err(hba->dev, "MCQ registers mapping failed, err=%d\n",
- (int)PTR_ERR(res->base));
- ret = PTR_ERR(res->base);
- goto ioremap_err;
+ hba->mcq_base = devm_ioremap_resource(hba->dev, res);
+ if (IS_ERR(hba->mcq_base)) {
+ dev_err(hba->dev, "Failed to map MCQ region: %ld\n",
+ PTR_ERR(hba->mcq_base));
+ return PTR_ERR(hba->mcq_base);
}
-out:
- hba->mcq_base = res->base;
return 0;
-ioremap_err:
- res->base = NULL;
- remove_resource(res_mcq);
- return ret;
}
static int ufs_qcom_op_runtime_config(struct ufs_hba *hba)
{
- struct ufshcd_res_info *mem_res, *sqdao_res;
struct ufshcd_mcq_opr_info_t *opr;
int i;
+ u32 doorbell_offsets[OPR_MAX];
- mem_res = &hba->res[RES_UFS];
- sqdao_res = &hba->res[RES_MCQ_SQD];
+ /*
+ * Configure doorbell address offsets in MCQ configuration registers.
+ * These values are offsets relative to mmio_base (UFS_HCI_BASE).
+ *
+ * Memory Layout:
+ * - mmio_base = UFS_HCI_BASE
+ * - mcq_base = MCQ_CONFIG_BASE = mmio_base + (UFS_QCOM_MCQCAP_QCFGPTR * 0x200)
+ * - Doorbell registers are at: mmio_base + (UFS_QCOM_MCQCAP_QCFGPTR * 0x200) +
+ * - UFS_QCOM_MCQ_SQD_OFFSET
+ * - Which is also: mcq_base + UFS_QCOM_MCQ_SQD_OFFSET
+ */
- if (!mem_res->base || !sqdao_res->base)
- return -EINVAL;
+ doorbell_offsets[OPR_SQD] = UFS_QCOM_SQD_ADDR_OFFSET;
+ doorbell_offsets[OPR_SQIS] = UFS_QCOM_SQIS_ADDR_OFFSET;
+ doorbell_offsets[OPR_CQD] = UFS_QCOM_CQD_ADDR_OFFSET;
+ doorbell_offsets[OPR_CQIS] = UFS_QCOM_CQIS_ADDR_OFFSET;
+ /*
+ * Configure MCQ operation registers.
+ *
+ * The doorbell registers are physically located within the MCQ region:
+ * - doorbell_physical_addr = mmio_base + doorbell_offset
+ * - doorbell_physical_addr = mcq_base + (doorbell_offset - MCQ_CONFIG_OFFSET)
+ */
for (i = 0; i < OPR_MAX; i++) {
opr = &hba->mcq_opr[i];
- opr->offset = sqdao_res->resource->start -
- mem_res->resource->start + 0x40 * i;
- opr->stride = 0x100;
- opr->base = sqdao_res->base + 0x40 * i;
+ opr->offset = doorbell_offsets[i]; /* Offset relative to mmio_base */
+ opr->stride = UFS_QCOM_MCQ_STRIDE; /* 256 bytes between queues */
+
+ /*
+ * Calculate the actual doorbell base address within MCQ region:
+ * base = mcq_base + (doorbell_offset - MCQ_CONFIG_OFFSET)
+ */
+ opr->base = hba->mcq_base + (opr->offset - UFS_QCOM_MCQ_CONFIG_OFFSET);
}
return 0;
@@ -2034,12 +2030,8 @@ static int ufs_qcom_get_hba_mac(struct ufs_hba *hba)
static int ufs_qcom_get_outstanding_cqs(struct ufs_hba *hba,
unsigned long *ocqs)
{
- struct ufshcd_res_info *mcq_vs_res = &hba->res[RES_MCQ_VS];
-
- if (!mcq_vs_res->base)
- return -EINVAL;
-
- *ocqs = readl(mcq_vs_res->base + UFS_MEM_CQIS_VS);
+ /* Read from MCQ vendor-specific register in MCQ region */
+ *ocqs = readl(hba->mcq_base + UFS_MEM_CQIS_VS);
return 0;
}
diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h
index e0e129af7c16..380d02333d38 100644
--- a/drivers/ufs/host/ufs-qcom.h
+++ b/drivers/ufs/host/ufs-qcom.h
@@ -33,6 +33,28 @@
#define DL_VS_CLK_CFG_MASK GENMASK(9, 0)
#define DME_VS_CORE_CLK_CTRL_DME_HW_CGC_EN BIT(9)
+/*
+ * Qualcomm MCQ Configuration
+ *
+ * UFS_QCOM_MCQ_CONFIG_OFFSET is the pointer value multiplied by 0x200,
+ * i.e. 224 * 0x200 = 0x1C000.
+ */
+#define UFS_QCOM_MCQCAP_QCFGPTR 224 /* 0xE0 in hex */
+#define UFS_QCOM_MCQ_CONFIG_OFFSET (UFS_QCOM_MCQCAP_QCFGPTR * 0x200) /* 0x1C000 */
+
+/*
+ * Doorbell offsets within MCQ region (relative to MCQ_CONFIG_BASE).
+ * The four register groups sit 0x40 apart in the order SQD, SQIS, CQD, CQIS;
+ * UFS_QCOM_MCQ_STRIDE (0x100, i.e. 256 bytes) is the stride value.
+ */
+#define UFS_QCOM_MCQ_SQD_OFFSET 0x5000
+#define UFS_QCOM_MCQ_CQD_OFFSET 0x5080
+#define UFS_QCOM_MCQ_SQIS_OFFSET 0x5040
+#define UFS_QCOM_MCQ_CQIS_OFFSET 0x50C0
+#define UFS_QCOM_MCQ_STRIDE 0x100
+
+/*
+ * Calculated doorbell address offsets (relative to mmio_base): each one is
+ * UFS_QCOM_MCQ_CONFIG_OFFSET plus the matching in-region offset above.
+ * REG_UFS_MCQ_STRIDE is an alias for UFS_QCOM_MCQ_STRIDE.
+ */
+#define UFS_QCOM_SQD_ADDR_OFFSET (UFS_QCOM_MCQ_CONFIG_OFFSET + UFS_QCOM_MCQ_SQD_OFFSET)
+#define UFS_QCOM_CQD_ADDR_OFFSET (UFS_QCOM_MCQ_CONFIG_OFFSET + UFS_QCOM_MCQ_CQD_OFFSET)
+#define UFS_QCOM_SQIS_ADDR_OFFSET (UFS_QCOM_MCQ_CONFIG_OFFSET + UFS_QCOM_MCQ_SQIS_OFFSET)
+#define UFS_QCOM_CQIS_ADDR_OFFSET (UFS_QCOM_MCQ_CONFIG_OFFSET + UFS_QCOM_MCQ_CQIS_OFFSET)
+#define REG_UFS_MCQ_STRIDE UFS_QCOM_MCQ_STRIDE
+
+/*
+ * MCQ Vendor specific address offsets (relative to MCQ_CONFIG_BASE).
+ * UFS_MEM_CQIS_VS equals UFS_MEM_VS_BASE + 0x8.
+ */
+#define UFS_MEM_VS_BASE 0x4000
+#define UFS_MEM_CQIS_VS 0x4008
+
/* QCOM UFS host controller vendor specific registers */
enum {
REG_UFS_SYS1CLK_1US = 0xC0,
@@ -60,7 +82,7 @@ enum {
UFS_AH8_CFG = 0xFC,
UFS_RD_REG_MCQ = 0xD00,
-
+ UFS_MEM_ICE_CFG = 0x2600,
REG_UFS_MEM_ICE_CONFIG = 0x260C,
REG_UFS_MEM_ICE_NUM_CORE = 0x2664,
@@ -95,10 +117,6 @@ enum {
REG_UFS_SW_H8_EXIT_CNT = 0x2710,
};
-enum {
- UFS_MEM_CQIS_VS = 0x8,
-};
-
#define UFS_CNTLR_2_x_x_VEN_REGS_OFFSET(x) (0x000 + x)
#define UFS_CNTLR_3_x_x_VEN_REGS_OFFSET(x) (0x400 + x)
diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
index ffe5d1d2b215..c2dafb583cf5 100644
--- a/drivers/ufs/host/ufshcd-pltfrm.c
+++ b/drivers/ufs/host/ufshcd-pltfrm.c
@@ -430,6 +430,39 @@ int ufshcd_negotiate_pwr_params(const struct ufs_host_params *host_params,
}
EXPORT_SYMBOL_GPL(ufshcd_negotiate_pwr_params);
+/**
+ * ufshcd_parse_gear_limits - Parse DT-based gear and rate limits for UFS
+ * @hba: Pointer to UFS host bus adapter instance; hba->dev->of_node is read
+ * @host_params: Pointer to UFS host parameters structure to be updated
+ *
+ * Reads optional device tree properties to apply platform-specific
+ * constraints. A property that cannot be read leaves @host_params untouched.
+ *
+ * "limit-hs-gear": Max HS gear, stored in both hs_tx_gear and hs_rx_gear.
+ * "limit-gear-rate": Max HS rate, "rate-a" or "rate-b"; other values only warn.
+ */
+void ufshcd_parse_gear_limits(struct ufs_hba *hba, struct ufs_host_params *host_params)
+{
+ struct device_node *np = hba->dev->of_node;
+ u32 hs_gear;
+ const char *hs_rate;
+
+ if (!of_property_read_u32(np, "limit-hs-gear", &hs_gear)) { /* 0 means the value was read */
+ host_params->hs_tx_gear = hs_gear;
+ host_params->hs_rx_gear = hs_gear;
+ }
+
+ if (!of_property_read_string(np, "limit-gear-rate", &hs_rate)) {
+ if (!strcmp(hs_rate, "rate-a"))
+ host_params->hs_rate = PA_HS_MODE_A;
+ else if (!strcmp(hs_rate, "rate-b"))
+ host_params->hs_rate = PA_HS_MODE_B;
+ else
+ dev_warn(hba->dev, "Invalid rate: %s\n", hs_rate); /* hs_rate field keeps its prior value */
+ }
+}
+EXPORT_SYMBOL_GPL(ufshcd_parse_gear_limits);
+
void ufshcd_init_host_params(struct ufs_host_params *host_params)
{
*host_params = (struct ufs_host_params){
diff --git a/drivers/ufs/host/ufshcd-pltfrm.h b/drivers/ufs/host/ufshcd-pltfrm.h
index 3017f8e8f93c..0a18a8aed94d 100644
--- a/drivers/ufs/host/ufshcd-pltfrm.h
+++ b/drivers/ufs/host/ufshcd-pltfrm.h
@@ -29,6 +29,7 @@ int ufshcd_negotiate_pwr_params(const struct ufs_host_params *host_params,
const struct ufs_pa_layer_attr *dev_max,
struct ufs_pa_layer_attr *agreed_pwr);
void ufshcd_init_host_params(struct ufs_host_params *host_params);
+void ufshcd_parse_gear_limits(struct ufs_hba *hba, struct ufs_host_params *host_params);
int ufshcd_pltfrm_init(struct platform_device *pdev,
const struct ufs_hba_variant_ops *vops);
void ufshcd_pltfrm_remove(struct platform_device *pdev);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f5062061c408..c147145a6593 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -378,7 +378,7 @@ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist
* is initialized by the hardware. Explicitly check/unpoison it
* depending on the direction.
*/
- kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
+ kmsan_handle_dma(sg_phys(sg), sg->length, direction);
*addr = (dma_addr_t)sg_phys(sg);
return 0;
}
@@ -3157,7 +3157,7 @@ dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr,
struct vring_virtqueue *vq = to_vvq(_vq);
if (!vq->use_dma_api) {
- kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir);
+ kmsan_handle_dma(virt_to_phys(ptr), size, dir);
return (dma_addr_t)virt_to_phys(ptr);
}
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index da1a7d3d377c..dd7747a2de87 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -392,6 +392,25 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
}
}
+static dma_addr_t xen_swiotlb_direct_map_resource(struct device *dev,
+ phys_addr_t paddr,
+ size_t size,
+ enum dma_data_direction dir, /* not used by this implementation */
+ unsigned long attrs) /* not used by this implementation */
+{
+ dma_addr_t dma_addr = paddr; /* no translation: @paddr itself is returned as the DMA address */
+
+ if (unlikely(!dma_capable(dev, dma_addr, size, false))) { /* dma_capable() rejected [dma_addr, +size) */
+ dev_err_once(dev,
+ "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ WARN_ON_ONCE(1);
+ return DMA_MAPPING_ERROR; /* failure value handed back to the .map_resource caller */
+ }
+
+ return dma_addr;
+}
+
/*
* Return whether the given device DMA address mask can be supported
* properly. For example, if your device can only drive the low 24-bits
@@ -426,5 +445,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.max_mapping_size = swiotlb_max_mapping_size,
- .map_resource = dma_direct_map_resource,
+ .map_resource = xen_swiotlb_direct_map_resource,
};