summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hns
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hns')
-rw-r--r--drivers/infiniband/hw/hns/Kconfig20
-rw-r--r--drivers/infiniband/hw/hns/Makefile10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c44
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c7
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c33
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_debugfs.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h57
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c144
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h12
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c710
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h32
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c42
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c41
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c148
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c20
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_trace.h216
17 files changed, 1088 insertions, 452 deletions
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index ab3fbba70789..44cdb706fe27 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,21 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
-config INFINIBAND_HNS
- tristate "HNS RoCE Driver"
- depends on NET_VENDOR_HISILICON
- depends on ARM64 || (COMPILE_TEST && 64BIT)
- depends on (HNS_DSAF && HNS_ENET) || HNS3
- help
- This is a RoCE/RDMA driver for the Hisilicon RoCE engine.
-
- To compile HIP08 driver as module, choose M here.
-
config INFINIBAND_HNS_HIP08
- bool "Hisilicon Hip08 Family RoCE support"
- depends on INFINIBAND_HNS && PCI && HNS3
- depends on INFINIBAND_HNS=m || HNS3=y
+ tristate "Hisilicon Hip08 Family RoCE support"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on PCI && HNS3
help
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
The RoCE engine is a PCI device.
- To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
- module will be called hns-roce-hw-v2.
+ To compile this driver, choose M here. This module will be called
+ hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index be1e1cdbcfa8..baf592e6f21b 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -4,13 +4,11 @@
#
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(src)
-hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
+hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
- hns_roce_debugfs.o
+ hns_roce_debugfs.o hns_roce_hw_v2.o
-ifdef CONFIG_INFINIBAND_HNS_HIP08
-hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
-endif
+obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index b4209b6aed8d..307c35888b30 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -33,7 +33,6 @@
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
@@ -59,11 +58,15 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
struct hns_roce_ib_create_ah_resp resp = {};
struct hns_roce_ah *ah = to_hr_ah(ibah);
- int ret = 0;
- u32 max_sl;
-
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
- return -EOPNOTSUPP;
+ u8 tclass = get_tclass(grh);
+ u8 priority = 0;
+ u8 tc_mode = 0;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
ah->av.port = rdma_ah_get_port_num(ah_attr);
ah->av.gid_index = grh->sgid_index;
@@ -74,15 +77,24 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
ah->av.hop_limit = grh->hop_limit;
ah->av.flowlabel = grh->flow_label;
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
- ah->av.tclass = get_tclass(grh);
-
- ah->av.sl = rdma_ah_get_sl(ah_attr);
- max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
- if (unlikely(ah->av.sl > max_sl)) {
- ibdev_err_ratelimited(&hr_dev->ib_dev,
- "failed to set sl, sl (%u) shouldn't be larger than %u.\n",
- ah->av.sl, max_sl);
- return -EINVAL;
+ ah->av.tclass = tclass;
+
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+
+ if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ goto err_out;
+
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ ah->av.sl = priority;
+ else
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+
+ if (!check_sl_valid(hr_dev, ah->av.sl)) {
+ ret = -EINVAL;
+ goto err_out;
}
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
@@ -99,6 +111,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
}
if (udata) {
+ resp.priority = ah->av.sl;
+ resp.tc_mode = tc_mode;
memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
ret = ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)));
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 11a78ceae568..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -153,8 +153,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
return total;
}
-int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
- int buf_cnt, struct ib_umem *umem,
+int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem,
unsigned int page_shift)
{
struct ib_block_iter biter;
@@ -176,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ida_destroy(&hr_dev->xrcd_ida.ida);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 7250d0643b5c..3a5c93c9fb3e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -149,7 +149,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return ret;
}
- ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
goto err_put;
@@ -163,7 +163,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return 0;
err_xa:
- xa_erase(&cq_table->array, hr_cq->cqn);
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
@@ -179,10 +179,10 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
hr_cq->cqn);
if (ret)
- dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
- hr_cq->cqn);
+ dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
+ ret, hr_cq->cqn);
- xa_erase(&cq_table->array, hr_cq->cqn);
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
/* Waiting interrupt process procedure carried out */
synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
@@ -353,9 +353,10 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
}
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct ib_device *ibdev = &hr_dev->ib_dev;
@@ -476,13 +477,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
struct ib_event event;
struct ib_cq *ibcq;
- hr_cq = xa_load(&hr_dev->cq_table.array,
- cqn & (hr_dev->caps.num_cqs - 1));
- if (!hr_cq) {
- dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
- return;
- }
-
if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
@@ -491,7 +485,16 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
return;
}
- refcount_inc(&hr_cq->refcount);
+ xa_lock(&hr_dev->cq_table.array);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (hr_cq)
+ refcount_inc(&hr_cq->refcount);
+ xa_unlock(&hr_dev->cq_table.array);
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
+ return;
+ }
ibcq = &hr_cq->ib_cq;
if (ibcq->event_handler) {
@@ -534,4 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
+ mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
index e8febb40f645..b869cdc54118 100644
--- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -5,6 +5,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/pci.h>
#include "hns_roce_device.h"
@@ -86,7 +87,7 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
- dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev),
+ dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
hns_roce_dbgfs_root);
create_sw_stat_debugfs(hr_dev, dbgfs->root);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index c3cbd0a494bf..1dcc9cbb4678 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -83,6 +83,7 @@
#define MR_TYPE_DMA 0x03
#define HNS_ROCE_FRMR_MAX_PA 512
+#define HNS_ROCE_FRMR_ALIGN_SIZE 128
#define PKEY_ID 0xffff
#define NODE_DESC_SIZE 64
@@ -91,6 +92,8 @@
/* Configure to HW for PAGE_SIZE larger than 4KB */
#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
+#define ATOMIC_WR_LEN 8
+
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
@@ -100,6 +103,9 @@
#define CQ_BANKID_SHIFT 2
#define CQ_BANKID_MASK GENMASK(1, 0)
+#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
+#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
+
enum {
SERV_TYPE_RC,
SERV_TYPE_UC,
@@ -184,6 +190,9 @@ enum {
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
+#define HNS_HW_MAX_PAGE_SHIFT 27
+#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT)
+
struct hns_roce_uar {
u64 pfn;
unsigned long index;
@@ -480,12 +489,6 @@ struct hns_roce_bank {
u32 next; /* Next ID to allocate. */
};
-struct hns_roce_idx_table {
- u32 *spare_idx;
- u32 head;
- u32 tail;
-};
-
struct hns_roce_qp_table {
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
@@ -494,7 +497,7 @@ struct hns_roce_qp_table {
struct mutex scc_mutex;
struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
struct mutex bank_mutex;
- struct hns_roce_idx_table idx_table;
+ struct xarray dip_xa;
};
struct hns_roce_cq_table {
@@ -584,6 +587,7 @@ struct hns_roce_dev;
enum {
HNS_ROCE_FLUSH_FLAG = 0,
+ HNS_ROCE_STOP_FLUSH_FLAG = 1,
};
struct hns_roce_work {
@@ -645,6 +649,10 @@ struct hns_roce_qp {
struct hns_user_mmap_entry *dwqe_mmap_entry;
u32 config;
enum hns_roce_cong_type cong_type;
+ u8 tc_mode;
+ u8 priority;
+ spinlock_t flush_lock;
+ struct hns_roce_dip *dip;
};
struct hns_roce_ib_iboe {
@@ -710,6 +718,7 @@ struct hns_roce_eq {
int shift;
int event_type;
int sub_type;
+ struct work_struct work;
};
struct hns_roce_eq_table {
@@ -923,8 +932,7 @@ struct hns_roce_hw {
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
void *mb_buf);
- int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
- struct hns_roce_mr *mr);
+ int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
@@ -950,6 +958,8 @@ struct hns_roce_hw {
int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
u64 *stats, u32 port, int *hw_counters);
+ int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
@@ -969,8 +979,6 @@ struct hns_roce_dev {
enum hns_roce_device_state state;
struct list_head qp_list; /* list of all qps on this dev */
spinlock_t qp_list_lock; /* protect qp_list */
- struct list_head dip_list; /* list of all dest ips on this dev */
- spinlock_t dip_list_lock; /* protect dip_list */
struct list_head pgdir_list;
struct mutex pgdir_mutex;
@@ -1019,6 +1027,26 @@ struct hns_roce_dev {
atomic64_t *dfx_cnt;
};
+enum hns_roce_trace_type {
+ TRACE_SQ,
+ TRACE_RQ,
+ TRACE_SRQ,
+};
+
+static inline const char *trace_type_to_str(enum hns_roce_trace_type type)
+{
+ switch (type) {
+ case TRACE_SQ:
+ return "SQ";
+ case TRACE_RQ:
+ return "RQ";
+ case TRACE_SRQ:
+ return "SRQ";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
{
return container_of(ib_dev, struct hns_roce_dev, ib_dev);
@@ -1228,7 +1256,7 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
int buf_cnt, struct hns_roce_buf *buf,
unsigned int page_shift);
-int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+int hns_roce_get_umem_bufs(dma_addr_t *bufs,
int buf_cnt, struct ib_umem *umem,
unsigned int page_shift);
@@ -1261,7 +1289,7 @@ __be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
@@ -1276,6 +1304,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
@@ -1292,4 +1321,6 @@ struct hns_user_mmap_entry *
hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
size_t length,
enum hns_roce_mmap_type mmap_type);
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index a4b3f19161dc..ca0798224e56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -281,7 +281,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
return hem;
fail:
- hns_roce_free_hem(hr_dev, hem);
+ kfree(hem);
return NULL;
}
@@ -300,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
unsigned long mhop_obj = obj;
u32 l0_idx, l1_idx, l2_idx;
u32 chunk_ba_num;
@@ -331,14 +331,14 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
index->buf = l0_idx;
break;
default:
- ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
- table->type, mhop->hop_num);
+ dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
+ table->type, mhop->hop_num);
return -EINVAL;
}
if (unlikely(index->buf >= table->num_hem)) {
- ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
- table->type, index->buf, table->num_hem);
+ dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
+ table->type, index->buf, table->num_hem);
return -EINVAL;
}
@@ -448,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
u32 step_idx;
int ret = 0;
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
if (ret) {
- ibdev_err(ibdev, "set HEM step 0 failed!\n");
+ dev_err(dev, "set HEM step 0 failed!\n");
goto out;
}
}
@@ -463,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
if (ret) {
- ibdev_err(ibdev, "set HEM step 1 failed!\n");
+ dev_err(dev, "set HEM step 1 failed!\n");
goto out;
}
}
@@ -475,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
step_idx = mhop->hop_num;
ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
if (ret)
- ibdev_err(ibdev, "set HEM step last failed!\n");
+ dev_err(dev, "set HEM step last failed!\n");
}
out:
return ret;
@@ -485,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "calc hem config failed!\n");
+ dev_err(dev, "calc hem config failed!\n");
return ret;
}
@@ -504,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "alloc mhop hem failed!\n");
+ dev_err(dev, "alloc mhop hem failed!\n");
goto out;
}
@@ -512,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
if (table->type < HEM_TYPE_MTT) {
ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "set HEM address to HW failed!\n");
+ dev_err(dev, "set HEM address to HW failed!\n");
goto err_alloc;
}
}
@@ -575,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
u32 step_idx;
@@ -605,21 +605,21 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
if (ret)
- ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n",
- hop_num, ret);
+ dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
+ hop_num, ret);
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
if (ret)
- ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n",
- ret);
+ dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
+ ret);
}
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
if (ret)
- ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n",
- ret);
+ dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
+ ret);
}
}
}
@@ -629,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
unsigned long obj,
int check_refcount)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "calc hem config failed!\n");
+ dev_err(dev, "calc hem config failed!\n");
return;
}
@@ -672,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
if (ret)
- dev_warn(dev, "failed to clear HEM base address, ret = %d.\n",
- ret);
+ dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;
@@ -877,6 +877,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_cleanup_mhop_hem_table(hr_dev, table);
+ mutex_destroy(&table->mutex);
return;
}
@@ -891,6 +892,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
hns_roce_free_hem(hr_dev, table->hem[i]);
}
+ mutex_destroy(&table->mutex);
kfree(table->hem);
}
@@ -929,6 +931,7 @@ struct hns_roce_hem_item {
size_t count; /* max ba numbers */
int start; /* start buf offset in this hem */
int end; /* end buf offset in this hem */
+ bool exist_bt;
};
/* All HEM items are linked in a tree structure */
@@ -957,6 +960,7 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
}
}
+ hem->exist_bt = exist_bt;
hem->count = count;
hem->start = start;
hem->end = end;
@@ -967,34 +971,32 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
}
static void hem_list_free_item(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_item *hem, bool exist_bt)
+ struct hns_roce_hem_item *hem)
{
- if (exist_bt)
+ if (hem->exist_bt)
dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
hem->addr, hem->dma_addr);
kfree(hem);
}
static void hem_list_free_all(struct hns_roce_dev *hr_dev,
- struct list_head *head, bool exist_bt)
+ struct list_head *head)
{
struct hns_roce_hem_item *hem, *temp_hem;
list_for_each_entry_safe(hem, temp_hem, head, list) {
list_del(&hem->list);
- hem_list_free_item(hr_dev, hem, exist_bt);
+ hem_list_free_item(hr_dev, hem);
}
}
-static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
- u64 table_addr)
+static void hem_list_link_bt(void *base_addr, u64 table_addr)
{
*(u64 *)(base_addr) = table_addr;
}
/* assign L0 table address to hem from root bt */
-static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_item *hem, void *cpu_addr,
+static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr,
u64 phy_addr)
{
hem->addr = cpu_addr;
@@ -1041,9 +1043,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
* @bt_level: base address table level
* @unit: ba entries per bt page
*/
-static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
{
- u32 step;
+ u64 step;
int max;
int i;
@@ -1079,11 +1081,15 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
{
struct hns_roce_buf_region *r;
int total = 0;
- int step;
+ u64 step;
int i;
for (i = 0; i < region_cnt; i++) {
r = (struct hns_roce_buf_region *)&regions[i];
+ /* when r->hopnum = 0, the region should not occupy root_ba. */
+ if (!r->hopnum)
+ continue;
+
if (r->hopnum > 1) {
step = hem_list_calc_ba_range(r->hopnum, 1, unit);
if (step > 0)
@@ -1110,7 +1116,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
int ret = 0;
int max_ofs;
int level;
- u32 step;
+ u64 step;
int end;
if (hopnum <= 1)
@@ -1134,10 +1140,12 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
/* config L1 bt to last bt and link them to corresponding parent */
for (level = 1; level < hopnum; level++) {
- cur = hem_list_search_item(&mid_bt[level], offset);
- if (cur) {
- hem_ptrs[level] = cur;
- continue;
+ if (!hem_list_is_bottom_bt(hopnum, level)) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
}
step = hem_list_calc_ba_range(hopnum, level, unit);
@@ -1147,7 +1155,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
}
start_aligned = (distance / step) * step + r->offset;
- end = min_t(int, start_aligned + step - 1, max_ofs);
+ end = min_t(u64, start_aligned + step - 1, max_ofs);
cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
true);
if (!cur) {
@@ -1163,8 +1171,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
if (level > 1) {
pre = hem_ptrs[level - 1];
step = (cur->start - pre->start) / step * BA_BYTE_LEN;
- hem_list_link_bt(hr_dev, pre->addr + step,
- cur->dma_addr);
+ hem_list_link_bt(pre->addr + step, cur->dma_addr);
}
}
@@ -1176,7 +1183,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
err_exit:
for (level = 1; level < hopnum; level++)
- hem_list_free_all(hr_dev, &temp_list[level], true);
+ hem_list_free_all(hr_dev, &temp_list[level]);
return ret;
}
@@ -1217,16 +1224,26 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
{
struct hns_roce_hem_item *hem;
+ /* This is on the has_mtt branch, if r->hopnum
+ * is 0, there is no root_ba to reuse for the
+ * region's fake hem, so a dma_alloc request is
+ * necessary here.
+ */
hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
- r->count, false);
+ r->count, !r->hopnum);
if (!hem)
return -ENOMEM;
- hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
+ /* The root_ba can be reused only when r->hopnum > 0. */
+ if (r->hopnum)
+ hem_list_assign_bt(hem, cpu_base, phy_base);
list_add(&hem->list, branch_head);
list_add(&hem->sibling, leaf_head);
- return r->count;
+ /* If r->hopnum == 0, 0 is returned,
+ * so that the root_bt entry is not occupied.
+ */
+ return r->hopnum ? r->count : 0;
}
static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
@@ -1236,7 +1253,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
struct hns_roce_hem_item *hem, *temp_hem;
int total = 0;
int offset;
- int step;
+ u64 step;
step = hem_list_calc_ba_range(r->hopnum, 1, unit);
if (step < 1)
@@ -1245,7 +1262,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
/* if exist mid bt, link L1 to L0 */
list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
- hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr);
+ hem_list_link_bt(cpu_base + offset, hem->dma_addr);
total++;
}
@@ -1270,7 +1287,7 @@ setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
return -ENOMEM;
total = 0;
- for (i = 0; i < region_cnt && total < max_ba_num; i++) {
+ for (i = 0; i < region_cnt && total <= max_ba_num; i++) {
r = &regions[i];
if (!r->count)
continue;
@@ -1336,14 +1353,19 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
region_cnt);
if (ret) {
for (i = 0; i < region_cnt; i++)
- hem_list_free_all(hr_dev, &head.branch[i], false);
+ hem_list_free_all(hr_dev, &head.branch[i]);
- hem_list_free_all(hr_dev, &head.root, true);
+ hem_list_free_all(hr_dev, &head.root);
}
return ret;
}
+/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming
+ * the bt page size is not expanded by cal_best_bt_pg_sz()
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
/* construct the base address table and link them by address hop config */
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
@@ -1352,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
+ int loop;
int unit;
int ret;
int i;
@@ -1369,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
continue;
end = r->offset + r->count;
- for (ofs = r->offset; ofs < end; ofs += unit) {
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
hem_list->mid_bt[i],
&hem_list->btm_bt);
@@ -1401,10 +1427,9 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
- hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j],
- j != 0);
+ hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]);
- hem_list_free_all(hr_dev, &hem_list->root_bt, true);
+ hem_list_free_all(hr_dev, &hem_list->root_bt);
INIT_LIST_HEAD(&hem_list->btm_bt);
hem_list->root_ba = 0;
}
@@ -1427,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct list_head *head = &hem_list->btm_bt;
struct hns_roce_hem_item *hem, *temp_hem;
void *cpu_base = NULL;
+ int loop = 1;
int nr = 0;
list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
if (hem_list_page_is_in_range(hem, offset)) {
nr = offset - hem->start;
cpu_base = hem->addr + nr * BA_BYTE_LEN;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 6fb51db9682b..9c415b2541af 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -57,16 +57,16 @@ enum {
};
#define check_whether_bt_num_3(type, hop_num) \
- (type < HEM_TYPE_MTT && hop_num == 2)
+ ((type) < HEM_TYPE_MTT && (hop_num) == 2)
#define check_whether_bt_num_2(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == 2))
+ (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 2))
#define check_whether_bt_num_1(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \
- (type >= HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
+ (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
struct hns_roce_hem {
void *buf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ba7ae792d279..fa8747656f25 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -36,19 +36,22 @@
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include <net/addrconf.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
+#define CREATE_TRACE_POINTS
+#include "hns_roce_trace.h"
+
enum {
CMD_RST_PRC_OTHERS,
CMD_RST_PRC_SUCCESS,
@@ -372,19 +375,12 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
static int check_send_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
-
if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
- hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
- hr_qp->state);
+ hr_qp->state == IB_QPS_RTR))
return -EINVAL;
- } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
- ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
- hr_dev->state);
+ else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
- }
return 0;
}
@@ -443,10 +439,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
-
- if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
- return -EINVAL;
-
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@@ -478,7 +470,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
ret = set_ud_opcode(ud_sq_wqe, wr);
- if (WARN_ON(ret))
+ if (WARN_ON_ONCE(ret))
return ret;
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
@@ -582,10 +574,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
- if (WARN_ON(ret))
+ if (WARN_ON_ONCE(ret))
return ret;
- hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
@@ -595,11 +587,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ if (msg_len != ATOMIC_WR_LEN)
+ return -EINVAL;
set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
- else if (wr->opcode != IB_WR_REG_MR)
+ } else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
&curr_idx, valid_num_sge);
+ if (ret)
+ return ret;
+ }
/*
* The pipeline can sequentially post all valid WQEs into WQ buffer,
@@ -675,6 +672,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
#define HNS_ROCE_SL_SHIFT 2
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ return;
+ }
/* All kinds of DirectWQE have the same header field layout */
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
@@ -739,6 +740,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
else
ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+ trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift,
+ wr->wr_id, TRACE_SQ);
if (unlikely(ret)) {
*bad_wr = wr;
goto out;
@@ -808,6 +811,9 @@ static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift,
+ wr->wr_id, TRACE_RQ);
}
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
@@ -944,7 +950,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
static void update_srq_db(struct hns_roce_srq *srq)
{
struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
- struct hns_roce_v2_db db;
+ struct hns_roce_v2_db db = {};
hr_reg_write(&db, DB_TAG, srq->srqn);
hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
@@ -985,6 +991,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
+
+ trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift,
+ wr->wr_id, TRACE_SRQ);
}
if (likely(nreq)) {
@@ -1273,22 +1282,47 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
return -EIO;
}
-static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmq_desc *desc, int num)
+static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+{
+ static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
+ {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
+ if (cmdq_tx_timeout[i].opcode == opcode)
+ return cmdq_tx_timeout[i].tx_timeout;
+
+ return tx_timeout;
+}
+
+static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
+{
+ u32 timeout = 0;
+
+ do {
+ if (hns_roce_cmq_csq_done(hr_dev))
+ break;
+ udelay(1);
+ } while (++timeout < tx_timeout);
+}
+
+static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc,
+ int num, u32 tx_timeout)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- u32 timeout = 0;
u16 desc_ret;
u32 tail;
int ret;
int i;
- spin_lock_bh(&csq->lock);
-
tail = csq->head;
for (i = 0; i < num; i++) {
+ trace_hns_cmdq_req(hr_dev, &desc[i]);
+
csq->desc[csq->head++] = desc[i];
if (csq->head == csq->desc_num)
csq->head = 0;
@@ -1299,27 +1333,19 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
- do {
- if (hns_roce_cmq_csq_done(hr_dev))
- break;
- udelay(1);
- } while (++timeout < priv->cmq.tx_timeout);
-
+ hns_roce_wait_csq_done(hr_dev, tx_timeout);
if (hns_roce_cmq_csq_done(hr_dev)) {
ret = 0;
for (i = 0; i < num; i++) {
+ trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]);
+
/* check the result of hardware write back */
- desc[i] = csq->desc[tail++];
+ desc_ret = le16_to_cpu(csq->desc[tail++].retval);
if (tail == csq->desc_num)
tail = 0;
-
- desc_ret = le16_to_cpu(desc[i].retval);
if (likely(desc_ret == CMD_EXEC_SUCCESS))
continue;
- dev_err_ratelimited(hr_dev->dev,
- "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
- desc->opcode, desc_ret);
ret = hns_roce_cmd_err_convert_errno(desc_ret);
}
} else {
@@ -1334,14 +1360,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
ret = -EAGAIN;
}
- spin_unlock_bh(&csq->lock);
-
if (ret)
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
return ret;
}
+static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u16 opcode = le16_to_cpu(desc->opcode);
+ u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+ u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT;
+ u32 rsv_tail;
+ int ret;
+ int i;
+
+ while (try_cnt) {
+ try_cnt--;
+
+ spin_lock_bh(&csq->lock);
+ rsv_tail = csq->head;
+ ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
+ if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME &&
+ try_cnt) {
+ spin_unlock_bh(&csq->lock);
+ mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC);
+ continue;
+ }
+
+ for (i = 0; i < num; i++) {
+ desc[i] = csq->desc[rsv_tail++];
+ if (rsv_tail == csq->desc_num)
+ rsv_tail = 0;
+ }
+ spin_unlock_bh(&csq->lock);
+ break;
+ }
+
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = 0x%x, return = %d.\n",
+ opcode, ret);
+
+ return ret;
+}
+
static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@@ -1658,8 +1724,8 @@ static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
bd_idx = i / CNT_PER_DESC;
- if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) &&
- bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC)
+ if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC &&
+ !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
break;
cnt_data = (__le64 *)&desc[bd_idx].data[0];
@@ -2105,7 +2171,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->gmv_bt_num *
(HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
- caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
+ caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
caps->gmv_entry_sz);
} else {
u32 func_num = max_t(u32, 1, hr_dev->func_num);
@@ -2461,14 +2527,16 @@ static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev,
static struct hns_roce_link_table *
alloc_link_table_buf(struct hns_roce_dev *hr_dev)
{
+ u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num;
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_link_table *link_tbl;
u32 pg_shift, size, min_size;
link_tbl = &priv->ext_llm;
pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT;
- size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
- min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) << pg_shift;
+ size = hr_dev->caps.num_qps * hr_dev->func_num *
+ HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
+ min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift;
/* Alloc data table */
size = max(size, min_size);
@@ -2535,20 +2603,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
free_link_table_buf(hr_dev, &priv->ext_llm);
}
-static void free_dip_list(struct hns_roce_dev *hr_dev)
+static void free_dip_entry(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dip *hr_dip;
- struct hns_roce_dip *tmp;
- unsigned long flags;
+ unsigned long idx;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+ xa_lock(&hr_dev->qp_table.dip_xa);
- list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) {
- list_del(&hr_dip->node);
+ xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
+ __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
kfree(hr_dip);
}
- spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+ xa_unlock(&hr_dev->qp_table.dip_xa);
}
static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
@@ -2671,6 +2738,8 @@ static void free_mr_exit(struct hns_roce_dev *hr_dev)
kfree(free_mr->rsv_pd);
free_mr->rsv_pd = NULL;
}
+
+ mutex_destroy(&free_mr->mutex);
}
static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
@@ -2748,8 +2817,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
IB_QPS_INIT, NULL);
if (ret) {
- ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
return ret;
}
@@ -2821,8 +2890,10 @@ static int free_mr_init(struct hns_roce_dev *hr_dev)
mutex_init(&free_mr->mutex);
ret = free_mr_alloc_res(hr_dev);
- if (ret)
+ if (ret) {
+ mutex_destroy(&free_mr->mutex);
return ret;
+ }
ret = free_mr_modify_qp(hr_dev);
if (ret)
@@ -2943,13 +3014,16 @@ err_llm_init_failed:
static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
hns_roce_function_clear(hr_dev);
if (!hr_dev->is_vf)
hns_roce_free_link_table(hr_dev);
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
- free_dip_list(hr_dev);
+ free_dip_entry(hr_dev);
}
static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
@@ -3208,13 +3282,14 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
/* Aligned to the hardware address access unit */
for (i = 0; i < ARRAY_SIZE(pages); i++)
- pages[i] >>= 6;
+ pages[i] >>= MPT_PBL_BUF_ADDR_S;
pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
- mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
- hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
@@ -3307,8 +3382,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return ret;
}
-static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
- void *mb_buf, struct hns_roce_mr *mr)
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
struct hns_roce_v2_mpt_entry *mpt_entry;
@@ -3335,8 +3409,10 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
- hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >>
+ MPT_PBL_BA_ADDR_S));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
return 0;
}
@@ -3387,8 +3463,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
if (ret) {
- ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
return ret;
}
@@ -3427,9 +3503,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
ret = free_mr_post_send_lp_wqe(hr_qp);
if (ret) {
- ibdev_err(ibdev,
- "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
- hr_qp->qpn, ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
break;
}
@@ -3440,16 +3516,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
while (cqe_cnt) {
npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
if (npolled < 0) {
- ibdev_err(ibdev,
- "failed to poll cqe for free mr, remain %d cqe.\n",
- cqe_cnt);
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
goto out;
}
if (time_after(jiffies, end)) {
- ibdev_err(ibdev,
- "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
- cqe_cnt);
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
goto out;
}
cqe_cnt -= npolled;
@@ -3582,14 +3658,14 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ,
to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
- hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> 3);
- hr_reg_write(cq_context, CQC_CQE_BA_H, (dma_handle >> (32 + 3)));
+ hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S);
+ hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S);
hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN,
hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB);
hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L,
((u32)hr_cq->db.dma) >> 1);
hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H,
- hr_cq->db.dma >> 32);
+ hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S);
hr_reg_write(cq_context, CQC_CQ_MAX_CNT,
HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
hr_reg_write(cq_context, CQC_CQ_PERIOD,
@@ -3711,8 +3787,9 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
wc->status == IB_WC_WR_FLUSH_ERR))
return;
- ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n",
+ cqe_status);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe,
cq->cqe_size, false);
wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
@@ -4217,8 +4294,7 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
}
static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
- struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *context)
{
hr_reg_write(context, QPC_SGE_SHIFT,
to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
@@ -4240,9 +4316,7 @@ static inline int get_pdn(struct ib_pd *ib_pd)
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -4259,7 +4333,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs));
- set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
+ set_qpc_wqe_cnt(hr_qp, context);
/* No VLAN need to set 0xFFF */
hr_reg_write(context, QPC_VLAN_ID, 0xfff);
@@ -4300,7 +4374,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
}
static void modify_qp_init_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4394,12 +4467,14 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
upper_32_bits(to_hr_hw_page_addr(mtts[0])));
hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
- context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
- qpc_mask->rq_nxt_blk_addr = 0;
-
- hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
- upper_32_bits(to_hr_hw_page_addr(mtts[1])));
- hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ context->rq_nxt_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
+ qpc_mask->rq_nxt_blk_addr = 0;
+ hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ }
return 0;
}
@@ -4521,16 +4596,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return -EINVAL;
}
- hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> 4);
+ hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S);
hr_reg_clear(qpc_mask, QPC_TRRL_BA_L);
- context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4));
+ context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S);
qpc_mask->trrl_ba = 0;
- hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4));
+ hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S);
hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
- context->irrl_ba = cpu_to_le32(irrl_ba >> 6);
+ context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S);
qpc_mask->irrl_ba = 0;
- hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6));
+ hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S);
hr_reg_clear(qpc_mask, QPC_IRRL_BA_H);
hr_reg_enable(context, QPC_RMT_E2E);
@@ -4592,8 +4667,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX);
hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX);
+#define MAX_LP_SGEN 3
/* rocee send 2^lp_sgen_ini segs every time */
- hr_reg_write(context, QPC_LP_SGEN_INI, 3);
+ hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN);
hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
if (udata && ibqp->qp_type == IB_QPT_RC &&
@@ -4619,8 +4695,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return 0;
}
-static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
+static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4667,26 +4742,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
+{
+ struct hns_roce_dip *hr_dip;
+ int ret;
+
+ hr_dip = xa_load(dip_xa, qpn);
+ if (hr_dip)
+ return 0;
+
+ hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
+ if (!hr_dip)
+ return -ENOMEM;
+
+ ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
+ if (ret)
+ kfree(hr_dip);
+
+ return ret;
+}
+
static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
u32 *dip_idx)
{
const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
- u32 *head = &hr_dev->qp_table.idx_table.head;
- u32 *tail = &hr_dev->qp_table.idx_table.tail;
+ struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_dip *hr_dip;
- unsigned long flags;
+ unsigned long idx;
int ret = 0;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+ ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
+ if (ret)
+ return ret;
- spare_idx[*tail] = ibqp->qp_num;
- *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
+ xa_lock(dip_xa);
- list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
- if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) {
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt &&
+ !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
*dip_idx = hr_dip->dip_idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
goto out;
}
}
@@ -4694,19 +4792,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
/* If no dgid is found, a new dip and a mapping between dgid and
* dip_idx will be created.
*/
- hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC);
- if (!hr_dip) {
- ret = -ENOMEM;
- goto out;
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt)
+ continue;
+
+ *dip_idx = idx;
+ memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ hr_dip->dip_idx = idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ break;
}
- memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
- hr_dip->dip_idx = *dip_idx = spare_idx[*head];
- *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
- list_add_tail(&hr_dip->node, &hr_dev->dip_list);
+ /* This should never happen. */
+ if (WARN_ON_ONCE(!hr_qp->dip))
+ ret = -ENOSPC;
out:
- spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+ xa_unlock(dip_xa);
return ret;
}
@@ -4828,6 +4931,69 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
return 0;
}
+static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!ops->get_dscp_prio)
+ return -EOPNOTSUPP;
+
+ return ops->get_dscp_prio(handle, dscp, tc_mode, priority);
+}
+
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl)
+{
+ u32 max_sl;
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set SL(%u). Shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return false;
+ }
+
+ return true;
+}
+
+static int hns_roce_set_sl(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ hr_qp->sl = hr_qp->priority;
+ else
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ if (!check_sl_valid(hr_dev, hr_qp->sl))
+ return -EINVAL;
+
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4843,25 +5009,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
int is_roce_protocol;
u16 vlan_id = 0xffff;
bool is_udp = false;
- u32 max_sl;
u8 ib_port;
u8 hr_port;
int ret;
- max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
- if (unlikely(sl > max_sl)) {
- ibdev_err_ratelimited(ibdev,
- "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n",
- sl, max_sl);
- return -EINVAL;
- }
-
/*
* If free_mr_en of qp is set, it means that this qp comes from
* free mr. This qp will perform the loopback operation.
* In the loopback scenario, only sl needs to be set.
*/
if (hr_qp->free_mr_en) {
+ if (!check_sl_valid(hr_dev, sl))
+ return -EINVAL;
hr_reg_write(context, QPC_SL, sl);
hr_reg_clear(qpc_mask, QPC_SL);
hr_qp->sl = sl;
@@ -4931,11 +5090,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- hr_qp->sl = sl;
- hr_reg_write(context, QPC_SL, hr_qp->sl);
- hr_reg_clear(qpc_mask, QPC_SL);
-
- return 0;
+ return hns_roce_set_sl(ibqp, attr, context, qpc_mask);
}
static bool check_qp_state(enum ib_qp_state cur_state,
@@ -4975,22 +5130,19 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
int ret = 0;
- if (!check_qp_state(cur_state, new_state)) {
- ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
+ if (!check_qp_state(cur_state, new_state))
return -EINVAL;
- }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
- modify_qp_reset_to_init(ibqp, attr, context, qpc_mask);
+ modify_qp_reset_to_init(ibqp, context);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- modify_qp_init_to_init(ibqp, attr, context, qpc_mask);
+ modify_qp_init_to_init(ibqp, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
qpc_mask, udata);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
- ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
- qpc_mask);
+ ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask);
}
return ret;
@@ -5174,6 +5326,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
return;
spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+ trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
hr_qp->state = IB_QPS_ERR;
@@ -5183,6 +5336,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
return;
spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+ trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
@@ -5240,7 +5394,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* SW pass context to HW */
ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
- ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
+ ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
goto out;
}
@@ -5378,7 +5532,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret) {
- ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to query QPC, ret = %d.\n",
+ ret);
ret = -EINVAL;
goto out;
}
@@ -5386,7 +5542,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
state = hr_reg_read(&context, QPC_QP_ST);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
- ibdev_err(ibdev, "Illegal ib_qp_state\n");
+ ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
ret = -EINVAL;
goto out;
}
@@ -5479,9 +5635,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
hr_qp->state, IB_QPS_RESET, udata);
if (ret)
- ibdev_err(ibdev,
- "failed to modify QP to RST, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to modify QP to RST, ret = %d.\n",
+ ret);
}
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
@@ -5509,17 +5665,44 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
return ret;
}
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dip *hr_dip = hr_qp->dip;
+
+ if (!hr_dip)
+ return;
+
+ xa_lock(&hr_dev->qp_table.dip_xa);
+
+ hr_dip->qp_cnt--;
+ if (!hr_dip->qp_cnt)
+ memset(hr_dip->dgid, 0, GID_LEN_V2);
+
+ xa_unlock(&hr_dev->qp_table.dip_xa);
+}
+
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long flags;
int ret;
+ /* Make sure flush_cqe() is completed */
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ flush_work(&hr_qp->flush_work.work);
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP)
+ put_dip_ctx_idx(hr_dev, hr_qp);
+
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
- hr_qp->qpn, ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
+ hr_qp->qpn, ret);
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
@@ -5802,7 +5985,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
dev_info(hr_dev->dev,
"cq_period(%u) reached the upper limit, adjusted to 65.\n",
cq_period);
- cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
}
cq_period *= HNS_ROCE_CLOCK_ADJUST;
}
@@ -5813,9 +5996,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when modifying CQ, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when modifying CQ, ret = %d.\n",
+ ret);
err_out:
if (ret)
@@ -5839,9 +6022,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
HNS_ROCE_CMD_QUERY_CQC, cqn);
if (ret) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when querying CQ, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
goto err_mailbox;
}
@@ -5882,11 +6065,10 @@ err_mailbox:
return ret;
}
-static void hns_roce_irq_work_handle(struct work_struct *work)
+static void dump_aeqe_log(struct hns_roce_work *irq_work)
{
- struct hns_roce_work *irq_work =
- container_of(work, struct hns_roce_work, work);
- struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -5930,6 +6112,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
ibdev_warn(ibdev, "DB overflow.\n");
break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ break;
case HNS_ROCE_EVENT_TYPE_FLR:
ibdev_warn(ibdev, "function level reset.\n");
break;
@@ -5940,8 +6124,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
ibdev_err(ibdev, "invalid xrceth error.\n");
break;
default:
+ ibdev_info(ibdev, "Undefined event %d.\n",
+ irq_work->event_type);
break;
}
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *irq_work =
+ container_of(work, struct hns_roce_work, work);
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ int event_type = irq_work->event_type;
+ u32 queue_num = irq_work->queue_num;
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
+ break;
+ default:
+ break;
+ }
+
+ dump_aeqe_log(irq_work);
kfree(irq_work);
}
@@ -6002,14 +6224,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
irqreturn_t aeqe_found = IRQ_NONE;
+ int num_aeqes = 0;
int event_type;
u32 queue_num;
int sub_type;
- while (aeqe) {
+ while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
/* Make sure we read AEQ entry after we have checked the
* ownership bit
*/
@@ -6020,25 +6242,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
- hns_roce_qp_event(hr_dev, queue_num, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- hns_roce_srq_event(hr_dev, queue_num, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_cq_event(hr_dev, queue_num, event_type);
+ hns_roce_flush_cqe(hr_dev, queue_num);
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -6046,12 +6255,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
aeqe->event.cmd.status,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_FLR:
- break;
default:
- dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
- event_type, eq->eqn, eq->cons_index);
break;
}
@@ -6059,12 +6263,14 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
eq->sub_type = sub_type;
++eq->cons_index;
aeqe_found = IRQ_HANDLED;
+ trace_hns_ae_info(event_type, aeqe, eq->eqe_size);
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
+ ++num_aeqes;
}
update_eq_db(eq);
@@ -6084,33 +6290,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
{
- struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
- irqreturn_t ceqe_found = IRQ_NONE;
- u32 cqn;
+ queue_work(system_bh_wq, &eq->work);
- while (ceqe) {
- /* Make sure we read CEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- cqn = hr_reg_read(ceqe, CEQE_CQN);
-
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqe_found = IRQ_HANDLED;
- atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
-
- ceqe = next_ceqe_sw_v2(eq);
- }
-
- update_eq_db(eq);
-
- return IRQ_RETVAL(ceqe_found);
+ return IRQ_HANDLED;
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
@@ -6121,7 +6305,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
- int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+ int_work = hns_roce_v2_ceq_int(eq);
else
/* Asynchronous event interrupt */
int_work = hns_roce_v2_aeq_int(hr_dev, eq);
@@ -6135,6 +6319,7 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
struct pci_dev *pdev = hr_dev->pci_dev;
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
const struct hnae3_ae_ops *ops = ae_dev->ops;
+ enum hnae3_reset_type reset_type;
irqreturn_t int_work = IRQ_NONE;
u32 int_en;
@@ -6146,10 +6331,12 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+ reset_type = hr_dev->is_vf ?
+ HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET;
+
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
- ops->set_default_reset_request(ae_dev,
- HNAE3_FUNC_RESET);
+ ops->set_default_reset_request(ae_dev, reset_type);
if (ops->reset_event)
ops->reset_event(pdev, NULL);
@@ -6219,7 +6406,7 @@ static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
res_type == ECC_RESOURCE_SCCC)
return le64_to_cpu(*data);
- return le64_to_cpu(*data) << PAGE_SHIFT;
+ return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT;
}
static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
@@ -6333,9 +6520,16 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
}
-static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+}
+
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
+ int eqn = eq->eqn;
int ret;
u8 cmd;
@@ -6346,12 +6540,9 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
if (ret)
- dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn);
-}
+ dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
-static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+ free_eq_buf(hr_dev, eq);
}
static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
@@ -6489,6 +6680,34 @@ free_cmd_mbox:
return ret;
}
+static void hns_roce_ceq_work(struct work_struct *work)
+{
+ struct hns_roce_eq *eq = from_work(eq, work, work);
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int ceqe_num = 0;
+ u32 cqn;
+
+ while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
+
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ++ceqe_num;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
+
+ ceqe = next_ceqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+}
+
static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
int comp_num, int aeq_num, int other_num)
{
@@ -6520,21 +6739,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
j - other_num - aeq_num);
for (j = 0; j < irq_num; j++) {
- if (j < other_num)
+ if (j < other_num) {
ret = request_irq(hr_dev->irq[j],
hns_roce_v2_msix_interrupt_abn,
0, hr_dev->irq_names[j], hr_dev);
-
- else if (j < (other_num + comp_num))
+ } else if (j < (other_num + comp_num)) {
+ INIT_WORK(&eq_table->eq[j - other_num].work,
+ hns_roce_ceq_work);
ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
0, hr_dev->irq_names[j + aeq_num],
&eq_table->eq[j - other_num]);
- else
+ } else {
ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
0, hr_dev->irq_names[j - comp_num],
&eq_table->eq[j - other_num]);
+ }
+
if (ret) {
dev_err(hr_dev->dev, "request irq error!\n");
goto err_request_failed;
@@ -6544,12 +6766,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
return 0;
err_request_failed:
- for (j -= 1; j >= 0; j--)
- if (j < other_num)
+ for (j -= 1; j >= 0; j--) {
+ if (j < other_num) {
free_irq(hr_dev->irq[j], hr_dev);
- else
- free_irq(eq_table->eq[j - other_num].irq,
- &eq_table->eq[j - other_num]);
+ continue;
+ }
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+ if (j < other_num + comp_num)
+ cancel_work_sync(&eq_table->eq[j - other_num].work);
+ }
err_kzalloc_failed:
for (i -= 1; i >= 0; i--)
@@ -6570,8 +6796,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
free_irq(hr_dev->irq[i], hr_dev);
- for (i = 0; i < eq_num; i++)
+ for (i = 0; i < eq_num; i++) {
free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+ if (i < hr_dev->caps.num_comp_vectors)
+ cancel_work_sync(&hr_dev->eq_table.eq[i].work);
+ }
for (i = 0; i < irq_num; i++)
kfree(hr_dev->irq_names[i]);
@@ -6591,6 +6820,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
int ret;
int i;
+ if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
+ return -EINVAL;
+
other_num = hr_dev->caps.num_other_vectors;
comp_num = hr_dev->caps.num_comp_vectors;
aeq_num = hr_dev->caps.num_aeq_vectors;
@@ -6660,7 +6892,7 @@ err_request_irq_fail:
err_create_eq_fail:
for (i -= 1; i >= 0; i--)
- free_eq_buf(hr_dev, &eq_table->eq[i]);
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
return ret;
@@ -6680,11 +6912,8 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
__hns_roce_free_irq(hr_dev);
destroy_workqueue(hr_dev->irq_workq);
- for (i = 0; i < eq_num; i++) {
- hns_roce_v2_destroy_eqc(hr_dev, i);
-
- free_eq_buf(hr_dev, &eq_table->eq[i]);
- }
+ for (i = 0; i < eq_num; i++)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
}
@@ -6735,6 +6964,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_srqc = hns_roce_v2_query_srqc,
.query_sccc = hns_roce_v2_query_sccc,
.query_hw_counter = hns_roce_hw_v2_query_counter,
+ .get_dscp = hns_roce_hw_v2_get_dscp,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
@@ -6851,9 +7081,6 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
hns_roce_handle_device_err(hr_dev);
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
- free_mr_exit(hr_dev);
-
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
@@ -6914,6 +7141,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
+
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
@@ -6932,6 +7160,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
hr_dev->active = false;
hr_dev->dis_db = true;
+
+ rdma_user_mmap_disassociate(&hr_dev->ib_dev);
+
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;
@@ -7002,9 +7233,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
return ret;
}
+static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
+ bool linkup)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct net_device *netdev = handle->rinfo.netdev;
+
+ if (linkup || !hr_dev)
+ return;
+
+ ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .link_status_change = hns_roce_hw_v2_link_status_change,
.reset_notify = hns_roce_hw_v2_reset_notify,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index df04bc8ede57..bc7466830eaf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_HW_V2_H
#include <linux/bitops.h>
+#include "hnae3.h"
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
@@ -85,6 +86,11 @@
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
+/* budget must be smaller than aeqe_depth to guarantee that we update
+ * the ci before we polled all the entries in the EQ.
+ */
+#define HNS_AEQ_POLLING_BUDGET 64
+
enum {
HNS_ROCE_CMD_FLAG_IN = BIT(0),
HNS_ROCE_CMD_FLAG_OUT = BIT(1),
@@ -224,6 +230,14 @@ enum hns_roce_opcode_type {
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
+#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
+#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
+#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
+struct hns_roce_cmdq_tx_timeout_map {
+ u16 opcode;
+ u32 tx_timeout;
+};
+
enum {
TYPE_CRQ,
TYPE_CSQ,
@@ -276,6 +290,10 @@ struct hns_roce_v2_cq_context {
__le32 byte_64_se_cqe_idx;
};
+#define CQC_CQE_BA_L_S 3
+#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S)
+#define CQC_CQE_DB_RECORD_ADDR_H_S 32
+
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
@@ -447,6 +465,12 @@ struct hns_roce_v2_qp_context {
struct hns_roce_v2_qp_context_ex ext;
};
+#define QPC_TRRL_BA_L_S 4
+#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S)
+#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S)
+#define QPC_IRRL_BA_L_S 6
+#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S)
+
#define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l)
#define QPC_TST QPC_FIELD_LOC(2, 0)
@@ -716,6 +740,9 @@ struct hns_roce_v2_mpt_entry {
__le32 byte_64_buf_pa1;
};
+#define MPT_PBL_BUF_ADDR_S 6
+#define MPT_PBL_BA_ADDR_S 3
+
#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
#define MPT_ST MPT_FIELD_LOC(1, 0)
@@ -900,6 +927,7 @@ struct hns_roce_v2_rc_send_wqe {
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
@@ -1323,7 +1351,7 @@ struct hns_roce_v2_priv {
struct hns_roce_dip {
u8 dgid[GID_LEN_V2];
u32 dip_idx;
- struct list_head node; /* all dips are on a list */
+ u32 qp_cnt;
};
struct fmea_ram_ecc {
@@ -1334,7 +1362,7 @@ struct fmea_ram_ecc {
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
-#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65
#define HNS_ROCE_MAX_EQ_PERIOD 65
#define HNS_ROCE_RNR_TIMER_10NS 1
#define HNS_ROCE_1US_CFG 999
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1dc60c2b2b7a..e7a497cc125c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -40,6 +40,7 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
const u8 *addr)
@@ -181,7 +182,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_DEVICE_RC_RNR_NAK_GEN;
props->max_send_sge = hr_dev->caps.max_sq_sg;
props->max_recv_sge = hr_dev->caps.max_rq_sg;
- props->max_sge_rd = 1;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
@@ -192,6 +193,12 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ props->max_ah = INT_MAX;
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
+ props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
props->max_srq = hr_dev->caps.num_srqs;
props->max_srq_wr = hr_dev->caps.max_srq_wrs;
@@ -421,6 +428,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
return 0;
error_fail_copy_to_udata:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
hns_roce_dealloc_uar_entry(context);
error_fail_uar_entry:
@@ -437,6 +447,10 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
+
hns_roce_dealloc_uar_entry(context);
ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
@@ -451,6 +465,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
pgprot_t prot;
int ret;
+ if (hr_dev->dis_db) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+ return -EPERM;
+ }
+
rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
if (!rdma_entry) {
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
@@ -743,7 +762,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (ret)
return ret;
}
- dma_set_max_seg_size(dev, UINT_MAX);
+ dma_set_max_seg_size(dev, SZ_2G);
ret = ib_register_device(ib_dev, "hns_%d", dev);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
@@ -925,6 +944,15 @@ err_unmap_dmpt:
return ret;
}
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_cleanup_bitmap(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&hr_dev->pgdir_mutex);
+}
+
/**
* hns_roce_setup_hca - setup host channel adapter
* @hr_dev: pointer to hns roce device
@@ -973,6 +1001,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
err_uar_table_free:
ida_destroy(&hr_dev->uar_ida.ida);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&hr_dev->pgdir_mutex);
+
return ret;
}
@@ -1102,8 +1134,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
INIT_LIST_HEAD(&hr_dev->qp_list);
spin_lock_init(&hr_dev->qp_list_lock);
- INIT_LIST_HEAD(&hr_dev->dip_list);
- spin_lock_init(&hr_dev->dip_list_lock);
ret = hns_roce_register_device(hr_dev);
if (ret)
@@ -1118,7 +1148,7 @@ error_failed_register_device:
hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init:
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca:
hns_roce_cleanup_hem(hr_dev);
@@ -1148,7 +1178,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 9e05b57a2d67..93a48b41955b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -38,6 +38,7 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
+#include "hns_roce_trace.h"
static u32 hw_index_to_key(int ind)
{
@@ -138,8 +139,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
- ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
- ret);
+ ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
}
free_mr_pbl(hr_dev, mr);
@@ -159,10 +160,11 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
+ trace_hns_mr(mr);
if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
else
- ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
+ ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
goto err_page;
@@ -435,24 +437,30 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
- unsigned int *sg_offset)
+ unsigned int *sg_offset_p)
{
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_mtr *mtr = &mr->pbl_mtr;
- int ret = 0;
+ int ret, sg_num = 0;
+
+ if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
+ ibmr->page_size < HNS_HW_PAGE_SIZE ||
+ ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
+ return sg_num;
mr->npages = 0;
mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
sizeof(dma_addr_t), GFP_KERNEL);
if (!mr->page_list)
- return ret;
+ return sg_num;
- ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
- if (ret < 1) {
+ sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
+ if (sg_num < 1) {
ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
- mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
goto err_page_list;
}
@@ -463,17 +471,16 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
if (ret) {
ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
- ret = 0;
+ sg_num = 0;
} else {
mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
- ret = mr->npages;
}
err_page_list:
kvfree(mr->page_list);
mr->page_list = NULL;
- return ret;
+ return sg_num;
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
@@ -756,7 +763,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
return -ENOMEM;
if (mtr->umem)
- npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
+ npage = hns_roce_get_umem_bufs(pages, page_count,
mtr->umem, page_shift);
else
npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
@@ -809,11 +816,6 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
mapped_cnt < page_cnt; i++) {
r = &mtr->hem_cfg.region[i];
- /* if hopnum is 0, no need to map pages in this region */
- if (!r->hopnum) {
- mapped_cnt += r->count;
- continue;
- }
if (r->offset + r->count > page_cnt) {
ret = -EINVAL;
@@ -998,7 +1000,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
if (attr->region_count > ARRAY_SIZE(attr->region) ||
attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
ibdev_err(ibdev,
- "invalid buf attr, region count %d, page shift %u.\n",
+ "invalid buf attr, region count %u, page shift %u.\n",
attr->region_count, attr->page_shift);
return false;
}
@@ -1146,6 +1148,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
+ trace_hns_buf_attr(buf_attr);
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
* to finish the MTT configuration.
*/
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f35a66325d9a..9f376a2232b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -39,6 +39,25 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+ u32 qpn)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_qp *qp;
+ unsigned long flags;
+
+ xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
+ qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
+
+ if (!qp)
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+
+ return qp;
+}
+
static void flush_work_handle(struct work_struct *work)
{
struct hns_roce_work *flush_work = container_of(work,
@@ -71,11 +90,18 @@ static void flush_work_handle(struct work_struct *work)
void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ /* Exit directly after destroy_qp() */
+ if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ return;
+ }
- flush_work->hr_dev = hr_dev;
- INIT_WORK(&flush_work->work, flush_work_handle);
refcount_inc(&hr_qp->refcount);
queue_work(hr_dev->irq_workq, &flush_work->work);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
}
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
@@ -95,31 +121,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
- xa_lock(&hr_dev->qp_table_xa);
- qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (qp)
- refcount_inc(&qp->refcount);
- xa_unlock(&hr_dev->qp_table_xa);
-
- if (!qp) {
- dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
return;
- }
- if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
- event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
- qp->state = IB_QPS_ERR;
+ qp->event(qp, (enum hns_roce_event)event_type);
- flush_cqe(hr_dev, qp);
- }
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
- qp->event(qp, (enum hns_roce_event)event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
+ qp->state = IB_QPS_ERR;
+ flush_cqe(hr_dev, qp);
if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
@@ -410,7 +433,8 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
bankid = get_qp_bankid(hr_qp->qpn);
- ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
+ ida_free(&hr_dev->qp_table.bank[bankid].ida,
+ hr_qp->qpn / HNS_ROCE_QP_BANK_NUM);
mutex_lock(&hr_dev->qp_table.bank_mutex);
hr_dev->qp_table.bank[bankid].inuse--;
@@ -531,13 +555,15 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
{
unsigned int inline_sge;
- inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+ if (!max_inline_data)
+ return 0;
/*
* if max_inline_data less than
* HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
* In addition to ud's mode, no need to extend sge.
*/
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
inline_sge = 0;
@@ -842,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_ib_create_qp *ucmd,
struct hns_roce_ib_create_qp_resp *resp)
{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
- if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+ if (has_sdb) {
ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
if (ret) {
ibdev_err(ibdev,
@@ -858,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
}
- if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+ if (has_rdb) {
ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
ibdev_err(ibdev,
@@ -872,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
return 0;
err_sdb:
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ if (has_sdb)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_out:
return ret;
@@ -1093,35 +1121,34 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibucontext);
hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
- if (ret)
+ if (ret) {
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+ return ret;
+ }
ret = set_congest_param(hr_dev, hr_qp, ucmd);
- if (ret)
- return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
-
- default_congest_type(hr_dev, hr_qp);
}
return ret;
}
static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
- struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata,
struct hns_roce_qp *hr_qp)
{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
struct hns_roce_ib_create_qp_resp resp = {};
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_qp ucmd = {};
@@ -1130,9 +1157,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
mutex_init(&hr_qp->mutex);
spin_lock_init(&hr_qp->sq.lock);
spin_lock_init(&hr_qp->rq.lock);
+ spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET;
hr_qp->flush_flag = 0;
+ flush_work->hr_dev = hr_dev;
+ INIT_WORK(&flush_work->work, flush_work_handle);
if (init_attr->create_flags)
return -EOPNOTSUPP;
@@ -1140,7 +1170,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
if (ret) {
ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
- return ret;
+ goto err_out;
}
if (!udata) {
@@ -1148,7 +1178,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (ret) {
ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
ret);
- return ret;
+ goto err_out;
}
}
@@ -1190,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
min(udata->outlen, sizeof(resp)));
if (ret) {
ibdev_err(ibdev, "copy qp resp failed!\n");
- goto err_store;
+ goto err_flow_ctrl;
}
}
@@ -1219,6 +1249,8 @@ err_qpn:
free_qp_buf(hr_dev, hr_qp);
err_buf:
free_kernel_wrid(hr_qp);
+err_out:
+ mutex_destroy(&hr_qp->mutex);
return ret;
}
@@ -1234,6 +1266,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
free_qp_buf(hr_dev, hr_qp);
free_kernel_wrid(hr_qp);
free_qp_db(hr_dev, hr_qp, udata);
+ mutex_destroy(&hr_qp->mutex);
}
static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
@@ -1271,7 +1304,6 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
struct ib_device *ibdev = qp->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_qp *hr_qp = to_hr_qp(qp);
- struct ib_pd *pd = qp->pd;
int ret;
ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
@@ -1286,9 +1318,9 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
}
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+ ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
if (ret)
- ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
init_attr->qp_type, ret);
err_out:
@@ -1386,6 +1418,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ib_modify_qp_resp resp = {};
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
enum ib_qp_state cur_state, new_state;
int ret = -EINVAL;
@@ -1427,6 +1460,18 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
new_state, udata);
+ if (ret)
+ goto out;
+
+ if (udata && udata->outlen) {
+ resp.tc_mode = hr_qp->tc_mode;
+ resp.priority = hr_qp->sl;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to copy modify qp resp.\n");
+ }
out:
mutex_unlock(&hr_qp->mutex);
@@ -1443,19 +1488,19 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
__acquire(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else if (send_cq == recv_cq) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
@@ -1475,13 +1520,13 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
spin_unlock(&recv_cq->lock);
} else if (send_cq == recv_cq) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
}
@@ -1529,14 +1574,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
unsigned int reserved_from_bot;
unsigned int i;
- qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
- sizeof(u32), GFP_KERNEL);
- if (!qp_table->idx_table.spare_idx)
- return -ENOMEM;
-
mutex_init(&qp_table->scc_mutex);
mutex_init(&qp_table->bank_mutex);
xa_init(&hr_dev->qp_table_xa);
+ xa_init(&qp_table->dip_xa);
reserved_from_bot = hr_dev->caps.reserved_qps;
@@ -1561,5 +1602,8 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
- kfree(hr_dev->qp_table.idx_table.spare_idx);
+ xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
+ mutex_destroy(&hr_dev->qp_table.bank_mutex);
+ mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 356d98816949..f637b73b946e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -4,7 +4,6 @@
#include <rdma/rdma_cm.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_netlink.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 4abae9477854..1090051f493b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
break;
default:
dev_err(hr_dev->dev,
- "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
event_type, srq->srqn);
return;
}
@@ -123,7 +123,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return ret;
}
- ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
goto err_put;
@@ -136,7 +136,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return 0;
err_xa:
- xa_erase(&srq_table->xa, srq->srqn);
+ xa_erase_irq(&srq_table->xa, srq->srqn);
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
@@ -151,10 +151,10 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
srq->srqn);
if (ret)
- dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
- ret, srq->srqn);
+ dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
+ ret, srq->srqn);
- xa_erase(&srq_table->xa, srq->srqn);
+ xa_erase_irq(&srq_table->xa, srq->srqn);
if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
@@ -250,7 +250,7 @@ static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
}
-static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+static int alloc_srq_wrid(struct hns_roce_srq *srq)
{
srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
if (!srq->wrid)
@@ -297,7 +297,7 @@ static int set_srq_basic_param(struct hns_roce_srq *srq,
max_sge = proc_srq_sge(hr_dev, srq, !!udata);
if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
- attr->max_sge > max_sge) {
+ attr->max_sge > max_sge || !attr->max_sge) {
ibdev_err(&hr_dev->ib_dev,
"invalid SRQ attr, depth = %u, sge = %u.\n",
attr->max_wr, attr->max_sge);
@@ -366,7 +366,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
goto err_idx;
if (!udata) {
- ret = alloc_srq_wrid(hr_dev, srq);
+ ret = alloc_srq_wrid(srq);
if (ret)
goto err_wqe_buf;
}
@@ -518,6 +518,7 @@ err_srq_db:
err_srq_buf:
free_srq_buf(hr_dev, srq);
err_out:
+ mutex_destroy(&srq->mutex);
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
return ret;
@@ -532,6 +533,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
free_srqn(hr_dev, srq);
free_srq_db(hr_dev, srq, udata);
free_srq_buf(hr_dev, srq);
+ mutex_destroy(&srq->mutex);
return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_trace.h b/drivers/infiniband/hw/hns/hns_roce_trace.h
new file mode 100644
index 000000000000..59ceb591b3a1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_trace.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns_roce
+
+#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HNS_ROCE_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/string_choices.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+DECLARE_EVENT_CLASS(flush_head_template,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, pi)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->pi = pi;
+ __entry->type = type;
+ ),
+
+ TP_printk("%s 0x%lx flush head 0x%x.",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->pi)
+);
+
+DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+
+#define MAX_SGE_PER_WQE 64
+#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE)
+DECLARE_EVENT_CLASS(wqe_template,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len,
+ u64 id, enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, idx)
+ __array(u32, wqe,
+ MAX_WQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ __field(u64, id)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->idx = idx;
+ __entry->id = id;
+ __entry->len = len / sizeof(__le32);
+ __entry->type = type;
+ for (int i = 0; i < __entry->len; i++)
+ __entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]);
+ ),
+
+ TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->idx, __entry->id,
+ __print_array(__entry->wqe, __entry->len,
+ sizeof(__le32)))
+);
+
+DEFINE_EVENT(wqe_template, hns_sq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_rq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_srq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+
+TRACE_EVENT(hns_ae_info,
+ TP_PROTO(int event_type, void *aeqe, unsigned int len),
+ TP_ARGS(event_type, aeqe, len),
+
+ TP_STRUCT__entry(__field(int, event_type)
+ __array(u32, aeqe,
+ HNS_ROCE_V3_EQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(__entry->event_type = event_type;
+ __entry->len = len / sizeof(__le32);
+ for (int i = 0; i < __entry->len; i++)
+ __entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]);
+ ),
+
+ TP_printk("event %2d aeqe: %s", __entry->event_type,
+ __print_array(__entry->aeqe, __entry->len, sizeof(__le32)))
+);
+
+TRACE_EVENT(hns_mr,
+ TP_PROTO(struct hns_roce_mr *mr),
+ TP_ARGS(mr),
+
+ TP_STRUCT__entry(__field(u64, iova)
+ __field(u64, size)
+ __field(u32, key)
+ __field(u32, pd)
+ __field(u32, pbl_hop_num)
+ __field(u32, npages)
+ __field(int, type)
+ __field(int, enabled)
+ ),
+
+ TP_fast_assign(__entry->iova = mr->iova;
+ __entry->size = mr->size;
+ __entry->key = mr->key;
+ __entry->pd = mr->pd;
+ __entry->pbl_hop_num = mr->pbl_hop_num;
+ __entry->npages = mr->npages;
+ __entry->type = mr->type;
+ __entry->enabled = mr->enabled;
+ ),
+
+ TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d",
+ __entry->iova, __entry->size, __entry->key,
+ __entry->pd, __entry->pbl_hop_num, __entry->npages,
+ __entry->type, __entry->enabled)
+);
+
+TRACE_EVENT(hns_buf_attr,
+ TP_PROTO(struct hns_roce_buf_attr *attr),
+ TP_ARGS(attr),
+
+ TP_STRUCT__entry(__field(unsigned int, region_count)
+ __field(unsigned int, region0_size)
+ __field(int, region0_hopnum)
+ __field(unsigned int, region1_size)
+ __field(int, region1_hopnum)
+ __field(unsigned int, region2_size)
+ __field(int, region2_hopnum)
+ __field(unsigned int, page_shift)
+ __field(bool, mtt_only)
+ ),
+
+ TP_fast_assign(__entry->region_count = attr->region_count;
+ __entry->region0_size = attr->region[0].size;
+ __entry->region0_hopnum = attr->region[0].hopnum;
+ __entry->region1_size = attr->region[1].size;
+ __entry->region1_hopnum = attr->region[1].hopnum;
+ __entry->region2_size = attr->region[2].size;
+ __entry->region2_hopnum = attr->region[2].hopnum;
+ __entry->page_shift = attr->page_shift;
+ __entry->mtt_only = attr->mtt_only;
+ ),
+
+ TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n",
+ __entry->region_count, __entry->page_shift,
+ str_yes_no(__entry->mtt_only),
+ __entry->region0_size, __entry->region0_hopnum,
+ __entry->region1_size, __entry->region1_hopnum,
+ __entry->region2_size, __entry->region2_hopnum)
+);
+
+DECLARE_EVENT_CLASS(cmdq,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc),
+
+ TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev))
+ __field(u16, opcode)
+ __field(u16, flag)
+ __field(u16, retval)
+ __array(u32, data, 6)
+ ),
+
+ TP_fast_assign(__assign_str(dev_name);
+ __entry->opcode = le16_to_cpu(desc->opcode);
+ __entry->flag = le16_to_cpu(desc->flag);
+ __entry->retval = le16_to_cpu(desc->retval);
+ for (int i = 0; i < 6; i++)
+ __entry->data[i] = le32_to_cpu(desc->data[i]);
+ ),
+
+ TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n",
+ __get_str(dev_name), __entry->opcode,
+ __entry->flag, __entry->retval,
+ __print_array(__entry->data, 6, sizeof(__le32)))
+);
+
+DEFINE_EVENT(cmdq, hns_cmdq_req,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+DEFINE_EVENT(cmdq, hns_cmdq_resp,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+
+#endif /* __HNS_ROCE_TRACE_H */
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hns_roce_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>