diff options
Diffstat (limited to 'drivers/infiniband/hw/hns')
-rw-r--r-- | drivers/infiniband/hw/hns/Kconfig | 20 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/Makefile | 10 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_ah.c | 44 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_alloc.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_cq.c | 33 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_debugfs.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_device.h | 57 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hem.c | 144 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hem.h | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 710 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 32 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_main.c | 42 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_mr.c | 41 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_qp.c | 148 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_restrack.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_srq.c | 20 | ||||
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_trace.h | 216 |
17 files changed, 1088 insertions, 452 deletions
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index ab3fbba70789..44cdb706fe27 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,21 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_HNS - tristate "HNS RoCE Driver" - depends on NET_VENDOR_HISILICON - depends on ARM64 || (COMPILE_TEST && 64BIT) - depends on (HNS_DSAF && HNS_ENET) || HNS3 - help - This is a RoCE/RDMA driver for the Hisilicon RoCE engine. - - To compile HIP08 driver as module, choose M here. - config INFINIBAND_HNS_HIP08 - bool "Hisilicon Hip08 Family RoCE support" - depends on INFINIBAND_HNS && PCI && HNS3 - depends on INFINIBAND_HNS=m || HNS3=y + tristate "Hisilicon Hip08 Family RoCE support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on PCI && HNS3 help RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. - To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v2. + To compile this driver, choose M here. This module will be called + hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index be1e1cdbcfa8..baf592e6f21b 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -4,13 +4,11 @@ # ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 +ccflags-y += -I $(src) -hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ +hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ - hns_roce_debugfs.o + hns_roce_debugfs.o hns_roce_hw_v2.o -ifdef CONFIG_INFINIBAND_HNS_HIP08 -hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o -endif +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b4209b6aed8d..307c35888b30 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -33,7 +33,6 @@ #include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> -#include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" @@ -59,11 +58,15 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); struct hns_roce_ib_create_ah_resp resp = {}; struct hns_roce_ah *ah = to_hr_ah(ibah); - int ret = 0; - u32 max_sl; - - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) - return -EOPNOTSUPP; + u8 tclass = get_tclass(grh); + u8 priority = 0; + u8 tc_mode = 0; + int ret; + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) { + ret = -EOPNOTSUPP; + goto err_out; + } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; @@ -74,15 +77,24 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); - ah->av.tclass = get_tclass(grh); - - ah->av.sl = rdma_ah_get_sl(ah_attr); - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); - if (unlikely(ah->av.sl > max_sl)) { - ibdev_err_ratelimited(&hr_dev->ib_dev, - "failed to set sl, sl (%u) shouldn't be larger than %u.\n", - ah->av.sl, max_sl); - return -EINVAL; + ah->av.tclass = tclass; + + ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority); + if (ret == -EOPNOTSUPP) + ret = 0; + + if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + goto err_out; + + if (tc_mode == HNAE3_TC_MAP_MODE_DSCP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + ah->av.sl = priority; + else + ah->av.sl = rdma_ah_get_sl(ah_attr); + + if (!check_sl_valid(hr_dev, ah->av.sl)) { + ret = -EINVAL; + goto err_out; } memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); @@ -99,6 +111,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } if (udata) { + resp.priority = ah->av.sl; + resp.tc_mode = tc_mode; memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 11a78ceae568..6ee911f6885b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -153,8 +153,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, return total; } -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, struct ib_umem *umem, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; @@ -176,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) ida_destroy(&hr_dev->xrcd_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ida_destroy(&hr_dev->srq_table.srq_ida.ida); + xa_destroy(&hr_dev->srq_table.xa); + } hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); ida_destroy(&hr_dev->mr_table.mtpt_ida.ida); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 7250d0643b5c..3a5c93c9fb3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -149,7 +149,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return ret; } - ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); + ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; @@ -163,7 +163,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return 0; err_xa: - xa_erase(&cq_table->array, hr_cq->cqn); + xa_erase_irq(&cq_table->array, hr_cq->cqn); err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); @@ -179,10 +179,10 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); if (ret) - dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, - hr_cq->cqn); + dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", + ret, hr_cq->cqn); - xa_erase(&cq_table->array, hr_cq->cqn); + xa_erase_irq(&cq_table->array, hr_cq->cqn); /* Waiting interrupt process procedure carried out */ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); @@ -353,9 +353,10 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, } int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata) + struct uverbs_attr_bundle *attrs) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct ib_udata *udata = &attrs->driver_udata; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -476,13 +477,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) struct ib_event event; struct ib_cq *ibcq; - hr_cq = xa_load(&hr_dev->cq_table.array, - cqn & (hr_dev->caps.num_cqs - 1)); - if (!hr_cq) { - dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn); - return; - } - if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { @@ -491,7 +485,16 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) return; } - refcount_inc(&hr_cq->refcount); + xa_lock(&hr_dev->cq_table.array); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (hr_cq) + refcount_inc(&hr_cq->refcount); + xa_unlock(&hr_dev->cq_table.array); + if (!hr_cq) { + dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn); + return; + } ibcq = &hr_cq->ib_cq; if (ibcq->event_handler) { @@ -534,4 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + xa_destroy(&hr_dev->cq_table.array); + mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index e8febb40f645..b869cdc54118 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -5,6 +5,7 @@ #include <linux/debugfs.h> #include <linux/device.h> +#include <linux/pci.h> #include "hns_roce_device.h" @@ -86,7 +87,7 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) { struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs; - dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), + dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev), hns_roce_dbgfs_root); create_sw_stat_debugfs(hr_dev, dbgfs->root); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c3cbd0a494bf..1dcc9cbb4678 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -83,6 +83,7 @@ #define MR_TYPE_DMA 0x03 #define HNS_ROCE_FRMR_MAX_PA 512 +#define HNS_ROCE_FRMR_ALIGN_SIZE 128 #define PKEY_ID 0xffff #define NODE_DESC_SIZE 64 @@ -91,6 +92,8 @@ /* Configure to HW for PAGE_SIZE larger than 4KB */ #define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) +#define ATOMIC_WR_LEN 8 + #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -100,6 +103,9 @@ #define CQ_BANKID_SHIFT 2 #define CQ_BANKID_MASK GENMASK(1, 0) +#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF +#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF + enum { SERV_TYPE_RC, SERV_TYPE_UC, @@ -184,6 +190,9 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) +#define HNS_HW_MAX_PAGE_SHIFT 27 +#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT) + struct hns_roce_uar { u64 pfn; unsigned long index; @@ -480,12 +489,6 @@ struct hns_roce_bank { u32 next; /* Next ID to allocate. */ }; -struct hns_roce_idx_table { - u32 *spare_idx; - u32 head; - u32 tail; -}; - struct hns_roce_qp_table { struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; @@ -494,7 +497,7 @@ struct hns_roce_qp_table { struct mutex scc_mutex; struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; struct mutex bank_mutex; - struct hns_roce_idx_table idx_table; + struct xarray dip_xa; }; struct hns_roce_cq_table { @@ -584,6 +587,7 @@ struct hns_roce_dev; enum { HNS_ROCE_FLUSH_FLAG = 0, + HNS_ROCE_STOP_FLUSH_FLAG = 1, }; struct hns_roce_work { @@ -645,6 +649,10 @@ struct hns_roce_qp { struct hns_user_mmap_entry *dwqe_mmap_entry; u32 config; enum hns_roce_cong_type cong_type; + u8 tc_mode; + u8 priority; + spinlock_t flush_lock; + struct hns_roce_dip *dip; }; struct hns_roce_ib_iboe { @@ -710,6 +718,7 @@ struct hns_roce_eq { int shift; int event_type; int sub_type; + struct work_struct work; }; struct hns_roce_eq_table { @@ -923,8 +932,7 @@ struct hns_roce_hw { int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, void *mb_buf); - int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, - struct hns_roce_mr *mr); + int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr); int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, @@ -950,6 +958,8 @@ struct hns_roce_hw { int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); int (*query_hw_counter)(struct hns_roce_dev *hr_dev, u64 *stats, u32 port, int *hw_counters); + int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp, + u8 *tc_mode, u8 *priority); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -969,8 +979,6 @@ struct hns_roce_dev { enum hns_roce_device_state state; struct list_head qp_list; /* list of all qps on this dev */ spinlock_t qp_list_lock; /* protect qp_list */ - struct list_head dip_list; /* list of all dest ips on this dev */ - spinlock_t dip_list_lock; /* protect dip_list */ struct list_head pgdir_list; struct mutex pgdir_mutex; @@ -1019,6 +1027,26 @@ struct hns_roce_dev { atomic64_t *dfx_cnt; }; +enum hns_roce_trace_type { + TRACE_SQ, + TRACE_RQ, + TRACE_SRQ, +}; + +static inline const char *trace_type_to_str(enum hns_roce_trace_type type) +{ + switch (type) { + case TRACE_SQ: + return "SQ"; + case TRACE_RQ: + return "RQ"; + case TRACE_SRQ: + return "SRQ"; + default: + return "UNKNOWN"; + } +} + static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) { return container_of(ib_dev, struct hns_roce_dev, ib_dev); @@ -1228,7 +1256,7 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, struct hns_roce_buf *buf, unsigned int page_shift); -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift); @@ -1261,7 +1289,7 @@ __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, @@ -1276,6 +1304,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); @@ -1292,4 +1321,6 @@ struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); +bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); + #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index a4b3f19161dc..ca0798224e56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -281,7 +281,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, return hem; fail: - hns_roce_free_hem(hr_dev, hem); + kfree(hem); return NULL; } @@ -300,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; unsigned long mhop_obj = obj; u32 l0_idx, l1_idx, l2_idx; u32 chunk_ba_num; @@ -331,14 +331,14 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support mhop.hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", - table->type, index->buf, table->num_hem); + dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n", + table->type, index->buf, table->num_hem); return -EINVAL; } @@ -448,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; u32 step_idx; int ret = 0; if (index->inited & HEM_INDEX_L0) { ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0); if (ret) { - ibdev_err(ibdev, "set HEM step 0 failed!\n"); + dev_err(dev, "set HEM step 0 failed!\n"); goto out; } } @@ -463,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, if (index->inited & HEM_INDEX_L1) { ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1); if (ret) { - ibdev_err(ibdev, "set HEM step 1 failed!\n"); + dev_err(dev, "set HEM step 1 failed!\n"); goto out; } } @@ -475,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = mhop->hop_num; ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx); if (ret) - ibdev_err(ibdev, "set HEM step last failed!\n"); + dev_err(dev, "set HEM step last failed!\n"); } out: return ret; @@ -485,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_hem_index index = {}; struct hns_roce_hem_mhop mhop = {}; + struct device *dev = hr_dev->dev; int ret; ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "calc hem config failed!\n"); + dev_err(dev, "calc hem config failed!\n"); return ret; } @@ -504,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, ret = alloc_mhop_hem(hr_dev, table, &mhop, &index); if (ret) { - ibdev_err(ibdev, "alloc mhop hem failed!\n"); + dev_err(dev, "alloc mhop hem failed!\n"); goto out; } @@ -512,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, if (table->type < HEM_TYPE_MTT) { ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "set HEM address to HW failed!\n"); + dev_err(dev, "set HEM address to HW failed!\n"); goto err_alloc; } } @@ -575,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; u32 hop_num = mhop->hop_num; u32 chunk_ba_num; u32 step_idx; @@ -605,21 +605,21 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx); if (ret) - ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n", - hop_num, ret); + dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n", + hop_num, ret); if (index->inited & HEM_INDEX_L1) { ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1); if (ret) - ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n", - ret); + dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n", + ret); } if (index->inited & HEM_INDEX_L0) { ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0); if (ret) - ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n", - ret); + dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n", + ret); } } } @@ -629,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, unsigned long obj, int check_refcount) { - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_hem_index index = {}; struct hns_roce_hem_mhop mhop = {}; + struct device *dev = hr_dev->dev; int ret; ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "calc hem config failed!\n"); + dev_err(dev, "calc hem config failed!\n"); return; } @@ -672,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); if (ret) - dev_warn(dev, "failed to clear HEM base address, ret = %d.\n", - ret); + dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n", + ret); hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; @@ -877,6 +877,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, if (hns_roce_check_whether_mhop(hr_dev, table->type)) { hns_roce_cleanup_mhop_hem_table(hr_dev, table); + mutex_destroy(&table->mutex); return; } @@ -891,6 +892,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, hns_roce_free_hem(hr_dev, table->hem[i]); } + mutex_destroy(&table->mutex); kfree(table->hem); } @@ -929,6 +931,7 @@ struct hns_roce_hem_item { size_t count; /* max ba numbers */ int start; /* start buf offset in this hem */ int end; /* end buf offset in this hem */ + bool exist_bt; }; /* All HEM items are linked in a tree structure */ @@ -957,6 +960,7 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } } + hem->exist_bt = exist_bt; hem->count = count; hem->start = start; hem->end = end; @@ -967,34 +971,32 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } static void hem_list_free_item(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, bool exist_bt) + struct hns_roce_hem_item *hem) { - if (exist_bt) + if (hem->exist_bt) dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, hem->addr, hem->dma_addr); kfree(hem); } static void hem_list_free_all(struct hns_roce_dev *hr_dev, - struct list_head *head, bool exist_bt) + struct list_head *head) { struct hns_roce_hem_item *hem, *temp_hem; list_for_each_entry_safe(hem, temp_hem, head, list) { list_del(&hem->list); - hem_list_free_item(hr_dev, hem, exist_bt); + hem_list_free_item(hr_dev, hem); } } -static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, - u64 table_addr) +static void hem_list_link_bt(void *base_addr, u64 table_addr) { *(u64 *)(base_addr) = table_addr; } /* assign L0 table address to hem from root bt */ -static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, void *cpu_addr, +static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr, u64 phy_addr) { hem->addr = cpu_addr; @@ -1041,9 +1043,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level) * @bt_level: base address table level * @unit: ba entries per bt page */ -static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) +static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) { - u32 step; + u64 step; int max; int i; @@ -1079,11 +1081,15 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, { struct hns_roce_buf_region *r; int total = 0; - int step; + u64 step; int i; for (i = 0; i < region_cnt; i++) { r = (struct hns_roce_buf_region *)®ions[i]; + /* when r->hopnum = 0, the region should not occupy root_ba. */ + if (!r->hopnum) + continue; + if (r->hopnum > 1) { step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step > 0) @@ -1110,7 +1116,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, int ret = 0; int max_ofs; int level; - u32 step; + u64 step; int end; if (hopnum <= 1) @@ -1134,10 +1140,12 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, /* config L1 bt to last bt and link them to corresponding parent */ for (level = 1; level < hopnum; level++) { - cur = hem_list_search_item(&mid_bt[level], offset); - if (cur) { - hem_ptrs[level] = cur; - continue; + if (!hem_list_is_bottom_bt(hopnum, level)) { + cur = hem_list_search_item(&mid_bt[level], offset); + if (cur) { + hem_ptrs[level] = cur; + continue; + } } step = hem_list_calc_ba_range(hopnum, level, unit); @@ -1147,7 +1155,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } start_aligned = (distance / step) * step + r->offset; - end = min_t(int, start_aligned + step - 1, max_ofs); + end = min_t(u64, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, true); if (!cur) { @@ -1163,8 +1171,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, if (level > 1) { pre = hem_ptrs[level - 1]; step = (cur->start - pre->start) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, pre->addr + step, - cur->dma_addr); + hem_list_link_bt(pre->addr + step, cur->dma_addr); } } @@ -1176,7 +1183,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, err_exit: for (level = 1; level < hopnum; level++) - hem_list_free_all(hr_dev, &temp_list[level], true); + hem_list_free_all(hr_dev, &temp_list[level]); return ret; } @@ -1217,16 +1224,26 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, { struct hns_roce_hem_item *hem; + /* This is on the has_mtt branch, if r->hopnum + * is 0, there is no root_ba to reuse for the + * region's fake hem, so a dma_alloc request is + * necessary here. + */ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false); + r->count, !r->hopnum); if (!hem) return -ENOMEM; - hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + /* The root_ba can be reused only when r->hopnum > 0. */ + if (r->hopnum) + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); - return r->count; + /* If r->hopnum == 0, 0 is returned, + * so that the root_bt entry is not occupied. + */ + return r->hopnum ? r->count : 0; } static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, @@ -1236,7 +1253,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem, *temp_hem; int total = 0; int offset; - int step; + u64 step; step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step < 1) @@ -1245,7 +1262,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, branch_head, list) { offset = (hem->start - r->offset) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); + hem_list_link_bt(cpu_base + offset, hem->dma_addr); total++; } @@ -1270,7 +1287,7 @@ setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, return -ENOMEM; total = 0; - for (i = 0; i < region_cnt && total < max_ba_num; i++) { + for (i = 0; i < region_cnt && total <= max_ba_num; i++) { r = ®ions[i]; if (!r->count) continue; @@ -1336,14 +1353,19 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, region_cnt); if (ret) { for (i = 0; i < region_cnt; i++) - hem_list_free_all(hr_dev, &head.branch[i], false); + hem_list_free_all(hr_dev, &head.branch[i]); - hem_list_free_all(hr_dev, &head.root, true); + hem_list_free_all(hr_dev, &head.root); } return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1352,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1369,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1401,10 +1427,9 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) - hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j], - j != 0); + hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]); - hem_list_free_all(hr_dev, &hem_list->root_bt, true); + hem_list_free_all(hr_dev, &hem_list->root_bt); INIT_LIST_HEAD(&hem_list->btm_bt); hem_list->root_ba = 0; } @@ -1427,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 6fb51db9682b..9c415b2541af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -57,16 +57,16 @@ enum { }; #define check_whether_bt_num_3(type, hop_num) \ - (type < HEM_TYPE_MTT && hop_num == 2) + ((type) < HEM_TYPE_MTT && (hop_num) == 2) #define check_whether_bt_num_2(type, hop_num) \ - ((type < HEM_TYPE_MTT && hop_num == 1) || \ - (type >= HEM_TYPE_MTT && hop_num == 2)) + (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == 2)) #define check_whether_bt_num_1(type, hop_num) \ - ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \ - (type >= HEM_TYPE_MTT && hop_num == 1) || \ - (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0)) + (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0)) struct hns_roce_hem { void *buf; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ba7ae792d279..fa8747656f25 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -36,19 +36,22 @@ #include <linux/iopoll.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/workqueue.h> #include <net/addrconf.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" +#define CREATE_TRACE_POINTS +#include "hns_roce_trace.h" + enum { CMD_RST_PRC_OTHERS, CMD_RST_PRC_SUCCESS, @@ -372,19 +375,12 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct ib_device *ibdev = &hr_dev->ib_dev; - if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || - hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", - hr_qp->state); + hr_qp->state == IB_QPS_RTR)) return -EINVAL; - } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { - ibdev_err(ibdev, "failed to post WQE, dev state %d!\n", - hr_dev->state); + else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; - } return 0; } @@ -443,10 +439,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); - - if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) - return -EINVAL; - hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); ud_sq_wqe->sgid_index = ah->av.gid_index; @@ -478,7 +470,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, valid_num_sge = calc_wr_sge_num(wr, &msg_len); ret = set_ud_opcode(ud_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; ud_sq_wqe->msg_len = cpu_to_le32(msg_len); @@ -582,10 +574,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->msg_len = cpu_to_le32(msg_len); ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO, (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, @@ -595,11 +587,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + if (msg_len != ATOMIC_WR_LEN) + return -EINVAL; set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); - else if (wr->opcode != IB_WR_REG_MR) + } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + if (ret) + return ret; + } /* * The pipeline can sequentially post all valid WQEs into WQ buffer, @@ -675,6 +672,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, #define HNS_ROCE_SL_SHIFT 2 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + if (unlikely(qp->state == IB_QPS_ERR)) { + flush_cqe(hr_dev, qp); + return; + } /* All kinds of DirectWQE have the same header field layout */ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); @@ -739,6 +740,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, else ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); + trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift, + wr->wr_id, TRACE_SQ); if (unlikely(ret)) { *bad_wr = wr; goto out; @@ -808,6 +811,9 @@ static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge); + + trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift, + wr->wr_id, TRACE_RQ); } static int hns_roce_v2_post_recv(struct ib_qp *ibqp, @@ -944,7 +950,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) static void update_srq_db(struct hns_roce_srq *srq) { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - struct hns_roce_v2_db db; + struct hns_roce_v2_db db = {}; hr_reg_write(&db, DB_TAG, srq->srqn); hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB); @@ -985,6 +991,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); fill_wqe_idx(srq, wqe_idx); srq->wrid[wqe_idx] = wr->wr_id; + + trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift, + wr->wr_id, TRACE_SRQ); } if (likely(nreq)) { @@ -1273,22 +1282,47 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret) return -EIO; } -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) +{ + static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = { + {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++) + if (cmdq_tx_timeout[i].opcode == opcode) + return cmdq_tx_timeout[i].tx_timeout; + + return tx_timeout; +} + +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) +{ + u32 timeout = 0; + + do { + if (hns_roce_cmq_csq_done(hr_dev)) + break; + udelay(1); + } while (++timeout < tx_timeout); +} + +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, + int num, u32 tx_timeout) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - u32 timeout = 0; u16 desc_ret; u32 tail; int ret; int i; - spin_lock_bh(&csq->lock); - tail = csq->head; for (i = 0; i < num; i++) { + trace_hns_cmdq_req(hr_dev, &desc[i]); + csq->desc[csq->head++] = desc[i]; if (csq->head == csq->desc_num) csq->head = 0; @@ -1299,27 +1333,19 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - do { - if (hns_roce_cmq_csq_done(hr_dev)) - break; - udelay(1); - } while (++timeout < priv->cmq.tx_timeout); - + hns_roce_wait_csq_done(hr_dev, tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { + trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]); + /* check the result of hardware write back */ - desc[i] = csq->desc[tail++]; + desc_ret = le16_to_cpu(csq->desc[tail++].retval); if (tail == csq->desc_num) tail = 0; - - desc_ret = le16_to_cpu(desc[i].retval); if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue; - dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { @@ -1334,14 +1360,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = -EAGAIN; } - spin_unlock_bh(&csq->lock); - if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); return ret; } +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + u16 opcode = le16_to_cpu(desc->opcode); + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; + u32 rsv_tail; + int ret; + int i; + + while (try_cnt) { + try_cnt--; + + spin_lock_bh(&csq->lock); + rsv_tail = csq->head; + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); + if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && + try_cnt) { + spin_unlock_bh(&csq->lock); + mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); + continue; + } + + for (i = 0; i < num; i++) { + desc[i] = csq->desc[rsv_tail++]; + if (rsv_tail == csq->desc_num) + rsv_tail = 0; + } + spin_unlock_bh(&csq->lock); + break; + } + + if (ret) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = %d.\n", + opcode, ret); + + return ret; +} + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1658,8 +1724,8 @@ static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) { bd_idx = i / CNT_PER_DESC; - if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) && - bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC) + if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC && + !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT))) break; cnt_data = (__le64 *)&desc[bd_idx].data[0]; @@ -2105,7 +2171,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz)); - caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } else { u32 func_num = max_t(u32, 1, hr_dev->func_num); @@ -2461,14 +2527,16 @@ static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev, static struct hns_roce_link_table * alloc_link_table_buf(struct hns_roce_dev *hr_dev) { + u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num; struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_link_table *link_tbl; u32 pg_shift, size, min_size; link_tbl = &priv->ext_llm; pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT; - size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ; - min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) << pg_shift; + size = hr_dev->caps.num_qps * hr_dev->func_num * + HNS_ROCE_V2_EXT_LLM_ENTRY_SZ; + min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift; /* Alloc data table */ size = max(size, min_size); @@ -2535,20 +2603,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev) free_link_table_buf(hr_dev, &priv->ext_llm); } -static void free_dip_list(struct hns_roce_dev *hr_dev) +static void free_dip_entry(struct hns_roce_dev *hr_dev) { struct hns_roce_dip *hr_dip; - struct hns_roce_dip *tmp; - unsigned long flags; + unsigned long idx; - spin_lock_irqsave(&hr_dev->dip_list_lock, flags); + xa_lock(&hr_dev->qp_table.dip_xa); - list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) { - list_del(&hr_dip->node); + xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) { + __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx); kfree(hr_dip); } - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); + xa_unlock(&hr_dev->qp_table.dip_xa); } static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev) @@ -2671,6 +2738,8 @@ static void free_mr_exit(struct hns_roce_dev *hr_dev) kfree(free_mr->rsv_pd); free_mr->rsv_pd = NULL; } + + mutex_destroy(&free_mr->mutex); } static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) @@ -2748,8 +2817,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, IB_QPS_INIT, NULL); if (ret) { - ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n", + ret); return ret; } @@ -2821,8 +2890,10 @@ static int free_mr_init(struct hns_roce_dev *hr_dev) mutex_init(&free_mr->mutex); ret = free_mr_alloc_res(hr_dev); - if (ret) + if (ret) { + mutex_destroy(&free_mr->mutex); return ret; + } ret = free_mr_modify_qp(hr_dev); if (ret) @@ -2943,13 +3014,16 @@ err_llm_init_failed: static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); + hns_roce_function_clear(hr_dev); if (!hr_dev->is_vf) hns_roce_free_link_table(hr_dev); if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) - free_dip_list(hr_dev); + free_dip_entry(hr_dev); } static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, @@ -3208,13 +3282,14 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, /* Aligned to the hardware address access unit */ for (i = 0; i < ARRAY_SIZE(pages); i++) - pages[i] >>= 6; + pages[i] >>= MPT_PBL_BUF_ADDR_S; pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); - hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, + upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S)); mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); @@ -3307,8 +3382,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, - void *mb_buf, struct hns_roce_mr *mr) +static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); struct hns_roce_v2_mpt_entry *mpt_entry; @@ -3335,8 +3409,10 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); - hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> + MPT_PBL_BA_ADDR_S)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, + upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S)); return 0; } @@ -3387,8 +3463,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp) ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); if (ret) { - ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n", + ret); return ret; } @@ -3427,9 +3503,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) ret = free_mr_post_send_lp_wqe(hr_qp); if (ret) { - ibdev_err(ibdev, - "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", - hr_qp->qpn, ret); + ibdev_err_ratelimited(ibdev, + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", + hr_qp->qpn, ret); break; } @@ -3440,16 +3516,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) while (cqe_cnt) { npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); if (npolled < 0) { - ibdev_err(ibdev, - "failed to poll cqe for free mr, remain %d cqe.\n", - cqe_cnt); + ibdev_err_ratelimited(ibdev, + "failed to poll cqe for free mr, remain %d cqe.\n", + cqe_cnt); goto out; } if (time_after(jiffies, end)) { - ibdev_err(ibdev, - "failed to poll cqe for free mr and timeout, remain %d cqe.\n", - cqe_cnt); + ibdev_err_ratelimited(ibdev, + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", + cqe_cnt); goto out; } cqe_cnt -= npolled; @@ -3582,14 +3658,14 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ, to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); - hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> 3); - hr_reg_write(cq_context, CQC_CQE_BA_H, (dma_handle >> (32 + 3))); + hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S); + hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S); hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN, hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB); hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L, ((u32)hr_cq->db.dma) >> 1); hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H, - hr_cq->db.dma >> 32); + hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S); hr_reg_write(cq_context, CQC_CQ_MAX_CNT, HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM); hr_reg_write(cq_context, CQC_CQ_PERIOD, @@ -3711,8 +3787,9 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, wc->status == IB_WC_WR_FLUSH_ERR)) return; - ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, + ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n", + cqe_status); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe, cq->cqe_size, false); wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); @@ -4217,8 +4294,7 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, } static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *context) { hr_reg_write(context, QPC_SGE_SHIFT, to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, @@ -4240,9 +4316,7 @@ static inline int get_pdn(struct ib_pd *ib_pd) } static void modify_qp_reset_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *context) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -4259,7 +4333,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs)); - set_qpc_wqe_cnt(hr_qp, context, qpc_mask); + set_qpc_wqe_cnt(hr_qp, context); /* No VLAN need to set 0xFFF */ hr_reg_write(context, QPC_VLAN_ID, 0xfff); @@ -4300,7 +4374,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, } static void modify_qp_init_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4394,12 +4467,14 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, upper_32_bits(to_hr_hw_page_addr(mtts[0]))); hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H); - context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); - qpc_mask->rq_nxt_blk_addr = 0; - - hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, - upper_32_bits(to_hr_hw_page_addr(mtts[1]))); - hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + context->rq_nxt_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts[1])); + qpc_mask->rq_nxt_blk_addr = 0; + hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); + hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + } return 0; } @@ -4521,16 +4596,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } - hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> 4); + hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S); hr_reg_clear(qpc_mask, QPC_TRRL_BA_L); - context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4)); + context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S); qpc_mask->trrl_ba = 0; - hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4)); + hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S); hr_reg_clear(qpc_mask, QPC_TRRL_BA_H); - context->irrl_ba = cpu_to_le32(irrl_ba >> 6); + context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S); qpc_mask->irrl_ba = 0; - hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6)); + hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S); hr_reg_clear(qpc_mask, QPC_IRRL_BA_H); hr_reg_enable(context, QPC_RMT_E2E); @@ -4592,8 +4667,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX); hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX); +#define MAX_LP_SGEN 3 /* rocee send 2^lp_sgen_ini segs every time */ - hr_reg_write(context, QPC_LP_SGEN_INI, 3); + hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN); hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI); if (udata && ibqp->qp_type == IB_QPT_RC && @@ -4619,8 +4695,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return 0; } -static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, +static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4667,26 +4742,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn) +{ + struct hns_roce_dip *hr_dip; + int ret; + + hr_dip = xa_load(dip_xa, qpn); + if (hr_dip) + return 0; + + hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL); + if (!hr_dip) + return -ENOMEM; + + ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL)); + if (ret) + kfree(hr_dip); + + return ret; +} + static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, u32 *dip_idx) { const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx; - u32 *head = &hr_dev->qp_table.idx_table.head; - u32 *tail = &hr_dev->qp_table.idx_table.tail; + struct xarray *dip_xa = &hr_dev->qp_table.dip_xa; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_dip *hr_dip; - unsigned long flags; + unsigned long idx; int ret = 0; - spin_lock_irqsave(&hr_dev->dip_list_lock, flags); + ret = alloc_dip_entry(dip_xa, ibqp->qp_num); + if (ret) + return ret; - spare_idx[*tail] = ibqp->qp_num; - *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1); + xa_lock(dip_xa); - list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { - if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) { + xa_for_each(dip_xa, idx, hr_dip) { + if (hr_dip->qp_cnt && + !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { *dip_idx = hr_dip->dip_idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; goto out; } } @@ -4694,19 +4792,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, /* If no dgid is found, a new dip and a mapping between dgid and * dip_idx will be created. */ - hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC); - if (!hr_dip) { - ret = -ENOMEM; - goto out; + xa_for_each(dip_xa, idx, hr_dip) { + if (hr_dip->qp_cnt) + continue; + + *dip_idx = idx; + memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + hr_dip->dip_idx = idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; + break; } - memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - hr_dip->dip_idx = *dip_idx = spare_idx[*head]; - *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1); - list_add_tail(&hr_dip->node, &hr_dev->dip_list); + /* This should never happen. */ + if (WARN_ON_ONCE(!hr_qp->dip)) + ret = -ENOSPC; out: - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); + xa_unlock(dip_xa); return ret; } @@ -4828,6 +4931,69 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return 0; } +static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp, + u8 *tc_mode, u8 *priority) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + if (!ops->get_dscp_prio) + return -EOPNOTSUPP; + + return ops->get_dscp_prio(handle, dscp, tc_mode, priority); +} + +bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl) +{ + u32 max_sl; + + max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); + if (unlikely(sl > max_sl)) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to set SL(%u). Shouldn't be larger than %u.\n", + sl, max_sl); + return false; + } + + return true; +} + +static int hns_roce_set_sl(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh), + &hr_qp->tc_mode, &hr_qp->priority); + if (ret && ret != -EOPNOTSUPP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + ibdev_err_ratelimited(ibdev, + "failed to get dscp, ret = %d.\n", ret); + return ret; + } + + if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP && + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + hr_qp->sl = hr_qp->priority; + else + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + + if (!check_sl_valid(hr_dev, hr_qp->sl)) + return -EINVAL; + + hr_reg_write(context, QPC_SL, hr_qp->sl); + hr_reg_clear(qpc_mask, QPC_SL); + + return 0; +} + static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4843,25 +5009,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, int is_roce_protocol; u16 vlan_id = 0xffff; bool is_udp = false; - u32 max_sl; u8 ib_port; u8 hr_port; int ret; - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); - if (unlikely(sl > max_sl)) { - ibdev_err_ratelimited(ibdev, - "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n", - sl, max_sl); - return -EINVAL; - } - /* * If free_mr_en of qp is set, it means that this qp comes from * free mr. This qp will perform the loopback operation. * In the loopback scenario, only sl needs to be set. */ if (hr_qp->free_mr_en) { + if (!check_sl_valid(hr_dev, sl)) + return -EINVAL; hr_reg_write(context, QPC_SL, sl); hr_reg_clear(qpc_mask, QPC_SL); hr_qp->sl = sl; @@ -4931,11 +5090,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - hr_qp->sl = sl; - hr_reg_write(context, QPC_SL, hr_qp->sl); - hr_reg_clear(qpc_mask, QPC_SL); - - return 0; + return hns_roce_set_sl(ibqp, attr, context, qpc_mask); } static bool check_qp_state(enum ib_qp_state cur_state, @@ -4975,22 +5130,19 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); int ret = 0; - if (!check_qp_state(cur_state, new_state)) { - ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); + if (!check_qp_state(cur_state, new_state)) return -EINVAL; - } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); - modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); + modify_qp_reset_to_init(ibqp, context); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - modify_qp_init_to_init(ibqp, attr, context, qpc_mask); + modify_qp_init_to_init(ibqp, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask, udata); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { - ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, - qpc_mask); + ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask); } return ret; @@ -5174,6 +5326,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp, return; spin_lock_irqsave(&hr_qp->sq.lock, sq_flag); + trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ); hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head); hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX); hr_qp->state = IB_QPS_ERR; @@ -5183,6 +5336,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp, return; spin_lock_irqsave(&hr_qp->rq.lock, rq_flag); + trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ); hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head); hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX); spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag); @@ -5240,7 +5394,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); + ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -5378,7 +5532,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); + ibdev_err_ratelimited(ibdev, + "failed to query QPC, ret = %d.\n", + ret); ret = -EINVAL; goto out; } @@ -5386,7 +5542,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, state = hr_reg_read(&context, QPC_QP_ST); tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); if (tmp_qp_state == -1) { - ibdev_err(ibdev, "Illegal ib_qp_state\n"); + ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n"); ret = -EINVAL; goto out; } @@ -5479,9 +5635,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); if (ret) - ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, + "failed to modify QP to RST, ret = %d.\n", + ret); } send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -5509,17 +5665,44 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; } +static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dip *hr_dip = hr_qp->dip; + + if (!hr_dip) + return; + + xa_lock(&hr_dev->qp_table.dip_xa); + + hr_dip->qp_cnt--; + if (!hr_dip->qp_cnt) + memset(hr_dip->dgid, 0, GID_LEN_V2); + + xa_unlock(&hr_dev->qp_table.dip_xa); +} + int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + unsigned long flags; int ret; + /* Make sure flush_cqe() is completed */ + spin_lock_irqsave(&hr_qp->flush_lock, flags); + set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + flush_work(&hr_qp->flush_work.work); + + if (hr_qp->cong_type == CONG_TYPE_DIP) + put_dip_ctx_idx(hr_dev, hr_qp); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", - hr_qp->qpn, ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", + hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -5802,7 +5985,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) dev_info(hr_dev->dev, "cq_period(%u) reached the upper limit, adjusted to 65.\n", cq_period); - cq_period = HNS_ROCE_MAX_CQ_PERIOD; + cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08; } cq_period *= HNS_ROCE_CLOCK_ADJUST; } @@ -5813,9 +5996,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d.\n", - ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to process cmd when modifying CQ, ret = %d.\n", + ret); err_out: if (ret) @@ -5839,9 +6022,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn, ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_CQC, cqn); if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when querying CQ, ret = %d.\n", - ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to process cmd when querying CQ, ret = %d.\n", + ret); goto err_mailbox; } @@ -5882,11 +6065,10 @@ err_mailbox: return ret; } -static void hns_roce_irq_work_handle(struct work_struct *work) +static void dump_aeqe_log(struct hns_roce_work *irq_work) { - struct hns_roce_work *irq_work = - container_of(work, struct hns_roce_work, work); - struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; + struct hns_roce_dev *hr_dev = irq_work->hr_dev; + struct ib_device *ibdev = &hr_dev->ib_dev; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5930,6 +6112,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work) case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); break; + case HNS_ROCE_EVENT_TYPE_MB: + break; case HNS_ROCE_EVENT_TYPE_FLR: ibdev_warn(ibdev, "function level reset.\n"); break; @@ -5940,8 +6124,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "invalid xrceth error.\n"); break; default: + ibdev_info(ibdev, "Undefined event %d.\n", + irq_work->event_type); break; } +} + +static void hns_roce_irq_work_handle(struct work_struct *work) +{ + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + struct hns_roce_dev *hr_dev = irq_work->hr_dev; + int event_type = irq_work->event_type; + u32 queue_num = irq_work->queue_num; + + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + case HNS_ROCE_EVENT_TYPE_COMM_EST: + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: + case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: + hns_roce_qp_event(hr_dev, queue_num, event_type); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, queue_num, event_type); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); + break; + default: + break; + } + + dump_aeqe_log(irq_work); kfree(irq_work); } @@ -6002,14 +6224,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct device *dev = hr_dev->dev; struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); irqreturn_t aeqe_found = IRQ_NONE; + int num_aeqes = 0; int event_type; u32 queue_num; int sub_type; - while (aeqe) { + while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) { /* Make sure we read AEQ entry after we have checked the * ownership bit */ @@ -6020,25 +6242,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM); switch (event_type) { - case HNS_ROCE_EVENT_TYPE_PATH_MIG: - case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - case HNS_ROCE_EVENT_TYPE_COMM_EST: - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: - hns_roce_qp_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, queue_num, event_type); + hns_roce_flush_cqe(hr_dev, queue_num); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -6046,12 +6255,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - case HNS_ROCE_EVENT_TYPE_FLR: - break; default: - dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n", - event_type, eq->eqn, eq->cons_index); break; } @@ -6059,12 +6263,14 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, eq->sub_type = sub_type; ++eq->cons_index; aeqe_found = IRQ_HANDLED; + trace_hns_ae_info(event_type, aeqe, eq->eqe_size); atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]); hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); + ++num_aeqes; } update_eq_db(eq); @@ -6084,33 +6290,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) !!(eq->cons_index & eq->entries)) ? ceqe : NULL; } -static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq) { - struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); - irqreturn_t ceqe_found = IRQ_NONE; - u32 cqn; + queue_work(system_bh_wq, &eq->work); - while (ceqe) { - /* Make sure we read CEQ entry after we have checked the - * ownership bit - */ - dma_rmb(); - - cqn = hr_reg_read(ceqe, CEQE_CQN); - - hns_roce_cq_completion(hr_dev, cqn); - - ++eq->cons_index; - ceqe_found = IRQ_HANDLED; - atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); - - ceqe = next_ceqe_sw_v2(eq); - } - - update_eq_db(eq); - - return IRQ_RETVAL(ceqe_found); + return IRQ_HANDLED; } static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) @@ -6121,7 +6305,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ - int_work = hns_roce_v2_ceq_int(hr_dev, eq); + int_work = hns_roce_v2_ceq_int(eq); else /* Asynchronous event interrupt */ int_work = hns_roce_v2_aeq_int(hr_dev, eq); @@ -6135,6 +6319,7 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, struct pci_dev *pdev = hr_dev->pci_dev; struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); const struct hnae3_ae_ops *ops = ae_dev->ops; + enum hnae3_reset_type reset_type; irqreturn_t int_work = IRQ_NONE; u32 int_en; @@ -6146,10 +6331,12 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S); + reset_type = hr_dev->is_vf ? + HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET; + /* Set reset level for reset_event() */ if (ops->set_default_reset_request) - ops->set_default_reset_request(ae_dev, - HNAE3_FUNC_RESET); + ops->set_default_reset_request(ae_dev, reset_type); if (ops->reset_event) ops->reset_event(pdev, NULL); @@ -6219,7 +6406,7 @@ static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data) res_type == ECC_RESOURCE_SCCC) return le64_to_cpu(*data); - return le64_to_cpu(*data) << PAGE_SHIFT; + return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT; } static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type, @@ -6333,9 +6520,16 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag); } -static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + hns_roce_mtr_destroy(hr_dev, &eq->mtr); +} + +static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) { struct device *dev = hr_dev->dev; + int eqn = eq->eqn; int ret; u8 cmd; @@ -6346,12 +6540,9 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M); if (ret) - dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn); -} + dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn); -static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) -{ - hns_roce_mtr_destroy(hr_dev, &eq->mtr); + free_eq_buf(hr_dev, eq); } static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) @@ -6489,6 +6680,34 @@ free_cmd_mbox: return ret; } +static void hns_roce_ceq_work(struct work_struct *work) +{ + struct hns_roce_eq *eq = from_work(eq, work, work); + struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); + struct hns_roce_dev *hr_dev = eq->hr_dev; + int ceqe_num = 0; + u32 cqn; + + while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) { + /* Make sure we read CEQ entry after we have checked the + * ownership bit + */ + dma_rmb(); + + cqn = hr_reg_read(ceqe, CEQE_CQN); + + hns_roce_cq_completion(hr_dev, cqn); + + ++eq->cons_index; + ++ceqe_num; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); + + ceqe = next_ceqe_sw_v2(eq); + } + + update_eq_db(eq); +} + static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, int comp_num, int aeq_num, int other_num) { @@ -6520,21 +6739,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { - if (j < other_num) + if (j < other_num) { ret = request_irq(hr_dev->irq[j], hns_roce_v2_msix_interrupt_abn, 0, hr_dev->irq_names[j], hr_dev); - - else if (j < (other_num + comp_num)) + } else if (j < (other_num + comp_num)) { + INIT_WORK(&eq_table->eq[j - other_num].work, + hns_roce_ceq_work); ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j + aeq_num], &eq_table->eq[j - other_num]); - else + } else { ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j - comp_num], &eq_table->eq[j - other_num]); + } + if (ret) { dev_err(hr_dev->dev, "request irq error!\n"); goto err_request_failed; @@ -6544,12 +6766,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, return 0; err_request_failed: - for (j -= 1; j >= 0; j--) - if (j < other_num) + for (j -= 1; j >= 0; j--) { + if (j < other_num) { free_irq(hr_dev->irq[j], hr_dev); - else - free_irq(eq_table->eq[j - other_num].irq, - &eq_table->eq[j - other_num]); + continue; + } + free_irq(eq_table->eq[j - other_num].irq, + &eq_table->eq[j - other_num]); + if (j < other_num + comp_num) + cancel_work_sync(&eq_table->eq[j - other_num].work); + } err_kzalloc_failed: for (i -= 1; i >= 0; i--) @@ -6570,8 +6796,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev) for (i = 0; i < hr_dev->caps.num_other_vectors; i++) free_irq(hr_dev->irq[i], hr_dev); - for (i = 0; i < eq_num; i++) + for (i = 0; i < eq_num; i++) { free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]); + if (i < hr_dev->caps.num_comp_vectors) + cancel_work_sync(&hr_dev->eq_table.eq[i].work); + } for (i = 0; i < irq_num; i++) kfree(hr_dev->irq_names[i]); @@ -6591,6 +6820,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) int ret; int i; + if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET) + return -EINVAL; + other_num = hr_dev->caps.num_other_vectors; comp_num = hr_dev->caps.num_comp_vectors; aeq_num = hr_dev->caps.num_aeq_vectors; @@ -6660,7 +6892,7 @@ err_request_irq_fail: err_create_eq_fail: for (i -= 1; i >= 0; i--) - free_eq_buf(hr_dev, &eq_table->eq[i]); + hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); return ret; @@ -6680,11 +6912,8 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) __hns_roce_free_irq(hr_dev); destroy_workqueue(hr_dev->irq_workq); - for (i = 0; i < eq_num; i++) { - hns_roce_v2_destroy_eqc(hr_dev, i); - - free_eq_buf(hr_dev, &eq_table->eq[i]); - } + for (i = 0; i < eq_num; i++) + hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); } @@ -6735,6 +6964,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .query_srqc = hns_roce_v2_query_srqc, .query_sccc = hns_roce_v2_query_sccc, .query_hw_counter = hns_roce_hw_v2_query_counter, + .get_dscp = hns_roce_hw_v2_get_dscp, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; @@ -6851,9 +7081,6 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; hns_roce_handle_device_err(hr_dev); - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) - free_mr_exit(hr_dev); - hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); @@ -6914,6 +7141,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; } + static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; @@ -6932,6 +7160,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) hr_dev->active = false; hr_dev->dis_db = true; + + rdma_user_mmap_disassociate(&hr_dev->ib_dev); + hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; return 0; @@ -7002,9 +7233,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle, return ret; } +static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, + bool linkup) +{ + struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct net_device *netdev = handle->rinfo.netdev; + + if (linkup || !hr_dev) + return; + + ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev); +} + static const struct hnae3_client_ops hns_roce_hw_v2_ops = { .init_instance = hns_roce_hw_v2_init_instance, .uninit_instance = hns_roce_hw_v2_uninit_instance, + .link_status_change = hns_roce_hw_v2_link_status_change, .reset_notify = hns_roce_hw_v2_reset_notify, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index df04bc8ede57..bc7466830eaf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_HW_V2_H #include <linux/bitops.h> +#include "hnae3.h" #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 @@ -85,6 +86,11 @@ #define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) +/* budget must be smaller than aeqe_depth to guarantee that we update + * the ci before we polled all the entries in the EQ. + */ +#define HNS_AEQ_POLLING_BUDGET 64 + enum { HNS_ROCE_CMD_FLAG_IN = BIT(0), HNS_ROCE_CMD_FLAG_OUT = BIT(1), @@ -224,6 +230,14 @@ enum hns_roce_opcode_type { HNS_SWITCH_PARAMETER_CFG = 0x1033, }; +#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 +struct hns_roce_cmdq_tx_timeout_map { + u16 opcode; + u32 tx_timeout; +}; + enum { TYPE_CRQ, TYPE_CSQ, @@ -276,6 +290,10 @@ struct hns_roce_v2_cq_context { __le32 byte_64_se_cqe_idx; }; +#define CQC_CQE_BA_L_S 3 +#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S) +#define CQC_CQE_DB_RECORD_ADDR_H_S 32 + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -447,6 +465,12 @@ struct hns_roce_v2_qp_context { struct hns_roce_v2_qp_context_ex ext; }; +#define QPC_TRRL_BA_L_S 4 +#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S) +#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S) +#define QPC_IRRL_BA_L_S 6 +#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S) + #define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l) #define QPC_TST QPC_FIELD_LOC(2, 0) @@ -716,6 +740,9 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_PBL_BUF_ADDR_S 6 +#define MPT_PBL_BA_ADDR_S 3 + #define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) #define MPT_ST MPT_FIELD_LOC(1, 0) @@ -900,6 +927,7 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) +#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10) #define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) #define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) #define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) @@ -1323,7 +1351,7 @@ struct hns_roce_v2_priv { struct hns_roce_dip { u8 dgid[GID_LEN_V2]; u32 dip_idx; - struct list_head node; /* all dips are on a list */ + u32 qp_cnt; }; struct fmea_ram_ecc { @@ -1334,7 +1362,7 @@ struct fmea_ram_ecc { /* only for RNR timeout issue of HIP08 */ #define HNS_ROCE_CLOCK_ADJUST 1000 -#define HNS_ROCE_MAX_CQ_PERIOD 65 +#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65 #define HNS_ROCE_MAX_EQ_PERIOD 65 #define HNS_ROCE_RNR_TIMER_10NS 1 #define HNS_ROCE_1US_CFG 999 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1dc60c2b2b7a..e7a497cc125c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -40,6 +40,7 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" +#include "hns_roce_hw_v2.h" static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, const u8 *addr) @@ -181,7 +182,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_DEVICE_RC_RNR_NAK_GEN; props->max_send_sge = hr_dev->caps.max_sq_sg; props->max_recv_sge = hr_dev->caps.max_rq_sg; - props->max_sge_rd = 1; + props->max_sge_rd = hr_dev->caps.max_sq_sg; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; props->max_mr = hr_dev->caps.num_mtpts; @@ -192,6 +193,12 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + props->max_ah = INT_MAX; + props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD; + props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT; + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { props->max_srq = hr_dev->caps.num_srqs; props->max_srq_wr = hr_dev->caps.max_srq_wrs; @@ -421,6 +428,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, return 0; error_fail_copy_to_udata: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&context->page_mutex); hns_roce_dealloc_uar_entry(context); error_fail_uar_entry: @@ -437,6 +447,10 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&context->page_mutex); + hns_roce_dealloc_uar_entry(context); ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); @@ -451,6 +465,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) pgprot_t prot; int ret; + if (hr_dev->dis_db) { + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); + return -EPERM; + } + rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); if (!rdma_entry) { atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); @@ -743,7 +762,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -925,6 +944,15 @@ err_unmap_dmpt: return ret; } +static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_bitmap(hr_dev); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&hr_dev->pgdir_mutex); +} + /** * hns_roce_setup_hca - setup host channel adapter * @hr_dev: pointer to hns roce device @@ -973,6 +1001,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) err_uar_table_free: ida_destroy(&hr_dev->uar_ida.ida); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || + hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) + mutex_destroy(&hr_dev->pgdir_mutex); + return ret; } @@ -1102,8 +1134,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) INIT_LIST_HEAD(&hr_dev->qp_list); spin_lock_init(&hr_dev->qp_list_lock); - INIT_LIST_HEAD(&hr_dev->dip_list); - spin_lock_init(&hr_dev->dip_list_lock); ret = hns_roce_register_device(hr_dev); if (ret) @@ -1118,7 +1148,7 @@ error_failed_register_device: hr_dev->hw->hw_exit(hr_dev); error_failed_engine_init: - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); error_failed_setup_hca: hns_roce_cleanup_hem(hr_dev); @@ -1148,7 +1178,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev); if (hr_dev->cmd_mod) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 9e05b57a2d67..93a48b41955b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -38,6 +38,7 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" +#include "hns_roce_trace.h" static u32 hw_index_to_key(int ind) { @@ -138,8 +139,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) - ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", - ret); + ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", + ret); } free_mr_pbl(hr_dev, mr); @@ -159,10 +160,11 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + trace_hns_mr(mr); if (mr->type != MR_TYPE_FRMR) ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); else - ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); + ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); if (ret) { dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; @@ -435,24 +437,30 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) } int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int *sg_offset) + unsigned int *sg_offset_p) { + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_mtr *mtr = &mr->pbl_mtr; - int ret = 0; + int ret, sg_num = 0; + + if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) || + ibmr->page_size < HNS_HW_PAGE_SIZE || + ibmr->page_size > HNS_HW_MAX_PAGE_SIZE) + return sg_num; mr->npages = 0; mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, sizeof(dma_addr_t), GFP_KERNEL); if (!mr->page_list) - return ret; + return sg_num; - ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); - if (ret < 1) { + sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page); + if (sg_num < 1) { ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", - mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); + mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num); goto err_page_list; } @@ -463,17 +471,16 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages); if (ret) { ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); - ret = 0; + sg_num = 0; } else { mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); - ret = mr->npages; } err_page_list: kvfree(mr->page_list); mr->page_list = NULL; - return ret; + return sg_num; } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, @@ -756,7 +763,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, + npage = hns_roce_get_umem_bufs(pages, page_count, mtr->umem, page_shift); else npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, @@ -809,11 +816,6 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; - /* if hopnum is 0, no need to map pages in this region */ - if (!r->hopnum) { - mapped_cnt += r->count; - continue; - } if (r->offset + r->count > page_cnt) { ret = -EINVAL; @@ -998,7 +1000,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev, if (attr->region_count > ARRAY_SIZE(attr->region) || attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) { ibdev_err(ibdev, - "invalid buf attr, region count %d, page shift %u.\n", + "invalid buf attr, region count %u, page shift %u.\n", attr->region_count, attr->page_shift); return false; } @@ -1146,6 +1148,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + trace_hns_buf_attr(buf_attr); /* The caller has its own buffer list and invokes the hns_roce_mtr_map() * to finish the MTT configuration. */ diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f35a66325d9a..9f376a2232b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,6 +39,25 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" +static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, + u32 qpn) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_qp *qp; + unsigned long flags; + + xa_lock_irqsave(&hr_dev->qp_table_xa, flags); + qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (qp) + refcount_inc(&qp->refcount); + xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); + + if (!qp) + dev_warn(dev, "async event for bogus QP %08x\n", qpn); + + return qp; +} + static void flush_work_handle(struct work_struct *work) { struct hns_roce_work *flush_work = container_of(work, @@ -71,11 +90,18 @@ static void flush_work_handle(struct work_struct *work) void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_work *flush_work = &hr_qp->flush_work; + unsigned long flags; + + spin_lock_irqsave(&hr_qp->flush_lock, flags); + /* Exit directly after destroy_qp() */ + if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + return; + } - flush_work->hr_dev = hr_dev; - INIT_WORK(&flush_work->work, flush_work_handle); refcount_inc(&hr_qp->refcount); queue_work(hr_dev->irq_workq, &flush_work->work); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); } void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) @@ -95,31 +121,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { - struct device *dev = hr_dev->dev; struct hns_roce_qp *qp; - xa_lock(&hr_dev->qp_table_xa); - qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); - xa_unlock(&hr_dev->qp_table_xa); - - if (!qp) { - dev_warn(dev, "async event for bogus QP %08x\n", qpn); + qp = hns_roce_qp_lookup(hr_dev, qpn); + if (!qp) return; - } - if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || - event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) { - qp->state = IB_QPS_ERR; + qp->event(qp, (enum hns_roce_event)event_type); - flush_cqe(hr_dev, qp); - } + if (refcount_dec_and_test(&qp->refcount)) + complete(&qp->free); +} - qp->event(qp, (enum hns_roce_event)event_type); +void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn) +{ + struct hns_roce_qp *qp; + + qp = hns_roce_qp_lookup(hr_dev, qpn); + if (!qp) + return; + + qp->state = IB_QPS_ERR; + flush_cqe(hr_dev, qp); if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); @@ -410,7 +433,8 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) bankid = get_qp_bankid(hr_qp->qpn); - ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + ida_free(&hr_dev->qp_table.bank[bankid].ida, + hr_qp->qpn / HNS_ROCE_QP_BANK_NUM); mutex_lock(&hr_dev->qp_table.bank_mutex); hr_dev->qp_table.bank[bankid].inuse--; @@ -531,13 +555,15 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi, { unsigned int inline_sge; - inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; + if (!max_inline_data) + return 0; /* * if max_inline_data less than * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, * In addition to ud's mode, no need to extend sge. */ + inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE) inline_sge = 0; @@ -842,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp *ucmd, struct hns_roce_ib_create_qp_resp *resp) { + bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd); struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp); struct ib_device *ibdev = &hr_dev->ib_dev; int ret; - if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + if (has_sdb) { ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -858,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } - if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + if (has_rdb) { ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, @@ -872,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, return 0; err_sdb: - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) + if (has_sdb) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -1093,35 +1121,34 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) + if (ret) { ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); + return ret; + } ret = set_congest_param(hr_dev, hr_qp, ucmd); - if (ret) - return ret; } else { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) hr_qp->config = HNS_ROCE_EXSGE_FLAGS; + default_congest_type(hr_dev, hr_qp); ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "failed to set kernel SQ size, ret = %d.\n", ret); - - default_congest_type(hr_dev, hr_qp); } return ret; } static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, - struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct hns_roce_qp *hr_qp) { + struct hns_roce_work *flush_work = &hr_qp->flush_work; struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; @@ -1130,9 +1157,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); spin_lock_init(&hr_qp->rq.lock); + spin_lock_init(&hr_qp->flush_lock); hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle); if (init_attr->create_flags) return -EOPNOTSUPP; @@ -1140,7 +1170,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); - return ret; + goto err_out; } if (!udata) { @@ -1148,7 +1178,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", ret); - return ret; + goto err_out; } } @@ -1190,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, min(udata->outlen, sizeof(resp))); if (ret) { ibdev_err(ibdev, "copy qp resp failed!\n"); - goto err_store; + goto err_flow_ctrl; } } @@ -1219,6 +1249,8 @@ err_qpn: free_qp_buf(hr_dev, hr_qp); err_buf: free_kernel_wrid(hr_qp); +err_out: + mutex_destroy(&hr_qp->mutex); return ret; } @@ -1234,6 +1266,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, free_qp_buf(hr_dev, hr_qp); free_kernel_wrid(hr_qp); free_qp_db(hr_dev, hr_qp, udata); + mutex_destroy(&hr_qp->mutex); } static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, @@ -1271,7 +1304,6 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, struct ib_device *ibdev = qp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); struct hns_roce_qp *hr_qp = to_hr_qp(qp); - struct ib_pd *pd = qp->pd; int ret; ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); @@ -1286,9 +1318,9 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; } - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp); if (ret) - ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", + ibdev_err(ibdev, "create QP type %d failed(%d)\n", init_attr->qp_type, ret); err_out: @@ -1386,6 +1418,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_ib_modify_qp_resp resp = {}; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); enum ib_qp_state cur_state, new_state; int ret = -EINVAL; @@ -1427,6 +1460,18 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state, new_state, udata); + if (ret) + goto out; + + if (udata && udata->outlen) { + resp.tc_mode = hr_qp->tc_mode; + resp.priority = hr_qp->sl; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to copy modify qp resp.\n"); + } out: mutex_unlock(&hr_qp->mutex); @@ -1443,19 +1488,19 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __acquire(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq != NULL && recv_cq == NULL)) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq == NULL && recv_cq != NULL)) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else if (send_cq == recv_cq) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } @@ -1475,13 +1520,13 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, spin_unlock(&recv_cq->lock); } else if (send_cq == recv_cq) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } @@ -1529,14 +1574,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int reserved_from_bot; unsigned int i; - qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps, - sizeof(u32), GFP_KERNEL); - if (!qp_table->idx_table.spare_idx) - return -ENOMEM; - mutex_init(&qp_table->scc_mutex); mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); + xa_init(&qp_table->dip_xa); reserved_from_bot = hr_dev->caps.reserved_qps; @@ -1561,5 +1602,8 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); - kfree(hr_dev->qp_table.idx_table.spare_idx); + xa_destroy(&hr_dev->qp_table.dip_xa); + xa_destroy(&hr_dev->qp_table_xa); + mutex_destroy(&hr_dev->qp_table.bank_mutex); + mutex_destroy(&hr_dev->qp_table.scc_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 356d98816949..f637b73b946e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -4,7 +4,6 @@ #include <rdma/rdma_cm.h> #include <rdma/restrack.h> #include <uapi/rdma/rdma_netlink.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 4abae9477854..1090051f493b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, break; default: dev_err(hr_dev->dev, - "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n", + "hns_roce:Unexpected event type %d on SRQ %06lx\n", event_type, srq->srqn); return; } @@ -123,7 +123,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) return ret; } - ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; @@ -136,7 +136,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) return 0; err_xa: - xa_erase(&srq_table->xa, srq->srqn); + xa_erase_irq(&srq_table->xa, srq->srqn); err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); @@ -151,10 +151,10 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); if (ret) - dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", - ret, srq->srqn); + dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", + ret, srq->srqn); - xa_erase(&srq_table->xa, srq->srqn); + xa_erase_irq(&srq_table->xa, srq->srqn); if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); @@ -250,7 +250,7 @@ static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); } -static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +static int alloc_srq_wrid(struct hns_roce_srq *srq) { srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) @@ -297,7 +297,7 @@ static int set_srq_basic_param(struct hns_roce_srq *srq, max_sge = proc_srq_sge(hr_dev, srq, !!udata); if (attr->max_wr > hr_dev->caps.max_srq_wrs || - attr->max_sge > max_sge) { + attr->max_sge > max_sge || !attr->max_sge) { ibdev_err(&hr_dev->ib_dev, "invalid SRQ attr, depth = %u, sge = %u.\n", attr->max_wr, attr->max_sge); @@ -366,7 +366,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, goto err_idx; if (!udata) { - ret = alloc_srq_wrid(hr_dev, srq); + ret = alloc_srq_wrid(srq); if (ret) goto err_wqe_buf; } @@ -518,6 +518,7 @@ err_srq_db: err_srq_buf: free_srq_buf(hr_dev, srq); err_out: + mutex_destroy(&srq->mutex); atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]); return ret; @@ -532,6 +533,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srqn(hr_dev, srq); free_srq_db(hr_dev, srq, udata); free_srq_buf(hr_dev, srq); + mutex_destroy(&srq->mutex); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_trace.h b/drivers/infiniband/hw/hns/hns_roce_trace.h new file mode 100644 index 000000000000..59ceb591b3a1 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_trace.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2025 Hisilicon Limited. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hns_roce + +#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HNS_ROCE_TRACE_H + +#include <linux/tracepoint.h> +#include <linux/string_choices.h> +#include "hns_roce_device.h" +#include "hns_roce_hw_v2.h" + +DECLARE_EVENT_CLASS(flush_head_template, + TP_PROTO(unsigned long qpn, u32 pi, + enum hns_roce_trace_type type), + TP_ARGS(qpn, pi, type), + + TP_STRUCT__entry(__field(unsigned long, qpn) + __field(u32, pi) + __field(enum hns_roce_trace_type, type) + ), + + TP_fast_assign(__entry->qpn = qpn; + __entry->pi = pi; + __entry->type = type; + ), + + TP_printk("%s 0x%lx flush head 0x%x.", + trace_type_to_str(__entry->type), + __entry->qpn, __entry->pi) +); + +DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe, + TP_PROTO(unsigned long qpn, u32 pi, + enum hns_roce_trace_type type), + TP_ARGS(qpn, pi, type)); +DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe, + TP_PROTO(unsigned long qpn, u32 pi, + enum hns_roce_trace_type type), + TP_ARGS(qpn, pi, type)); + +#define MAX_SGE_PER_WQE 64 +#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE) +DECLARE_EVENT_CLASS(wqe_template, + TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, + u64 id, enum hns_roce_trace_type type), + TP_ARGS(qpn, idx, wqe, len, id, type), + + TP_STRUCT__entry(__field(unsigned long, qpn) + __field(u32, idx) + __array(u32, wqe, + MAX_WQE_SIZE / sizeof(__le32)) + __field(u32, len) + __field(u64, id) + __field(enum hns_roce_trace_type, type) + ), + + TP_fast_assign(__entry->qpn = qpn; + __entry->idx = idx; + __entry->id = id; + __entry->len = len / sizeof(__le32); + __entry->type = type; + for (int i = 0; i < __entry->len; i++) + __entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]); + ), + + TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s", + trace_type_to_str(__entry->type), + __entry->qpn, __entry->idx, __entry->id, + __print_array(__entry->wqe, __entry->len, + sizeof(__le32))) +); + +DEFINE_EVENT(wqe_template, hns_sq_wqe, + TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id, + enum hns_roce_trace_type type), + TP_ARGS(qpn, idx, wqe, len, id, type)); +DEFINE_EVENT(wqe_template, hns_rq_wqe, + TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id, + enum hns_roce_trace_type type), + TP_ARGS(qpn, idx, wqe, len, id, type)); +DEFINE_EVENT(wqe_template, hns_srq_wqe, + TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id, + enum hns_roce_trace_type type), + TP_ARGS(qpn, idx, wqe, len, id, type)); + +TRACE_EVENT(hns_ae_info, + TP_PROTO(int event_type, void *aeqe, unsigned int len), + TP_ARGS(event_type, aeqe, len), + + TP_STRUCT__entry(__field(int, event_type) + __array(u32, aeqe, + HNS_ROCE_V3_EQE_SIZE / sizeof(__le32)) + __field(u32, len) + ), + + TP_fast_assign(__entry->event_type = event_type; + __entry->len = len / sizeof(__le32); + for (int i = 0; i < __entry->len; i++) + __entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]); + ), + + TP_printk("event %2d aeqe: %s", __entry->event_type, + __print_array(__entry->aeqe, __entry->len, sizeof(__le32))) +); + +TRACE_EVENT(hns_mr, + TP_PROTO(struct hns_roce_mr *mr), + TP_ARGS(mr), + + TP_STRUCT__entry(__field(u64, iova) + __field(u64, size) + __field(u32, key) + __field(u32, pd) + __field(u32, pbl_hop_num) + __field(u32, npages) + __field(int, type) + __field(int, enabled) + ), + + TP_fast_assign(__entry->iova = mr->iova; + __entry->size = mr->size; + __entry->key = mr->key; + __entry->pd = mr->pd; + __entry->pbl_hop_num = mr->pbl_hop_num; + __entry->npages = mr->npages; + __entry->type = mr->type; + __entry->enabled = mr->enabled; + ), + + TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d", + __entry->iova, __entry->size, __entry->key, + __entry->pd, __entry->pbl_hop_num, __entry->npages, + __entry->type, __entry->enabled) +); + +TRACE_EVENT(hns_buf_attr, + TP_PROTO(struct hns_roce_buf_attr *attr), + TP_ARGS(attr), + + TP_STRUCT__entry(__field(unsigned int, region_count) + __field(unsigned int, region0_size) + __field(int, region0_hopnum) + __field(unsigned int, region1_size) + __field(int, region1_hopnum) + __field(unsigned int, region2_size) + __field(int, region2_hopnum) + __field(unsigned int, page_shift) + __field(bool, mtt_only) + ), + + TP_fast_assign(__entry->region_count = attr->region_count; + __entry->region0_size = attr->region[0].size; + __entry->region0_hopnum = attr->region[0].hopnum; + __entry->region1_size = attr->region[1].size; + __entry->region1_hopnum = attr->region[1].hopnum; + __entry->region2_size = attr->region[2].size; + __entry->region2_hopnum = attr->region[2].hopnum; + __entry->page_shift = attr->page_shift; + __entry->mtt_only = attr->mtt_only; + ), + + TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n", + __entry->region_count, __entry->page_shift, + str_yes_no(__entry->mtt_only), + __entry->region0_size, __entry->region0_hopnum, + __entry->region1_size, __entry->region1_hopnum, + __entry->region2_size, __entry->region2_hopnum) +); + +DECLARE_EVENT_CLASS(cmdq, + TP_PROTO(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc), + TP_ARGS(hr_dev, desc), + + TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev)) + __field(u16, opcode) + __field(u16, flag) + __field(u16, retval) + __array(u32, data, 6) + ), + + TP_fast_assign(__assign_str(dev_name); + __entry->opcode = le16_to_cpu(desc->opcode); + __entry->flag = le16_to_cpu(desc->flag); + __entry->retval = le16_to_cpu(desc->retval); + for (int i = 0; i < 6; i++) + __entry->data[i] = le32_to_cpu(desc->data[i]); + ), + + TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n", + __get_str(dev_name), __entry->opcode, + __entry->flag, __entry->retval, + __print_array(__entry->data, 6, sizeof(__le32))) +); + +DEFINE_EVENT(cmdq, hns_cmdq_req, + TP_PROTO(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc), + TP_ARGS(hr_dev, desc)); +DEFINE_EVENT(cmdq, hns_cmdq_resp, + TP_PROTO(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc), + TP_ARGS(hr_dev, desc)); + +#endif /* __HNS_ROCE_TRACE_H */ + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hns_roce_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> |