author     Luoyouming <luoyouming@huawei.com>   2022-11-08 21:38:47 +0800
committer  Jason Gunthorpe <jgg@nvidia.com>     2022-11-18 20:19:49 -0400
commit     0c5e259b06a8efc69f929ad777ea49281bb58e37 (patch)
tree       d97cda0a6b3fdbc1ba4273c17439ccb22c54baf0 /drivers/infiniband/hw/hns/hns_roce_qp.c
parent     8eaa6f7d569b4a22bfc1b0a3fdfeeb401feb65a4 (diff)
RDMA/hns: Fix incorrect sge nums calculation
When creating a QP, the user configures the number of sges via the
max_send_sge parameter and the maximum size of inline data that can be
sent via max_inline_data. Inline sends use sges to carry the data, so
the expected behaviour is:

1) When the sge space cannot hold the inline data, the sge space is
   expanded to accommodate all of the inline data.

2) When the sge space is large enough for the inline data, the upper
   limit of inline data is raised so that users can send larger inline
   payloads.

Currently case one is not implemented: when the inline data is larger
than the sge space, an insufficient sge space error occurs. Rework this
code to follow the rules above by calculating the sge num as the
maximum of the value derived from max_send_sge and the number of sges
needed to hold max_inline_data.

Fixes: 05201e01be93 ("RDMA/hns: Refactor process of setting extended sge")
Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC")
Link: https://lore.kernel.org/r/20221108133847.2304539-3-xuhaoyue1@hisilicon.com
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
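[Editor's illustration] The fix boils down to reserving the larger of two extended-sge requirements: the count implied by max_send_sge and the count needed to carry max_inline_data. The standalone sketch below mirrors that calculation for illustration only; the constants (16-byte sges, 2 sges held in the RC WQE itself) and the helper names are assumptions modelled on the patch, not the driver code itself.

#include <stdio.h>
#include <stdbool.h>

/* Assumed values mirroring HNS_ROCE_SGE_SIZE and HNS_ROCE_SGE_IN_WQE. */
#define SGE_SIZE    16  /* bytes per sge */
#define SGE_IN_WQE   2  /* sges carried inside an RC WQE */

static unsigned int roundup_pow2(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

/* Extended sges needed for max_send_sge (cf. get_sge_num_from_max_send_sge). */
static unsigned int ext_sge_from_send_sge(bool is_ud_or_gsi, unsigned int max_send_sge)
{
        unsigned int std_sge = is_ud_or_gsi ? 0 : SGE_IN_WQE;
        unsigned int min_sge = is_ud_or_gsi ? 1 : 0;

        return max_send_sge > std_sge ? max_send_sge - std_sge : min_sge;
}

/* Extended sges needed for max_inline_data (cf. get_sge_num_from_max_inl_data). */
static unsigned int ext_sge_from_inline(bool is_ud_or_gsi, unsigned int max_inline_data)
{
        unsigned int sge = roundup_pow2(max_inline_data) / SGE_SIZE;

        /* An RC WQE already holds SGE_IN_WQE sges, so small inline
         * payloads need no extended sge at all. */
        if (!is_ud_or_gsi && sge <= SGE_IN_WQE)
                sge = 0;
        return sge;
}

int main(void)
{
        /* Example: RC QP with max_send_sge = 4 and max_inline_data = 256. */
        bool ud = false;
        unsigned int from_sge = ext_sge_from_send_sge(ud, 4);   /* 4 - 2 = 2 */
        unsigned int from_inl = ext_sge_from_inline(ud, 256);   /* 256 / 16 = 16 */
        unsigned int ext = roundup_pow2(from_inl > from_sge ? from_inl : from_sge);

        printf("extended sges reserved per WQE: %u\n", ext);    /* 16 */
        return 0;
}

With the old code the same request would have been limited by max_send_sge alone and the 256-byte inline payload would not fit; with the patch the inline requirement (16 sges) wins the max() and the extended sge space is sized accordingly.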
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_qp.c')
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_qp.c   107
1 file changed, 90 insertions(+), 17 deletions(-)
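[Editor's illustration] For context on the two caps the patch reconciles, this is roughly how a verbs application requests them when creating a QP. A hedged libibverbs sketch, not part of the patch; the PD/CQ setup and the particular numbers are placeholders.

#include <infiniband/verbs.h>

/* Sketch only: assumes pd, send_cq and recv_cq were created elsewhere. */
static struct ibv_qp *create_rc_qp(struct ibv_pd *pd,
                                   struct ibv_cq *send_cq,
                                   struct ibv_cq *recv_cq)
{
        struct ibv_qp_init_attr attr = {
                .qp_type = IBV_QPT_RC,
                .send_cq = send_cq,
                .recv_cq = recv_cq,
                .cap = {
                        .max_send_wr     = 128,
                        .max_recv_wr     = 128,
                        .max_send_sge    = 4,    /* sge count requested by the user */
                        .max_recv_sge    = 4,
                        .max_inline_data = 256,  /* inline payload size the user wants */
                },
        };

        return ibv_create_qp(pd, &attr);
}

ibv_create_qp() returns the granted limits in attr.cap; with this patch the hns driver grows the extended sge space when the requested inline size does not fit in the requested sges, instead of later failing with an insufficient sge space error.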
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f0bd82a18069..0ae335fb205c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return 0;
}
-static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
{
- /* GSI/UD QP only has extended sge */
- if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
- return qp->sq.max_gs;
-
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
return 0;
}
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+ /*
+ * If max_inline_data is smaller than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, there is no need
+ * to extend the sge, except in UD/GSI mode.
+ */
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
+
static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
u32 total_sge_cnt;
- u32 wqe_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
- hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
- wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- if (wqe_sge_cnt) {
- total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
hr_qp->sge.sge_cnt = max(total_sge_cnt,
(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
}
+
+ update_inline_data(hr_qp, cap);
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
hr_qp->sq.wqe_cnt = cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
return 0;
}
@@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
int ret;
- if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
- init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
-
- hr_qp->max_inline_data = init_attr->cap.max_inline_data;
-
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
return ret;
}
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret)
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
} else {
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,