author    Linus Torvalds <torvalds@linux-foundation.org>    2020-08-06 16:43:36 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>    2020-08-06 16:43:36 -0700
commit    d7806bbd22cabc3e3b0a985cfcffa29cf156bb30 (patch)
tree      ef24f40c658c2f015b7f96f429e47dd16ab6e5b4 /drivers/infiniband/hw/hns/hns_roce_mr.c
parent    d6efb3ac3e6c19ab722b28bdb9252bae0b9676b6 (diff)
parent    23fcc7dee2c6aba1060558683988263851e74bab (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "A quiet cycle after the larger 5.8 effort. Substantially cleanup and
  driver work with a few smaller features this time.

  - Driver updates for hfi1, rxe, mlx5, hns, qedr, usnic, bnxt_re

  - Removal of dead or redundant code across the drivers

  - RAW resource tracker dumps to include a device specific data blob
    for device objects to aid device debugging

  - Further advance the IOCTL interface, remove the ability to turn it
    off. Add QUERY_CONTEXT, QUERY_MR, and QUERY_PD commands

  - Remove stubs related to devices with no pkey table

  - A shared CQ scheme to allow multiple ULPs to share the CQ rings of
    a device to give higher performance

  - Several more static checker, syzkaller and rare crashers fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (121 commits)
  RDMA/mlx5: Fix flow destination setting for RDMA TX flow table
  RDMA/rxe: Remove pkey table
  RDMA/umem: Add a schedule point in ib_umem_get()
  RDMA/hns: Fix the unneeded process when getting a general type of CQE error
  RDMA/hns: Fix error during modify qp RTS2RTS
  RDMA/hns: Delete unnecessary memset when allocating VF resource
  RDMA/hns: Remove redundant parameters in set_rc_wqe()
  RDMA/hns: Remove support for HIP08_A
  RDMA/hns: Refactor hns_roce_v2_set_hem()
  RDMA/hns: Remove redundant hardware opcode definitions
  RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP
  RDMA/include: Replace license text with SPDX tags
  RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq
  RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
  RDMA/cma: Execute rdma_cm destruction from a handler properly
  RDMA/cma: Remove unneeded locking for req paths
  RDMA/cma: Using the standard locking pattern when delivering the removal event
  RDMA/cma: Simplify DEVICE_REMOVAL for internal_id
  RDMA/efa: Add EFA 0xefa1 PCI ID
  RDMA/efa: User/kernel compatibility handshake mechanism
  ...
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_mr.c')
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_mr.c   210
1 file changed, 127 insertions(+), 83 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6b226a5eb7db..e5df3884b41d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -415,7 +415,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
@@ -871,6 +871,15 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int err;
int i;
+ /*
+ * Only use the first page address as the root BA when hopnum is 0,
+ * because the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
+ }
+
for (i = 0; i < mtr->hem_cfg.region_count; i++) {
r = &mtr->hem_cfg.region[i];
if (r->offset + r->count > page_cnt) {
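[Annotation] The hunk above adds an early return to hns_roce_mtr_map() for level-0 (direct) addressing: since every buffer page is physically consecutive, recording pages[0] as the root BA is enough and no per-page MTT entries are written. A minimal standalone sketch of the same idea (plain userspace C, not driver code; the helper name, the page-shift constant and the consecutiveness check are illustrative assumptions):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define HW_PAGE_SHIFT 12	/* illustrative stand-in for a 4 KB hardware page shift */

/*
 * Direct (hopnum == 0) mapping: the pages are physically consecutive, so
 * only the first DMA address needs to be kept as the root base address.
 * Returns 0 on success, -1 if the pages are not actually consecutive.
 */
static int map_direct_sketch(uint64_t *root_ba, const uint64_t *pages, size_t cnt)
{
	size_t i;

	for (i = 1; i < cnt; i++)
		if (pages[i] != pages[0] + ((uint64_t)i << HW_PAGE_SHIFT))
			return -1;	/* not consecutive: direct mode is invalid */

	*root_ba = pages[0];
	return 0;
}

int main(void)
{
	uint64_t pages[3] = { 0x100000, 0x101000, 0x102000 };	/* 4 KB apart */
	uint64_t root_ba;

	if (!map_direct_sketch(&root_ba, pages, 3))
		printf("root_ba = 0x%llx\n", (unsigned long long)root_ba);
	return 0;
}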
@@ -896,6 +905,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int start_index;
int mtt_count;
int total = 0;
__le64 *mtts;
@@ -907,26 +918,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto done;
/* no mtt memory in direct mode, so just return the buffer address */
- if (mtr->hem_cfg.is_direct) {
- npage = offset;
- for (total = 0; total < mtt_max; total++, npage++) {
- addr = mtr->hem_cfg.root_ba +
- (npage << mtr->hem_cfg.buf_pg_shift);
-
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ for (mtt_count = 0; mtt_count < cfg->region_count &&
+ total < mtt_max; mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
+
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
mtt_buf[total] = to_hr_hw_page_addr(addr);
else
mtt_buf[total] = addr;
+
+ total++;
}
goto done;
}
+ start_index = offset >> cfg->buf_pg_shift;
left = mtt_max;
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
- offset + total,
+ start_index + total,
&mtt_count, NULL);
if (!mtts || !mtt_count)
goto done;
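[Annotation] The matching hunk in hns_roce_mtr_find() above rebuilds addresses in direct mode without walking any MTT: each region's base is simply root_ba plus the region's offset in hardware pages, and regions before the requested start index are skipped. A standalone sketch under the same assumptions (the struct and helper below only mirror the driver's region fields, they are not its types; the HNS_ROCE_HW_VER1 to_hr_hw_page_addr() conversion is omitted):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define HW_PAGE_SHIFT 12	/* illustrative 4 KB hardware page shift */

struct region_sketch {		/* illustrative mirror of a buffer region */
	unsigned int offset;	/* first hardware page index of the region */
	unsigned int count;	/* number of hardware pages in the region */
};

/*
 * Direct-mode lookup: starting at byte 'offset' into the buffer, return up
 * to 'max' region base addresses rebuilt from root_ba alone (no MTT walk).
 */
static int find_direct_sketch(uint64_t root_ba, const struct region_sketch *r,
			      size_t region_cnt, unsigned int offset,
			      uint64_t *out, int max)
{
	unsigned int start_index = offset >> HW_PAGE_SHIFT;
	int total = 0;
	size_t i;

	for (i = 0; i < region_cnt && total < max; i++) {
		if (r[i].offset < start_index)
			continue;	/* region lies before the requested offset */
		out[total++] = root_ba + ((uint64_t)r[i].offset << HW_PAGE_SHIFT);
	}

	return total;
}

int main(void)
{
	struct region_sketch regions[2] = { { 0, 8 }, { 8, 3 } };
	uint64_t addrs[2];
	int n = find_direct_sketch(0x100000, regions, 2, 0, addrs, 2);

	printf("found %d region addresses, first = 0x%llx\n",
	       n, (unsigned long long)addrs[0]);
	return 0;
}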
@@ -939,104 +956,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
done:
if (base_addr)
- *base_addr = mtr->hem_cfg.root_ba;
+ *base_addr = cfg->root_ba;
return total;
}
-/* convert buffer size to page index and page count */
-static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr,
- int page_cnt,
- struct hns_roce_buf_region *regions,
- int region_cnt, unsigned int page_shift)
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr,
+ struct hns_roce_hem_cfg *cfg,
+ unsigned int *buf_page_shift)
{
- unsigned int page_size = 1 << page_shift;
- int max_region = attr->region_count;
struct hns_roce_buf_region *r;
- unsigned int i = 0;
- int page_idx = 0;
-
- for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) {
- r = &regions[i];
- r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ?
- 0 : attr->region[i].hopnum;
- r->offset = page_idx;
- r->count = DIV_ROUND_UP(attr->region[i].size, page_size);
- page_idx += r->count;
+ unsigned int page_shift = 0;
+ int page_cnt = 0;
+ size_t buf_size;
+ int region_cnt;
+
+ if (cfg->is_direct) {
+ buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
+ page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
+ /*
+ * When the HEM buffer uses level-0 addressing, the page size equals
+ * the buffer size, and the page size = 4K * 2^N.
+ */
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
+ if (attr->region_count > 1) {
+ cfg->buf_pg_count = page_cnt;
+ page_shift = HNS_HW_PAGE_SHIFT;
+ } else {
+ cfg->buf_pg_count = 1;
+ page_shift = cfg->buf_pg_shift;
+ if (buf_size != 1 << page_shift) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check direct size %zu shift %d.\n",
+ buf_size, page_shift);
+ return -EINVAL;
+ }
+ }
+ } else {
+ page_shift = cfg->buf_pg_shift;
+ }
+
+ /* convert buffer size to page index and page count */
+ for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
+ region_cnt < attr->region_count &&
+ region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
+ r = &cfg->region[region_cnt];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+ r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
+ r->count);
+ }
+
+ if (region_cnt < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check mtr region count, pages = %d.\n",
+ cfg->buf_pg_count);
+ return -ENOBUFS;
}
- return i;
+ cfg->region_count = region_cnt;
+ *buf_page_shift = page_shift;
+
+ return page_cnt;
}
/**
* hns_roce_mtr_create - Create hns memory translate region.
*
* @mtr: memory translate region
- * @init_attr: init attribute for creating mtr
- * @page_shift: page shift for multi-hop base address table
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
* @udata: user space context, if it's NULL, means kernel space
* @user_addr: userspace virtual address to start at
- * @buf_alloced: mtr has private buffer, true means need to alloc
*/
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
- unsigned int page_shift, struct ib_udata *udata,
+ unsigned int ba_page_shift, struct ib_udata *udata,
unsigned long user_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int buf_page_shift = 0;
dma_addr_t *pages = NULL;
- int region_cnt = 0;
int all_pg_cnt;
int get_pg_cnt;
- bool has_mtt;
- int err = 0;
+ int ret = 0;
+
+ /* if mtt is disabled, all pages must be in a contiguous address range */
+ cfg->is_direct = !mtr_has_mtt(buf_attr);
- has_mtt = mtr_has_mtt(buf_attr);
/* if buffer only need mtt, just init the hem cfg */
if (buf_attr->mtt_only) {
- mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift;
- mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >>
- buf_attr->page_shift;
+ cfg->buf_pg_shift = buf_attr->page_shift;
+ cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
+ buf_attr->page_shift;
mtr->umem = NULL;
mtr->kmem = NULL;
} else {
- err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata,
- user_addr);
- if (err) {
- ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n",
- err);
- return err;
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
+ udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
}
}
- /* alloc mtt memory */
- all_pg_cnt = mtr->hem_cfg.buf_pg_count;
- hns_roce_hem_list_init(&mtr->hem_list);
- mtr->hem_cfg.is_direct = !has_mtt;
- mtr->hem_cfg.ba_pg_shift = page_shift;
- mtr->hem_cfg.region_count = 0;
- region_cnt = mtr_init_region(buf_attr, all_pg_cnt,
- mtr->hem_cfg.region,
- ARRAY_SIZE(mtr->hem_cfg.region),
- mtr->hem_cfg.buf_pg_shift);
- if (region_cnt < 1) {
- err = -ENOBUFS;
- ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt);
+ all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
+ if (all_pg_cnt < 1) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
goto err_alloc_bufs;
}
- mtr->hem_cfg.region_count = region_cnt;
-
- if (has_mtt) {
- err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
- mtr->hem_cfg.region, region_cnt,
- page_shift);
- if (err) {
- ibdev_err(ibdev, "Failed to request mtr hem, err %d\n",
- err);
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
+ ret);
goto err_alloc_bufs;
}
- mtr->hem_cfg.root_ba = mtr->hem_list.root_ba;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
}
/* no buffer to map */
@@ -1046,31 +1095,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages) {
- err = -ENOMEM;
- ibdev_err(ibdev, "Failed to alloc mtr page list %d\n",
+ ret = -ENOMEM;
+ ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
all_pg_cnt);
goto err_alloc_hem_list;
}
get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
- mtr->hem_cfg.buf_pg_shift);
+ buf_page_shift);
if (get_pg_cnt != all_pg_cnt) {
- ibdev_err(ibdev, "Failed to get mtr page %d != %d\n",
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
get_pg_cnt, all_pg_cnt);
- err = -ENOBUFS;
+ ret = -ENOBUFS;
goto err_alloc_page_list;
}
- if (!has_mtt) {
- mtr->hem_cfg.root_ba = pages[0];
- } else {
- /* write buffer's dma address to BA table */
- err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
- if (err) {
- ibdev_err(ibdev, "Failed to map mtr pages, err %d\n",
- err);
- goto err_alloc_page_list;
- }
+ /* write buffer's dma address to BA table */
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+ goto err_alloc_page_list;
}
/* drop tmp array */
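[Annotation] For the new mtr_init_buf_cfg() in the hunk above, the arithmetic may be easier to see in isolation: in direct (level-0) mode the whole buffer becomes one page, so buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); otherwise each region contributes roughly DIV_ROUND_UP(size, page_size) pages at a running offset (the hr_hw_page_align() rounding is folded in here). A worked userspace sketch under those assumptions; the macro and helper below only mirror the kernel's DIV_ROUND_UP() and order_base_2(), and the sizes are made-up examples:

#include <stdio.h>

#define HW_PAGE_SHIFT 12			/* 4 KB hardware pages */
#define HW_PAGE_SIZE  (1ul << HW_PAGE_SHIFT)
#define DIV_ROUND_UP_SKETCH(n, d) (((n) + (d) - 1) / (d))

/* Smallest e such that 2^e >= n (mirrors the kernel's order_base_2()). */
static unsigned int order_base_2_sketch(unsigned long n)
{
	unsigned int e = 0;

	while ((1ul << e) < n)
		e++;
	return e;
}

int main(void)
{
	/* Direct case: a 32 KB buffer is 8 hardware pages, so the single
	 * covering "page" is 4 KB * 2^3 = 32 KB, i.e. buf_pg_shift = 15. */
	unsigned long buf_size = 32 * 1024;
	unsigned long page_cnt = DIV_ROUND_UP_SKETCH(buf_size, HW_PAGE_SIZE);
	unsigned int buf_pg_shift = HW_PAGE_SHIFT + order_base_2_sketch(page_cnt);

	printf("direct: %lu pages -> buf_pg_shift %u (page size %lu)\n",
	       page_cnt, buf_pg_shift, 1ul << buf_pg_shift);

	/* Multi-region case at 4 KB pages: 32 KB and 10 KB regions get
	 * (offset, count) of (0, 8) and (8, 3), i.e. 11 pages in total. */
	unsigned long sizes[2] = { 32 * 1024, 10 * 1024 };
	unsigned long offset = 0;
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long count = DIV_ROUND_UP_SKETCH(sizes[i], HW_PAGE_SIZE);

		printf("region %d: offset %lu count %lu\n", i, offset, count);
		offset += count;
	}
	return 0;
}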
@@ -1082,7 +1126,7 @@ err_alloc_hem_list:
hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
err_alloc_bufs:
mtr_free_bufs(hr_dev, mtr);
- return err;
+ return ret;
}
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)