summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hns/hns_roce_main.c
diff options
context:
space:
mode:
author Yixing Liu <liuyixing1@huawei.com> 2021-12-07 20:49:01 +0800
committer Jason Gunthorpe <jgg@nvidia.com> 2021-12-14 19:59:07 -0400
commit 0045e0d3f42ed7d05434bb5bc16acfc793ea4891 (patch)
tree 43beb583ac3757ab6d66cc2d566bc47570a8869d /drivers/infiniband/hw/hns/hns_roce_main.c
parent b1a4da64bfc189510e08df1ccb1c589e667dc7a3 (diff)
RDMA/hns: Support direct wqe of userspace
The current mechanism for posting a wqe is to write it to DDR first and then notify the hardware through a doorbell to read the data. Direct wqe is a mechanism that fills the wqe directly into the hardware. Under light load, the wqe is written into the PCIe BAR space of the hardware, which saves one memory access operation and therefore reduces latency. SIMD instructions allow the CPU to write 512 bits to device memory at one time, so they can be used for posting direct wqe. Add the direct wqe enable switch and address mapping. Link: https://lore.kernel.org/r/20211207124901.42123-2-liangwenpeng@huawei.com Signed-off-by: Yixing Liu <liuyixing1@huawei.com> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_main.c')
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c36
1 files changed, 30 insertions, 6 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index a906c6078b72..d0b976a86cd5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -310,9 +310,25 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
entry->address = address;
entry->mmap_type = mmap_type;
- ret = rdma_user_mmap_entry_insert_exact(
- ucontext, &entry->rdma_entry, length,
- mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1);
+ switch (mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 0);
+ break;
+ case HNS_ROCE_MMAP_TYPE_TPTR:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 1);
+ break;
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ ret = rdma_user_mmap_entry_insert_range(
+ ucontext, &entry->rdma_entry, length, 2,
+ U32_MAX);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
if (ret) {
kfree(entry);
return NULL;
@@ -439,10 +455,18 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
entry = to_hns_mmap(rdma_entry);
pfn = entry->address >> PAGE_SHIFT;
- prot = vma->vm_page_prot;
- if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
- prot = pgprot_device(prot);
+ switch (entry->mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ case HNS_ROCE_MMAP_TYPE_TPTR:
+ prot = vma->vm_page_prot;
+ break;
+ default:
+ return -EINVAL;
+ }
ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
prot, rdma_entry);