diff options
Diffstat (limited to 'drivers/infiniband/sw/rdmavt/mr.c')
| -rw-r--r-- | drivers/infiniband/sw/rdmavt/mr.c | 426 |
1 files changed, 179 insertions, 247 deletions
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index aa5f9ea318e4..86e482593a85 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -96,12 +54,13 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) for (i = 0; i < rdi->lkey_table.max; i++) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); + rdi->dparms.props.max_mr = rdi->lkey_table.max; return 0; } /** - *rvt_mr_exit: clean up MR - *@rdi: rvt dev structure + * rvt_mr_exit - clean up MR + * @rdi: rvt dev structure * * called when drivers have unregistered or perhaps failed to register with us */ @@ -283,7 +242,7 @@ static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) /* Allocate struct plus pointers to first level page tables. */ m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; - mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); + mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL); if (!mr) goto bail; @@ -323,8 +282,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -370,34 +327,39 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register + * @virt_addr: associated virtual address * @mr_access_flags: access flags for this memory region + * @dmah: dma handle * @udata: unused by the driver * * Return: the memory region on success, otherwise returns an errno. */ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, + struct ib_dmah *dmah, struct ib_udata *udata) { struct rvt_mr *mr; struct ib_umem *umem; - struct scatterlist *sg; - int n, m, entry; + struct sg_page_iter sg_iter; + int n, m; struct ib_mr *ret; + if (dmah) + return ERR_PTR(-EOPNOTSUPP); + if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); + umem = ib_umem_get(pd->device, start, length, mr_access_flags); if (IS_ERR(umem)) - return (void *)umem; + return ERR_CAST(umem); - n = umem->nmap; + n = ib_umem_num_pages(umem); mr = __rvt_alloc_mr(n, pd); if (IS_ERR(mr)) { - ret = (struct ib_mr *)mr; + ret = ERR_CAST(mr); goto bail_umem; } @@ -408,23 +370,21 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.access_flags = mr_access_flags; mr->umem = umem; - mr->mr.page_shift = umem->page_shift; + mr->mr.page_shift = PAGE_SHIFT; m = 0; n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { void *vaddr; - vaddr = page_address(sg_page(sg)); + vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail_inval; } mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = BIT(umem->page_shift); - trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, - BIT(umem->page_shift)); - n++; - if (n == RVT_SEGSZ) { + mr->mr.map[m]->segs[n].length = PAGE_SIZE; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE); + if (++n == RVT_SEGSZ) { m++; n = 0; } @@ -441,37 +401,129 @@ bail_umem: } /** + * rvt_dereg_clean_qp_cb - callback from iterator + * @qp: the qp + * @v: the mregion (as u64) + * + * This routine fields the callback for all QPs and + * for QPs in the same PD as the MR will call the + * rvt_qp_mr_clean() to potentially cleanup references. + */ +static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) +{ + struct rvt_mregion *mr = (struct rvt_mregion *)v; + + /* skip PDs that are not ours */ + if (mr->pd != qp->ibqp.pd) + return; + rvt_qp_mr_clean(qp, mr->lkey); +} + +/** + * rvt_dereg_clean_qps - find QPs for reference cleanup + * @mr: the MR that is being deregistered + * + * This routine iterates RC QPs looking for references + * to the lkey noted in mr. + */ +static void rvt_dereg_clean_qps(struct rvt_mregion *mr) +{ + struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); + + rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb); +} + +/** + * rvt_check_refs - check references + * @mr: the megion + * @t: the caller identification + * + * This routine checks MRs holding a reference during + * when being de-registered. + * + * If the count is non-zero, the code calls a clean routine then + * waits for the timeout for the count to zero. + */ +static int rvt_check_refs(struct rvt_mregion *mr, const char *t) +{ + unsigned long timeout; + struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); + + if (mr->lkey) { + /* avoid dma mr */ + rvt_dereg_clean_qps(mr); + /* @mr was indexed on rcu protected @lkey_table */ + synchronize_rcu(); + } + + timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); + if (!timeout) { + rvt_pr_err(rdi, + "%s timeout mr %p pd %p lkey %x refcount %ld\n", + t, mr, mr->pd, mr->lkey, + atomic_long_read(&mr->refcount.data->count)); + rvt_get_mr(mr); + return -EBUSY; + } + return 0; +} + +/** + * rvt_mr_has_lkey - is MR + * @mr: the mregion + * @lkey: the lkey + */ +bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) +{ + return mr && lkey == mr->lkey; +} + +/** + * rvt_ss_has_lkey - is mr in sge tests + * @ss: the sge state + * @lkey: the lkey + * + * This code tests for an MR in the indicated + * sge state. + */ +bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) +{ + int i; + bool rval = false; + + if (!ss->num_sge) + return rval; + /* first one */ + rval = rvt_mr_has_lkey(ss->sge.mr, lkey); + /* any others */ + for (i = 0; !rval && i < ss->num_sge - 1; i++) + rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey); + return rval; +} + +/** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free - * + * @udata: unused by the driver * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). * * Returns 0 on success. */ -int rvt_dereg_mr(struct ib_mr *ibmr) +int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rvt_mr *mr = to_imr(ibmr); - struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device); - int ret = 0; - unsigned long timeout; + int ret; rvt_free_lkey(&mr->mr); rvt_put_mr(&mr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); - if (!timeout) { - rvt_pr_err(rdi, - "rvt_dereg_mr timeout mr %p pd %p\n", - mr, mr->mr.pd); - rvt_get_mr(&mr->mr); - ret = -EBUSY; + ret = rvt_check_refs(&mr->mr, __func__); + if (ret) goto out; - } rvt_deinit_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); out: return ret; @@ -485,8 +537,7 @@ out: * * Return: the memory region on success, otherwise return an errno. */ -struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { struct rvt_mr *mr; @@ -496,7 +547,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, mr = __rvt_alloc_mr(max_num_sg, pd); if (IS_ERR(mr)) - return (struct ib_mr *)mr; + return ERR_CAST(mr); return &mr->ibmr; } @@ -518,17 +569,12 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(mapped_segs == mr->mr.max_segs)) return -ENOMEM; - if (mr->mr.length == 0) { - mr->mr.user_base = addr; - mr->mr.iova = addr; - } - m = mapped_segs / RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); return 0; } @@ -540,17 +586,25 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. + * * Return: number of sg elements mapped to the memory region */ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rvt_mr *mr = to_imr(ibmr); + int ret; mr->mr.length = 0; mr->mr.page_shift = PAGE_SHIFT; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, - rvt_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); + mr->mr.user_base = ibmr->iova; + mr->mr.iova = ibmr->iova; + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; + mr->mr.length = (size_t)ibmr->length; + trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset); + return ret; } /** @@ -581,6 +635,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ibmr->rkey = key; mr->mr.lkey = key; mr->mr.access_flags = access; + mr->mr.iova = ibmr->iova; atomic_set(&mr->mr.lkey_invalid, 0); return 0; @@ -620,161 +675,31 @@ bail: EXPORT_SYMBOL(rvt_invalidate_rkey); /** - * rvt_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Return: the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct rvt_fmr *fmr; - int m; - struct ib_fmr *ret; - int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; - fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); - if (!fmr) - goto bail; - - rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages, - PERCPU_REF_INIT_ATOMIC); - if (rval) - goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - rval = rvt_alloc_lkey(&fmr->mr, 0); - if (rval) - goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; -done: - return ret; - -bail_mregion: - rvt_deinit_mregion(&fmr->mr); -bail: - kfree(fmr); - ret = ERR_PTR(rval); - goto done; -} - -/** - * rvt_map_phys_fmr - set up a fast memory region - * @ibmfr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region + * rvt_sge_adjacent - is isge compressible + * @last_sge: last outgoing SGE written + * @sge: SGE to check * - * This may be called from interrupt context. + * If adjacent will update last_sge to add length. * - * Return: 0 on success + * Return: true if isge is adjacent to last sge */ - -int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) +static inline bool rvt_sge_adjacent(struct rvt_sge *last_sge, + struct ib_sge *sge) { - struct rvt_fmr *fmr = to_ifmr(ibfmr); - struct rvt_lkey_table *rkt; - unsigned long flags; - int m, n; - unsigned long i; - u32 ps; - struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); - - i = atomic_long_read(&fmr->mr.refcount.count); - if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) - return -EINVAL; - - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->mr.page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); - if (++n == RVT_SEGSZ) { - m++; - n = 0; + if (last_sge && sge->lkey == last_sge->mr->lkey && + ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) { + if (sge->lkey) { + if (unlikely((sge->addr - last_sge->mr->user_base + + sge->length > last_sge->mr->length))) + return false; /* overrun, caller will catch */ + } else { + last_sge->length += sge->length; } + last_sge->sge_length += sge->length; + trace_rvt_sge_adjacent(last_sge, sge); + return true; } - spin_unlock_irqrestore(&rkt->lock, flags); - return 0; -} - -/** - * rvt_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Return: 0 on success. - */ -int rvt_unmap_fmr(struct list_head *fmr_list) -{ - struct rvt_fmr *fmr; - struct rvt_lkey_table *rkt; - unsigned long flags; - struct rvt_dev_info *rdi; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rdi = ib_to_rvt(fmr->ibfmr.device); - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * rvt_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Return: 0 on success. - */ -int rvt_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct rvt_fmr *fmr = to_ifmr(ibfmr); - int ret = 0; - unsigned long timeout; - - rvt_free_lkey(&fmr->mr); - rvt_put_mr(&fmr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ); - if (!timeout) { - rvt_get_mr(&fmr->mr); - ret = -EBUSY; - goto out; - } - rvt_deinit_mregion(&fmr->mr); - kfree(fmr); -out: - return ret; + return false; } /** @@ -782,19 +707,20 @@ out: * @rkt: table containing lkey to check SGE against * @pd: protection domain * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written * @sge: SGE to check * @acc: access flags * * Check the IB SGE for validity and initialize our internal version * of it. * - * Return: 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success + * Increments the reference count when a new sge is stored. * + * Return: 0 if compressed, 1 if added , otherwise returns -errno. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc) + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc) { struct rvt_mregion *mr; unsigned n, m; @@ -802,14 +728,16 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ - rcu_read_lock(); if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); if (pd->user) - goto bail; + return -EINVAL; + if (rvt_sge_adjacent(last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; @@ -824,6 +752,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } + if (rvt_sge_adjacent(last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (!mr) goto bail; @@ -874,12 +805,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->m = m; isge->n = n; ok: + trace_rvt_sge_new(isge, sge); return 1; bail_unref: rvt_put_mr(mr); bail: rcu_read_unlock(); - return 0; + return -EINVAL; } EXPORT_SYMBOL(rvt_lkey_ok); @@ -907,7 +839,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { |
