diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-18 09:39:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-18 09:39:22 -0700 |
commit | 9ea446352047d8350553250db51da2c73a610688 (patch) | |
tree | f75712fb5bfb5d2a83685023b2838f3c9ea40320 /drivers/infiniband/hw/mlx5 | |
parent | 9dffdb38d864ae89e16ff7b3a09451270736e35b (diff) | |
parent | 082eaa50838c6b70a8244f8b01d7ed7d686f84db (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"Initial roundup of 4.6 merge window patches.
This is the first of two pull requests. It is the smaller request,
but touches for more different things (this is everything but what is
in or going into staging). The pull request for the code in
staging/rdma is on hold until after we decide what to do on the
write/writev API issue and may be partially deferred until 4.7 as a
result.
Summary:
- cxgb4 updates
- nes updates
- unification of iwarp portmapper code to core
- add drain_cq API
- various ib_core updates
- minor ipoib updates
- minor mlx4 updates
- more significant mlx5 updates (including a minor merge conflict
with net-next tree...merge is simple to resolve and Stephen's
resolution was confirmed by Mellanox)
- trivial net/9p rdma conversion
- ocrdma RoCEv2 update
- srpt updates"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (85 commits)
iwpm: crash fix for large connections test
iw_cxgb3: support for iWARP port mapping
iw_cxgb4: remove port mapper related code
iw_nes: remove port mapper related code
iwcm: common code for port mapper
net/9p: convert to new CQ API
IB/mlx5: Add support for don't trap rules
net/mlx5_core: Introduce forward to next priority action
net/mlx5_core: Create anchor of last flow table
iser: Accept arbitrary sg lists mapping if the device supports it
mlx5: Add arbitrary sg list support
IB/core: Add arbitrary sg_list support
IB/mlx5: Expose correct max_fast_reg_page_list_len
IB/mlx5: Make coding style more consistent
IB/mlx5: Convert UMR CQ to new CQ API
IB/ocrdma: Skip using unneeded intermediate variable
IB/ocrdma: Skip using unneeded intermediate variable
IB/ocrdma: Delete unnecessary variable initialisations in 11 functions
IB/core: Documentation fix in the MAD header file
IB/core: trivial prink cleanup.
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/Makefile | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 104 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/gsi.c | 548 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mad.c | 166 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 119 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 108 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 601 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 10 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 271 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/user.h | 7 |
10 files changed, 1715 insertions, 221 deletions
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 27a70159e2ea..4e851889355a 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fd1de31e0611..a00ba4418de9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -207,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, break; case MLX5_CQE_RESP_SEND: wc->opcode = IB_WC_RECV; - wc->wc_flags = 0; + wc->wc_flags = IB_WC_IP_CSUM_OK; + if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK)))) + wc->wc_flags = 0; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IB_WC_RECV; @@ -431,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mr *mmr; + struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; @@ -536,17 +539,17 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mr_table.lock); - mmr = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmr)) { - read_unlock(&dev->mdev->priv.mr_table.lock); + read_lock(&dev->mdev->priv.mkey_table.lock); + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmkey)) { + read_unlock(&dev->mdev->priv.mkey_table.lock); mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); return -EINVAL; } - mr = to_mibmr(mmr); + mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; mr->sig->sigerr_count++; @@ -558,25 +561,51 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mkey_table.lock); goto repoll; } return 0; } +static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_ib_wc *soft_wc, *next; + int npolled = 0; + + list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { + if (npolled >= num_entries) + break; + + mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", + cq->mcq.cqn); + + wc[npolled++] = soft_wc->wc; + list_del(&soft_wc->list); + kfree(soft_wc); + } + + return npolled; +} + int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; unsigned long flags; + int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); - for (npolled = 0; npolled < num_entries; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc); + + for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); if (err) break; } @@ -587,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) - return npolled; + return soft_polled + npolled; else return err; } @@ -595,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; + struct mlx5_ib_cq *cq = to_mcq(ibcq); void __iomem *uar_page = mdev->priv.uuari.uars[0].map; + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, irq_flags); + if (cq->notify_flags != IB_CQ_NEXT_COMP) + cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; - mlx5_cq_arm(&to_mcq(ibcq)->mcq, + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) + ret = 1; + spin_unlock_irqrestore(&cq->lock, irq_flags); + + mlx5_cq_arm(&cq->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, uar_page, MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), to_mcq(ibcq)->mcq.cons_index); - return 0; + return ret; } static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, @@ -757,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } +static void notify_soft_wc_handler(struct work_struct *work) +{ + struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, + notify_work); + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -807,6 +855,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, &index, &inlen); if (err) goto err_create; + + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } cq->cqe_size = cqe_size; @@ -832,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; + INIT_LIST_HEAD(&cq->wc_list); + if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; @@ -1219,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) cq = to_mcq(ibcq); return cq->cqe_size; } + +/* Called from atomic context */ +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) +{ + struct mlx5_ib_wc *soft_wc; + struct mlx5_ib_cq *cq = to_mcq(ibcq); + unsigned long flags; + + soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); + if (!soft_wc) + return -ENOMEM; + + soft_wc->wc = *wc; + spin_lock_irqsave(&cq->lock, flags); + list_add_tail(&soft_wc->list, &cq->wc_list); + if (cq->notify_flags == IB_CQ_NEXT_COMP || + wc->status != IB_WC_SUCCESS) { + cq->notify_flags = 0; + schedule_work(&cq->notify_work); + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c new file mode 100644 index 000000000000..53e03c8ede79 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx5_ib.h" + +struct mlx5_ib_gsi_wr { + struct ib_cqe cqe; + struct ib_wc wc; + int send_flags; + bool completed:1; +}; + +struct mlx5_ib_gsi_qp { + struct ib_qp ibqp; + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + /* Serialize qp state modifications */ + struct mutex mutex; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. + */ + spinlock_t lock; + struct ib_qp **tx_qps; +}; + +static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) +{ + return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); +} + +static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); +} + +static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) +{ + return ++index % gsi->cap.max_send_wr; +} + +#define for_each_outstanding_wr(gsi, index) \ + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ + index = next_outstanding(gsi, index)) + +/* Call with gsi->lock locked */ +static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_wr *wr; + u32 index; + + for_each_outstanding_wr(gsi, index) { + wr = &gsi->outstanding_wrs[index]; + + if (!wr->completed) + break; + + if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || + wr->send_flags & IB_SEND_SIGNALED) + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); + + wr->completed = false; + } + + gsi->outstanding_ci = index; +} + +static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_gsi_qp *gsi = cq->cq_context; + struct mlx5_ib_gsi_wr *wr = + container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + u64 wr_id; + unsigned long flags; + + spin_lock_irqsave(&gsi->lock, flags); + wr->completed = true; + wr_id = wr->wc.wr_id; + wr->wc = *wc; + wr->wc.wr_id = wr_id; + wr->wc.qp = &gsi->ibqp; + + generate_completions(gsi); + spin_unlock_irqrestore(&gsi->lock, flags); +} + +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_gsi_qp *gsi; + struct ib_qp_init_attr hw_init_attr = *init_attr; + const u8 port_num = init_attr->port_num; + const int num_pkeys = pd->device->attrs.max_pkeys; + const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; + int ret; + + mlx5_ib_dbg(dev, "creating GSI QP\n"); + + if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { + mlx5_ib_warn(dev, + "invalid port number %d during GSI QP creation\n", + port_num); + return ERR_PTR(-EINVAL); + } + + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); + if (!gsi) + return ERR_PTR(-ENOMEM); + + gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); + if (!gsi->tx_qps) { + ret = -ENOMEM; + goto err_free; + } + + gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, + sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); + if (!gsi->outstanding_wrs) { + ret = -ENOMEM; + goto err_free_tx; + } + + mutex_init(&gsi->mutex); + + mutex_lock(&dev->devr.mutex); + + if (dev->devr.ports[port_num - 1].gsi) { + mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", + port_num); + ret = -EBUSY; + goto err_free_wrs; + } + gsi->num_qps = num_qps; + spin_lock_init(&gsi->lock); + + gsi->cap = init_attr->cap; + gsi->sq_sig_type = init_attr->sq_sig_type; + gsi->ibqp.qp_num = 1; + gsi->port_num = port_num; + + gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + IB_POLL_SOFTIRQ); + if (IS_ERR(gsi->cq)) { + mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", + PTR_ERR(gsi->cq)); + ret = PTR_ERR(gsi->cq); + goto err_free_wrs; + } + + hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; + hw_init_attr.send_cq = gsi->cq; + if (num_qps) { + hw_init_attr.cap.max_send_wr = 0; + hw_init_attr.cap.max_send_sge = 0; + hw_init_attr.cap.max_inline_data = 0; + } + gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + if (IS_ERR(gsi->rx_qp)) { + mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", + PTR_ERR(gsi->rx_qp)); + ret = PTR_ERR(gsi->rx_qp); + goto err_destroy_cq; + } + + dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + + mutex_unlock(&dev->devr.mutex); + + return &gsi->ibqp; + +err_destroy_cq: + ib_free_cq(gsi->cq); +err_free_wrs: + mutex_unlock(&dev->devr.mutex); + kfree(gsi->outstanding_wrs); +err_free_tx: + kfree(gsi->tx_qps); +err_free: + kfree(gsi); + return ERR_PTR(ret); +} + +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + const int port_num = gsi->port_num; + int qp_index; + int ret; + + mlx5_ib_dbg(dev, "destroying GSI QP\n"); + + mutex_lock(&dev->devr.mutex); + ret = ib_destroy_qp(gsi->rx_qp); + if (ret) { + mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n", + ret); + mutex_unlock(&dev->devr.mutex); + return ret; + } + dev->devr.ports[port_num - 1].gsi = NULL; + mutex_unlock(&dev->devr.mutex); + gsi->rx_qp = NULL; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) { + if (!gsi->tx_qps[qp_index]) + continue; + WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index])); + gsi->tx_qps[qp_index] = NULL; + } + + ib_free_cq(gsi->cq); + + kfree(gsi->outstanding_wrs); + kfree(gsi->tx_qps); + kfree(gsi); + + return 0; +} + +static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_pd *pd = gsi->rx_qp->pd; + struct ib_qp_init_attr init_attr = { + .event_handler = gsi->rx_qp->event_handler, + .qp_context = gsi->rx_qp->qp_context, + .send_cq = gsi->cq, + .recv_cq = gsi->rx_qp->recv_cq, + .cap = { + .max_send_wr = gsi->cap.max_send_wr, + .max_send_sge = gsi->cap.max_send_sge, + .max_inline_data = gsi->cap.max_inline_data, + }, + .sq_sig_type = gsi->sq_sig_type, + .qp_type = IB_QPT_UD, + .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + }; + + return ib_create_qp(pd, &init_attr); +} + +static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, + u16 qp_index) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct ib_qp_attr attr; + int mask; + int ret; + + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; + attr.qp_state = IB_QPS_INIT; + attr.pkey_index = qp_index; + attr.qkey = IB_QP1_QKEY; + attr.port_num = gsi->port_num; + ret = ib_modify_qp(qp, &attr, mask); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n", + qp->qp_num, ret); + return ret; + } + + return 0; +} + +static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) +{ + struct ib_device *device = gsi->rx_qp->device; + struct mlx5_ib_dev *dev = to_mdev(device); + struct ib_qp *qp; + unsigned long flags; + u16 pkey; + int ret; + + ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (ret) { + mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", + gsi->port_num, qp_index); + return; + } + + if (!pkey) { + mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n", + gsi->port_num, qp_index); + return; + } + + spin_lock_irqsave(&gsi->lock, flags); + qp = gsi->tx_qps[qp_index]; + spin_unlock_irqrestore(&gsi->lock, flags); + if (qp) { + mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n", + gsi->port_num, qp_index); + return; + } + + qp = create_gsi_ud_qp(gsi); + if (IS_ERR(qp)) { + mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", + PTR_ERR(qp)); + return; + } + + ret = modify_to_rts(gsi, qp, qp_index); + if (ret) + goto err_destroy_qp; + + spin_lock_irqsave(&gsi->lock, flags); + WARN_ON_ONCE(gsi->tx_qps[qp_index]); + gsi->tx_qps[qp_index] = qp; + spin_unlock_irqrestore(&gsi->lock, flags); + + return; + +err_destroy_qp: + WARN_ON_ONCE(qp); +} + +static void setup_qps(struct mlx5_ib_gsi_qp *gsi) +{ + u16 qp_index; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) + setup_qp(gsi, qp_index); +} + +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); + + mutex_lock(&gsi->mutex); + ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); + if (ret) { + mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); + goto unlock; + } + + if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) + setup_qps(gsi); + +unlock: + mutex_unlock(&gsi->mutex); + + return ret; +} + +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mutex_lock(&gsi->mutex); + ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); + qp_init_attr->cap = gsi->cap; + mutex_unlock(&gsi->mutex); + + return ret; +} + +/* Call with gsi->lock locked */ +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr, struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_gsi_wr *gsi_wr; + + if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) { + mlx5_ib_warn(dev, "no available GSI work request.\n"); + return -ENOMEM; + } + + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; + gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); + + if (!wc) { + memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); + gsi_wr->wc.pkey_index = wr->pkey_index; + gsi_wr->wc.wr_id = wr->wr.wr_id; + } else { + gsi_wr->wc = *wc; + gsi_wr->completed = true; + } + + gsi_wr->cqe.done = &handle_single_completion; + wr->wr.wr_cqe = &gsi_wr->cqe; + + return 0; +} + +/* Call with gsi->lock locked */ +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr) +{ + struct ib_wc wc = { + { .wr_id = wr->wr.wr_id }, + .status = IB_WC_SUCCESS, + .opcode = IB_WC_SEND, + .qp = &gsi->ibqp, + }; + int ret; + + ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + if (ret) + return ret; + + generate_completions(gsi); + + return 0; +} + +/* Call with gsi->lock locked */ +static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + int qp_index = wr->pkey_index; + + if (!mlx5_ib_deth_sqpn_cap(dev)) + return gsi->rx_qp; + + if (qp_index >= gsi->num_qps) + return NULL; + + return gsi->tx_qps[qp_index]; +} + +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct ib_qp *tx_qp; + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct ib_ud_wr cur_wr = *ud_wr(wr); + + cur_wr.wr.next = NULL; + + spin_lock_irqsave(&gsi->lock, flags); + tx_qp = get_tx_qp(gsi, &cur_wr); + if (!tx_qp) { + ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + if (ret) + goto err; + spin_unlock_irqrestore(&gsi->lock, flags); + continue; + } + + ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + if (ret) + goto err; + + ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); + if (ret) { + /* Undo the effect of adding the outstanding wr */ + gsi->outstanding_pi = (gsi->outstanding_pi - 1) % + gsi->cap.max_send_wr; + goto err; + } + spin_unlock_irqrestore(&gsi->lock, flags); + } + + return 0; + +err: + spin_unlock_irqrestore(&gsi->lock, flags); + *bad_wr = wr; + return ret; +} + +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + + return ib_post_recv(gsi->rx_qp, wr, bad_wr); +} + +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) +{ + if (!gsi) + return; + + mutex_lock(&gsi->mutex); + setup_qps(gsi); + mutex_unlock(&gsi->mutex); +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b84d13a487cc..41d8a0036465 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -31,8 +31,10 @@ */ #include <linux/mlx5/cmd.h> +#include <linux/mlx5/vport.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/ib_pma.h> #include "mlx5_ib.h" enum { @@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); } -int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid; int err; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, + void *out) +{ +#define MLX5_SUM_CNT(p, cntr1, cntr2) \ + (MLX5_GET64(query_vport_counter_out, p, cntr1) + \ + MLX5_GET64(query_vport_counter_out, p, cntr2)) + + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, + transmitted_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, + received_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_packets = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets, + transmitted_ib_multicast.packets)); + pma_cnt_ext->port_rcv_packets = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets, + received_ib_multicast.packets)); + pma_cnt_ext->port_unicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_unicast.packets); + pma_cnt_ext->port_unicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_unicast.packets); + pma_cnt_ext->port_multicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_multicast.packets); + pma_cnt_ext->port_multicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_multicast.packets); +} + +static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, + void *out) +{ + /* Traffic counters will be reported in + * their 64bit form via ib_pma_portcounters_ext by default. + */ + void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out, + counter_set); + +#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \ + counter_var = MLX5_GET_BE(typeof(counter_var), \ + ib_port_cntrs_grp_data_layout, \ + out_pma, counter_name); \ + } + + MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter, + symbol_error_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter, + link_error_recovery_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter, + link_downed_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors, + port_rcv_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors, + port_rcv_remote_physical_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors, + port_rcv_switch_relay_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards, + port_xmit_discards); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors, + port_xmit_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors, + port_rcv_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors, + link_overrun_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped, + vl_15_dropped); +} + +static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int err; + void *out_cnt; + + /* Decalring support of extended counters */ + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { + struct ib_class_port_info cpi = {}; + + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + } + + if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { + struct ib_pma_portcounters_ext *pma_cnt_ext = + (struct ib_pma_portcounters_ext *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_vport_counter(dev->mdev, 0, + port_num, out_cnt, sz); + if (!err) + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + } else { + struct ib_pma_portcounters *pma_cnt = + (struct ib_pma_portcounters *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, + out_cnt, sz); + if (!err) + pma_cnt_assign(pma_cnt, out_cnt); + } + + kvfree(out_cnt); + if (err) + return IB_MAD_RESULT_FAILURE; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + memset(out_mad->data, 0, sizeof(out_mad->data)); + + if (MLX5_CAP_GEN(mdev, vport_counters) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { + return process_pma_cmd(ibdev, port_num, in_mad, out_mad); + } else { + return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in_mad, out_mad); + } +} + int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_smp *in_mad = NULL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418ccbc98..5afbb697e691 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -487,6 +487,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; + if (MLX5_CAP_GEN(mdev, imaicl)) { + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_WINDOW_TYPE_2B; + props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + /* We support 'Gappy' memory registration too */ + props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; + } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; @@ -504,6 +511,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, (MLX5_CAP_ETH(dev->mdev, csum_cap))) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + props->device_cap_flags |= IB_DEVICE_UD_TSO; + } + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; @@ -529,7 +541,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; - props->max_fast_reg_page_list_len = (unsigned int)-1; + props->max_fast_reg_page_list_len = + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -1369,11 +1382,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) return 0; } +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr) { + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; @@ -1383,10 +1405,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int err = 0; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr)) + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = flow_attr->priority; + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; @@ -1434,6 +1458,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; u32 *match_c; u32 *match_v; + u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1459,9 +1484,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, /* Outer header support only */ match_criteria_enable = (!outer_header_zero(match_c)) << 0; + action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_c, match_v, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1481,6 +1508,29 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_dst = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); + if (!IS_ERR(handler)) { + handler_dst = create_flow_rule(dev, ft_prio, + flow_attr, dst); + if (IS_ERR(handler_dst)) { + mlx5_del_flow_rule(handler->rule); + kfree(handler); + handler = handler_dst; + } else { + list_add(&handler_dst->list, &handler->list); + } + } + + return handler; +} enum { LEFTOVERS_MC, LEFTOVERS_UC, @@ -1558,7 +1608,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || - flow_attr->flags) + (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -1577,8 +1627,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { + handler = create_dont_trap_rule(dev, ft_prio, + flow_attr, dst); + } else { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, @@ -1716,6 +1771,17 @@ static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_reg_pages, }; +static void pkey_change_handler(struct work_struct *work) +{ + struct mlx5_ib_port_resources *ports = + container_of(work, struct mlx5_ib_port_resources, + pkey_change_work); + + mutex_lock(&ports->devr->mutex); + mlx5_ib_gsi_pkey_change(ports->gsi); + mutex_unlock(&ports->devr->mutex); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1752,6 +1818,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; port = (u8)param; + + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: @@ -1838,7 +1906,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); mlx5_ib_destroy_qp(dev->umrc.qp); - ib_destroy_cq(dev->umrc.cq); + ib_free_cq(dev->umrc.cq); ib_dealloc_pd(dev->umrc.pd); } @@ -1853,7 +1921,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -1870,15 +1937,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - cq_attr.cqe = 128; - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, - &cq_attr); + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); goto error_2; } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); init_attr->send_cq = cq; init_attr->recv_cq = cq; @@ -1945,7 +2009,7 @@ error_4: mlx5_ib_destroy_qp(qp); error_3: - ib_destroy_cq(cq); + ib_free_cq(cq); error_2: ib_dealloc_pd(pd); @@ -1961,10 +2025,13 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; + int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + mutex_init(&devr->mutex); + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); @@ -2052,6 +2119,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + INIT_WORK(&devr->ports[port].pkey_change_work, + pkey_change_handler); + devr->ports[port].devr = devr; + } + return 0; error5: @@ -2070,12 +2143,20 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + struct mlx5_ib_dev *dev = + container_of(devr, struct mlx5_ib_dev, devr); + int port; + mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + + /* Make sure no change P_Key work items are still executing */ + for (port = 0; port < dev->num_ports; ++port) + cancel_work_sync(&devr->ports[port].pkey_change_work); } static u32 get_core_cap_flags(struct ib_device *ibdev) @@ -2198,6 +2279,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2258,6 +2340,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; @@ -2269,6 +2352,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + if (MLX5_CAP_GEN(mdev, imaicl)) { + dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; + dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b9737baa36..76b2b42e0535 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -43,6 +43,7 @@ #include <linux/mlx5/srq.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> +#include <rdma/ib_user_verbs.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -126,7 +127,7 @@ struct mlx5_ib_pd { }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) -#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif @@ -162,9 +163,31 @@ struct mlx5_ib_flow_db { #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) + +#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) +#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) +#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END + #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +/* + * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI + * creates the actual hardware QP. + */ +#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 +/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the mlx5_ib driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ + +/* Create a UD QP whose source QP number is 1 */ +static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) +{ + return IB_QP_CREATE_RESERVED_START; +} + struct wr_list { u16 opcode; u16 next; @@ -325,11 +348,14 @@ struct mlx5_ib_cq_buf { }; enum mlx5_ib_qp_flags { - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, - MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, - MLX5_IB_QP_MANAGED_SEND = 1 << 3, - MLX5_IB_QP_MANAGED_RECV = 1 << 4, + MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, + MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, + MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, + /* QP uses 1 as its source QP number */ + MLX5_IB_QP_SQPN_QP1 = 1 << 6, }; struct mlx5_umr_wr { @@ -373,6 +399,14 @@ struct mlx5_ib_cq { struct ib_umem *resize_umem; int cqe_size; u32 create_flags; + struct list_head wc_list; + enum ib_cq_notify_flags notify_flags; + struct work_struct notify_work; +}; + +struct mlx5_ib_wc { + struct ib_wc wc; + struct list_head list; }; struct mlx5_ib_srq { @@ -413,7 +447,8 @@ struct mlx5_ib_mr { int ndescs; int max_descs; int desc_size; - struct mlx5_core_mr mmr; + int access_mode; + struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; @@ -425,19 +460,20 @@ struct mlx5_ib_mr { struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; + int access_flags; /* Needed for rereg MR */ +}; + +struct mlx5_ib_mw { + struct ib_mw ibmw; + struct mlx5_core_mkey mmkey; }; struct mlx5_ib_umr_context { + struct ib_cqe cqe; enum ib_wc_status status; struct completion done; }; -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->status = -1; - init_completion(&context->done); -} - struct umr_common { struct ib_pd *pd; struct ib_cq *cq; @@ -487,6 +523,14 @@ struct mlx5_mr_cache { unsigned long last_add; }; +struct mlx5_ib_gsi_qp; + +struct mlx5_ib_port_resources { + struct mlx5_ib_resources *devr; + struct mlx5_ib_gsi_qp *gsi; + struct work_struct pkey_change_work; +}; + struct mlx5_ib_resources { struct ib_cq *c0; struct ib_xrcd *x0; @@ -494,6 +538,9 @@ struct mlx5_ib_resources { struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; + struct mlx5_ib_port_resources ports[2]; + /* Protects changes to the port resources */ + struct mutex mutex; }; struct mlx5_roce { @@ -558,9 +605,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) @@ -588,6 +635,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } +static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx5_ib_mw, ibmw); +} + struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -648,8 +700,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); +int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -700,7 +758,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -739,6 +796,23 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); +/* GSI QP helper functions */ +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr); +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask); +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); + +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -758,7 +832,7 @@ static inline u8 convert_access(int acc) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == IB_QPT_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI; } #define MLX5_MAX_UMR_SHIFT 16 diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6000f7aeede9..4d5bff151cdf 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,6 +40,7 @@ #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +#include "user.h" enum { MAX_PENDING_REG_MR = 8, @@ -57,7 +58,7 @@ static int clean_mr(struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ @@ -77,6 +78,40 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) return order - cache->ent[0].order; } +static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) +{ + return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= + length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void update_odp_mr(struct mlx5_ib_mr *mr) +{ + if (mr->umem->odp_data) { + /* + * This barrier prevents the compiler from moving the + * setting of umem->odp_data->private to point to our + * MR, before reg_umr finished, to ensure that the MR + * initialization have finished before starting to + * handle invalidations. + */ + smp_wmb(); + mr->umem->odp_data->private = mr; + /* + * Make sure we will see the new + * umem->odp_data->private value in the invalidation + * routines, before we can get page faults on the + * MR. Page faults can happen once we put the MR in + * the tree, below this line. Without the barrier, + * there can be a fault handling and an invalidation + * before umem->odp_data->private == mr is visible to + * the invalidation handler. + */ + smp_wmb(); + } +} +#endif + static void reg_mr_callback(int status, void *context) { struct mlx5_ib_mr *mr = context; @@ -86,7 +121,7 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -113,7 +148,7 @@ static void reg_mr_callback(int status, void *context) spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; cache->last_add = jiffies; @@ -124,10 +159,10 @@ static void reg_mr_callback(int status, void *context) spin_unlock_irqrestore(&ent->lock, flags); write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), - &mr->mmr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey); if (err) - pr_err("Error inserting to mr tree. 0x%x\n", -err); + pr_err("Error inserting to mkey tree. 0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); } @@ -168,7 +203,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { @@ -657,14 +692,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_in; kfree(in); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; return &mr->ibmr; @@ -693,10 +728,40 @@ static int use_umr(int order) return order <= MLX5_MAX_UMR_SHIFT; } -static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, - struct ib_sge *sg, u64 dma, int n, u32 key, - int page_shift, u64 virt_addr, u64 len, - int access_flags) +static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int npages, int page_shift, int *size, + __be64 **mr_pas, dma_addr_t *dma) +{ + __be64 *pas; + struct device *ddev = dev->ib_dev.dma_device; + + /* + * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the pas array, we allocate + * a little more. + */ + *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); + *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!(*mr_pas)) + return -ENOMEM; + + pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); + /* Clear padding after the actual pages. */ + memset(pas + npages, 0, *size - npages * sizeof(u64)); + + *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, *dma)) { + kfree(*mr_pas); + return -ENOMEM; + } + + return 0; +} + +static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -706,7 +771,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, sg->lkey = dev->umrc.pd->local_dma_lkey; wr->next = NULL; - wr->send_flags = 0; wr->sg_list = sg; if (n) wr->num_sge = 1; @@ -718,6 +782,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, umrwr->npages = n; umrwr->page_shift = page_shift; umrwr->mkey = key; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_umr_wr *umrwr = umr_wr(wr); + + prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); + + wr->send_flags = 0; + umrwr->target.virt_addr = virt_addr; umrwr->length = len; umrwr->access_flags = access_flags; @@ -734,26 +811,45 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, int *npages, + int *page_shift, int *ncont, int *order) { - struct mlx5_ib_umr_context *context; - struct ib_wc wc; - int err; - - while (1) { - err = ib_poll_cq(cq, 1, &wc); - if (err < 0) { - pr_warn("poll cq error %d\n", err); - return; - } - if (err == 0) - break; + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(umem)) { + mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); + return (void *)umem; + } - context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; - context->status = wc.status; - complete(&context->done); + mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + if (!*npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + ib_umem_release(umem); + return ERR_PTR(-EINVAL); } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + *npages, *ncont, *order, *page_shift); + + return umem; +} + +static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_umr_context *context = + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); + + context->status = wc->status; + complete(&context->done); +} + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->cqe.done = mlx5_ib_umr_done; + context->status = -1; + init_completion(&context->done); } static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, @@ -764,13 +860,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; __be64 *mr_pas; - __be64 *pas; dma_addr_t dma; int err = 0; int i; @@ -790,33 +885,17 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the pas array, we allocate - * a little more. */ - size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); - mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); - if (!mr_pas) { - err = -ENOMEM; + err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, + &dma); + if (err) goto free_mr; - } - pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); - mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); - /* Clear padding after the actual pages. */ - memset(pas + npages, 0, size - npages * sizeof(u64)); - - dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - err = -ENOMEM; - goto free_pas; - } + mlx5_ib_init_umr_context(&umr_context); - memset(&umrwr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + umrwr.wr.wr_cqe = &umr_context.cqe; + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -830,9 +909,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, } } - mr->mmr.iova = virt_addr; - mr->mmr.size = len; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.iova = virt_addr; + mr->mmkey.size = len; + mr->mmkey.pd = to_mpd(pd)->pdn; mr->live = 1; @@ -840,7 +919,6 @@ unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); -free_pas: kfree(mr_pas); free_mr: @@ -929,8 +1007,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + mlx5_ib_init_umr_context(&umr_context); + memset(&wr, 0, sizeof(wr)); - wr.wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_cqe = &umr_context.cqe; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), @@ -944,10 +1024,9 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, wr.wr.opcode = MLX5_IB_WR_UMR; wr.npages = sg.length / sizeof(u64); wr.page_shift = PAGE_SHIFT; - wr.mkey = mr->mmr.key; + wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { @@ -974,10 +1053,14 @@ free_pas: } #endif -static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, - u64 length, struct ib_umem *umem, - int npages, int page_shift, - int access_flags) +/* + * If ibmr is NULL it will be allocated by reg_create. + * Else, the given ibmr will be used. + */ +static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, + u64 virt_addr, u64 length, + struct ib_umem *umem, int npages, + int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; @@ -986,7 +1069,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1013,7 +1096,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); @@ -1024,7 +1107,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, mr->live = 1; kvfree(in); - mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); return mr; @@ -1032,11 +1115,23 @@ err_2: kvfree(in); err_1: - kfree(mr); + if (!ibmr) + kfree(mr); return ERR_PTR(err); } +static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + int npages, u64 length, int access_flags) +{ + mr->npages = npages; + atomic_add(npages, &dev->mdev->priv.reg_pages); + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->access_flags = access_flags; +} + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1052,22 +1147,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = ib_umem_get(pd->uobject->context, start, length, access_flags, - 0); - if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; - } + umem = mr_umem_get(pd, start, length, access_flags, &npages, + &page_shift, &ncont, &order); - mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); - if (!npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - err = -EINVAL; - goto error; - } - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - npages, ncont, order, page_shift); + if (IS_ERR(umem)) + return (void *)umem; if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1083,45 +1167,21 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } if (!mr) - mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, - access_flags); + mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, + page_shift, access_flags); if (IS_ERR(mr)) { err = PTR_ERR(mr); goto error; } - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + set_mr_fileds(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem->odp_data) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - mr->umem->odp_data->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } + update_odp_mr(mr); #endif return &mr->ibmr; @@ -1135,15 +1195,15 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; - memset(&umrwr.wr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); - mlx5_ib_init_umr_context(&umr_context); + + umrwr.wr.wr_cqe = &umr_context.cqe; + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); + down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -1165,6 +1225,167 @@ error: return err; } +static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, + u64 length, int npages, int page_shift, int order, + int access_flags, int flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_ib_umr_context umr_context; + struct ib_send_wr *bad; + struct mlx5_umr_wr umrwr = {}; + struct ib_sge sg; + struct umr_common *umrc = &dev->umrc; + dma_addr_t dma = 0; + __be64 *mr_pas = NULL; + int size; + int err; + + mlx5_ib_init_umr_context(&umr_context); + + umrwr.wr.wr_cqe = &umr_context.cqe; + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; + + if (flags & IB_MR_REREG_TRANS) { + err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size, + &mr_pas, &dma); + if (err) + return err; + + umrwr.target.virt_addr = virt_addr; + umrwr.length = length; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + } + + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, + page_shift); + + if (flags & IB_MR_REREG_PD) { + umrwr.pd = pd; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; + } + + if (flags & IB_MR_REREG_ACCESS) { + umrwr.access_flags = access_flags; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; + } + + /* post send request to UMR QP */ + down(&umrc->sem); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); + + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed (%u)\n", + umr_context.status); + err = -EFAULT; + } + } + + up(&umrc->sem); + if (flags & IB_MR_REREG_TRANS) { + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + kfree(mr_pas); + } + return err; +} + +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int new_access_flags, + struct ib_pd *new_pd, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; + int access_flags = flags & IB_MR_REREG_ACCESS ? + new_access_flags : + mr->access_flags; + u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; + u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; + int page_shift = 0; + int npages = 0; + int ncont = 0; + int order = 0; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); + + if (flags != IB_MR_REREG_PD) { + /* + * Replace umem. This needs to be done whether or not UMR is + * used. + */ + flags |= IB_MR_REREG_TRANS; + ib_umem_release(mr->umem); + mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, + &page_shift, &ncont, &order); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + mr->umem = NULL; + return err; + } + } + + if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + /* + * UMR can't be used - MKey needs to be replaced. + */ + if (mr->umred) { + err = unreg_umr(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to unregister MR\n"); + } else { + err = destroy_mkey(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to destroy MKey\n"); + } + if (err) + return err; + + mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, + page_shift, access_flags); + + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->umred = 0; + } else { + /* + * Send a UMR WQE + */ + err = rereg_umr(pd, mr, addr, len, npages, page_shift, + order, access_flags, flags); + if (err) { + mlx5_ib_warn(dev, "Failed to rereg UMR\n"); + return err; + } + } + + if (flags & IB_MR_REREG_PD) { + ib_mr->pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + } + + if (flags & IB_MR_REREG_ACCESS) + mr->access_flags = access_flags; + + if (flags & IB_MR_REREG_TRANS) { + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + set_mr_fileds(dev, mr, npages, len, access_flags); + mr->mmkey.iova = addr; + mr->mmkey.size = len; + } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + update_odp_mr(mr); +#endif + + return 0; +} + static int mlx5_alloc_priv_descs(struct ib_device *device, struct mlx5_ib_mr *mr, @@ -1236,7 +1457,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmr.key, err); + mr->mmkey.key, err); return err; } } else { @@ -1300,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; - int access_mode, err; - int ndescs = roundup(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg, 4); + int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -1319,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - access_mode = MLX5_ACCESS_MODE_MTT; + mr->access_mode = MLX5_ACCESS_MODE_MTT; in->seg.log2_page_size = PAGE_SHIFT; err = mlx5_alloc_priv_descs(pd->device, mr, @@ -1329,6 +1550,15 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; + } else if (mr_type == IB_MR_TYPE_SG_GAPS) { + mr->access_mode = MLX5_ACCESS_MODE_KLM; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(struct mlx5_klm)); + if (err) + goto err_free_in; + mr->desc_size = sizeof(struct mlx5_klm); + mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; @@ -1347,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1361,14 +1591,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), + in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_destroy_psv; - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); @@ -1395,6 +1625,88 @@ err_free: return ERR_PTR(err); } +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in = NULL; + struct mlx5_ib_mw *mw = NULL; + int ndescs; + int err; + struct mlx5_ib_alloc_mw req = {}; + struct { + __u32 comp_mask; + __u32 response_length; + } resp = {}; + + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); + if (err) + return ERR_PTR(err); + + if (req.comp_mask || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + udata->inlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + + ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); + + mw = kzalloc(sizeof(*mw), GFP_KERNEL); + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!mw || !in) { + err = -ENOMEM; + goto free; + } + + in->seg.status = MLX5_MKEY_STATUS_FREE; + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | + MLX5_PERM_LOCAL_READ; + if (type == IB_MW_TYPE_2) + in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto free; + + mw->ibmw.rkey = mw->mmkey.key; + + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); + if (resp.response_length) { + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) { + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + goto free; + } + } + + kfree(in); + return &mw->ibmw; + +free: + kfree(mw); + kfree(in); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_mw(struct ib_mw *mw) +{ + struct mlx5_ib_mw *mmw = to_mmw(mw); + int err; + + err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, + &mmw->mmkey); + if (!err) + kfree(mmw); + return err; +} + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -1436,6 +1748,32 @@ done: return ret; } +static int +mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, + struct scatterlist *sgl, + unsigned short sg_nents) +{ + struct scatterlist *sg = sgl; + struct mlx5_klm *klms = mr->descs; + u32 lkey = mr->ibmr.pd->local_dma_lkey; + int i; + + mr->ibmr.iova = sg_dma_address(sg); + mr->ibmr.length = 0; + mr->ndescs = sg_nents; + + for_each_sg(sgl, sg, sg_nents, i) { + if (unlikely(i > mr->max_descs)) + break; + klms[i].va = cpu_to_be64(sg_dma_address(sg)); + klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); + klms[i].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg); + } + + return i; +} + static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -1463,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); + else + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b8d76361a48d..34e79e709c67 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, u32 key) { u32 base_key = mlx5_base_mkey(key); - struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); - struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); + struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); + struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mmr || mmr->key != key || !mr->live) + if (!mmkey || mmkey->key != key || !mr->live) return NULL; - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, @@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp, io_virt += pfault->mpfault.bytes_committed; bcnt -= pfault->mpfault.bytes_committed; - start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; + start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; if (mr->umem->writable) access_mask |= ODP_WRITE_ALLOWED_BIT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 34cb8e87c7b8..8dee8bc1e0fe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -58,6 +58,7 @@ enum { static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_LSO] = MLX5_OPCODE_LSO, [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, @@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = { [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; static int is_qp0(enum ib_qp_type qp_type) { @@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, return 0; } -static int sq_overhead(enum ib_qp_type qp_type) +static int sq_overhead(struct ib_qp_init_attr *attr) { int size = 0; - switch (qp_type) { + switch (attr->qp_type) { case IB_QPT_XRC_INI: size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ @@ -287,8 +291,12 @@ static int sq_overhead(enum ib_qp_type qp_type) break; case IB_QPT_UD: + if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + size += sizeof(struct mlx5_wqe_eth_pad) + + sizeof(struct mlx5_wqe_eth_seg); + /* fall through */ case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; @@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) int inl_size = 0; int size; - size = sq_overhead(attr->qp_type); + size = sq_overhead(attr); if (size < 0) return size; @@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -EINVAL; } - qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - - sizeof(struct mlx5_wqe_inline_seg); + qp->max_inline_data = wqe_size - sq_overhead(attr) - + sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) @@ -590,7 +598,7 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_XRC_INI: case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; case IB_QPT_SMI: return MLX5_QP_ST_QP0; - case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; case IB_QPT_RAW_PACKET: case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; @@ -783,7 +791,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev->priv.uuari; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_IPOIB_UD_LSO | + mlx5_ib_create_qp_sqpn_qp1())) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -828,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, (*in)->ctx.params1 |= cpu_to_be32(1 << 11); (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + (*in)->ctx.deth_sqpn = cpu_to_be32(1); + qp->flags |= MLX5_IB_QP_SQPN_QP1; + } + mlx5_fill_page_array(&qp->buf, (*in)->pas); err = mlx5_db_alloc(dev->mdev, &qp->db); @@ -1228,6 +1244,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) qp->flags |= MLX5_IB_QP_MANAGED_RECV; } + + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) + if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); + return -EOPNOTSUPP; + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -1271,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd.sq_wqe_count, max_wqes); return -EINVAL; } + if (init_attr->create_flags & + mlx5_ib_create_qp_sqpn_qp1()) { + mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); + return -EINVAL; + } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) @@ -1385,6 +1414,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* 0xffffff means we ask to work with cqe version 0 */ MLX5_SET(qpc, qpc, user_index, uidx); } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + qp->flags |= MLX5_IB_QP_LSO; + } if (init_attr->qp_type == IB_QPT_RAW_PACKET) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; @@ -1494,7 +1530,7 @@ static void get_cqs(struct mlx5_ib_qp *qp, break; case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -1657,7 +1693,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case MLX5_IB_QPT_REG_UMR: qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) @@ -1686,6 +1722,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, break; + case IB_QPT_GSI: + return mlx5_ib_gsi_create_qp(pd, init_attr); + case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_MAX: @@ -1704,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); + if (unlikely(qp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_destroy_qp(qp); + destroy_qp_common(dev, mqp); kfree(mqp); @@ -2161,8 +2203,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); - if (err < 0) + if (err < 0) { + mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); goto out; + } context->flags = cpu_to_be32(err << 16); @@ -2182,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { @@ -2284,6 +2328,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -2363,11 +2409,18 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); + + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? + IB_QPT_GSI : ibqp->qp_type; + mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -2378,32 +2431,46 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - ll)) + if (qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { + mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", + cur_state, new_state, ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || - attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { + mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", + attr->port_num, dev->num_ports); goto out; + } if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) + dev->mdev->port_caps[port - 1].pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", + attr->pkey_index); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); goto out; + } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); goto out; + } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; @@ -2442,6 +2509,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } +static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, + struct ib_send_wr *wr, void *qend, + struct mlx5_ib_qp *qp, int *size) +{ + void *seg = eseg; + + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); + + if (wr->send_flags & IB_SEND_IP_CSUM) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + + seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; + + if (wr->opcode == IB_WR_LSO) { + struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); + int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start); + u64 left, leftlen, copysz; + void *pdata = ud_wr->header; + + left = ud_wr->hlen; + eseg->mss = cpu_to_be16(ud_wr->mss); + eseg->inline_hdr_sz = cpu_to_be16(left); + + /* + * check if there is space till the end of queue, if yes, + * copy all in one shot, otherwise copy till the end of queue, + * rollback and than the copy the left + */ + leftlen = qend - (void *)eseg->inline_hdr_start; + copysz = min_t(u64, leftlen, left); + + memcpy(seg - size_of_inl_hdr_start, pdata, copysz); + + if (likely(copysz > size_of_inl_hdr_start)) { + seg += ALIGN(copysz - size_of_inl_hdr_start, 16); + *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; + } + + if (unlikely(copysz < left)) { /* the last wqe in the queue */ + seg = mlx5_get_send_wqe(qp, 0); + left -= copysz; + pdata += copysz; + memcpy(seg, pdata, left); + seg += ALIGN(left, 16); + *size += ALIGN(left, 16) / 16; + } + } + + return seg; +} + static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -2509,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, int ndescs = mr->ndescs; memset(umr, 0, sizeof(*umr)); + + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + umr->flags = MLX5_UMR_CHECK_NOT_FREE; umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); @@ -2558,6 +2683,44 @@ static __be64 get_umr_update_mtt_mask(void) return cpu_to_be64(result); } +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { @@ -2576,9 +2739,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = get_umr_update_mtt_mask(); umr->bsf_octowords = get_klm_octo(umrwr->target.offset); umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } else { - umr->mkey_mask = get_umr_reg_mr_mask(); } + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) + umr->mkey_mask |= get_umr_update_translation_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS) + umr->mkey_mask |= get_umr_update_access_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) + umr->mkey_mask |= get_umr_update_pd_mask(); + if (!umr->mkey_mask) + umr->mkey_mask = get_umr_reg_mr_mask(); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -2603,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, int ndescs = ALIGN(mr->ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + + if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + seg->log2_page_size = ilog2(mr->ibmr.page_size); + else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + + seg->flags = get_umr_flags(access) | mr->access_mode; seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); seg->start_addr = cpu_to_be64(mr->ibmr.iova); seg->len = cpu_to_be64(mr->ibmr.length); seg->xlt_oct_size = cpu_to_be32(ndescs); - seg->log2_page_size = ilog2(mr->ibmr.page_size); } static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) @@ -2630,7 +2805,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w seg->flags = convert_access(umrwr->access_flags); if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + if (umrwr->pd) + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); } seg->len = cpu_to_be64(umrwr->length); @@ -3196,13 +3372,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf = qp->bf; + struct mlx5_bf *bf; int uninitialized_var(size); - void *qend = qp->sq.qend; + void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -3214,6 +3390,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u8 next_fence = 0; u8 fence; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); + + qp = to_mqp(ibqp); + bf = qp->bf; + qend = qp->sq.qend; + spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -3373,16 +3556,37 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case IB_QPT_UD: case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); break; + case IB_QPT_UD: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + seg += sizeof(struct mlx5_wqe_eth_pad); + size += sizeof(struct mlx5_wqe_eth_pad) / 16; + seg = set_eth_seg(seg, wr, qend, qp, &size); + + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + } + break; case MLX5_IB_QPT_REG_UMR: if (wr->opcode != MLX5_IB_WR_UMR) { err = -EINVAL; @@ -3502,6 +3706,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int i; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); + spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); @@ -3822,6 +4029,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, + qp_init_attr); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * Wait for any outstanding page faults, in case the user frees memory @@ -3874,6 +4085,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index b94a55404a59..61bc308bb802 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; +struct mlx5_ib_alloc_mw { + __u32 comp_mask; + __u8 num_klms; + __u8 reserved1; + __u16 reserved2; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, |