diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5/cq.c')
| -rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 1086 |
1 files changed, 862 insertions, 224 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 344ab03948a3..651d76bca114 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,10 +32,16 @@ #include <linux/kref.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "mlx5_ib.h" -#include "user.h" +#include "srq.h" +#include "qp.h" -static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) { struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; @@ -63,14 +69,14 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) } } -static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) +static void *get_cqe(struct mlx5_ib_cq *cq, int n) { - return mlx5_buf_offset(&buf->buf, n * size); + return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n); } -static void *get_cqe(struct mlx5_ib_cq *cq, int n) +static u8 sw_ownership_bit(int n, int nent) { - return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); + return (n & nent) ? 1 : 0; } static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) @@ -79,8 +85,13 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) struct mlx5_cqe64 *cqe64; cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ - !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; + + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { + return cqe; + } else { + return NULL; + } } static void *next_cqe_sw(struct mlx5_ib_cq *cq) @@ -97,8 +108,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; - case IB_WR_FAST_REG_MR: - return IB_WC_FAST_REG_MR; + case IB_WR_REG_MR: + return IB_WC_REG_MR; default: pr_warn("unknown completion status\n"); @@ -113,11 +124,13 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { case MLX5_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + fallthrough; case MLX5_OPCODE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case MLX5_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + fallthrough; case MLX5_OPCODE_SEND: case MLX5_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; @@ -142,9 +155,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case MLX5_OPCODE_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; case MLX5_OPCODE_UMR: wc->opcode = get_umr_comp(wq, idx); break; @@ -159,19 +169,22 @@ enum { static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_ib_qp *qp) { + enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_srq *srq = NULL; struct mlx5_ib_wq *wq; u16 wqe_ctr; + u8 roce_packet_type; + bool vlan_present; u8 g; if (qp->ibqp.srq || qp->ibqp.xrcd) { struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(&dev->mdev, - be32_to_cpu(cqe->srqn)); - srq = to_mibsrq(msrq); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); + if (msrq) + srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } @@ -179,8 +192,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); - if (msrq && atomic_dec_and_test(&msrq->refcount)) - complete(&msrq->free); + if (msrq) + mlx5_core_res_put(&msrq->common); } } else { wq = &qp->rq; @@ -189,53 +202,88 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->imm_inval_pkey; + wc->ex.imm_data = cqe->immediate; break; case MLX5_CQE_RESP_SEND: wc->opcode = IB_WC_RECV; - wc->wc_flags = 0; + wc->wc_flags = IB_WC_IP_CSUM_OK; + if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK)))) + wc->wc_flags = 0; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->imm_inval_pkey; + wc->ex.imm_data = cqe->immediate; break; case MLX5_CQE_RESP_SEND_INV: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_INVALIDATE; - wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); + wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey); break; } - wc->slid = be16_to_cpu(cqe->slid); - wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; wc->wc_flags |= g ? IB_WC_GRH : 0; - wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; + if (is_qp1(qp->type)) { + u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff; + + ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey, + &wc->pkey_index); + } else { + wc->pkey_index = 0; + } + + if (ll != IB_LINK_LAYER_ETHERNET) { + wc->slid = be16_to_cpu(cqe->slid); + wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; + return; + } + + wc->slid = 0; + vlan_present = cqe->l4_l3_hdr_type & 0x1; + roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3; + if (vlan_present) { + wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff; + wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7; + wc->wc_flags |= IB_WC_WITH_VLAN; + } else { + wc->sl = 0; + } + + switch (roce_packet_type) { + case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: + wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: + wc->network_hdr_type = RDMA_NETWORK_IPV6; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4: + wc->network_hdr_type = RDMA_NETWORK_IPV4; + break; + } + wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } -static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, + struct ib_wc *wc, const char *level) { - __be32 *p = (__be32 *)cqe; - int i; - - mlx5_ib_warn(dev, "dump error cqe\n"); - for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) - pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), - be32_to_cpu(p[1]), be32_to_cpu(p[2]), - be32_to_cpu(p[3])); + mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status, + ib_wc_status_msg(wc->status)); + print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), false); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, struct ib_wc *wc) { - int dump = 1; + const char *dump = KERN_WARNING; switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: @@ -245,10 +293,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_LOC_QP_OP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_LOC_PROT_ERR; break; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: - dump = 0; + dump = NULL; wc->status = IB_WC_WR_FLUSH_ERR; break; case MLX5_CQE_SYNDROME_MW_BIND_ERR: @@ -264,18 +313,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_REM_INV_REQ_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_OP_ERR; break; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RNR_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IB_WC_REM_ABORT_ERR; @@ -287,68 +338,113 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->vendor_err = cqe->vendor_err_synd; if (dump) - dump_cqe(dev, cqe); + dump_cqe(dev, cqe, wc, dump); } -static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) { - /* TBD: waiting decision - */ - return 0; + u16 idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; } -static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { - struct mlx5_wqe_data_seg *dpseg; - void *addr; + mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); +} - dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg); - addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); - return addr; +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); } -static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - uint16_t idx) +static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, + int *npolled, bool is_send) { - void *addr; - int byte_count; + struct mlx5_ib_wq *wq; + unsigned int cur; + int np; int i; - if (!is_atomic_response(qp, idx)) + wq = (is_send) ? &qp->sq : &qp->rq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) return; - byte_count = be32_to_cpu(cqe64->byte_cnt); - addr = mlx5_get_atomic_laddr(qp, idx); + for (i = 0; i < cur && np < num_entries; i++) { + unsigned int idx; - if (byte_count == 4) { - *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); - } else { - for (i = 0; i < byte_count; i += 8) { - *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); - addr += 8; - } + idx = (is_send) ? wq->last_poll : wq->tail; + idx &= (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + if (is_send) + wq->last_poll = wq->w_list[idx].next; + np++; + wc->qp = &qp->ibqp; + wc++; } - - return; + *npolled = np; } -static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - u16 tail, u16 head) +static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) { - int idx; - - do { - idx = tail & (qp->sq.wqe_cnt - 1); - handle_atomic(qp, cqe64, idx); - if (idx == head) - break; + struct mlx5_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and return mmics ones */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + sw_comp(qp, num_entries, wc + *npolled, npolled, true); + if (*npolled >= num_entries) + return; + } - tail = qp->sq.w_list[idx].next; - } while (1); - tail = qp->sq.w_list[idx].next; - qp->sq.last_poll = tail; + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + sw_comp(qp, num_entries, wc + *npolled, npolled, false); + if (*npolled >= num_entries) + return; + } } static int mlx5_poll_one(struct mlx5_ib_cq *cq, @@ -366,6 +462,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, void *cqe; int idx; +repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; @@ -379,26 +476,30 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, */ rmb(); - /* TBD: resize CQ */ + opcode = get_cqe_opcode(cqe64); + if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { + if (likely(cq->resize_buf)) { + free_cq_buf(dev, &cq->buf); + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + goto repoll; + } else { + mlx5_ib_warn(dev, "unexpected resize cqe\n"); + } + } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; - if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ - mqp = __mlx5_qp_lookup(&dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - + mqp = radix_tree_lookup(&dev->qp_table.tree, qpn); *cur_qp = to_mibqp(mqp); } wc->qp = &(*cur_qp)->ibqp; - opcode = cqe64->op_own >> 4; switch (opcode) { case MLX5_CQE_REQ: wq = &(*cur_qp)->sq; @@ -428,6 +529,10 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, "Requestor" : "Responder", cq->mcq.cqn); mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", err_cqe->syndrome, err_cqe->vendor_err_synd); + if (wc->status != IB_WC_WR_FLUSH_ERR && + (*cur_qp)->type == MLX5_IB_QPT_REG_UMR) + dev->umrc.state = MLX5_UMR_STATE_RECOVER; + if (opcode == MLX5_CQE_REQ_ERR) { wq = &(*cur_qp)->sq; wqe_ctr = be16_to_cpu(cqe64->wqe_counter); @@ -449,179 +554,376 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, } } break; + case MLX5_CQE_SIG_ERR: { + struct mlx5_sig_err_cqe *sig_err_cqe = + (struct mlx5_sig_err_cqe *)cqe64; + struct mlx5_core_sig_ctx *sig; + + xa_lock(&dev->sig_mrs); + sig = xa_load(&dev->sig_mrs, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + get_sig_err_item(sig_err_cqe, &sig->err_item); + sig->sig_err_exists = true; + sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, sig->err_item.key, + sig->err_item.err_type, + sig->err_item.sig_err_offset, + sig->err_item.expected, + sig->err_item.actual); + + xa_unlock(&dev->sig_mrs); + goto repoll; + } } return 0; } +static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc, bool is_fatal_err) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_ib_wc *soft_wc, *next; + int npolled = 0; + + list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { + if (npolled >= num_entries) + break; + + mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", + cq->mcq.cqn); + + if (unlikely(is_fatal_err)) { + soft_wc->wc.status = IB_WC_WR_FLUSH_ERR; + soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + } + wc[npolled++] = soft_wc->wc; + list_del(&soft_wc->list); + kfree(soft_wc); + } + + return npolled; +} + int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; + int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + /* make sure no soft wqe's are waiting */ + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc, true); + + mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled, + wc + soft_polled, &npolled); + goto out; + } - for (npolled = 0; npolled < num_entries; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc, false); + + for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } if (npolled) mlx5_cq_set_ci(&cq->mcq); - +out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { - mlx5_cq_arm(&to_mcq(ibcq)->mcq, + struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; + struct mlx5_ib_cq *cq = to_mcq(ibcq); + void __iomem *uar_page = mdev->priv.bfreg.up->map; + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, irq_flags); + if (cq->notify_flags != IB_CQ_NEXT_COMP) + cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; + + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) + ret = 1; + spin_unlock_irqrestore(&cq->lock, irq_flags); + + mlx5_cq_arm(&cq->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, - to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map, - MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock)); + uar_page, to_mcq(ibcq)->mcq.cons_index); - return 0; + return ret; } -static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, - int nent, int cqe_size) +static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, + struct mlx5_ib_cq_buf *buf, + int nent, + int cqe_size) { + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); + u8 log_wq_sz = ilog2(cqe_size); int err; - err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size, - PAGE_SIZE * 2, &buf->buf); + err = mlx5_frag_buf_alloc_node(dev->mdev, + nent * cqe_size, + frag_buf, + dev->mdev->priv.numa_node); if (err) return err; + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + buf->cqe_size = cqe_size; + buf->nent = nent; return 0; } -static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +enum { + MLX5_CQE_RES_FORMAT_HASH = 0, + MLX5_CQE_RES_FORMAT_CSUM = 1, + MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3, +}; + +static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) { - mlx5_buf_free(&dev->mdev, &buf->buf); + switch (format) { + case MLX5_IB_CQE_RES_FORMAT_HASH: + return MLX5_CQE_RES_FORMAT_HASH; + case MLX5_IB_CQE_RES_FORMAT_CSUM: + return MLX5_CQE_RES_FORMAT_CSUM; + case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX: + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + return MLX5_CQE_RES_FORMAT_CSUM_STRIDX; + return -EOPNOTSUPP; + default: + return -EINVAL; + } } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, - struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, struct mlx5_create_cq_mbox_in **cqb, - int *cqe_size, int *index, int *inlen) + struct mlx5_ib_cq *cq, int entries, u32 **cqb, + int *cqe_size, int *index, int *inlen, + struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_create_cq ucmd; - int page_shift; - int npages; + struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; + size_t ucmdlen; + __be64 *pas; int ncont; + void *cqc; int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + ucmdlen = min(udata->inlen, sizeof(ucmd)); + if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags)) + return -EINVAL; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; - if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) + if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD | + MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX | + MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS))) + return -EINVAL; + + if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) || + ucmd.reserved0 || ucmd.reserved1) return -EINVAL; *cqe_size = ucmd.cqe_size; - cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, - entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + cq->buf.umem = + ib_umem_get(&dev->ib_dev, ucmd.buf_addr, + entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, - &cq->db); + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; - *cqb = mlx5_vzalloc(*inlen); + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; + *cqb = kvzalloc(*inlen, GFP_KERNEL); if (!*cqb) { err = -ENOMEM; goto err_db; } - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); - (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; - *index = to_mucontext(context)->uuari.uars[0].index; + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) { + err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX); + if (err) + goto err_cqb; + } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { + *index = ucmd.uar_page_index; + } else if (context->bfregi.lib_uar_dyn) { + err = -EINVAL; + goto err_cqb; + } else { + *index = context->bfregi.sys_pages[0]; + } + + if (ucmd.cqe_comp_en == 1) { + int mini_cqe_format; + + if (!((*cqe_size == 128 && + MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || + (*cqe_size == 64 && + MLX5_CAP_GEN(dev->mdev, cqe_compression)))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", + *cqe_size); + goto err_cqb; + } + + mini_cqe_format = + mini_cqe_res_format_to_hw(dev, + ucmd.cqe_comp_res_format); + if (mini_cqe_format < 0) { + err = mini_cqe_format; + mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n", + ucmd.cqe_comp_res_format, err); + goto err_cqb; + } + + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format); + } + + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { + if (*cqe_size != 128 || + !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, + "CQE padding is not supported for CQE size of %dB!\n", + *cqe_size); + goto err_cqb; + } + + cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; + } + + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS) + cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS; + MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); return 0; +err_cqb: + kvfree(*cqb); + err_db: - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + mlx5_ib_db_unmap_user(context, &cq->db); err_umem: ib_umem_release(cq->buf.umem); return err; } -static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + mlx5_ib_db_unmap_user(context, &cq->db); ib_umem_release(cq->buf.umem); } -static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf) { int i; void *cqe; struct mlx5_cqe64 *cqe64; - for (i = 0; i < nent; i++) { - cqe = get_cqe(cq, i); - cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; - cqe64->op_own = 0xf1; + for (i = 0; i < buf->nent; i++) { + cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i); + cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; + cqe64->op_own = MLX5_CQE_INVALID << 4; } } static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size, - struct mlx5_create_cq_mbox_in **cqb, - int *index, int *inlen) + u32 **cqb, int *index, int *inlen) { + __be64 *pas; + void *cqc; int err; - err = mlx5_db_alloc(&dev->mdev, &cq->db); + err = mlx5_db_alloc(dev->mdev, &cq->db); if (err) return err; cq->mcq.set_ci_db = cq->db.db; cq->mcq.arm_db = cq->db.db + 1; - *cq->mcq.set_ci_db = 0; - *cq->mcq.arm_db = 0; cq->mcq.cqe_sz = cqe_size; - err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); + err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size); if (err) goto err_db; - init_cq_buf(cq, entries); + init_cq_frag_buf(&cq->buf); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; - *cqb = mlx5_vzalloc(*inlen); + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * + cq->buf.frag_buf.npages; + *cqb = kvzalloc(*inlen, GFP_KERNEL); if (!*cqb) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); - (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT; - *index = dev->mdev.priv.uuari.uars[0].index; + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + cq->buf.frag_buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + + *index = dev->mdev->priv.bfreg.up->index; return 0; @@ -629,135 +931,164 @@ err_buf: free_cq_buf(dev, &cq->buf); err_db: - mlx5_db_free(&dev->mdev, &cq->db); + mlx5_db_free(dev->mdev, &cq->db); return err; } static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) { free_cq_buf(dev, &cq->buf); - mlx5_db_free(&dev->mdev, &cq->db); + mlx5_db_free(dev->mdev, &cq->db); +} + +static void notify_soft_wc_handler(struct work_struct *work) +{ + struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, + notify_work); + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata) +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs) { - struct mlx5_create_cq_mbox_in *cqb = NULL; + struct ib_udata *udata = &attrs->driver_udata; + struct ib_device *ibdev = ibcq->device; + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_cq *cq; - int uninitialized_var(index); - int uninitialized_var(inlen); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + int index; + int inlen; + u32 *cqb = NULL; + void *cqc; int cqe_size; - int irqn; int eqn; int err; - entries = roundup_pow_of_two(entries + 1); - if (entries < 1 || entries > dev->mdev.caps.max_cqes) - return ERR_PTR(-EINVAL); + if (entries < 0 || + (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) + return -EINVAL; - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + if (check_cq_create_flags(attr->flags)) + return -EOPNOTSUPP; + + entries = roundup_pow_of_two(entries + 1); + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) + return -EINVAL; cq->ibcq.cqe = entries - 1; mutex_init(&cq->resize_mutex); spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); - if (context) { - err = create_cq_user(dev, udata, context, cq, entries, - &cqb, &cqe_size, &index, &inlen); + if (udata) { + err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, + &index, &inlen, attrs); if (err) - goto err_create; + return err; } else { - /* for now choose 64 bytes till we have a proper interface */ - cqe_size = 64; + cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) - goto err_create; + return err; + + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - cq->cqe_size = cqe_size; - cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); - err = mlx5_vector2eqn(dev, vector, &eqn, &irqn); + err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn); if (err) goto err_cqb; - cqb->ctx.c_eqn = cpu_to_be16(eqn); - cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + cq->cqe_size = cqe_size; - err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen); + cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + MLX5_SET(cqc, cqc, uar_page, index); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); + if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) + MLX5_SET(cqc, cqc, oi, 1); + + if (udata) { + cq->mcq.comp = mlx5_add_cq_to_tasklet; + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; + } else { + cq->mcq.comp = mlx5_ib_cq_comp; + } + + err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); if (err) goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; - cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; - if (context) + INIT_LIST_HEAD(&cq->wc_list); + + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; } - mlx5_vfree(cqb); - return &cq->ibcq; + kvfree(cqb); + return 0; err_cmd: - mlx5_core_destroy_cq(&dev->mdev, &cq->mcq); + mlx5_core_destroy_cq(dev->mdev, &cq->mcq); err_cqb: - mlx5_vfree(cqb); - if (context) - destroy_cq_user(cq, context); + kvfree(cqb); + if (udata) + destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); - -err_create: - kfree(cq); - - return ERR_PTR(err); + return err; } - -int mlx5_ib_destroy_cq(struct ib_cq *cq) +int mlx5_ib_pre_destroy_cq(struct ib_cq *cq) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); - struct ib_ucontext *context = NULL; - - if (cq->uobject) - context = cq->uobject->context; - mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq); - if (context) - destroy_cq_user(mcq, context); - else - destroy_cq_kernel(dev, mcq); - - kfree(mcq); + return mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); +} - return 0; +void mlx5_ib_post_destroy_cq(struct ib_cq *cq) +{ + destroy_cq_kernel(to_mdev(cq->device), to_mcq(cq)); } -static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq, - u32 rsn) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - u32 lrsn; + int ret; + + ret = mlx5_ib_pre_destroy_cq(cq); + if (ret) + return ret; - if (srq) - lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff; + if (udata) + destroy_cq_user(to_mcq(cq), udata); else - lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff; + mlx5_ib_post_destroy_cq(cq); + return 0; +} - return rsn == lrsn; +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) +{ + return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff); } void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) @@ -787,8 +1118,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (is_equal_rsn(cqe64, srq, rsn)) { - if (srq) + if (is_equal_rsn(cqe64, rsn)) { + if (srq && (ntohl(cqe64->srqn) & 0xffffff)) mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); ++nfreed; } else if (nfreed) { @@ -823,15 +1154,284 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - return -ENOSYS; + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + int err; + + if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) + return -EOPNOTSUPP; + + if (cq_period > MLX5_MAX_CQ_PERIOD) + return -EINVAL; + + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, + cq_period, cq_count); + if (err) + mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); + + return err; +} + +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, struct ib_udata *udata, + int *cqe_size) +{ + struct mlx5_ib_resize_cq ucmd; + struct ib_umem *umem; + int err; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) + return err; + + if (ucmd.reserved0 || ucmd.reserved1) + return -EINVAL; + + /* check multiplication overflow */ + if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) + return -EINVAL; + + umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, + (size_t)ucmd.cqe_size * entries, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + return err; + } + + cq->resize_umem = umem; + *cqe_size = ucmd.cqe_size; + + return 0; +} + +static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size) +{ + int err; + + cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); + if (!cq->resize_buf) + return -ENOMEM; + + err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size); + if (err) + goto ex; + + init_cq_frag_buf(cq->resize_buf); + + return 0; + +ex: + kfree(cq->resize_buf); + return err; +} + +static int copy_resize_cqes(struct mlx5_ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_cqe64 *scqe64; + struct mlx5_cqe64 *dcqe64; + void *start_cqe; + void *scqe; + void *dcqe; + int ssize; + int dsize; + int i; + u8 sw_own; + + ssize = cq->buf.cqe_size; + dsize = cq->resize_buf->cqe_size; + if (ssize != dsize) { + mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); + return -EINVAL; + } + + i = cq->mcq.cons_index; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + start_cqe = scqe; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { + dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, + (i + 1) & cq->resize_buf->nent); + dcqe64 = dsize == 64 ? dcqe : dcqe + 64; + sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); + memcpy(dcqe, scqe, dsize); + dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; + + ++i; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + if (scqe == start_cqe) { + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", + cq->mcq.cqn); + return -ENOMEM; + } + } + ++cq->mcq.cons_index; + return 0; } int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { - return -ENOSYS; + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + void *cqc; + u32 *in; + int err; + int npas; + __be64 *pas; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; + int inlen; + int cqe_size; + unsigned long flags; + + if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { + pr_info("Firmware does not support resize CQ\n"); + return -ENOSYS; + } + + if (entries < 1 || + entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { + mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", + entries, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); + return -EINVAL; + } + + entries = roundup_pow_of_two(entries + 1); + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) + return -EINVAL; + + if (entries == ibcq->cqe + 1) + return 0; + + mutex_lock(&cq->resize_mutex); + if (udata) { + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); + } else { + struct mlx5_frag_buf *frag_buf; + + cqe_size = 64; + err = resize_kernel(dev, cq, entries, cqe_size); + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; + } + + inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto ex_resize; + } + + pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); + if (udata) + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, + 0); + else + mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); + + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.resize_field_select.resize_field_select, + MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + + MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); + MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); + + err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); + if (err) + goto ex_alloc; + + if (udata) { + cq->ibcq.cqe = entries - 1; + ib_umem_release(cq->buf.umem); + cq->buf.umem = cq->resize_umem; + cq->resize_umem = NULL; + } else { + struct mlx5_ib_cq_buf tbuf; + int resized = 0; + + spin_lock_irqsave(&cq->lock, flags); + if (cq->resize_buf) { + err = copy_resize_cqes(cq); + if (!err) { + tbuf = cq->buf; + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + resized = 1; + } + } + cq->ibcq.cqe = entries - 1; + spin_unlock_irqrestore(&cq->lock, flags); + if (resized) + free_cq_buf(dev, &tbuf); + } + mutex_unlock(&cq->resize_mutex); + + kvfree(in); + return 0; + +ex_alloc: + kvfree(in); + +ex_resize: + ib_umem_release(cq->resize_umem); + if (!udata) { + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; + } +ex: + mutex_unlock(&cq->resize_mutex); + return err; } -int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) +int mlx5_ib_get_cqe_size(struct ib_cq *ibcq) { struct mlx5_ib_cq *cq; @@ -841,3 +1441,41 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) cq = to_mcq(ibcq); return cq->cqe_size; } + +/* Called from atomic context */ +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) +{ + struct mlx5_ib_wc *soft_wc; + struct mlx5_ib_cq *cq = to_mcq(ibcq); + unsigned long flags; + + soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); + if (!soft_wc) + return -ENOMEM; + + soft_wc->wc = *wc; + spin_lock_irqsave(&cq->lock, flags); + list_add_tail(&soft_wc->list, &cq->wc_list); + if (cq->notify_flags == IB_CQ_NEXT_COMP || + wc->status != IB_WC_SUCCESS) { + cq->notify_flags = 0; + schedule_work(&cq->notify_work); + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_cq_create, + UVERBS_OBJECT_CQ, + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); + +const struct uapi_definition mlx5_ib_create_cq_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create), + {}, +}; |
