summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5/qp.c
diff options
context:
space:
mode:
authorPatrisious Haddad <phaddad@nvidia.com>2023-01-04 11:43:36 +0200
committerLeon Romanovsky <leon@kernel.org>2023-01-15 12:23:15 +0200
commit8067fd8b26bf5d5f9917e934d878e4f3794d082e (patch)
treeae2db4b173bf5a8be6666e244f45398d9af6200c /drivers/infiniband/hw/mlx5/qp.c
parent312b8f79eb05479628ee71357749815b2eeeeea8 (diff)
RDMA/mlx5: Print error syndrome in case of fatal QP errors
Print syndromes in case of fatal QP events. This is helpful for upper level debugging, as there maybe no CQEs. Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Signed-off-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/edc794f622a33e4ee12d7f5d218d1a59aa7c6af5.1672821186.git.leonro@nvidia.com Reviewed-by: Saeed Mahameed <saeed@kernel.org> Signed-off-by: Leon Romanovsky <leon@kernel.org>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/qp.c')
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c45
1 files changed, 44 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 9b85e1be0aed..7cc3b973dec7 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -310,6 +310,44 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
}
+static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ void *pas_ext_union, *err_syn;
+ u32 *outb;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) ||
+ !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome))
+ return;
+
+ outb = kzalloc(outlen, GFP_KERNEL);
+ if (!outb)
+ return;
+
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ true);
+ if (err)
+ goto out;
+
+ pas_ext_union =
+ MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas);
+ err_syn = MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union,
+ qpc_data_extension.error_syndrome);
+
+ pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n",
+ ibqp->device->name, ibqp->port, ibqp->qp_num,
+ ib_wc_status_msg(
+ MLX5_GET(cqe_error_syndrome, err_syn, syndrome)),
+ MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome));
+out:
+ kfree(outb);
+}
+
static void mlx5_ib_handle_qp_event(struct work_struct *_work)
{
struct mlx5_ib_qp_event_work *qpe_work =
@@ -350,6 +388,10 @@ static void mlx5_ib_handle_qp_event(struct work_struct *_work)
goto out;
}
+ if ((event.event == IB_EVENT_QP_FATAL) ||
+ (event.event == IB_EVENT_QP_ACCESS_ERR))
+ mlx5_ib_qp_err_syndrome(ibqp);
+
ibqp->event_handler(&event, ibqp->qp_context);
out:
@@ -4862,7 +4904,8 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!outb)
return -ENOMEM;
- err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ false);
if (err)
goto out;