summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-03 08:05:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-03 08:05:59 -0700
commit25edbc383b72c2364c7b339245c1c5db84e615e1 (patch)
tree6f1924c768c6926a51bc53125612fe6141222da6 /drivers/infiniband/hw/mlx5
parentff0700f03609b9f0defacd4ce96d9519d721e0a2 (diff)
parentf1a090f09f42be5a5542009f0be310fdb3e768fc (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "A typical collection of patches this cycle, mostly fixing with a few new features: - Fixes from static tools. clang warnings, dead code, unused variable, coccinelle sweeps, etc - Driver bug fixes and minor improvements in rxe, bnxt_re, hfi1, mlx5, irdma, qedr - rtrs ULP bug fixes an improvments - Additional counters for bnxt_re - Support verbs CQ notifications in EFA - Continued reworking and fixing of rxe - netlink control to enable/disable optional device counters - rxe now can use AH objects for its UD path, fixing various bugs in the process - Add DMABUF support to EFA" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (103 commits) RDMA/core: Require the driver to set the IOVA correctly during rereg_mr RDMA/bnxt_re: Remove unsupported bnxt_re_modify_ah callback RDMA/irdma: optimize rx path by removing unnecessary copy RDMA/qed: Use helper function to set GUIDs RDMA/hns: Use the core code to manage the fixed mmap entries IB/opa_vnic: Rebranding of OPA VNIC driver to Cornelis Networks IB/qib: Rebranding of qib driver to Cornelis Networks IB/hfi1: Rebranding of hfi1 driver to Cornelis Networks RDMA/bnxt_re: Use helper function to set GUIDs RDMA/bnxt_re: Fix kernel panic when trying to access bnxt_re_stat_descs RDMA/qedr: Fix NULL deref for query_qp on the GSI QP RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility RDMA/hns: Fix initial arm_st of CQ RDMA/rxe: Make rxe_type_info static const RDMA/rxe: Use 'bitmap_zalloc()' when applicable RDMA/rxe: Save a few bytes from struct rxe_pool RDMA/irdma: Remove the unused variable local_qp RDMA/core: Fix missed initialization of rdma_hw_stats::lock RDMA/efa: Add support for dmabuf memory regions RDMA/umem: Allow pinned dmabuf umem usage ...
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c283
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c187
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h28
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c26
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c40
5 files changed, 493 insertions, 71 deletions
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 224ba36f2946..945758f39523 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -12,6 +12,7 @@
struct mlx5_ib_counter {
const char *name;
size_t offset;
+ u32 type;
};
#define INIT_Q_COUNTER(_name) \
@@ -75,6 +76,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
+#define INIT_OP_COUNTER(_name, _type) \
+ { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
+
+static const struct mlx5_ib_counter basic_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
+};
+
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
@@ -161,17 +177,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
return cnts->set_id;
}
+static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
+{
+ struct rdma_hw_stats *stats;
+ u32 num_hw_counters;
+ int i;
+
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ stats = rdma_alloc_hw_stats_struct(cnts->descs,
+ num_hw_counters +
+ cnts->num_op_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ if (!stats)
+ return NULL;
+
+ for (i = 0; i < cnts->num_op_counters; i++)
+ set_bit(num_hw_counters + i, stats->is_disabled);
+
+ return stats;
+}
+
static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return do_alloc_stats(cnts);
}
static struct rdma_hw_stats *
@@ -180,11 +213,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return do_alloc_stats(cnts);
}
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
@@ -241,9 +270,9 @@ free:
return ret;
}
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u32 port_num, int index)
+static int do_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
@@ -295,6 +324,88 @@ done:
return num_counters;
}
+static int do_get_op_stat(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ const struct mlx5_ib_op_fc *opfcs;
+ u64 packets = 0, bytes;
+ u32 type;
+ int ret;
+
+ cnts = get_counters(dev, port_num - 1);
+ opfcs = cnts->opfcs;
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ if (!opfcs[type].fc)
+ goto out;
+
+ ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+out:
+ stats->value[index] = packets;
+ return index;
+}
+
+static int do_get_op_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ int index, ret, num_hw_counters;
+
+ cnts = get_counters(dev, port_num - 1);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ for (index = num_hw_counters;
+ index < (num_hw_counters + cnts->num_op_counters); index++) {
+ ret = do_get_op_stat(ibdev, stats, port_num, index);
+ if (ret != index)
+ return ret;
+ }
+
+ return cnts->num_op_counters;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ int num_counters, num_hw_counters, num_op_counters;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+
+ cnts = get_counters(dev, port_num - 1);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ num_counters = num_hw_counters + cnts->num_op_counters;
+
+ if (index < 0 || index > num_counters)
+ return -EINVAL;
+ else if (index > 0 && index < num_hw_counters)
+ return do_get_hw_stats(ibdev, stats, port_num, index);
+ else if (index >= num_hw_counters && index < num_counters)
+ return do_get_op_stat(ibdev, stats, port_num, index);
+
+ num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
+ if (num_hw_counters < 0)
+ return num_hw_counters;
+
+ num_op_counters = do_get_op_stats(ibdev, stats, port_num);
+ if (num_op_counters < 0)
+ return num_op_counters;
+
+ return num_hw_counters + num_op_counters;
+}
+
static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
@@ -302,11 +413,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return do_alloc_stats(cnts);
}
static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
@@ -371,67 +478,89 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
return mlx5_ib_qp_set_counter(qp, NULL);
}
-
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
+ struct rdma_stat_desc *descs, size_t *offsets)
{
int i;
int j = 0;
for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
+ descs[j].name = basic_q_cnts[i].name;
offsets[j] = basic_q_cnts[i].offset;
}
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
+ descs[j].name = out_of_seq_q_cnts[i].name;
offsets[j] = out_of_seq_q_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
+ descs[j].name = retrans_q_cnts[i].name;
offsets[j] = retrans_q_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
+ descs[j].name = extended_err_cnts[i].name;
offsets[j] = extended_err_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
- names[j] = roce_accl_cnts[i].name;
+ descs[j].name = roce_accl_cnts[i].name;
offsets[j] = roce_accl_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
+ descs[j].name = cong_cnts[i].name;
offsets[j] = cong_cnts[i].offset;
}
}
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
+ descs[j].name = ext_ppcnt_cnts[i].name;
offsets[j] = ext_ppcnt_cnts[i].offset;
}
}
+
+ for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
+ descs[j].name = basic_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &basic_op_cnts[i].type;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmarx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmatx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
+ }
+ }
}
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts)
{
- u32 num_counters;
+ u32 num_counters, num_op_counters;
num_counters = ARRAY_SIZE(basic_q_cnts);
@@ -457,20 +586,34 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
}
- cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
- if (!cnts->names)
+
+ num_op_counters = ARRAY_SIZE(basic_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
+
+ cnts->num_op_counters = num_op_counters;
+ num_counters += num_op_counters;
+ cnts->descs = kcalloc(num_counters,
+ sizeof(struct rdma_stat_desc), GFP_KERNEL);
+ if (!cnts->descs)
return -ENOMEM;
cnts->offsets = kcalloc(num_counters,
sizeof(*cnts->offsets), GFP_KERNEL);
if (!cnts->offsets)
- goto err_names;
+ goto err;
return 0;
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
+err:
+ kfree(cnts->descs);
+ cnts->descs = NULL;
return -ENOMEM;
}
@@ -478,7 +621,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports;
- int i;
+ int i, j;
num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
@@ -491,8 +634,20 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
dev->port[i].cnts.set_id);
mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}
- kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.descs);
kfree(dev->port[i].cnts.offsets);
+
+ for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
+ if (!dev->port[i].cnts.opfcs[j].fc)
+ continue;
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
+ mlx5_fc_destroy(dev->mdev,
+ dev->port[i].cnts.opfcs[j].fc);
+ dev->port[i].cnts.opfcs[j].fc = NULL;
+ }
}
}
@@ -514,7 +669,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
if (err)
goto err_alloc;
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
dev->port[i].cnts.offsets);
MLX5_SET(alloc_q_counter_in, in, uid,
@@ -672,6 +827,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters)
mutex_unlock(&mcounters->mcntrs_mutex);
}
+static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
+ unsigned int index, bool enable)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ u32 num_hw_counters, type;
+ int ret;
+
+ cnts = &dev->port[port - 1].cnts;
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ if (index < num_hw_counters ||
+ index >= (num_hw_counters + cnts->num_op_counters))
+ return -EINVAL;
+
+ if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ opfc = &cnts->opfcs[type];
+
+ if (enable) {
+ if (opfc->fc)
+ return -EEXIST;
+
+ opfc->fc = mlx5_fc_create(dev->mdev, false);
+ if (IS_ERR(opfc->fc))
+ return PTR_ERR(opfc->fc);
+
+ ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
+ if (ret) {
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+ opfc->fc = NULL;
+ }
+ return ret;
+ }
+
+ if (!opfc->fc)
+ return -EINVAL;
+
+ mlx5_ib_fs_remove_op_fc(dev, opfc, type);
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+ opfc->fc = NULL;
+ return 0;
+}
+
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
@@ -680,6 +885,8 @@ static const struct ib_device_ops hw_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
+ .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
+ mlx5_ib_modify_stat : NULL,
};
static const struct ib_device_ops hw_switchdev_stats_ops = {
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..b780185d9dc6 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.source_vhca_port) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ ft_field_support.source_vhca_port))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+ misc_parameters.source_vhca_port);
+ MLX5_SET(fte_match_param, &spec->match_value,
+ misc_parameters.source_vhca_port, port_num);
+
+ return 0;
+}
+
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec, int ipv)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+ INET_ECN_CE);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ ipv);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+ IB_BTH_OPCODE_CNP);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_flow_namespace_type fn_type;
+ int priority, i, err, spec_num;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 2;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ prio = &dev->flow_db->opfcs[type];
+ if (!prio->flow_table) {
+ prio = _get_prio(ns, prio, priority,
+ dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto free;
+ }
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter_id = mlx5_fc_id(opfc->fc);
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ for (i -= 1; i >= 0; i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ int i;
+
+ for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+ }
+}
+
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e462e368c353..e636e954f6bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
struct mlx5_core_dev *mdev;
};
+enum mlx5_ib_optional_counter_type {
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+
+ MLX5_IB_OPCOUNTER_MAX,
+};
+
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio fdb;
struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@@ -804,15 +813,32 @@ struct mlx5_ib_resources {
struct mlx5_ib_port_resources ports[2];
};
+#define MAX_OPFC_RULES 2
+
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
+};
+
struct mlx5_ib_counters {
- const char **names;
+ struct rdma_stat_desc *descs;
size_t *offsets;
u32 num_q_counters;
u32 num_cong_counters;
u32 num_ext_ppcnt_counters;
+ u32 num_op_counters;
u16 set_id;
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
};
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d2044df30394..157d862fb864 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -605,29 +605,21 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
/* Return a MR already available in the cache */
static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
{
- struct mlx5_ib_dev *dev = req_ent->dev;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent = req_ent;
- /* Try larger MR pools from the cache to satisfy the allocation */
- for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
- mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
- ent - dev->cache.ent);
-
- spin_lock_irq(&ent->lock);
- if (!list_empty(&ent->head)) {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
- list);
- list_del(&mr->list);
- ent->available_mrs--;
- queue_adjust_cache_locked(ent);
- spin_unlock_irq(&ent->lock);
- mlx5_clear_mr(mr);
- return mr;
- }
+ spin_lock_irq(&ent->lock);
+ if (!list_empty(&ent->head)) {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
+ mlx5_clear_mr(mr);
+ return mr;
}
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irq(&ent->lock);
req_ent->miss++;
return NULL;
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b1e2725f4586..91eb615b89ee 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1691,20 +1691,26 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
xa_lock(&dev->odp_mkeys);
mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
- if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
+ if (!mmkey || mmkey->key != lkey) {
+ mr = ERR_PTR(-ENOENT);
goto end;
+ }
+ if (mmkey->type != MLX5_MKEY_MR) {
+ mr = ERR_PTR(-EINVAL);
+ goto end;
+ }
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
if (mr->ibmr.pd != pd) {
- mr = NULL;
+ mr = ERR_PTR(-EPERM);
goto end;
}
/* prefetch with write-access must be supported by the MR */
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
!mr->umem->writable) {
- mr = NULL;
+ mr = ERR_PTR(-EPERM);
goto end;
}
@@ -1736,7 +1742,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
destroy_prefetch_work(work);
}
-static bool init_prefetch_work(struct ib_pd *pd,
+static int init_prefetch_work(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 pf_flags, struct prefetch_mr_work *work,
struct ib_sge *sg_list, u32 num_sge)
@@ -1747,17 +1753,19 @@ static bool init_prefetch_work(struct ib_pd *pd,
work->pf_flags = pf_flags;
for (i = 0; i < num_sge; ++i) {
- work->frags[i].io_virt = sg_list[i].addr;
- work->frags[i].length = sg_list[i].length;
- work->frags[i].mr =
- get_prefetchable_mr(pd, advice, sg_list[i].lkey);
- if (!work->frags[i].mr) {
+ struct mlx5_ib_mr *mr;
+
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (IS_ERR(mr)) {
work->num_sge = i;
- return false;
+ return PTR_ERR(mr);
}
+ work->frags[i].io_virt = sg_list[i].addr;
+ work->frags[i].length = sg_list[i].length;
+ work->frags[i].mr = mr;
}
work->num_sge = num_sge;
- return true;
+ return 0;
}
static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
@@ -1773,8 +1781,8 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
struct mlx5_ib_mr *mr;
mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
- if (!mr)
- return -ENOENT;
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
&bytes_mapped, pf_flags);
if (ret < 0) {
@@ -1794,6 +1802,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
u32 pf_flags = 0;
struct prefetch_mr_work *work;
+ int rc;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
@@ -1809,9 +1818,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
if (!work)
return -ENOMEM;
- if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
+ rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge);
+ if (rc) {
destroy_prefetch_work(work);
- return -EINVAL;
+ return rc;
}
queue_work(system_unbound_wq, &work->work);
return 0;