diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 08:05:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 08:05:59 -0700 |
commit | 25edbc383b72c2364c7b339245c1c5db84e615e1 (patch) | |
tree | 6f1924c768c6926a51bc53125612fe6141222da6 /drivers/infiniband/hw/mlx5 | |
parent | ff0700f03609b9f0defacd4ce96d9519d721e0a2 (diff) | |
parent | f1a090f09f42be5a5542009f0be310fdb3e768fc (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A typical collection of patches this cycle, mostly fixing with a few
new features:
- Fixes from static tools. clang warnings, dead code, unused
variable, coccinelle sweeps, etc
- Driver bug fixes and minor improvements in rxe, bnxt_re, hfi1,
mlx5, irdma, qedr
- rtrs ULP bug fixes an improvments
- Additional counters for bnxt_re
- Support verbs CQ notifications in EFA
- Continued reworking and fixing of rxe
- netlink control to enable/disable optional device counters
- rxe now can use AH objects for its UD path, fixing various bugs in
the process
- Add DMABUF support to EFA"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (103 commits)
RDMA/core: Require the driver to set the IOVA correctly during rereg_mr
RDMA/bnxt_re: Remove unsupported bnxt_re_modify_ah callback
RDMA/irdma: optimize rx path by removing unnecessary copy
RDMA/qed: Use helper function to set GUIDs
RDMA/hns: Use the core code to manage the fixed mmap entries
IB/opa_vnic: Rebranding of OPA VNIC driver to Cornelis Networks
IB/qib: Rebranding of qib driver to Cornelis Networks
IB/hfi1: Rebranding of hfi1 driver to Cornelis Networks
RDMA/bnxt_re: Use helper function to set GUIDs
RDMA/bnxt_re: Fix kernel panic when trying to access bnxt_re_stat_descs
RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility
RDMA/hns: Fix initial arm_st of CQ
RDMA/rxe: Make rxe_type_info static const
RDMA/rxe: Use 'bitmap_zalloc()' when applicable
RDMA/rxe: Save a few bytes from struct rxe_pool
RDMA/irdma: Remove the unused variable local_qp
RDMA/core: Fix missed initialization of rdma_hw_stats::lock
RDMA/efa: Add support for dmabuf memory regions
RDMA/umem: Allow pinned dmabuf umem usage
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/counters.c | 283 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/fs.c | 187 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 28 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 26 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 40 |
5 files changed, 493 insertions, 71 deletions
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 224ba36f2946..945758f39523 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -12,6 +12,7 @@ struct mlx5_ib_counter { const char *name; size_t offset; + u32 type; }; #define INIT_Q_COUNTER(_name) \ @@ -75,6 +76,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; +#define INIT_OP_COUNTER(_name, _type) \ + { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} + +static const struct mlx5_ib_counter basic_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), +}; + +static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), +}; + +static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -161,17 +177,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) return cnts->set_id; } +static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) +{ + struct rdma_hw_stats *stats; + u32 num_hw_counters; + int i; + + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + stats = rdma_alloc_hw_stats_struct(cnts->descs, + num_hw_counters + + cnts->num_op_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + if (!stats) + return NULL; + + for (i = 0; i < cnts->num_op_counters; i++) + set_bit(num_hw_counters + i, stats->is_disabled); + + return stats; +} + static struct rdma_hw_stats * mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static struct rdma_hw_stats * @@ -180,11 +213,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, @@ -241,9 +270,9 @@ free: return ret; } -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u32 port_num, int index) +static int do_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); @@ -295,6 +324,88 @@ done: return num_counters; } +static int do_get_op_stat(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + const struct mlx5_ib_op_fc *opfcs; + u64 packets = 0, bytes; + u32 type; + int ret; + + cnts = get_counters(dev, port_num - 1); + opfcs = cnts->opfcs; + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + if (!opfcs[type].fc) + goto out; + + ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, + &packets, &bytes); + if (ret) + return ret; + +out: + stats->value[index] = packets; + return index; +} + +static int do_get_op_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + int index, ret, num_hw_counters; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + for (index = num_hw_counters; + index < (num_hw_counters + cnts->num_op_counters); index++) { + ret = do_get_op_stat(ibdev, stats, port_num, index); + if (ret != index) + return ret; + } + + return cnts->num_op_counters; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + int num_counters, num_hw_counters, num_op_counters; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + num_counters = num_hw_counters + cnts->num_op_counters; + + if (index < 0 || index > num_counters) + return -EINVAL; + else if (index > 0 && index < num_hw_counters) + return do_get_hw_stats(ibdev, stats, port_num, index); + else if (index >= num_hw_counters && index < num_counters) + return do_get_op_stat(ibdev, stats, port_num, index); + + num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); + if (num_hw_counters < 0) + return num_hw_counters; + + num_op_counters = do_get_op_stats(ibdev, stats, port_num); + if (num_op_counters < 0) + return num_op_counters; + + return num_hw_counters + num_op_counters; +} + static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { @@ -302,11 +413,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) @@ -371,67 +478,89 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) return mlx5_ib_qp_set_counter(qp, NULL); } - static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) + struct rdma_stat_desc *descs, size_t *offsets) { int i; int j = 0; for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; + descs[j].name = basic_q_cnts[i].name; offsets[j] = basic_q_cnts[i].offset; } if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; + descs[j].name = out_of_seq_q_cnts[i].name; offsets[j] = out_of_seq_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; + descs[j].name = retrans_q_cnts[i].name; offsets[j] = retrans_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; + descs[j].name = extended_err_cnts[i].name; offsets[j] = extended_err_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; + descs[j].name = roce_accl_cnts[i].name; offsets[j] = roce_accl_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; + descs[j].name = cong_cnts[i].name; offsets[j] = cong_cnts[i].offset; } } if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; + descs[j].name = ext_ppcnt_cnts[i].name; offsets[j] = ext_ppcnt_cnts[i].offset; } } + + for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { + descs[j].name = basic_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &basic_op_cnts[i].type; + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmarx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmarx_cnp_op_cnts[i].type; + } + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmatx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmatx_cnp_op_cnts[i].type; + } + } } static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts) { - u32 num_counters; + u32 num_counters, num_op_counters; num_counters = ARRAY_SIZE(basic_q_cnts); @@ -457,20 +586,34 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); - if (!cnts->names) + + num_op_counters = ARRAY_SIZE(basic_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); + + cnts->num_op_counters = num_op_counters; + num_counters += num_op_counters; + cnts->descs = kcalloc(num_counters, + sizeof(struct rdma_stat_desc), GFP_KERNEL); + if (!cnts->descs) return -ENOMEM; cnts->offsets = kcalloc(num_counters, sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) - goto err_names; + goto err; return 0; -err_names: - kfree(cnts->names); - cnts->names = NULL; +err: + kfree(cnts->descs); + cnts->descs = NULL; return -ENOMEM; } @@ -478,7 +621,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports; - int i; + int i, j; num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; @@ -491,8 +634,20 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) dev->port[i].cnts.set_id); mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); } - kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.descs); kfree(dev->port[i].cnts.offsets); + + for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { + if (!dev->port[i].cnts.opfcs[j].fc) + continue; + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); + mlx5_fc_destroy(dev->mdev, + dev->port[i].cnts.opfcs[j].fc); + dev->port[i].cnts.opfcs[j].fc = NULL; + } } } @@ -514,7 +669,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) if (err) goto err_alloc; - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, dev->port[i].cnts.offsets); MLX5_SET(alloc_q_counter_in, in, uid, @@ -672,6 +827,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters) mutex_unlock(&mcounters->mcntrs_mutex); } +static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, + unsigned int index, bool enable) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + u32 num_hw_counters, type; + int ret; + + cnts = &dev->port[port - 1].cnts; + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + if (index < num_hw_counters || + index >= (num_hw_counters + cnts->num_op_counters)) + return -EINVAL; + + if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + opfc = &cnts->opfcs[type]; + + if (enable) { + if (opfc->fc) + return -EEXIST; + + opfc->fc = mlx5_fc_create(dev->mdev, false); + if (IS_ERR(opfc->fc)) + return PTR_ERR(opfc->fc); + + ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); + if (ret) { + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + } + return ret; + } + + if (!opfc->fc) + return -EINVAL; + + mlx5_ib_fs_remove_op_fc(dev, opfc, type); + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + return 0; +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -680,6 +885,8 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? + mlx5_ib_modify_stat : NULL, }; static const struct ib_device_ops hw_switchdev_stats_ops = { diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 5fbc0a8454b9..b780185d9dc6 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -10,12 +10,14 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/ib_umem.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <linux/mlx5/eswitch.h> +#include <net/inet_ecn.h> #include "mlx5_ib.h" #include "counters.h" #include "devx.h" @@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return prio; } +enum { + RDMA_RX_ECN_OPCOUNTER_PRIO, + RDMA_RX_CNP_OPCOUNTER_PRIO, +}; + +enum { + RDMA_TX_CNP_OPCOUNTER_PRIO, +}; + +static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.source_vhca_port) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + ft_field_support.source_vhca_port)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, + misc_parameters.source_vhca_port); + MLX5_SET(fte_match_param, &spec->match_value, + misc_parameters.source_vhca_port, port_num); + + return 0; +} + +static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec, int ipv) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_ecn); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, + INET_ECN_CE); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + ipv); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_opcode); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, + IB_BTH_OPCODE_CNP); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_flow_namespace_type fn_type; + int priority, i, err, spec_num; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_spec *spec; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 2; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; + break; + + default: + err = -EOPNOTSUPP; + goto free; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) { + err = -EOPNOTSUPP; + goto free; + } + + prio = &dev->flow_db->opfcs[type]; + if (!prio->flow_table) { + prio = _get_prio(ns, prio, priority, + dev->num_ports * MAX_OPFC_RULES, 1, 0); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto free; + } + } + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter_id = mlx5_fc_id(opfc->fc); + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + kfree(spec); + + return 0; + +del_rules: + for (i -= 1; i >= 0; i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free: + kfree(spec); + return err; +} + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + int i; + + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, &dev->flow_db->opfcs[type], true); + } +} + static void set_underlay_qp(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec, u32 underlay_qpn) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e462e368c353..e636e954f6bf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -263,6 +263,14 @@ struct mlx5_ib_pp { struct mlx5_core_dev *mdev; }; +enum mlx5_ib_optional_counter_type { + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + + MLX5_IB_OPCOUNTER_MAX, +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; @@ -271,6 +279,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio fdb; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -804,15 +813,32 @@ struct mlx5_ib_resources { struct mlx5_ib_port_resources ports[2]; }; +#define MAX_OPFC_RULES 2 + +struct mlx5_ib_op_fc { + struct mlx5_fc *fc; + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; +}; + struct mlx5_ib_counters { - const char **names; + struct rdma_stat_desc *descs; size_t *offsets; u32 num_q_counters; u32 num_cong_counters; u32 num_ext_ppcnt_counters; + u32 num_op_counters; u16 set_id; + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; }; +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d2044df30394..157d862fb864 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -605,29 +605,21 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, /* Return a MR already available in the cache */ static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent = req_ent; - /* Try larger MR pools from the cache to satisfy the allocation */ - for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { - mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, - ent - dev->cache.ent); - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } + spin_lock_irq(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); + mlx5_clear_mr(mr); + return mr; } + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); req_ent->miss++; return NULL; } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b1e2725f4586..91eb615b89ee 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1691,20 +1691,26 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); - if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + if (!mmkey || mmkey->key != lkey) { + mr = ERR_PTR(-ENOENT); goto end; + } + if (mmkey->type != MLX5_MKEY_MR) { + mr = ERR_PTR(-EINVAL); + goto end; + } mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); if (mr->ibmr.pd != pd) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && !mr->umem->writable) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } @@ -1736,7 +1742,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) destroy_prefetch_work(work); } -static bool init_prefetch_work(struct ib_pd *pd, +static int init_prefetch_work(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 pf_flags, struct prefetch_mr_work *work, struct ib_sge *sg_list, u32 num_sge) @@ -1747,17 +1753,19 @@ static bool init_prefetch_work(struct ib_pd *pd, work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - work->frags[i].io_virt = sg_list[i].addr; - work->frags[i].length = sg_list[i].length; - work->frags[i].mr = - get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!work->frags[i].mr) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (IS_ERR(mr)) { work->num_sge = i; - return false; + return PTR_ERR(mr); } + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = mr; } work->num_sge = num_sge; - return true; + return 0; } static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, @@ -1773,8 +1781,8 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) - return -ENOENT; + if (IS_ERR(mr)) + return PTR_ERR(mr); ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); if (ret < 0) { @@ -1794,6 +1802,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { u32 pf_flags = 0; struct prefetch_mr_work *work; + int rc; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1809,9 +1818,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge); + if (rc) { destroy_prefetch_work(work); - return -EINVAL; + return rc; } queue_work(system_unbound_wq, &work->work); return 0; |