diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
115 files changed, 6765 insertions, 2235 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 33ba0a5c38ac..edcc6f662618 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -236,7 +236,7 @@ static void dump_err_buf(struct mlx4_dev *dev) static void poll_catas(struct timer_list *t) { - struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer); + struct mlx4_priv *priv = timer_container_of(priv, t, catas_err.timer); struct mlx4_dev *dev = &priv->dev; u32 slave_read; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index cd754cd76bde..2aeaafcfb993 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -38,7 +38,7 @@ /* mlx4_en_read_clock - read raw cycle counter (to be used by time counter) */ -static u64 mlx4_en_read_clock(const struct cyclecounter *tc) +static u64 mlx4_en_read_clock(struct cyclecounter *tc) { struct mlx4_en_dev *mdev = container_of(tc, struct mlx4_en_dev, cycles); @@ -249,7 +249,7 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; - u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC; u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 752a72499b4f..be80da03a594 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -290,9 +290,6 @@ static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct ieee_ets *my_ets = &priv->ets; - if (!my_ets) - return -EINVAL; - ets->ets_cap = IEEE_8021QAZ_MAX_TCS; ets->cbs = my_ets->cbs; memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index cd17a3f4faf8..a68cd3f0304c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1897,6 +1897,7 @@ static int mlx4_en_get_ts_info(struct net_device *dev, if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 281b34af0bb4..d2071aff7b8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2670,8 +2670,7 @@ static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table) static const struct udp_tunnel_nic_info mlx4_udp_tunnels = { .sync_table = mlx4_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b33285d755b9..92a16ddb7d86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -460,9 +460,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, truesize += frag_info->frag_stride; if (frag_info->frag_stride == PAGE_SIZE / 2) { + struct netmem_desc *desc = pp_page_to_nmdesc(page); + frags->page_offset ^= PAGE_SIZE / 2; release = page_count(page) != 1 || - atomic_long_read(&page->pp_ref_count) != 1 || + atomic_long_read(&desc->pp_ref_count) != 1 || page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } else if (!priv->rx_headroom) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index febeadfdd5a5..03d2fc7d9b09 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -49,6 +49,8 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +#include <rdma/ib_verbs.h> + #include "mlx4.h" #include "fw.h" #include "icm.h" @@ -1246,14 +1248,6 @@ err_out: return err ? err : count; } -enum ibta_mtu { - IB_MTU_256 = 1, - IB_MTU_512 = 2, - IB_MTU_1024 = 3, - IB_MTU_2048 = 4, - IB_MTU_4096 = 5 -}; - static inline int int_to_ibta_mtu(int mtu) { switch (mtu) { @@ -1266,7 +1260,7 @@ static inline int int_to_ibta_mtu(int mtu) } } -static inline int ibta_mtu_to_int(enum ibta_mtu mtu) +static inline int ibta_mtu_to_int(enum ib_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index d7444782bfdd..698a5d1f0d7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,7 +106,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits), GFP_KERNEL); buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 568bbe5f83f5..650df18a9216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ - lib/crypto.o lib/sd.o + lib/crypto.o lib/sd.o en/pcie_cong_event.o # # Netdev extra @@ -154,7 +154,8 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \ steering/hws/vport.o \ steering/hws/bwc_complex.o \ steering/hws/fs_hws_pools.o \ - steering/hws/fs_hws.o + steering/hws/fs_hws.o \ + steering/hws/action_ste_pool.o # # SF device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e53dbdc0a7a1..e395ef5f356e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -927,8 +927,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force static void cb_timeout_handler(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(work); struct mlx5_cmd_work_ent *ent = container_of(dwork, struct mlx5_cmd_work_ent, cb_timeout_work); @@ -1948,8 +1947,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, pages_queue, token, force_polling); - if (callback) - return err; + if (callback && !err) + return 0; if (err > 0) /* Failed in FW, command didn't execute */ err = deliv_status_to_err(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 73cd74644378..3ffa3fbacd16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -35,6 +35,55 @@ static u16 mlx5_fw_ver_subminor(u32 version) return version & 0xffff; } +static int mlx5_devlink_serial_numbers_put(struct mlx5_core_dev *dev, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct pci_dev *pdev = dev->pdev; + unsigned int vpd_size, kw_len; + char *str, *end; + u8 *vpd_data; + int err = 0; + int start; + + vpd_data = pci_vpd_alloc(pdev, &vpd_size); + if (IS_ERR(vpd_data)) + return 0; + + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len); + if (start >= 0) { + str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); + if (!str) { + err = -ENOMEM; + goto end; + } + end = strchrnul(str, ' '); + *end = '\0'; + err = devlink_info_board_serial_number_put(req, str); + kfree(str); + if (err) + goto end; + } + + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len); + if (start >= 0) { + str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); + if (!str) { + err = -ENOMEM; + goto end; + } + err = devlink_info_serial_number_put(req, str); + kfree(str); + if (err) + goto end; + } + +end: + kfree(vpd_data); + return err; +} + #define DEVLINK_FW_STRING_LEN 32 static int @@ -49,6 +98,10 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, if (!mlx5_core_is_pf(dev)) return 0; + err = mlx5_devlink_serial_numbers_put(dev, req, extack); + if (err) + return err; + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; @@ -323,6 +376,8 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_leaf_tc_bw_set = mlx5_esw_devlink_rate_leaf_tc_bw_set, + .rate_node_tc_bw_set = mlx5_esw_devlink_rate_node_tc_bw_set, .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, .rate_node_new = mlx5_esw_devlink_rate_node_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 32ed4963b8ad..0dd3bc0f4caa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -84,9 +84,10 @@ struct page_pool; #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) #define MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE (PAGE_SHIFT - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) -#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) -#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024) -#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096) +#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT (6) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT (12) +#define MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE (16) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE BIT(MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE) #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ @@ -278,10 +279,6 @@ enum packet_merge { struct mlx5e_packet_merge_param { enum packet_merge type; u32 timeout; - struct { - u8 match_criteria_type; - u8 alignment_granularity; - } shampo; }; struct mlx5e_params { @@ -378,7 +375,7 @@ struct mlx5e_sq_dma { enum mlx5e_dma_map_type type; }; -/* Keep this enum consistent with with the corresponding strings array +/* Keep this enum consistent with the corresponding strings array * declared in en/reporter_tx.c */ enum { @@ -387,7 +384,6 @@ enum { MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_DIM, - MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, MLX5E_NUM_SQ_STATES, /* Must be kept last */ @@ -520,6 +516,12 @@ struct mlx5e_xdpsq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; +struct mlx5e_xdp_buff { + struct xdp_buff xdp; + struct mlx5_cqe64 *cqe; + struct mlx5e_rq *rq; +}; + struct mlx5e_ktls_resync_resp; struct mlx5e_icosq { @@ -551,7 +553,7 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_frag_page { - struct page *page; + netmem_ref netmem; u16 frags; }; @@ -628,15 +630,13 @@ struct mlx5e_dma_info { }; struct mlx5e_shampo_hd { - u32 mkey; struct mlx5e_frag_page *pages; u32 hd_per_wq; u16 hd_per_wqe; - u16 pages_per_wq; unsigned long *bitmap; u16 pi; u16 ci; - __be32 key; + __be32 mkey_be; }; struct mlx5e_hw_gro_data { @@ -715,12 +715,18 @@ struct mlx5e_rq { struct bpf_prog __rcu *xdp_prog; struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); + + /* page pools */ struct page_pool *page_pool; + struct page_pool *hd_page_pool; + + struct mlx5e_xdp_buff mxbuf; /* AF_XDP zero-copy */ struct xsk_buff_pool *xsk_pool; struct work_struct recover_work; + struct work_struct rx_timeout_work; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -915,6 +921,8 @@ struct mlx5e_priv { struct notifier_block events_nb; struct notifier_block blocking_events_nb; + struct mlx5e_pcie_cong_event *cong_event; + struct udp_tunnel_nic_info nic_info; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index b5c3a2a9d2a5..9560fcba643f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -18,7 +18,8 @@ enum { enum { MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO + MLX5E_PROMISC_PRIO, + MLX5E_NIC_PRIO, }; struct mlx5e_flow_table { @@ -68,9 +69,13 @@ struct mlx5e_l2_table { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -/* NIC prio FTS */ +/* NIC promisc FT level */ enum { MLX5E_PROMISC_FT_LEVEL, +}; + +/* NIC prio FTS */ +enum { MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h index 9e276fd3c0cf..c21fe36527a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h @@ -11,6 +11,11 @@ int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool); void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool); void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs); void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs); +int mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack); +int mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc); int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs); @@ -20,6 +25,15 @@ static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { } static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { } static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { } +static inline int +mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc) +{ return -EOPNOTSUPP; } static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { return -EOPNOTSUPP; } static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 81523825faa2..cb972b2d46e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -114,6 +114,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv) int err = 0; rtnl_lock(); + netdev_lock(priv->netdev); mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) @@ -123,6 +124,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv) out: mutex_unlock(&priv->state_lock); + netdev_unlock(priv->netdev); rtnl_unlock(); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 58ec5e44aa7a..3cca06a74cf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -414,25 +414,10 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, return params->log_rq_mtu_frames - log_pkts_per_wqe; } -u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +static u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5e_params *params) { - return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE)); -} - -u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE); -} - -u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * - MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; - - return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); + return order_base_2(DIV_ROUND_UP(MLX5E_SHAMPO_WQ_RESRV_SIZE, + params->sw_mtu)); } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, @@ -834,13 +819,12 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * - MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); - int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params)); int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); int wqe_size = BIT(log_stride_sz) * num_strides; + int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE; /* +1 is for the case that the pkt_per_rsrv dont consume the reservation * so we get a filler cqe for the rest of the reservation. @@ -901,6 +885,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 lro_timeout; int ndsegs = 1; int err; @@ -926,22 +911,27 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_wqe_stride_size, log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); - if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { - MLX5_SET(wq, wq, shampo_enable, true); - MLX5_SET(wq, wq, log_reservation_size, - mlx5e_shampo_get_log_rsrv_size(mdev, params)); - MLX5_SET(wq, wq, - log_max_num_of_packets_per_reservation, - mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); - MLX5_SET(wq, wq, log_headers_entry_size, - mlx5e_shampo_get_log_hd_entry_size(mdev, params)); - MLX5_SET(rqc, rqc, reservation_timeout, - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); - MLX5_SET(rqc, rqc, shampo_match_criteria_type, - params->packet_merge.shampo.match_criteria_type); - MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, - params->packet_merge.shampo.alignment_granularity); - } + if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) + break; + + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE - + MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(params)); + MLX5_SET(wq, wq, log_headers_entry_size, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE - + MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT); + lro_timeout = + mlx5e_choose_lro_timeout(mdev, + MLX5E_DEFAULT_SHAMPO_TIMEOUT); + MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); break; } default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -1044,18 +1034,17 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { - int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * - MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); - int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params)); int wqe_size = BIT(log_stride_sz) * num_strides; + int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE; u32 hd_per_wqe; /* Assumption: hd_per_wqe % 8 == 0. */ - hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv; - mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n", - __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv); + hd_per_wqe = (wqe_size / rsrv_size) * pkt_per_rsrv; + mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_rsrv = %d\n", + __func__, hd_per_wqe, rsrv_size, wqe_size, pkt_per_rsrv); return hd_per_wqe; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index bd5877acc5b1..488ccdbc1e2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -95,12 +95,6 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c new file mode 100644 index 000000000000..0ed017569a19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. + +#include "en.h" +#include "pcie_cong_event.h" + +#define MLX5E_CONG_HIGH_STATE 0x7 + +enum { + MLX5E_INBOUND_CONG = BIT(0), + MLX5E_OUTBOUND_CONG = BIT(1), +}; + +struct mlx5e_pcie_cong_thresh { + u16 inbound_high; + u16 inbound_low; + u16 outbound_high; + u16 outbound_low; +}; + +struct mlx5e_pcie_cong_stats { + u32 pci_bw_inbound_high; + u32 pci_bw_inbound_low; + u32 pci_bw_outbound_high; + u32 pci_bw_outbound_low; +}; + +struct mlx5e_pcie_cong_event { + u64 obj_id; + + struct mlx5e_priv *priv; + + /* For event notifier and workqueue. */ + struct work_struct work; + struct mlx5_nb nb; + + /* Stores last read state. */ + u8 state; + + /* For ethtool stats group. */ + struct mlx5e_pcie_cong_stats stats; +}; + +/* In units of 0.01 % */ +static const struct mlx5e_pcie_cong_thresh default_thresh_config = { + .inbound_high = 9000, + .inbound_low = 7500, + .outbound_high = 9000, + .outbound_low = 7500, +}; + +static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_inbound_high) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_inbound_low) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_outbound_high) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_outbound_low) }, +}; + +#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong) +{ + return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong) +{ + if (!priv->cong_event) + return; + + for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) + ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong) +{ + if (!priv->cong_event) + return; + + for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) { + u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats, + mlx5e_pcie_cong_stats_desc, + i); + + mlx5e_ethtool_put_stat(data, ctr); + } +} + +MLX5E_DEFINE_STATS_GRP(pcie_cong, 0); + +static int +mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev, + const struct mlx5e_pcie_cong_thresh *config, + u64 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *cong_obj; + void *hdr; + int err; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + + MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1); + MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1); + + MLX5_SET(pcie_cong_event_obj, cong_obj, + inbound_cong_high_threshold, config->inbound_high); + MLX5_SET(pcie_cong_event_obj, cong_obj, + inbound_cong_low_threshold, config->inbound_low); + + MLX5_SET(pcie_cong_event_obj, cong_obj, + outbound_cong_high_threshold, config->outbound_high); + MLX5_SET(pcie_cong_event_obj, cong_obj, + outbound_cong_low_threshold, config->outbound_low); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n", + *obj_id, + config->inbound_high, config->inbound_low, + config->outbound_high, config->outbound_low); + + return 0; +} + +static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev, + u64 obj_id) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *hdr; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev, + u64 obj_id, + u32 *state) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)]; + void *obj; + void *hdr; + u8 cong; + int err; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj); + + if (state) { + cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state); + if (cong == MLX5E_CONG_HIGH_STATE) + *state |= MLX5E_INBOUND_CONG; + + cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state); + if (cong == MLX5E_CONG_HIGH_STATE) + *state |= MLX5E_OUTBOUND_CONG; + } + + return 0; +} + +static void mlx5e_pcie_cong_event_work(struct work_struct *work) +{ + struct mlx5e_pcie_cong_event *cong_event; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + u32 new_cong_state = 0; + u32 changes; + int err; + + cong_event = container_of(work, struct mlx5e_pcie_cong_event, work); + priv = cong_event->priv; + dev = priv->mdev; + + err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id, + &new_cong_state); + if (err) { + mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n", + err, cong_event->obj_id); + return; + } + + changes = cong_event->state ^ new_cong_state; + if (!changes) + return; + + cong_event->state = new_cong_state; + + if (changes & MLX5E_INBOUND_CONG) { + if (new_cong_state & MLX5E_INBOUND_CONG) + cong_event->stats.pci_bw_inbound_high++; + else + cong_event->stats.pci_bw_inbound_low++; + } + + if (changes & MLX5E_OUTBOUND_CONG) { + if (new_cong_state & MLX5E_OUTBOUND_CONG) + cong_event->stats.pci_bw_outbound_high++; + else + cong_event->stats.pci_bw_outbound_low++; + } +} + +static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_pcie_cong_event *cong_event; + + cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb); + queue_work(cong_event->priv->wq, &cong_event->work); + + return NOTIFY_OK; +} + +int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_cong_event *cong_event; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!mlx5_pcie_cong_event_supported(mdev)) + return 0; + + cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL, + mdev->priv.numa_node); + if (!cong_event) + return -ENOMEM; + + INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work); + MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler, + OBJECT_CHANGE); + + cong_event->priv = priv; + + err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config, + &cong_event->obj_id); + if (err) { + mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n"); + goto err_free; + } + + err = mlx5_eq_notifier_register(mdev, &cong_event->nb); + if (err) { + mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n"); + goto err_obj_destroy; + } + + priv->cong_event = cong_event; + + return 0; + +err_obj_destroy: + mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id); +err_free: + kvfree(cong_event); + + return err; +} + +void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_cong_event *cong_event = priv->cong_event; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!cong_event) + return; + + priv->cong_event = NULL; + + mlx5_eq_notifier_unregister(mdev, &cong_event->nb); + cancel_work_sync(&cong_event->work); + + if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id)) + mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n", + cong_event->obj_id); + + kvfree(cong_event); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h new file mode 100644 index 000000000000..b1ea46bf648a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_PCIE_CONG_EVENT_H__ +#define __MLX5_PCIE_CONG_EVENT_H__ + +int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv); +void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv); + +#endif /* __MLX5_PCIE_CONG_EVENT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 8e25f4ef5ccc..5ae787656a7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -331,6 +331,9 @@ static int port_set_buffer(struct mlx5e_priv *priv, if (err) goto out; + /* RO bits should be set to 0 on write */ + MLX5_SET(pbmc_reg, in, port_buffer_size, 0); + err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 131ed97ca997..391b4e9c9dc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -8,6 +8,7 @@ #include "en/fs_tt_redirect.h" #include <linux/list.h> #include <linux/spinlock.h> +#include <net/netdev_lock.h> struct mlx5e_ptp_fs { struct mlx5_flow_handle *l2_rule; @@ -339,8 +340,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, sq->stats = &c->priv->ptp_stats.sq[tc]; sq->ptpsq = ptpsq; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); sq->stop_room = param->stop_room; sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); @@ -449,8 +448,22 @@ static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work) { struct mlx5e_ptpsq *ptpsq = container_of(work, struct mlx5e_ptpsq, report_unhealthy_work); + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + + /* Recovering the PTP SQ means re-enabling NAPI, which requires the + * netdev instance lock. However, SQ closing has to wait for this work + * task to finish while also holding the same lock. So either get the + * lock or find that the SQ is no longer enabled and thus this work is + * not relevant anymore. + */ + while (!netdev_trylock(sq->netdev)) { + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) + return; + msleep(20); + } mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq); + netdev_unlock(sq->netdev); } static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, @@ -892,7 +905,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, if (err) goto err_free; - netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll); + netif_napi_add_locked(netdev, &c->napi, mlx5e_ptp_napi_poll); mlx5e_ptp_build_params(c, cparams, params); @@ -910,7 +923,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, return 0; err_napi_del: - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); err_free: kvfree(cparams); kvfree(c); @@ -920,7 +933,7 @@ err_free: void mlx5e_ptp_close(struct mlx5e_ptp *c) { mlx5e_ptp_close_queues(c); - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); kvfree(c); } @@ -929,7 +942,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) { int tc; - napi_enable(&c->napi); + napi_enable_locked(&c->napi); if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { for (tc = 0; tc < c->num_tc; tc++) @@ -957,7 +970,7 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); } - napi_disable(&c->napi); + napi_disable_locked(&c->napi); } int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index f0744a45db92..4e461cb03b83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -374,7 +374,7 @@ void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_que void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index e75759533ae0..16c44d628eda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; + struct mlx5e_priv *priv; struct mlx5e_rq *rq; int err; rq = ctx; + priv = rq->priv; + + mutex_lock(&priv->state_lock); + eq = rq->cq.mcq.eq; err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); if (err && rq->icosq) clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); + mutex_unlock(&priv->state_lock); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index dbd9482359e1..85d5cb39b107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -13,7 +13,6 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_RECOVERING] = "recovering", [MLX5E_SQ_STATE_IPSEC] = "ipsec", [MLX5E_SQ_STATE_DIM] = "dim", - [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", }; @@ -107,9 +106,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); - rtnl_lock(); mlx5e_activate_txqsq(sq); - rtnl_unlock(); if (sq->channel) mlx5e_trigger_napi_icosq(sq->channel); @@ -176,7 +173,6 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) priv = ptpsq->txqsq.priv; - rtnl_lock(); mutex_lock(&priv->state_lock); chs = &priv->channels; netdev = priv->netdev; @@ -196,7 +192,6 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) netif_carrier_on(netdev); mutex_unlock(&priv->state_lock); - rtnl_unlock(); return err; } @@ -316,6 +311,30 @@ out: mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static void +mlx5e_tx_reporter_diagnose_tis_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + u8 num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); + u32 tc, i, tisn; + + devlink_fmsg_arr_pair_nest_start(fmsg, "TIS Config"); + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < num_tc; tc++) { + tisn = mlx5e_profile_get_tisn(priv->mdev, priv, + priv->profile, i, tc); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_u32_pair_put(fmsg, "lag port", i); + devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + devlink_fmsg_u32_pair_put(fmsg, "tisn", tisn); + devlink_fmsg_obj_nest_end(fmsg); + } + } + devlink_fmsg_arr_pair_nest_end(fmsg); +} + static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack) @@ -331,6 +350,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); + mlx5e_tx_reporter_diagnose_tis_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); for (i = 0; i < priv->channels.num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 74cd111ee320..c68ba0e58fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -567,7 +567,8 @@ inner_tir: return final_err; } -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric) { if (indir) memcpy(indir, rss->indir.table, @@ -582,8 +583,6 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bo if (symmetric) *symmetric = rss->hash.symmetric; - - return 0; } int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index 8ac902190010..c6c1b2847cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -47,7 +47,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss); int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric); +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, const u8 *key, const u8 *hfunc, const bool *symmetric, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 5fcbe47337b0..a2acbfee2b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -71,17 +71,12 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return 0; } -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_rss *rss; - int i; - - for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) - if (!res->rss[i]) - break; - if (i == MLX5E_MAX_NUM_RSS) + if (WARN_ON_ONCE(res->rss[rss_idx])) return -ENOSPC; rss = mlx5e_rss_init(res->mdev, inner_ft_support, res->drop_rqn, @@ -97,8 +92,7 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int i mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch); } - res->rss[i] = rss; - *rss_idx = i; + res->rss[rss_idx] = rss; return 0; } @@ -193,19 +187,17 @@ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int n mlx5e_rss_set_indir_uniform(res->rss[0], nch); } -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) { - struct mlx5e_rss *rss; - - if (rss_idx >= MLX5E_MAX_NUM_RSS) - return -EINVAL; + struct mlx5e_rss *rss = NULL; - rss = res->rss[rss_idx]; - if (!rss) - return -ENOENT; + if (rss_idx < MLX5E_MAX_NUM_RSS) + rss = res->rss[rss_idx]; + if (WARN_ON_ONCE(!rss)) + return; - return mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); + mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); } int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, @@ -579,8 +571,6 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann for (ix = 0; ix < nch; ix++) mlx5e_rx_res_channel_activate_direct(res, chs, ix); - for (ix = nch; ix < res->max_nch; ix++) - mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; @@ -603,7 +593,7 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) mlx5e_rx_res_rss_disable(res); - for (ix = 0; ix < res->max_nch; ix++) + for (ix = 0; ix < res->rss_nch; ix++) mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 3e09d91281af..1d049e2aa264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -48,8 +48,9 @@ void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *ch /* Configuration API */ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc, bool *symmetric); +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, const u32 *indir, const u8 *key, const u8 *hfunc, const bool *symmetric); @@ -61,7 +62,7 @@ int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch); int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index a13c5e707b83..9bdb5820c553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -94,29 +94,30 @@ mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, struct net_device **out_dev, struct netlink_ext_ack *extack) { - struct net_device *vlan_dev = *out_dev; - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_PUSH, - .vlan.vid = vlan_dev_vlan_id(vlan_dev), - .vlan.proto = vlan_dev_vlan_proto(vlan_dev), - .vlan.prio = 0, - }; - int err; - - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); - if (err) - return err; - - rcu_read_lock(); - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); - rcu_read_unlock(); - if (!*out_dev) - return -ENODEV; + do { + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, + &attr->action, extack, NULL); + if (err) + return err; - if (is_vlan_dev(*out_dev)) - err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), + dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + } while (is_vlan_dev(*out_dev)); - return err; + return 0; } int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 81332cd4a582..870d12364f99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1195,6 +1195,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, struct flow_action_entry *meta_action; unsigned long cookie = flow->cookie; struct mlx5_ct_entry *entry; + bool has_nat; int err; meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -1236,6 +1237,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); if (err) goto err_set; + has_nat = memcmp(&entry->tuple, &entry->tuple_nat, + sizeof(entry->tuple)); spin_lock_bh(&ct_priv->ht_lock); @@ -1244,7 +1247,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_entries; - if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { + if (has_nat) { err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 140606fcd23b..b5c19396e096 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -149,7 +149,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); t->stats = &priv->trap_stats.ch; - netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll); + netif_napi_add_locked(netdev, &t->napi, mlx5e_trap_napi_poll); err = mlx5e_open_trap_rq(priv, t); if (unlikely(err)) @@ -164,7 +164,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) err_close_trap_rq: mlx5e_close_trap_rq(&t->rq); err_napi_del: - netif_napi_del(&t->napi); + netif_napi_del_locked(&t->napi); kvfree(t); return ERR_PTR(err); } @@ -173,13 +173,13 @@ void mlx5e_close_trap(struct mlx5e_trap *trap) { mlx5e_tir_destroy(&trap->tir); mlx5e_close_trap_rq(&trap->rq); - netif_napi_del(&trap->napi); + netif_napi_del_locked(&trap->napi); kvfree(trap); } static void mlx5e_activate_trap(struct mlx5e_trap *trap) { - napi_enable(&trap->napi); + napi_enable_locked(&trap->napi); mlx5e_activate_rq(&trap->rq); mlx5e_trigger_napi_sched(&trap->napi); } @@ -189,7 +189,7 @@ void mlx5e_deactivate_trap(struct mlx5e_priv *priv) struct mlx5e_trap *trap = priv->en_trap; mlx5e_deactivate_rq(&trap->rq); - napi_disable(&trap->napi); + napi_disable_locked(&trap->napi); } static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv) @@ -285,6 +285,7 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + netdev_lock(priv->netdev); switch (trap_ctx->action) { case DEVLINK_TRAP_ACTION_TRAP: err = mlx5e_handle_action_trap(priv, trap_ctx->id); @@ -297,6 +298,7 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ trap_ctx->action); err = -EINVAL; } + netdev_unlock(priv->netdev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index e837c21d3d21..5dc04bbfc71b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -322,14 +322,24 @@ mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) } static inline void -mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, - enum mlx5e_dma_map_type map_type) +mlx5e_dma_push_single(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size) { struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); dma->addr = addr; dma->size = size; - dma->type = map_type; + dma->type = MLX5E_DMA_MAP_SINGLE; +} + +static inline void +mlx5e_dma_push_netmem(struct mlx5e_txqsq *sq, netmem_ref netmem, + dma_addr_t addr, u32 size) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + netmem_dma_unmap_addr_set(netmem, dma, addr, addr); + dma->size = size; + dma->type = MLX5E_DMA_MAP_PAGE; } static inline @@ -362,7 +372,8 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); break; case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(pdev, dma->addr, dma->size, + DMA_TO_DEVICE, 0); break; default: WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f803e1c93590..5d51600935a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -707,10 +707,11 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as we know this is a page_pool page. + /* No need to check page_pool_page_is_pp() as we + * know this is a page_pool page. */ - page_pool_recycle_direct(page->pp, page); + page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, + page); } while (++n < num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 446e492c6bb8..46ab0a9e8cdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -45,12 +45,6 @@ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ sizeof(struct mlx5_wqe_inline_seg)) -struct mlx5e_xdp_buff { - struct xdp_buff xdp; - struct mlx5_cqe64 *cqe; - struct mlx5e_rq *rq; -}; - /* XDP packets can be transmitted in different ways. On completion, we need to * distinguish between them to clean up things in a proper way. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 2dd842aac6fc..00e77c71e201 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -36,6 +36,7 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/netevent.h> +#include <net/ipv6_stubs.h> #include "en.h" #include "eswitch.h" @@ -259,10 +260,15 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct mlx5e_ipsec_addr *addrs = &attrs->addrs; + struct net_device *netdev = sa_entry->dev; struct xfrm_state *x = sa_entry->x; - struct net_device *netdev; + struct dst_entry *rt_dst_entry; + struct flowi4 fl4 = {}; + struct flowi6 fl6 = {}; struct neighbour *n; u8 addr[ETH_ALEN]; + struct rtable *rt; const void *pkey; u8 *dst, *src; @@ -270,25 +276,94 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - netdev = x->xso.real_dev; - mlx5_query_mac_address(mdev, addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; dst = attrs->smac; - pkey = &attrs->addrs.saddr.a4; + + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->saddr.a4; + fl4.saddr = addrs->daddr.a4; + pkey = &addrs->saddr.a4; + break; + case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16); + pkey = &addrs->saddr.a6; + break; + default: + return; + } break; case XFRM_DEV_OFFLOAD_OUT: src = attrs->smac; dst = attrs->dmac; - pkey = &attrs->addrs.daddr.a4; + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->daddr.a4; + fl4.saddr = addrs->saddr.a4; + pkey = &addrs->daddr.a4; + break; + case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16); + pkey = &addrs->daddr.a6; + break; + default: + return; + } break; default: return; } ether_addr_copy(src, addr); + + /* Destination can refer to a routed network, so perform FIB lookup + * to resolve nexthop and get its MAC. Neighbour resolution is used as + * fallback. + */ + switch (addrs->family) { + case AF_INET: + rt = ip_route_output_key(dev_net(netdev), &fl4); + if (IS_ERR(rt)) + goto neigh; + + if (rt->rt_type != RTN_UNICAST) { + ip_rt_put(rt); + goto neigh; + } + rt_dst_entry = &rt->dst; + break; + case AF_INET6: + rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( + dev_net(netdev), NULL, &fl6, NULL); + if (IS_ERR(rt_dst_entry)) + goto neigh; + break; + default: + return; + } + + n = dst_neigh_lookup(rt_dst_entry, pkey); + if (!n) { + dst_release(rt_dst_entry); + goto neigh; + } + + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + dst_release(rt_dst_entry); + neigh_release(n); + return; + +neigh: n = neigh_lookup(&arp_tbl, pkey, netdev); if (!n) { n = neigh_create(&arp_tbl, pkey, netdev); @@ -692,17 +767,17 @@ static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) return 0; } -static int mlx5e_xfrm_add_state(struct xfrm_state *x, +static int mlx5e_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; - struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; gfp_t gfp; int err; - priv = netdev_priv(netdev); + priv = netdev_priv(dev); if (!priv->ipsec) return -EOPNOTSUPP; @@ -713,6 +788,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -ENOMEM; sa_entry->x = x; + sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) @@ -809,7 +885,7 @@ err_xfrm: return err; } -static void mlx5e_xfrm_del_state(struct xfrm_state *x) +static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -822,7 +898,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) WARN_ON(old != sa_entry); } -static void mlx5e_xfrm_free_state(struct xfrm_state *x) +static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -855,8 +931,6 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec *ipsec; struct neighbour *n = ptr; - struct net_device *netdev; - struct xfrm_state *x; unsigned long idx; if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) @@ -876,11 +950,9 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, continue; } - x = sa_entry->x; - netdev = x->xso.real_dev; data = sa_entry->work->data; - neigh_ha_snapshot(data->addr, n, netdev); + neigh_ha_snapshot(data->addr, n, sa_entry->dev); queue_work(ipsec->wq, &sa_entry->work->work); } @@ -996,8 +1068,8 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock)); + lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) || + lockdep_is_held(&net->xfrm.xfrm_state_lock)); if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; @@ -1170,7 +1242,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xdo.real_dev; + struct net_device *netdev = x->xdo.dev; struct mlx5e_ipsec_pol_entry *pol_entry; struct mlx5e_priv *priv; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index a63c2289f8af..ffcd0cdeb775 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -274,6 +274,7 @@ struct mlx5e_ipsec_limits { struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_esn_state esn_state; struct xfrm_state *x; + struct net_device *dev; struct mlx5e_ipsec *ipsec; struct mlx5_accel_esp_xfrm_attrs attrs; void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 820debf3fbbf..ef7322d381af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -42,8 +42,7 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || - (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && - is_mdev_legacy_mode(mdev)))) { + is_mdev_legacy_mode(mdev))) { if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && MLX5_CAP_FLOWTABLE_NIC_RX(mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 727fa7c18523..6056106edcc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -327,6 +327,10 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, if (unlikely(!sa_entry)) { rcu_read_unlock(); atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + /* Clear secpath to prevent invalid dereference + * in downstream XFRM policy checks. + */ + secpath_reset(skb); return; } xfrm_state_hold(sa_entry->x); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 3db31cc10719..08f06984407b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -744,7 +744,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); - mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + mlx5e_dma_push_netmem(sq, skb_frag_netmem(frag), dma_addr, fsz); tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); sq->pc += MLX5E_KTLS_DUMP_WQEBBS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8705cffc747f..5fe016e477b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1147,6 +1147,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) bool reset = true; int err; + netdev_lock(priv->netdev); mutex_lock(&priv->state_lock); new_params = priv->channels.params; @@ -1162,6 +1163,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) &trust_state, reset); mutex_unlock(&priv->state_lock); + netdev_unlock(priv->netdev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 298bb74ec5e9..d1d629697e28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); } else { __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state); - + synchronize_net(); mlx5e_dim_disable(rq->dim); rq->dim = NULL; } @@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); } else { __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); - + synchronize_net(); mlx5e_dim_disable(sq->dim); sq->dim = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fdf9e9bb99ac..d507366d773e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include <linux/dim.h> #include <linux/ethtool_netlink.h> +#include <net/netdev_queues.h> #include "en.h" #include "en/channels.h" @@ -43,7 +44,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -366,11 +366,6 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; - - kernel_param->tcp_data_split = - (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? - ETHTOOL_TCP_DATA_SPLIT_ENABLED : - ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static void mlx5e_get_ringparam(struct net_device *dev, @@ -383,6 +378,27 @@ static void mlx5e_get_ringparam(struct net_device *dev, mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } +static bool mlx5e_ethtool_set_tcp_data_split(struct mlx5e_priv *priv, + u8 tcp_data_split, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->netdev; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(dev->features & NETIF_F_GRO_HW)) { + NL_SET_ERR_MSG_MOD(extack, + "TCP-data-split is not supported when GRO HW is disabled"); + return false; + } + + /* Might need to disable HW-GRO if it was kept on due to hds. */ + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && + dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + netdev_update_features(priv->netdev); + + return true; +} + int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param, struct netlink_ext_ack *extack) @@ -441,6 +457,11 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); + if (!mlx5e_ethtool_set_tcp_data_split(priv, + kernel_param->tcp_data_split, + extack)) + return -EINVAL; + return mlx5e_ethtool_set_ringparam(priv, param, extack); } @@ -1098,10 +1119,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; @@ -1461,61 +1480,121 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct mlx5e_priv *priv = netdev_priv(netdev); - u32 rss_context = rxfh->rss_context; bool symmetric; - int err; mutex_lock(&priv->state_lock); - err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, - rxfh->indir, rxfh->key, &rxfh->hfunc, &symmetric); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, 0, rxfh->indir, rxfh->key, + &rxfh->hfunc, &symmetric); mutex_unlock(&priv->state_lock); - if (err) - return err; - if (symmetric) rxfh->input_xfrm = RXH_XFRM_SYM_OR_XOR; return 0; } -static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static int mlx5e_rxfh_hfunc_check(struct mlx5e_priv *priv, + const struct ethtool_rxfh_param *rxfh) { - bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; - struct mlx5e_priv *priv = netdev_priv(dev); - u32 *rss_context = &rxfh->rss_context; - u8 hfunc = rxfh->hfunc; unsigned int count; - int err; - - mutex_lock(&priv->state_lock); count = priv->channels.params.num_channels; - if (hfunc == ETH_RSS_HASH_XOR) { + if (rxfh->hfunc == ETH_RSS_HASH_XOR) { unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); if (count > xor8_max_channels) { - err = -EINVAL; netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", __func__, count, xor8_max_channels); - goto unlock; + return -EINVAL; } } - if (*rss_context && rxfh->rss_delete) { - err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + return 0; +} + +static int mlx5e_set_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) goto unlock; - } - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); - if (err) - goto unlock; - } + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_create_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rxfh->rss_context, + priv->channels.params.num_channels); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + if (err) + goto unlock; + + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rxfh->rss_context, + ethtool_rxfh_context_indir(ctx), + ethtool_rxfh_context_key(ctx), + &ctx->hfunc, &symmetric); + if (symmetric) + ctx->input_xfrm = RXH_XFRM_SYM_OR_XOR; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_modify_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; - err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, rxfh->indir, rxfh->key, hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); @@ -1525,6 +1604,20 @@ unlock: return err; } +static int mlx5e_remove_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_destroy(priv->rx_res, rss_context); + mutex_unlock(&priv->state_lock); + return err; +} + #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 #define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 #define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 @@ -1689,6 +1782,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, return 0; info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -2059,14 +2153,9 @@ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, if (err) return err; - dev_hold(dev); - rtnl_unlock(); - err = mlx5_firmware_flash(mdev, fw, NULL); release_firmware(fw); - rtnl_lock(); - dev_put(dev); return err; } @@ -2384,6 +2473,23 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } +static int mlx5e_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_rxfh_fields(priv, info); +} + +static int mlx5e_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *cmd, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxfh_fields(priv, cmd, extack); +} + static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { @@ -2622,13 +2728,15 @@ static void mlx5e_get_ts_stats(struct net_device *netdev, const struct ethtool_ops mlx5e_ethtool_ops = { .cap_link_lanes_supported = true, - .cap_rss_ctx_supported = true, + .rxfh_per_ctx_fields = true, .rxfh_per_ctx_key = true, + .rxfh_max_num_contexts = MLX5E_MAX_NUM_RSS, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_USE_CQE, .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, @@ -2649,6 +2757,11 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, + .get_rxfh_fields = mlx5e_get_rxfh_fields, + .set_rxfh_fields = mlx5e_set_rxfh_fields, + .create_rxfh_context = mlx5e_create_rxfh_context, + .modify_rxfh_context = mlx5e_modify_rxfh_context, + .remove_rxfh_context = mlx5e_remove_rxfh_context, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 05058710d2c7..265c4ca85f7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -484,7 +484,9 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs, } /* Need to fix some features.. */ + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); return err; } @@ -521,7 +523,9 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, } else if (be16_to_cpu(proto) == ETH_P_8021AD) { clear_bit(vid, fs->vlan->active_svlans); mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); } return 0; @@ -776,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; ft_attr.autogroup.max_num_groups = 1; ft_attr.level = MLX5E_PROMISC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.prio = MLX5E_PROMISC_PRIO; ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d68230a7b9f4..79916f1abd14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -894,17 +894,17 @@ static int flow_type_to_traffic_type(u32 flow_type) } } -static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, - struct ethtool_rxnfc *nfc) +int mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) { u8 rx_hash_field = 0; u32 flow_type = 0; - u32 rss_idx = 0; + u32 rss_idx; int err; int tt; - if (nfc->flow_type & FLOW_RSS) - rss_idx = nfc->rss_context; + rss_idx = nfc->rss_context; flow_type = flow_type_mask(nfc->flow_type); tt = flow_type_to_traffic_type(flow_type); @@ -941,16 +941,15 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, return err; } -static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, - struct ethtool_rxnfc *nfc) +int mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc) { int hash_field = 0; u32 flow_type = 0; - u32 rss_idx = 0; + u32 rss_idx; int tt; - if (nfc->flow_type & FLOW_RSS) - rss_idx = nfc->rss_context; + rss_idx = nfc->rss_context; flow_type = flow_type_mask(nfc->flow_type); tt = flow_type_to_traffic_type(flow_type); @@ -986,9 +985,6 @@ int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); break; - case ETHTOOL_SRXFH: - err = mlx5e_set_rss_hash_opt(priv, cmd); - break; default: err = -EOPNOTSUPP; break; @@ -1013,9 +1009,6 @@ int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, case ETHTOOL_GRXCLSRLALL: err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); break; - case ETHTOOL_GRXFH: - err = mlx5e_get_rss_hash_opt(priv, info); - break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9bd166f489e7..21bb88c5d3dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,7 +39,9 @@ #include <linux/debugfs.h> #include <linux/if_bridge.h> #include <linux/filter.h> +#include <net/netdev_lock.h> #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/pkt_sched.h> #include <net/xdp_sock_drv.h> @@ -74,10 +76,12 @@ #include "en/trap.h" #include "lib/devcom.h" #include "lib/sd.h" +#include "en/pcie_cong_event.h" static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, shampo)) + if (!MLX5_CAP_GEN(mdev, shampo) || + !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge)) return false; /* Our HW-GRO implementation relies on "KSM Mkey" for @@ -330,47 +334,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) -{ - rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), - GFP_KERNEL, node); - if (!rq->mpwqe.shampo) - return -ENOMEM; - return 0; -} - -static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) -{ - kvfree(rq->mpwqe.shampo); -} - -static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) -{ - struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - - shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, - node); - shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, - sizeof(*shampo->pages)), - GFP_KERNEL, node); - if (!shampo->bitmap || !shampo->pages) - goto err_nomem; - - return 0; - -err_nomem: - bitmap_free(shampo->bitmap); - kvfree(shampo->pages); - - return -ENOMEM; -} - -static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) -{ - bitmap_free(rq->mpwqe.shampo->bitmap); - kvfree(rq->mpwqe.shampo->pages); -} - static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); @@ -583,19 +546,26 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq } static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, - struct mlx5e_rq *rq) + u16 hd_per_wq, __be32 *umr_mkey) { u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + u32 mkey; + int err; - if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { + if (max_ksm_size < hd_per_wq) { mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", - max_ksm_size, rq->mpwqe.shampo->hd_per_wq); + max_ksm_size, hd_per_wq); return -EINVAL; } - return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, - MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, - &rq->mpwqe.shampo->mkey); + err = mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, + &mkey); + if (err) + return err; + + *umr_mkey = cpu_to_be32(mkey); + return 0; } static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@ -706,6 +676,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_rq_cqe_err(rq); } +static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) +{ + struct mlx5e_rq *rq = container_of(timeout_work, + struct mlx5e_rq, + rx_timeout_work); + + /* Acquire netdev instance lock to synchronize with channel close and + * reopen flows. Either successfully obtain the lock, or detect that + * channels are closing for another reason, making this work no longer + * necessary. + */ + while (!netdev_trylock(rq->netdev)) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) + return; + msleep(20); + } + + mlx5e_reporter_rx_timeout(rq); + netdev_unlock(rq->netdev); +} + static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) { rq->wqe_overflow.page = alloc_page(GFP_KERNEL); @@ -757,6 +748,42 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param xdp_frag_size); } +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq, + int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->hd_per_wq = hd_per_wq; + + shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node); + shampo->pages = kvzalloc_node(array_size(hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->pages) + goto err_nomem; + + return 0; + +err_nomem: + kvfree(shampo->pages); + bitmap_free(shampo->bitmap); + + return -ENOMEM; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->pages); + bitmap_free(rq->mpwqe.shampo->bitmap); +} + +static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) +{ + struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); + + return !!rxq->mp_params.mp_ops; +} + static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -764,42 +791,80 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, u32 *pool_size, int node) { + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u32 hd_pool_size; + u16 hd_per_wq; + int wq_size; int err; if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) return 0; - err = mlx5e_rq_shampo_hd_alloc(rq, node); - if (err) - goto out; - rq->mpwqe.shampo->hd_per_wq = - mlx5e_shampo_hd_per_wq(mdev, params, rqp); - err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + + /* split headers data structures */ + hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node); if (err) - goto err_shampo_hd; - err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + goto err_shampo_hd_info_alloc; + + err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq, + &rq->mpwqe.shampo->mkey_be); if (err) - goto err_shampo_info; + goto err_umr_mkey; + + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + if (mlx5_rq_needs_separate_hd_pool(rq)) { + /* Separate page pool for shampo headers */ + struct page_pool_params pp_params = { }; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = hd_pool_size; + pp_params.nid = node; + pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; + pp_params.netdev = rq->netdev; + pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; + + rq->hd_page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->hd_page_pool)) { + err = PTR_ERR(rq->hd_page_pool); + rq->hd_page_pool = NULL; + goto err_hds_page_pool; + } + } else { + /* Common page pool, reserve space for headers. */ + *pool_size += hd_pool_size; + rq->hd_page_pool = NULL; + } + + /* gro only data structures */ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); if (!rq->hw_gro_data) { err = -ENOMEM; goto err_hw_gro_data; } - rq->mpwqe.shampo->key = - cpu_to_be32(rq->mpwqe.shampo->mkey); - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); - rq->mpwqe.shampo->pages_per_wq = - rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; - *pool_size += rq->mpwqe.shampo->pages_per_wq; + return 0; err_hw_gro_data: + page_pool_destroy(rq->hd_page_pool); +err_hds_page_pool: + mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.shampo->mkey_be)); +err_umr_mkey: mlx5e_rq_shampo_hd_info_free(rq); -err_shampo_info: - mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); -err_shampo_hd: - mlx5e_rq_shampo_hd_free(rq); -out: +err_shampo_hd_info_alloc: + kvfree(rq->mpwqe.shampo); return err; } @@ -809,9 +874,12 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) return; kvfree(rq->hw_gro_data); + if (rq->hd_page_pool != rq->page_pool) + page_pool_destroy(rq->hd_page_pool); mlx5e_rq_shampo_hd_info_free(rq); - mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); - mlx5e_rq_shampo_hd_free(rq); + mlx5_core_destroy_mkey(rq->mdev, + be32_to_cpu(rq->mpwqe.shampo->mkey_be)); + kvfree(rq->mpwqe.shampo); } static int mlx5e_alloc_rq(struct mlx5e_params *params, @@ -829,6 +897,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); + INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); @@ -914,6 +983,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); + if (err) + goto err_free_by_rq_type; xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ @@ -928,6 +999,11 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pp_params.netdev = rq->netdev; pp_params.dma_dir = rq->buff.map_dir; pp_params.max_len = PAGE_SIZE; + pp_params.queue_idx = rq->ix; + + /* Shampo header data split allow for unreadable netmem */ + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; /* page_pool can be used even when there is no rq->xdp_prog, * given page_pool does not handle DMA mapping there is no @@ -940,12 +1016,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->page_pool = NULL; goto err_free_by_rq_type; } - if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + if (!rq->hd_page_pool) + rq->hd_page_pool = rq->page_pool; + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + goto err_destroy_page_pool; + } } - if (err) - goto err_destroy_page_pool; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -1073,7 +1152,8 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_cou if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { MLX5_SET(wq, wq, log_headers_buffer_entry_num, order_base_2(rq->mpwqe.shampo->hd_per_wq)); - MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey); + MLX5_SET(wq, wq, headers_mkey, + be32_to_cpu(rq->mpwqe.shampo->mkey_be)); } mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, @@ -1203,7 +1283,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); - mlx5e_reporter_rx_timeout(rq); + queue_work(rq->priv->wq, &rq->rx_timeout_work); + return -ETIMEDOUT; } @@ -1374,6 +1455,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq) if (rq->dim) cancel_work_sync(&rq->dim->work); cancel_work_sync(&rq->recover_work); + cancel_work_sync(&rq->rx_timeout_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_free_rq(rq); @@ -1629,8 +1711,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) @@ -1903,7 +1983,20 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, recover_work); + /* Recovering queues means re-enabling NAPI, which requires the netdev + * instance lock. However, SQ closing flows have to wait for work tasks + * to finish while also holding the netdev instance lock. So either get + * the lock or find that the SQ is no longer enabled and thus this work + * is not relevant anymore. + */ + while (!netdev_trylock(sq->netdev)) { + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) + return; + msleep(20); + } + mlx5e_reporter_tx_err_cqe(sq); + netdev_unlock(sq->netdev); } static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) @@ -2705,8 +2798,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); - netif_napi_add_config(netdev, &c->napi, mlx5e_napi_poll, ix); - netif_napi_set_irq(&c->napi, irq); + netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); + netif_napi_set_irq_locked(&c->napi, irq); err = mlx5e_open_queues(c, params, cparam); if (unlikely(err)) @@ -2728,7 +2821,7 @@ err_close_queues: mlx5e_close_queues(c); err_napi_del: - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); err_free: kvfree(cparam); @@ -2741,7 +2834,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) { int tc; - napi_enable(&c->napi); + napi_enable_locked(&c->napi); for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); @@ -2773,7 +2866,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_txqsq(&c->sq[tc]); mlx5e_qos_deactivate_queues(c); - napi_disable(&c->napi); + napi_disable_locked(&c->napi); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2782,7 +2875,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_xsk(c); mlx5e_close_queues(c); mlx5e_qos_close_queues(c); - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); kvfree(c); } @@ -4029,10 +4122,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) if (enable) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; - new_params.packet_merge.shampo.match_criteria_type = - MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; - new_params.packet_merge.shampo.alignment_granularity = - MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; } else { @@ -4276,7 +4365,7 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) if (!netdev->netdev_ops->ndo_bpf || params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { - xdp_clear_features_flag(netdev); + xdp_set_features_flag_locked(netdev, 0); return; } @@ -4285,7 +4374,7 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG; - xdp_set_features_flag(netdev, val); + xdp_set_features_flag_locked(netdev, val); } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) @@ -4359,6 +4448,7 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { + struct netdev_config *cfg = netdev->cfg_pending; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; @@ -4425,6 +4515,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + /* The header-data split ring param requires HW GRO to stay enabled. */ + if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(features & NETIF_F_GRO_HW)) { + netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); + features |= NETIF_F_GRO_HW; + } + if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); netdev->netns_immutable = true; @@ -4968,21 +5065,19 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) struct net_device *netdev = priv->netdev; int i; - /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues - * through this flow. However, channel closing flows have to wait for - * this work to finish while holding rtnl lock too. So either get the - * lock or find that channels are being closed for other reason and - * this work is not relevant anymore. + /* Recovering the TX queues implies re-enabling NAPI, which requires + * the netdev instance lock. + * However, channel closing flows have to wait for this work to finish + * while holding the same lock. So either get the lock or find that + * channels are being closed for other reason and this work is not + * relevant anymore. */ - while (!rtnl_trylock()) { + while (!netdev_trylock(netdev)) { if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) return; msleep(20); } - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; - for (i = 0; i < netdev->real_num_tx_queues; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(netdev, i); @@ -4996,8 +5091,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) break; } -unlock: - rtnl_unlock(); + netdev_unlock(netdev); } static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) @@ -5292,8 +5386,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) priv->nic_info.set_port = mlx5e_vxlan_set_port; priv->nic_info.unset_port = mlx5e_vxlan_unset_port; - priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; + priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; /* Don't count the space hard-coded to the IANA port */ priv->nic_info.tables[0].n_entries = @@ -5321,7 +5414,6 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *xskrq_stats; struct mlx5e_rq_stats *rq_stats; - ASSERT_RTNL(); if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; @@ -5341,7 +5433,6 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_sq_stats *sq_stats; - ASSERT_RTNL(); if (!priv->stats_nch) return; @@ -5362,7 +5453,6 @@ static void mlx5e_get_base_stats(struct net_device *dev, struct mlx5e_ptp *ptp_channel; int i, tc; - ASSERT_RTNL(); if (!mlx5e_is_uplink_rep(priv)) { rx->packets = 0; rx->bytes = 0; @@ -5446,6 +5536,103 @@ static const struct netdev_stat_ops mlx5e_stat_ops = { .get_base_stats = mlx5e_get_base_stats, }; +struct mlx5_qmgmt_data { + struct mlx5e_channel *c; + struct mlx5e_channel_param cparam; +}; + +static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5e_params params = chs->params; + struct mlx5_core_dev *mdev; + int err; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = -ENODEV; + goto unlock; + } + + if (queue_index >= chs->num) { + err = -ERANGE; + goto unlock; + } + + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || + chs->params.ptp_rx || + chs->params.xdp_prog || + priv->htb) { + netdev_err(priv->netdev, + "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); + err = -EOPNOTSUPP; + goto unlock; + } + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); + err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam); + if (err) + goto unlock; + + err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) +{ + struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; + + /* not supposed to happen since mlx5e_queue_start never fails + * but this is how this should be implemented just in case + */ + if (data->c) + mlx5e_close_channel(data->c); +} + +static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) +{ + /* In mlx5 a txq cannot be simply stopped in isolation, only restarted. + * mlx5e_queue_start does not fail, we stop the old queue there. + * TODO: Improve this. + */ + return 0; +} + +static int mlx5e_queue_start(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channel *old; + + mutex_lock(&priv->state_lock); + + /* stop and close the old */ + old = priv->channels.c[queue_index]; + mlx5e_deactivate_priv_channels(priv); + /* close old before activating new, to avoid napi conflict */ + mlx5e_close_channel(old); + + /* start the new */ + priv->channels.c[queue_index] = new->c; + mlx5e_activate_priv_channels(priv); + mutex_unlock(&priv->state_lock); + return 0; +} + +static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), + .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, + .ndo_queue_mem_free = mlx5e_queue_mem_free, + .ndo_queue_start = mlx5e_queue_start, + .ndo_queue_stop = mlx5e_queue_stop, +}; + static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -5456,8 +5643,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); mlx5e_dcbnl_build_netdev(netdev); @@ -5496,17 +5686,17 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) MLX5E_MPWRQ_UMR_MODE_ALIGNED)) netdev->vlan_features |= NETIF_F_LRO; + if (mlx5e_hw_gro_supported(mdev) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->vlan_features |= NETIF_F_GRO_HW; + netdev->hw_features = netdev->vlan_features; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (mlx5e_hw_gro_supported(mdev) && - mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, - MLX5E_MPWRQ_UMR_MODE_ALIGNED)) - netdev->hw_features |= NETIF_F_GRO_HW; - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -5585,6 +5775,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netmem_tx = true; + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); mlx5e_set_xdp_feature(netdev); mlx5e_set_netdev_dev_addr(netdev); @@ -5831,6 +6023,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); + mlx5e_pcie_cong_event_init(priv); mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -5839,9 +6032,11 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_nic_set_rx_mode(priv); rtnl_lock(); + netdev_lock(netdev); if (netif_running(netdev)) mlx5e_open(netdev); udp_tunnel_nic_reset_ntf(priv->netdev); + netdev_unlock(netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -5854,23 +6049,26 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_dcbnl_delete_app(priv); rtnl_lock(); + netdev_lock(priv->netdev); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); netif_device_detach(priv->netdev); + if (priv->en_trap) { + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; + } + netdev_unlock(priv->netdev); rtnl_unlock(); mlx5e_nic_set_rx_mode(priv); + mlx5e_pcie_cong_event_cleanup(priv); mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_blocking_events(priv); - if (priv->en_trap) { - mlx5e_deactivate_trap(priv); - mlx5e_close_trap(priv->en_trap); - priv->en_trap = NULL; - } mlx5e_disable_async_events(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); @@ -6125,7 +6323,9 @@ static void mlx5e_update_features(struct net_device *netdev) return; /* features will be updated on netdev registration */ rtnl_lock(); + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); rtnl_unlock(); } @@ -6136,7 +6336,7 @@ static void mlx5e_reset_channels(struct net_device *netdev) int mlx5e_attach_netdev(struct mlx5e_priv *priv) { - const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; + const bool need_lock = priv->netdev->reg_state == NETREG_REGISTERED; const struct mlx5e_profile *profile = priv->profile; int max_nch; int err; @@ -6178,15 +6378,19 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) * 2. Set our default XPS cpumask. * 3. Build the RQT. * - * rtnl_lock is required by netif_set_real_num_*_queues in case the + * Locking is required by netif_set_real_num_*_queues in case the * netdev has been registered by this point (if this function was called * in the reload or resume flow). */ - if (take_rtnl) + if (need_lock) { rtnl_lock(); + netdev_lock(priv->netdev); + } err = mlx5e_num_channels_changed(priv); - if (take_rtnl) + if (need_lock) { + netdev_unlock(priv->netdev); rtnl_unlock(); + } if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2abab241f03b..63a7a788fb0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -33,6 +33,7 @@ #include <linux/dim.h> #include <linux/debugfs.h> #include <linux/mlx5/fs.h> +#include <net/netdev_lock.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/act_api.h> @@ -803,6 +804,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -885,6 +887,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, { SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_rep; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); eth_hw_addr_random(netdev); netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; @@ -1344,9 +1348,11 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) netdev->wanted_features |= NETIF_F_HW_TC; rtnl_lock(); + netdev_lock(netdev); if (netif_running(netdev)) mlx5e_open(netdev); udp_tunnel_nic_reset_ntf(priv->netdev); + netdev_unlock(netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -1356,9 +1362,11 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); + netdev_lock(priv->netdev); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); netif_device_detach(priv->netdev); + netdev_unlock(priv->netdev); rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5fd70b4d55be..218b1a09534c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -273,33 +273,32 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) -static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, +static int mlx5e_page_alloc_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { - struct page *page; + netmem_ref netmem = page_pool_dev_alloc_netmems(pp); - page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!page)) + if (unlikely(!netmem)) return -ENOMEM; - page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, MLX5E_PAGECNT_BIAS_MAX); *frag_page = (struct mlx5e_frag_page) { - .page = page, + .netmem = netmem, .frags = 0, }; return 0; } -static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, +static void mlx5e_page_release_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; - struct page *page = frag_page->page; + netmem_ref netmem = frag_page->netmem; - if (page_pool_unref_page(page, drain_count) == 0) - page_pool_put_unrefed_page(rq->page_pool, page, -1, true); + if (page_pool_unref_netmem(netmem, drain_count) == 0) + page_pool_put_unrefed_netmem(pp, netmem, -1, true); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -313,7 +312,8 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, + frag->frag_page); return err; } @@ -332,7 +332,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { if (mlx5e_frag_can_release(frag)) - mlx5e_page_release_fragmented(rq, frag->frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag->frag_page); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag->frag_page->netmem); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -499,9 +499,10 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, u32 frag_offset, u32 len) { + netmem_ref netmem = frag_page->netmem; skb_frag_t *frag; - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); if (!xdp_buff_has_frags(xdp)) { @@ -514,9 +515,9 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); + skb_frag_fill_netmem_desc(frag, netmem, frag_offset, len); - if (page_is_pfmemalloc(frag_page->page)) + if (netmem_is_pfmemalloc(netmem)) xdp_buff_set_frag_pfmemalloc(xdp); sinfo->xdp_frags_size += len; } @@ -527,27 +528,29 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(frag_page->netmem); u8 next_frag = skb_shinfo(skb)->nr_frags; + netmem_ref netmem = frag_page->netmem; dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - if (skb_can_coalesce(skb, next_frag, frag_page->page, frag_offset)) { + if (skb_can_coalesce_netmem(skb, next_frag, netmem, frag_offset)) { skb_coalesce_rx_frag(skb, next_frag - 1, len, truesize); - } else { - frag_page->frags++; - skb_add_rx_frag(skb, next_frag, frag_page->page, - frag_offset, len, truesize); + return; } + + frag_page->frags++; + skb_add_rx_frag_netmem(skb, next_frag, netmem, + frag_offset, len, truesize); } static inline void mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, - struct page *page, dma_addr_t addr, + netmem_ref netmem, dma_addr_t addr, int offset_from, int dma_offset, u32 headlen) { - const void *from = page_address(page) + offset_from; + const void *from = netmem_address(netmem) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); @@ -584,7 +587,8 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) struct mlx5e_frag_page *frag_page; frag_page = &wi->alloc_units.frag_pages[i]; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, + frag_page); } } } @@ -672,19 +676,18 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); - build_ksm_umr(sq, umr_wqe, shampo->key, index, ksm_entries); + build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); while (i < ksm_entries) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); u64 addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); if (unlikely(err)) goto err_unmap; - - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { header_offset = mlx5e_shampo_hd_offset(index++); @@ -715,7 +718,8 @@ err_unmap: if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, + frag_page); } } @@ -791,10 +795,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { dma_addr_t addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(frag_page->page); + + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -836,7 +841,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { frag_page--; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag_page); } bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); @@ -855,7 +860,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); } clear_bit(header_index, shampo->bitmap); } @@ -1100,6 +1105,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (rq->page_pool) page_pool_nid_changed(rq->page_pool, numa_mem_id()); + if (rq->hd_page_pool) + page_pool_nid_changed(rq->hd_page_pool, numa_mem_id()); head = rq->mpwqe.actual_wq_head; i = missing; @@ -1154,8 +1161,9 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) } } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, - u32 cqe_bcnt) +static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; @@ -1205,6 +1213,8 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, tcp->check = tcp_v6_check(payload_len, &ipv6->saddr, &ipv6->daddr, check); } + + return (unsigned int)((unsigned char *)tcp + tcp->doff * 4 - skb->data); } static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) @@ -1212,7 +1222,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; - return page_address(frag_page->page) + head_offset; + return netmem_address(frag_page->netmem) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1561,8 +1571,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); - skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg); /* Subtract one since we already counted this as one * "regular" packet in mlx5e_complete_rx_cqe() */ @@ -1673,28 +1684,28 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct mlx5e_xdp_buff mxbuf; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; net_prefetchw(va); /* xdp_frame data area */ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - cqe_bcnt, &mxbuf); - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) + cqe_bcnt, mxbuf); + if (mlx5e_xdp_handle(rq, prog, mxbuf)) return NULL; /* page/packet was consumed by XDP */ - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -1713,11 +1724,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; struct mlx5e_wqe_frag_info *head_wi = wi; u16 rx_headroom = rq->buff.headroom; struct mlx5e_frag_page *frag_page; struct skb_shared_info *sinfo; - struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; struct bpf_prog *prog; struct sk_buff *skb; @@ -1727,18 +1738,18 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_page = wi->frag_page; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - frag_consumed_bytes, &mxbuf); - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + frag_consumed_bytes, mxbuf); + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); truesize = 0; cqe_bcnt -= frag_consumed_bytes; @@ -1750,8 +1761,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, - wi->offset, frag_consumed_bytes); + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, + frag_page, wi->offset, + frag_consumed_bytes); truesize += frag_info->frag_stride; cqe_bcnt -= frag_consumed_bytes; @@ -1760,7 +1772,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } prog = rcu_dereference(rq->xdp_prog); - if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; @@ -1770,21 +1782,23 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi return NULL; /* page/packet was consumed by XDP */ } - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz, - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, - mxbuf.xdp.data_end - mxbuf.xdp.data, - mxbuf.xdp.data - mxbuf.xdp.data_meta); + skb = mlx5e_build_linear_skb( + rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, + mxbuf->xdp.data_end - mxbuf->xdp.data, + mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) return NULL; skb_mark_for_recycle(skb); head_wi->frag_page->frags++; - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, wi - head_wi - 1, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) pwi->frag_page->frags++; @@ -1984,10 +1998,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; u32 frag_offset = head_offset; u32 byte_cnt = cqe_bcnt; struct skb_shared_info *sinfo; - struct mlx5e_xdp_buff mxbuf; unsigned int truesize = 0; struct bpf_prog *prog; struct sk_buff *skb; @@ -2000,12 +2014,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { /* area for bpf_xdp_[store|load]_bytes */ - net_prefetchw(page_address(frag_page->page) + frag_offset); - if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, + &wi->linear_page))) { rq->stats->buff_alloc_err++; return NULL; } - va = page_address(wi->linear_page.page); + + va = netmem_address(wi->linear_page.netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; @@ -2033,9 +2049,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } } - mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, + linear_data_len, mxbuf); - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); while (byte_cnt) { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ @@ -2046,7 +2063,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w else truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, + frag_page, frag_offset, pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; @@ -2054,7 +2072,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } if (prog) { - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; @@ -2063,30 +2081,33 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w wi->linear_page.frags++; } - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ } - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, - linear_frame_sz, - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, - mxbuf.xdp.data - mxbuf.xdp.data_meta); + skb = mlx5e_build_linear_skb( + rq, mxbuf->xdp.data_hard_start, linear_frame_sz, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, + mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; } skb_mark_for_recycle(skb); wi->linear_page.frags++; - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, frag_page - head_page, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); pagep = head_page; do @@ -2097,12 +2118,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } else { dma_addr_t addr; - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; xdp_update_skb_shared_info(skb, sinfo->nr_frags, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); pagep = frag_page - sinfo->nr_frags; do @@ -2110,8 +2132,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (++pagep < frag_page); } /* copy header */ - addr = page_pool_get_dma_addr(head_page->page); - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + addr = page_pool_get_dma_addr_netmem(head_page->netmem); + mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -2141,31 +2163,31 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(frag_page->page) + head_offset; + va = netmem_address(frag_page->netmem) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct mlx5e_xdp_buff mxbuf; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; net_prefetchw(va); /* xdp_frame data area */ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - cqe_bcnt, &mxbuf); - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + cqe_bcnt, mxbuf); + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) frag_page->frags++; return NULL; /* page/packet was consumed by XDP */ } - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -2184,16 +2206,19 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page); u16 head_offset = mlx5e_shampo_hd_offset(header_index); - dma_addr_t dma_addr = page_dma_addr + head_offset; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; + dma_addr_t page_dma_addr; + dma_addr_t dma_addr; void *hdr, *data; u32 frag_size; - hdr = page_address(frag_page->page) + head_offset; + page_dma_addr = page_pool_get_dma_addr_netmem(frag_page->netmem); + dma_addr = page_dma_addr + head_offset; + + hdr = netmem_address(frag_page->netmem) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2218,7 +2243,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } net_prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr, + mlx5e_copy_skb_header(rq, skb, frag_page->netmem, dma_addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ @@ -2312,11 +2337,23 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - if (likely(head_size)) + if (likely(head_size)) { *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); - else - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, - data_offset, page_idx); + } else { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + /* Drop packets with header in unreadable data area to + * prevent the kernel from touching it. + */ + if (unlikely(netmem_is_net_iov(frag_page->netmem))) + goto free_hd_entry; + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, + cqe_bcnt, + data_offset, + page_idx); + } + if (unlikely(!*skb)) goto free_hd_entry; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1c121b435016..87536f158d07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -2424,8 +2424,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) } if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) - ethtool_sprintf(data, ptp_rq_stats_desc[i].format, - MLX5E_PTP_CHANNEL_IX); + ethtool_puts(data, ptp_rq_stats_desc[i].format); } } @@ -2613,6 +2612,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { #ifdef CONFIG_MLX5_MACSEC &MLX5E_STATS_GRP(macsec_hw), #endif + &MLX5E_STATS_GRP(pcie_cong), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 8de6fcbd3a03..72dbcc1928ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -54,7 +54,7 @@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) -#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq0_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) @@ -535,5 +535,6 @@ extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); extern MLX5E_DECLARE_STATS_GRP(ptp); extern MLX5E_DECLARE_STATS_GRP(macsec_hw); +extern MLX5E_DECLARE_STATS_GRP(pcie_cong); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f1d908f61134..32c07a8b03d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2028,9 +2028,8 @@ err_out: return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; @@ -5446,7 +5446,7 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_action_counter; } - err = dev_get_port_parent_id(priv->netdev, &ppid, false); + err = netif_get_port_parent_id(priv->netdev, &ppid, false); if (!err) { memcpy(&key, &ppid.id, sizeof(key)); mlx5_esw_offloads_devcom_init(esw, key); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 4fd853d19e31..319061d31602 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -196,7 +196,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); - mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + mlx5e_dma_push_single(sq, dma_addr, headlen); num_dma++; dseg++; } @@ -214,7 +214,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); - mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + mlx5e_dma_push_netmem(sq, skb_frag_netmem(frag), dma_addr, fsz); num_dma++; dseg++; } @@ -256,8 +256,7 @@ mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, mode = sq->min_inline_mode; - if (skb_vlan_tag_present(skb) && - test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + if (skb_vlan_tag_present(skb)) mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); return mode; @@ -337,10 +336,11 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at }; } -static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +static void mlx5e_tx_skb_update_ts_flags(struct sk_buff *skb) { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_tx_timestamp(skb); } static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) @@ -392,7 +392,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); sq->pc += wi->num_wqebbs; @@ -482,12 +482,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, } eseg->inline_hdr.sz |= cpu_to_be16(ihs); dseg += wqe_attr->ds_cnt_inl; - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) - eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); - stats->added_vlan_packets++; } dseg += wqe_attr->ds_cnt_ids; @@ -622,10 +616,10 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + mlx5e_dma_push_single(sq, txd.dma_addr, txd.len); mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index dfb079e59d85..1ab77159409d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -585,6 +585,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + if (mlx5_pcie_cong_event_supported(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + mask[0] = async_event_mask; if (MLX5_CAP_GEN(dev, event_cap)) @@ -873,19 +876,25 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct irq_affinity_desc af_desc = {}; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; - /* In case SF irq pool does not exist, fallback to the PF irqs*/ + /* In case SF irq pool does not exist, fallback to the PF irqs */ if (!mlx5_irq_pool_is_sf_pool(pool)) return comp_irq_request_pci(dev, vecidx); - af_desc.is_managed = false; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); - if (IS_ERR(irq)) + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return -ENOMEM; + + af_desc->is_managed = false; + cpumask_copy(&af_desc->mask, cpu_online_mask); + cpumask_andnot(&af_desc->mask, &af_desc->mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); + if (IS_ERR(irq)) { + kvfree(af_desc); return PTR_ERR(irq); + } cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -893,6 +902,8 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + kvfree(af_desc); + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index b6ae384396b3..91d863c8c152 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -64,11 +64,19 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw) enum sched_node_type { SCHED_NODE_TYPE_VPORTS_TSAR, SCHED_NODE_TYPE_VPORT, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + SCHED_NODE_TYPE_RATE_LIMITER, + SCHED_NODE_TYPE_VPORT_TC, + SCHED_NODE_TYPE_VPORTS_TC_TSAR, }; static const char * const sched_node_type_str[] = { [SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR", [SCHED_NODE_TYPE_VPORT] = "vport", + [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR", + [SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter", + [SCHED_NODE_TYPE_VPORT_TC] = "vport TC", + [SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR", }; struct mlx5_esw_sched_node { @@ -92,6 +100,8 @@ struct mlx5_esw_sched_node { struct mlx5_vport *vport; /* Level in the hierarchy. The root node level is 1. */ u8 level; + /* Valid only when this node represents a traffic class. */ + u8 tc; }; static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) @@ -106,6 +116,13 @@ static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) } } +static int esw_qos_num_tcs(struct mlx5_core_dev *dev) +{ + int num_tcs = mlx5_max_tc(dev) + 1; + + return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX; +} + static void esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) { @@ -116,8 +133,38 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_ esw_qos_node_attach_to_parent(node); } +static void esw_qos_nodes_set_parent(struct list_head *nodes, + struct mlx5_esw_sched_node *parent) +{ + struct mlx5_esw_sched_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, nodes, entry) { + esw_qos_node_set_parent(node, parent); + if (!list_empty(&node->children) && + parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + struct mlx5_esw_sched_node *child; + + list_for_each_entry(child, &node->children, entry) { + struct mlx5_vport *vport = child->vport; + + if (vport) + vport->qos.sched_node->parent = parent; + } + } + } +} + void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) { + if (vport->qos.sched_nodes) { + int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); + int i; + + for (i = 0; i < num_tcs; i++) + kfree(vport->qos.sched_nodes[i]); + kfree(vport->qos.sched_nodes); + } + kfree(vport->qos.sched_node); memset(&vport->qos, 0, sizeof(vport->qos)); } @@ -141,16 +188,37 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport) static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op) { - if (node->vport) { + switch (node->type) { + case SCHED_NODE_TYPE_VPORTS_TC_TSAR: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n", + op, sched_node_type_str[node->type], node->tc, err); + break; + case SCHED_NODE_TYPE_VPORT_TC: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n", + op, + sched_node_type_str[node->type], + node->vport->vport, node->tc, err); + break; + case SCHED_NODE_TYPE_VPORT: esw_warn(node->esw->dev, "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n", op, sched_node_type_str[node->type], node->vport->vport, err); - return; + break; + case SCHED_NODE_TYPE_RATE_LIMITER: + case SCHED_NODE_TYPE_TC_ARBITER_TSAR: + case SCHED_NODE_TYPE_VPORTS_TSAR: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (err=%d)\n", + op, sched_node_type_str[node->type], err); + break; + default: + esw_warn(node->esw->dev, + "E-Switch %s scheduling element failed (err=%d)\n", + op, err); + break; } - - esw_warn(node->esw->dev, - "E-Switch %s %s scheduling element failed (err=%d)\n", - op, sched_node_type_str[node->type], err); } static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx, @@ -233,6 +301,24 @@ static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_r return 0; } +static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + + if (!mlx5_qos_element_type_supported( + node->esw->dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT); + + return esw_qos_node_create_sched_element(node, sched_ctx, extack); +} + static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent) { @@ -266,11 +352,13 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, return 0; } -static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max) { if (!divider) return 0; - return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max); + return min_t(u32, fw_max, + max_t(u32, + DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE)); } static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node, @@ -297,7 +385,13 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, if (node->esw != esw || node->ix == esw->qos.root_tsar_ix) continue; - esw_qos_update_sched_node_bw_share(node, divider, extack); + /* Vports TC TSARs don't have a minimum rate configured, + * so there's no need to update the bw_share on them. + */ + if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) { + esw_qos_update_sched_node_bw_share(node, divider, + extack); + } if (list_empty(&node->children)) continue; @@ -306,6 +400,20 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, } } +static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw) +{ + u32 total = 0; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + total += tc_bw[i]; + + /* If total is zero, tc-bw config is disabled and we shouldn't reach + * here. + */ + return WARN_ON(!total) ? 1 : total; +} + static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, u32 min_rate, struct netlink_ext_ack *extack) { @@ -350,28 +458,64 @@ esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, tsar_ix); } -static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, - struct netlink_ext_ack *extack) +static int +esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, + struct netlink_ext_ack *extack) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_node->esw->dev; void *attr; - if (!mlx5_qos_element_type_supported(dev, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, - SCHEDULING_HIERARCHY_E_SWITCH)) + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, + SCHEDULING_HIERARCHY_E_SWITCH)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + vport_node->parent->ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, + vport_node->max_rate); return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack); } +static int +esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, + u32 rate_limit_elem_ix, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = vport_tc_node->esw->dev; + void *attr; + + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC); + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_tc_element, attr, vport_number, + vport_tc_node->vport->vport); + MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc); + MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id, + rate_limit_elem_ix); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + vport_tc_node->parent->ix); + MLX5_SET(scheduling_context, sched_ctx, bw_share, + vport_tc_node->bw_share); + + return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx, + extack); +} + static struct mlx5_esw_sched_node * __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, struct mlx5_esw_sched_node *parent) @@ -388,6 +532,14 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type node->parent = parent; INIT_LIST_HEAD(&node->children); esw_qos_node_attach_to_parent(node); + if (!parent) { + /* The caller is responsible for inserting the node into the + * parent list if necessary. This function can also be used with + * a NULL parent, which doesn't necessarily indicate that it + * refers to the root scheduling element. + */ + list_del_init(&node->entry); + } return node; } @@ -404,6 +556,149 @@ static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlin __esw_qos_free_node(node); } +static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent, + u8 tc, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = parent->esw->dev; + struct mlx5_esw_sched_node *vports_tc_node; + void *attr; + int err; + + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, + SCHEDULING_HIERARCHY_E_SWITCH) || + !mlx5_qos_tsar_type_supported(dev, + TSAR_ELEMENT_TSAR_TYPE_DWRR, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + vports_tc_node = __esw_qos_alloc_node(parent->esw, 0, + SCHED_NODE_TYPE_VPORTS_TC_TSAR, + parent); + if (!vports_tc_node) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed"); + esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc); + return -ENOMEM; + } + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + MLX5_SET(tsar_element, attr, traffic_class, tc); + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix); + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx, + extack); + if (err) + goto err_create_sched_element; + + vports_tc_node->tc = tc; + + return 0; + +err_create_sched_element: + __esw_qos_free_node(vports_tc_node); + return err; +} + +static void +esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, + u32 *tc_bw) +{ + struct mlx5_esw_sched_node *vports_tc_node; + + list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) + tc_bw[vports_tc_node->tc] = vports_tc_node->bw_share; +} + +static void +esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, + u32 *tc_bw, struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = tc_arbiter_node->esw; + struct mlx5_esw_sched_node *vports_tc_node; + u32 divider, fw_max_bw_share; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + divider = esw_qos_calculate_tc_bw_divider(tc_bw); + list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { + u8 tc = vports_tc_node->tc; + u32 bw_share; + + bw_share = tc_bw[tc] * fw_max_bw_share; + bw_share = esw_qos_calc_bw_share(bw_share, divider, + fw_max_bw_share); + esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack); + } +} + +static void +esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vports_tc_node, *tmp; + + list_for_each_entry_safe(vports_tc_node, tmp, + &tc_arbiter_node->children, entry) + esw_qos_destroy_node(vports_tc_node, extack); +} + +static int +esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = tc_arbiter_node->esw; + int err, i, num_tcs = esw_qos_num_tcs(esw->dev); + + for (i = 0; i < num_tcs; i++) { + err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack); + if (err) + goto err_tc_node_create; + } + + return 0; + +err_tc_node_create: + esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL); + return err; +} + +static int esw_qos_create_tc_arbiter_sched_elem( + struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + u32 tsar_parent_ix; + void *attr; + + if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB, + SCHEDULING_HIERARCHY_E_SWITCH)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch TC Arbiter scheduling element is not supported"); + return -EOPNOTSUPP; + } + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB); + tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix : + tc_arbiter_node->esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + tsar_parent_ix); + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, + tc_arbiter_node->max_rate); + MLX5_SET(scheduling_context, tsar_ctx, bw_share, + tc_arbiter_node->bw_share); + + return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx, + extack); +} + static struct mlx5_esw_sched_node * __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) @@ -426,6 +721,7 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch goto err_alloc_node; } + list_add_tail(&node->entry, &esw->qos.domain->nodes); esw_qos_normalize_min_rate(esw, NULL, extack); trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix); @@ -467,6 +763,9 @@ static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netl { struct mlx5_eswitch *esw = node->esw; + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + esw_qos_destroy_vports_tc_nodes(node, extack); + trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix); esw_qos_destroy_node(node, extack); esw_qos_normalize_min_rate(esw, NULL, extack); @@ -498,6 +797,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta SCHED_NODE_TYPE_VPORTS_TSAR, NULL)) esw->qos.node0 = ERR_PTR(-ENOMEM); + else + list_add_tail(&esw->qos.node0->entry, + &esw->qos.domain->nodes); } if (IS_ERR(esw->qos.node0)) { err = PTR_ERR(esw->qos.node0); @@ -555,12 +857,239 @@ static void esw_qos_put(struct mlx5_eswitch *esw) esw_qos_destroy(esw); } +static void +esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + /* Clean up all Vports TC nodes within the TC arbiter node. */ + esw_qos_destroy_vports_tc_nodes(node, extack); + /* Destroy the scheduling element for the TC arbiter node itself. */ + esw_qos_node_destroy_sched_element(node, extack); +} + +static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 curr_ix = node->ix; + int err; + + err = esw_qos_create_tc_arbiter_sched_elem(node, extack); + if (err) + return err; + /* Initialize the vports TC nodes within created TC arbiter TSAR. */ + err = esw_qos_create_vports_tc_nodes(node, extack); + if (err) + goto err_vports_tc_nodes; + + node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR; + + return 0; + +err_vports_tc_nodes: + /* If initialization fails, clean up the scheduling element + * for the TC arbiter node. + */ + esw_qos_node_destroy_sched_element(node, NULL); + node->ix = curr_ix; + return err; +} + +static int +esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport, + u32 rate_limit_elem_ix, + struct mlx5_esw_sched_node *vports_tc_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *vport_tc_node; + u8 tc = vports_tc_node->tc; + int err; + + vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0, + SCHED_NODE_TYPE_VPORT_TC, + vports_tc_node); + if (!vport_tc_node) + return -ENOMEM; + + vport_tc_node->min_rate = vport_node->min_rate; + vport_tc_node->tc = tc; + vport_tc_node->vport = vport; + err = esw_qos_vport_tc_create_sched_element(vport_tc_node, + rate_limit_elem_ix, + extack); + if (err) + goto err_out; + + vport->qos.sched_nodes[tc] = vport_tc_node; + + return 0; +err_out: + __esw_qos_free_node(vport_tc_node); + return err; +} + +static void +esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport, + struct netlink_ext_ack *extack) +{ + int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); + + for (i = 0; i < num_tcs; i++) { + if (vport->qos.sched_nodes[i]) { + __esw_qos_destroy_node(vport->qos.sched_nodes[i], + extack); + } + } + + kfree(vport->qos.sched_nodes); + vport->qos.sched_nodes = NULL; +} + +static int +esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport, + enum sched_node_type type, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node; + int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); + u32 rate_limit_elem_ix; + + vport->qos.sched_nodes = kcalloc(num_tcs, + sizeof(struct mlx5_esw_sched_node *), + GFP_KERNEL); + if (!vport->qos.sched_nodes) { + NL_SET_ERR_MSG_MOD(extack, + "Allocating the vport TC scheduling elements failed."); + return -ENOMEM; + } + + rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ? + vport_node->ix : 0; + tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ? + vport_node->parent : vport_node; + list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { + err = esw_qos_create_vport_tc_sched_node(vport, + rate_limit_elem_ix, + vports_tc_node, + extack); + if (err) + goto err_create_vport_tc; + } + + return 0; + +err_create_vport_tc: + esw_qos_destroy_vport_tc_sched_elements(vport, NULL); + + return err; +} + +static int +esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + int err, new_level, max_level; + + if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + /* Increase the parent's level by 2 to account for both the + * TC arbiter and the vports TC scheduling element. + */ + new_level = vport_node->parent->level + 2; + max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, + log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "TC arbitration on leafs is not supported beyond max scheduling depth"); + return -EOPNOTSUPP; + } + } + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + + if (type == SCHED_NODE_TYPE_RATE_LIMITER) + err = esw_qos_create_rate_limit_element(vport_node, extack); + else + err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack); + if (err) + return err; + + /* Rate limiters impact multiple nodes not directly connected to them + * and are not direct members of the QoS hierarchy. + * Unlink it from the parent to reflect that. + */ + if (type == SCHED_NODE_TYPE_RATE_LIMITER) { + list_del_init(&vport_node->entry); + vport_node->level = 0; + } + + err = esw_qos_create_vport_tc_sched_elements(vport, type, extack); + if (err) + goto err_sched_nodes; + + return 0; + +err_sched_nodes: + if (type == SCHED_NODE_TYPE_RATE_LIMITER) { + esw_qos_node_destroy_sched_element(vport_node, NULL); + list_add_tail(&vport_node->entry, + &vport_node->parent->children); + vport_node->level = vport_node->parent->level + 1; + } else { + esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL); + } + return err; +} + +static void esw_qos_vport_tc_disable(struct mlx5_vport *vport, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + enum sched_node_type curr_type = vport_node->type; + + esw_qos_destroy_vport_tc_sched_elements(vport, extack); + + if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER) + esw_qos_node_destroy_sched_element(vport_node, extack); + else + esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack); +} + +static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport, + u32 min_rate, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); + + for (i = 0; i < num_tcs; i++) { + err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], + min_rate, extack); + if (err) + goto err_out; + } + vport_node->min_rate = min_rate; + + return 0; +err_out: + for (--i; i >= 0; i--) { + esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], + vport_node->min_rate, extack); + } + return err; +} + static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; struct mlx5_esw_sched_node *parent = vport_node->parent; + enum sched_node_type curr_type = vport_node->type; - esw_qos_node_destroy_sched_element(vport_node, extack); + if (curr_type == SCHED_NODE_TYPE_VPORT) + esw_qos_node_destroy_sched_element(vport_node, extack); + else + esw_qos_vport_tc_disable(vport, extack); vport_node->bw_share = 0; list_del_init(&vport_node->entry); @@ -569,7 +1098,9 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport); } -static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, +static int esw_qos_vport_enable(struct mlx5_vport *vport, + enum sched_node_type type, + struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { int err; @@ -577,10 +1108,16 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_ esw_assert_qos_lock_held(vport->dev->priv.eswitch); esw_qos_node_set_parent(vport->qos.sched_node, parent); - err = esw_qos_vport_create_sched_element(vport->qos.sched_node, extack); + if (type == SCHED_NODE_TYPE_VPORT) { + err = esw_qos_vport_create_sched_element(vport->qos.sched_node, + extack); + } else { + err = esw_qos_vport_tc_enable(vport, type, extack); + } if (err) return err; + vport->qos.sched_node->type = type; esw_qos_normalize_min_rate(parent->esw, parent, extack); trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport->qos.sched_node->max_rate, @@ -611,9 +1148,8 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t sched_node->min_rate = min_rate; sched_node->vport = vport; vport->qos.sched_node = sched_node; - err = esw_qos_vport_enable(vport, parent, extack); + err = esw_qos_vport_enable(vport, type, parent, extack); if (err) { - __esw_qos_free_node(sched_node); esw_qos_put(esw); vport->qos.sched_node = NULL; } @@ -666,6 +1202,8 @@ static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rat if (!vport_node) return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate, extack); + else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER) + return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack); else return esw_qos_set_node_min_rate(vport_node, min_rate, extack); } @@ -698,12 +1236,73 @@ bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *m return enabled; } +static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type, + enum sched_node_type new_type, + struct netlink_ext_ack *extack) +{ + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && + new_type == SCHED_NODE_TYPE_RATE_LIMITER) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot switch from vport-level TC arbitration to node-level TC arbitration"); + return -EOPNOTSUPP; + } + + if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER && + new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot switch from node-level TC arbitration to vport-level TC arbitration"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int esw_qos_vport_update(struct mlx5_vport *vport, + enum sched_node_type type, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = vport->qos.sched_node->parent; + enum sched_node_type curr_type = vport->qos.sched_node->type; + u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; + int err; + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + parent = parent ?: curr_parent; + if (curr_type == type && curr_parent == parent) + return 0; + + err = esw_qos_vport_tc_check_type(curr_type, type, extack); + if (err) + return err; + + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { + esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node, + curr_tc_bw); + } + + esw_qos_vport_disable(vport, extack); + + err = esw_qos_vport_enable(vport, type, parent, extack); + if (err) { + esw_qos_vport_enable(vport, curr_type, curr_parent, NULL); + extack = NULL; + } + + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { + esw_qos_set_tc_arbiter_bw_shares(vport->qos.sched_node, + curr_tc_bw, extack); + } + + return err; +} + static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_sched_node *curr_parent; - int err; + enum sched_node_type type; esw_assert_qos_lock_held(esw); curr_parent = vport->qos.sched_node->parent; @@ -711,15 +1310,206 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw if (curr_parent == parent) return 0; - esw_qos_vport_disable(vport, extack); + /* Set vport QoS type based on parent node type if different from + * default QoS; otherwise, use the vport's current QoS type. + */ + if (parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + type = SCHED_NODE_TYPE_RATE_LIMITER; + else if (curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + type = SCHED_NODE_TYPE_VPORT; + else + type = vport->qos.sched_node->type; + + return esw_qos_vport_update(vport, type, parent, extack); +} - err = esw_qos_vport_enable(vport, parent, extack); +static void +esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp; + + vports_tc_node = list_first_entry(&tc_arbiter_node->children, + struct mlx5_esw_sched_node, + entry); + + list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children, + entry) + esw_qos_vport_update_parent(vport_tc_node->vport, node, extack); +} + +static int esw_qos_switch_tc_arbiter_node_to_vports( + struct mlx5_esw_sched_node *tc_arbiter_node, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 parent_tsar_ix = node->parent ? + node->parent->ix : node->esw->qos.root_tsar_ix; + int err; + + err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix, + node->max_rate, node->bw_share, + &node->ix); if (err) { - if (esw_qos_vport_enable(vport, curr_parent, NULL)) - esw_warn(parent->esw->dev, "vport restore QoS failed (vport=%d)\n", - vport->vport); + NL_SET_ERR_MSG_MOD(extack, + "Failed to create scheduling element for vports node when disabling vports TC QoS"); + return err; + } + + node->type = SCHED_NODE_TYPE_VPORTS_TSAR; + + /* Disable TC QoS for vports in the arbiter node. */ + esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack); + + return 0; +} + +static int esw_qos_switch_vports_node_to_tc_arbiter( + struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node, *tmp; + struct mlx5_vport *vport; + int err; + + /* Enable TC QoS for each vport in the node. */ + list_for_each_entry_safe(vport_node, tmp, &node->children, entry) { + vport = vport_node->vport; + err = esw_qos_vport_update_parent(vport, tc_arbiter_node, + extack); + if (err) + goto err_out; + } + + /* Destroy the current vports node TSAR. */ + err = mlx5_destroy_scheduling_element_cmd(node->esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + node->ix); + if (err) + goto err_out; + + return 0; +err_out: + /* Restore vports back into the node if an error occurs. */ + esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL); + + return err; +} + +static struct mlx5_esw_sched_node * +esw_qos_move_node(struct mlx5_esw_sched_node *curr_node) +{ + struct mlx5_esw_sched_node *new_node; + + new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix, + curr_node->type, NULL); + if (!new_node) + return ERR_PTR(-ENOMEM); + + esw_qos_nodes_set_parent(&curr_node->children, new_node); + return new_node; +} + +static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_node; + int err; + + if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return 0; + + /* Allocate a new rate node to hold the current state, which will allow + * for restoring the vports back to this node after disabling TC + * arbitration. + */ + curr_node = esw_qos_move_node(node); + if (IS_ERR(curr_node)) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node"); + return PTR_ERR(curr_node); + } + + /* Disable TC QoS for all vports, and assign them back to the node. */ + err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack); + if (err) + goto err_out; + + /* Clean up the TC arbiter node after disabling TC QoS for vports. */ + esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack); + goto out; +err_out: + esw_qos_nodes_set_parent(&curr_node->children, node); +out: + __esw_qos_free_node(curr_node); + return err; +} + +static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_node, *child; + int err, new_level, max_level; + + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return 0; + + /* Increase the hierarchy level by one to account for the additional + * vports TC scheduling node, and verify that the new level does not + * exceed the maximum allowed depth. + */ + new_level = node->level + 1; + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "TC arbitration on nodes is not supported beyond max scheduling depth"); + return -EOPNOTSUPP; + } + + /* Ensure the node does not contain non-leaf children before assigning + * TC bandwidth. + */ + if (!list_empty(&node->children)) { + list_for_each_entry(child, &node->children, entry) { + if (!child->vport) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot configure TC bandwidth on a node with non-leaf children"); + return -EOPNOTSUPP; + } + } } + /* Allocate a new node that will store the information of the current + * node. This will be used later to restore the node if necessary. + */ + curr_node = esw_qos_move_node(node); + if (IS_ERR(curr_node)) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS"); + return PTR_ERR(curr_node); + } + + /* Initialize the TC arbiter node for QoS management. + * This step prepares the node for handling Traffic Class arbitration. + */ + err = esw_qos_tc_arbiter_scheduling_setup(node, extack); + if (err) + goto err_setup; + + /* Enable TC QoS for each vport within the current node. */ + err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack); + if (err) + goto err_switch_vports; + goto out; + +err_switch_vports: + esw_qos_tc_arbiter_scheduling_teardown(node, NULL); + node->ix = curr_node->ix; + node->type = curr_node->type; +err_setup: + esw_qos_nodes_set_parent(&curr_node->children, node); +out: + __esw_qos_free_node(curr_node); return err; } @@ -848,6 +1638,41 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * return 0; } +static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, + u32 *tc_bw) +{ + int i, num_tcs = esw_qos_num_tcs(esw->dev); + + for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) { + if (tc_bw[i]) + return false; + } + + return true; +} + +static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, + u32 *tc_bw) +{ + struct mlx5_eswitch *esw = vport->qos.sched_node ? + vport->qos.sched_node->parent->esw : + vport->dev->priv.eswitch; + + return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); +} + +static bool esw_qos_tc_bw_disabled(u32 *tc_bw) +{ + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + if (tc_bw[i]) + return false; + } + + return true; +} + int mlx5_esw_qos_init(struct mlx5_eswitch *esw) { if (esw->qos.domain) @@ -906,6 +1731,90 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * return err; } +int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node; + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + bool disable; + int err = 0; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + disable = esw_qos_tc_bw_disabled(tc_bw); + esw_qos_lock(esw); + + if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch traffic classes number is not supported"); + err = -EOPNOTSUPP; + goto unlock; + } + + vport_node = vport->qos.sched_node; + if (disable && !vport_node) + goto unlock; + + if (disable) { + if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, + NULL, extack); + goto unlock; + } + + if (!vport_node) { + err = mlx5_esw_qos_vport_enable(vport, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + NULL, 0, 0, extack); + vport_node = vport->qos.sched_node; + } else { + err = esw_qos_vport_update(vport, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + NULL, extack); + } + if (!err) + esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack); +unlock: + esw_qos_unlock(esw); + return err; +} + +int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *node = priv; + struct mlx5_eswitch *esw = node->esw; + bool disable; + int err; + + if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch traffic classes number is not supported"); + return -EOPNOTSUPP; + } + + disable = esw_qos_tc_bw_disabled(tc_bw); + esw_qos_lock(esw); + if (disable) { + err = esw_qos_node_disable_tc_arbitration(node, extack); + goto unlock; + } + + err = esw_qos_node_enable_tc_arbitration(node, extack); + if (!err) + esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack); +unlock: + esw_qos_unlock(esw); + return err; +} + int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { @@ -996,10 +1905,16 @@ int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_s } esw_qos_lock(esw); - if (!vport->qos.sched_node && parent) - err = mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, parent, 0, 0, extack); - else if (vport->qos.sched_node) + if (!vport->qos.sched_node && parent) { + enum sched_node_type type; + + type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ? + SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT; + err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0, + extack); + } else if (vport->qos.sched_node) { err = esw_qos_vport_update_parent(vport, parent, extack); + } esw_qos_unlock(esw); return err; } @@ -1019,6 +1934,20 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, return mlx5_esw_qos_vport_update_parent(vport, node, extack); } +static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node) +{ + if (list_empty(&node->children)) + return true; + + if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return false; + + node = list_first_entry(&node->children, struct mlx5_esw_sched_node, + entry); + + return esw_qos_is_node_empty(node); +} + static int mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent, @@ -1032,13 +1961,26 @@ mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, return -EOPNOTSUPP; } - if (!list_empty(&node->children)) { + if (!esw_qos_is_node_empty(node)) { NL_SET_ERR_MSG_MOD(extack, "Cannot reassign a node that contains rate objects"); return -EOPNOTSUPP; } + if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot attach a node to a parent with TC bandwidth configured"); + return -EOPNOTSUPP; + } + new_level = parent ? parent->level + 1 : 2; + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + /* Increase by one to account for the vports TC scheduling + * element. + */ + new_level += 1; + } + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { NL_SET_ERR_MSG_MOD(extack, @@ -1049,6 +1991,32 @@ mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, return 0; } +static int +esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = node->parent; + u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; + struct mlx5_eswitch *esw = node->esw; + int err; + + esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw); + esw_qos_tc_arbiter_scheduling_teardown(node, extack); + esw_qos_node_set_parent(node, parent); + err = esw_qos_tc_arbiter_scheduling_setup(node, extack); + if (err) { + esw_qos_node_set_parent(node, curr_parent); + if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) { + esw_warn(esw->dev, "Node restore QoS failed\n"); + return err; + } + } + esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack); + + return err; +} + static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) @@ -1076,6 +2044,7 @@ static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, return err; } esw_qos_node_set_parent(node, parent); + node->bw_share = 0; return 0; } @@ -1094,7 +2063,13 @@ static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node, esw_qos_lock(esw); curr_parent = node->parent; - err = esw_qos_vports_node_update_parent(node, parent, extack); + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + err = esw_qos_tc_arbiter_node_update_parent(node, parent, + extack); + } else { + err = esw_qos_vports_node_update_parent(node, parent, extack); + } + if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index ed40ec8f027e..0a50982b0e27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -21,6 +21,14 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void u64 tx_share, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f84..4917d185d0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8573d36785f4..b0b8ef3ec3c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -212,10 +212,20 @@ struct mlx5_vport { struct mlx5_vport_info info; - /* Protected with the E-Switch qos domain lock. */ + /* Protected with the E-Switch qos domain lock. The Vport QoS can + * either be disabled (sched_node is NULL) or in one of three states: + * 1. Regular QoS (sched_node is a vport node). + * 2. TC QoS enabled on the vport (sched_node is a TC arbiter). + * 3. TC QoS enabled on the vport's parent node + * (sched_node is a rate limit node). + * When TC is enabled in either mode, the vport owns vport TC scheduling + * nodes. + */ struct { - /* Vport scheduling element node. */ + /* Vport scheduling node. */ struct mlx5_esw_sched_node *sched_node; + /* Array of vport traffic class scheduling nodes. */ + struct mlx5_esw_sched_node **sched_nodes; } qos; u16 vport; @@ -817,7 +827,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); /** - * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * struct mlx5_esw_event_info - Indicates eswitch mode changed/changing. * * @new_mode: New mode of eswitch. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0e3a977d5332..bee906661282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1182,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_handle **flows; - /* total vports is the same for both e-switches */ - int nvports = esw->total_vports; struct mlx5_flow_handle *flow; + struct mlx5_vport *peer_vport; struct mlx5_flow_spec *spec; - struct mlx5_vport *vport; int err, pfindex; unsigned long i; void *misc; - if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!MLX5_VPORT_MANAGER(peer_dev) && + !mlx5_core_is_ecpf_esw_manager(peer_dev)) return 0; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -1203,7 +1203,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, peer_miss_rules_setup(esw, peer_dev, spec, &dest); - flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); + flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL); if (!flows) { err = -ENOMEM; goto alloc_flows_err; @@ -1213,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, MLX5_VPORT_PF); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, + MLX5_VPORT_PF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1224,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_pf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1236,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_ecpf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { esw_set_peer_miss_rule_source_port(esw, - peer_dev->priv.eswitch, - spec, vport->vport); + peer_esw, + spec, peer_vport->vport); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1250,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (i >= mlx5_core_max_ec_vfs(peer_dev)) - break; - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, vport->vport); + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, + spec, + peer_vport->vport); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); goto add_ec_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } } @@ -1282,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_ec_vf_flow_err: - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_vf_flow_err: - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_pf_flow_err: esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); @@ -1313,37 +1316,34 @@ alloc_flows_err: static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; u16 peer_index = mlx5_get_dev_index(peer_dev); struct mlx5_flow_handle **flows; - struct mlx5_vport *vport; + struct mlx5_vport *peer_vport; unsigned long i; flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; if (!flows) return; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - /* The flow for a particular vport could be NULL if the other ECPF - * has fewer or no VFs enabled - */ - if (!flows[vport->index]) - continue; - mlx5_del_flow_rules(flows[vport->index]); - } + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) - mlx5_del_flow_rules(flows[vport->index]); + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } kvfree(flows); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a47c29571f64..1af76da8b132 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -527,7 +527,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; - int reformat_id = 0; + u32 reformat_id = 0; unsigned int inlen; int dst_cnt_size; u32 *in, action; @@ -580,23 +580,21 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, action, action); if (!extended_dest && fte->act_dests.action.pkt_reformat) { - struct mlx5_pkt_reformat *pkt_reformat = fte->act_dests.action.pkt_reformat; - - if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { - reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat); - if (reformat_id < 0) { - mlx5_core_err(dev, - "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n", - pkt_reformat->reformat_type); - err = reformat_id; - goto err_out; - } - } else { - reformat_id = fte->act_dests.action.pkt_reformat->id; + struct mlx5_pkt_reformat *pkt_reformat = + fte->act_dests.action.pkt_reformat; + + err = mlx5_fs_get_packet_reformat_id(pkt_reformat, + &reformat_id); + if (err) { + mlx5_core_err(dev, + "Unsupported pkt_reformat type (%d)\n", + pkt_reformat->reformat_type); + goto err_out; } } - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + reformat_id); if (fte->act_dests.action.modify_hdr) { if (fte->act_dests.action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6163bc98d94a..d87392360dbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -113,13 +113,16 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, +/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 11 +#define KERNEL_NIC_PRIO_NUM_LEVELS 10 #define KERNEL_NIC_NUM_PRIOS 1 -/* One more level for tc */ -#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +/* One more level for tc, and one more for promisc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) + +#define KERNEL_NIC_PROMISC_NUM_PRIOS 1 +#define KERNEL_NIC_PROMISC_NUM_LEVELS 1 #define KERNEL_NIC_TC_NUM_PRIOS 1 #define KERNEL_NIC_TC_NUM_LEVELS 3 @@ -187,6 +190,8 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS, + KERNEL_NIC_PROMISC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, @@ -1830,14 +1835,35 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, return err; } +int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *id) +{ + switch (pkt_reformat->owner) { + case MLX5_FLOW_RESOURCE_OWNER_FW: + *id = pkt_reformat->id; + return 0; + case MLX5_FLOW_RESOURCE_OWNER_SW: + return mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat, id); + case MLX5_FLOW_RESOURCE_OWNER_HWS: + return mlx5_fs_hws_action_get_pkt_reformat_id(pkt_reformat, id); + default: + return -EINVAL; + } +} + static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1, struct mlx5_pkt_reformat *p2) { - return p1->owner == p2->owner && - (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ? - p1->id == p2->id : - mlx5_fs_dr_action_get_pkt_reformat_id(p1) == - mlx5_fs_dr_action_get_pkt_reformat_id(p2)); + int err1, err2; + u32 id1, id2; + + if (p1->owner != p2->owner) + return false; + + err1 = mlx5_fs_get_packet_reformat_id(p1, &id1); + err2 = mlx5_fs_get_packet_reformat_id(p2, &id2); + + return !err1 && !err2 && id1 == id2; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -2207,6 +2233,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2271,6 +2298,7 @@ skip_search: nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2292,7 +2320,8 @@ skip_search: tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; @@ -3221,34 +3250,62 @@ static int init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering, int vport_idx) { + struct mlx5_flow_root_namespace *root_ns; struct fs_prio *prio; + int ret; + int i; steering->rdma_transport_rx_root_ns[vport_idx] = create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX); if (!steering->rdma_transport_rx_root_ns[vport_idx]) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns, - MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); - return PTR_ERR_OR_ZERO(prio); + root_ns = steering->rdma_transport_rx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; } static int init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering, int vport_idx) { + struct mlx5_flow_root_namespace *root_ns; struct fs_prio *prio; + int ret; + int i; steering->rdma_transport_tx_root_ns[vport_idx] = create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX); if (!steering->rdma_transport_tx_root_ns[vport_idx]) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns, - MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); - return PTR_ERR_OR_ZERO(prio); + root_ns = steering->rdma_transport_tx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; } static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering) @@ -3895,6 +3952,8 @@ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) if (mlx5_fs_dr_is_supported(dev)) steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else if (mlx5_fs_hws_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_HMFS; else steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 0767239f651c..500826229b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -58,6 +58,7 @@ struct mlx5_flow_definer { enum mlx5_flow_resource_owner { MLX5_FLOW_RESOURCE_OWNER_FW, MLX5_FLOW_RESOURCE_OWNER_SW, + MLX5_FLOW_RESOURCE_OWNER_HWS, }; struct mlx5_modify_hdr { @@ -386,6 +387,9 @@ u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace struct mlx5_flow_root_namespace *find_root(struct fs_node *node); +int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *id); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 5442a02c4097..69933addd921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -278,7 +278,8 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) #define MLX5_RESET_POLL_INTERVAL (HZ / 10) static void poll_sync_reset(struct timer_list *t) { - struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_fw_reset *fw_reset = timer_container_of(fw_reset, t, + timer); struct mlx5_core_dev *dev = fw_reset->dev; u32 fatal_error; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 624452ddebc0..cf7a1edd0530 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -779,7 +779,8 @@ static void mlx5_health_log_ts_update(struct work_struct *work) static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); + struct mlx5_core_dev *dev = timer_container_of(dev, t, + priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; u32 fatal_error; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 9772327d5124..4b3430ac3905 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -238,6 +238,23 @@ static u32 mlx5i_flow_type_mask(u32 flow_type) return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); } +static int mlx5i_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *cmd, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_rxfh_fields(priv, cmd, extack); +} + +static int mlx5i_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_rxfh_fields(priv, info); +} + static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -283,6 +300,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_coalesce = mlx5i_get_coalesce, .set_coalesce = mlx5i_set_coalesce, .get_ts_info = mlx5i_get_ts_info, + .get_rxfh_fields = mlx5i_get_rxfh_fields, + .set_rxfh_fields = mlx5i_set_rxfh_fields, .get_rxnfc = mlx5i_get_rxnfc, .set_rxnfc = mlx5i_set_rxnfc, .get_link_ksettings = mlx5i_get_link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 0979d672d47f..79ae3a51a4b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -32,6 +32,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/fs.h> +#include <net/netdev_lock.h> #include "en.h" #include "en/params.h" #include "ipoib.h" @@ -102,6 +103,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) netdev->netdev_ops = &mlx5i_netdev_ops; netdev->ethtool_ops = &mlx5i_ethtool_ops; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 2691d88cdee1..82d3c2568244 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -47,29 +47,40 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { - struct irq_affinity_desc auto_desc = {}; + struct irq_affinity_desc *auto_desc; struct mlx5_irq *irq; u32 irq_index; int err; + auto_desc = kvzalloc(sizeof(*auto_desc), GFP_KERNEL); + if (!auto_desc) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); - if (err) + if (err) { + kvfree(auto_desc); return ERR_PTR(err); + } + if (pool->irqs_per_cpu) { if (cpumask_weight(&af_desc->mask) > 1) /* if req_mask contain more then one CPU, set the least loadad CPU * of req_mask */ cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask), - &auto_desc.mask); + &auto_desc->mask); else cpu_get(pool, cpumask_first(&af_desc->mask)); } + irq = mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + cpumask_empty(&auto_desc->mask) ? af_desc : auto_desc, NULL); if (IS_ERR(irq)) xa_erase(&pool->irqs, irq_index); + + kvfree(auto_desc); + return irq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 65a94e46edcf..214d732d18e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -343,7 +343,7 @@ static u64 mlx5_read_time(struct mlx5_core_dev *dev, (u64)timer_l | (u64)timer_h1 << 32; } -static u64 read_internal_timer(const struct cyclecounter *cc) +static u64 read_internal_timer(struct cyclecounter *cc) { struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); @@ -643,13 +643,6 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, int pin = -1; int err = 0; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests to enable time stamping on both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -820,12 +813,6 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo return 0; } -static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) -{ - return ((!mlx5_npps_real_time_supported(mdev) && flags) || - (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); -} - static int mlx5_perout_configure(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) @@ -861,12 +848,6 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, goto unlock; } - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) { - err = -EOPNOTSUPP; - goto unlock; - } - if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; @@ -1034,6 +1015,13 @@ static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) clock->ptp_info.verify = mlx5_ptp_verify; clock->ptp_info.pps = 1; + clock->ptp_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + + if (mlx5_npps_real_time_supported(mdev)) + clock->ptp_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; + for (i = 0; i < clock->ptp_info.n_pins; i++) { snprintf(clock->ptp_info.pin_config[i].name, sizeof(clock->ptp_info.pin_config[i].name), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 7c5516b0a844..8115071c34a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -30,7 +30,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) - return ERR_PTR(-ENOMEM); + return NULL; spin_lock_init(&dm->lock); @@ -96,7 +96,7 @@ err_modify_hdr: err_steering: kfree(dm); - return ERR_PTR(-ENOMEM); + return NULL; } void mlx5_dm_cleanup(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 41e8660c819c..e7bcd0f0a709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1102,9 +1102,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) } dev->dm = mlx5_dm_create(dev); - if (IS_ERR(dev->dm)) - mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); - dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); dev->rsc_dump = mlx5_rsc_dump_create(dev); @@ -2257,6 +2254,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ + { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 2e02bdea8361..c518380c4ce7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -495,4 +495,17 @@ static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev) return 1 << MLX5_CAP_GEN(dev, log_max_eq); } + +static inline bool mlx5_pcie_cong_event_supported(struct mlx5_core_dev *dev) +{ + u64 features = MLX5_CAP_GEN_2_64(dev, general_obj_types_127_64); + + if (!(features & MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT)) + return false; + + if (dev->sd) + return false; + + return true; +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585..9bc9bd83c232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 2c5f850c31f6..692ef9c2f729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -148,7 +148,7 @@ out: * Free the IRQ and other resources such as rmap from the system. * BUT doesn't free or remove reference from mlx5. * This function is very important for the shutdown flow, where we need to - * cleanup system resoruces but keep mlx5 objects alive, + * cleanup system resources but keep mlx5 objects alive, * see mlx5_irq_table_free_irqs(). */ static void mlx5_system_free_irq(struct mlx5_irq *irq) @@ -470,26 +470,32 @@ void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq) struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); - struct irq_affinity_desc af_desc; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; - cpumask_copy(&af_desc.mask, cpu_online_mask); - af_desc.is_managed = false; + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); + + cpumask_copy(&af_desc->mask, cpu_online_mask); + af_desc->is_managed = false; if (!mlx5_irq_pool_is_sf_pool(pool)) { /* In case we are allocating a control IRQ from a pci device's pool. * This can happen also for a SF if the SFs pool is empty. */ if (!pool->xa_num_irqs.max) { - cpumask_clear(&af_desc.mask); + cpumask_clear(&af_desc->mask); /* In case we only have a single IRQ for PF/VF */ - cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc->mask); } /* Allocate the IRQ in index 0. The vector was already allocated */ - irq = irq_pool_request_vector(pool, 0, &af_desc, NULL); + irq = irq_pool_request_vector(pool, 0, af_desc, NULL); } else { - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); } + kvfree(af_desc); + return irq; } @@ -548,16 +554,26 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, { struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; - struct irq_affinity_desc af_desc; int offset = MLX5_IRQ_VEC_COMP_BASE; + struct irq_affinity_desc *af_desc; + struct mlx5_irq *irq; + + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); if (!pool->xa_num_irqs.max) offset = 0; - af_desc.is_managed = false; - cpumask_clear(&af_desc.mask); - cpumask_set_cpu(cpu, &af_desc.mask); - return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); + af_desc->is_managed = false; + cpumask_clear(&af_desc->mask); + cpumask_set_cpu(cpu, &af_desc->mask); + + irq = mlx5_irq_request(dev, vecidx + offset, af_desc, rmap); + + kvfree(af_desc); + + return irq; } static struct mlx5_irq_pool * @@ -588,7 +604,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) struct mlx5_irq *irq; unsigned long index; - /* There are cases in which we are destrying the irq_table before + /* There are cases in which we are destroying the irq_table before * freeing all the IRQs, fast teardown for example. Hence, free the irqs * which might not have been freed. */ @@ -617,7 +633,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec, if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { - mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n"); + mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index b5332c54d4fb..396804369b00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -72,6 +72,11 @@ enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action) return action->type; } +struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action) +{ + return action->ctx->mdev; +} + static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, enum mlx5hws_context_shared_stc_type stc_type, u8 tbl_type) @@ -238,6 +243,7 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, enum mlx5hws_table_type table_type, bool is_mirror) { + struct mlx5hws_pool *pool; bool use_fixup = false; u32 fw_tbl_type; u32 base_id; @@ -253,13 +259,11 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, use_fixup = true; break; } + pool = stc_attr->ste_table.ste_pool; if (!is_mirror) - base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_id(pool); else - base_id = - mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_mirror_id(pool); *fixup_stc_attr = *stc_attr; fixup_stc_attr->ste_table.ste_obj_id = base_id; @@ -337,7 +341,7 @@ __must_hold(&ctx->ctrl_lock) if (!mlx5hws_context_cap_dynamic_reparse(ctx)) stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_0_id = mlx5hws_pool_get_base_id(stc_pool); /* According to table/action limitation change the stc_attr */ use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false); @@ -353,7 +357,7 @@ __must_hold(&ctx->ctrl_lock) if (table_type == MLX5HWS_TABLE_TYPE_FDB) { u32 obj_1_id; - obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_1_id = mlx5hws_pool_get_base_mirror_id(stc_pool); use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, @@ -393,11 +397,11 @@ __must_hold(&ctx->ctrl_lock) stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.stc_offset = stc->offset; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); if (table_type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); } @@ -1186,14 +1190,15 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, struct mlx5hws_action_mh_pattern *pattern, u32 log_bulk_size) { + u16 num_actions, max_mh_actions = 0, hw_max_actions; struct mlx5hws_context *ctx = action->ctx; - u16 num_actions, max_mh_actions = 0; int i, ret, size_in_bytes; u32 pat_id, arg_id = 0; __be64 *new_pattern; size_t pat_max_sz; pat_max_sz = MLX5HWS_ARG_CHUNK_SIZE_MAX * MLX5HWS_ARG_DATA_SIZE; + hw_max_actions = pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE; size_in_bytes = pat_max_sz * sizeof(__be64); new_pattern = kcalloc(num_of_patterns, size_in_bytes, GFP_KERNEL); if (!new_pattern) @@ -1203,16 +1208,20 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, for (i = 0; i < num_of_patterns; i++) { size_t new_num_actions; size_t cur_num_actions; - u32 nope_location; + u32 nop_locations; cur_num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE; - mlx5hws_pat_calc_nope(pattern[i].data, cur_num_actions, - pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE, - &new_num_actions, &nope_location, - &new_pattern[i * pat_max_sz]); + ret = mlx5hws_pat_calc_nop(pattern[i].data, cur_num_actions, + hw_max_actions, &new_num_actions, + &nop_locations, + &new_pattern[i * pat_max_sz]); + if (ret) { + mlx5hws_err(ctx, "Too many actions after nop insertion\n"); + goto free_new_pat; + } - action[i].modify_header.nope_locations = nope_location; + action[i].modify_header.nop_locations = nop_locations; action[i].modify_header.num_of_actions = new_num_actions; max_mh_actions = max(max_mh_actions, new_num_actions); @@ -1259,7 +1268,7 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, MLX5_GET(set_action_in, pattern[i].data, action_type); } else { /* Multiple modify actions require a pattern */ - if (unlikely(action[i].modify_header.nope_locations)) { + if (unlikely(action[i].modify_header.nop_locations)) { size_t pattern_sz; pattern_sz = action[i].modify_header.num_of_actions * @@ -1349,20 +1358,17 @@ free_action: } struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags) + bool ignore_flow_level, u32 flags) { struct mlx5hws_cmd_set_fte_dest *dest_list = NULL; struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1392,11 +1398,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. - */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1420,6 +1423,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; @@ -1575,17 +1581,15 @@ hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx) return definer; } -static struct mlx5hws_matcher_action_ste * +static struct mlx5hws_range_action_table * hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, struct mlx5hws_definer *definer, u32 miss_ft_id) { struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; - struct mlx5hws_action_default_stc *default_stc; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_pool_attr pool_attr = {0}; struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; @@ -1604,7 +1608,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; pool_attr.alloc_log_sz = 1; table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); if (!table_ste->pool) { @@ -1616,8 +1619,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_0_id = &table_ste->rtc_0_id; rtc_1_id = &table_ste->rtc_1_id; ste_pool = table_ste->pool; - ste = &table_ste->ste; - ste->order = 1; rtc_attr.log_size = 0; rtc_attr.log_depth = 0; @@ -1629,18 +1630,16 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_attr.fw_gen_wqe = true; rtc_attr.is_scnd_range = true; - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_id(ste_pool); rtc_attr.pd = ctx->pd_num; rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false); /* STC is a single resource (obj_id), use any STC for the ID */ stc_pool = ctx->stc_pool; - default_stc = ctx->common_res.default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); @@ -1650,11 +1649,11 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, } /* Create mirror RTC */ - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool); rtc_attr.ste_base = obj_id; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); @@ -1677,9 +1676,9 @@ free_ste: return NULL; } -static void -hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste) +static void hws_action_destroy_dest_match_range_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste) { mutex_lock(&ctx->ctrl_lock); @@ -1691,12 +1690,11 @@ hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, mutex_unlock(&ctx->ctrl_lock); } -static int -hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste, - struct mlx5hws_action *hit_ft_action, - struct mlx5hws_definer *range_definer, - u32 min, u32 max) +static int hws_action_create_dest_match_range_fill_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste, + struct mlx5hws_action *hit_ft_action, + struct mlx5hws_definer *range_definer, u32 min, u32 max) { struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0}; struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0}; @@ -1792,7 +1790,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, u32 min, u32 max, u32 flags) { struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; struct mlx5hws_action *action; @@ -1837,7 +1835,6 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = table_ste->ste; stc_attr.ste_table.ste_pool = table_ste->pool; stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; @@ -2110,21 +2107,23 @@ static void hws_action_modify_write(struct mlx5hws_send_engine *queue, u32 arg_idx, u8 *arg_data, u16 num_of_actions, - u32 nope_locations) + u32 nop_locations) { u8 *new_arg_data = NULL; int i, j; - if (unlikely(nope_locations)) { + if (unlikely(nop_locations)) { new_arg_data = kcalloc(num_of_actions, MLX5HWS_MODIFY_ACTION_SIZE, GFP_KERNEL); if (unlikely(!new_arg_data)) return; - for (i = 0, j = 0; i < num_of_actions; i++, j++) { - memcpy(&new_arg_data[j], arg_data, MLX5HWS_MODIFY_ACTION_SIZE); - if (BIT(i) & nope_locations) + for (i = 0, j = 0; j < num_of_actions; i++, j++) { + if (BIT(i) & nop_locations) j++; + memcpy(&new_arg_data[j * MLX5HWS_MODIFY_ACTION_SIZE], + &arg_data[i * MLX5HWS_MODIFY_ACTION_SIZE], + MLX5HWS_MODIFY_ACTION_SIZE); } } @@ -2220,6 +2219,7 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, struct mlx5hws_action *action; u32 arg_sz, arg_idx; u8 *single_action; + u8 max_actions; __be32 stc_idx; rule_action = &apply->rule_action[setter->idx_double]; @@ -2247,21 +2247,23 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = *(__be32 *)MLX5_ADDR_OF(set_action_in, single_action, data); - } else { - /* Argument offset multiple with number of args per these actions */ - arg_sz = mlx5hws_arg_get_arg_size(action->modify_header.max_num_of_actions); - arg_idx = rule_action->modify_header.offset * arg_sz; - - apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - - if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { - apply->require_dep = 1; - hws_action_modify_write(apply->queue, - action->modify_header.arg_id + arg_idx, - rule_action->modify_header.data, - action->modify_header.num_of_actions, - action->modify_header.nope_locations); - } + return; + } + + /* Argument offset multiple with number of args per these actions */ + max_actions = action->modify_header.max_num_of_actions; + arg_sz = mlx5hws_arg_get_arg_size(max_actions); + arg_idx = rule_action->modify_header.offset * arg_sz; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); + + if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { + apply->require_dep = 1; + hws_action_modify_write(apply->queue, + action->modify_header.arg_id + arg_idx, + rule_action->modify_header.data, + action->modify_header.num_of_actions, + action->modify_header.nop_locations); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index 64b76075f7f8..55a079fdd08f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -118,6 +118,12 @@ struct mlx5hws_action_template { u8 only_term; }; +struct mlx5hws_range_action_table { + struct mlx5hws_pool *pool; + u32 rtc_0_id; + u32 rtc_1_id; +}; + struct mlx5hws_action { u8 type; u8 flags; @@ -130,7 +136,7 @@ struct mlx5hws_action { u32 pat_id; u32 arg_id; __be64 single_action; - u32 nope_locations; + u32 nop_locations; u8 num_of_patterns; u8 single_action_type; u8 num_of_actions; @@ -186,7 +192,7 @@ struct mlx5hws_action { size_t size; } remove_header; struct { - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; } range; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c new file mode 100644 index 000000000000..5766a9c82f96 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#include "internal.h" + +static const char * +hws_pool_opt_to_str(enum mlx5hws_pool_optimize opt) +{ + switch (opt) { + case MLX5HWS_POOL_OPTIMIZE_NONE: + return "rx-and-tx"; + case MLX5HWS_POOL_OPTIMIZE_ORIG: + return "rx-only"; + case MLX5HWS_POOL_OPTIMIZE_MIRROR: + return "tx-only"; + default: + return "unknown"; + } +} + +static int +hws_action_ste_table_create_pool(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + struct mlx5hws_pool_attr pool_attr = { 0 }; + + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY; + pool_attr.opt_type = opt; + pool_attr.alloc_log_sz = log_sz; + + action_tbl->pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!action_tbl->pool) { + mlx5hws_err(ctx, "Failed to allocate STE pool\n"); + return -EINVAL; + } + + return 0; +} + +static int hws_action_ste_table_create_single_rtc( + struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz, bool tx) +{ + struct mlx5hws_cmd_rtc_create_attr rtc_attr = { 0 }; + u32 *rtc_id; + + rtc_attr.log_depth = 0; + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + /* Action STEs use the default always hit definer. */ + rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; + rtc_attr.is_frst_jumbo = false; + rtc_attr.miss_ft_id = 0; + rtc_attr.pd = ctx->pd_num; + rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); + + if (tx) { + rtc_attr.table_type = FS_FT_FDB_TX; + rtc_attr.ste_base = + mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + rtc_attr.stc_base = + mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_sz; + rtc_id = &action_tbl->rtc_1_id; + } else { + rtc_attr.table_type = FS_FT_FDB_RX; + rtc_attr.ste_base = mlx5hws_pool_get_base_id(action_tbl->pool); + rtc_attr.stc_base = mlx5hws_pool_get_base_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_sz; + rtc_id = &action_tbl->rtc_0_id; + } + + return mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_id); +} + +static int +hws_action_ste_table_create_rtcs(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + int err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, false); + if (err) + return err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, true); + if (err) { + mlx5hws_cmd_rtc_destroy(ctx->mdev, action_tbl->rtc_0_id); + return err; + } + + return 0; +} + +static void +hws_action_ste_table_destroy_rtcs(struct mlx5hws_action_ste_table *action_tbl) +{ + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_1_id); + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_0_id); +} + +static int +hws_action_ste_table_create_stc(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = { 0 }; + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + stc_attr.ste_table.ste_pool = action_tbl->pool; + stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; + + return mlx5hws_action_alloc_single_stc(ctx, &stc_attr, + MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); +} + +static struct mlx5hws_action_ste_table * +hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem) +{ + enum mlx5hws_pool_optimize opt = parent_elem->opt; + struct mlx5hws_context *ctx = parent_elem->ctx; + struct mlx5hws_action_ste_table *action_tbl; + size_t log_sz; + int err; + + log_sz = min(parent_elem->log_sz ? + parent_elem->log_sz + + MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ : + MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ, + MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ); + + action_tbl = kzalloc(sizeof(*action_tbl), GFP_KERNEL); + if (!action_tbl) + return ERR_PTR(-ENOMEM); + + err = hws_action_ste_table_create_pool(ctx, action_tbl, opt, log_sz); + if (err) + goto free_tbl; + + err = hws_action_ste_table_create_rtcs(ctx, action_tbl, opt, log_sz); + if (err) + goto destroy_pool; + + err = hws_action_ste_table_create_stc(ctx, action_tbl); + if (err) + goto destroy_rtcs; + + action_tbl->parent_elem = parent_elem; + INIT_LIST_HEAD(&action_tbl->list_node); + action_tbl->last_used = jiffies; + list_add(&action_tbl->list_node, &parent_elem->available); + parent_elem->log_sz = log_sz; + + mlx5hws_dbg(ctx, + "Allocated %s action STE table log_sz %zu; STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(opt), log_sz, + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + return action_tbl; + +destroy_rtcs: + hws_action_ste_table_destroy_rtcs(action_tbl); +destroy_pool: + mlx5hws_pool_destroy(action_tbl->pool); +free_tbl: + kfree(action_tbl); + + return ERR_PTR(err); +} + +static void +hws_action_ste_table_destroy(struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_context *ctx = action_tbl->parent_elem->ctx; + + mlx5hws_dbg(ctx, + "Destroying %s action STE table: STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(action_tbl->parent_elem->opt), + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); + hws_action_ste_table_destroy_rtcs(action_tbl); + mlx5hws_pool_destroy(action_tbl->pool); + + list_del(&action_tbl->list_node); + kfree(action_tbl); +} + +static int +hws_action_ste_pool_element_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool_element *elem, + enum mlx5hws_pool_optimize opt) +{ + elem->ctx = ctx; + elem->opt = opt; + INIT_LIST_HEAD(&elem->available); + INIT_LIST_HEAD(&elem->full); + + return 0; +} + +static void hws_action_ste_pool_element_destroy( + struct mlx5hws_action_ste_pool_element *elem) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + /* This should be empty, but attempt to free its elements anyway. */ + list_for_each_entry_safe(action_tbl, p, &elem->full, list_node) + hws_action_ste_table_destroy(action_tbl); + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static int hws_action_ste_pool_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool *pool) +{ + enum mlx5hws_pool_optimize opt; + int err; + + mutex_init(&pool->lock); + + /* Rules which are added for both RX and TX must use the same action STE + * indices for both. If we were to use a single table, then RX-only and + * TX-only rules would waste the unused entries. Thus, we use separate + * table sets for the three cases. + */ + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + err = hws_action_ste_pool_element_init(ctx, &pool->elems[opt], + opt); + if (err) + goto destroy_elems; + pool->elems[opt].parent_pool = pool; + } + + return 0; + +destroy_elems: + while (opt-- > MLX5HWS_POOL_OPTIMIZE_NONE) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); + + return err; +} + +static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool) +{ + int opt; + + for (opt = MLX5HWS_POOL_OPTIMIZE_MAX - 1; + opt >= MLX5HWS_POOL_OPTIMIZE_NONE; opt--) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); +} + +static void hws_action_ste_pool_element_collect_stale( + struct mlx5hws_action_ste_pool_element *elem, struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + unsigned long expire_time, now; + + expire_time = secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS); + now = jiffies; + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) { + if (mlx5hws_pool_full(action_tbl->pool) && + time_before(action_tbl->last_used + expire_time, now)) + list_move(&action_tbl->list_node, cleanup); + } +} + +static void hws_action_ste_table_cleanup_list(struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + list_for_each_entry_safe(action_tbl, p, cleanup, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static void hws_action_ste_pool_cleanup(struct work_struct *work) +{ + enum mlx5hws_pool_optimize opt; + struct mlx5hws_context *ctx; + LIST_HEAD(cleanup); + int i; + + ctx = container_of(work, struct mlx5hws_context, + action_ste_cleanup.work); + + for (i = 0; i < ctx->queues; i++) { + struct mlx5hws_action_ste_pool *p = &ctx->action_ste_pool[i]; + + mutex_lock(&p->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; + opt < MLX5HWS_POOL_OPTIMIZE_MAX; opt++) + hws_action_ste_pool_element_collect_stale( + &p->elems[opt], &cleanup); + mutex_unlock(&p->lock); + } + + hws_action_ste_table_cleanup_list(&cleanup); + + schedule_delayed_work(&ctx->action_ste_cleanup, + secs_to_jiffies( + MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); +} + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx) +{ + struct mlx5hws_action_ste_pool *pool; + size_t queues = ctx->queues; + int i, err; + + pool = kcalloc(queues, sizeof(*pool), GFP_KERNEL); + if (!pool) + return -ENOMEM; + + for (i = 0; i < queues; i++) { + err = hws_action_ste_pool_init(ctx, &pool[i]); + if (err) + goto free_pool; + } + + ctx->action_ste_pool = pool; + + INIT_DELAYED_WORK(&ctx->action_ste_cleanup, + hws_action_ste_pool_cleanup); + schedule_delayed_work( + &ctx->action_ste_cleanup, + secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); + + return 0; + +free_pool: + while (i--) + hws_action_ste_pool_destroy(&pool[i]); + kfree(pool); + + return err; +} + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx) +{ + size_t queues = ctx->queues; + int i; + + cancel_delayed_work_sync(&ctx->action_ste_cleanup); + + for (i = 0; i < queues; i++) + hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]); + + kfree(ctx->action_ste_pool); +} + +static struct mlx5hws_action_ste_pool_element * +hws_action_ste_choose_elem(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx) +{ + if (skip_rx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_MIRROR]; + + if (skip_tx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_ORIG]; + + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_NONE]; +} + +static int +hws_action_ste_table_chunk_alloc(struct mlx5hws_action_ste_table *action_tbl, + struct mlx5hws_action_ste_chunk *chunk) +{ + int err; + + err = mlx5hws_pool_chunk_alloc(action_tbl->pool, &chunk->ste); + if (err) + return err; + + chunk->action_tbl = action_tbl; + action_tbl->last_used = jiffies; + + return 0; +} + +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk) +{ + struct mlx5hws_action_ste_pool_element *elem; + struct mlx5hws_action_ste_table *action_tbl; + bool found; + int err; + + if (skip_rx && skip_tx) + return -EINVAL; + + mutex_lock(&pool->lock); + + elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx); + + mlx5hws_dbg(elem->ctx, + "Allocating action STEs skip_rx %d skip_tx %d order %d\n", + skip_rx, skip_tx, chunk->ste.order); + + found = false; + list_for_each_entry(action_tbl, &elem->available, list_node) { + if (!hws_action_ste_table_chunk_alloc(action_tbl, chunk)) { + found = true; + break; + } + } + + if (!found) { + action_tbl = hws_action_ste_table_alloc(elem); + if (IS_ERR(action_tbl)) { + err = PTR_ERR(action_tbl); + goto out; + } + + err = hws_action_ste_table_chunk_alloc(action_tbl, chunk); + if (err) + goto out; + } + + if (mlx5hws_pool_empty(action_tbl->pool)) + list_move(&action_tbl->list_node, &elem->full); + + err = 0; + +out: + mutex_unlock(&pool->lock); + + return err; +} + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk) +{ + struct mutex *lock = &chunk->action_tbl->parent_elem->parent_pool->lock; + + mlx5hws_dbg(chunk->action_tbl->pool->ctx, + "Freeing action STEs offset %d order %d\n", + chunk->ste.offset, chunk->ste.order); + + mutex_lock(lock); + mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste); + chunk->action_tbl->last_used = jiffies; + list_move(&chunk->action_tbl->list_node, + &chunk->action_tbl->parent_elem->available); + mutex_unlock(lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h new file mode 100644 index 000000000000..a8ba97359e31 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#ifndef ACTION_STE_POOL_H_ +#define ACTION_STE_POOL_H_ + +#define MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ 10 +#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1 +#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20 + +#define MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS 300 +#define MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS 300 + +struct mlx5hws_action_ste_pool_element; + +struct mlx5hws_action_ste_table { + struct mlx5hws_action_ste_pool_element *parent_elem; + /* Wraps the RTC and STE range for this given action. */ + struct mlx5hws_pool *pool; + /* Match STEs use this STC to jump to this pool's RTC. */ + struct mlx5hws_pool_chunk stc; + u32 rtc_0_id; + u32 rtc_1_id; + struct list_head list_node; + unsigned long last_used; +}; + +struct mlx5hws_action_ste_pool_element { + struct mlx5hws_context *ctx; + struct mlx5hws_action_ste_pool *parent_pool; + size_t log_sz; /* Size of the largest table so far. */ + enum mlx5hws_pool_optimize opt; + struct list_head available; + struct list_head full; +}; + +/* Central repository of action STEs. The context contains one of these pools + * per queue. + */ +struct mlx5hws_action_ste_pool { + /* Protects the entire pool. We have one pool per queue and only one + * operation can be active per rule at a given time. Thus this lock + * protects solely against concurrent garbage collection and we expect + * very little contention. + */ + struct mutex lock; + struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX]; +}; + +/* A chunk of STEs and the table it was allocated from. Used by rules. */ +struct mlx5hws_action_ste_chunk { + struct mlx5hws_action_ste_table *action_tbl; + struct mlx5hws_pool_chunk ste; +}; + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx); + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx); + +/* Callers are expected to fill chunk->ste.order. On success, this function + * populates chunk->tbl and chunk->ste.offset. + */ +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk); + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk); + +#endif /* ACTION_STE_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 19dce1ba512d..92de4b761a83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -46,10 +46,14 @@ static void hws_bwc_unlock_all_queues(struct mlx5hws_context *ctx) } } -static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, +static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 priority, - u8 size_log) + u8 size_log_rx, u8 size_log_tx, + struct mlx5hws_matcher_attr *attr) { + struct mlx5hws_bwc_matcher *first_matcher = + bwc_matcher->complex_first_bwc_matcher; + memset(attr, 0, sizeof(*attr)); attr->priority = priority; @@ -58,9 +62,137 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, attr->optimize_flow_src = MLX5HWS_MATCHER_FLOW_SRC_ANY; attr->insert_mode = MLX5HWS_MATCHER_INSERT_BY_HASH; attr->distribute_mode = MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH; - attr->rule.num_log = size_log; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].rule.num_log = size_log_rx; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].rule.num_log = size_log_tx; attr->resizable = true; attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + + attr->isolated_matcher_end_ft_id = + first_matcher ? first_matcher->matcher->end_ft_id : 0; +} + +static int +hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + bool move_error = false, poll_error = false, drain_error = false; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_bwc_rule *bwc_rule; + struct mlx5hws_send_engine *queue; + struct list_head *rules_list; + u32 pending_rules; + int i, ret = 0; + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + if (list_empty(&bwc_matcher->rules[i])) + continue; + + pending_rules = 0; + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + rules_list = &bwc_matcher->rules[i]; + + list_for_each_entry(bwc_rule, rules_list, list_node) { + ret = mlx5hws_matcher_resize_rule_move(matcher, + bwc_rule->rule, + &rule_attr); + if (unlikely(ret && !move_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = true; + } + + pending_rules++; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + false); + if (unlikely(ret && !poll_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = true; + } + } + + if (pending_rules) { + queue = &ctx->send_queue[rule_attr.queue_id]; + mlx5hws_send_engine_flush_queue(queue); + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + true); + if (unlikely(ret && !drain_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = true; + } + } + } + + if (move_error || poll_error || drain_error) + ret = -EINVAL; + + return ret; +} + +static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + if (!bwc_matcher->complex) + return hws_bwc_matcher_move_all_simple(bwc_matcher); + + return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); +} + +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher_attr matcher_attr = {0}; + struct mlx5hws_matcher *old_matcher; + struct mlx5hws_matcher *new_matcher; + int ret; + + hws_bwc_matcher_init_attr(bwc_matcher, + bwc_matcher->priority, + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, + &matcher_attr); + + old_matcher = bwc_matcher->matcher; + new_matcher = mlx5hws_matcher_create(old_matcher->tbl, + &bwc_matcher->mt, 1, + bwc_matcher->at, + bwc_matcher->num_of_at, + &matcher_attr); + if (!new_matcher) { + mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); + return -ENOMEM; + } + + ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); + if (ret) { + mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); + return ret; + } + + ret = hws_bwc_matcher_move_all(bwc_matcher); + if (ret) + mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n"); + + /* Error during rehash can't be rolled back. + * The best option here is to allow the rehash to complete and remove + * the old matcher - can't leave the matcher in the 'in_resize' state. + */ + + bwc_matcher->matcher = new_matcher; + mlx5hws_matcher_destroy(old_matcher); + + return ret; } int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, @@ -83,12 +215,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, for (i = 0; i < bwc_queues; i++) INIT_LIST_HEAD(&bwc_matcher->rules[i]); - hws_bwc_matcher_init_attr(&attr, + hws_bwc_matcher_init_attr(bwc_matcher, priority, - MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG); + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, + &attr); bwc_matcher->priority = priority; - bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, + sizeof(*bwc_matcher->at), GFP_KERNEL); + if (!bwc_matcher->at) + goto free_bwc_matcher_rules; /* create dummy action template */ bwc_matcher->at[0] = @@ -96,7 +235,7 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, action_types : init_action_types); if (!bwc_matcher->at[0]) { mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); - goto free_bwc_matcher_rules; + goto free_bwc_matcher_at_array; } bwc_matcher->num_of_at = 1; @@ -126,12 +265,28 @@ free_mt: mlx5hws_match_template_destroy(bwc_matcher->mt); free_at: mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_at_array: + kfree(bwc_matcher->at); free_bwc_matcher_rules: kfree(bwc_matcher->rules); err: return -EINVAL; } +static void +hws_bwc_matcher_init_size_rxtx(struct mlx5hws_bwc_matcher_size *size) +{ + size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + atomic_set(&size->num_of_rules, 0); + atomic_set(&size->rehash_required, false); +} + +static void hws_bwc_matcher_init_size(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->rx_size); + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->tx_size); +} + struct mlx5hws_bwc_matcher * mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, u32 priority, @@ -152,7 +307,7 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, if (!bwc_matcher) return NULL; - atomic_set(&bwc_matcher->num_of_rules, 0); + hws_bwc_matcher_init_size(bwc_matcher); /* Check if the required match params can be all matched * in single STE, otherwise complex matcher is needed. @@ -192,6 +347,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) for (i = 0; i < bwc_matcher->num_of_at; i++) mlx5hws_action_template_destroy(bwc_matcher->at[i]); + kfree(bwc_matcher->at); mlx5hws_match_template_destroy(bwc_matcher->mt); kfree(bwc_matcher->rules); @@ -201,23 +357,27 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) { - u32 num_of_rules = atomic_read(&bwc_matcher->num_of_rules); + u32 rx_rules = atomic_read(&bwc_matcher->rx_size.num_of_rules); + u32 tx_rules = atomic_read(&bwc_matcher->tx_size.num_of_rules); - if (num_of_rules) + if (rx_rules || tx_rules) mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "BWC matcher destroy: matcher still has %d rules\n", - num_of_rules); + "BWC matcher destroy: matcher still has %u RX and %u TX rules\n", + rx_rules, tx_rules); - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + if (bwc_matcher->complex) + mlx5hws_bwc_matcher_destroy_complex(bwc_matcher); + else + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); kfree(bwc_matcher); return 0; } -static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, - u16 queue_id, - u32 *pending_rules, - bool drain) +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain) { unsigned long timeout = jiffies + secs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT); @@ -320,16 +480,12 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - atomic_inc(&bwc_matcher->num_of_rules); bwc_rule->bwc_queue_idx = idx; list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]); } static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - - atomic_dec(&bwc_matcher->num_of_rules); list_del_init(&bwc_rule->list_node); } @@ -352,7 +508,8 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); if (unlikely(ret)) return ret; @@ -366,6 +523,80 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, return 0; } +static void hws_bwc_rule_cnt_dec(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + atomic_dec(&bwc_matcher->rx_size.num_of_rules); + if (!bwc_rule->skip_tx) + atomic_dec(&bwc_matcher->tx_size.num_of_rules); +} + +static int +hws_bwc_matcher_rehash_shrink(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_bwc_matcher_size *rx_size = &bwc_matcher->rx_size; + struct mlx5hws_bwc_matcher_size *tx_size = &bwc_matcher->tx_size; + + /* It is possible that another thread has added a rule. + * Need to check again if we really need rehash/shrink. + */ + if (atomic_read(&rx_size->num_of_rules) || + atomic_read(&tx_size->num_of_rules)) + return 0; + + /* If the current matcher RX/TX size is already at its initial size. */ + if (rx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG && + tx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG) + return 0; + + /* Now we've done all the checking - do the shrinking: + * - reset match RTC size to the initial size + * - create new matcher + * - move the rules, which will not do anything as the matcher is empty + * - destroy the old matcher + */ + + rx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + tx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + return hws_bwc_matcher_move(bwc_matcher); +} + +static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ + int ret; + + hws_bwc_rule_cnt_dec(bwc_rule); + + if (atomic_read(&bwc_matcher->rx_size.num_of_rules) || + atomic_read(&bwc_matcher->tx_size.num_of_rules)) + return 0; + + /* Matcher has no more rules - shrink it to save ICM. */ + + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_shrink(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); + + mutex_lock(queue_lock); + + if (unlikely(ret)) + mlx5hws_err(ctx, + "BWC rule deletion: shrinking empty matcher failed (%d)\n", + ret); + + return ret; +} + int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; @@ -383,6 +614,7 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr); hws_bwc_rule_list_remove(bwc_rule); + hws_bwc_rule_cnt_dec_with_shrink(bwc_rule, idx); mutex_unlock(queue_lock); @@ -391,9 +623,13 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule) { - int ret; + bool is_complex = !!bwc_rule->bwc_matcher->complex; + int ret = 0; - ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (is_complex) + ret = mlx5hws_bwc_rule_destroy_complex(bwc_rule); + else + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); mlx5hws_bwc_rule_free(bwc_rule); return ret; @@ -433,9 +669,8 @@ hws_bwc_rule_create_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); - - return ret; + return mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); } static int @@ -456,7 +691,8 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); if (unlikely(ret)) mlx5hws_err(ctx, "Failed updating BWC rule (%d)\n", ret); @@ -464,37 +700,27 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, } static bool -hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; /* check the match RTC size */ - if ((bwc_matcher->size_log + - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > - (caps->ste_alloc_log_max - 1)) - return true; - - /* check the action RTC size */ - if ((bwc_matcher->size_log + - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP + - ilog2(roundup_pow_of_two(bwc_matcher->matcher->action_ste.max_stes)) + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT) > - (caps->ste_alloc_log_max - 1)) - return true; - - return false; + return (size->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > + (caps->ste_alloc_log_max - 1); } static bool hws_bwc_matcher_rehash_size_needed(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size, u32 num_of_rules) { - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) return false; if (unlikely((num_of_rules * 100 / MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH) >= - (1UL << bwc_matcher->size_log))) + (1UL << size->size_log))) return true; return false; @@ -520,6 +746,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_rule_action rule_actions[]) { enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + void *p; + + if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) { + if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + bwc_matcher->size_of_at_array *= 2; + p = krealloc(bwc_matcher->at, + bwc_matcher->size_of_at_array * + sizeof(*bwc_matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + bwc_matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + bwc_matcher->at = p; + } hws_bwc_rule_actions_to_action_types(rule_actions, action_types); @@ -534,20 +777,21 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, } static int -hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_cmd_query_caps *caps = ctx->caps; - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) { + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) { mlx5hws_err(ctx, "Can't resize matcher: depth exceeds limit %d\n", caps->rtc_log_depth_max); return -ENOMEM; } - bwc_matcher->size_log = - min(bwc_matcher->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - caps->ste_alloc_log_max - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); + size->size_log = min(size->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, + caps->ste_alloc_log_max - + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); return 0; } @@ -580,190 +824,171 @@ hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher, return -1; } -static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +static int +hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) { - struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - u16 bwc_queues = mlx5hws_bwc_queues(ctx); - struct mlx5hws_bwc_rule **bwc_rules; - struct mlx5hws_rule_attr rule_attr; - u32 *pending_rules; - int i, j, ret = 0; - bool all_done; - u16 burst_th; + bool need_rx_rehash, need_tx_rehash; + int ret; - mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + need_rx_rehash = atomic_read(&bwc_matcher->rx_size.rehash_required); + need_tx_rehash = atomic_read(&bwc_matcher->tx_size.rehash_required); - pending_rules = kcalloc(bwc_queues, sizeof(*pending_rules), GFP_KERNEL); - if (!pending_rules) - return -ENOMEM; + /* It is possible that another rule has already performed rehash. + * Need to check again if we really need rehash. + */ + if (!need_rx_rehash && !need_tx_rehash) + return 0; - bwc_rules = kcalloc(bwc_queues, sizeof(*bwc_rules), GFP_KERNEL); - if (!bwc_rules) { - ret = -ENOMEM; - goto free_pending_rules; + /* If the current matcher RX/TX size is already at its max size, + * it can't be rehashed. + */ + if (need_rx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->rx_size)) { + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + need_rx_rehash = false; } - - for (i = 0; i < bwc_queues; i++) { - if (list_empty(&bwc_matcher->rules[i])) - bwc_rules[i] = NULL; - else - bwc_rules[i] = list_first_entry(&bwc_matcher->rules[i], - struct mlx5hws_bwc_rule, - list_node); + if (need_tx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->tx_size)) { + atomic_set(&bwc_matcher->tx_size.rehash_required, false); + need_tx_rehash = false; } - do { - all_done = true; - - for (i = 0; i < bwc_queues; i++) { - rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - burst_th = hws_bwc_get_burst_th(ctx, rule_attr.queue_id); - - for (j = 0; j < burst_th && bwc_rules[i]; j++) { - rule_attr.burst = !!((j + 1) % burst_th); - ret = mlx5hws_matcher_resize_rule_move(bwc_matcher->matcher, - bwc_rules[i]->rule, - &rule_attr); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", - ret); - goto free_bwc_rules; - } - - all_done = false; - pending_rules[i]++; - bwc_rules[i] = list_is_last(&bwc_rules[i]->list_node, - &bwc_matcher->rules[i]) ? - NULL : list_next_entry(bwc_rules[i], list_node); - - ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id, - &pending_rules[i], false); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", - ret); - goto free_bwc_rules; - } - } - } - } while (!all_done); + /* If both RX and TX rehash flags are now off, it means that whatever + * we wanted to rehash is now at its max size - no rehash can be done. + * Return and try adding the rule again - perhaps there was some change. + */ + if (!need_rx_rehash && !need_tx_rehash) + return 0; - /* drain all the bwc queues */ - for (i = 0; i < bwc_queues; i++) { - if (pending_rules[i]) { - u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + /* Now we're done all the checking - do the rehash: + * - extend match RTC size + * - create new matcher + * - move all the rules to the new matcher + * - destroy the old matcher + */ + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + atomic_set(&bwc_matcher->tx_size.rehash_required, false); - mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]); - ret = hws_bwc_queue_poll(ctx, queue_id, - &pending_rules[i], true); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", ret); - goto free_bwc_rules; - } - } + if (need_rx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->rx_size); + if (ret) + return ret; } -free_bwc_rules: - kfree(bwc_rules); -free_pending_rules: - kfree(pending_rules); - - return ret; -} + if (need_tx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->tx_size); + if (ret) + return ret; + } -static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - return hws_bwc_matcher_move_all_simple(bwc_matcher); + return hws_bwc_matcher_move(bwc_matcher); } -static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +static int hws_bwc_rule_get_at_idx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_rule_action rule_actions[], + u16 bwc_queue_idx) { + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_matcher_attr matcher_attr = {0}; - struct mlx5hws_matcher *old_matcher; - struct mlx5hws_matcher *new_matcher; - int ret; + struct mutex *queue_lock; /* Protect the queue */ + int at_idx, ret; - hws_bwc_matcher_init_attr(&matcher_attr, - bwc_matcher->priority, - bwc_matcher->size_log); + /* check if rehash needed due to missing action template */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (likely(at_idx >= 0)) + return at_idx; - old_matcher = bwc_matcher->matcher; - new_matcher = mlx5hws_matcher_create(old_matcher->tbl, - &bwc_matcher->mt, 1, - bwc_matcher->at, - bwc_matcher->num_of_at, - &matcher_attr); - if (!new_matcher) { - mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); - return -ENOMEM; - } + /* we need to extend BWC matcher action templates array */ + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + hws_bwc_lock_all_queues(ctx); - ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); - return ret; + /* check again - perhaps other thread already did extend_at */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (at_idx >= 0) + goto out; + + ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed extending AT (%d)", ret); + at_idx = -EINVAL; + goto out; } - ret = hws_bwc_matcher_move_all(bwc_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: moving rules failed\n"); - return -ENOMEM; + /* action templates array was extended, we need the last idx */ + at_idx = bwc_matcher->num_of_at - 1; + ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, + bwc_matcher->at[at_idx]); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed attaching new AT (%d)", ret); + at_idx = -EINVAL; + goto out; } - bwc_matcher->matcher = new_matcher; - mlx5hws_matcher_destroy(old_matcher); +out: + hws_bwc_unlock_all_queues(ctx); + mutex_lock(queue_lock); + return at_idx; +} - return 0; +static void hws_bwc_rule_cnt_inc_rxtx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_matcher_size *size) +{ + u32 num_of_rules = atomic_inc_return(&size->num_of_rules); + + if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_rule->bwc_matcher, + size, num_of_rules))) + atomic_set(&size->rehash_required, true); } -static int -hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) +static void hws_bwc_rule_cnt_inc(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->rx_size); + if (!bwc_rule->skip_tx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->tx_size); +} + +static int hws_bwc_rule_cnt_inc_with_rehash(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) { + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ int ret; - /* If the current matcher size is already at its max size, we can't - * do the rehash. Skip it and try adding the rule again - perhaps - * there was some change. - */ - if (hws_bwc_matcher_size_maxed_out(bwc_matcher)) - return 0; + hws_bwc_rule_cnt_inc(bwc_rule); - /* It is possible that other rule has already performed rehash. - * Need to check again if we really need rehash. - * If the reason for rehash was size, but not any more - skip rehash. - */ - if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, - atomic_read(&bwc_matcher->num_of_rules))) + if (!atomic_read(&bwc_matcher->rx_size.rehash_required) && + !atomic_read(&bwc_matcher->tx_size.rehash_required)) return 0; - /* Now we're done all the checking - do the rehash: - * - extend match RTC size - * - create new matcher - * - move all the rules to the new matcher - * - destroy the old matcher - */ + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); - ret = hws_bwc_matcher_extend_size(bwc_matcher); - if (ret) - return ret; + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_size(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); - return hws_bwc_matcher_move(bwc_matcher); -} + mutex_lock(queue_lock); -static int -hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - /* Rehash by action template doesn't require any additional checking. - * The bwc_matcher already contains the new action template. - * Just do the usual rehash: - * - create new matcher - * - move all the rules to the new matcher - * - destroy the old matcher - */ - return hws_bwc_matcher_move(bwc_matcher); + if (likely(!ret)) + return 0; + + /* Failed to rehash. Print a diagnostic and rollback the counters. */ + mlx5hws_err(ctx, + "BWC rule insertion: rehash to sizes [%d, %d] failed (%d)\n", + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, ret); + hws_bwc_rule_cnt_dec(bwc_rule); + + return ret; } int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, @@ -776,7 +1001,6 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ - u32 num_of_rules; int ret = 0; int at_idx; @@ -786,67 +1010,18 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, bwc_queue_idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - return ret; - } - - /* action templates array was extended, we need the last idx */ - at_idx = bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule insertion: rehash AT failed (%d)\n", ret); - return ret; - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule create: failed getting AT (%d)", + ret); + return -EINVAL; } - /* check if number of rules require rehash */ - num_of_rules = atomic_read(&bwc_matcher->num_of_rules); - - if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) { + ret = hws_bwc_rule_cnt_inc_with_rehash(bwc_rule, bwc_queue_idx); + if (unlikely(ret)) { mutex_unlock(queue_lock); - - hws_bwc_lock_all_queues(ctx); - ret = hws_bwc_matcher_rehash_size(bwc_matcher); - hws_bwc_unlock_all_queues(ctx); - - if (ret) { - mlx5hws_err(ctx, "BWC rule insertion: rehash size [%d -> %d] failed (%d)\n", - bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - bwc_matcher->size_log, - ret); - return ret; - } - - mutex_lock(queue_lock); + return ret; } ret = hws_bwc_rule_create_sync(bwc_rule, @@ -862,10 +1037,12 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, /* At this point the rule wasn't added. * It could be because there was collision, or some other problem. - * If we don't dive deeper than API, the only thing we know is that - * the status of completion is RTE_FLOW_OP_ERROR. * Try rehash by size and insert rule again - last chance. */ + if (!bwc_rule->skip_rx) + atomic_set(&bwc_matcher->rx_size.rehash_required, true); + if (!bwc_rule->skip_tx) + atomic_set(&bwc_matcher->tx_size.rehash_required, true); mutex_unlock(queue_lock); @@ -875,6 +1052,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (ret) { mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -890,6 +1068,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) { mutex_unlock(queue_lock); mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -919,13 +1098,24 @@ mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher, if (unlikely(!bwc_rule)) return NULL; + bwc_rule->flow_source = flow_source; + mlx5hws_rule_skip(bwc_matcher->matcher, flow_source, + &bwc_rule->skip_rx, &bwc_rule->skip_tx); + bwc_queue_idx = hws_bwc_gen_queue_idx(ctx); - ret = mlx5hws_bwc_rule_create_simple(bwc_rule, - params->match_buf, - rule_actions, - flow_source, - bwc_queue_idx); + if (bwc_matcher->complex) + ret = mlx5hws_bwc_rule_create_complex(bwc_rule, + params, + flow_source, + rule_actions, + bwc_queue_idx); + else + ret = mlx5hws_bwc_rule_create_simple(bwc_rule, + params->match_buf, + rule_actions, + flow_source, + bwc_queue_idx); if (unlikely(ret)) { mlx5hws_bwc_rule_free(bwc_rule); return NULL; @@ -947,57 +1137,17 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, idx = bwc_rule->bwc_queue_idx; - mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, 0, &rule_attr); + mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, bwc_rule->flow_source, + &rule_attr); queue_lock = hws_bwc_get_queue_lock(ctx, idx); mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - /* check again - perhaps other thread already did extend_at */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); - if (likely(at_idx < 0)) { - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - mlx5hws_err(ctx, "BWC rule update: failed extending AT (%d)", ret); - return -EINVAL; - } - - /* action templates array was extended, we need the last idx */ - at_idx = bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule update: rehash AT failed (%d)\n", - ret); - return ret; - } - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule update: failed getting AT\n"); + return -EINVAL; } ret = hws_bwc_rule_update_sync(bwc_rule, @@ -1023,5 +1173,10 @@ int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, return -EINVAL; } - return hws_bwc_rule_action_update(bwc_rule, rule_actions); + /* For complex rule, the update should happen on the second matcher */ + if (bwc_rule->isolated_bwc_rule) + return hws_bwc_rule_action_update(bwc_rule->isolated_bwc_rule, + rule_actions); + else + return hws_bwc_rule_action_update(bwc_rule, rule_actions); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 47f7ed141553..af391d70c14f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -10,9 +10,7 @@ #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 /* Max number of AT attach operations for the same matcher. - * When the limit is reached, next attempt to attach new AT - * will result in creation of a new matcher and moving all - * the rules to this matcher. + * When the limit is reached, a larger buffer is allocated for the ATs. */ #define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 @@ -20,21 +18,37 @@ #define MLX5HWS_BWC_POLLING_TIMEOUT 60 +struct mlx5hws_bwc_matcher_complex_data; + +struct mlx5hws_bwc_matcher_size { + u8 size_log; + atomic_t num_of_rules; + atomic_t rehash_required; +}; + struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; - struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; - u32 priority; + struct mlx5hws_action_template **at; + struct mlx5hws_bwc_matcher_complex_data *complex; + struct mlx5hws_bwc_matcher *complex_first_bwc_matcher; u8 num_of_at; - u8 size_log; - atomic_t num_of_rules; + u8 size_of_at_array; + u32 priority; + struct mlx5hws_bwc_matcher_size rx_size; + struct mlx5hws_bwc_matcher_size tx_size; struct list_head *rules; }; struct mlx5hws_bwc_rule { struct mlx5hws_bwc_matcher *bwc_matcher; struct mlx5hws_rule *rule; + struct mlx5hws_bwc_rule *isolated_bwc_rule; + struct mlx5hws_bwc_complex_rule_hash_node *complex_hash_node; + u32 flow_source; u16 bwc_queue_idx; + bool skip_rx; + bool skip_tx; struct list_head list_node; }; @@ -65,6 +79,11 @@ void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 flow_source, struct mlx5hws_rule_attr *rule_attr); +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain); + static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx) { /* Besides the control queue, half of the queues are diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 9fb059a6511f..ca7501c57468 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -3,6 +3,22 @@ #include "internal.h" +#define HWS_CLEAR_MATCH_PARAM(mask, field) \ + MLX5_SET(fte_match_param, (mask)->match_buf, field, 0) + +#define HWS_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) + +static const struct rhashtable_params hws_refcount_hash = { + .key_len = sizeof_field(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .key_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .head_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + hash_node), + .automatic_shrinking = true, + .min_size = 1, +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) @@ -48,20 +64,1093 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, return is_complex; } +static void +hws_bwc_matcher_complex_params_clear_fld(struct mlx5hws_context *ctx, + enum mlx5hws_definer_fname fname, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + switch (fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_O: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_I: + /* Because of the strict requirements for IP address matching + * that require ethtype/ip_version matching as well, don't clear + * these fields - have them in both parts of the complex matcher + */ + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_SRC_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_SRC_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_L4_SPORT_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_sport); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_sport); + break; + case MLX5HWS_DEFINER_FNAME_L4_SPORT_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_L4_DPORT_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_L4_DPORT_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_TCP_FLAGS_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_flags); + break; + case MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM: + case MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_tcp_seq_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_tcp_ack_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.inner_tcp_seq_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.inner_tcp_ack_num); + break; + case MLX5HWS_DEFINER_FNAME_GTP_TEID: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_teid); + break; + case MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_flags); + break; + case MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.gtpu_first_ext_dw_0); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_0); + break; + case MLX5HWS_DEFINER_FNAME_GTPU_DW2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_2); + break; + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_2.outer_first_mpls_over_gre); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_2.outer_first_mpls_over_udp); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.geneve_tlv_option_0_data); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_0); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_0); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_1); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_2); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_2); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_3); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_3); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_VNI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.vxlan_vni); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_flags); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD0: + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_next_protocol); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_vni); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_opt_len); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_OAM: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_oam); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_PROTO: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_protocol_type); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_VNI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_vni); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_QP: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_sqn); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_GVMI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_port); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.source_eswitch_owner_vhca_id); + break; + case MLX5HWS_DEFINER_FNAME_REG_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_0); + break; + case MLX5HWS_DEFINER_FNAME_REG_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_1); + break; + case MLX5HWS_DEFINER_FNAME_REG_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_2); + break; + case MLX5HWS_DEFINER_FNAME_REG_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_3); + break; + case MLX5HWS_DEFINER_FNAME_REG_4: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_4); + break; + case MLX5HWS_DEFINER_FNAME_REG_5: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_5); + break; + case MLX5HWS_DEFINER_FNAME_REG_7: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_7); + break; + case MLX5HWS_DEFINER_FNAME_REG_A: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_a); + break; + case MLX5HWS_DEFINER_FNAME_GRE_C: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_c_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_K: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_k_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_S: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_s_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_protocol); + break; + case MLX5HWS_DEFINER_FNAME_GRE_OPT_KEY: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_key.key); + break; + case MLX5HWS_DEFINER_FNAME_ICMP_DW1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_code); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.icmpv6_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_code); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.outer_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.inner_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_0); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_1); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_2); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_3); + break; + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK: + /* assuming this is flex parser for geneve option */ + if ((fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 0) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 1) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 2) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 3) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 4) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 5) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 6) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 7)) { + mlx5hws_err(ctx, + "Complex params: unsupported field %s (%d), flex parser ID for geneve is %d\n", + mlx5hws_definer_fname_to_str(fname), fname, + caps->flex_parser_id_geneve_tlv_option_0); + break; + } + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_tlv_option_0_exist); + break; + case MLX5HWS_DEFINER_FNAME_REG_6: + default: + mlx5hws_err(ctx, "Complex params: unsupported field %s (%d)\n", + mlx5hws_definer_fname_to_str(fname), fname); + break; + } +} + +static bool +hws_bwc_matcher_complex_params_comb_is_valid(struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool m1[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool m2[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool is_first_matcher; + int i; + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + if (is_first_matcher) + m1[fc[i].fname] = true; + else + m2[fc[i].fname] = true; + } + + /* Not all the fields can be split into separate matchers. + * Some should be together on the same matcher. + * For example, IPv6 parts - the whole IPv6 address should be on the + * same matcher in order for us to deduce if it's IPv6 or IPv4 address. + */ + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + /* Don't split outer IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O])) + return false; + + /* Don't split outer IPv6 source address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O])) + return false; + + /* Don't split inner IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I])) + return false; + + /* Don't split inner IPv6 source address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I])) + return false; + + /* Don't split GRE parameters. */ + if ((m1[MLX5HWS_DEFINER_FNAME_GRE_C] || + m1[MLX5HWS_DEFINER_FNAME_GRE_K] || + m1[MLX5HWS_DEFINER_FNAME_GRE_S] || + m1[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL]) && + (m2[MLX5HWS_DEFINER_FNAME_GRE_C] || + m2[MLX5HWS_DEFINER_FNAME_GRE_K] || + m2[MLX5HWS_DEFINER_FNAME_GRE_S] || + m2[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL])) + return false; + + /* Don't split TCP ack/seq numbers. */ + if ((m1[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m1[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM]) && + (m2[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m2[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM])) + return false; + + /* Don't split flex parser. */ + if ((m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7]) && + (m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7])) + return false; + + return true; +} + +static void +hws_bwc_matcher_complex_params_comb_create(struct mlx5hws_context *ctx, + struct mlx5hws_match_parameters *m, + struct mlx5hws_match_parameters *m1, + struct mlx5hws_match_parameters *m2, + struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool is_first_matcher; + int i; + + memcpy(m1->match_buf, m->match_buf, m->match_sz); + memcpy(m2->match_buf, m->match_buf, m->match_sz); + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + hws_bwc_matcher_complex_params_clear_fld(ctx, + fc[i].fname, + is_first_matcher ? + m2 : m1); + } + + MLX5_SET(fte_match_param, m2->match_buf, + misc_parameters_2.metadata_reg_c_6, -1); +} + +static void +hws_bwc_matcher_complex_params_destroy(struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + kfree(mask_1->match_buf); + kfree(mask_2->match_buf); +} + +static int +hws_bwc_matcher_complex_params_create(struct mlx5hws_context *ctx, + u8 match_criteria, + struct mlx5hws_match_parameters *mask, + struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + struct mlx5hws_definer_fc *fc; + u32 num_of_combinations; + int fc_sz = 0; + int res = 0; + u32 i; + + if (MLX5_GET(fte_match_param, mask->match_buf, + misc_parameters_2.metadata_reg_c_6)) { + mlx5hws_err(ctx, "Complex matcher: REG_C_6 matching is reserved\n"); + res = -EINVAL; + goto out; + } + + mask_1->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + mask_2->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + if (!mask_1->match_buf || !mask_2->match_buf) { + mlx5hws_err(ctx, "Complex matcher: failed to allocate match_param\n"); + res = -ENOMEM; + goto free_params; + } + + mask_1->match_sz = mask->match_sz; + mask_2->match_sz = mask->match_sz; + + fc = mlx5hws_definer_conv_match_params_to_compressed_fc(ctx, + match_criteria, + mask->match_buf, + &fc_sz); + if (!fc) { + res = -ENOMEM; + goto free_params; + } + + if (fc_sz >= sizeof(num_of_combinations) * BITS_PER_BYTE) { + mlx5hws_err(ctx, + "Complex matcher: too many match parameters (%d)\n", + fc_sz); + res = -EINVAL; + goto free_fc; + } + + /* We have list of all the match fields from the match parameter. + * Now try all the possibilities of splitting them into two match + * buffers and look for the supported combination. + */ + num_of_combinations = 1 << fc_sz; + + /* Start from combination at index 1 - we know that 0 is unsupported */ + for (i = 1; i < num_of_combinations; i++) { + if (!hws_bwc_matcher_complex_params_comb_is_valid(fc, fc_sz, i)) + continue; + + hws_bwc_matcher_complex_params_comb_create(ctx, + mask, mask_1, mask_2, + fc, fc_sz, i); + /* We now have two separate sets of match params. + * Check if each of them can be used in its own matcher. + */ + if (!mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_1) && + !mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_2)) + break; + } + + if (i == num_of_combinations) { + /* We've scanned all the combinations, but to no avail */ + mlx5hws_err(ctx, "Complex matcher: couldn't find match params combination\n"); + res = -EINVAL; + goto free_fc; + } + + kfree(fc); + return 0; + +free_fc: + kfree(fc); +free_params: + hws_bwc_matcher_complex_params_destroy(mask_1, mask_2); +out: + return res; +} + +static int +hws_bwc_isolated_table_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_table_attr tbl_attr = {0}; + struct mlx5hws_table *isolated_tbl; + int ret = 0; + + tbl_attr.type = table->type; + tbl_attr.level = table->level; + + bwc_matcher->complex->isolated_tbl = + mlx5hws_table_create(ctx, &tbl_attr); + isolated_tbl = bwc_matcher->complex->isolated_tbl; + if (!isolated_tbl) + return -EINVAL; + + /* Set the default miss of the isolated table to + * point to the end anchor of the original matcher. + */ + mlx5hws_cmd_set_attr_connect_miss_tbl(ctx, + isolated_tbl->fw_ft_type, + isolated_tbl->type, + &ft_attr); + ft_attr.table_miss_id = bwc_matcher->matcher->end_ft_id; + + ret = mlx5hws_cmd_flow_table_modify(ctx->mdev, + &ft_attr, + isolated_tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Failed setting isolated tbl default miss\n"); + goto destroy_tbl; + } + + return 0; + +destroy_tbl: + mlx5hws_table_destroy(isolated_tbl); + return ret; +} + +static void hws_bwc_isolated_table_destroy(struct mlx5hws_table *isolated_tbl) +{ + /* This table is isolated - no table is pointing to it, no need to + * disconnect it from anywhere, it won't affect any other table's miss. + */ + mlx5hws_table_destroy(isolated_tbl); +} + +static int +hws_bwc_isolated_matcher_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + isolated_bwc_matcher = kzalloc(sizeof(*bwc_matcher), GFP_KERNEL); + if (!isolated_bwc_matcher) + return -ENOMEM; + + bwc_matcher->complex->isolated_bwc_matcher = isolated_bwc_matcher; + + /* Isolated BWC matcher needs access to the first BWC matcher */ + isolated_bwc_matcher->complex_first_bwc_matcher = bwc_matcher; + + /* Isolated matcher needs to match on REG_C_6, + * so make sure its criteria bit is on. + */ + match_criteria_enable |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; + + ret = mlx5hws_bwc_matcher_create_simple(isolated_bwc_matcher, + isolated_tbl, + 0, + match_criteria_enable, + mask, + NULL); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated BWC matcher\n"); + goto free_matcher; + } + + return 0; + +free_matcher: + kfree(bwc_matcher->complex->isolated_bwc_matcher); + return ret; +} + +static void +hws_bwc_isolated_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + kfree(bwc_matcher); +} + +static int +hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_action_mh_pattern ptrn; + int ret = 0; + + /* Create action to jump to isolated table */ + + bwc_matcher->complex->action_go_to_tbl = + mlx5hws_action_create_dest_table(ctx, + isolated_tbl, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_go_to_tbl) { + mlx5hws_err(ctx, "Complex matcher: failed to create go-to-tbl action\n"); + return -EINVAL; + } + + /* Create modify header action to set REG_C_6 */ + + MLX5_SET(set_action_in, modify_hdr_action, + action_type, MLX5_MODIFICATION_TYPE_SET); + MLX5_SET(set_action_in, modify_hdr_action, + field, MLX5_MODI_META_REG_C_6); + MLX5_SET(set_action_in, modify_hdr_action, + length, 0); /* zero means length of 32 */ + MLX5_SET(set_action_in, modify_hdr_action, offset, 0); + MLX5_SET(set_action_in, modify_hdr_action, data, 0); + + ptrn.data = (void *)modify_hdr_action; + ptrn.sz = MLX5HWS_ACTION_DOUBLE_SIZE; + + bwc_matcher->complex->action_metadata = + mlx5hws_action_create_modify_header(ctx, 1, &ptrn, 0, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_metadata) { + ret = -EINVAL; + goto destroy_action_go_to_tbl; + } + + /* Create last action */ + + bwc_matcher->complex->action_last = + mlx5hws_action_create_last(ctx, MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_last) { + mlx5hws_err(ctx, "Complex matcher: failed to create last action\n"); + ret = -EINVAL; + goto destroy_action_metadata; + } + + return 0; + +destroy_action_metadata: + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); +destroy_action_go_to_tbl: + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); + return ret; +} + +static void +hws_bwc_isolated_actions_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_action_destroy(bwc_matcher->complex->action_last); + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); +} + int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_table *table, u32 priority, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) { - mlx5hws_err(table->ctx, "Complex matcher is not supported yet\n"); - return -EOPNOTSUPP; + enum mlx5hws_action_type complex_init_action_types[3]; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_match_parameters mask_1 = {0}; + struct mlx5hws_match_parameters mask_2 = {0}; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + ret = hws_bwc_matcher_complex_params_create(table->ctx, + match_criteria_enable, + mask, &mask_1, &mask_2); + if (ret) + goto err; + + bwc_matcher->complex = + kzalloc(sizeof(*bwc_matcher->complex), GFP_KERNEL); + if (!bwc_matcher->complex) { + ret = -ENOMEM; + goto free_masks; + } + + ret = rhashtable_init(&bwc_matcher->complex->refcount_hash, + &hws_refcount_hash); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed to initialize rhashtable\n"); + goto free_complex; + } + + mutex_init(&bwc_matcher->complex->hash_lock); + ida_init(&bwc_matcher->complex->metadata_ida); + + /* Create initial action template for the first matcher. + * Usually the initial AT is just dummy, but in case of complex + * matcher we know exactly which actions should it have. + */ + + complex_init_action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; + complex_init_action_types[1] = MLX5HWS_ACTION_TYP_TBL; + complex_init_action_types[2] = MLX5HWS_ACTION_TYP_LAST; + + /* Create the first matcher */ + + ret = mlx5hws_bwc_matcher_create_simple(bwc_matcher, + table, + priority, + match_criteria_enable, + &mask_1, + complex_init_action_types); + if (ret) + goto destroy_ida; + + /* Create isolated table to hold the second isolated matcher */ + + ret = hws_bwc_isolated_table_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated table\n"); + goto destroy_first_matcher; + } + + /* Now create the second BWC matcher - the isolated one */ + + ret = hws_bwc_isolated_matcher_create(bwc_matcher, table, + match_criteria_enable, &mask_2); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated matcher\n"); + goto destroy_isolated_tbl; + } + + /* Create action for isolated matcher's rules */ + + ret = hws_bwc_isolated_actions_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated actions\n"); + goto destroy_isolated_matcher; + } + + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); + return 0; + +destroy_isolated_matcher: + isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); +destroy_isolated_tbl: + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); +destroy_first_matcher: + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); +destroy_ida: + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); +free_complex: + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +free_masks: + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); +err: + return ret; } void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { - /* nothing to do here */ + struct mlx5hws_bwc_matcher *isolated_bwc_matcher = + bwc_matcher->complex->isolated_bwc_matcher; + + hws_bwc_isolated_actions_destroy(bwc_matcher); + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +} + +static void +hws_bwc_matcher_complex_hash_lock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_lock(&bwc_matcher->complex->hash_lock); +} + +static void +hws_bwc_matcher_complex_hash_unlock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_unlock(&bwc_matcher->complex->hash_lock); +} + +static int +hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_match_parameters *params) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; + struct rhashtable *refcount_hash; + int ret, i; + + bwc_rule->complex_hash_node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + + ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + if (ret < 0) + goto err_free_node; + node->tag = ret; + + refcount_set(&node->refcount, 1); + + /* Clear match buffer - turn off all the unrelated fields + * in accordance with the match params mask for the first + * matcher out of the two parts of the complex matcher. + * The resulting mask is the key for the hash. + */ + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + node->match_buf[i] = params->match_buf[i] & + bwc_matcher->mt->match_param[i]; + + refcount_hash = &bwc_matcher->complex->refcount_hash; + old_node = rhashtable_lookup_get_insert_fast(refcount_hash, + &node->hash_node, + hws_refcount_hash); + if (IS_ERR(old_node)) { + ret = PTR_ERR(old_node); + goto err_free_ida; + } + + if (old_node) { + /* Rule with the same tag already exists - update refcount */ + refcount_inc(&old_node->refcount); + /* Let the new rule use the same tag as the existing rule. + * Note that we don't have any indication for the rule creation + * process that a rule with similar matching params already + * exists - no harm done when this rule is be overwritten by + * the same STE. + * There's some performance advantage in skipping such cases, + * so this is left for future optimizations. + */ + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); + kfree(node); + node = old_node; + } + + bwc_rule->complex_hash_node = node; + return 0; + +err_free_ida: + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); +err_free_node: + kfree(node); + return ret; +} + +static void +hws_bwc_rule_complex_hash_node_put(struct mlx5hws_bwc_rule *bwc_rule, + bool *is_last_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_complex_rule_hash_node *node; + + if (is_last_rule) + *is_last_rule = false; + + node = bwc_rule->complex_hash_node; + if (refcount_dec_and_test(&node->refcount)) { + rhashtable_remove_fast(&bwc_matcher->complex->refcount_hash, + &node->hash_node, + hws_refcount_hash); + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); + kfree(node); + if (is_last_rule) + *is_last_rule = true; + } + + bwc_rule->complex_hash_node = NULL; } int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, @@ -70,19 +1159,271 @@ int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_action rule_actions[], u16 bwc_queue_idx) { - mlx5hws_err(bwc_rule->bwc_matcher->matcher->tbl->ctx, - "Complex rule is not supported yet\n"); - return -EOPNOTSUPP; + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; + struct mlx5hws_rule_action rule_actions_1[3] = {0}; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + u32 *match_buf_2; + u32 metadata_val; + int ret = 0; + + isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; + bwc_rule->isolated_bwc_rule = + mlx5hws_bwc_rule_alloc(isolated_bwc_matcher); + if (unlikely(!bwc_rule->isolated_bwc_rule)) + return -ENOMEM; + + hws_bwc_matcher_complex_hash_lock(bwc_matcher); + + /* Get a new hash node for this complex rule. + * If this is a unique set of match params for the first matcher, + * we will get a new hash node with newly allocated IDA. + * Otherwise we will get an existing node with IDA and updated refcount. + */ + ret = hws_bwc_rule_complex_hash_node_get(bwc_rule, params); + if (unlikely(ret)) { + mlx5hws_err(ctx, "Complex rule: failed getting RHT node for this rule\n"); + goto free_isolated_rule; + } + + /* No need to clear match buffer's fields in accordance to what + * will actually be matched on first and second matchers. + * Both matchers were created with the appropriate masks + * and each of them holds the appropriate field copy array, + * so rule creation will use only the fields that will be copied + * in accordance with setters in field copy array. + * We do, however, need to temporary allocate match buffer + * for the second (isolated) rule in order to not modify + * user's match params buffer. + */ + + match_buf_2 = kmemdup(params->match_buf, + MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + if (unlikely(!match_buf_2)) { + mlx5hws_err(ctx, "Complex rule: failed allocating match_buf\n"); + ret = -ENOMEM; + goto hash_node_put; + } + + /* On 2nd matcher, use unique 32-bit ID as a matching tag */ + metadata_val = bwc_rule->complex_hash_node->tag; + MLX5_SET(fte_match_param, match_buf_2, + misc_parameters_2.metadata_reg_c_6, metadata_val); + + /* Isolated rule's rule_actions contain all the original actions */ + ret = mlx5hws_bwc_rule_create_simple(bwc_rule->isolated_bwc_rule, + match_buf_2, + rule_actions, + flow_source, + bwc_queue_idx); + kfree(match_buf_2); + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Complex rule: failed creating isolated BWC rule (%d)\n", + ret); + goto hash_node_put; + } + + /* First rule's rule_actions contain setting metadata and + * jump to isolated table that contains the second matcher. + * Set metadata value to a unique value for this rule. + */ + + MLX5_SET(set_action_in, modify_hdr_action, + action_type, MLX5_MODIFICATION_TYPE_SET); + MLX5_SET(set_action_in, modify_hdr_action, + field, MLX5_MODI_META_REG_C_6); + MLX5_SET(set_action_in, modify_hdr_action, + length, 0); /* zero means length of 32 */ + MLX5_SET(set_action_in, modify_hdr_action, + offset, 0); + MLX5_SET(set_action_in, modify_hdr_action, + data, metadata_val); + + rule_actions_1[0].action = bwc_matcher->complex->action_metadata; + rule_actions_1[0].modify_header.offset = 0; + rule_actions_1[0].modify_header.data = modify_hdr_action; + + rule_actions_1[1].action = bwc_matcher->complex->action_go_to_tbl; + rule_actions_1[2].action = bwc_matcher->complex->action_last; + + ret = mlx5hws_bwc_rule_create_simple(bwc_rule, + params->match_buf, + rule_actions_1, + flow_source, + bwc_queue_idx); + + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Complex rule: failed creating first BWC rule (%d)\n", + ret); + goto destroy_isolated_rule; + } + + hws_bwc_matcher_complex_hash_unlock(bwc_matcher); + + return 0; + +destroy_isolated_rule: + mlx5hws_bwc_rule_destroy_simple(bwc_rule->isolated_bwc_rule); +hash_node_put: + hws_bwc_rule_complex_hash_node_put(bwc_rule, NULL); +free_isolated_rule: + hws_bwc_matcher_complex_hash_unlock(bwc_matcher); + mlx5hws_bwc_rule_free(bwc_rule->isolated_bwc_rule); + return ret; } int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule) { - return 0; + struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_bwc_rule *isolated_bwc_rule; + int ret_isolated, ret; + bool is_last_rule; + + hws_bwc_matcher_complex_hash_lock(bwc_rule->bwc_matcher); + + hws_bwc_rule_complex_hash_node_put(bwc_rule, &is_last_rule); + bwc_rule->rule->skip_delete = !is_last_rule; + + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (unlikely(ret)) + mlx5hws_err(ctx, "BWC complex rule: failed destroying first rule\n"); + + isolated_bwc_rule = bwc_rule->isolated_bwc_rule; + ret_isolated = mlx5hws_bwc_rule_destroy_simple(isolated_bwc_rule); + if (unlikely(ret_isolated)) + mlx5hws_err(ctx, "BWC complex rule: failed destroying second (isolated) rule\n"); + + hws_bwc_matcher_complex_hash_unlock(bwc_rule->bwc_matcher); + + mlx5hws_bwc_rule_free(isolated_bwc_rule); + + return ret || ret_isolated; } -int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +static void +hws_bwc_matcher_clear_hash_rtcs(struct mlx5hws_bwc_matcher *bwc_matcher) { - mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "Moving complex rule is not supported yet\n"); - return -EOPNOTSUPP; + struct mlx5hws_bwc_complex_rule_hash_node *node; + struct rhashtable_iter iter; + + rhashtable_walk_enter(&bwc_matcher->complex->refcount_hash, &iter); + rhashtable_walk_start(&iter); + + while ((node = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(node)) + continue; + node->rtc_valid = false; + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +int +mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + bool move_error = false, poll_error = false; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_bwc_rule *tmp_bwc_rule; + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_table *isolated_tbl; + struct mlx5hws_rule *tmp_rule; + struct list_head *rules_list; + u32 expected_completions = 1; + u32 end_ft_id; + int i, ret; + + /* We are rehashing the matcher that is the first part of the complex + * matcher. Need to update the isolated matcher to point to the end_ft + * of this new matcher. This needs to be done before moving any rules + * to prevent possible steering loops. + */ + isolated_tbl = bwc_matcher->complex->isolated_tbl; + end_ft_id = bwc_matcher->matcher->resize_dst->end_ft_id; + ret = mlx5hws_matcher_update_end_ft_isolated(isolated_tbl, end_ft_id); + if (ret) { + mlx5hws_err(ctx, + "Failed updating end_ft of isolated matcher (%d)\n", + ret); + return ret; + } + + hws_bwc_matcher_clear_hash_rtcs(bwc_matcher); + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + rules_list = &bwc_matcher->rules[i]; + if (list_empty(rules_list)) + continue; + + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + + list_for_each_entry(tmp_bwc_rule, rules_list, list_node) { + /* Check if a rule with similar tag has already + * been moved. + */ + if (tmp_bwc_rule->complex_hash_node->rtc_valid) { + /* This rule is a duplicate of rule with similar + * tag that has already been moved earlier. + * Just update this rule's RTCs. + */ + tmp_bwc_rule->rule->rtc_0 = + tmp_bwc_rule->complex_hash_node->rtc_0; + tmp_bwc_rule->rule->rtc_1 = + tmp_bwc_rule->complex_hash_node->rtc_1; + tmp_bwc_rule->rule->matcher = + tmp_bwc_rule->rule->matcher->resize_dst; + continue; + } + + /* First time we're moving rule with this tag. + * Move it for real. + */ + tmp_rule = tmp_bwc_rule->rule; + tmp_rule->skip_delete = false; + ret = mlx5hws_matcher_resize_rule_move(matcher, + tmp_rule, + &rule_attr); + if (unlikely(ret && !move_error)) { + mlx5hws_err(ctx, + "Moving complex BWC rule failed (%d), attempting to move rest of the rules\n", + ret); + move_error = true; + } + + expected_completions = 1; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &expected_completions, + true); + if (unlikely(ret && !poll_error)) { + mlx5hws_err(ctx, + "Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = true; + } + + /* Done moving the rule to the new matcher, + * now update RTCs for all the duplicated rules. + */ + tmp_bwc_rule->complex_hash_node->rtc_0 = + tmp_bwc_rule->rule->rtc_0; + tmp_bwc_rule->complex_hash_node->rtc_1 = + tmp_bwc_rule->rule->rtc_1; + + tmp_bwc_rule->complex_hash_node->rtc_valid = true; + } + } + + if (move_error || poll_error) + ret = -EINVAL; + + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h index 340f0688e394..a6887c7e39d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h @@ -4,6 +4,27 @@ #ifndef HWS_BWC_COMPLEX_H_ #define HWS_BWC_COMPLEX_H_ +struct mlx5hws_bwc_complex_rule_hash_node { + u32 match_buf[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 tag; + refcount_t refcount; + bool rtc_valid; + u32 rtc_0; + u32 rtc_1; + struct rhash_head hash_node; +}; + +struct mlx5hws_bwc_matcher_complex_data { + struct mlx5hws_table *isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_action *action_metadata; + struct mlx5hws_action *action_go_to_tbl; + struct mlx5hws_action *action_last; + struct rhashtable refcount_hash; + struct mutex hash_lock; /* Protect the refcount rhashtable */ + struct ida metadata_ida; +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index e8f98c109b99..9c83753e4592 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -406,7 +406,6 @@ int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1); MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base); MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base); - MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset); MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id); MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index 51d9e0291ac1..fa6bff210266 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -70,7 +70,6 @@ struct mlx5hws_cmd_rtc_create_attr { u32 pd; u32 stc_base; u32 ste_base; - u32 ste_offset; u32 miss_ft_id; bool fw_gen_wqe; u8 update_index_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c index 9cda2774fd64..428dae869706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c @@ -34,7 +34,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) /* Create an STC pool per FT type */ pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL; max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max); pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran); @@ -159,10 +158,16 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx, if (ret) goto pools_uninit; + ret = mlx5hws_action_ste_pool_init(ctx); + if (ret) + goto close_queues; + INIT_LIST_HEAD(&ctx->tbl_list); return 0; +close_queues: + mlx5hws_send_queues_close(ctx); pools_uninit: hws_context_pools_uninit(ctx); uninit_pd: @@ -175,6 +180,7 @@ static void hws_context_uninit_hws(struct mlx5hws_context *ctx) if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) return; + mlx5hws_action_ste_pool_uninit(ctx); mlx5hws_send_queues_close(ctx); hws_context_pools_uninit(ctx); hws_context_uninit_pd(ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h index 38c3647444ad..3f8938c73dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h @@ -39,6 +39,8 @@ struct mlx5hws_context { struct mlx5hws_cmd_query_caps *caps; u32 pd_num; struct mlx5hws_pool *stc_pool; + struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */ + struct delayed_work action_ste_cleanup; struct mlx5hws_context_common_res common_res; struct mlx5hws_pattern_cache *pattern_cache; struct mlx5hws_definer_cache *definer_cache; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c index 696275fd0ce2..2ec8cb10139a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c @@ -99,17 +99,19 @@ hws_debug_dump_matcher_attr(struct seq_file *f, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; - seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d\n", + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d,-1,-1,%d,%d\n", MLX5HWS_DEBUG_RES_TYPE_MATCHER_ATTR, HWS_PTR_TO_ID(matcher), attr->priority, attr->mode, - attr->table.sz_row_log, - attr->table.sz_col_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_row_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_col_log, attr->optimize_using_rule_idx, attr->optimize_flow_src, attr->insert_mode, - attr->distribute_mode); + attr->distribute_mode, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_row_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_col_log); return 0; } @@ -118,8 +120,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma { enum mlx5hws_table_type tbl_type = matcher->tbl->type; struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; - struct mlx5hws_pool_chunk *ste; - struct mlx5hws_pool *ste_pool; u64 icm_addr_0 = 0; u64 icm_addr_1 = 0; u32 ste_0_id = -1; @@ -134,13 +134,9 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->end_ft_id, matcher->col_matcher ? HWS_PTR_TO_ID(matcher->col_matcher) : 0); - ste = &matcher->match_ste.ste; - ste_pool = matcher->match_ste.pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - } + ste_0_id = matcher->match_ste.ste_0_base; + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + ste_1_id = matcher->match_ste.ste_1_base; seq_printf(f, ",%d,%d,%d,%d", matcher->match_ste.rtc_0_id, @@ -148,19 +144,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->match_ste.rtc_1_id, (int)ste_1_id); - ste = &matcher->action_ste.ste; - ste_pool = matcher->action_ste.pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - else - ste_1_id = -1; - } else { - ste_0_id = -1; - ste_1_id = -1; - } - ft_attr.type = matcher->tbl->fw_ft_type; ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev, matcher->end_ft_id, @@ -170,10 +153,7 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma if (ret) return ret; - seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n", - matcher->action_ste.rtc_0_id, (int)ste_0_id, - matcher->action_ste.rtc_1_id, (int)ste_1_id, - 0, + seq_printf(f, ",-1,-1,-1,-1,0,0x%llx,0x%llx\n", mlx5hws_debug_icm_to_idx(icm_addr_0), mlx5hws_debug_icm_to_idx(icm_addr_1)); @@ -387,14 +367,17 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context if (!stc_pool) return 0; - if (stc_pool->resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]); + if (stc_pool->resource) { + ret = hws_debug_dump_context_stc_resource(f, ctx, + stc_pool->resource); if (ret) return ret; } - if (stc_pool->mirror_resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]); + if (stc_pool->mirror_resource) { + struct mlx5hws_pool_resource *res = stc_pool->mirror_resource; + + ret = hws_debug_dump_context_stc_resource(f, ctx, res); if (ret) return ret; } @@ -402,10 +385,41 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context return 0; } +static void +hws_debug_dump_action_ste_table(struct seq_file *f, + struct mlx5hws_action_ste_table *action_tbl) +{ + int ste_0_id = mlx5hws_pool_get_base_id(action_tbl->pool); + int ste_1_id = mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE, + HWS_PTR_TO_ID(action_tbl), + action_tbl->rtc_0_id, ste_0_id, + action_tbl->rtc_1_id, ste_1_id); +} + +static void hws_debug_dump_action_ste_pool(struct seq_file *f, + struct mlx5hws_action_ste_pool *pool) +{ + struct mlx5hws_action_ste_table *action_tbl; + enum mlx5hws_pool_optimize opt; + + mutex_lock(&pool->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + list_for_each_entry(action_tbl, &pool->elems[opt].available, + list_node) { + hws_debug_dump_action_ste_table(f, action_tbl); + } + } + mutex_unlock(&pool->lock); +} + static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx) { struct mlx5hws_table *tbl; - int ret; + int ret, i; ret = hws_debug_dump_context_info(f, ctx); if (ret) @@ -425,6 +439,9 @@ static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ct return ret; } + for (i = 0; i < ctx->queues; i++) + hws_debug_dump_action_ste_pool(f, &ctx->action_ste_pool[i]); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h index e44e7ae28f93..89c396f9f266 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h @@ -26,6 +26,8 @@ enum mlx5hws_debug_res_type { MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207, + + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE = 4300, }; static inline u64 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c8cc0c8115f5..c6436c3a7a83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -158,6 +158,218 @@ struct mlx5hws_definer_conv_data { u32 match_flags; }; +#define HWS_DEFINER_ENTRY(name)[MLX5HWS_DEFINER_FNAME_##name] = #name + +static const char * const hws_definer_fname_to_str[] = { + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_TYPE_O), + HWS_DEFINER_ENTRY(ETH_TYPE_I), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_O), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_CFI_O), + HWS_DEFINER_ENTRY(VLAN_CFI_I), + HWS_DEFINER_ENTRY(VLAN_ID_O), + HWS_DEFINER_ENTRY(VLAN_ID_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_I), + HWS_DEFINER_ENTRY(IPV4_IHL_O), + HWS_DEFINER_ENTRY(IPV4_IHL_I), + HWS_DEFINER_ENTRY(IP_DSCP_O), + HWS_DEFINER_ENTRY(IP_DSCP_I), + HWS_DEFINER_ENTRY(IP_ECN_O), + HWS_DEFINER_ENTRY(IP_ECN_I), + HWS_DEFINER_ENTRY(IP_TTL_O), + HWS_DEFINER_ENTRY(IP_TTL_I), + HWS_DEFINER_ENTRY(IPV4_DST_O), + HWS_DEFINER_ENTRY(IPV4_DST_I), + HWS_DEFINER_ENTRY(IPV4_SRC_O), + HWS_DEFINER_ENTRY(IPV4_SRC_I), + HWS_DEFINER_ENTRY(IP_VERSION_O), + HWS_DEFINER_ENTRY(IP_VERSION_I), + HWS_DEFINER_ENTRY(IP_FRAG_O), + HWS_DEFINER_ENTRY(IP_FRAG_I), + HWS_DEFINER_ENTRY(IP_LEN_O), + HWS_DEFINER_ENTRY(IP_LEN_I), + HWS_DEFINER_ENTRY(IP_TOS_O), + HWS_DEFINER_ENTRY(IP_TOS_I), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_O), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_I), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_O), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_O), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_O), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_O), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_I), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_I), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_I), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_I), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_O), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_O), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_O), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_O), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_I), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_I), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_I), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_I), + HWS_DEFINER_ENTRY(IP_PROTOCOL_O), + HWS_DEFINER_ENTRY(IP_PROTOCOL_I), + HWS_DEFINER_ENTRY(L4_SPORT_O), + HWS_DEFINER_ENTRY(L4_SPORT_I), + HWS_DEFINER_ENTRY(L4_DPORT_O), + HWS_DEFINER_ENTRY(L4_DPORT_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_O), + HWS_DEFINER_ENTRY(TCP_SEQ_NUM), + HWS_DEFINER_ENTRY(TCP_ACK_NUM), + HWS_DEFINER_ENTRY(GTP_TEID), + HWS_DEFINER_ENTRY(GTP_MSG_TYPE), + HWS_DEFINER_ENTRY(GTP_EXT_FLAG), + HWS_DEFINER_ENTRY(GTP_NEXT_EXT_HDR), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_PDU), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_QFI), + HWS_DEFINER_ENTRY(GTPU_DW0), + HWS_DEFINER_ENTRY(GTPU_FIRST_EXT_DW0), + HWS_DEFINER_ENTRY(GTPU_DW2), + HWS_DEFINER_ENTRY(FLEX_PARSER_0), + HWS_DEFINER_ENTRY(FLEX_PARSER_1), + HWS_DEFINER_ENTRY(FLEX_PARSER_2), + HWS_DEFINER_ENTRY(FLEX_PARSER_3), + HWS_DEFINER_ENTRY(FLEX_PARSER_4), + HWS_DEFINER_ENTRY(FLEX_PARSER_5), + HWS_DEFINER_ENTRY(FLEX_PARSER_6), + HWS_DEFINER_ENTRY(FLEX_PARSER_7), + HWS_DEFINER_ENTRY(VPORT_REG_C_0), + HWS_DEFINER_ENTRY(VXLAN_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD0), + HWS_DEFINER_ENTRY(VXLAN_GPE_PROTO), + HWS_DEFINER_ENTRY(VXLAN_GPE_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD1), + HWS_DEFINER_ENTRY(GENEVE_OPT_LEN), + HWS_DEFINER_ENTRY(GENEVE_OAM), + HWS_DEFINER_ENTRY(GENEVE_PROTO), + HWS_DEFINER_ENTRY(GENEVE_VNI), + HWS_DEFINER_ENTRY(SOURCE_QP), + HWS_DEFINER_ENTRY(SOURCE_GVMI), + HWS_DEFINER_ENTRY(REG_0), + HWS_DEFINER_ENTRY(REG_1), + HWS_DEFINER_ENTRY(REG_2), + HWS_DEFINER_ENTRY(REG_3), + HWS_DEFINER_ENTRY(REG_4), + HWS_DEFINER_ENTRY(REG_5), + HWS_DEFINER_ENTRY(REG_6), + HWS_DEFINER_ENTRY(REG_7), + HWS_DEFINER_ENTRY(REG_8), + HWS_DEFINER_ENTRY(REG_9), + HWS_DEFINER_ENTRY(REG_10), + HWS_DEFINER_ENTRY(REG_11), + HWS_DEFINER_ENTRY(REG_A), + HWS_DEFINER_ENTRY(REG_B), + HWS_DEFINER_ENTRY(GRE_KEY_PRESENT), + HWS_DEFINER_ENTRY(GRE_C), + HWS_DEFINER_ENTRY(GRE_K), + HWS_DEFINER_ENTRY(GRE_S), + HWS_DEFINER_ENTRY(GRE_PROTOCOL), + HWS_DEFINER_ENTRY(GRE_OPT_KEY), + HWS_DEFINER_ENTRY(GRE_OPT_SEQ), + HWS_DEFINER_ENTRY(GRE_OPT_CHECKSUM), + HWS_DEFINER_ENTRY(INTEGRITY_O), + HWS_DEFINER_ENTRY(INTEGRITY_I), + HWS_DEFINER_ENTRY(ICMP_DW1), + HWS_DEFINER_ENTRY(ICMP_DW2), + HWS_DEFINER_ENTRY(ICMP_DW3), + HWS_DEFINER_ENTRY(IPSEC_SPI), + HWS_DEFINER_ENTRY(IPSEC_SEQUENCE_NUMBER), + HWS_DEFINER_ENTRY(IPSEC_SYNDROME), + HWS_DEFINER_ENTRY(MPLS0_O), + HWS_DEFINER_ENTRY(MPLS1_O), + HWS_DEFINER_ENTRY(MPLS2_O), + HWS_DEFINER_ENTRY(MPLS3_O), + HWS_DEFINER_ENTRY(MPLS4_O), + HWS_DEFINER_ENTRY(MPLS0_I), + HWS_DEFINER_ENTRY(MPLS1_I), + HWS_DEFINER_ENTRY(MPLS2_I), + HWS_DEFINER_ENTRY(MPLS3_I), + HWS_DEFINER_ENTRY(MPLS4_I), + HWS_DEFINER_ENTRY(FLEX_PARSER0_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER1_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER2_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER3_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER4_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER5_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER6_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER7_OK), + HWS_DEFINER_ENTRY(OKS2_MPLS0_O), + HWS_DEFINER_ENTRY(OKS2_MPLS1_O), + HWS_DEFINER_ENTRY(OKS2_MPLS2_O), + HWS_DEFINER_ENTRY(OKS2_MPLS3_O), + HWS_DEFINER_ENTRY(OKS2_MPLS4_O), + HWS_DEFINER_ENTRY(OKS2_MPLS0_I), + HWS_DEFINER_ENTRY(OKS2_MPLS1_I), + HWS_DEFINER_ENTRY(OKS2_MPLS2_I), + HWS_DEFINER_ENTRY(OKS2_MPLS3_I), + HWS_DEFINER_ENTRY(OKS2_MPLS4_I), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_5), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_7), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_5), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_7), + HWS_DEFINER_ENTRY(IB_L4_OPCODE), + HWS_DEFINER_ENTRY(IB_L4_QPN), + HWS_DEFINER_ENTRY(IB_L4_A), + HWS_DEFINER_ENTRY(RANDOM_NUM), + HWS_DEFINER_ENTRY(PTYPE_L2_O), + HWS_DEFINER_ENTRY(PTYPE_L2_I), + HWS_DEFINER_ENTRY(PTYPE_L3_O), + HWS_DEFINER_ENTRY(PTYPE_L3_I), + HWS_DEFINER_ENTRY(PTYPE_L4_O), + HWS_DEFINER_ENTRY(PTYPE_L4_I), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_O), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_I), + HWS_DEFINER_ENTRY(PTYPE_FRAG_O), + HWS_DEFINER_ENTRY(PTYPE_FRAG_I), + HWS_DEFINER_ENTRY(TNL_HDR_0), + HWS_DEFINER_ENTRY(TNL_HDR_1), + HWS_DEFINER_ENTRY(TNL_HDR_2), + HWS_DEFINER_ENTRY(TNL_HDR_3), + [MLX5HWS_DEFINER_FNAME_MAX] = "DEFINER_FNAME_UNKNOWN", +}; + +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname) +{ + if (fname > MLX5HWS_DEFINER_FNAME_MAX) + fname = MLX5HWS_DEFINER_FNAME_MAX; + return hws_definer_fname_to_str[fname]; +} + static void hws_definer_ones_set(struct mlx5hws_definer_fc *fc, void *match_param, @@ -509,18 +721,33 @@ static int hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; if (HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type, 0x2) || - HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c2, 0xe) || - HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c4, 0x4)) { + HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type_ext, 0x4) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c6, 0xa) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_d4, 0x4)) { mlx5hws_err(cd->ctx, "Unsupported outer parameters set\n"); return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + outer_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + outer_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, outer_headers.ip_version) || + HWS_IS_FLD_SET(match_param, outer_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_O, outer_headers.ethertype, @@ -559,6 +786,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); @@ -570,10 +800,16 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + + /* IHL is an IPv4-specific field. */ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, outer_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } - if (is_s_ipv6) { + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -587,13 +823,6 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_outer.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_O, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_outer.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -608,6 +837,10 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_outer.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_outer.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, @@ -665,18 +898,33 @@ static int hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; if (HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type, 0x2) || - HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c2, 0xe) || - HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c4, 0x4)) { + HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type_ext, 0x4) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c6, 0xa) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_d4, 0x4)) { mlx5hws_err(cd->ctx, "Unsupported inner parameters set\n"); return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + inner_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + inner_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, inner_headers.ip_version) || + HWS_IS_FLD_SET(match_param, inner_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_I, inner_headers.ethertype, @@ -728,10 +976,16 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; - if (is_s_ipv6) { + /* IHL is an IPv4-specific field. */ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, inner_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } + + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -745,13 +999,6 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_inner.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_I, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_inner.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -766,6 +1013,10 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_inner.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_inner.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, @@ -1030,7 +1281,8 @@ hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd, struct mlx5hws_definer_fc *curr_fc; if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) || - HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1b8, 0x8) || + HWS_IS_FLD_SET_SZ(match_param, + misc_parameters_2.ipsec_next_header, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) || HWS_IS_FLD_SET(match_param, misc_parameters_2.macsec_syndrome) || HWS_IS_FLD_SET(match_param, misc_parameters_2.ipsec_syndrome)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h index 5c1a2086efba..62da55389331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h @@ -831,4 +831,6 @@ mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, u32 *match_param, int *fc_sz); +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname); + #endif /* HWS_DEFINER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 1b787cd66e6f..57592b92e24b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -571,14 +571,12 @@ static void mlx5_fs_put_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, static struct mlx5hws_action * mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_action_dest_attr *dests, - u32 num_of_dests, bool ignore_flow_level, - u32 flow_source) + u32 num_of_dests, bool ignore_flow_level) { u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; return mlx5hws_action_create_dest_array(ctx, num_of_dests, dests, - ignore_flow_level, - flow_source, flags); + ignore_flow_level, flags); } static struct mlx5hws_action * @@ -966,6 +964,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, @@ -1012,7 +1013,6 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } (*ractions)[num_actions++].action = dest_actions->dest; } else if (num_dest_actions > 1) { - u32 flow_source = fte->act_dests.flow_context.flow_source; bool ignore_flow_level; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || @@ -1022,10 +1022,10 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } ignore_flow_level = !!(fte_action->flags & FLOW_ACT_IGNORE_FLOW_LEVEL); - tmp_action = mlx5_fs_create_action_dest_array(ctx, dest_actions, - num_dest_actions, - ignore_flow_level, - flow_source); + tmp_action = + mlx5_fs_create_action_dest_array(ctx, dest_actions, + num_dest_actions, + ignore_flow_level); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; @@ -1081,13 +1081,8 @@ static int mlx5_cmd_hws_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5hws_bwc_rule *rule; int err = 0; - if (mlx5_fs_cmd_is_fw_term_table(ft)) { - /* Packet reformat on terminamtion table not supported yet */ - if (fte->act_dests.action.action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - return -EOPNOTSUPP; + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); - } err = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions); if (err) @@ -1362,7 +1357,8 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->fs_hws_action.pr_data = pr_data; } - pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW; + mutex_init(&pkt_reformat->fs_hws_action.lock); + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS; pkt_reformat->fs_hws_action.hws_action = hws_action; return 0; @@ -1380,6 +1376,15 @@ static void mlx5_cmd_hws_packet_reformat_dealloc(struct mlx5_flow_root_namespace struct mlx5_fs_hws_pr *pr_data; struct mlx5_fs_pool *pr_pool; + if (pkt_reformat->fs_hws_action.fw_reformat_id != 0) { + struct mlx5_pkt_reformat fw_pkt_reformat = { 0 }; + + fw_pkt_reformat.id = pkt_reformat->fs_hws_action.fw_reformat_id; + mlx5_fs_cmd_get_fw_cmds()-> + packet_reformat_dealloc(ns, &fw_pkt_reformat); + pkt_reformat->fs_hws_action.fw_reformat_id = 0; + } + if (pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR) return; @@ -1532,6 +1537,58 @@ static void mlx5_cmd_hws_modify_header_dealloc(struct mlx5_flow_root_namespace * modify_hdr->fs_hws_action.mh_data = NULL; } +int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) +{ + enum mlx5_flow_namespace_type ns_type = pkt_reformat->ns_type; + struct mutex *lock = &pkt_reformat->fs_hws_action.lock; + u32 *id = &pkt_reformat->fs_hws_action.fw_reformat_id; + struct mlx5_pkt_reformat fw_pkt_reformat = { 0 }; + struct mlx5_pkt_reformat_params params = { 0 }; + struct mlx5_flow_root_namespace *ns; + struct mlx5_core_dev *dev; + int ret; + + mutex_lock(lock); + + if (*id != 0) { + *reformat_id = *id; + ret = 0; + goto unlock; + } + + dev = mlx5hws_action_get_dev(pkt_reformat->fs_hws_action.hws_action); + if (!dev) { + ret = -EINVAL; + goto unlock; + } + + ns = mlx5_get_root_namespace(dev, ns_type); + if (!ns) { + ret = -EINVAL; + goto unlock; + } + + params.type = pkt_reformat->reformat_type; + params.size = pkt_reformat->fs_hws_action.pr_data->data_size; + params.data = pkt_reformat->fs_hws_action.pr_data->data; + + ret = mlx5_fs_cmd_get_fw_cmds()-> + packet_reformat_alloc(ns, ¶ms, ns_type, &fw_pkt_reformat); + if (ret) + goto unlock; + + *id = fw_pkt_reformat.id; + *reformat_id = *id; + ret = 0; + +unlock: + mutex_unlock(lock); + + return ret; +} + static int mlx5_cmd_hws_create_match_definer(struct mlx5_flow_root_namespace *ns, u16 format_id, u32 *match_mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h index 8b56298288da..b92d55b2d147 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h @@ -41,6 +41,11 @@ struct mlx5_fs_hws_action { struct mlx5_fs_pool *fs_pool; struct mlx5_fs_hws_pr *pr_data; struct mlx5_fs_hws_mh *mh_data; + u32 fw_reformat_id; + /* Protect `fw_reformat_id` against being initialized from multiple + * threads. + */ + struct mutex lock; }; struct mlx5_fs_hws_matcher { @@ -84,12 +89,23 @@ void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data); #ifdef CONFIG_MLX5_HW_STEERING +int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id); + bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void); #else +static inline int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) +{ + return -EOPNOTSUPP; +} + static inline bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev) { return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h index 30ccd635b505..21279d503117 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h @@ -17,6 +17,7 @@ #include "context.h" #include "table.h" #include "send.h" +#include "action_ste_pool.h" #include "rule.h" #include "cmd.h" #include "action.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index b61864b32053..f3ea09caba2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -3,25 +3,6 @@ #include "internal.h" -enum mlx5hws_matcher_rtc_type { - HWS_MATCHER_RTC_TYPE_MATCH, - HWS_MATCHER_RTC_TYPE_STE_ARRAY, - HWS_MATCHER_RTC_TYPE_MAX, -}; - -static const char * const mlx5hws_matcher_rtc_type_str[] = { - [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH", - [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY", - [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN", -}; - -static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type) -{ - if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX) - rtc_type = HWS_MATCHER_RTC_TYPE_MAX; - return mlx5hws_matcher_rtc_type_str[rtc_type]; -} - static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules) { /* Collision table concatenation is done only for large rule tables */ @@ -42,19 +23,199 @@ static void hws_matcher_destroy_end_ft(struct mlx5hws_matcher *matcher) mlx5hws_table_destroy_default_ft(matcher->tbl, matcher->end_ft_id); } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id) +{ + struct mlx5hws_matcher *tmp_matcher; + + if (list_empty(&tbl->matchers_list)) + return -EINVAL; + + /* Update isolated_matcher_end_ft_id attribute for all + * the matchers in isolated table. + */ + list_for_each_entry(tmp_matcher, &tbl->matchers_list, list_node) + tmp_matcher->attr.isolated_matcher_end_ft_id = miss_ft_id; + + tmp_matcher = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + return mlx5hws_table_ft_set_next_ft(tbl->ctx, + tmp_matcher->end_ft_id, + tbl->fw_ft_type, + miss_ft_id); +} + +static int hws_matcher_connect_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + u32 end_ft_id; + int ret; + + /* Reset end_ft next RTCs */ + ret = mlx5hws_table_ft_set_next_rtc(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + /* Connect isolated matcher's end_ft to the complex matcher's end FT */ + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + end_ft_id); + + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; +} + +static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + int ret; + + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + &matcher->end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n"); + return ret; + } + + ret = hws_matcher_connect_end_ft_isolated(matcher); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to connect end FT\n"); + goto destroy_default_ft; + } + + return 0; + +destroy_default_ft: + mlx5hws_table_destroy_default_ft(tbl, matcher->end_ft_id); + return ret; +} + static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; int ret; - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &matcher->end_ft_id); + if (mlx5hws_matcher_is_isolated(matcher)) + ret = hws_matcher_create_end_ft_isolated(matcher); + else + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + &matcher->end_ft_id); + if (ret) { mlx5hws_err(tbl->ctx, "Failed to create matcher end flow table\n"); return ret; } + + return 0; +} + +static int hws_matcher_connect_isolated_first(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + /* Isolated matcher's end_ft is already pointing to the end_ft + * of the complex matcher - it was set at creation of end_ft, + * so no need to connect it. + * We still need to connect the isolated table's start FT to + * this matcher's RTC. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to connect start FT to match RTC\n"); + return ret; + } + + /* Reset table's FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset table ft default miss\n"); + return ret; + } + + list_add(&matcher->list_node, &tbl->matchers_list); + + return ret; +} + +static int hws_matcher_connect_isolated_last(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_matcher *last; + int ret; + + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + /* New matcher's end_ft is already pointing to the end_ft of + * the complex matcher. + * Connect previous matcher's end_ft to this new matcher RTC. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + last->end_ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, + "Isolated matcher: failed to connect matcher end_ft to new match RTC\n"); + return ret; + } + + /* Reset prev matcher FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, last->end_ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset matcher ft default miss\n"); + return ret; + } + + /* Insert after the last matcher */ + list_add(&matcher->list_node, &last->list_node); + return 0; } +static int hws_matcher_connect_isolated(struct mlx5hws_matcher *matcher) +{ + /* Isolated matcher is expected to be the only one in its table. + * However, it can have a collision matcher, and it can go through + * rehash process, in which case we will temporary have both old and + * new matchers in the isolated table. + * Check if this is the first matcher in the isolated table. + */ + if (list_empty(&matcher->tbl->matchers_list)) + return hws_matcher_connect_isolated_first(matcher); + + /* If this wasn't the first matcher, then we have 3 possible cases: + * - this is a collision matcher for the first matcher + * - this is a new rehash dest matcher + * - this is a collision matcher for the new rehash dest matcher + * The logic to add new matcher is the same for all these cases. + */ + return hws_matcher_connect_isolated_last(matcher); +} + static int hws_matcher_connect(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; @@ -64,6 +225,9 @@ static int hws_matcher_connect(struct mlx5hws_matcher *matcher) struct mlx5hws_matcher *tmp_matcher; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_connect_isolated(matcher); + /* Find location in matcher list */ if (list_empty(&tbl->matchers_list)) { list_add(&matcher->list_node, &tbl->matchers_list); @@ -140,6 +304,92 @@ remove_from_list: return ret; } +static int hws_matcher_disconnect_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher *first, *last, *prev, *next; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + u32 end_ft_id; + int ret; + + first = list_first_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + prev = list_prev_entry(matcher, list_node); + next = list_next_entry(matcher, list_node); + + list_del_init(&matcher->list_node); + + if (first == last) { + /* This was the only matcher in the list. + * Reset isolated table FT next RTCs and connect it + * to the whole complex matcher end FT instead. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + tbl->ft_id, + tbl->fw_ft_type, + end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; + } + + /* At this point we know that there are more matchers in the list */ + + if (matcher == first) { + /* We've disconnected the first matcher. + * Now update isolated table default FT. + */ + if (!next) + return -EINVAL; + return mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); + } + + if (matcher == last) { + /* If we've disconnected the last matcher - update prev + * matcher's end_ft to point to the complex matcher end_ft. + */ + if (!prev) + return -EINVAL; + return hws_matcher_connect_end_ft_isolated(prev); + } + + /* This wasn't the first or the last matcher, which means that it has + * both prev and next matchers. Note that this only happens if we're + * disconnecting collision matcher of the old matcher during rehash. + */ + if (!prev || !next || + !(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) + return -EINVAL; + + /* Update prev end FT to point to next match RTC */ + return mlx5hws_table_ft_set_next_rtc(ctx, + prev->end_ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); +} + static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher *next = NULL, *prev = NULL; @@ -147,6 +397,9 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) u32 prev_ft_id = tbl->ft_id; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_disconnect_isolated(matcher); + if (!list_is_first(&matcher->list_node, &tbl->matchers_list)) { prev = list_prev_entry(matcher, list_node); prev_ft_id = prev->end_ft_id; @@ -197,149 +450,98 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, struct mlx5hws_cmd_rtc_create_attr *rtc_attr, - enum mlx5hws_matcher_rtc_type rtc_type, bool is_mirror) { - struct mlx5hws_pool_chunk *ste = &matcher->action_ste.ste; enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src; - bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH; if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) || (flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE && is_mirror)) { /* Optimize FDB RTC */ rtc_attr->log_size = 0; rtc_attr->log_depth = 0; - } else { - /* Keep original values */ - rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order; - rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0; } } -static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type) +static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; struct mlx5hws_match_template *mt = matcher->mt; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_action_default_stc *default_stc; - struct mlx5hws_matcher_action_ste *action_ste; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; - u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = &matcher->match_ste.rtc_0_id; - rtc_1_id = &matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - ste->order = attr->table.sz_col_log + attr->table.sz_row_log; - - rtc_attr.log_size = attr->table.sz_row_log; - rtc_attr.log_depth = attr->table.sz_col_log; - rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); - rtc_attr.is_scnd_range = 0; - rtc_attr.miss_ft_id = matcher->end_ft_id; - - if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { - /* The usual Hash Table */ - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; - - /* The first mt is used since all share the same definer */ - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - rtc_attr.num_hash_definer = 1; - - if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { - /* Hash Split Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { - /* Linear Lookup Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; - rtc_attr.match_definer_0 = ctx->caps->linear_match_definer; - } - } - - /* Match pool requires implicit allocation */ - ret = mlx5hws_pool_chunk_alloc(ste_pool, ste); - if (ret) { - mlx5hws_err(ctx, "Failed to allocate STE for %s RTC", - hws_matcher_rtc_type_to_str(rtc_type)); - return ret; + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + + rtc_attr.log_size = size_rx->table.sz_row_log; + rtc_attr.log_depth = size_rx->table.sz_col_log; + rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); + rtc_attr.is_scnd_range = 0; + rtc_attr.miss_ft_id = matcher->end_ft_id; + + if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + /* The usual Hash Table */ + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; + + /* The first mt is used since all share the same definer */ + rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); + } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + rtc_attr.num_hash_definer = 1; + + if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + /* Hash Split Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; + rtc_attr.match_definer_0 = + mlx5hws_definer_get_id(mt->definer); + } else if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { + /* Linear Lookup Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; + rtc_attr.match_definer_0 = + ctx->caps->linear_match_definer; } - break; - - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste; - - rtc_0_id = &action_ste->rtc_0_id; - rtc_1_id = &action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - /* Action RTC size calculation: - * log((max number of rules in matcher) * - * (max number of action STEs per rule) * - * (2 to support writing new STEs for update rule)) - */ - ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - attr->table.sz_row_log + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; - rtc_attr.log_size = ste->order; - rtc_attr.log_depth = 0; - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - /* The action STEs use the default always hit definer */ - rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; - rtc_attr.is_frst_jumbo = false; - rtc_attr.miss_ft_id = 0; - break; - - default: - mlx5hws_err(ctx, "HWS Invalid RTC type\n"); - return -EINVAL; } - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - rtc_attr.pd = ctx->pd_num; - rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; + rtc_attr.ste_base = matcher->match_ste.ste_0_base; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false); - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool; - default_stc = ctx->common_res.default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_0_id); if (ret) { - mlx5hws_err(ctx, "Failed to create matcher RTC of type %s", - hws_matcher_rtc_type_to_str(rtc_type)); - goto free_ste; + mlx5hws_err(ctx, "Failed to create matcher RTC\n"); + return ret; } if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - rtc_attr.ste_base = obj_id; + rtc_attr.log_size = size_tx->table.sz_row_log; + rtc_attr.log_depth = size_tx->table.sz_col_log; + rtc_attr.ste_base = matcher->match_ste.ste_1_base; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true); - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_1_id); if (ret) { - mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s", - hws_matcher_rtc_type_to_str(rtc_type)); + mlx5hws_err(ctx, "Failed to create mirror matcher RTC\n"); goto destroy_rtc_0; } } @@ -347,46 +549,18 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, return 0; destroy_rtc_0: - mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id); -free_ste: - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(ctx->mdev, matcher->match_ste.rtc_0_id); return ret; } -static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type) +static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher) { - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_chunk *ste; - struct mlx5hws_pool *ste_pool; - u32 rtc_0_id, rtc_1_id; - - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = matcher->match_ste.rtc_0_id; - rtc_1_id = matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - break; - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste; - rtc_0_id = action_ste->rtc_0_id; - rtc_1_id = action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - break; - default: - return; - } + struct mlx5_core_dev *mdev = matcher->tbl->ctx->mdev; - if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id); + if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_1_id); - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id); - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_0_id); } static int @@ -394,43 +568,38 @@ hws_matcher_check_attr_sz(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size; + int i; - if (attr->table.sz_col_log > caps->rtc_log_depth_max) { - mlx5hws_err(matcher->tbl->ctx, "Matcher depth exceeds limit %d\n", - caps->rtc_log_depth_max); - return -EOPNOTSUPP; - } + for (i = 0; i < 2; i++) { + size = &attr->size[i]; - if (attr->table.sz_col_log + attr->table.sz_row_log > caps->ste_alloc_log_max) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size exceeds limit %d\n", - caps->ste_alloc_log_max); - return -EOPNOTSUPP; - } + if (size->table.sz_col_log > caps->rtc_log_depth_max) { + mlx5hws_err(ctx, "Matcher depth exceeds limit %d\n", + caps->rtc_log_depth_max); + return -EOPNOTSUPP; + } - if (attr->table.sz_col_log + attr->table.sz_row_log < caps->ste_alloc_log_gran) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size below limit %d\n", - caps->ste_alloc_log_gran); - return -EOPNOTSUPP; + if (size->table.sz_col_log + size->table.sz_row_log > + caps->ste_alloc_log_max) { + mlx5hws_err(ctx, + "Total matcher size exceeds limit %d\n", + caps->ste_alloc_log_max); + return -EOPNOTSUPP; + } + + if (size->table.sz_col_log + size->table.sz_row_log < + caps->ste_alloc_log_gran) { + mlx5hws_err(ctx, "Total matcher size below limit %d\n", + caps->ste_alloc_log_gran); + return -EOPNOTSUPP; + } } return 0; } -static void hws_matcher_set_pool_attr(struct mlx5hws_pool_attr *attr, - struct mlx5hws_matcher *matcher) -{ - switch (matcher->attr.optimize_flow_src) { - case MLX5HWS_MATCHER_FLOW_SRC_VPORT: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_ORIG; - break; - case MLX5HWS_MATCHER_FLOW_SRC_WIRE: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_MIRROR; - break; - default: - break; - } -} - static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { @@ -454,85 +623,17 @@ static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, return 0; } -static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL); - if (!resize_data) - return -ENOMEM; - - resize_data->max_stes = src_matcher->action_ste.max_stes; - - resize_data->stc = src_matcher->action_ste.stc; - resize_data->rtc_0_id = src_matcher->action_ste.rtc_0_id; - resize_data->rtc_1_id = src_matcher->action_ste.rtc_1_id; - resize_data->pool = src_matcher->action_ste.max_stes ? - src_matcher->action_ste.pool : NULL; - - /* Place the new resized matcher on the dst matcher's list */ - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - - /* Move all the previous resized matchers to the dst matcher's list */ - while (!list_empty(&src_matcher->resize_data)) { - resize_data = list_first_entry(&src_matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - } - - return 0; -} - -static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - if (!mlx5hws_matcher_is_resizable(matcher)) - return; - - while (!list_empty(&matcher->resize_data)) { - resize_data = list_first_entry(&matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - - if (resize_data->max_stes) { - mlx5hws_action_free_single_stc(matcher->tbl->ctx, - matcher->tbl->type, - &resize_data->stc); - - if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->rtc_1_id); - - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->rtc_0_id); - - if (resize_data->pool) - mlx5hws_pool_destroy(resize_data->pool); - } - - kfree(resize_data); - } -} - static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_attr pool_attr = {0}; - struct mlx5hws_context *ctx = tbl->ctx; - u32 required_stes; - u8 max_stes = 0; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 required_stes, max_stes; int i, ret; if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION) return 0; + max_stes = 0; for (i = 0; i < matcher->num_of_at; i++) { struct mlx5hws_action_template *at = &matcher->at[i]; @@ -548,81 +649,40 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) /* Future: Optimize reparse */ } - /* There are no additional STEs required for matcher */ - if (!max_stes) - return 0; - - matcher->action_ste.max_stes = max_stes; - - action_ste = &matcher->action_ste; - - /* Allocate action STE mempool */ - pool_attr.table_type = tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; - /* Pool size is similar to action RTC size */ - pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - matcher->attr.table.sz_row_log + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; - hws_matcher_set_pool_attr(&pool_attr, matcher); - action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!action_ste->pool) { - mlx5hws_err(ctx, "Failed to create action ste pool\n"); - return -EINVAL; - } - - /* Allocate action RTC */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); - if (ret) { - mlx5hws_err(ctx, "Failed to create action RTC\n"); - goto free_ste_pool; - } - - /* Allocate STC for jumps to STE */ - stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; - stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; - stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = action_ste->ste; - stc_attr.ste_table.ste_pool = action_ste->pool; - stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; - - ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type, - &action_ste->stc); - if (ret) { - mlx5hws_err(ctx, "Failed to create action jump to table STC\n"); - goto free_rtc; - } + matcher->num_of_action_stes = max_stes; return 0; - -free_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); -free_ste_pool: - mlx5hws_pool_destroy(action_ste->pool); - return ret; } -static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher) +static void hws_matcher_set_ip_version_match(struct mlx5hws_matcher *matcher) { - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - - action_ste = &matcher->action_ste; - - if (!action_ste->max_stes || - matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION || - mlx5hws_matcher_is_in_resize(matcher)) - return; + int i; - mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); - mlx5hws_pool_destroy(action_ste->pool); + for (i = 0; i < matcher->mt->fc_sz; i++) { + switch (matcher->mt->fc[i].fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + matcher->matches_outer_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + matcher->matches_outer_ip_version = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + matcher->matches_inner_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + matcher->matches_inner_ip_version = 1; + break; + default: + break; + } + } } static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) { + struct mlx5hws_cmd_ste_create_attr ste_attr = {}; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_pool_attr pool_attr = {0}; + union mlx5hws_matcher_size *size; int ret; /* Calculate match, range and hash definers */ @@ -635,23 +695,41 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) } } - /* Create an STE pool per matcher*/ - pool_attr.table_type = matcher->tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL; - pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log + - matcher->attr.table.sz_row_log; - hws_matcher_set_pool_attr(&pool_attr, matcher); - - matcher->match_ste.pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!matcher->match_ste.pool) { - mlx5hws_err(ctx, "Failed to allocate matcher STE pool\n"); - ret = -EOPNOTSUPP; + hws_matcher_set_ip_version_match(matcher); + + /* Create an STE range each for RX and TX. */ + ste_attr.table_type = FS_FT_FDB_RX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_0_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate RX STE range (%d)\n", ret); goto uninit_match_definer; } + ste_attr.table_type = FS_FT_FDB_TX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_1_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate TX STE range (%d)\n", ret); + goto destroy_rx_ste_range; + } + return 0; +destroy_rx_ste_range: + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); uninit_match_definer: if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) mlx5hws_definer_mt_uninit(ctx, matcher->mt); @@ -660,9 +738,12 @@ uninit_match_definer: static void hws_matcher_unbind_mt(struct mlx5hws_matcher *matcher) { - mlx5hws_pool_destroy(matcher->match_ste.pool); + struct mlx5hws_context *ctx = matcher->tbl->ctx; + + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_1_base); + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) - mlx5hws_definer_mt_uninit(matcher->tbl->ctx, matcher->mt); + mlx5hws_definer_mt_uninit(ctx, matcher->mt); } static int @@ -671,6 +752,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; switch (attr->insert_mode) { case MLX5HWS_MATCHER_INSERT_BY_HASH: @@ -681,7 +766,7 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, break; case MLX5HWS_MATCHER_INSERT_BY_INDEX: - if (attr->table.sz_col_log) { + if (size_rx->table.sz_col_log || size_tx->table.sz_col_log) { mlx5hws_err(ctx, "Matcher with INSERT_BY_INDEX supports only Nx1 table size\n"); return -EOPNOTSUPP; } @@ -701,7 +786,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, return -EOPNOTSUPP; } - if (attr->table.sz_row_log > MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { + if (size_rx->table.sz_row_log > + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX || + size_tx->table.sz_row_log > + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { mlx5hws_err(ctx, "Matcher with linear distribute: rows exceed limit %d", MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX); return -EOPNOTSUPP; @@ -725,6 +813,10 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (hws_matcher_validate_insert_mode(caps, matcher)) return -EOPNOTSUPP; @@ -736,10 +828,16 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, /* Convert number of rules to the required depth */ if (attr->mode == MLX5HWS_MATCHER_RESOURCE_MODE_RULE && - attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) - attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log); + attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + size_rx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_rx->rule.num_log); + size_tx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_tx->rule.num_log); + } matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0; + matcher->flags |= attr->isolated_matcher_end_ft_id ? + MLX5HWS_MATCHER_FLAGS_ISOLATED : 0; return hws_matcher_check_attr_sz(caps, matcher); } @@ -761,10 +859,10 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) /* Create matcher end flow table anchor */ ret = hws_matcher_create_end_ft(matcher); if (ret) - goto unbind_at; + goto unbind_mt; /* Allocate the RTC for the new matcher */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + ret = hws_matcher_create_rtc(matcher); if (ret) goto destroy_end_ft; @@ -776,11 +874,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) return 0; destroy_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + hws_matcher_destroy_rtc(matcher); destroy_end_ft: hws_matcher_destroy_end_ft(matcher); -unbind_at: - hws_matcher_unbind_at(matcher); unbind_mt: hws_matcher_unbind_mt(matcher); return ret; @@ -788,11 +884,9 @@ unbind_mt: static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher) { - hws_matcher_resize_uninit(matcher); hws_matcher_disconnect(matcher); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + hws_matcher_destroy_rtc(matcher); hws_matcher_destroy_end_ft(matcher); - hws_matcher_unbind_at(matcher); hws_matcher_unbind_mt(matcher); } @@ -800,22 +894,25 @@ static int hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) { struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_matcher *col_matcher; - int ret; + int i, ret; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (matcher->attr.mode != MLX5HWS_MATCHER_RESOURCE_MODE_RULE || matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) return 0; - if (!hws_matcher_requires_col_tbl(matcher->attr.rule.num_log)) + if (!hws_matcher_requires_col_tbl(size_rx->rule.num_log) && + !hws_matcher_requires_col_tbl(size_tx->rule.num_log)) return 0; col_matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); if (!col_matcher) return -ENOMEM; - INIT_LIST_HEAD(&col_matcher->resize_data); - col_matcher->tbl = matcher->tbl; col_matcher->mt = matcher->mt; col_matcher->at = matcher->at; @@ -826,12 +923,20 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) col_matcher->flags |= MLX5HWS_MATCHER_FLAGS_COLLISION; col_matcher->attr.mode = MLX5HWS_MATCHER_RESOURCE_MODE_HTABLE; col_matcher->attr.optimize_flow_src = matcher->attr.optimize_flow_src; - col_matcher->attr.table.sz_row_log = matcher->attr.rule.num_log; - col_matcher->attr.table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; - if (col_matcher->attr.table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) - col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + for (i = 0; i < 2; i++) { + union mlx5hws_matcher_size *dst = &col_matcher->attr.size[i]; + union mlx5hws_matcher_size *src = &matcher->attr.size[i]; + + dst->table.sz_row_log = src->rule.num_log; + dst->table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; + if (dst->table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) + dst->table.sz_row_log -= + MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + } col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach; + col_matcher->attr.isolated_matcher_end_ft_id = + matcher->attr.isolated_matcher_end_ft_id; ret = hws_matcher_process_attr(ctx->caps, col_matcher); if (ret) @@ -869,8 +974,6 @@ static int hws_matcher_init(struct mlx5hws_matcher *matcher) struct mlx5hws_context *ctx = matcher->tbl->ctx; int ret; - INIT_LIST_HEAD(&matcher->resize_data); - mutex_lock(&ctx->ctrl_lock); /* Allocate matcher resource and connect to the packet pipe */ @@ -905,18 +1008,44 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) return 0; } +static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher) +{ + void *p; + + if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + + matcher->size_of_at_array *= 2; + p = krealloc(matcher->at, + matcher->size_of_at_array * sizeof(*matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + matcher->at = p; + + return 0; +} + int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_context *ctx = matcher->tbl->ctx; u32 required_stes; int ret; - if (!matcher->attr.max_num_of_at_attach) { - mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", - matcher->num_of_at); - return -EOPNOTSUPP; + if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) { + ret = hws_matcher_grow_at_array(matcher); + if (ret) + return ret; + + if (matcher->col_matcher) { + ret = hws_matcher_grow_at_array(matcher->col_matcher); + if (ret) + return ret; + } } ret = hws_matcher_check_and_process_at(matcher, at); @@ -924,15 +1053,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, return ret; required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term); - if (matcher->action_ste.max_stes < required_stes) { - mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", - required_stes, matcher->action_ste.max_stes); - return -ENOMEM; - } + if (matcher->num_of_action_stes < required_stes) + matcher->num_of_action_stes = required_stes; matcher->at[matcher->num_of_at] = *at; matcher->num_of_at += 1; - matcher->attr.max_num_of_at_attach -= 1; if (matcher->col_matcher) matcher->col_matcher->num_of_at = matcher->num_of_at; @@ -960,8 +1085,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), + matcher->size_of_at_array = + num_of_at + matcher->attr.max_num_of_at_attach; + matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at), GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); @@ -1110,7 +1236,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher, return -EINVAL; } - if (src_matcher->action_ste.max_stes > dst_matcher->action_ste.max_stes) { + if (src_matcher->num_of_action_stes > dst_matcher->num_of_action_stes) { mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n"); return -EINVAL; } @@ -1139,10 +1265,6 @@ int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher, src_matcher->resize_dst = dst_matcher; - ret = hws_matcher_resize_init(src_matcher); - if (ret) - src_matcher->resize_dst = NULL; - out: mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 020de70270c5..ae20bcebfdde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -23,6 +23,9 @@ */ #define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 +/* Maximum number of action templates that can be attached to a matcher. */ +#define MLX5HWS_MATCHER_MAX_AT 128 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, @@ -31,6 +34,7 @@ enum mlx5hws_matcher_offset { enum mlx5hws_matcher_flags { MLX5HWS_MATCHER_FLAGS_COLLISION = 1 << 2, MLX5HWS_MATCHER_FLAGS_RESIZABLE = 1 << 3, + MLX5HWS_MATCHER_FLAGS_ISOLATED = 1 << 4, }; struct mlx5hws_match_template { @@ -42,28 +46,16 @@ struct mlx5hws_match_template { }; struct mlx5hws_matcher_match_ste { - struct mlx5hws_pool_chunk ste; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; -}; - -struct mlx5hws_matcher_action_ste { - struct mlx5hws_pool_chunk ste; - struct mlx5hws_pool_chunk stc; u32 rtc_0_id; u32 rtc_1_id; - struct mlx5hws_pool *pool; - u8 max_stes; + u32 ste_0_base; + u32 ste_1_base; }; -struct mlx5hws_matcher_resize_data { - struct mlx5hws_pool_chunk stc; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; - u8 max_stes; - struct list_head list_node; +enum { + MLX5HWS_MATCHER_IPV_UNSET = 0, + MLX5HWS_MATCHER_IPV_4 = 1, + MLX5HWS_MATCHER_IPV_6 = 2, }; struct mlx5hws_matcher { @@ -72,16 +64,22 @@ struct mlx5hws_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at; u8 num_of_at; + u8 size_of_at_array; u8 num_of_mt; + u8 num_of_action_stes; /* enum mlx5hws_matcher_flags */ u8 flags; + u8 matches_outer_ethertype:1; + u8 matches_outer_ip_version:1; + u8 matches_inner_ethertype:1; + u8 matches_inner_ip_version:1; + u8 outer_ip_version:2; + u8 inner_ip_version:2; u32 end_ft_id; struct mlx5hws_matcher *col_matcher; struct mlx5hws_matcher *resize_dst; struct mlx5hws_matcher_match_ste match_ste; - struct mlx5hws_matcher_action_ste action_ste; struct list_head list_node; - struct list_head resize_data; }; static inline bool @@ -100,9 +98,17 @@ static inline bool mlx5hws_matcher_is_in_resize(struct mlx5hws_matcher *matcher) return !!matcher->resize_dst; } +static inline bool mlx5hws_matcher_is_isolated(struct mlx5hws_matcher *matcher) +{ + return !!(matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED); +} + static inline bool mlx5hws_matcher_is_insert_by_idx(struct mlx5hws_matcher *matcher) { return matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX; } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id); + #endif /* HWS_MATCHER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 5121951f2778..59c14745ed0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -93,6 +93,23 @@ enum mlx5hws_matcher_distribute_mode { MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR = 0x1, }; +enum mlx5hws_matcher_size_type { + MLX5HWS_MATCHER_SIZE_TYPE_RX, + MLX5HWS_MATCHER_SIZE_TYPE_TX, + MLX5HWS_MATCHER_SIZE_TYPE_MAX, +}; + +union mlx5hws_matcher_size { + struct { + u8 sz_row_log; + u8 sz_col_log; + } table; + + struct { + u8 num_log; + } rule; +}; + struct mlx5hws_matcher_attr { /* Processing priority inside table */ u32 priority; @@ -107,18 +124,11 @@ struct mlx5hws_matcher_attr { enum mlx5hws_matcher_distribute_mode distribute_mode; /* Define whether the created matcher supports resizing into a bigger matcher */ bool resizable; - union { - struct { - u8 sz_row_log; - u8 sz_col_log; - } table; - - struct { - u8 num_log; - } rule; - }; + union mlx5hws_matcher_size size[MLX5HWS_MATCHER_SIZE_TYPE_MAX]; /* Optional AT attach configuration - Max number of additional AT */ u8 max_num_of_at_attach; + /* Optional end FT (miss FT ID) for match RTC (for isolated matcher) */ + u32 isolated_matcher_end_ft_id; }; struct mlx5hws_rule_attr { @@ -211,6 +221,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** @@ -502,6 +513,15 @@ enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action); /** + * mlx5hws_action_get_dev - Get mlx5 core device. + * + * @action: The action to get the device from. + * + * Return: mlx5 core device. + */ +struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action); + +/** * mlx5hws_action_create_dest_drop - Create a direct rule drop action. * * @ctx: The context in which the new action will be created. @@ -715,18 +735,14 @@ mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); * @dests: The destination array. Each contains a destination action and can * have additional actions. * @ignore_flow_level: Whether to turn on 'ignore_flow_level' for this dest. - * @flow_source: Source port of the traffic for this actions. * @flags: Action creation flags (enum mlx5hws_action_flags). * * Return: pointer to mlx5hws_action on success NULL otherwise. */ struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags); + bool ignore_flow_level, u32 flags); /** * mlx5hws_action_create_insert_header - Create insert header action. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index f51ed24526b9..51e4c551e0ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -490,8 +490,8 @@ hws_action_modify_get_target_fields(u8 action_type, __be64 *pattern, switch (action_type) { case MLX5_ACTION_TYPE_SET: case MLX5_ACTION_TYPE_ADD: - *src_field = MLX5_GET(set_action_in, pattern, field); - *dst_field = INVALID_FIELD; + *src_field = INVALID_FIELD; + *dst_field = MLX5_GET(set_action_in, pattern, field); break; case MLX5_ACTION_TYPE_COPY: *src_field = MLX5_GET(copy_action_in, pattern, src_field); @@ -522,57 +522,59 @@ bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], s return true; } -void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, - size_t max_actions, size_t *new_size, - u32 *nope_location, __be64 *new_pat) +int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, + size_t max_actions, size_t *new_size, + u32 *nop_locations, __be64 *new_pat) { - u16 prev_src_field = 0, prev_dst_field = 0; + u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; u16 src_field, dst_field; u8 action_type; + bool dependent; size_t i, j; *new_size = num_actions; - *nope_location = 0; + *nop_locations = 0; if (num_actions == 1) - return; + return 0; for (i = 0, j = 0; i < num_actions; i++, j++) { - action_type = MLX5_GET(set_action_in, &pattern[i], action_type); + if (j >= max_actions) + return -EINVAL; + action_type = MLX5_GET(set_action_in, &pattern[i], action_type); hws_action_modify_get_target_fields(action_type, &pattern[i], &src_field, &dst_field); - if (i % 2) { - if (action_type == MLX5_ACTION_TYPE_COPY && - (prev_src_field == src_field || - prev_dst_field == dst_field)) { - /* need Nope */ - *new_size += 1; - *nope_location |= BIT(i); - memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE); - MLX5_SET(set_action_in, &new_pat[j], - action_type, - MLX5_MODIFICATION_TYPE_NOP); - j++; - } else if (prev_src_field == src_field) { - /* need Nope*/ - *new_size += 1; - *nope_location |= BIT(i); - MLX5_SET(set_action_in, &new_pat[j], - action_type, - MLX5_MODIFICATION_TYPE_NOP); - j++; - } - } - memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE); - /* check if no more space */ - if (j > max_actions) { - *new_size = num_actions; - *nope_location = 0; - return; + + /* For every action, look at it and the previous one. The two + * actions are dependent if: + */ + dependent = + (i > 0) && + /* At least one of the actions is a write and */ + (dst_field != INVALID_FIELD || + prev_dst_field != INVALID_FIELD) && + /* One reads from the other's source */ + (dst_field == prev_src_field || + src_field == prev_dst_field || + /* Or both write to the same destination */ + dst_field == prev_dst_field); + + if (dependent) { + *new_size += 1; + *nop_locations |= BIT(i); + memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE); + MLX5_SET(set_action_in, &new_pat[j], action_type, + MLX5_MODIFICATION_TYPE_NOP); + j++; + if (j >= max_actions) + return -EINVAL; } + memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE); prev_src_field = src_field; prev_dst_field = dst_field; } + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h index 8ddb51980044..7fbd8dc7aa18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h @@ -96,6 +96,7 @@ int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx, u8 *arg_data, size_t data_size); -void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, size_t max_actions, - size_t *new_size, u32 *nope_location, __be64 *new_pat); +int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, + size_t max_actions, size_t *new_size, + u32 *nop_locations, __be64 *new_pat); #endif /* MLX5HWS_PAT_ARG_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index 50a81d360bb2..7e37d6e9eb83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -20,15 +20,14 @@ static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource) kfree(resource); } -static void hws_pool_resource_free(struct mlx5hws_pool *pool, - int resource_idx) +static void hws_pool_resource_free(struct mlx5hws_pool *pool) { - hws_pool_free_one_resource(pool->resource[resource_idx]); - pool->resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->resource); + pool->resource = NULL; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { - hws_pool_free_one_resource(pool->mirror_resource[resource_idx]); - pool->mirror_resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->mirror_resource); + pool->mirror_resource = NULL; } } @@ -61,10 +60,8 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range, ret = -EINVAL; } - if (ret) { - mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n"); + if (ret) goto free_resource; - } resource->pool = pool; resource->range = 1 << log_range; @@ -77,199 +74,114 @@ free_resource: return NULL; } -static int -hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx) +static int hws_pool_resource_alloc(struct mlx5hws_pool *pool) { struct mlx5hws_pool_resource *resource; u32 fw_ft_type, opt_log_range; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? + 0 : pool->alloc_log_sz; resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!resource) { - mlx5hws_err(pool->ctx, "Failed allocating resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate resource\n"); return -EINVAL; } - pool->resource[idx] = resource; + pool->resource = resource; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { struct mlx5hws_pool_resource *mirror_resource; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? + 0 : pool->alloc_log_sz; mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!mirror_resource) { - mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate mirrored resource\n"); hws_pool_free_one_resource(resource); - pool->resource[idx] = NULL; + pool->resource = NULL; return -EINVAL; } - pool->mirror_resource[idx] = mirror_resource; + pool->mirror_resource = mirror_resource; } return 0; } -static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) -{ - unsigned long *cur_bmp; - - cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL); - if (!cur_bmp) - return NULL; - - bitmap_fill(cur_bmp, 1 << log_range); - - return cur_bmp; -} - -static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_buddy_init(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - buddy = pool->db.buddy_manager->buddies[chunk->resource_idx]; + buddy = mlx5hws_buddy_create(pool->alloc_log_sz); if (!buddy) { - mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx); - return; - } - - mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); -} - -static struct mlx5hws_buddy_mem * -hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx, - u32 order, bool *is_new_buddy) -{ - static struct mlx5hws_buddy_mem *buddy; - u32 new_buddy_size; - - buddy = pool->db.buddy_manager->buddies[idx]; - if (buddy) - return buddy; - - new_buddy_size = max(pool->alloc_log_sz, order); - *is_new_buddy = true; - buddy = mlx5hws_buddy_create(new_buddy_size); - if (!buddy) { - mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n", - new_buddy_size, idx); - return NULL; + mlx5hws_err(pool->ctx, "Failed to create buddy order: %zu\n", + pool->alloc_log_sz); + return -ENOMEM; } - if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, new_buddy_size, idx); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", + pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); - return NULL; + return -ENOMEM; } - pool->db.buddy_manager->buddies[idx] = buddy; + pool->db.buddy = buddy; - return buddy; + return 0; } -static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool, - int order, - u32 *buddy_idx, - int *seg) +static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_buddy_mem *buddy; - bool new_mem = false; - int ret = 0; - int i; - - *seg = -1; - - /* Find the next free place from the buddy array */ - while (*seg < 0) { - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = hws_pool_buddy_get_next_buddy(pool, i, - order, - &new_mem); - if (!buddy) { - ret = -ENOMEM; - goto out; - } - - *seg = mlx5hws_buddy_alloc_mem(buddy, order); - if (*seg >= 0) - goto found; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) { - mlx5hws_err(pool->ctx, - "Fail to allocate seg for one resource pool\n"); - ret = -ENOMEM; - goto out; - } - - if (new_mem) { - /* We have new memory pool, should be place for us */ - mlx5hws_err(pool->ctx, - "No memory for order: %d with buddy no: %d\n", - order, i); - ret = -ENOMEM; - goto out; - } - } + struct mlx5hws_buddy_mem *buddy = pool->db.buddy; + + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return -EINVAL; } -found: - *buddy_idx = i; -out: - return ret; + chunk->offset = mlx5hws_buddy_alloc_mem(buddy, chunk->order); + if (chunk->offset >= 0) + return 0; + + return -ENOMEM; } -static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret = 0; + struct mlx5hws_buddy_mem *buddy; - /* Go over the buddies and find next free slot */ - ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + buddy = pool->db.buddy; + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return; + } - return ret; + mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); } static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = pool->db.buddy_manager->buddies[i]; - if (buddy) { - mlx5hws_buddy_cleanup(buddy); - kfree(buddy); - pool->db.buddy_manager->buddies[i] = NULL; - } - } - kfree(pool->db.buddy_manager); + buddy = pool->db.buddy; + if (buddy) { + mlx5hws_buddy_cleanup(buddy); + kfree(buddy); + pool->db.buddy = NULL; + } } -static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) +static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool) { - pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL); - if (!pool->db.buddy_manager) - return -ENOMEM; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) { - bool new_buddy; + int ret; - if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) { - mlx5hws_err(pool->ctx, - "Failed allocating memory on create log_sz: %d\n", log_range); - kfree(pool->db.buddy_manager); - return -ENOMEM; - } - } + ret = hws_pool_buddy_init(pool); + if (ret) + return ret; pool->p_db_uninit = &hws_pool_buddy_db_uninit; pool->p_get_chunk = &hws_pool_buddy_db_get_chunk; @@ -278,261 +190,105 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) return 0; } -static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool, - u32 alloc_size, int idx) -{ - int ret = hws_pool_resource_alloc(pool, alloc_size, idx); - - if (ret) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - return ret; - } - - return 0; -} - -static struct mlx5hws_pool_elements * -hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx) +static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) { - struct mlx5hws_pool_elements *elem; - u32 alloc_size; - - alloc_size = pool->alloc_log_sz; + unsigned long *bitmap; - elem = kzalloc(sizeof(*elem), GFP_KERNEL); - if (!elem) + bitmap = bitmap_zalloc(1 << log_range, GFP_KERNEL); + if (!bitmap) return NULL; - /* Sharing the same resource, also means that all the elements are with size 1 */ - if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) && - !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) { - /* Currently all chunks in size 1 */ - elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order); - if (!elem->bitmap) { - mlx5hws_err(pool->ctx, - "Failed to create bitmap type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_elem; - } - - elem->log_size = alloc_size - order; - } - - if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_db; - } - - pool->db.element_manager->elements[idx] = elem; - - return elem; + bitmap_fill(bitmap, 1 << log_range); -free_db: - bitmap_free(elem->bitmap); -free_elem: - kfree(elem); - return NULL; + return bitmap; } -static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg) +static int hws_pool_bitmap_init(struct mlx5hws_pool *pool) { - unsigned int segment, size; - - size = 1 << elem->log_size; + unsigned long *bitmap; - segment = find_first_bit(elem->bitmap, size); - if (segment >= size) { - elem->is_full = true; + bitmap = hws_pool_create_and_init_bitmap(pool->alloc_log_sz); + if (!bitmap) { + mlx5hws_err(pool->ctx, "Failed to create bitmap order: %zu\n", + pool->alloc_log_sz); return -ENOMEM; } - bitmap_clear(elem->bitmap, segment, 1); - *seg = segment; - return 0; -} - -static int -hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - struct mlx5hws_pool_elements *elem; - - elem = pool->db.element_manager->elements[0]; - if (!elem) - elem = hws_pool_element_create_new_elem(pool, order, 0); - if (!elem) - goto err_no_elem; - - if (hws_pool_element_find_seg(elem, seg) != 0) { - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %zu\n", + pool->type, pool->alloc_log_sz); + bitmap_free(bitmap); return -ENOMEM; } - *idx = 0; - elem->num_of_elements++; - return 0; + pool->db.bitmap = bitmap; -err_no_elem: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; -} - -static int -hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - int ret, i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - if (!pool->resource[i]) { - ret = hws_pool_create_resource_on_index(pool, order, i); - if (ret) - goto err_no_res; - *idx = i; - *seg = 0; /* One memory slot in that element */ - return 0; - } - } - - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); - return -ENOMEM; - -err_no_res: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; + return 0; } -static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_bitmap_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret; - - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + unsigned long *bitmap, size; - return ret; -} + if (chunk->order != 0) { + mlx5hws_err(pool->ctx, "Pool only supports order 0 allocs\n"); + return -EINVAL; + } -static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); + return -EINVAL; + } - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE) - hws_pool_resource_free(pool, chunk->resource_idx); -} + size = 1 << pool->alloc_log_sz; -static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool) -{ - (void)pool; -} + chunk->offset = find_first_bit(bitmap, size); + if (chunk->offset >= size) + return -ENOMEM; -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * allocate resource and give it. - * - When free that chunk: - * the resource is freed. - */ -static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool) -{ - pool->p_db_uninit = &hws_pool_general_element_db_uninit; - pool->p_get_chunk = &hws_pool_general_element_db_get_chunk; - pool->p_put_chunk = &hws_pool_general_element_db_put_chunk; + bitmap_clear(bitmap, chunk->offset, 1); return 0; } -static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool, - struct mlx5hws_pool_elements *elem, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - hws_pool_resource_free(pool, chunk->resource_idx); - kfree(elem); - pool->db.element_manager->elements[chunk->resource_idx] = NULL; -} - -static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_pool_elements *elem; + unsigned long *bitmap; - if (unlikely(chunk->resource_idx)) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - elem = pool->db.element_manager->elements[chunk->resource_idx]; - if (!elem) { - mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); return; } - bitmap_set(elem->bitmap, chunk->offset, 1); - elem->is_full = false; - elem->num_of_elements--; - - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE && - !elem->num_of_elements) - hws_onesize_element_db_destroy_element(pool, elem, chunk); + bitmap_set(bitmap, chunk->offset, 1); } -static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_uninit(struct mlx5hws_pool *pool) { - int ret = 0; + unsigned long *bitmap; - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); - - return ret; -} - -static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool) -{ - struct mlx5hws_pool_elements *elem; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - elem = pool->db.element_manager->elements[i]; - if (elem) { - bitmap_free(elem->bitmap); - kfree(elem); - pool->db.element_manager->elements[i] = NULL; - } + bitmap = pool->db.bitmap; + if (bitmap) { + bitmap_free(bitmap); + pool->db.bitmap = NULL; } - kfree(pool->db.element_manager); } -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * aloocate the first and only slot of memory/resource - * when it ended return error. - */ -static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool) +static int hws_pool_bitmap_db_init(struct mlx5hws_pool *pool) { - pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL); - if (!pool->db.element_manager) - return -ENOMEM; + int ret; - pool->p_db_uninit = &hws_onesize_element_db_uninit; - pool->p_get_chunk = &hws_onesize_element_db_get_chunk; - pool->p_put_chunk = &hws_onesize_element_db_put_chunk; + ret = hws_pool_bitmap_init(pool); + if (ret) + return ret; + + pool->p_db_uninit = &hws_pool_bitmap_db_uninit; + pool->p_get_chunk = &hws_pool_bitmap_db_get_chunk; + pool->p_put_chunk = &hws_pool_bitmap_db_put_chunk; return 0; } @@ -542,15 +298,14 @@ static int hws_pool_db_init(struct mlx5hws_pool *pool, { int ret; - if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE) - ret = hws_pool_general_element_db_init(pool); - else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE) - ret = hws_pool_onesize_element_db_init(pool); + if (db_type == MLX5HWS_POOL_DB_TYPE_BITMAP) + ret = hws_pool_bitmap_db_init(pool); else - ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz); + ret = hws_pool_buddy_db_init(pool); if (ret) { - mlx5hws_err(pool->ctx, "Failed to init general db : %d (ret: %d)\n", db_type, ret); + mlx5hws_err(pool->ctx, "Failed to init pool type: %d (ret: %d)\n", + db_type, ret); return ret; } @@ -569,6 +324,8 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, mutex_lock(&pool->lock); ret = pool->p_get_chunk(pool, chunk); + if (ret == 0) + pool->available_elems -= 1 << chunk->order; mutex_unlock(&pool->lock); return ret; @@ -579,6 +336,7 @@ void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, { mutex_lock(&pool->lock); pool->p_put_chunk(pool, chunk); + pool->available_elems += 1 << chunk->order; mutex_unlock(&pool->lock); } @@ -599,17 +357,13 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_ pool->tbl_type = pool_attr->table_type; pool->opt_type = pool_attr->opt_type; - /* Support general db */ - if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) - res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE; - else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS)) - res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE; - else + if (pool->flags & MLX5HWS_POOL_FLAG_BUDDY) res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY; + else + res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP; pool->alloc_log_sz = pool_attr->alloc_log_sz; + pool->available_elems = 1 << pool_attr->alloc_log_sz; if (hws_pool_db_init(pool, res_db_type)) goto free_pool; @@ -623,18 +377,17 @@ free_pool: return NULL; } -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool) +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool) { - int i; - mutex_destroy(&pool->lock); - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) - if (pool->resource[i]) - hws_pool_resource_free(pool, i); + if (pool->available_elems != 1 << pool->alloc_log_sz) + mlx5hws_err(pool->ctx, "Attempting to destroy non-empty pool\n"); + + if (pool->resource) + hws_pool_resource_free(pool); hws_pool_db_unint(pool); kfree(pool); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h index 621298b352b2..33e33d5f1fb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h @@ -6,16 +6,12 @@ #define MLX5HWS_POOL_STC_LOG_SZ 15 -#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100 - enum mlx5hws_pool_type { MLX5HWS_POOL_TYPE_STE, MLX5HWS_POOL_TYPE_STC, }; struct mlx5hws_pool_chunk { - u32 resource_idx; - /* Internal offset, relative to base index */ int offset; int order; }; @@ -27,35 +23,17 @@ struct mlx5hws_pool_resource { }; enum mlx5hws_pool_flags { - /* Only a one resource in that pool */ - MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0, - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1, - /* No sharing resources between chunks */ - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2, - /* All objects are in the same size */ - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3, - /* Managed by buddy allocator */ - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4, - /* Allocate pool_type memory on pool creation */ - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5, - - /* These values should be used by the caller */ - MLX5HWS_POOL_FLAGS_FOR_STC_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS, - MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL = - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK, - MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED | - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE, + /* Managed by a buddy allocator. If this is not set only allocations of + * order 0 are supported. + */ + MLX5HWS_POOL_FLAG_BUDDY = BIT(0), }; enum mlx5hws_pool_optimize { MLX5HWS_POOL_OPTIMIZE_NONE = 0x0, MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1, MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2, + MLX5HWS_POOL_OPTIMIZE_MAX = 0x3, }; struct mlx5hws_pool_attr { @@ -68,34 +46,17 @@ struct mlx5hws_pool_attr { }; enum mlx5hws_db_type { - /* Uses for allocating chunk of big memory, each element has its own resource in the FW*/ - MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE, - /* One resource only, all the elements are with same one size */ - MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE, - /* Many resources, the memory allocated with buddy mechanism */ + /* Uses a bitmap, supports only allocations of order 0. */ + MLX5HWS_POOL_DB_TYPE_BITMAP, + /* Entries are managed using a buddy mechanism. */ MLX5HWS_POOL_DB_TYPE_BUDDY, }; -struct mlx5hws_buddy_manager { - struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - -struct mlx5hws_pool_elements { - u32 num_of_elements; - unsigned long *bitmap; - u32 log_size; - bool is_full; -}; - -struct mlx5hws_element_manager { - struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - struct mlx5hws_pool_db { enum mlx5hws_db_type type; union { - struct mlx5hws_element_manager *element_manager; - struct mlx5hws_buddy_manager *buddy_manager; + unsigned long *bitmap; + struct mlx5hws_buddy_mem *buddy; }; }; @@ -111,11 +72,11 @@ struct mlx5hws_pool { enum mlx5hws_pool_flags flags; struct mutex lock; /* protect the pool */ size_t alloc_log_sz; + size_t available_elems; enum mlx5hws_table_type tbl_type; enum mlx5hws_pool_optimize opt_type; - struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - /* DB */ + struct mlx5hws_pool_resource *resource; + struct mlx5hws_pool_resource *mirror_resource; struct mlx5hws_pool_db db; /* Functions */ mlx5hws_pool_unint_db p_db_uninit; @@ -127,7 +88,7 @@ struct mlx5hws_pool * mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_attr); -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool); +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool); int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); @@ -135,17 +96,37 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); -static inline u32 -mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_id(struct mlx5hws_pool *pool) { - return pool->resource[chunk->resource_idx]->base_id; + return pool->resource->base_id; } -static inline u32 -mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool) { - return pool->mirror_resource[chunk->resource_idx]->base_id; + return pool->mirror_resource->base_id; +} + +static inline bool +mlx5hws_pool_empty(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == 0; + mutex_unlock(&pool->lock); + + return ret; +} + +static inline bool +mlx5hws_pool_full(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == (1 << pool->alloc_log_sz); + mutex_unlock(&pool->lock); + + return ret; } #endif /* MLX5HWS_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c index a27a2d5ffc7b..a94f094e72ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c @@ -3,10 +3,8 @@ #include "internal.h" -static void hws_rule_skip(struct mlx5hws_matcher *matcher, - struct mlx5hws_match_template *mt, - u32 flow_source, - bool *skip_rx, bool *skip_tx) +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx) { /* By default FDB rules are added to both RX and TX */ *skip_rx = false; @@ -14,20 +12,21 @@ static void hws_rule_skip(struct mlx5hws_matcher *matcher, if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) { *skip_rx = true; - } else if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { + return; + } + + if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { *skip_tx = true; - } else { - /* If no flow source was set for current rule, - * check for flow source in matcher attributes. - */ - if (matcher->attr.optimize_flow_src) { - *skip_tx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE; - *skip_rx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT; - return; - } + return; } + + /* If no flow source was set for current rule, + * check for flow source in matcher attributes. + */ + *skip_tx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE; + *skip_rx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT; } static void @@ -66,7 +65,8 @@ static void hws_rule_init_dep_wqe(struct mlx5hws_send_ring_dep_wqe *dep_wqe, attr->rule_idx : 0; if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - hws_rule_skip(matcher, mt, attr->flow_source, &skip_rx, &skip_tx); + mlx5hws_rule_skip(matcher, attr->flow_source, + &skip_rx, &skip_tx); if (!skip_rx) { dep_wqe->rtc_0 = matcher->match_ste.rtc_0_id; @@ -195,44 +195,30 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule, } } -static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule) +static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule, + u16 queue_id, bool skip_rx, + bool skip_tx) { struct mlx5hws_matcher *matcher = rule->matcher; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_pool_chunk ste = {0}; - int ret; - - action_ste = &matcher->action_ste; - ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes)); - ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste); - if (unlikely(ret)) { - mlx5hws_err(matcher->tbl->ctx, - "Failed to allocate STE for rule actions"); - return ret; - } - - rule->action_ste.pool = matcher->action_ste.pool; - rule->action_ste.num_stes = matcher->action_ste.max_stes; - rule->action_ste.index = ste.offset; + struct mlx5hws_context *ctx = matcher->tbl->ctx; - return 0; + rule->action_ste.ste.order = + ilog2(roundup_pow_of_two(matcher->num_of_action_stes)); + return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id], + skip_rx, skip_tx, + &rule->action_ste); } -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste) +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste) { - struct mlx5hws_pool_chunk ste = {0}; - - if (!action_ste->num_stes) + if (!action_ste->action_tbl) return; - ste.order = ilog2(roundup_pow_of_two(action_ste->num_stes)); - ste.offset = action_ste->index; - /* This release is safe only when the rule match STE was deleted * (when the rule is being deleted) or replaced with the new STE that * isn't pointing to old action STEs (when the rule is being updated). */ - mlx5hws_pool_chunk_free(action_ste->pool, &ste); + mlx5hws_action_ste_chunk_free(action_ste); } static void hws_rule_create_init(struct mlx5hws_rule *rule, @@ -250,22 +236,15 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, rule->rtc_0 = 0; rule->rtc_1 = 0; - rule->action_ste.pool = NULL; - rule->action_ste.num_stes = 0; - rule->action_ste.index = -1; - rule->status = MLX5HWS_RULE_STATUS_CREATING; } else { rule->status = MLX5HWS_RULE_STATUS_UPDATING; + /* Save the old action STE info so we can free it after writing + * new action STEs and a corresponding match STE. + */ + rule->old_action_ste = rule->action_ste; } - /* Initialize the old action STE info - shallow-copy action_ste. - * In create flow this will set old_action_ste fields to initial values. - * In update flow this will save the existing action STE info, - * so that we will later use it to free old STEs. - */ - rule->old_action_ste = rule->action_ste; - rule->pending_wqes = 0; /* Init default send STE attributes */ @@ -277,7 +256,6 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, /* Init default action apply */ apply->tbl_type = tbl->type; apply->common_res = &ctx->common_res; - apply->jump_to_action_stc = matcher->action_ste.stc.offset; apply->require_dep = 0; } @@ -353,17 +331,24 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule, if (action_stes) { /* Allocate action STEs for rules that need more than match STE */ - ret = hws_rule_alloc_action_ste(rule); + ret = mlx5hws_rule_alloc_action_ste(rule, attr->queue_id, + !!ste_attr.rtc_0, + !!ste_attr.rtc_1); if (ret) { mlx5hws_err(ctx, "Failed to allocate action memory %d", ret); mlx5hws_send_abort_new_dep_wqe(queue); return ret; } + apply.jump_to_action_stc = + rule->action_ste.action_tbl->stc.offset; /* Skip RX/TX based on the dep_wqe init */ - ste_attr.rtc_0 = dep_wqe->rtc_0 ? matcher->action_ste.rtc_0_id : 0; - ste_attr.rtc_1 = dep_wqe->rtc_1 ? matcher->action_ste.rtc_1_id : 0; + ste_attr.rtc_0 = dep_wqe->rtc_0 ? + rule->action_ste.action_tbl->rtc_0_id : 0; + ste_attr.rtc_1 = dep_wqe->rtc_1 ? + rule->action_ste.action_tbl->rtc_1_id : 0; /* Action STEs are written to a specific index last to first */ - ste_attr.direct_index = rule->action_ste.index + action_stes; + ste_attr.direct_index = + rule->action_ste.ste.offset + action_stes; apply.next_direct_idx = ste_attr.direct_index; } else { apply.next_direct_idx = 0; @@ -670,6 +655,124 @@ int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule, return 0; } +static u8 hws_rule_ethertype_to_matcher_ipv(u32 ethertype) +{ + switch (ethertype) { + case ETH_P_IP: + return MLX5HWS_MATCHER_IPV_4; + case ETH_P_IPV6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static u8 hws_rule_ip_version_to_matcher_ipv(u32 ip_version) +{ + switch (ip_version) { + case 4: + return MLX5HWS_MATCHER_IPV_4; + case 6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static int hws_rule_check_outer_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 outer_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv, ver; + + if (matcher->matches_outer_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ethertype); + outer_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_outer_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ip_version); + outer_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ether != outer_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent outer ethertype and ip version\n"); + return -EINVAL; + } + + outer_ipv = outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? + outer_ipv_ether : outer_ipv_ip; + if (outer_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->outer_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv != matcher->outer_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on outer IP version\n"); + return -EINVAL; + } + matcher->outer_ip_version = outer_ipv; + + return 0; +} + +static int hws_rule_check_inner_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 inner_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv, ver; + + if (matcher->matches_inner_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ethertype); + inner_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_inner_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ip_version); + inner_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ether != inner_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent inner ethertype and ip version\n"); + return -EINVAL; + } + + inner_ipv = inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? + inner_ipv_ether : inner_ipv_ip; + if (inner_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->inner_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv != matcher->inner_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on inner IP version\n"); + return -EINVAL; + } + matcher->inner_ip_version = inner_ipv; + + return 0; +} + +static int hws_rule_check_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + int ret; + + ret = hws_rule_check_outer_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + ret = hws_rule_check_inner_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + return 0; +} + int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, u8 mt_idx, u32 *match_param, @@ -680,6 +783,10 @@ int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, { int ret; + ret = hws_rule_check_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + rule_handle->matcher = matcher; ret = hws_rule_enqueue_precheck_create(rule_handle, attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h index b5ee94ac449b..d0f082b8dbf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h @@ -43,12 +43,6 @@ struct mlx5hws_rule_match_tag { }; }; -struct mlx5hws_rule_action_ste_info { - struct mlx5hws_pool *pool; - int index; /* STE array index */ - u8 num_stes; -}; - struct mlx5hws_rule_resize_info { u32 rtc_0; u32 rtc_1; @@ -64,8 +58,8 @@ struct mlx5hws_rule { struct mlx5hws_rule_match_tag tag; struct mlx5hws_rule_resize_info *resize_info; }; - struct mlx5hws_rule_action_ste_info action_ste; - struct mlx5hws_rule_action_ste_info old_action_ste; + struct mlx5hws_action_ste_chunk action_ste; + struct mlx5hws_action_ste_chunk old_action_ste; u32 rtc_0; /* The RTC into which the STE was inserted */ u32 rtc_1; /* The RTC into which the STE was inserted */ u8 status; /* enum mlx5hws_rule_status */ @@ -75,7 +69,10 @@ struct mlx5hws_rule { */ }; -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste); +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx); + +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste); int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, void *queue, void *user_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index cb6abc4ab7df..c4b22be19a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -344,18 +344,133 @@ hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue, } } +static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ring_priv *priv, + struct mlx5_cqe64 *cqe) +{ + u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0; + struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx; + u32 opcode = cqe ? get_cqe_opcode(cqe) : 0; + struct mlx5hws_rule *rule = priv->rule; + + /* If something bad happens and lots of rules are failing, we don't + * want to pollute dmesg. Print only the first bad cqe per engine, + * the one that started the avalanche. + */ + if (queue->error_cqe_printed) + return; + + queue->error_cqe_printed = true; + + if (mlx5hws_rule_move_in_progress(rule)) + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" : + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" : + "UNKNOWN", + rule->pending_wqes); + else + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->status == + MLX5HWS_RULE_STATUS_CREATING ? "CREATING" : + rule->status == + MLX5HWS_RULE_STATUS_DELETING ? "DELETING" : + rule->status == + MLX5HWS_RULE_STATUS_FAILING ? "FAILING" : + rule->status == + MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA", + rule->status, + rule->pending_wqes); + + mlx5hws_err(ctx, " rule 0x%08llx: matcher 0x%llx %s\n", + HWS_PTR_TO_ID(rule), + HWS_PTR_TO_ID(rule->matcher), + (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ? + "(isolated)" : ""); + + if (!cqe) { + mlx5hws_err(ctx, " rule 0x%08llx: no CQE\n", + HWS_PTR_TO_ID(rule)); + return; + } + + mlx5hws_err(ctx, " rule 0x%08llx: cqe->opcode = %d %s\n", + HWS_PTR_TO_ID(rule), opcode, + opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" : + opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " "); + + if (opcode == MLX5_CQE_REQ_ERR) { + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_error_syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[16]); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_syndrome_type = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[17] >> 4); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- vendor_err_synd = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->vendor_err_synd); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->syndrome); + } + + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->byte_cnt = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-- UPDATE STATUS = %s\n", + HWS_PTR_TO_ID(rule), + (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ? + "FAILURE" : "SUCCESS"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------- SYNDROME = %s\n", + HWS_PTR_TO_ID(rule), + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ? + "SET_FLOW_FAIL" : + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ? + "DISABLE_FLOW_FAIL" : "UNKNOWN"); + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->sop_drop_qpn = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-send wqe opcode = 0x%02x %s\n", + HWS_PTR_TO_ID(rule), wqe_opcode, + wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ? + "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------------ qpn = 0x%06x\n", + HWS_PTR_TO_ID(rule), + be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff); +} + static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue, struct mlx5hws_send_ring_priv *priv, u16 wqe_cnt, - enum mlx5hws_flow_op_status *status) + enum mlx5hws_flow_op_status *status, + struct mlx5_cqe64 *cqe) { priv->rule->pending_wqes--; - if (*status == MLX5HWS_FLOW_OP_ERROR) { + if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) { if (priv->retry_id) { + /* If there is a retry_id, then it's not an error yet, + * retry to insert this rule in the collision RTC. + */ hws_send_engine_retry_post_send(queue, priv, wqe_cnt); return; } + hws_send_engine_dump_error_cqe(queue, priv, cqe); /* Some part of the rule failed */ priv->rule->status = MLX5HWS_RULE_STATUS_FAILING; *priv->used_id = 0; @@ -420,7 +535,8 @@ static void hws_send_engine_update(struct mlx5hws_send_engine *queue, if (priv->user_data) { if (priv->rule) { - hws_send_engine_update_rule(queue, priv, wqe_cnt, &status); + hws_send_engine_update_rule(queue, priv, wqe_cnt, + &status, cqe); /* Completion is provided on the last rule WQE */ if (priv->rule->pending_wqes) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h index f833092235c1..3fb8e99309b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h @@ -140,6 +140,7 @@ struct mlx5hws_send_engine { u16 used_entries; u16 num_entries; bool err; + bool error_cqe_printed; struct mutex lock; /* Protects the send engine */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index ab1297531232..568f691733f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -342,10 +342,10 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id); } -static int hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, - u32 ft_id, - u32 fw_ft_type, - u32 next_ft_id) +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id) { struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; @@ -389,10 +389,10 @@ int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl, if (dst_tbl) { if (list_empty(&dst_tbl->matchers_list)) { /* Connect src_tbl last_ft to dst_tbl start anchor */ - ret = hws_table_ft_set_next_ft(src_tbl->ctx, - last_ft_id, - src_tbl->fw_ft_type, - dst_tbl->ft_id); + ret = mlx5hws_table_ft_set_next_ft(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + dst_tbl->ft_id); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h index dd50420eec9e..0400cce0c317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h @@ -65,4 +65,9 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, u32 rtc_0_id, u32 rtc_1_id); +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id); + #endif /* MLX5HWS_TABLE_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c index 8007d3f523c9..f367997ab61e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c @@ -833,15 +833,21 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, return steering_caps; } -int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) { + struct mlx5dr_action *dr_action; + switch (pkt_reformat->reformat_type) { case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: case MLX5_REFORMAT_TYPE_INSERT_HDR: - return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->fs_dr_action.dr_action); + dr_action = pkt_reformat->fs_dr_action.dr_action; + *reformat_id = mlx5dr_action_get_pkt_reformat_id(dr_action); + return 0; } return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h index 99a3b2eff6b8..f869f2daefbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h @@ -38,7 +38,9 @@ struct mlx5_fs_dr_table { bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); -int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat); +int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); @@ -49,9 +51,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) return NULL; } -static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +static inline int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) { - return 0; + return -EOPNOTSUPP; } static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index d10d4c396040..da5c24fc7b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -465,19 +465,22 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *node_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.node_guid); - +out: kvfree(out); - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); @@ -519,19 +522,22 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, nic_vport_context.qkey_violation_counter); - +out: kvfree(out); - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 740b719e7072..2f0316616fa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -378,6 +378,9 @@ err_create_cq: mlx5_free_bfreg(mdev, &sq->bfreg); err_alloc_bfreg: kfree(sq); + + if (mdev->wc_state == MLX5_WC_STATE_UNSUPPORTED) + mlx5_core_warn(mdev, "Write combining is not supported\n"); } bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index fb2e5b844c15..d1f8a72cae53 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -142,8 +142,10 @@ static int mlxbf_gige_open(struct net_device *netdev) mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: clean_port failed: %pe\n", ERR_PTR(err)); return err; + } /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -154,19 +156,25 @@ static int mlxbf_gige_open(struct net_device *netdev) phy_start(phydev); err = mlxbf_gige_tx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: tx_init failed: %pe\n", ERR_PTR(err)); goto phy_deinit; + } err = mlxbf_gige_rx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: rx_init failed: %pe\n", ERR_PTR(err)); goto tx_deinit; + } netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll); napi_enable(&priv->napi); netif_start_queue(netdev); err = mlxbf_gige_request_irqs(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: request_irqs failed: %pe\n", ERR_PTR(err)); goto napi_deinit; + } mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); @@ -418,8 +426,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) /* Attach MDIO device */ err = mlxbf_gige_mdio_probe(pdev, priv); - if (err) + if (err) { + dev_err(priv->dev, "probe: mdio_probe failed: %pe\n", ERR_PTR(err)); return err; + } priv->base = base; priv->llu_base = llu_base; @@ -438,7 +448,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + dev_err(&pdev->dev, "DMA configuration failed: %pe\n", ERR_PTR(err)); goto out; } @@ -447,8 +457,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); - if (phy_irq < 0) { - dev_err(&pdev->dev, "Error getting PHY irq. Use polling instead"); + if (phy_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out; + } else if (phy_irq < 0) { phy_irq = PHY_POLL; } @@ -466,7 +478,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) mlxbf_gige_link_cfgs[priv->hw_version].adjust_link, mlxbf_gige_link_cfgs[priv->hw_version].phy_mode); if (err) { - dev_err(&pdev->dev, "Could not attach to PHY\n"); + dev_err(&pdev->dev, "Could not attach to PHY: %pe\n", ERR_PTR(err)); goto out; } @@ -477,7 +489,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = register_netdev(netdev); if (err) { - dev_err(&pdev->dev, "Failed to register netdev\n"); + dev_err(&pdev->dev, "Failed to register netdev: %pe\n", ERR_PTR(err)); phy_disconnect(phydev); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index e746cd9c68ed..eac9a14a6058 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -205,11 +205,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_params mlxsw_thermal_params = { +static const struct thermal_zone_params mlxsw_thermal_params = { .no_hwmon = true, }; -static struct thermal_zone_device_ops mlxsw_thermal_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_ops = { .should_bind = mlxsw_thermal_should_bind, .get_temp = mlxsw_thermal_get_temp, }; @@ -252,7 +252,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_module_temp_get, }; @@ -280,7 +280,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_gearbox_temp_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 058dcabfaa2e..8769cba2c746 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -156,7 +156,7 @@ static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci) } strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx", sizeof(mlxsw_pci->napi_dev_rx->name)); - dev_set_threaded(mlxsw_pci->napi_dev_rx, true); + netif_threaded_enable(mlxsw_pci->napi_dev_rx); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3080ea032e7f..618957d65663 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1159,63 +1159,31 @@ static int mlxsw_sp_set_features(struct net_device *dev, return 0; } -static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; - int err; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + config, extack); } -static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; - int err; - - err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + config); } static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) { - struct hwtstamp_config config = {0}; - - mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); -} - -static int -mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct kernel_hwtstamp_config config = {}; - switch (cmd) { - case SIOCSHWTSTAMP: - return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); - case SIOCGHWTSTAMP: - return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); - default: - return -EOPNOTSUPP; - } + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config, + NULL); } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -1232,7 +1200,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, - .ndo_eth_ioctl = mlxsw_sp_port_ioctl, + .ndo_hwtstamp_get = mlxsw_sp_port_hwtstamp_get, + .ndo_hwtstamp_set = mlxsw_sp_port_hwtstamp_set, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 37cd1d002b3b..b03ff9e044f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -233,9 +233,10 @@ struct mlxsw_sp_ptp_ops { u16 local_port); int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); @@ -351,7 +352,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_flow_block *eg_flow_block; struct { struct delayed_work shaper_dw; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; u16 ing_types; u16 egr_types; struct mlxsw_sp_ptp_port_stats stats; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 3f64cdbabfa3..0a8fb9c842d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -262,7 +262,7 @@ err_port_pause_configure: } struct mlxsw_sp_port_hw_stats { - char str[ETH_GSTRING_LEN]; + char str[ETH_GSTRING_LEN] __nonstring; u64 (*getter)(const char *payload); bool cells_bytes; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index ca8b9d18fbb9..5b9f0844b8f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -46,7 +46,7 @@ struct mlxsw_sp2_ptp_state { refcount_t ptp_port_enabled_ref; /* Number of ports with time stamping * enabled. */ - struct hwtstamp_config config; + struct kernel_hwtstamp_config config; struct mutex lock; /* Protects 'config' and HW configuration. */ }; @@ -131,7 +131,7 @@ static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp1_ptp_clock *clock, return (u64) frc_l | (u64) frc_h2 << 32; } -static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +static u64 mlxsw_sp1_ptp_read_frc(struct cyclecounter *cc) { struct mlxsw_sp1_ptp_clock *clock = container_of(cc, struct mlxsw_sp1_ptp_clock, cycles); @@ -1083,14 +1083,14 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) } int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { *config = mlxsw_sp_port->ptp.hwtstamp_config; return 0; } static int -mlxsw_sp1_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp1_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1246,7 +1246,8 @@ void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) } int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { enum hwtstamp_rx_filters rx_filter; u16 ing_types; @@ -1270,7 +1271,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; return 0; @@ -1451,7 +1452,7 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, } int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct mlxsw_sp2_ptp_state *ptp_state; @@ -1465,7 +1466,7 @@ int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp2_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1542,7 +1543,7 @@ static int mlxsw_sp2_ptp_mtpcpc_set(struct mlxsw_sp *mlxsw_sp, bool ptp_trap_en, static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1556,7 +1557,7 @@ static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, } static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1571,7 +1572,7 @@ static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1592,7 +1593,7 @@ static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1614,11 +1615,12 @@ err_ptp_disable: } int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { + struct kernel_hwtstamp_config new_config; struct mlxsw_sp2_ptp_state *ptp_state; enum hwtstamp_rx_filters rx_filter; - struct hwtstamp_config new_config; u16 new_ing_types, new_egr_types; bool ptp_enabled; int err; @@ -1652,7 +1654,7 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->ptp.ing_types = new_ing_types; mlxsw_sp_port->ptp.egr_types = new_egr_types; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; mutex_unlock(&ptp_state->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 102db9060135..df37f1470830 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -34,10 +34,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u64 timestamp); int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); @@ -65,10 +66,11 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u16 local_port); int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); @@ -117,14 +119,15 @@ mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, static inline int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -181,14 +184,15 @@ static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, static inline int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } |